mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2024-11-24 16:19:15 +08:00
update vulkan samples with SPIR-V shaders
This commit is contained in:
parent
5c3ec60fae
commit
7a5b3e6c8c
|
@ -19,8 +19,17 @@ For Linux:
|
||||||
-- Install "libxcb1-dev" and "xorg-dev" as GLFW3 is depended on it
|
-- Install "libxcb1-dev" and "xorg-dev" as GLFW3 is depended on it
|
||||||
-- Add Vulkan and GLFW3 libraries directories to LD_LIBRARY_PATH
|
-- Add Vulkan and GLFW3 libraries directories to LD_LIBRARY_PATH
|
||||||
|
|
||||||
|
|
||||||
For Linux aarch64(L4T):
|
For Linux aarch64(L4T):
|
||||||
-- Install GLFW3 library using "sudo apt-get install libglfw3-dev" this will provide glfw3
|
-- Install GLFW3 library using "sudo apt-get install libglfw3-dev" this will provide glfw3
|
||||||
-- install above will also provide libvulkan-dev as dependencies
|
-- install above will also provide libvulkan-dev as dependencies
|
||||||
-- Add Vulkan and GLFW3 libraries directories to LD_LIBRARY_PATH
|
-- Add Vulkan and GLFW3 libraries directories to LD_LIBRARY_PATH
|
||||||
-- Pass path to vulkan sdk while building 'make VULKAN_SDK_PATH=<PATH_TO_VULKAN_SDK>', VULKAN_SDK_PATH in this scenario is typically "/usr"
|
-- Pass path to vulkan sdk while building 'make VULKAN_SDK_PATH=<PATH_TO_VULKAN_SDK>', VULKAN_SDK_PATH in this scenario is typically "/usr"
|
||||||
|
|
||||||
|
|
||||||
|
For Shader changes:
|
||||||
|
-- Update the sinewave.vert and/or sinewave.frag shader source files as required
|
||||||
|
-- Use the glslc shader compiler from the installed Vulkan SDK's bin directory to compile shaders as:
|
||||||
|
glslc sinewave.vert -o vert.spv
|
||||||
|
glslc sinewave.frag -o frag.spv
|
||||||
|
** Make sure to add glslc's path to your PATH environment variable **
|
||||||
|
|
BIN
Samples/simpleVulkan/frag.spv
Normal file
BIN
Samples/simpleVulkan/frag.spv
Normal file
Binary file not shown.
|
@ -92,9 +92,9 @@ class VulkanCudaSineWave : public VulkanBaseApp {
|
||||||
}
|
}
|
||||||
// Add our compiled vulkan shader files
|
// Add our compiled vulkan shader files
|
||||||
char *vertex_shader_path =
|
char *vertex_shader_path =
|
||||||
sdkFindFilePath("sinewave.vert", execution_path.c_str());
|
sdkFindFilePath("vert.spv", execution_path.c_str());
|
||||||
char *fragment_shader_path =
|
char *fragment_shader_path =
|
||||||
sdkFindFilePath("sinewave.frag", execution_path.c_str());
|
sdkFindFilePath("frag.spv", execution_path.c_str());
|
||||||
m_shaderFiles.push_back(
|
m_shaderFiles.push_back(
|
||||||
std::make_pair(VK_SHADER_STAGE_VERTEX_BIT, vertex_shader_path));
|
std::make_pair(VK_SHADER_STAGE_VERTEX_BIT, vertex_shader_path));
|
||||||
m_shaderFiles.push_back(
|
m_shaderFiles.push_back(
|
||||||
|
|
BIN
Samples/simpleVulkan/vert.spv
Normal file
BIN
Samples/simpleVulkan/vert.spv
Normal file
Binary file not shown.
35
Samples/simpleVulkanMMAP/Build_instructions.txt
Normal file
35
Samples/simpleVulkanMMAP/Build_instructions.txt
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
For Windows:
|
||||||
|
Follow these steps once you have installed Vulkan SDK for Windows from https://www.lunarg.com/vulkan-sdk/
|
||||||
|
-- Install GLFW3 library at suitable location
|
||||||
|
-- Open the simpleVulkanMMAP VS project file.
|
||||||
|
To add the GLFW3 library path
|
||||||
|
-- Right-click on the project name "simpleVulkanMMAP" and click "Properties"
|
||||||
|
-- In the Property Pages window go to Linker -> General. In "Additional Library Directories", edit and add the path to glfw3dll.lib
|
||||||
|
To add the GLFW3 headers path
|
||||||
|
-- Right-click on the project name "simpleVulkanMMAP" and click "Properties"
|
||||||
|
-- In the Property Pages window go to the "VC++ Directories" section. In "Include Directories", edit and add the path to the GLFW3 headers include directory.
|
||||||
|
** Make sure to add the path to glfw3.dll to your PATH environment variable **
|
||||||
|
|
||||||
|
|
||||||
|
For Linux:
|
||||||
|
-- Install the Vulkan SDK from https://www.lunarg.com/vulkan-sdk/ and follow environment setup instructions.
|
||||||
|
-- Install GLFW3 library through your OS package repository. For example: apt-get for Ubuntu and dnf for RHEL/CentOS. Below is for Ubuntu:
|
||||||
|
sudo apt-get install libglfw3
|
||||||
|
sudo apt-get install libglfw3-dev
|
||||||
|
-- Install "libxcb1-dev" and "xorg-dev", as GLFW3 depends on them
|
||||||
|
-- Add the Vulkan and GLFW3 library directories to LD_LIBRARY_PATH
|
||||||
|
|
||||||
|
|
||||||
|
For Linux aarch64(L4T):
|
||||||
|
-- Install the GLFW3 library using "sudo apt-get install libglfw3-dev"; this will provide glfw3
|
||||||
|
-- The install above will also provide libvulkan-dev as a dependency
|
||||||
|
-- Add the Vulkan and GLFW3 library directories to LD_LIBRARY_PATH
|
||||||
|
-- Pass the path to the Vulkan SDK while building: 'make VULKAN_SDK_PATH=<PATH_TO_VULKAN_SDK>'. VULKAN_SDK_PATH in this scenario is typically "/usr"
|
||||||
|
|
||||||
|
|
||||||
|
For Shader changes:
|
||||||
|
-- Update the montecarlo.vert and/or montecarlo.frag shader source files as required
|
||||||
|
-- Use the glslc shader compiler from the installed Vulkan SDK's bin directory to compile shaders as:
|
||||||
|
glslc montecarlo.vert -o vert.spv
|
||||||
|
glslc montecarlo.frag -o frag.spv
|
||||||
|
** Make sure to add glslc's path to your PATH environment variable **
|
|
@ -25,9 +25,9 @@
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* See: https://www.piday.org/million/
|
* See: https://www.piday.org/million/
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "MonteCarloPi.h"
|
#include "MonteCarloPi.h"
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
@ -37,15 +37,16 @@
|
||||||
|
|
||||||
#define ROUND_UP_TO_GRANULARITY(x, n) (((x + n - 1) / n) * n)
|
#define ROUND_UP_TO_GRANULARITY(x, n) (((x + n - 1) / n) * n)
|
||||||
|
|
||||||
// `ipcHandleTypeFlag` specifies the platform specific handle type this sample
|
// `ipcHandleTypeFlag` specifies the platform specific handle type this sample
|
||||||
// uses for importing and exporting memory allocation. On Linux this sample
|
// uses for importing and exporting memory allocation. On Linux this sample
|
||||||
// specifies the type as CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR meaning that
|
// specifies the type as CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR meaning that
|
||||||
// file descriptors will be used. On Windows this sample specifies the type as
|
// file descriptors will be used. On Windows this sample specifies the type as
|
||||||
// CU_MEM_HANDLE_TYPE_WIN32 meaning that NT HANDLEs will be used. The
|
// CU_MEM_HANDLE_TYPE_WIN32 meaning that NT HANDLEs will be used. The
|
||||||
// ipcHandleTypeFlag variable is a convenience variable and is passed by value
|
// ipcHandleTypeFlag variable is a convenience variable and is passed by value
|
||||||
// to individual requests.
|
// to individual requests.
|
||||||
#if defined(__linux__)
|
#if defined(__linux__)
|
||||||
CUmemAllocationHandleType ipcHandleTypeFlag = CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR;
|
CUmemAllocationHandleType ipcHandleTypeFlag =
|
||||||
|
CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR;
|
||||||
#else
|
#else
|
||||||
CUmemAllocationHandleType ipcHandleTypeFlag = CU_MEM_HANDLE_TYPE_WIN32;
|
CUmemAllocationHandleType ipcHandleTypeFlag = CU_MEM_HANDLE_TYPE_WIN32;
|
||||||
#endif
|
#endif
|
||||||
|
@ -53,223 +54,248 @@ CUmemAllocationHandleType ipcHandleTypeFlag = CU_MEM_HANDLE_TYPE_WIN32;
|
||||||
// Windows-specific LPSECURITYATTRIBUTES
|
// Windows-specific LPSECURITYATTRIBUTES
|
||||||
void getDefaultSecurityDescriptor(CUmemAllocationProp *prop) {
|
void getDefaultSecurityDescriptor(CUmemAllocationProp *prop) {
|
||||||
#if defined(__linux__)
|
#if defined(__linux__)
|
||||||
return;
|
return;
|
||||||
#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
||||||
static const char sddl[] = "D:P(OA;;GARCSDWDWOCCDCLCSWLODTWPRPCRFA;;;WD)";
|
static const char sddl[] = "D:P(OA;;GARCSDWDWOCCDCLCSWLODTWPRPCRFA;;;WD)";
|
||||||
static OBJECT_ATTRIBUTES objAttributes;
|
static OBJECT_ATTRIBUTES objAttributes;
|
||||||
static bool objAttributesConfigured = false;
|
static bool objAttributesConfigured = false;
|
||||||
|
|
||||||
if (!objAttributesConfigured) {
|
if (!objAttributesConfigured) {
|
||||||
PSECURITY_DESCRIPTOR secDesc;
|
PSECURITY_DESCRIPTOR secDesc;
|
||||||
BOOL result = ConvertStringSecurityDescriptorToSecurityDescriptorA(
|
BOOL result = ConvertStringSecurityDescriptorToSecurityDescriptorA(
|
||||||
sddl, SDDL_REVISION_1, &secDesc, NULL);
|
sddl, SDDL_REVISION_1, &secDesc, NULL);
|
||||||
if (result == 0) {
|
if (result == 0) {
|
||||||
printf("IPC failure: getDefaultSecurityDescriptor Failed! (%d)\n",
|
printf("IPC failure: getDefaultSecurityDescriptor Failed! (%d)\n",
|
||||||
GetLastError());
|
GetLastError());
|
||||||
}
|
|
||||||
|
|
||||||
InitializeObjectAttributes(&objAttributes, NULL, 0, NULL, secDesc);
|
|
||||||
|
|
||||||
objAttributesConfigured = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
prop->win32HandleMetaData = &objAttributes;
|
InitializeObjectAttributes(&objAttributes, NULL, 0, NULL, secDesc);
|
||||||
return;
|
|
||||||
|
objAttributesConfigured = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
prop->win32HandleMetaData = &objAttributes;
|
||||||
|
return;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void monte_carlo_kernel(vec2 *xyVector, float *pointsInsideCircle, float *numPointsInCircle, unsigned int numPoints, float time)
|
__global__ void monte_carlo_kernel(vec2 *xyVector, float *pointsInsideCircle,
|
||||||
{
|
float *numPointsInCircle,
|
||||||
const size_t stride = gridDim.x * blockDim.x;
|
unsigned int numPoints, float time) {
|
||||||
size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
|
const size_t stride = gridDim.x * blockDim.x;
|
||||||
float count = 0.0f;
|
size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
|
float count = 0.0f;
|
||||||
|
|
||||||
curandState rgnState;
|
curandState rgnState;
|
||||||
curand_init((unsigned long long)time, tid, 0, &rgnState);
|
curand_init((unsigned long long)time, tid, 0, &rgnState);
|
||||||
|
|
||||||
for (; tid < numPoints; tid += stride) {
|
for (; tid < numPoints; tid += stride) {
|
||||||
float x = curand_uniform(&rgnState);
|
float x = curand_uniform(&rgnState);
|
||||||
float y = curand_uniform(&rgnState);
|
float y = curand_uniform(&rgnState);
|
||||||
x = (2.0f * x) - 1.0f;
|
x = (2.0f * x) - 1.0f;
|
||||||
y = (2.0f * y) - 1.0f;
|
y = (2.0f * y) - 1.0f;
|
||||||
xyVector[tid][0] = x;
|
xyVector[tid][0] = x;
|
||||||
xyVector[tid][1] = y;
|
xyVector[tid][1] = y;
|
||||||
|
|
||||||
// Compute the distance of this point form the center(0, 0)
|
// Compute the distance of this point form the center(0, 0)
|
||||||
float dist = sqrtf((x*x) + (y*y));
|
float dist = sqrtf((x * x) + (y * y));
|
||||||
|
|
||||||
// If distance is less than the radius of the unit circle, the point lies in the circle.
|
// If distance is less than the radius of the unit circle, the point lies in
|
||||||
pointsInsideCircle[tid] = (dist <= 1.0f);
|
// the circle.
|
||||||
count += (dist <= 1.0f);
|
pointsInsideCircle[tid] = (dist <= 1.0f);
|
||||||
}
|
count += (dist <= 1.0f);
|
||||||
atomicAdd(numPointsInCircle, count);
|
}
|
||||||
|
atomicAdd(numPointsInCircle, count);
|
||||||
}
|
}
|
||||||
|
|
||||||
MonteCarloPiSimulation::MonteCarloPiSimulation(size_t num_points) :
|
MonteCarloPiSimulation::MonteCarloPiSimulation(size_t num_points)
|
||||||
m_xyVector(nullptr),
|
: m_xyVector(nullptr),
|
||||||
m_pointsInsideCircle(nullptr),
|
m_pointsInsideCircle(nullptr),
|
||||||
m_totalPointsInsideCircle(0),
|
m_totalPointsInsideCircle(0),
|
||||||
m_totalPointsSimulated(0),
|
m_totalPointsSimulated(0),
|
||||||
m_numPoints(num_points)
|
m_numPoints(num_points) {}
|
||||||
{
|
|
||||||
|
MonteCarloPiSimulation::~MonteCarloPiSimulation() {
|
||||||
|
if (m_numPointsInCircle) {
|
||||||
|
checkCudaErrors(cudaFree(m_numPointsInCircle));
|
||||||
|
m_numPointsInCircle = nullptr;
|
||||||
|
}
|
||||||
|
if (m_hostNumPointsInCircle) {
|
||||||
|
checkCudaErrors(cudaFreeHost(m_hostNumPointsInCircle));
|
||||||
|
m_hostNumPointsInCircle = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
cleanupSimulationAllocations();
|
||||||
}
|
}
|
||||||
|
|
||||||
MonteCarloPiSimulation::~MonteCarloPiSimulation()
|
void MonteCarloPiSimulation::initSimulation(int cudaDevice,
|
||||||
{
|
cudaStream_t stream) {
|
||||||
if (m_numPointsInCircle) {
|
m_cudaDevice = cudaDevice;
|
||||||
checkCudaErrors(cudaFree(m_numPointsInCircle));
|
getIdealExecutionConfiguration();
|
||||||
m_numPointsInCircle = nullptr;
|
|
||||||
}
|
|
||||||
if (m_hostNumPointsInCircle) {
|
|
||||||
checkCudaErrors(cudaFreeHost(m_hostNumPointsInCircle));
|
|
||||||
m_hostNumPointsInCircle = nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
cleanupSimulationAllocations();
|
// Allocate a position buffer that contains random location of the points in
|
||||||
|
// XY cartesian plane.
|
||||||
|
// Allocate a bitmap buffer which holds information of whether a point in the
|
||||||
|
// position buffer is inside the unit circle or not.
|
||||||
|
setupSimulationAllocations();
|
||||||
|
|
||||||
|
checkCudaErrors(
|
||||||
|
cudaMalloc((float **)&m_numPointsInCircle, sizeof(*m_numPointsInCircle)));
|
||||||
|
checkCudaErrors(cudaMallocHost((float **)&m_hostNumPointsInCircle,
|
||||||
|
sizeof(*m_hostNumPointsInCircle)));
|
||||||
}
|
}
|
||||||
|
|
||||||
void MonteCarloPiSimulation::initSimulation(int cudaDevice, cudaStream_t stream)
|
void MonteCarloPiSimulation::stepSimulation(float time, cudaStream_t stream) {
|
||||||
{
|
checkCudaErrors(cudaMemsetAsync(m_numPointsInCircle, 0,
|
||||||
m_cudaDevice = cudaDevice;
|
sizeof(*m_numPointsInCircle), stream));
|
||||||
getIdealExecutionConfiguration();
|
|
||||||
|
|
||||||
// Allocate a position buffer that contains random location of the points in XY cartesian plane.
|
monte_carlo_kernel<<<m_blocks, m_threads, 0, stream>>>(
|
||||||
// Allocate a bitmap buffer which holds information of whether a point in the position buffer is inside the unit circle or not.
|
m_xyVector, m_pointsInsideCircle, m_numPointsInCircle, m_numPoints, time);
|
||||||
setupSimulationAllocations();
|
getLastCudaError("Failed to launch CUDA simulation");
|
||||||
|
|
||||||
checkCudaErrors(cudaMalloc((float **)&m_numPointsInCircle, sizeof(*m_numPointsInCircle)));
|
checkCudaErrors(cudaMemcpyAsync(m_hostNumPointsInCircle, m_numPointsInCircle,
|
||||||
checkCudaErrors(cudaMallocHost((float **)&m_hostNumPointsInCircle, sizeof(*m_hostNumPointsInCircle)));
|
sizeof(*m_numPointsInCircle),
|
||||||
|
cudaMemcpyDeviceToHost, stream));
|
||||||
|
|
||||||
|
// Queue up a stream callback to compute and print the PI value.
|
||||||
|
checkCudaErrors(
|
||||||
|
cudaLaunchHostFunc(stream, this->computePiCallback, (void *)this));
|
||||||
}
|
}
|
||||||
|
|
||||||
void MonteCarloPiSimulation::stepSimulation(float time, cudaStream_t stream)
|
void MonteCarloPiSimulation::computePiCallback(void *args) {
|
||||||
{
|
MonteCarloPiSimulation *cbData = (MonteCarloPiSimulation *)args;
|
||||||
|
cbData->m_totalPointsInsideCircle += *(cbData->m_hostNumPointsInCircle);
|
||||||
checkCudaErrors(cudaMemsetAsync(m_numPointsInCircle, 0, sizeof(*m_numPointsInCircle), stream));
|
cbData->m_totalPointsSimulated += cbData->m_numPoints;
|
||||||
|
double piValue = 4.0 * ((double)cbData->m_totalPointsInsideCircle /
|
||||||
monte_carlo_kernel << < m_blocks, m_threads, 0, stream >> > (m_xyVector, m_pointsInsideCircle, m_numPointsInCircle, m_numPoints, time);
|
(double)cbData->m_totalPointsSimulated);
|
||||||
getLastCudaError("Failed to launch CUDA simulation");
|
printf("Approximate Pi value for %zd data points: %lf \n",
|
||||||
|
cbData->m_totalPointsSimulated, piValue);
|
||||||
checkCudaErrors(cudaMemcpyAsync(m_hostNumPointsInCircle, m_numPointsInCircle, sizeof(*m_numPointsInCircle), cudaMemcpyDeviceToHost, stream));
|
|
||||||
|
|
||||||
// Queue up a stream callback to compute and print the PI value.
|
|
||||||
checkCudaErrors(cudaLaunchHostFunc(stream, this->computePiCallback, (void *)this));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void MonteCarloPiSimulation::computePiCallback(void *args)
|
void MonteCarloPiSimulation::getIdealExecutionConfiguration() {
|
||||||
{
|
int warpSize = 0;
|
||||||
MonteCarloPiSimulation *cbData = (MonteCarloPiSimulation *)args;
|
int multiProcessorCount = 0;
|
||||||
cbData->m_totalPointsInsideCircle += *(cbData->m_hostNumPointsInCircle);
|
|
||||||
cbData->m_totalPointsSimulated += cbData->m_numPoints;
|
checkCudaErrors(cudaSetDevice(m_cudaDevice));
|
||||||
double piValue = 4.0 * ((double)cbData->m_totalPointsInsideCircle / (double)cbData->m_totalPointsSimulated);
|
checkCudaErrors(
|
||||||
printf("Approximate Pi value for %zd data points: %lf \n", cbData->m_totalPointsSimulated, piValue);
|
cudaDeviceGetAttribute(&warpSize, cudaDevAttrWarpSize, m_cudaDevice));
|
||||||
|
|
||||||
|
// We don't need large block sizes, since there's not much inter-thread
|
||||||
|
// communication
|
||||||
|
m_threads = warpSize;
|
||||||
|
|
||||||
|
// Use the occupancy calculator and fill the gpu as best as we can
|
||||||
|
checkCudaErrors(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||||
|
&m_blocks, monte_carlo_kernel, warpSize, 0));
|
||||||
|
|
||||||
|
checkCudaErrors(cudaDeviceGetAttribute(
|
||||||
|
&multiProcessorCount, cudaDevAttrMultiProcessorCount, m_cudaDevice));
|
||||||
|
m_blocks *= multiProcessorCount;
|
||||||
|
|
||||||
|
// Go ahead and the clamp the blocks to the minimum needed for this
|
||||||
|
// height/width
|
||||||
|
m_blocks =
|
||||||
|
std::min(m_blocks, (int)((m_numPoints + m_threads - 1) / m_threads));
|
||||||
}
|
}
|
||||||
|
|
||||||
void MonteCarloPiSimulation::getIdealExecutionConfiguration()
|
void MonteCarloPiSimulation::setupSimulationAllocations() {
|
||||||
{
|
CUdeviceptr d_ptr = 0U;
|
||||||
int warpSize = 0;
|
size_t granularity = 0;
|
||||||
int multiProcessorCount = 0;
|
CUmemGenericAllocationHandle cudaPositionHandle, cudaInCircleHandle;
|
||||||
|
|
||||||
checkCudaErrors(cudaSetDevice(m_cudaDevice));
|
CUmemAllocationProp allocProp = {};
|
||||||
checkCudaErrors(cudaDeviceGetAttribute(&warpSize, cudaDevAttrWarpSize, m_cudaDevice));
|
allocProp.type = CU_MEM_ALLOCATION_TYPE_PINNED;
|
||||||
|
allocProp.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
|
||||||
|
allocProp.location.id = m_cudaDevice;
|
||||||
|
allocProp.win32HandleMetaData = NULL;
|
||||||
|
allocProp.requestedHandleTypes = ipcHandleTypeFlag;
|
||||||
|
|
||||||
// We don't need large block sizes, since there's not much inter-thread communication
|
// Windows-specific LPSECURITYATTRIBUTES is required when
|
||||||
m_threads = warpSize;
|
// CU_MEM_HANDLE_TYPE_WIN32 is used. The security attribute defines the scope
|
||||||
|
// of which exported allocations may be tranferred to other processes. For all
|
||||||
|
// other handle types, pass NULL.
|
||||||
|
getDefaultSecurityDescriptor(&allocProp);
|
||||||
|
|
||||||
// Use the occupancy calculator and fill the gpu as best as we can
|
// Get the recommended granularity for m_cudaDevice.
|
||||||
checkCudaErrors(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&m_blocks, monte_carlo_kernel, warpSize, 0));
|
checkCudaErrors(cuMemGetAllocationGranularity(
|
||||||
|
&granularity, &allocProp, CU_MEM_ALLOC_GRANULARITY_RECOMMENDED));
|
||||||
|
|
||||||
checkCudaErrors(cudaDeviceGetAttribute(&multiProcessorCount, cudaDevAttrMultiProcessorCount, m_cudaDevice));
|
size_t xyPositionVecSize = m_numPoints * sizeof(*m_xyVector);
|
||||||
m_blocks *= multiProcessorCount;
|
size_t inCircleVecSize = m_numPoints * sizeof(*m_pointsInsideCircle);
|
||||||
|
|
||||||
// Go ahead and the clamp the blocks to the minimum needed for this height/width
|
size_t xyPositionSize =
|
||||||
m_blocks = std::min(m_blocks, (int)((m_numPoints + m_threads - 1) / m_threads));
|
ROUND_UP_TO_GRANULARITY(xyPositionVecSize, granularity);
|
||||||
|
size_t inCircleSize = ROUND_UP_TO_GRANULARITY(inCircleVecSize, granularity);
|
||||||
|
m_totalAllocationSize = (xyPositionSize + inCircleSize);
|
||||||
|
|
||||||
|
// Reserve the required contiguous VA space for the allocations
|
||||||
|
checkCudaErrors(
|
||||||
|
cuMemAddressReserve(&d_ptr, m_totalAllocationSize, granularity, 0U, 0));
|
||||||
|
|
||||||
|
// Create the allocations as a pinned allocation on this device.
|
||||||
|
// Create an allocation to store all the positions of points on the xy plane
|
||||||
|
// and a second allocation which stores information if the corresponding
|
||||||
|
// position is inside the unit circle or not.
|
||||||
|
checkCudaErrors(
|
||||||
|
cuMemCreate(&cudaPositionHandle, xyPositionSize, &allocProp, 0));
|
||||||
|
checkCudaErrors(
|
||||||
|
cuMemCreate(&cudaInCircleHandle, inCircleSize, &allocProp, 0));
|
||||||
|
|
||||||
|
// Export the allocation to a platform-specific handle. The type of handle
|
||||||
|
// requested here must match the requestedHandleTypes field in the prop
|
||||||
|
// structure passed to cuMemCreate. The handle obtained here will be passed to
|
||||||
|
// vulkan to import the allocation.
|
||||||
|
checkCudaErrors(cuMemExportToShareableHandle(
|
||||||
|
(void *)&m_posShareableHandle, cudaPositionHandle, ipcHandleTypeFlag, 0));
|
||||||
|
checkCudaErrors(
|
||||||
|
cuMemExportToShareableHandle((void *)&m_inCircleShareableHandle,
|
||||||
|
cudaInCircleHandle, ipcHandleTypeFlag, 0));
|
||||||
|
|
||||||
|
CUdeviceptr va_position = d_ptr;
|
||||||
|
CUdeviceptr va_InCircle = va_position + xyPositionSize;
|
||||||
|
m_pointsInsideCircle = (float *)va_InCircle;
|
||||||
|
m_xyVector = (vec2 *)va_position;
|
||||||
|
|
||||||
|
// Assign the chunk to the appropriate VA range
|
||||||
|
checkCudaErrors(
|
||||||
|
cuMemMap(va_position, xyPositionSize, 0, cudaPositionHandle, 0));
|
||||||
|
checkCudaErrors(
|
||||||
|
cuMemMap(va_InCircle, inCircleSize, 0, cudaInCircleHandle, 0));
|
||||||
|
|
||||||
|
// Release the handles for the allocation. Since the allocation is currently
|
||||||
|
// mapped to a VA range with a previous call to cuMemMap the actual freeing of
|
||||||
|
// memory allocation will happen on an eventual call to cuMemUnmap. Thus the
|
||||||
|
// allocation will be kept live until it is unmapped.
|
||||||
|
checkCudaErrors(cuMemRelease(cudaPositionHandle));
|
||||||
|
checkCudaErrors(cuMemRelease(cudaInCircleHandle));
|
||||||
|
|
||||||
|
CUmemAccessDesc accessDescriptor = {};
|
||||||
|
accessDescriptor.location.id = m_cudaDevice;
|
||||||
|
accessDescriptor.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
|
||||||
|
accessDescriptor.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
|
||||||
|
|
||||||
|
// Apply the access descriptor to the whole VA range. Essentially enables
|
||||||
|
// Read-Write access to the range.
|
||||||
|
checkCudaErrors(
|
||||||
|
cuMemSetAccess(d_ptr, m_totalAllocationSize, &accessDescriptor, 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
void MonteCarloPiSimulation::setupSimulationAllocations()
|
void MonteCarloPiSimulation::cleanupSimulationAllocations() {
|
||||||
{
|
if (m_xyVector && m_pointsInsideCircle) {
|
||||||
CUdeviceptr d_ptr = 0U;
|
// Unmap the mapped virtual memory region
|
||||||
size_t granularity = 0;
|
// Since the handles to the mapped backing stores have already been released
|
||||||
CUmemGenericAllocationHandle cudaPositionHandle, cudaInCircleHandle;
|
// by cuMemRelease, and these are the only/last mappings referencing them,
|
||||||
|
// The backing stores will be freed.
|
||||||
|
checkCudaErrors(cuMemUnmap((CUdeviceptr)m_xyVector, m_totalAllocationSize));
|
||||||
|
|
||||||
CUmemAllocationProp allocProp = { };
|
checkIpcErrors(ipcCloseShareableHandle(m_posShareableHandle));
|
||||||
allocProp.type = CU_MEM_ALLOCATION_TYPE_PINNED;
|
checkIpcErrors(ipcCloseShareableHandle(m_inCircleShareableHandle));
|
||||||
allocProp.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
|
|
||||||
allocProp.location.id = m_cudaDevice;
|
|
||||||
allocProp.win32HandleMetaData = NULL;
|
|
||||||
allocProp.requestedHandleTypes = ipcHandleTypeFlag;
|
|
||||||
|
|
||||||
// Windows-specific LPSECURITYATTRIBUTES is required when
|
// Free the virtual address region.
|
||||||
// CU_MEM_HANDLE_TYPE_WIN32 is used. The security attribute defines the scope
|
checkCudaErrors(
|
||||||
// of which exported allocations may be tranferred to other processes. For all
|
cuMemAddressFree((CUdeviceptr)m_xyVector, m_totalAllocationSize));
|
||||||
// other handle types, pass NULL.
|
|
||||||
getDefaultSecurityDescriptor(&allocProp);
|
|
||||||
|
|
||||||
// Get the recommended granularity for m_cudaDevice.
|
m_xyVector = nullptr;
|
||||||
checkCudaErrors(cuMemGetAllocationGranularity(&granularity, &allocProp, CU_MEM_ALLOC_GRANULARITY_RECOMMENDED));
|
m_pointsInsideCircle = nullptr;
|
||||||
|
}
|
||||||
size_t xyPositionVecSize = m_numPoints * sizeof(*m_xyVector);
|
|
||||||
size_t inCircleVecSize = m_numPoints * sizeof(*m_pointsInsideCircle);
|
|
||||||
|
|
||||||
size_t xyPositionSize = ROUND_UP_TO_GRANULARITY(xyPositionVecSize, granularity);
|
|
||||||
size_t inCircleSize = ROUND_UP_TO_GRANULARITY(inCircleVecSize, granularity);
|
|
||||||
m_totalAllocationSize = (xyPositionSize + inCircleSize);
|
|
||||||
|
|
||||||
// Reserve the required contiguous VA space for the allocations
|
|
||||||
checkCudaErrors(cuMemAddressReserve(&d_ptr, m_totalAllocationSize, granularity, 0U, 0));
|
|
||||||
|
|
||||||
// Create the allocations as a pinned allocation on this device.
|
|
||||||
// Create an allocation to store all the positions of points on the xy plane and a second
|
|
||||||
// allocation which stores information if the corresponding position is inside the unit circle or not.
|
|
||||||
checkCudaErrors(cuMemCreate(&cudaPositionHandle, xyPositionSize, &allocProp, 0));
|
|
||||||
checkCudaErrors(cuMemCreate(&cudaInCircleHandle, inCircleSize, &allocProp, 0));
|
|
||||||
|
|
||||||
// Export the allocation to a platform-specific handle. The type of handle
|
|
||||||
// requested here must match the requestedHandleTypes field in the prop
|
|
||||||
// structure passed to cuMemCreate. The handle obtained here will be passed to vulkan
|
|
||||||
// to import the allocation.
|
|
||||||
checkCudaErrors(cuMemExportToShareableHandle((void *)&m_posShareableHandle, cudaPositionHandle, ipcHandleTypeFlag, 0));
|
|
||||||
checkCudaErrors(cuMemExportToShareableHandle((void *)&m_inCircleShareableHandle, cudaInCircleHandle, ipcHandleTypeFlag, 0));
|
|
||||||
|
|
||||||
CUdeviceptr va_position = d_ptr;
|
|
||||||
CUdeviceptr va_InCircle = va_position + xyPositionSize;
|
|
||||||
m_pointsInsideCircle = (float *)va_InCircle;
|
|
||||||
m_xyVector = (vec2 *)va_position;
|
|
||||||
|
|
||||||
// Assign the chunk to the appropriate VA range
|
|
||||||
checkCudaErrors(cuMemMap(va_position, xyPositionSize, 0, cudaPositionHandle, 0));
|
|
||||||
checkCudaErrors(cuMemMap(va_InCircle, inCircleSize, 0, cudaInCircleHandle, 0));
|
|
||||||
|
|
||||||
// Release the handles for the allocation. Since the allocation is currently mapped to a VA range
|
|
||||||
// with a previous call to cuMemMap the actual freeing of memory allocation will happen on an eventual call to
|
|
||||||
// cuMemUnmap. Thus the allocation will be kept live until it is unmapped.
|
|
||||||
checkCudaErrors(cuMemRelease(cudaPositionHandle));
|
|
||||||
checkCudaErrors(cuMemRelease(cudaInCircleHandle));
|
|
||||||
|
|
||||||
CUmemAccessDesc accessDescriptor = {};
|
|
||||||
accessDescriptor.location.id = m_cudaDevice;
|
|
||||||
accessDescriptor.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
|
|
||||||
accessDescriptor.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
|
|
||||||
|
|
||||||
// Apply the access descriptor to the whole VA range. Essentially enables Read-Write access to the range.
|
|
||||||
checkCudaErrors(cuMemSetAccess(d_ptr, m_totalAllocationSize, &accessDescriptor, 1));
|
|
||||||
}
|
|
||||||
|
|
||||||
void MonteCarloPiSimulation::cleanupSimulationAllocations()
|
|
||||||
{
|
|
||||||
if (m_xyVector && m_pointsInsideCircle) {
|
|
||||||
// Unmap the mapped virtual memory region
|
|
||||||
// Since the handles to the mapped backing stores have already been released
|
|
||||||
// by cuMemRelease, and these are the only/last mappings referencing them,
|
|
||||||
// The backing stores will be freed.
|
|
||||||
checkCudaErrors(cuMemUnmap((CUdeviceptr)m_xyVector, m_totalAllocationSize));
|
|
||||||
|
|
||||||
checkIpcErrors(ipcCloseShareableHandle(m_posShareableHandle));
|
|
||||||
checkIpcErrors(ipcCloseShareableHandle(m_inCircleShareableHandle));
|
|
||||||
|
|
||||||
// Free the virtual address region.
|
|
||||||
checkCudaErrors(cuMemAddressFree((CUdeviceptr)m_xyVector, m_totalAllocationSize));
|
|
||||||
|
|
||||||
m_xyVector = nullptr;
|
|
||||||
m_pointsInsideCircle = nullptr;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,62 +39,57 @@
|
||||||
|
|
||||||
typedef float vec2[2];
|
typedef float vec2[2];
|
||||||
|
|
||||||
class MonteCarloPiSimulation
|
class MonteCarloPiSimulation {
|
||||||
{
|
size_t m_numPoints;
|
||||||
size_t m_numPoints;
|
|
||||||
|
|
||||||
// Pointers to Cuda allocated buffers which are imported and used by vulkan as vertex buffer
|
// Pointers to Cuda allocated buffers which are imported and used by vulkan as
|
||||||
vec2 *m_xyVector;
|
// vertex buffer
|
||||||
float *m_pointsInsideCircle;
|
vec2 *m_xyVector;
|
||||||
|
float *m_pointsInsideCircle;
|
||||||
|
|
||||||
// Pointers to device and host allocated memories storing number of points that are inside the unit circle
|
// Pointers to device and host allocated memories storing number of points
|
||||||
float *m_numPointsInCircle;
|
// that are inside the unit circle
|
||||||
float *m_hostNumPointsInCircle;
|
float *m_numPointsInCircle;
|
||||||
|
float *m_hostNumPointsInCircle;
|
||||||
|
|
||||||
int m_blocks, m_threads;
|
int m_blocks, m_threads;
|
||||||
|
|
||||||
// Total size of allocations created by cuMemMap Apis. This size is the sum of sizes of
|
// Total size of allocations created by cuMemMap Apis. This size is the sum of
|
||||||
// m_xyVector and m_pointsInsideCircle buffers.
|
// sizes of m_xyVector and m_pointsInsideCircle buffers.
|
||||||
size_t m_totalAllocationSize;
|
size_t m_totalAllocationSize;
|
||||||
|
|
||||||
// Shareable Handles(a file descriptor on Linux and NT Handle on Windows), used for sharing cuda
|
// Shareable Handles(a file descriptor on Linux and NT Handle on Windows),
|
||||||
// allocated memory with Vulkan
|
// used for sharing cuda
|
||||||
ShareableHandle m_posShareableHandle, m_inCircleShareableHandle;
|
// allocated memory with Vulkan
|
||||||
|
ShareableHandle m_posShareableHandle, m_inCircleShareableHandle;
|
||||||
|
|
||||||
// Cuda Device corresponding to the Vulkan Physical device
|
// Cuda Device corresponding to the Vulkan Physical device
|
||||||
int m_cudaDevice;
|
int m_cudaDevice;
|
||||||
|
|
||||||
// Track and accumulate total points that have been simulated since start of the sample.
|
// Track and accumulate total points that have been simulated since start of
|
||||||
// The idea is to get a closer approximation to PI with time.
|
// the sample. The idea is to get a closer approximation to PI with time.
|
||||||
size_t m_totalPointsInsideCircle;
|
size_t m_totalPointsInsideCircle;
|
||||||
size_t m_totalPointsSimulated;
|
size_t m_totalPointsSimulated;
|
||||||
|
|
||||||
void setupSimulationAllocations();
|
void setupSimulationAllocations();
|
||||||
void cleanupSimulationAllocations();
|
void cleanupSimulationAllocations();
|
||||||
void getIdealExecutionConfiguration();
|
void getIdealExecutionConfiguration();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
MonteCarloPiSimulation(size_t num_points);
|
MonteCarloPiSimulation(size_t num_points);
|
||||||
~MonteCarloPiSimulation();
|
~MonteCarloPiSimulation();
|
||||||
void initSimulation(int cudaDevice, cudaStream_t stream = 0);
|
void initSimulation(int cudaDevice, cudaStream_t stream = 0);
|
||||||
void stepSimulation(float time, cudaStream_t stream = 0);
|
void stepSimulation(float time, cudaStream_t stream = 0);
|
||||||
static void computePiCallback(void *args);
|
static void computePiCallback(void *args);
|
||||||
|
|
||||||
size_t getNumPoints() const {
|
size_t getNumPoints() const { return m_numPoints; }
|
||||||
return m_numPoints;
|
|
||||||
}
|
|
||||||
|
|
||||||
float getNumPointsInCircle() const {
|
float getNumPointsInCircle() const { return *m_hostNumPointsInCircle; }
|
||||||
return *m_hostNumPointsInCircle;
|
|
||||||
}
|
|
||||||
|
|
||||||
ShareableHandle &getPositionShareableHandle() {
|
|
||||||
return m_posShareableHandle;
|
|
||||||
}
|
|
||||||
ShareableHandle &getInCircleShareableHandle() {
|
|
||||||
return m_inCircleShareableHandle;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
ShareableHandle &getPositionShareableHandle() { return m_posShareableHandle; }
|
||||||
|
ShareableHandle &getInCircleShareableHandle() {
|
||||||
|
return m_inCircleShareableHandle;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // __PISIM_H__
|
#endif // __PISIM_H__
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -40,101 +40,119 @@
|
||||||
|
|
||||||
struct GLFWwindow;
|
struct GLFWwindow;
|
||||||
|
|
||||||
class VulkanBaseApp
|
class VulkanBaseApp {
|
||||||
{
|
public:
|
||||||
public:
|
VulkanBaseApp(const std::string& appName, bool enableValidation = false);
|
||||||
VulkanBaseApp(const std::string& appName, bool enableValidation = false);
|
static VkExternalSemaphoreHandleTypeFlagBits getDefaultSemaphoreHandleType();
|
||||||
static VkExternalSemaphoreHandleTypeFlagBits getDefaultSemaphoreHandleType();
|
static VkExternalMemoryHandleTypeFlagBits getDefaultMemHandleType();
|
||||||
static VkExternalMemoryHandleTypeFlagBits getDefaultMemHandleType();
|
virtual ~VulkanBaseApp();
|
||||||
virtual ~VulkanBaseApp();
|
void init();
|
||||||
void init();
|
void* getMemHandle(VkDeviceMemory memory,
|
||||||
void *getMemHandle(VkDeviceMemory memory, VkExternalMemoryHandleTypeFlagBits handleType);
|
VkExternalMemoryHandleTypeFlagBits handleType);
|
||||||
void *getSemaphoreHandle(VkSemaphore semaphore, VkExternalSemaphoreHandleTypeFlagBits handleType);
|
void* getSemaphoreHandle(VkSemaphore semaphore,
|
||||||
bool isVkPhysicalDeviceUuid(void *Uuid);
|
VkExternalSemaphoreHandleTypeFlagBits handleType);
|
||||||
void createExternalSemaphore(VkSemaphore& semaphore, VkExternalSemaphoreHandleTypeFlagBits handleType);
|
bool isVkPhysicalDeviceUuid(void* Uuid);
|
||||||
void createBuffer(VkDeviceSize size, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties, VkBuffer& buffer, VkDeviceMemory& bufferMemory);
|
void createExternalSemaphore(
|
||||||
void createExternalBuffer(VkDeviceSize size, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties, VkExternalMemoryHandleTypeFlagsKHR extMemHandleType, VkBuffer& buffer, VkDeviceMemory& bufferMemory);
|
VkSemaphore& semaphore, VkExternalSemaphoreHandleTypeFlagBits handleType);
|
||||||
void importExternalBuffer(void *handle, VkExternalMemoryHandleTypeFlagBits handleType, size_t size, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties, VkBuffer& buffer, VkDeviceMemory& memory);
|
void createBuffer(VkDeviceSize size, VkBufferUsageFlags usage,
|
||||||
void copyBuffer(VkBuffer dst, VkBuffer src, VkDeviceSize size);
|
VkMemoryPropertyFlags properties, VkBuffer& buffer,
|
||||||
VkCommandBuffer beginSingleTimeCommands();
|
VkDeviceMemory& bufferMemory);
|
||||||
void endSingleTimeCommands(VkCommandBuffer commandBuffer);
|
void createExternalBuffer(VkDeviceSize size, VkBufferUsageFlags usage,
|
||||||
void mainLoop();
|
VkMemoryPropertyFlags properties,
|
||||||
protected:
|
VkExternalMemoryHandleTypeFlagsKHR extMemHandleType,
|
||||||
const std::string m_appName;
|
VkBuffer& buffer, VkDeviceMemory& bufferMemory);
|
||||||
const bool m_enableValidation;
|
void importExternalBuffer(void* handle,
|
||||||
VkInstance m_instance;
|
VkExternalMemoryHandleTypeFlagBits handleType,
|
||||||
VkDebugUtilsMessengerEXT m_debugMessenger;
|
size_t size, VkBufferUsageFlags usage,
|
||||||
VkSurfaceKHR m_surface;
|
VkMemoryPropertyFlags properties, VkBuffer& buffer,
|
||||||
VkPhysicalDevice m_physicalDevice;
|
VkDeviceMemory& memory);
|
||||||
uint8_t m_deviceUUID[VK_UUID_SIZE];
|
void copyBuffer(VkBuffer dst, VkBuffer src, VkDeviceSize size);
|
||||||
VkDevice m_device;
|
VkCommandBuffer beginSingleTimeCommands();
|
||||||
VkQueue m_graphicsQueue;
|
void endSingleTimeCommands(VkCommandBuffer commandBuffer);
|
||||||
VkQueue m_presentQueue;
|
void mainLoop();
|
||||||
VkSwapchainKHR m_swapChain;
|
|
||||||
std::vector<VkImage> m_swapChainImages;
|
|
||||||
VkFormat m_swapChainFormat;
|
|
||||||
VkExtent2D m_swapChainExtent;
|
|
||||||
std::vector<VkImageView> m_swapChainImageViews;
|
|
||||||
std::vector<std::pair<VkShaderStageFlagBits, std::string> > m_shaderFiles;
|
|
||||||
VkRenderPass m_renderPass;
|
|
||||||
VkPipelineLayout m_pipelineLayout;
|
|
||||||
VkPipeline m_graphicsPipeline;
|
|
||||||
std::vector<VkFramebuffer> m_swapChainFramebuffers;
|
|
||||||
VkCommandPool m_commandPool;
|
|
||||||
std::vector<VkCommandBuffer> m_commandBuffers;
|
|
||||||
std::vector<VkSemaphore> m_imageAvailableSemaphores;
|
|
||||||
std::vector<VkSemaphore> m_renderFinishedSemaphores;
|
|
||||||
std::vector<VkFence> m_inFlightFences;
|
|
||||||
std::vector<VkBuffer> m_uniformBuffers;
|
|
||||||
std::vector<VkDeviceMemory> m_uniformMemory;
|
|
||||||
VkDescriptorSetLayout m_descriptorSetLayout;
|
|
||||||
VkDescriptorPool m_descriptorPool;
|
|
||||||
std::vector<VkDescriptorSet> m_descriptorSets;
|
|
||||||
|
|
||||||
VkImage m_depthImage;
|
protected:
|
||||||
VkDeviceMemory m_depthImageMemory;
|
const std::string m_appName;
|
||||||
VkImageView m_depthImageView;
|
const bool m_enableValidation;
|
||||||
size_t m_currentFrame;
|
VkInstance m_instance;
|
||||||
bool m_framebufferResized;
|
VkDebugUtilsMessengerEXT m_debugMessenger;
|
||||||
|
VkSurfaceKHR m_surface;
|
||||||
|
VkPhysicalDevice m_physicalDevice;
|
||||||
|
uint8_t m_deviceUUID[VK_UUID_SIZE];
|
||||||
|
VkDevice m_device;
|
||||||
|
VkQueue m_graphicsQueue;
|
||||||
|
VkQueue m_presentQueue;
|
||||||
|
VkSwapchainKHR m_swapChain;
|
||||||
|
std::vector<VkImage> m_swapChainImages;
|
||||||
|
VkFormat m_swapChainFormat;
|
||||||
|
VkExtent2D m_swapChainExtent;
|
||||||
|
std::vector<VkImageView> m_swapChainImageViews;
|
||||||
|
std::vector<std::pair<VkShaderStageFlagBits, std::string> > m_shaderFiles;
|
||||||
|
VkRenderPass m_renderPass;
|
||||||
|
VkPipelineLayout m_pipelineLayout;
|
||||||
|
VkPipeline m_graphicsPipeline;
|
||||||
|
std::vector<VkFramebuffer> m_swapChainFramebuffers;
|
||||||
|
VkCommandPool m_commandPool;
|
||||||
|
std::vector<VkCommandBuffer> m_commandBuffers;
|
||||||
|
std::vector<VkSemaphore> m_imageAvailableSemaphores;
|
||||||
|
std::vector<VkSemaphore> m_renderFinishedSemaphores;
|
||||||
|
std::vector<VkFence> m_inFlightFences;
|
||||||
|
std::vector<VkBuffer> m_uniformBuffers;
|
||||||
|
std::vector<VkDeviceMemory> m_uniformMemory;
|
||||||
|
VkDescriptorSetLayout m_descriptorSetLayout;
|
||||||
|
VkDescriptorPool m_descriptorPool;
|
||||||
|
std::vector<VkDescriptorSet> m_descriptorSets;
|
||||||
|
|
||||||
virtual void initVulkanApp() {}
|
VkImage m_depthImage;
|
||||||
virtual void fillRenderingCommandBuffer(VkCommandBuffer& buffer) {}
|
VkDeviceMemory m_depthImageMemory;
|
||||||
virtual std::vector<const char *> getRequiredExtensions() const;
|
VkImageView m_depthImageView;
|
||||||
virtual std::vector<const char *> getRequiredDeviceExtensions() const;
|
size_t m_currentFrame;
|
||||||
virtual void getVertexDescriptions(std::vector<VkVertexInputBindingDescription>& bindingDesc, std::vector<VkVertexInputAttributeDescription>& attribDesc);
|
bool m_framebufferResized;
|
||||||
virtual void getAssemblyStateInfo(VkPipelineInputAssemblyStateCreateInfo& info);
|
|
||||||
virtual void getWaitFrameSemaphores(std::vector<VkSemaphore>& wait, std::vector< VkPipelineStageFlags>& waitStages) const;
|
|
||||||
virtual void getSignalFrameSemaphores(std::vector<VkSemaphore>& signal) const;
|
|
||||||
virtual VkDeviceSize getUniformSize() const;
|
|
||||||
virtual void updateUniformBuffer(uint32_t imageIndex, size_t globalFrame);
|
|
||||||
virtual void drawFrame();
|
|
||||||
private:
|
|
||||||
GLFWwindow *m_window;
|
|
||||||
|
|
||||||
void initWindow();
|
virtual void initVulkanApp() {}
|
||||||
void initVulkan();
|
virtual void fillRenderingCommandBuffer(VkCommandBuffer& buffer) {}
|
||||||
void createInstance();
|
virtual std::vector<const char*> getRequiredExtensions() const;
|
||||||
void createSurface();
|
virtual std::vector<const char*> getRequiredDeviceExtensions() const;
|
||||||
void createDevice();
|
virtual void getVertexDescriptions(
|
||||||
void createSwapChain();
|
std::vector<VkVertexInputBindingDescription>& bindingDesc,
|
||||||
void createImageViews();
|
std::vector<VkVertexInputAttributeDescription>& attribDesc);
|
||||||
void createRenderPass();
|
virtual void getAssemblyStateInfo(
|
||||||
void createDescriptorSetLayout();
|
VkPipelineInputAssemblyStateCreateInfo& info);
|
||||||
void createGraphicsPipeline();
|
virtual void getWaitFrameSemaphores(
|
||||||
void createFramebuffers();
|
std::vector<VkSemaphore>& wait,
|
||||||
void createCommandPool();
|
std::vector<VkPipelineStageFlags>& waitStages) const;
|
||||||
void createDepthResources();
|
virtual void getSignalFrameSemaphores(std::vector<VkSemaphore>& signal) const;
|
||||||
void createUniformBuffers();
|
virtual VkDeviceSize getUniformSize() const;
|
||||||
void createDescriptorPool();
|
virtual void updateUniformBuffer(uint32_t imageIndex, size_t globalFrame);
|
||||||
void createDescriptorSets();
|
virtual void drawFrame();
|
||||||
void createCommandBuffers();
|
|
||||||
void createSyncObjects();
|
|
||||||
|
|
||||||
void cleanupSwapChain();
|
private:
|
||||||
void recreateSwapChain();
|
GLFWwindow* m_window;
|
||||||
|
|
||||||
bool isSuitableDevice(VkPhysicalDevice dev) const;
|
void initWindow();
|
||||||
static void resizeCallback(GLFWwindow *window, int width, int height);
|
void initVulkan();
|
||||||
|
void createInstance();
|
||||||
|
void createSurface();
|
||||||
|
void createDevice();
|
||||||
|
void createSwapChain();
|
||||||
|
void createImageViews();
|
||||||
|
void createRenderPass();
|
||||||
|
void createDescriptorSetLayout();
|
||||||
|
void createGraphicsPipeline();
|
||||||
|
void createFramebuffers();
|
||||||
|
void createCommandPool();
|
||||||
|
void createDepthResources();
|
||||||
|
void createUniformBuffers();
|
||||||
|
void createDescriptorPool();
|
||||||
|
void createDescriptorSets();
|
||||||
|
void createCommandBuffers();
|
||||||
|
void createSyncObjects();
|
||||||
|
|
||||||
|
void cleanupSwapChain();
|
||||||
|
void recreateSwapChain();
|
||||||
|
|
||||||
|
bool isSuitableDevice(VkPhysicalDevice dev) const;
|
||||||
|
static void resizeCallback(GLFWwindow* window, int width, int height);
|
||||||
};
|
};
|
||||||
|
|
||||||
void readFile(std::istream& s, std::vector<char>& data);
|
void readFile(std::istream& s, std::vector<char>& data);
|
||||||
|
|
|
@ -35,41 +35,48 @@
|
||||||
#include <helper_cuda.h>
|
#include <helper_cuda.h>
|
||||||
|
|
||||||
bool isDeviceCompatible(void *Uuid, size_t size) {
|
bool isDeviceCompatible(void *Uuid, size_t size) {
|
||||||
|
int cudaDevice = cudaInvalidDeviceId;
|
||||||
|
int deviceCount;
|
||||||
|
checkCudaErrors(cudaGetDeviceCount(&deviceCount));
|
||||||
|
|
||||||
int cudaDevice = cudaInvalidDeviceId;
|
for (int i = 0; i < deviceCount; ++i) {
|
||||||
int deviceCount;
|
cudaDeviceProp devProp = {};
|
||||||
checkCudaErrors(cudaGetDeviceCount(&deviceCount));
|
checkCudaErrors(cudaGetDeviceProperties(&devProp, i));
|
||||||
|
if (!memcmp(&devProp.uuid, Uuid, size)) {
|
||||||
for (int i = 0; i < deviceCount; ++i) {
|
cudaDevice = i;
|
||||||
cudaDeviceProp devProp = { };
|
break;
|
||||||
checkCudaErrors(cudaGetDeviceProperties(&devProp, i));
|
|
||||||
if (!memcmp(&devProp.uuid, Uuid, size)) {
|
|
||||||
cudaDevice = i;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (cudaDevice == cudaInvalidDeviceId) {
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
if (cudaDevice == cudaInvalidDeviceId) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
int deviceSupportsHandle = 0;
|
int deviceSupportsHandle = 0;
|
||||||
int attributeVal = 0;
|
int attributeVal = 0;
|
||||||
int deviceComputeMode = 0;
|
int deviceComputeMode = 0;
|
||||||
|
|
||||||
checkCudaErrors(cuDeviceGetAttribute(&deviceComputeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, cudaDevice));
|
checkCudaErrors(cuDeviceGetAttribute(
|
||||||
checkCudaErrors(cuDeviceGetAttribute(&attributeVal, CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED, cudaDevice));
|
&deviceComputeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, cudaDevice));
|
||||||
|
checkCudaErrors(cuDeviceGetAttribute(
|
||||||
|
&attributeVal, CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED,
|
||||||
|
cudaDevice));
|
||||||
|
|
||||||
#if defined(__linux__)
|
#if defined(__linux__)
|
||||||
checkCudaErrors(cuDeviceGetAttribute(&deviceSupportsHandle, CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED, cudaDevice));
|
checkCudaErrors(cuDeviceGetAttribute(
|
||||||
|
&deviceSupportsHandle,
|
||||||
|
CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED,
|
||||||
|
cudaDevice));
|
||||||
#else
|
#else
|
||||||
checkCudaErrors(cuDeviceGetAttribute(&deviceSupportsHandle, CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED, cudaDevice));
|
checkCudaErrors(cuDeviceGetAttribute(
|
||||||
|
&deviceSupportsHandle,
|
||||||
|
CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED, cudaDevice));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if ((deviceComputeMode != CU_COMPUTEMODE_DEFAULT) || !attributeVal || !deviceSupportsHandle) {
|
if ((deviceComputeMode != CU_COMPUTEMODE_DEFAULT) || !attributeVal ||
|
||||||
return false;
|
!deviceSupportsHandle) {
|
||||||
}
|
return false;
|
||||||
return true;
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // __VKCUDA_H__
|
#endif // __VKCUDA_H__
|
||||||
|
|
||||||
|
|
BIN
Samples/simpleVulkanMMAP/frag.spv
Normal file
BIN
Samples/simpleVulkanMMAP/frag.spv
Normal file
Binary file not shown.
|
@ -25,11 +25,12 @@
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This sample demonstrates CUDA Interop with Vulkan using cuMemMap APIs.
|
* This sample demonstrates CUDA Interop with Vulkan using cuMemMap APIs.
|
||||||
* Allocating device memory and updating values in those allocations are performed by CUDA
|
* Allocating device memory and updating values in those allocations are
|
||||||
* and the contents of the allocation are visualized by Vulkan.
|
* performed by CUDA and the contents of the allocation are visualized by
|
||||||
*/
|
* Vulkan.
|
||||||
|
*/
|
||||||
|
|
||||||
#include "VulkanBaseApp.h"
|
#include "VulkanBaseApp.h"
|
||||||
|
|
||||||
|
@ -55,25 +56,23 @@
|
||||||
|
|
||||||
std::string execution_path;
|
std::string execution_path;
|
||||||
|
|
||||||
class VulkanCudaPi : public VulkanBaseApp
|
class VulkanCudaPi : public VulkanBaseApp {
|
||||||
{
|
typedef struct UniformBufferObject_st { float frame; } UniformBufferObject;
|
||||||
typedef struct UniformBufferObject_st {
|
|
||||||
float frame;
|
|
||||||
} UniformBufferObject;
|
|
||||||
|
|
||||||
VkBuffer m_inCircleBuffer, m_xyPositionBuffer;
|
VkBuffer m_inCircleBuffer, m_xyPositionBuffer;
|
||||||
VkDeviceMemory m_inCircleMemory, m_xyPositionMemory;
|
VkDeviceMemory m_inCircleMemory, m_xyPositionMemory;
|
||||||
VkSemaphore m_vkWaitSemaphore, m_vkSignalSemaphore;
|
VkSemaphore m_vkWaitSemaphore, m_vkSignalSemaphore;
|
||||||
MonteCarloPiSimulation m_sim;
|
MonteCarloPiSimulation m_sim;
|
||||||
UniformBufferObject m_ubo;
|
UniformBufferObject m_ubo;
|
||||||
cudaStream_t m_stream;
|
cudaStream_t m_stream;
|
||||||
cudaExternalSemaphore_t m_cudaWaitSemaphore, m_cudaSignalSemaphore;
|
cudaExternalSemaphore_t m_cudaWaitSemaphore, m_cudaSignalSemaphore;
|
||||||
using chrono_tp = std::chrono::time_point<std::chrono::high_resolution_clock>;
|
using chrono_tp = std::chrono::time_point<std::chrono::high_resolution_clock>;
|
||||||
chrono_tp m_lastTime;
|
chrono_tp m_lastTime;
|
||||||
size_t m_lastFrame;
|
size_t m_lastFrame;
|
||||||
public:
|
|
||||||
VulkanCudaPi(size_t num_points) :
|
public:
|
||||||
VulkanBaseApp("simpleVulkanMMAP", ENABLE_VALIDATION),
|
VulkanCudaPi(size_t num_points)
|
||||||
|
: VulkanBaseApp("simpleVulkanMMAP", ENABLE_VALIDATION),
|
||||||
m_inCircleBuffer(VK_NULL_HANDLE),
|
m_inCircleBuffer(VK_NULL_HANDLE),
|
||||||
m_xyPositionBuffer(VK_NULL_HANDLE),
|
m_xyPositionBuffer(VK_NULL_HANDLE),
|
||||||
m_inCircleMemory(VK_NULL_HANDLE),
|
m_inCircleMemory(VK_NULL_HANDLE),
|
||||||
|
@ -86,232 +85,268 @@ public:
|
||||||
m_cudaWaitSemaphore(),
|
m_cudaWaitSemaphore(),
|
||||||
m_cudaSignalSemaphore(),
|
m_cudaSignalSemaphore(),
|
||||||
m_lastFrame(0) {
|
m_lastFrame(0) {
|
||||||
|
// Add our compiled vulkan shader files
|
||||||
|
char* vertex_shader_path =
|
||||||
|
sdkFindFilePath("vert.spv", execution_path.c_str());
|
||||||
|
char* fragment_shader_path =
|
||||||
|
sdkFindFilePath("frag.spv", execution_path.c_str());
|
||||||
|
m_shaderFiles.push_back(
|
||||||
|
std::make_pair(VK_SHADER_STAGE_VERTEX_BIT, vertex_shader_path));
|
||||||
|
m_shaderFiles.push_back(
|
||||||
|
std::make_pair(VK_SHADER_STAGE_FRAGMENT_BIT, fragment_shader_path));
|
||||||
|
}
|
||||||
|
|
||||||
// Add our compiled vulkan shader files
|
~VulkanCudaPi() {
|
||||||
char* vertex_shader_path = sdkFindFilePath("montecarlo.vert", execution_path.c_str());
|
if (m_stream) {
|
||||||
char* fragment_shader_path = sdkFindFilePath("montecarlo.frag", execution_path.c_str());
|
// Make sure there's no pending work before we start tearing down
|
||||||
m_shaderFiles.push_back(std::make_pair(VK_SHADER_STAGE_VERTEX_BIT, vertex_shader_path));
|
checkCudaErrors(cudaStreamSynchronize(m_stream));
|
||||||
m_shaderFiles.push_back(std::make_pair(VK_SHADER_STAGE_FRAGMENT_BIT, fragment_shader_path));
|
checkCudaErrors(cudaStreamDestroy(m_stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
~VulkanCudaPi() {
|
if (m_vkSignalSemaphore != VK_NULL_HANDLE) {
|
||||||
if (m_stream) {
|
checkCudaErrors(cudaDestroyExternalSemaphore(m_cudaSignalSemaphore));
|
||||||
// Make sure there's no pending work before we start tearing down
|
vkDestroySemaphore(m_device, m_vkSignalSemaphore, nullptr);
|
||||||
checkCudaErrors(cudaStreamSynchronize(m_stream));
|
}
|
||||||
checkCudaErrors(cudaStreamDestroy(m_stream));
|
if (m_vkWaitSemaphore != VK_NULL_HANDLE) {
|
||||||
}
|
checkCudaErrors(cudaDestroyExternalSemaphore(m_cudaWaitSemaphore));
|
||||||
|
vkDestroySemaphore(m_device, m_vkWaitSemaphore, nullptr);
|
||||||
|
}
|
||||||
|
if (m_xyPositionBuffer != VK_NULL_HANDLE) {
|
||||||
|
vkDestroyBuffer(m_device, m_xyPositionBuffer, nullptr);
|
||||||
|
}
|
||||||
|
if (m_xyPositionMemory != VK_NULL_HANDLE) {
|
||||||
|
vkFreeMemory(m_device, m_xyPositionMemory, nullptr);
|
||||||
|
}
|
||||||
|
if (m_inCircleBuffer != VK_NULL_HANDLE) {
|
||||||
|
vkDestroyBuffer(m_device, m_inCircleBuffer, nullptr);
|
||||||
|
}
|
||||||
|
if (m_inCircleMemory != VK_NULL_HANDLE) {
|
||||||
|
vkFreeMemory(m_device, m_inCircleMemory, nullptr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (m_vkSignalSemaphore != VK_NULL_HANDLE) {
|
void fillRenderingCommandBuffer(VkCommandBuffer& commandBuffer) {
|
||||||
checkCudaErrors(cudaDestroyExternalSemaphore(m_cudaSignalSemaphore));
|
VkBuffer vertexBuffers[] = {m_inCircleBuffer, m_xyPositionBuffer};
|
||||||
vkDestroySemaphore(m_device, m_vkSignalSemaphore, nullptr);
|
VkDeviceSize offsets[] = {0, 0};
|
||||||
}
|
vkCmdBindVertexBuffers(commandBuffer, 0,
|
||||||
if (m_vkWaitSemaphore != VK_NULL_HANDLE) {
|
sizeof(vertexBuffers) / sizeof(vertexBuffers[0]),
|
||||||
checkCudaErrors(cudaDestroyExternalSemaphore(m_cudaWaitSemaphore));
|
vertexBuffers, offsets);
|
||||||
vkDestroySemaphore(m_device, m_vkWaitSemaphore, nullptr);
|
vkCmdDraw(commandBuffer, (uint32_t)(m_sim.getNumPoints()), 1, 0, 0);
|
||||||
}
|
}
|
||||||
if (m_xyPositionBuffer != VK_NULL_HANDLE) {
|
|
||||||
vkDestroyBuffer(m_device, m_xyPositionBuffer, nullptr);
|
void getVertexDescriptions(
|
||||||
}
|
std::vector<VkVertexInputBindingDescription>& bindingDesc,
|
||||||
if (m_xyPositionMemory != VK_NULL_HANDLE) {
|
std::vector<VkVertexInputAttributeDescription>& attribDesc) {
|
||||||
vkFreeMemory(m_device, m_xyPositionMemory, nullptr);
|
bindingDesc.resize(2);
|
||||||
}
|
attribDesc.resize(2);
|
||||||
if (m_inCircleBuffer != VK_NULL_HANDLE) {
|
|
||||||
vkDestroyBuffer(m_device, m_inCircleBuffer, nullptr);
|
bindingDesc[0].binding = 0;
|
||||||
}
|
bindingDesc[0].stride = sizeof(float);
|
||||||
if (m_inCircleMemory != VK_NULL_HANDLE) {
|
bindingDesc[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
|
||||||
vkFreeMemory(m_device, m_inCircleMemory, nullptr);
|
|
||||||
}
|
bindingDesc[1].binding = 1;
|
||||||
|
bindingDesc[1].stride = sizeof(vec2);
|
||||||
|
bindingDesc[1].inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
|
||||||
|
|
||||||
|
attribDesc[0].binding = 0;
|
||||||
|
attribDesc[0].location = 0;
|
||||||
|
attribDesc[0].format = VK_FORMAT_R32_SFLOAT;
|
||||||
|
attribDesc[0].offset = 0;
|
||||||
|
|
||||||
|
attribDesc[1].binding = 1;
|
||||||
|
attribDesc[1].location = 1;
|
||||||
|
attribDesc[1].format = VK_FORMAT_R32G32_SFLOAT;
|
||||||
|
attribDesc[1].offset = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void getAssemblyStateInfo(VkPipelineInputAssemblyStateCreateInfo& info) {
|
||||||
|
info.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
|
||||||
|
info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
|
||||||
|
info.primitiveRestartEnable = VK_FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
void getWaitFrameSemaphores(
|
||||||
|
std::vector<VkSemaphore>& wait,
|
||||||
|
std::vector<VkPipelineStageFlags>& waitStages) const {
|
||||||
|
if (m_currentFrame != 0) {
|
||||||
|
// Have vulkan wait until cuda is done with the vertex buffer before
|
||||||
|
// rendering
|
||||||
|
// We don't do this on the first frame, as the wait semaphore hasn't been
|
||||||
|
// initialized yet
|
||||||
|
wait.push_back(m_vkWaitSemaphore);
|
||||||
|
// We want to wait until all the pipeline commands are complete before
|
||||||
|
// letting cuda work
|
||||||
|
waitStages.push_back(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void getSignalFrameSemaphores(std::vector<VkSemaphore>& signal) const {
|
||||||
|
// Add this semaphore for vulkan to signal once the vertex buffer is ready
|
||||||
|
// for cuda to modify
|
||||||
|
signal.push_back(m_vkSignalSemaphore);
|
||||||
|
}
|
||||||
|
|
||||||
|
void initVulkanApp() {
|
||||||
|
const size_t nVerts = m_sim.getNumPoints();
|
||||||
|
|
||||||
|
// Obtain cuda device id for the device corresponding to the Vulkan physical
|
||||||
|
// device
|
||||||
|
int deviceCount;
|
||||||
|
int cudaDevice = cudaInvalidDeviceId;
|
||||||
|
checkCudaErrors(cudaGetDeviceCount(&deviceCount));
|
||||||
|
for (int dev = 0; dev < deviceCount; ++dev) {
|
||||||
|
cudaDeviceProp devProp = {};
|
||||||
|
checkCudaErrors(cudaGetDeviceProperties(&devProp, dev));
|
||||||
|
if (isVkPhysicalDeviceUuid(&devProp.uuid)) {
|
||||||
|
cudaDevice = dev;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (cudaDevice == cudaInvalidDeviceId) {
|
||||||
|
throw std::runtime_error("No Suitable device found!");
|
||||||
}
|
}
|
||||||
|
|
||||||
void fillRenderingCommandBuffer(VkCommandBuffer& commandBuffer) {
|
// On the corresponding cuda device, create the cuda stream we'll using
|
||||||
VkBuffer vertexBuffers[] = { m_inCircleBuffer, m_xyPositionBuffer };
|
checkCudaErrors(cudaSetDevice(cudaDevice));
|
||||||
VkDeviceSize offsets[] = { 0, 0 };
|
checkCudaErrors(
|
||||||
vkCmdBindVertexBuffers(commandBuffer, 0, sizeof(vertexBuffers) / sizeof(vertexBuffers[0]), vertexBuffers, offsets);
|
cudaStreamCreateWithFlags(&m_stream, cudaStreamNonBlocking));
|
||||||
vkCmdDraw(commandBuffer, (uint32_t)(m_sim.getNumPoints()), 1, 0, 0);
|
m_sim.initSimulation(cudaDevice, m_stream);
|
||||||
|
|
||||||
|
importExternalBuffer(
|
||||||
|
(void*)(uintptr_t)m_sim.getPositionShareableHandle(),
|
||||||
|
getDefaultMemHandleType(), nVerts * sizeof(vec2),
|
||||||
|
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
|
||||||
|
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, m_xyPositionBuffer,
|
||||||
|
m_xyPositionMemory);
|
||||||
|
|
||||||
|
importExternalBuffer(
|
||||||
|
(void*)(uintptr_t)m_sim.getInCircleShareableHandle(),
|
||||||
|
getDefaultMemHandleType(), nVerts * sizeof(float),
|
||||||
|
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
|
||||||
|
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, m_inCircleBuffer,
|
||||||
|
m_inCircleMemory);
|
||||||
|
|
||||||
|
// Create the semaphore vulkan will signal when it's done with the vertex
|
||||||
|
// buffer
|
||||||
|
createExternalSemaphore(m_vkSignalSemaphore,
|
||||||
|
getDefaultSemaphoreHandleType());
|
||||||
|
// Create the semaphore vulkan will wait for before using the vertex buffer
|
||||||
|
createExternalSemaphore(m_vkWaitSemaphore, getDefaultSemaphoreHandleType());
|
||||||
|
// Import the semaphore cuda will use -- vulkan's signal will be cuda's wait
|
||||||
|
importCudaExternalSemaphore(m_cudaWaitSemaphore, m_vkSignalSemaphore,
|
||||||
|
getDefaultSemaphoreHandleType());
|
||||||
|
// Import the semaphore cuda will use -- cuda's signal will be vulkan's wait
|
||||||
|
importCudaExternalSemaphore(m_cudaSignalSemaphore, m_vkWaitSemaphore,
|
||||||
|
getDefaultSemaphoreHandleType());
|
||||||
|
}
|
||||||
|
|
||||||
|
void importCudaExternalSemaphore(
|
||||||
|
cudaExternalSemaphore_t& cudaSem, VkSemaphore& vkSem,
|
||||||
|
VkExternalSemaphoreHandleTypeFlagBits handleType) {
|
||||||
|
cudaExternalSemaphoreHandleDesc externalSemaphoreHandleDesc = {};
|
||||||
|
|
||||||
|
if (handleType & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT) {
|
||||||
|
externalSemaphoreHandleDesc.type =
|
||||||
|
cudaExternalSemaphoreHandleTypeOpaqueWin32;
|
||||||
|
} else if (handleType &
|
||||||
|
VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT) {
|
||||||
|
externalSemaphoreHandleDesc.type =
|
||||||
|
cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt;
|
||||||
|
} else if (handleType & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
|
||||||
|
externalSemaphoreHandleDesc.type =
|
||||||
|
cudaExternalSemaphoreHandleTypeOpaqueFd;
|
||||||
|
} else {
|
||||||
|
throw std::runtime_error("Unknown handle type requested!");
|
||||||
}
|
}
|
||||||
|
|
||||||
void getVertexDescriptions(std::vector<VkVertexInputBindingDescription>& bindingDesc, std::vector<VkVertexInputAttributeDescription>& attribDesc) {
|
|
||||||
bindingDesc.resize(2);
|
|
||||||
attribDesc.resize(2);
|
|
||||||
|
|
||||||
bindingDesc[0].binding = 0;
|
|
||||||
bindingDesc[0].stride = sizeof(float);
|
|
||||||
bindingDesc[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
|
|
||||||
|
|
||||||
bindingDesc[1].binding = 1;
|
|
||||||
bindingDesc[1].stride = sizeof(vec2);
|
|
||||||
bindingDesc[1].inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
|
|
||||||
|
|
||||||
attribDesc[0].binding = 0;
|
|
||||||
attribDesc[0].location = 0;
|
|
||||||
attribDesc[0].format = VK_FORMAT_R32_SFLOAT;
|
|
||||||
attribDesc[0].offset = 0;
|
|
||||||
|
|
||||||
attribDesc[1].binding = 1;
|
|
||||||
attribDesc[1].location = 1;
|
|
||||||
attribDesc[1].format = VK_FORMAT_R32G32_SFLOAT;
|
|
||||||
attribDesc[1].offset = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
void getAssemblyStateInfo(VkPipelineInputAssemblyStateCreateInfo& info) {
|
|
||||||
info.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
|
|
||||||
info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
|
|
||||||
info.primitiveRestartEnable = VK_FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
void getWaitFrameSemaphores(std::vector<VkSemaphore>& wait, std::vector< VkPipelineStageFlags>& waitStages) const {
|
|
||||||
if (m_currentFrame != 0) {
|
|
||||||
// Have vulkan wait until cuda is done with the vertex buffer before rendering
|
|
||||||
// We don't do this on the first frame, as the wait semaphore hasn't been initialized yet
|
|
||||||
wait.push_back(m_vkWaitSemaphore);
|
|
||||||
// We want to wait until all the pipeline commands are complete before letting cuda work
|
|
||||||
waitStages.push_back(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void getSignalFrameSemaphores(std::vector<VkSemaphore>& signal) const {
|
|
||||||
// Add this semaphore for vulkan to signal once the vertex buffer is ready for cuda to modify
|
|
||||||
signal.push_back(m_vkSignalSemaphore);
|
|
||||||
}
|
|
||||||
|
|
||||||
void initVulkanApp() {
|
|
||||||
const size_t nVerts = m_sim.getNumPoints();
|
|
||||||
|
|
||||||
// Obtain cuda device id for the device corresponding to the Vulkan physical device
|
|
||||||
int deviceCount;
|
|
||||||
int cudaDevice = cudaInvalidDeviceId;
|
|
||||||
checkCudaErrors(cudaGetDeviceCount(&deviceCount));
|
|
||||||
for (int dev = 0; dev < deviceCount; ++dev) {
|
|
||||||
cudaDeviceProp devProp = { };
|
|
||||||
checkCudaErrors(cudaGetDeviceProperties(&devProp, dev));
|
|
||||||
if (isVkPhysicalDeviceUuid(&devProp.uuid)) {
|
|
||||||
cudaDevice = dev;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (cudaDevice == cudaInvalidDeviceId) {
|
|
||||||
throw std::runtime_error("No Suitable device found!");
|
|
||||||
}
|
|
||||||
|
|
||||||
// On the corresponding cuda device, create the cuda stream we'll using
|
|
||||||
checkCudaErrors(cudaSetDevice(cudaDevice));
|
|
||||||
checkCudaErrors(cudaStreamCreateWithFlags(&m_stream, cudaStreamNonBlocking));
|
|
||||||
m_sim.initSimulation(cudaDevice, m_stream);
|
|
||||||
|
|
||||||
importExternalBuffer((void *)(uintptr_t)m_sim.getPositionShareableHandle(), getDefaultMemHandleType(), nVerts * sizeof(vec2),
|
|
||||||
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
|
|
||||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, m_xyPositionBuffer, m_xyPositionMemory);
|
|
||||||
|
|
||||||
importExternalBuffer((void *)(uintptr_t)m_sim.getInCircleShareableHandle(), getDefaultMemHandleType(), nVerts * sizeof(float),
|
|
||||||
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
|
|
||||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, m_inCircleBuffer, m_inCircleMemory);
|
|
||||||
|
|
||||||
// Create the semaphore vulkan will signal when it's done with the vertex buffer
|
|
||||||
createExternalSemaphore(m_vkSignalSemaphore, getDefaultSemaphoreHandleType());
|
|
||||||
// Create the semaphore vulkan will wait for before using the vertex buffer
|
|
||||||
createExternalSemaphore(m_vkWaitSemaphore, getDefaultSemaphoreHandleType());
|
|
||||||
// Import the semaphore cuda will use -- vulkan's signal will be cuda's wait
|
|
||||||
importCudaExternalSemaphore(m_cudaWaitSemaphore, m_vkSignalSemaphore, getDefaultSemaphoreHandleType());
|
|
||||||
// Import the semaphore cuda will use -- cuda's signal will be vulkan's wait
|
|
||||||
importCudaExternalSemaphore(m_cudaSignalSemaphore, m_vkWaitSemaphore, getDefaultSemaphoreHandleType());
|
|
||||||
}
|
|
||||||
|
|
||||||
void importCudaExternalSemaphore(cudaExternalSemaphore_t& cudaSem, VkSemaphore& vkSem, VkExternalSemaphoreHandleTypeFlagBits handleType) {
|
|
||||||
cudaExternalSemaphoreHandleDesc externalSemaphoreHandleDesc = {};
|
|
||||||
|
|
||||||
if (handleType & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT) {
|
|
||||||
externalSemaphoreHandleDesc.type = cudaExternalSemaphoreHandleTypeOpaqueWin32;
|
|
||||||
}
|
|
||||||
else if (handleType & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT) {
|
|
||||||
externalSemaphoreHandleDesc.type = cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt;
|
|
||||||
}
|
|
||||||
else if (handleType & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
|
|
||||||
externalSemaphoreHandleDesc.type = cudaExternalSemaphoreHandleTypeOpaqueFd;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
throw std::runtime_error("Unknown handle type requested!");
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef _WIN64
|
#ifdef _WIN64
|
||||||
externalSemaphoreHandleDesc.handle.win32.handle = (HANDLE)getSemaphoreHandle(vkSem, handleType);
|
externalSemaphoreHandleDesc.handle.win32.handle =
|
||||||
|
(HANDLE)getSemaphoreHandle(vkSem, handleType);
|
||||||
#else
|
#else
|
||||||
externalSemaphoreHandleDesc.handle.fd = (int)(uintptr_t)getSemaphoreHandle(vkSem, handleType);
|
externalSemaphoreHandleDesc.handle.fd =
|
||||||
|
(int)(uintptr_t)getSemaphoreHandle(vkSem, handleType);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
externalSemaphoreHandleDesc.flags = 0;
|
externalSemaphoreHandleDesc.flags = 0;
|
||||||
|
|
||||||
checkCudaErrors(cudaImportExternalSemaphore(&cudaSem, &externalSemaphoreHandleDesc));
|
checkCudaErrors(
|
||||||
}
|
cudaImportExternalSemaphore(&cudaSem, &externalSemaphoreHandleDesc));
|
||||||
|
}
|
||||||
|
|
||||||
VkDeviceSize getUniformSize() const {
|
VkDeviceSize getUniformSize() const { return sizeof(UniformBufferObject); }
|
||||||
return sizeof(UniformBufferObject);
|
|
||||||
}
|
|
||||||
|
|
||||||
void updateUniformBuffer(uint32_t imageIndex, size_t globalFrame) {
|
void updateUniformBuffer(uint32_t imageIndex, size_t globalFrame) {
|
||||||
m_ubo.frame = (float)globalFrame;
|
m_ubo.frame = (float)globalFrame;
|
||||||
void *data;
|
void* data;
|
||||||
vkMapMemory(m_device, m_uniformMemory[imageIndex], 0, getUniformSize(), 0, &data);
|
vkMapMemory(m_device, m_uniformMemory[imageIndex], 0, getUniformSize(), 0,
|
||||||
memcpy(data, &m_ubo, sizeof(m_ubo));
|
&data);
|
||||||
vkUnmapMemory(m_device, m_uniformMemory[imageIndex]);
|
memcpy(data, &m_ubo, sizeof(m_ubo));
|
||||||
}
|
vkUnmapMemory(m_device, m_uniformMemory[imageIndex]);
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<const char *> getRequiredExtensions() const {
|
std::vector<const char*> getRequiredExtensions() const {
|
||||||
std::vector<const char *> extensions;
|
std::vector<const char*> extensions;
|
||||||
extensions.push_back(VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME);
|
extensions.push_back(VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME);
|
||||||
extensions.push_back(VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME);
|
extensions.push_back(VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME);
|
||||||
return extensions;
|
extensions.push_back(VK_KHR_EXTERNAL_FENCE_CAPABILITIES_EXTENSION_NAME);
|
||||||
}
|
extensions.push_back(
|
||||||
|
VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
|
||||||
|
return extensions;
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<const char *> getRequiredDeviceExtensions() const {
|
std::vector<const char*> getRequiredDeviceExtensions() const {
|
||||||
std::vector<const char *> extensions;
|
std::vector<const char*> extensions;
|
||||||
|
|
||||||
extensions.push_back(VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME);
|
extensions.push_back(VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME);
|
||||||
extensions.push_back(VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME);
|
extensions.push_back(VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME);
|
||||||
#ifdef _WIN64
|
#ifdef _WIN64
|
||||||
extensions.push_back(VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME);
|
extensions.push_back(VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME);
|
||||||
extensions.push_back(VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME);
|
extensions.push_back(VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME);
|
||||||
#else
|
#else
|
||||||
extensions.push_back(VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME);
|
extensions.push_back(VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME);
|
||||||
extensions.push_back(VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME);
|
extensions.push_back(VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME);
|
||||||
#endif /* _WIN64 */
|
#endif /* _WIN64 */
|
||||||
return extensions;
|
return extensions;
|
||||||
|
}
|
||||||
|
|
||||||
|
void drawFrame() {
|
||||||
|
static chrono_tp startTime = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
chrono_tp currentTime = std::chrono::high_resolution_clock::now();
|
||||||
|
float time = std::chrono::duration<float, std::chrono::seconds::period>(
|
||||||
|
currentTime - startTime)
|
||||||
|
.count();
|
||||||
|
|
||||||
|
if (m_currentFrame == 0) {
|
||||||
|
m_lastTime = startTime;
|
||||||
}
|
}
|
||||||
|
|
||||||
void drawFrame() {
|
cudaExternalSemaphoreWaitParams waitParams = {};
|
||||||
static chrono_tp startTime = std::chrono::high_resolution_clock::now();
|
waitParams.flags = 0;
|
||||||
|
waitParams.params.fence.value = 0;
|
||||||
|
|
||||||
chrono_tp currentTime = std::chrono::high_resolution_clock::now();
|
cudaExternalSemaphoreSignalParams signalParams = {};
|
||||||
float time = std::chrono::duration<float, std::chrono::seconds::period>(currentTime - startTime).count();
|
signalParams.flags = 0;
|
||||||
|
signalParams.params.fence.value = 0;
|
||||||
|
|
||||||
if (m_currentFrame == 0) {
|
// Have vulkan draw the current frame...
|
||||||
m_lastTime = startTime;
|
VulkanBaseApp::drawFrame();
|
||||||
}
|
// Wait for vulkan to complete it's work
|
||||||
|
checkCudaErrors(cudaWaitExternalSemaphoresAsync(&m_cudaWaitSemaphore,
|
||||||
|
&waitParams, 1, m_stream));
|
||||||
|
// Now step the simulation
|
||||||
|
m_sim.stepSimulation(time, m_stream);
|
||||||
|
|
||||||
cudaExternalSemaphoreWaitParams waitParams = {};
|
// Signal vulkan to continue with the updated buffers
|
||||||
waitParams.flags = 0;
|
checkCudaErrors(cudaSignalExternalSemaphoresAsync(
|
||||||
waitParams.params.fence.value = 0;
|
&m_cudaSignalSemaphore, &signalParams, 1, m_stream));
|
||||||
|
}
|
||||||
cudaExternalSemaphoreSignalParams signalParams = {};
|
|
||||||
signalParams.flags = 0;
|
|
||||||
signalParams.params.fence.value = 0;
|
|
||||||
|
|
||||||
// Have vulkan draw the current frame...
|
|
||||||
VulkanBaseApp::drawFrame();
|
|
||||||
// Wait for vulkan to complete it's work
|
|
||||||
checkCudaErrors(cudaWaitExternalSemaphoresAsync(&m_cudaWaitSemaphore, &waitParams, 1, m_stream));
|
|
||||||
// Now step the simulation
|
|
||||||
m_sim.stepSimulation(time, m_stream);
|
|
||||||
|
|
||||||
// Signal vulkan to continue with the updated buffers
|
|
||||||
checkCudaErrors(cudaSignalExternalSemaphoresAsync(&m_cudaSignalSemaphore, &signalParams, 1, m_stream));
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char** argv) {
|
||||||
{
|
execution_path = argv[0];
|
||||||
execution_path = argv[0];
|
VulkanCudaPi app(NUM_SIMULATION_POINTS);
|
||||||
VulkanCudaPi app(NUM_SIMULATION_POINTS);
|
app.init();
|
||||||
app.init();
|
app.mainLoop();
|
||||||
app.mainLoop();
|
return 0;
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
BIN
Samples/simpleVulkanMMAP/vert.spv
Normal file
BIN
Samples/simpleVulkanMMAP/vert.spv
Normal file
Binary file not shown.
|
@ -19,8 +19,17 @@ For Linux:
|
||||||
-- Install "libxcb1-dev" and "xorg-dev", as GLFW3 depends on them
|
-- Install "libxcb1-dev" and "xorg-dev", as GLFW3 depends on them
|
||||||
-- Add the Vulkan and GLFW3 library directories to LD_LIBRARY_PATH
|
-- Add the Vulkan and GLFW3 library directories to LD_LIBRARY_PATH
|
||||||
|
|
||||||
|
|
||||||
For Linux aarch64(L4T):
|
For Linux aarch64(L4T):
|
||||||
-- Install the GLFW3 library using "sudo apt-get install libglfw3-dev"; this will provide glfw3
|
-- Install the GLFW3 library using "sudo apt-get install libglfw3-dev"; this will provide glfw3
|
||||||
-- The install above will also provide libvulkan-dev as a dependency
|
-- The install above will also provide libvulkan-dev as a dependency
|
||||||
-- Add the Vulkan and GLFW3 library directories to LD_LIBRARY_PATH
|
-- Add the Vulkan and GLFW3 library directories to LD_LIBRARY_PATH
|
||||||
-- Pass the path to the Vulkan SDK while building: 'make VULKAN_SDK_PATH=<PATH_TO_VULKAN_SDK>'. VULKAN_SDK_PATH in this scenario is typically "/usr"
|
-- Pass the path to the Vulkan SDK while building: 'make VULKAN_SDK_PATH=<PATH_TO_VULKAN_SDK>'. VULKAN_SDK_PATH in this scenario is typically "/usr"
|
||||||
|
|
||||||
|
|
||||||
|
For Shader changes:
|
||||||
|
-- Update the shader.vert and/or shader.frag shader source files as required
|
||||||
|
-- Use the glslc shader compiler from the installed Vulkan SDK's bin directory to compile shaders as:
|
||||||
|
glslc shader.vert -o vert.spv
|
||||||
|
glslc shader.frag -o frag.spv
|
||||||
|
** Make sure to add glslc's path to your PATH environment variable **
|
||||||
|
|
BIN
Samples/vulkanImageCUDA/frag.spv
Normal file
BIN
Samples/vulkanImageCUDA/frag.spv
Normal file
Binary file not shown.
BIN
Samples/vulkanImageCUDA/vert.spv
Normal file
BIN
Samples/vulkanImageCUDA/vert.spv
Normal file
Binary file not shown.
|
@ -69,7 +69,7 @@ const std::vector<const char*> validationLayers = {
|
||||||
"VK_LAYER_KHRONOS_validation"};
|
"VK_LAYER_KHRONOS_validation"};
|
||||||
|
|
||||||
#ifdef NDEBUG
|
#ifdef NDEBUG
|
||||||
const bool enableValidationLayers = false;
|
const bool enableValidationLayers = true;
|
||||||
#else
|
#else
|
||||||
const bool enableValidationLayers = false;
|
const bool enableValidationLayers = false;
|
||||||
#endif
|
#endif
|
||||||
|
@ -494,7 +494,7 @@ class vulkanImageCUDA {
|
||||||
|
|
||||||
unsigned int* image_data = NULL;
|
unsigned int* image_data = NULL;
|
||||||
unsigned int imageWidth, imageHeight;
|
unsigned int imageWidth, imageHeight;
|
||||||
unsigned int mipLevels;
|
unsigned int mipLevels = 1;
|
||||||
size_t totalImageMemSize;
|
size_t totalImageMemSize;
|
||||||
|
|
||||||
// CUDA objects
|
// CUDA objects
|
||||||
|
@ -630,6 +630,9 @@ class vulkanImageCUDA {
|
||||||
vkDestroyBuffer(device, vertexBuffer, nullptr);
|
vkDestroyBuffer(device, vertexBuffer, nullptr);
|
||||||
vkFreeMemory(device, vertexBufferMemory, nullptr);
|
vkFreeMemory(device, vertexBufferMemory, nullptr);
|
||||||
|
|
||||||
|
vkDestroySemaphore(device, cudaUpdateVkSemaphore, nullptr);
|
||||||
|
vkDestroySemaphore(device, vkUpdateCudaSemaphore, nullptr);
|
||||||
|
|
||||||
for (size_t i = 0; i < MAX_FRAMES; i++) {
|
for (size_t i = 0; i < MAX_FRAMES; i++) {
|
||||||
vkDestroySemaphore(device, renderFinishedSemaphores[i], nullptr);
|
vkDestroySemaphore(device, renderFinishedSemaphores[i], nullptr);
|
||||||
vkDestroySemaphore(device, imageAvailableSemaphores[i], nullptr);
|
vkDestroySemaphore(device, imageAvailableSemaphores[i], nullptr);
|
||||||
|
@ -686,7 +689,7 @@ class vulkanImageCUDA {
|
||||||
appInfo.applicationVersion = VK_MAKE_VERSION(1, 0, 0);
|
appInfo.applicationVersion = VK_MAKE_VERSION(1, 0, 0);
|
||||||
appInfo.pEngineName = "No Engine";
|
appInfo.pEngineName = "No Engine";
|
||||||
appInfo.engineVersion = VK_MAKE_VERSION(1, 0, 0);
|
appInfo.engineVersion = VK_MAKE_VERSION(1, 0, 0);
|
||||||
appInfo.apiVersion = VK_API_VERSION_1_0;
|
appInfo.apiVersion = VK_API_VERSION_1_1;
|
||||||
|
|
||||||
VkInstanceCreateInfo createInfo = {};
|
VkInstanceCreateInfo createInfo = {};
|
||||||
createInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
|
createInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
|
||||||
|
@ -905,6 +908,7 @@ class vulkanImageCUDA {
|
||||||
}
|
}
|
||||||
|
|
||||||
VkPhysicalDeviceFeatures deviceFeatures = {};
|
VkPhysicalDeviceFeatures deviceFeatures = {};
|
||||||
|
deviceFeatures.samplerAnisotropy = VK_TRUE;
|
||||||
|
|
||||||
VkDeviceCreateInfo createInfo = {};
|
VkDeviceCreateInfo createInfo = {};
|
||||||
createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
|
createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
|
||||||
|
@ -1078,8 +1082,8 @@ class vulkanImageCUDA {
|
||||||
}
|
}
|
||||||
|
|
||||||
void createGraphicsPipeline() {
|
void createGraphicsPipeline() {
|
||||||
auto vertShaderCode = readFile("shader.vert");
|
auto vertShaderCode = readFile("vert.spv");
|
||||||
auto fragShaderCode = readFile("shader.frag");
|
auto fragShaderCode = readFile("frag.spv");
|
||||||
|
|
||||||
VkShaderModule vertShaderModule = createShaderModule(vertShaderCode);
|
VkShaderModule vertShaderModule = createShaderModule(vertShaderCode);
|
||||||
VkShaderModule fragShaderModule = createShaderModule(fragShaderCode);
|
VkShaderModule fragShaderModule = createShaderModule(fragShaderCode);
|
||||||
|
@ -1268,7 +1272,7 @@ class vulkanImageCUDA {
|
||||||
|
|
||||||
// VK_FORMAT_R8G8B8A8_UNORM changed to VK_FORMAT_R8G8B8A8_UINT
|
// VK_FORMAT_R8G8B8A8_UNORM changed to VK_FORMAT_R8G8B8A8_UINT
|
||||||
createImage(
|
createImage(
|
||||||
imageWidth, imageHeight, VK_FORMAT_R8G8B8A8_UINT,
|
imageWidth, imageHeight, VK_FORMAT_R8G8B8A8_UNORM,
|
||||||
VK_IMAGE_TILING_OPTIMAL,
|
VK_IMAGE_TILING_OPTIMAL,
|
||||||
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
|
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
|
||||||
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
|
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
|
||||||
|
@ -1280,9 +1284,6 @@ class vulkanImageCUDA {
|
||||||
copyBufferToImage(stagingBuffer, textureImage,
|
copyBufferToImage(stagingBuffer, textureImage,
|
||||||
static_cast<uint32_t>(imageWidth),
|
static_cast<uint32_t>(imageWidth),
|
||||||
static_cast<uint32_t>(imageHeight));
|
static_cast<uint32_t>(imageHeight));
|
||||||
transitionImageLayout(textureImage, VK_FORMAT_R8G8B8A8_UINT,
|
|
||||||
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
|
|
||||||
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
|
||||||
|
|
||||||
vkDestroyBuffer(device, stagingBuffer, nullptr);
|
vkDestroyBuffer(device, stagingBuffer, nullptr);
|
||||||
vkFreeMemory(device, stagingBufferMemory, nullptr);
|
vkFreeMemory(device, stagingBufferMemory, nullptr);
|
||||||
|
@ -1523,8 +1524,13 @@ class vulkanImageCUDA {
|
||||||
vkExternalMemImageCreateInfo.sType =
|
vkExternalMemImageCreateInfo.sType =
|
||||||
VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO;
|
VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO;
|
||||||
vkExternalMemImageCreateInfo.pNext = NULL;
|
vkExternalMemImageCreateInfo.pNext = NULL;
|
||||||
|
#ifdef _WIN64
|
||||||
|
vkExternalMemImageCreateInfo.handleTypes =
|
||||||
|
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
|
||||||
|
#else
|
||||||
vkExternalMemImageCreateInfo.handleTypes =
|
vkExternalMemImageCreateInfo.handleTypes =
|
||||||
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
|
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
|
||||||
|
#endif
|
||||||
|
|
||||||
imageInfo.pNext = &vkExternalMemImageCreateInfo;
|
imageInfo.pNext = &vkExternalMemImageCreateInfo;
|
||||||
|
|
||||||
|
@ -2201,7 +2207,6 @@ class vulkanImageCUDA {
|
||||||
throw std::runtime_error(
|
throw std::runtime_error(
|
||||||
"failed to create synchronization objects for a CUDA-Vulkan!");
|
"failed to create synchronization objects for a CUDA-Vulkan!");
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void updateUniformBuffer() {
|
void updateUniformBuffer() {
|
||||||
|
@ -2333,8 +2338,8 @@ class vulkanImageCUDA {
|
||||||
submitInfo.signalSemaphoreCount = 2;
|
submitInfo.signalSemaphoreCount = 2;
|
||||||
submitInfo.pSignalSemaphores = signalSemaphores;
|
submitInfo.pSignalSemaphores = signalSemaphores;
|
||||||
|
|
||||||
if (vkQueueSubmit(graphicsQueue, 1, &submitInfo, inFlightFences[currentFrame]) !=
|
if (vkQueueSubmit(graphicsQueue, 1, &submitInfo,
|
||||||
VK_SUCCESS) {
|
inFlightFences[currentFrame]) != VK_SUCCESS) {
|
||||||
throw std::runtime_error("failed to submit draw command buffer!");
|
throw std::runtime_error("failed to submit draw command buffer!");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2360,8 +2365,8 @@ class vulkanImageCUDA {
|
||||||
submitInfo.signalSemaphoreCount = 2;
|
submitInfo.signalSemaphoreCount = 2;
|
||||||
submitInfo.pSignalSemaphores = signalSemaphores;
|
submitInfo.pSignalSemaphores = signalSemaphores;
|
||||||
|
|
||||||
if (vkQueueSubmit(graphicsQueue, 1, &submitInfo, inFlightFences[currentFrame]) !=
|
if (vkQueueSubmit(graphicsQueue, 1, &submitInfo,
|
||||||
VK_SUCCESS) {
|
inFlightFences[currentFrame]) != VK_SUCCESS) {
|
||||||
throw std::runtime_error("failed to submit draw command buffer!");
|
throw std::runtime_error("failed to submit draw command buffer!");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user