diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/MonteCarloPi.cu b/Samples/5_Domain_Specific/simpleVulkanMMAP/MonteCarloPi.cu index 0ed5f25a..9084d20d 100644 --- a/Samples/5_Domain_Specific/simpleVulkanMMAP/MonteCarloPi.cu +++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/MonteCarloPi.cu @@ -201,7 +201,7 @@ void MonteCarloPiSimulation::getIdealExecutionConfiguration() { void MonteCarloPiSimulation::setupSimulationAllocations() { CUdeviceptr d_ptr = 0U; size_t granularity = 0; - CUmemGenericAllocationHandle cudaPositionHandle, cudaInCircleHandle; + CUmemGenericAllocationHandle cudaPositionHandle, cudaInCircleHandle, cudaBDAHandle; CUmemAllocationProp allocProp = {}; allocProp.type = CU_MEM_ALLOCATION_TYPE_PINNED; @@ -222,11 +222,13 @@ void MonteCarloPiSimulation::setupSimulationAllocations() { size_t xyPositionVecSize = m_numPoints * sizeof(*m_xyVector); size_t inCircleVecSize = m_numPoints * sizeof(*m_pointsInsideCircle); + size_t bdaVecSize = sizeof(float) * 2; size_t xyPositionSize = ROUND_UP_TO_GRANULARITY(xyPositionVecSize, granularity); size_t inCircleSize = ROUND_UP_TO_GRANULARITY(inCircleVecSize, granularity); - m_totalAllocationSize = (xyPositionSize + inCircleSize); + size_t bdaSize = ROUND_UP_TO_GRANULARITY(bdaVecSize, granularity); + m_totalAllocationSize = (xyPositionSize + inCircleSize + bdaSize); // Reserve the required contiguous VA space for the allocations checkCudaErrors( @@ -240,6 +242,8 @@ void MonteCarloPiSimulation::setupSimulationAllocations() { cuMemCreate(&cudaPositionHandle, xyPositionSize, &allocProp, 0)); checkCudaErrors( cuMemCreate(&cudaInCircleHandle, inCircleSize, &allocProp, 0)); + checkCudaErrors( + cuMemCreate(&cudaBDAHandle, bdaSize, &allocProp, 0)); // Export the allocation to a platform-specific handle. 
The type of handle // requested here must match the requestedHandleTypes field in the prop @@ -250,9 +254,12 @@ void MonteCarloPiSimulation::setupSimulationAllocations() { checkCudaErrors( cuMemExportToShareableHandle((void *)&m_inCircleShareableHandle, cudaInCircleHandle, ipcHandleTypeFlag, 0)); + checkCudaErrors(cuMemExportToShareableHandle( + (void *)&m_bdaShareableHandle, cudaBDAHandle, ipcHandleTypeFlag, 0)); CUdeviceptr va_position = d_ptr; CUdeviceptr va_InCircle = va_position + xyPositionSize; + CUdeviceptr va_BDA = va_InCircle + inCircleSize; m_pointsInsideCircle = (float *)va_InCircle; m_xyVector = (vec2 *)va_position; @@ -261,6 +268,8 @@ void MonteCarloPiSimulation::setupSimulationAllocations() { cuMemMap(va_position, xyPositionSize, 0, cudaPositionHandle, 0)); checkCudaErrors( cuMemMap(va_InCircle, inCircleSize, 0, cudaInCircleHandle, 0)); + checkCudaErrors( + cuMemMap(va_BDA, bdaSize, 0, cudaBDAHandle, 0)); // Release the handles for the allocation. Since the allocation is currently // mapped to a VA range with a previous call to cuMemMap the actual freeing of @@ -268,6 +277,7 @@ void MonteCarloPiSimulation::setupSimulationAllocations() { // allocation will be kept live until it is unmapped. checkCudaErrors(cuMemRelease(cudaPositionHandle)); checkCudaErrors(cuMemRelease(cudaInCircleHandle)); + checkCudaErrors(cuMemRelease(cudaBDAHandle)); CUmemAccessDesc accessDescriptor = {}; accessDescriptor.location.id = m_cudaDevice; @@ -278,6 +288,10 @@ void MonteCarloPiSimulation::setupSimulationAllocations() { // Read-Write access to the range. 
checkCudaErrors( cuMemSetAccess(d_ptr, m_totalAllocationSize, &accessDescriptor, 1)); + + // Seed the BDA buffer with two known floats; the copy is error-checked like the other driver calls here + const float bdaValues[2] = { 42.0f, 17.0f }; + checkCudaErrors(cuMemcpyHtoD(va_BDA, &bdaValues[0], sizeof(bdaValues))); } void MonteCarloPiSimulation::cleanupSimulationAllocations() { @@ -290,6 +304,7 @@ void MonteCarloPiSimulation::cleanupSimulationAllocations() { checkIpcErrors(ipcCloseShareableHandle(m_posShareableHandle)); checkIpcErrors(ipcCloseShareableHandle(m_inCircleShareableHandle)); + checkIpcErrors(ipcCloseShareableHandle(m_bdaShareableHandle)); // Free the virtual address region. checkCudaErrors( diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/MonteCarloPi.h b/Samples/5_Domain_Specific/simpleVulkanMMAP/MonteCarloPi.h index 0f1c6322..7d4934b5 100644 --- a/Samples/5_Domain_Specific/simpleVulkanMMAP/MonteCarloPi.h +++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/MonteCarloPi.h @@ -61,7 +61,7 @@ class MonteCarloPiSimulation { // Shareable Handles(a file descriptor on Linux and NT Handle on Windows), // used for sharing cuda // allocated memory with Vulkan - ShareableHandle m_posShareableHandle, m_inCircleShareableHandle; + ShareableHandle m_posShareableHandle, m_inCircleShareableHandle, m_bdaShareableHandle; // Cuda Device corresponding to the Vulkan Physical device int m_cudaDevice; @@ -90,6 +90,7 @@ class MonteCarloPiSimulation { ShareableHandle &getInCircleShareableHandle() { return m_inCircleShareableHandle; } + ShareableHandle &getBDAShareableHandle() { return m_bdaShareableHandle; } }; #endif // __PISIM_H__ diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/main.cpp b/Samples/5_Domain_Specific/simpleVulkanMMAP/main.cpp index 8ea186fb..2d38c688 100644 --- a/Samples/5_Domain_Specific/simpleVulkanMMAP/main.cpp +++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/main.cpp @@ -59,8 +59,8 @@ std::string execution_path; class VulkanCudaPi : public VulkanBaseApp { typedef struct UniformBufferObject_st { float frame; } UniformBufferObject; 
- VkBuffer m_inCircleBuffer, m_xyPositionBuffer; - VkDeviceMemory m_inCircleMemory, m_xyPositionMemory; + VkBuffer m_inCircleBuffer, m_xyPositionBuffer, m_bdaBuffer; + VkDeviceMemory m_inCircleMemory, m_xyPositionMemory, m_bdaMemory; VkSemaphore m_vkWaitSemaphore, m_vkSignalSemaphore; MonteCarloPiSimulation m_sim; UniformBufferObject m_ubo; @@ -75,8 +75,10 @@ class VulkanCudaPi : public VulkanBaseApp { : VulkanBaseApp("simpleVulkanMMAP", ENABLE_VALIDATION), m_inCircleBuffer(VK_NULL_HANDLE), m_xyPositionBuffer(VK_NULL_HANDLE), + m_bdaBuffer(VK_NULL_HANDLE), m_inCircleMemory(VK_NULL_HANDLE), m_xyPositionMemory(VK_NULL_HANDLE), + m_bdaMemory(VK_NULL_HANDLE), m_sim(num_points), m_ubo(), m_stream(0), @@ -123,6 +125,12 @@ class VulkanCudaPi : public VulkanBaseApp { if (m_inCircleMemory != VK_NULL_HANDLE) { vkFreeMemory(m_device, m_inCircleMemory, nullptr); } + if (m_bdaBuffer != VK_NULL_HANDLE) { + vkDestroyBuffer(m_device, m_bdaBuffer, nullptr); + } + if (m_bdaMemory != VK_NULL_HANDLE) { + vkFreeMemory(m_device, m_bdaMemory, nullptr); + } } void fillRenderingCommandBuffer(VkCommandBuffer& commandBuffer) { @@ -226,14 +234,21 @@ class VulkanCudaPi : public VulkanBaseApp { VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, m_inCircleBuffer, m_inCircleMemory); + importExternalBuffer( + (void*)(uintptr_t)m_sim.getBDAShareableHandle(), + getDefaultMemHandleType(), sizeof(float) * 2, + VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, m_bdaBuffer, + m_bdaMemory); + // (SE) get function ptr auto* vkGetBufferDeviceAddressKHR = (PFN_vkGetBufferDeviceAddressKHR)vkGetDeviceProcAddr(m_device, "vkGetBufferDeviceAddressKHR"); std::cout << "DEBUG: vkGetBufferDeviceAddressKHR = " << (void*)vkGetBufferDeviceAddressKHR << std::endl; - // get BDA for the circle buffer + // get BDA VkBufferDeviceAddressInfoKHR bda_info{VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR, nullptr, - m_inCircleBuffer}; + m_bdaBuffer}; auto bda = 
vkGetBufferDeviceAddressKHR(m_device, &bda_info); std::cout << "DEBUG: BDA = " << (void*)bda << std::endl; diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP b/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP new file mode 100755 index 00000000..f011fbe2 Binary files /dev/null and b/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP differ