mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2025-04-19 22:32:23 +08:00
Export and wrap test buffer
This commit is contained in:
parent
cbd0b5f506
commit
b43cc7a47e
|
@ -201,7 +201,7 @@ void MonteCarloPiSimulation::getIdealExecutionConfiguration() {
|
||||||
void MonteCarloPiSimulation::setupSimulationAllocations() {
|
void MonteCarloPiSimulation::setupSimulationAllocations() {
|
||||||
CUdeviceptr d_ptr = 0U;
|
CUdeviceptr d_ptr = 0U;
|
||||||
size_t granularity = 0;
|
size_t granularity = 0;
|
||||||
CUmemGenericAllocationHandle cudaPositionHandle, cudaInCircleHandle;
|
CUmemGenericAllocationHandle cudaPositionHandle, cudaInCircleHandle, cudaBDAHandle;
|
||||||
|
|
||||||
CUmemAllocationProp allocProp = {};
|
CUmemAllocationProp allocProp = {};
|
||||||
allocProp.type = CU_MEM_ALLOCATION_TYPE_PINNED;
|
allocProp.type = CU_MEM_ALLOCATION_TYPE_PINNED;
|
||||||
|
@ -222,11 +222,13 @@ void MonteCarloPiSimulation::setupSimulationAllocations() {
|
||||||
|
|
||||||
size_t xyPositionVecSize = m_numPoints * sizeof(*m_xyVector);
|
size_t xyPositionVecSize = m_numPoints * sizeof(*m_xyVector);
|
||||||
size_t inCircleVecSize = m_numPoints * sizeof(*m_pointsInsideCircle);
|
size_t inCircleVecSize = m_numPoints * sizeof(*m_pointsInsideCircle);
|
||||||
|
size_t bdaVecSize = sizeof(float) * 2;
|
||||||
|
|
||||||
size_t xyPositionSize =
|
size_t xyPositionSize =
|
||||||
ROUND_UP_TO_GRANULARITY(xyPositionVecSize, granularity);
|
ROUND_UP_TO_GRANULARITY(xyPositionVecSize, granularity);
|
||||||
size_t inCircleSize = ROUND_UP_TO_GRANULARITY(inCircleVecSize, granularity);
|
size_t inCircleSize = ROUND_UP_TO_GRANULARITY(inCircleVecSize, granularity);
|
||||||
m_totalAllocationSize = (xyPositionSize + inCircleSize);
|
size_t bdaSize = ROUND_UP_TO_GRANULARITY(bdaVecSize, granularity);
|
||||||
|
m_totalAllocationSize = (xyPositionSize + inCircleSize + bdaSize);
|
||||||
|
|
||||||
// Reserve the required contiguous VA space for the allocations
|
// Reserve the required contiguous VA space for the allocations
|
||||||
checkCudaErrors(
|
checkCudaErrors(
|
||||||
|
@ -240,6 +242,8 @@ void MonteCarloPiSimulation::setupSimulationAllocations() {
|
||||||
cuMemCreate(&cudaPositionHandle, xyPositionSize, &allocProp, 0));
|
cuMemCreate(&cudaPositionHandle, xyPositionSize, &allocProp, 0));
|
||||||
checkCudaErrors(
|
checkCudaErrors(
|
||||||
cuMemCreate(&cudaInCircleHandle, inCircleSize, &allocProp, 0));
|
cuMemCreate(&cudaInCircleHandle, inCircleSize, &allocProp, 0));
|
||||||
|
checkCudaErrors(
|
||||||
|
cuMemCreate(&cudaBDAHandle, bdaSize, &allocProp, 0));
|
||||||
|
|
||||||
// Export the allocation to a platform-specific handle. The type of handle
|
// Export the allocation to a platform-specific handle. The type of handle
|
||||||
// requested here must match the requestedHandleTypes field in the prop
|
// requested here must match the requestedHandleTypes field in the prop
|
||||||
|
@ -250,9 +254,12 @@ void MonteCarloPiSimulation::setupSimulationAllocations() {
|
||||||
checkCudaErrors(
|
checkCudaErrors(
|
||||||
cuMemExportToShareableHandle((void *)&m_inCircleShareableHandle,
|
cuMemExportToShareableHandle((void *)&m_inCircleShareableHandle,
|
||||||
cudaInCircleHandle, ipcHandleTypeFlag, 0));
|
cudaInCircleHandle, ipcHandleTypeFlag, 0));
|
||||||
|
checkCudaErrors(cuMemExportToShareableHandle(
|
||||||
|
(void *)&m_bdaShareableHandle, cudaBDAHandle, ipcHandleTypeFlag, 0));
|
||||||
|
|
||||||
CUdeviceptr va_position = d_ptr;
|
CUdeviceptr va_position = d_ptr;
|
||||||
CUdeviceptr va_InCircle = va_position + xyPositionSize;
|
CUdeviceptr va_InCircle = va_position + xyPositionSize;
|
||||||
|
CUdeviceptr va_BDA = va_InCircle + inCircleSize;
|
||||||
m_pointsInsideCircle = (float *)va_InCircle;
|
m_pointsInsideCircle = (float *)va_InCircle;
|
||||||
m_xyVector = (vec2 *)va_position;
|
m_xyVector = (vec2 *)va_position;
|
||||||
|
|
||||||
|
@ -261,6 +268,8 @@ void MonteCarloPiSimulation::setupSimulationAllocations() {
|
||||||
cuMemMap(va_position, xyPositionSize, 0, cudaPositionHandle, 0));
|
cuMemMap(va_position, xyPositionSize, 0, cudaPositionHandle, 0));
|
||||||
checkCudaErrors(
|
checkCudaErrors(
|
||||||
cuMemMap(va_InCircle, inCircleSize, 0, cudaInCircleHandle, 0));
|
cuMemMap(va_InCircle, inCircleSize, 0, cudaInCircleHandle, 0));
|
||||||
|
checkCudaErrors(
|
||||||
|
cuMemMap(va_BDA, bdaSize, 0, cudaBDAHandle, 0));
|
||||||
|
|
||||||
// Release the handles for the allocation. Since the allocation is currently
|
// Release the handles for the allocation. Since the allocation is currently
|
||||||
// mapped to a VA range with a previous call to cuMemMap the actual freeing of
|
// mapped to a VA range with a previous call to cuMemMap the actual freeing of
|
||||||
|
@ -268,6 +277,7 @@ void MonteCarloPiSimulation::setupSimulationAllocations() {
|
||||||
// allocation will be kept live until it is unmapped.
|
// allocation will be kept live until it is unmapped.
|
||||||
checkCudaErrors(cuMemRelease(cudaPositionHandle));
|
checkCudaErrors(cuMemRelease(cudaPositionHandle));
|
||||||
checkCudaErrors(cuMemRelease(cudaInCircleHandle));
|
checkCudaErrors(cuMemRelease(cudaInCircleHandle));
|
||||||
|
checkCudaErrors(cuMemRelease(cudaBDAHandle));
|
||||||
|
|
||||||
CUmemAccessDesc accessDescriptor = {};
|
CUmemAccessDesc accessDescriptor = {};
|
||||||
accessDescriptor.location.id = m_cudaDevice;
|
accessDescriptor.location.id = m_cudaDevice;
|
||||||
|
@ -278,6 +288,10 @@ void MonteCarloPiSimulation::setupSimulationAllocations() {
|
||||||
// Read-Write access to the range.
|
// Read-Write access to the range.
|
||||||
checkCudaErrors(
|
checkCudaErrors(
|
||||||
cuMemSetAccess(d_ptr, m_totalAllocationSize, &accessDescriptor, 1));
|
cuMemSetAccess(d_ptr, m_totalAllocationSize, &accessDescriptor, 1));
|
||||||
|
|
||||||
|
// Fill the BDA test buffer with two known float values
|
||||||
|
const float bdaValues[2] = { 42.0f, 17.0f };
|
||||||
|
// Route the driver-API result through checkCudaErrors, as the other cuMem*
// calls in this function do, so a failed host-to-device copy is not silently
// ignored. sizeof(bdaValues) is the full size of the two-element host array.
checkCudaErrors(cuMemcpyHtoD(va_BDA, bdaValues, sizeof(bdaValues)));
|
||||||
}
|
}
|
||||||
|
|
||||||
void MonteCarloPiSimulation::cleanupSimulationAllocations() {
|
void MonteCarloPiSimulation::cleanupSimulationAllocations() {
|
||||||
|
@ -290,6 +304,7 @@ void MonteCarloPiSimulation::cleanupSimulationAllocations() {
|
||||||
|
|
||||||
checkIpcErrors(ipcCloseShareableHandle(m_posShareableHandle));
|
checkIpcErrors(ipcCloseShareableHandle(m_posShareableHandle));
|
||||||
checkIpcErrors(ipcCloseShareableHandle(m_inCircleShareableHandle));
|
checkIpcErrors(ipcCloseShareableHandle(m_inCircleShareableHandle));
|
||||||
|
checkIpcErrors(ipcCloseShareableHandle(m_bdaShareableHandle));
|
||||||
|
|
||||||
// Free the virtual address region.
|
// Free the virtual address region.
|
||||||
checkCudaErrors(
|
checkCudaErrors(
|
||||||
|
|
|
@ -61,7 +61,7 @@ class MonteCarloPiSimulation {
|
||||||
// Shareable Handles(a file descriptor on Linux and NT Handle on Windows),
|
// Shareable Handles(a file descriptor on Linux and NT Handle on Windows),
|
||||||
// used for sharing cuda
|
// used for sharing cuda
|
||||||
// allocated memory with Vulkan
|
// allocated memory with Vulkan
|
||||||
ShareableHandle m_posShareableHandle, m_inCircleShareableHandle;
|
ShareableHandle m_posShareableHandle, m_inCircleShareableHandle, m_bdaShareableHandle;
|
||||||
|
|
||||||
// Cuda Device corresponding to the Vulkan Physical device
|
// Cuda Device corresponding to the Vulkan Physical device
|
||||||
int m_cudaDevice;
|
int m_cudaDevice;
|
||||||
|
@ -90,6 +90,7 @@ class MonteCarloPiSimulation {
|
||||||
ShareableHandle &getInCircleShareableHandle() {
|
ShareableHandle &getInCircleShareableHandle() {
|
||||||
return m_inCircleShareableHandle;
|
return m_inCircleShareableHandle;
|
||||||
}
|
}
|
||||||
|
// Accessor for the BDA buffer's shareable handle; returns a mutable reference
// to the m_bdaShareableHandle member.
ShareableHandle &getBDAShareableHandle() {
  return m_bdaShareableHandle;
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // __PISIM_H__
|
#endif // __PISIM_H__
|
||||||
|
|
|
@ -59,8 +59,8 @@ std::string execution_path;
|
||||||
class VulkanCudaPi : public VulkanBaseApp {
|
class VulkanCudaPi : public VulkanBaseApp {
|
||||||
typedef struct UniformBufferObject_st { float frame; } UniformBufferObject;
|
typedef struct UniformBufferObject_st { float frame; } UniformBufferObject;
|
||||||
|
|
||||||
VkBuffer m_inCircleBuffer, m_xyPositionBuffer;
|
VkBuffer m_inCircleBuffer, m_xyPositionBuffer, m_bdaBuffer;
|
||||||
VkDeviceMemory m_inCircleMemory, m_xyPositionMemory;
|
VkDeviceMemory m_inCircleMemory, m_xyPositionMemory, m_bdaMemory;
|
||||||
VkSemaphore m_vkWaitSemaphore, m_vkSignalSemaphore;
|
VkSemaphore m_vkWaitSemaphore, m_vkSignalSemaphore;
|
||||||
MonteCarloPiSimulation m_sim;
|
MonteCarloPiSimulation m_sim;
|
||||||
UniformBufferObject m_ubo;
|
UniformBufferObject m_ubo;
|
||||||
|
@ -75,8 +75,10 @@ class VulkanCudaPi : public VulkanBaseApp {
|
||||||
: VulkanBaseApp("simpleVulkanMMAP", ENABLE_VALIDATION),
|
: VulkanBaseApp("simpleVulkanMMAP", ENABLE_VALIDATION),
|
||||||
m_inCircleBuffer(VK_NULL_HANDLE),
|
m_inCircleBuffer(VK_NULL_HANDLE),
|
||||||
m_xyPositionBuffer(VK_NULL_HANDLE),
|
m_xyPositionBuffer(VK_NULL_HANDLE),
|
||||||
|
m_bdaBuffer(VK_NULL_HANDLE),
|
||||||
m_inCircleMemory(VK_NULL_HANDLE),
|
m_inCircleMemory(VK_NULL_HANDLE),
|
||||||
m_xyPositionMemory(VK_NULL_HANDLE),
|
m_xyPositionMemory(VK_NULL_HANDLE),
|
||||||
|
m_bdaMemory(VK_NULL_HANDLE),
|
||||||
m_sim(num_points),
|
m_sim(num_points),
|
||||||
m_ubo(),
|
m_ubo(),
|
||||||
m_stream(0),
|
m_stream(0),
|
||||||
|
@ -123,6 +125,12 @@ class VulkanCudaPi : public VulkanBaseApp {
|
||||||
if (m_inCircleMemory != VK_NULL_HANDLE) {
|
if (m_inCircleMemory != VK_NULL_HANDLE) {
|
||||||
vkFreeMemory(m_device, m_inCircleMemory, nullptr);
|
vkFreeMemory(m_device, m_inCircleMemory, nullptr);
|
||||||
}
|
}
|
||||||
|
if (m_bdaBuffer != VK_NULL_HANDLE) {
|
||||||
|
vkDestroyBuffer(m_device, m_bdaBuffer, nullptr);
|
||||||
|
}
|
||||||
|
if (m_bdaMemory != VK_NULL_HANDLE) {
|
||||||
|
vkFreeMemory(m_device, m_bdaMemory, nullptr);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void fillRenderingCommandBuffer(VkCommandBuffer& commandBuffer) {
|
void fillRenderingCommandBuffer(VkCommandBuffer& commandBuffer) {
|
||||||
|
@ -226,14 +234,21 @@ class VulkanCudaPi : public VulkanBaseApp {
|
||||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, m_inCircleBuffer,
|
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, m_inCircleBuffer,
|
||||||
m_inCircleMemory);
|
m_inCircleMemory);
|
||||||
|
|
||||||
|
importExternalBuffer(
|
||||||
|
(void*)(uintptr_t)m_sim.getBDAShareableHandle(),
|
||||||
|
getDefaultMemHandleType(), sizeof(float) * 2,
|
||||||
|
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
|
||||||
|
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, m_bdaBuffer,
|
||||||
|
m_bdaMemory);
|
||||||
|
|
||||||
// (SE) get function ptr
|
// (SE) get function ptr
|
||||||
auto* vkGetBufferDeviceAddressKHR = (PFN_vkGetBufferDeviceAddressKHR)vkGetDeviceProcAddr(m_device, "vkGetBufferDeviceAddressKHR");
|
auto* vkGetBufferDeviceAddressKHR = (PFN_vkGetBufferDeviceAddressKHR)vkGetDeviceProcAddr(m_device, "vkGetBufferDeviceAddressKHR");
|
||||||
std::cout << "DEBUG: vkGetBufferDeviceAddressKHR = " << (void*)vkGetBufferDeviceAddressKHR << std::endl;
|
std::cout << "DEBUG: vkGetBufferDeviceAddressKHR = " << (void*)vkGetBufferDeviceAddressKHR << std::endl;
|
||||||
|
|
||||||
// get BDA for the circle buffer
|
// get the buffer device address (BDA) of the imported BDA buffer
|
||||||
VkBufferDeviceAddressInfoKHR bda_info{VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR,
|
VkBufferDeviceAddressInfoKHR bda_info{VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR,
|
||||||
nullptr,
|
nullptr,
|
||||||
m_inCircleBuffer};
|
m_bdaBuffer};
|
||||||
auto bda = vkGetBufferDeviceAddressKHR(m_device, &bda_info);
|
auto bda = vkGetBufferDeviceAddressKHR(m_device, &bda_info);
|
||||||
std::cout << "DEBUG: BDA = " << (void*)bda << std::endl;
|
std::cout << "DEBUG: BDA = " << (void*)bda << std::endl;
|
||||||
|
|
||||||
|
|
BIN
Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP
Executable file
BIN
Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP
Executable file
Binary file not shown.
Loading…
Reference in New Issue
Block a user