mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2025-04-12 20:38:31 +08:00
Export and wrap test buffer
This commit is contained in:
parent
cbd0b5f506
commit
b43cc7a47e
|
@ -201,7 +201,7 @@ void MonteCarloPiSimulation::getIdealExecutionConfiguration() {
|
|||
void MonteCarloPiSimulation::setupSimulationAllocations() {
|
||||
CUdeviceptr d_ptr = 0U;
|
||||
size_t granularity = 0;
|
||||
CUmemGenericAllocationHandle cudaPositionHandle, cudaInCircleHandle;
|
||||
CUmemGenericAllocationHandle cudaPositionHandle, cudaInCircleHandle, cudaBDAHandle;
|
||||
|
||||
CUmemAllocationProp allocProp = {};
|
||||
allocProp.type = CU_MEM_ALLOCATION_TYPE_PINNED;
|
||||
|
@ -222,11 +222,13 @@ void MonteCarloPiSimulation::setupSimulationAllocations() {
|
|||
|
||||
size_t xyPositionVecSize = m_numPoints * sizeof(*m_xyVector);
|
||||
size_t inCircleVecSize = m_numPoints * sizeof(*m_pointsInsideCircle);
|
||||
size_t bdaVecSize = sizeof(float) * 2;
|
||||
|
||||
size_t xyPositionSize =
|
||||
ROUND_UP_TO_GRANULARITY(xyPositionVecSize, granularity);
|
||||
size_t inCircleSize = ROUND_UP_TO_GRANULARITY(inCircleVecSize, granularity);
|
||||
m_totalAllocationSize = (xyPositionSize + inCircleSize);
|
||||
size_t bdaSize = ROUND_UP_TO_GRANULARITY(bdaVecSize, granularity);
|
||||
m_totalAllocationSize = (xyPositionSize + inCircleSize + bdaSize);
|
||||
|
||||
// Reserve the required contiguous VA space for the allocations
|
||||
checkCudaErrors(
|
||||
|
@ -240,6 +242,8 @@ void MonteCarloPiSimulation::setupSimulationAllocations() {
|
|||
cuMemCreate(&cudaPositionHandle, xyPositionSize, &allocProp, 0));
|
||||
checkCudaErrors(
|
||||
cuMemCreate(&cudaInCircleHandle, inCircleSize, &allocProp, 0));
|
||||
checkCudaErrors(
|
||||
cuMemCreate(&cudaBDAHandle, bdaSize, &allocProp, 0));
|
||||
|
||||
// Export the allocation to a platform-specific handle. The type of handle
|
||||
// requested here must match the requestedHandleTypes field in the prop
|
||||
|
@ -250,9 +254,12 @@ void MonteCarloPiSimulation::setupSimulationAllocations() {
|
|||
checkCudaErrors(
|
||||
cuMemExportToShareableHandle((void *)&m_inCircleShareableHandle,
|
||||
cudaInCircleHandle, ipcHandleTypeFlag, 0));
|
||||
checkCudaErrors(cuMemExportToShareableHandle(
|
||||
(void *)&m_bdaShareableHandle, cudaBDAHandle, ipcHandleTypeFlag, 0));
|
||||
|
||||
CUdeviceptr va_position = d_ptr;
|
||||
CUdeviceptr va_InCircle = va_position + xyPositionSize;
|
||||
CUdeviceptr va_BDA = va_InCircle + inCircleSize;
|
||||
m_pointsInsideCircle = (float *)va_InCircle;
|
||||
m_xyVector = (vec2 *)va_position;
|
||||
|
||||
|
@ -261,6 +268,8 @@ void MonteCarloPiSimulation::setupSimulationAllocations() {
|
|||
cuMemMap(va_position, xyPositionSize, 0, cudaPositionHandle, 0));
|
||||
checkCudaErrors(
|
||||
cuMemMap(va_InCircle, inCircleSize, 0, cudaInCircleHandle, 0));
|
||||
checkCudaErrors(
|
||||
cuMemMap(va_BDA, bdaSize, 0, cudaBDAHandle, 0));
|
||||
|
||||
// Release the handles for the allocation. Since the allocation is currently
|
||||
// mapped to a VA range with a previous call to cuMemMap the actual freeing of
|
||||
|
@ -268,6 +277,7 @@ void MonteCarloPiSimulation::setupSimulationAllocations() {
|
|||
// allocation will be kept live until it is unmapped.
|
||||
checkCudaErrors(cuMemRelease(cudaPositionHandle));
|
||||
checkCudaErrors(cuMemRelease(cudaInCircleHandle));
|
||||
checkCudaErrors(cuMemRelease(cudaBDAHandle));
|
||||
|
||||
CUmemAccessDesc accessDescriptor = {};
|
||||
accessDescriptor.location.id = m_cudaDevice;
|
||||
|
@ -278,6 +288,10 @@ void MonteCarloPiSimulation::setupSimulationAllocations() {
|
|||
// Read-Write access to the range.
|
||||
checkCudaErrors(
|
||||
cuMemSetAccess(d_ptr, m_totalAllocationSize, &accessDescriptor, 1));
|
||||
|
||||
// fill the BDA buffer with something
|
||||
const float bdaValues[2] = { 42.0f, 17.0f };
|
||||
// Check the copy like every other driver call here; a silent failure would leave the exported BDA buffer uninitialised.
checkCudaErrors(cuMemcpyHtoD(va_BDA, &bdaValues[0], sizeof(bdaValues)));
|
||||
}
|
||||
|
||||
void MonteCarloPiSimulation::cleanupSimulationAllocations() {
|
||||
|
@ -290,6 +304,7 @@ void MonteCarloPiSimulation::cleanupSimulationAllocations() {
|
|||
|
||||
checkIpcErrors(ipcCloseShareableHandle(m_posShareableHandle));
|
||||
checkIpcErrors(ipcCloseShareableHandle(m_inCircleShareableHandle));
|
||||
checkIpcErrors(ipcCloseShareableHandle(m_bdaShareableHandle));
|
||||
|
||||
// Free the virtual address region.
|
||||
checkCudaErrors(
|
||||
|
|
|
@ -61,7 +61,7 @@ class MonteCarloPiSimulation {
|
|||
// Shareable handles (a file descriptor on Linux and an NT handle on Windows),
|
||||
// used for sharing cuda
|
||||
// allocated memory with Vulkan
|
||||
ShareableHandle m_posShareableHandle, m_inCircleShareableHandle;
|
||||
ShareableHandle m_posShareableHandle, m_inCircleShareableHandle, m_bdaShareableHandle;
|
||||
|
||||
// Cuda Device corresponding to the Vulkan Physical device
|
||||
int m_cudaDevice;
|
||||
|
@ -90,6 +90,7 @@ class MonteCarloPiSimulation {
|
|||
ShareableHandle &getInCircleShareableHandle() {
|
||||
return m_inCircleShareableHandle;
|
||||
}
|
||||
ShareableHandle &getBDAShareableHandle() { return m_bdaShareableHandle; }
|
||||
};
|
||||
|
||||
#endif // __PISIM_H__
|
||||
|
|
|
@ -59,8 +59,8 @@ std::string execution_path;
|
|||
class VulkanCudaPi : public VulkanBaseApp {
|
||||
typedef struct UniformBufferObject_st { float frame; } UniformBufferObject;
|
||||
|
||||
VkBuffer m_inCircleBuffer, m_xyPositionBuffer;
|
||||
VkDeviceMemory m_inCircleMemory, m_xyPositionMemory;
|
||||
VkBuffer m_inCircleBuffer, m_xyPositionBuffer, m_bdaBuffer;
|
||||
VkDeviceMemory m_inCircleMemory, m_xyPositionMemory, m_bdaMemory;
|
||||
VkSemaphore m_vkWaitSemaphore, m_vkSignalSemaphore;
|
||||
MonteCarloPiSimulation m_sim;
|
||||
UniformBufferObject m_ubo;
|
||||
|
@ -75,8 +75,10 @@ class VulkanCudaPi : public VulkanBaseApp {
|
|||
: VulkanBaseApp("simpleVulkanMMAP", ENABLE_VALIDATION),
|
||||
m_inCircleBuffer(VK_NULL_HANDLE),
|
||||
m_xyPositionBuffer(VK_NULL_HANDLE),
|
||||
m_bdaBuffer(VK_NULL_HANDLE),
|
||||
m_inCircleMemory(VK_NULL_HANDLE),
|
||||
m_xyPositionMemory(VK_NULL_HANDLE),
|
||||
m_bdaMemory(VK_NULL_HANDLE),
|
||||
m_sim(num_points),
|
||||
m_ubo(),
|
||||
m_stream(0),
|
||||
|
@ -123,6 +125,12 @@ class VulkanCudaPi : public VulkanBaseApp {
|
|||
if (m_inCircleMemory != VK_NULL_HANDLE) {
|
||||
vkFreeMemory(m_device, m_inCircleMemory, nullptr);
|
||||
}
|
||||
if (m_bdaBuffer != VK_NULL_HANDLE) {
|
||||
vkDestroyBuffer(m_device, m_bdaBuffer, nullptr);
|
||||
}
|
||||
if (m_bdaMemory != VK_NULL_HANDLE) {
|
||||
vkFreeMemory(m_device, m_bdaMemory, nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
void fillRenderingCommandBuffer(VkCommandBuffer& commandBuffer) {
|
||||
|
@ -226,14 +234,21 @@ class VulkanCudaPi : public VulkanBaseApp {
|
|||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, m_inCircleBuffer,
|
||||
m_inCircleMemory);
|
||||
|
||||
importExternalBuffer(
|
||||
(void*)(uintptr_t)m_sim.getBDAShareableHandle(),
|
||||
getDefaultMemHandleType(), sizeof(float) * 2,
|
||||
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, m_bdaBuffer,
|
||||
m_bdaMemory);
|
||||
|
||||
// (SE) get function ptr
|
||||
auto* vkGetBufferDeviceAddressKHR = (PFN_vkGetBufferDeviceAddressKHR)vkGetDeviceProcAddr(m_device, "vkGetBufferDeviceAddressKHR");
|
||||
std::cout << "DEBUG: vkGetBufferDeviceAddressKHR = " << (void*)vkGetBufferDeviceAddressKHR << std::endl;
|
||||
|
||||
// get BDA for the circle buffer
|
||||
// get BDA
|
||||
VkBufferDeviceAddressInfoKHR bda_info{VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR,
|
||||
nullptr,
|
||||
m_inCircleBuffer};
|
||||
m_bdaBuffer};
|
||||
auto bda = vkGetBufferDeviceAddressKHR(m_device, &bda_info);
|
||||
std::cout << "DEBUG: BDA = " << (void*)bda << std::endl;
|
||||
|
||||
|
|
BIN
Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP
Executable file
BIN
Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP
Executable file
Binary file not shown.
Loading…
Reference in New Issue
Block a user