Export and wrap test buffer

This commit is contained in:
Simon Eves 2022-03-31 17:11:13 -07:00
parent cbd0b5f506
commit b43cc7a47e
4 changed files with 38 additions and 7 deletions

View File

@ -201,7 +201,7 @@ void MonteCarloPiSimulation::getIdealExecutionConfiguration() {
void MonteCarloPiSimulation::setupSimulationAllocations() { void MonteCarloPiSimulation::setupSimulationAllocations() {
CUdeviceptr d_ptr = 0U; CUdeviceptr d_ptr = 0U;
size_t granularity = 0; size_t granularity = 0;
CUmemGenericAllocationHandle cudaPositionHandle, cudaInCircleHandle; CUmemGenericAllocationHandle cudaPositionHandle, cudaInCircleHandle, cudaBDAHandle;
CUmemAllocationProp allocProp = {}; CUmemAllocationProp allocProp = {};
allocProp.type = CU_MEM_ALLOCATION_TYPE_PINNED; allocProp.type = CU_MEM_ALLOCATION_TYPE_PINNED;
@ -222,11 +222,13 @@ void MonteCarloPiSimulation::setupSimulationAllocations() {
size_t xyPositionVecSize = m_numPoints * sizeof(*m_xyVector); size_t xyPositionVecSize = m_numPoints * sizeof(*m_xyVector);
size_t inCircleVecSize = m_numPoints * sizeof(*m_pointsInsideCircle); size_t inCircleVecSize = m_numPoints * sizeof(*m_pointsInsideCircle);
size_t bdaVecSize = sizeof(float) * 2;
size_t xyPositionSize = size_t xyPositionSize =
ROUND_UP_TO_GRANULARITY(xyPositionVecSize, granularity); ROUND_UP_TO_GRANULARITY(xyPositionVecSize, granularity);
size_t inCircleSize = ROUND_UP_TO_GRANULARITY(inCircleVecSize, granularity); size_t inCircleSize = ROUND_UP_TO_GRANULARITY(inCircleVecSize, granularity);
m_totalAllocationSize = (xyPositionSize + inCircleSize); size_t bdaSize = ROUND_UP_TO_GRANULARITY(bdaVecSize, granularity);
m_totalAllocationSize = (xyPositionSize + inCircleSize + bdaSize);
// Reserve the required contiguous VA space for the allocations // Reserve the required contiguous VA space for the allocations
checkCudaErrors( checkCudaErrors(
@ -240,6 +242,8 @@ void MonteCarloPiSimulation::setupSimulationAllocations() {
cuMemCreate(&cudaPositionHandle, xyPositionSize, &allocProp, 0)); cuMemCreate(&cudaPositionHandle, xyPositionSize, &allocProp, 0));
checkCudaErrors( checkCudaErrors(
cuMemCreate(&cudaInCircleHandle, inCircleSize, &allocProp, 0)); cuMemCreate(&cudaInCircleHandle, inCircleSize, &allocProp, 0));
checkCudaErrors(
cuMemCreate(&cudaBDAHandle, bdaSize, &allocProp, 0));
// Export the allocation to a platform-specific handle. The type of handle // Export the allocation to a platform-specific handle. The type of handle
// requested here must match the requestedHandleTypes field in the prop // requested here must match the requestedHandleTypes field in the prop
@ -250,9 +254,12 @@ void MonteCarloPiSimulation::setupSimulationAllocations() {
checkCudaErrors( checkCudaErrors(
cuMemExportToShareableHandle((void *)&m_inCircleShareableHandle, cuMemExportToShareableHandle((void *)&m_inCircleShareableHandle,
cudaInCircleHandle, ipcHandleTypeFlag, 0)); cudaInCircleHandle, ipcHandleTypeFlag, 0));
checkCudaErrors(cuMemExportToShareableHandle(
(void *)&m_bdaShareableHandle, cudaBDAHandle, ipcHandleTypeFlag, 0));
CUdeviceptr va_position = d_ptr; CUdeviceptr va_position = d_ptr;
CUdeviceptr va_InCircle = va_position + xyPositionSize; CUdeviceptr va_InCircle = va_position + xyPositionSize;
CUdeviceptr va_BDA = va_InCircle + inCircleSize;
m_pointsInsideCircle = (float *)va_InCircle; m_pointsInsideCircle = (float *)va_InCircle;
m_xyVector = (vec2 *)va_position; m_xyVector = (vec2 *)va_position;
@ -261,6 +268,8 @@ void MonteCarloPiSimulation::setupSimulationAllocations() {
cuMemMap(va_position, xyPositionSize, 0, cudaPositionHandle, 0)); cuMemMap(va_position, xyPositionSize, 0, cudaPositionHandle, 0));
checkCudaErrors( checkCudaErrors(
cuMemMap(va_InCircle, inCircleSize, 0, cudaInCircleHandle, 0)); cuMemMap(va_InCircle, inCircleSize, 0, cudaInCircleHandle, 0));
checkCudaErrors(
cuMemMap(va_BDA, bdaSize, 0, cudaBDAHandle, 0));
// Release the handles for the allocation. Since the allocation is currently // Release the handles for the allocation. Since the allocation is currently
// mapped to a VA range with a previous call to cuMemMap the actual freeing of // mapped to a VA range with a previous call to cuMemMap the actual freeing of
@ -268,6 +277,7 @@ void MonteCarloPiSimulation::setupSimulationAllocations() {
// allocation will be kept live until it is unmapped. // allocation will be kept live until it is unmapped.
checkCudaErrors(cuMemRelease(cudaPositionHandle)); checkCudaErrors(cuMemRelease(cudaPositionHandle));
checkCudaErrors(cuMemRelease(cudaInCircleHandle)); checkCudaErrors(cuMemRelease(cudaInCircleHandle));
checkCudaErrors(cuMemRelease(cudaBDAHandle));
CUmemAccessDesc accessDescriptor = {}; CUmemAccessDesc accessDescriptor = {};
accessDescriptor.location.id = m_cudaDevice; accessDescriptor.location.id = m_cudaDevice;
@ -278,6 +288,10 @@ void MonteCarloPiSimulation::setupSimulationAllocations() {
// Read-Write access to the range. // Read-Write access to the range.
checkCudaErrors( checkCudaErrors(
cuMemSetAccess(d_ptr, m_totalAllocationSize, &accessDescriptor, 1)); cuMemSetAccess(d_ptr, m_totalAllocationSize, &accessDescriptor, 1));
// Fill the BDA test buffer with two known float values. The range was made
// read-write accessible by the cuMemSetAccess call above. The copy is wrapped
// in checkCudaErrors like the other driver-API calls in this function, so a
// failed upload is reported instead of being silently ignored.
const float bdaValues[2] = { 42.0f, 17.0f };
// sizeof(bdaValues) keeps the byte count tied to the array (2 * sizeof(float)).
checkCudaErrors(cuMemcpyHtoD(va_BDA, bdaValues, sizeof(bdaValues)));
} }
void MonteCarloPiSimulation::cleanupSimulationAllocations() { void MonteCarloPiSimulation::cleanupSimulationAllocations() {
@ -290,6 +304,7 @@ void MonteCarloPiSimulation::cleanupSimulationAllocations() {
checkIpcErrors(ipcCloseShareableHandle(m_posShareableHandle)); checkIpcErrors(ipcCloseShareableHandle(m_posShareableHandle));
checkIpcErrors(ipcCloseShareableHandle(m_inCircleShareableHandle)); checkIpcErrors(ipcCloseShareableHandle(m_inCircleShareableHandle));
checkIpcErrors(ipcCloseShareableHandle(m_bdaShareableHandle));
// Free the virtual address region. // Free the virtual address region.
checkCudaErrors( checkCudaErrors(

View File

@ -61,7 +61,7 @@ class MonteCarloPiSimulation {
// Shareable Handles(a file descriptor on Linux and NT Handle on Windows), // Shareable Handles(a file descriptor on Linux and NT Handle on Windows),
// used for sharing cuda // used for sharing cuda
// allocated memory with Vulkan // allocated memory with Vulkan
ShareableHandle m_posShareableHandle, m_inCircleShareableHandle; ShareableHandle m_posShareableHandle, m_inCircleShareableHandle, m_bdaShareableHandle;
// Cuda Device corresponding to the Vulkan Physical device // Cuda Device corresponding to the Vulkan Physical device
int m_cudaDevice; int m_cudaDevice;
@ -90,6 +90,7 @@ class MonteCarloPiSimulation {
ShareableHandle &getInCircleShareableHandle() { ShareableHandle &getInCircleShareableHandle() {
return m_inCircleShareableHandle; return m_inCircleShareableHandle;
} }
// Returns a mutable reference to m_bdaShareableHandle, the shareable handle
// member declared alongside the position and in-circle handles.
ShareableHandle &getBDAShareableHandle() {
  return m_bdaShareableHandle;
}
}; };
#endif // __PISIM_H__ #endif // __PISIM_H__

View File

@ -59,8 +59,8 @@ std::string execution_path;
class VulkanCudaPi : public VulkanBaseApp { class VulkanCudaPi : public VulkanBaseApp {
typedef struct UniformBufferObject_st { float frame; } UniformBufferObject; typedef struct UniformBufferObject_st { float frame; } UniformBufferObject;
VkBuffer m_inCircleBuffer, m_xyPositionBuffer; VkBuffer m_inCircleBuffer, m_xyPositionBuffer, m_bdaBuffer;
VkDeviceMemory m_inCircleMemory, m_xyPositionMemory; VkDeviceMemory m_inCircleMemory, m_xyPositionMemory, m_bdaMemory;
VkSemaphore m_vkWaitSemaphore, m_vkSignalSemaphore; VkSemaphore m_vkWaitSemaphore, m_vkSignalSemaphore;
MonteCarloPiSimulation m_sim; MonteCarloPiSimulation m_sim;
UniformBufferObject m_ubo; UniformBufferObject m_ubo;
@ -75,8 +75,10 @@ class VulkanCudaPi : public VulkanBaseApp {
: VulkanBaseApp("simpleVulkanMMAP", ENABLE_VALIDATION), : VulkanBaseApp("simpleVulkanMMAP", ENABLE_VALIDATION),
m_inCircleBuffer(VK_NULL_HANDLE), m_inCircleBuffer(VK_NULL_HANDLE),
m_xyPositionBuffer(VK_NULL_HANDLE), m_xyPositionBuffer(VK_NULL_HANDLE),
m_bdaBuffer(VK_NULL_HANDLE),
m_inCircleMemory(VK_NULL_HANDLE), m_inCircleMemory(VK_NULL_HANDLE),
m_xyPositionMemory(VK_NULL_HANDLE), m_xyPositionMemory(VK_NULL_HANDLE),
m_bdaMemory(VK_NULL_HANDLE),
m_sim(num_points), m_sim(num_points),
m_ubo(), m_ubo(),
m_stream(0), m_stream(0),
@ -123,6 +125,12 @@ class VulkanCudaPi : public VulkanBaseApp {
if (m_inCircleMemory != VK_NULL_HANDLE) { if (m_inCircleMemory != VK_NULL_HANDLE) {
vkFreeMemory(m_device, m_inCircleMemory, nullptr); vkFreeMemory(m_device, m_inCircleMemory, nullptr);
} }
if (m_bdaBuffer != VK_NULL_HANDLE) {
vkDestroyBuffer(m_device, m_bdaBuffer, nullptr);
}
if (m_bdaMemory != VK_NULL_HANDLE) {
vkFreeMemory(m_device, m_bdaMemory, nullptr);
}
} }
void fillRenderingCommandBuffer(VkCommandBuffer& commandBuffer) { void fillRenderingCommandBuffer(VkCommandBuffer& commandBuffer) {
@ -226,14 +234,21 @@ class VulkanCudaPi : public VulkanBaseApp {
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, m_inCircleBuffer, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, m_inCircleBuffer,
m_inCircleMemory); m_inCircleMemory);
importExternalBuffer(
(void*)(uintptr_t)m_sim.getBDAShareableHandle(),
getDefaultMemHandleType(), sizeof(float) * 2,
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, m_bdaBuffer,
m_bdaMemory);
// (SE) get function ptr // (SE) get function ptr
auto* vkGetBufferDeviceAddressKHR = (PFN_vkGetBufferDeviceAddressKHR)vkGetDeviceProcAddr(m_device, "vkGetBufferDeviceAddressKHR"); auto* vkGetBufferDeviceAddressKHR = (PFN_vkGetBufferDeviceAddressKHR)vkGetDeviceProcAddr(m_device, "vkGetBufferDeviceAddressKHR");
std::cout << "DEBUG: vkGetBufferDeviceAddressKHR = " << (void*)vkGetBufferDeviceAddressKHR << std::endl; std::cout << "DEBUG: vkGetBufferDeviceAddressKHR = " << (void*)vkGetBufferDeviceAddressKHR << std::endl;
// get BDA for the circle buffer // get BDA
VkBufferDeviceAddressInfoKHR bda_info{VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR, VkBufferDeviceAddressInfoKHR bda_info{VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR,
nullptr, nullptr,
m_inCircleBuffer}; m_bdaBuffer};
auto bda = vkGetBufferDeviceAddressKHR(m_device, &bda_info); auto bda = vkGetBufferDeviceAddressKHR(m_device, &bda_info);
std::cout << "DEBUG: BDA = " << (void*)bda << std::endl; std::cout << "DEBUG: BDA = " << (void*)bda << std::endl;

Binary file not shown.