Export and wrap test buffer

This commit is contained in:
Simon Eves 2022-03-31 17:11:13 -07:00
parent cbd0b5f506
commit b43cc7a47e
4 changed files with 38 additions and 7 deletions

View File

@ -201,7 +201,7 @@ void MonteCarloPiSimulation::getIdealExecutionConfiguration() {
void MonteCarloPiSimulation::setupSimulationAllocations() {
CUdeviceptr d_ptr = 0U;
size_t granularity = 0;
CUmemGenericAllocationHandle cudaPositionHandle, cudaInCircleHandle;
CUmemGenericAllocationHandle cudaPositionHandle, cudaInCircleHandle, cudaBDAHandle;
CUmemAllocationProp allocProp = {};
allocProp.type = CU_MEM_ALLOCATION_TYPE_PINNED;
@ -222,11 +222,13 @@ void MonteCarloPiSimulation::setupSimulationAllocations() {
size_t xyPositionVecSize = m_numPoints * sizeof(*m_xyVector);
size_t inCircleVecSize = m_numPoints * sizeof(*m_pointsInsideCircle);
size_t bdaVecSize = sizeof(float) * 2;
size_t xyPositionSize =
ROUND_UP_TO_GRANULARITY(xyPositionVecSize, granularity);
size_t inCircleSize = ROUND_UP_TO_GRANULARITY(inCircleVecSize, granularity);
m_totalAllocationSize = (xyPositionSize + inCircleSize);
size_t bdaSize = ROUND_UP_TO_GRANULARITY(bdaVecSize, granularity);
m_totalAllocationSize = (xyPositionSize + inCircleSize + bdaSize);
// Reserve the required contiguous VA space for the allocations
checkCudaErrors(
@ -240,6 +242,8 @@ void MonteCarloPiSimulation::setupSimulationAllocations() {
cuMemCreate(&cudaPositionHandle, xyPositionSize, &allocProp, 0));
checkCudaErrors(
cuMemCreate(&cudaInCircleHandle, inCircleSize, &allocProp, 0));
checkCudaErrors(
cuMemCreate(&cudaBDAHandle, bdaSize, &allocProp, 0));
// Export the allocation to a platform-specific handle. The type of handle
// requested here must match the requestedHandleTypes field in the prop
@ -250,9 +254,12 @@ void MonteCarloPiSimulation::setupSimulationAllocations() {
checkCudaErrors(
cuMemExportToShareableHandle((void *)&m_inCircleShareableHandle,
cudaInCircleHandle, ipcHandleTypeFlag, 0));
checkCudaErrors(cuMemExportToShareableHandle(
(void *)&m_bdaShareableHandle, cudaBDAHandle, ipcHandleTypeFlag, 0));
CUdeviceptr va_position = d_ptr;
CUdeviceptr va_InCircle = va_position + xyPositionSize;
CUdeviceptr va_BDA = va_InCircle + inCircleSize;
m_pointsInsideCircle = (float *)va_InCircle;
m_xyVector = (vec2 *)va_position;
@ -261,6 +268,8 @@ void MonteCarloPiSimulation::setupSimulationAllocations() {
cuMemMap(va_position, xyPositionSize, 0, cudaPositionHandle, 0));
checkCudaErrors(
cuMemMap(va_InCircle, inCircleSize, 0, cudaInCircleHandle, 0));
checkCudaErrors(
cuMemMap(va_BDA, bdaSize, 0, cudaBDAHandle, 0));
// Release the handles for the allocation. Since the allocation is currently
// mapped to a VA range with a previous call to cuMemMap the actual freeing of
@ -268,6 +277,7 @@ void MonteCarloPiSimulation::setupSimulationAllocations() {
// allocation will be kept live until it is unmapped.
checkCudaErrors(cuMemRelease(cudaPositionHandle));
checkCudaErrors(cuMemRelease(cudaInCircleHandle));
checkCudaErrors(cuMemRelease(cudaBDAHandle));
CUmemAccessDesc accessDescriptor = {};
accessDescriptor.location.id = m_cudaDevice;
@ -278,6 +288,10 @@ void MonteCarloPiSimulation::setupSimulationAllocations() {
// Read-Write access to the range.
checkCudaErrors(
cuMemSetAccess(d_ptr, m_totalAllocationSize, &accessDescriptor, 1));
// Fill the BDA test buffer with two recognizable placeholder values.
// The copy result is checked like every other driver API call in this
// function, so a failed upload is reported instead of silently leaving the
// buffer contents undefined.
const float bdaValues[2] = { 42.0f, 17.0f };
checkCudaErrors(cuMemcpyHtoD(va_BDA, &bdaValues[0], sizeof(bdaValues)));
}
void MonteCarloPiSimulation::cleanupSimulationAllocations() {
@ -290,6 +304,7 @@ void MonteCarloPiSimulation::cleanupSimulationAllocations() {
checkIpcErrors(ipcCloseShareableHandle(m_posShareableHandle));
checkIpcErrors(ipcCloseShareableHandle(m_inCircleShareableHandle));
checkIpcErrors(ipcCloseShareableHandle(m_bdaShareableHandle));
// Free the virtual address region.
checkCudaErrors(

View File

@ -61,7 +61,7 @@ class MonteCarloPiSimulation {
// Shareable Handles(a file descriptor on Linux and NT Handle on Windows),
// used for sharing cuda
// allocated memory with Vulkan
ShareableHandle m_posShareableHandle, m_inCircleShareableHandle;
ShareableHandle m_posShareableHandle, m_inCircleShareableHandle, m_bdaShareableHandle;
// Cuda Device corresponding to the Vulkan Physical device
int m_cudaDevice;
@ -90,6 +90,7 @@ class MonteCarloPiSimulation {
ShareableHandle &getInCircleShareableHandle() {
return m_inCircleShareableHandle;
}
ShareableHandle &getBDAShareableHandle() { return m_bdaShareableHandle; }
};
#endif // __PISIM_H__

View File

@ -59,8 +59,8 @@ std::string execution_path;
class VulkanCudaPi : public VulkanBaseApp {
typedef struct UniformBufferObject_st { float frame; } UniformBufferObject;
VkBuffer m_inCircleBuffer, m_xyPositionBuffer;
VkDeviceMemory m_inCircleMemory, m_xyPositionMemory;
VkBuffer m_inCircleBuffer, m_xyPositionBuffer, m_bdaBuffer;
VkDeviceMemory m_inCircleMemory, m_xyPositionMemory, m_bdaMemory;
VkSemaphore m_vkWaitSemaphore, m_vkSignalSemaphore;
MonteCarloPiSimulation m_sim;
UniformBufferObject m_ubo;
@ -75,8 +75,10 @@ class VulkanCudaPi : public VulkanBaseApp {
: VulkanBaseApp("simpleVulkanMMAP", ENABLE_VALIDATION),
m_inCircleBuffer(VK_NULL_HANDLE),
m_xyPositionBuffer(VK_NULL_HANDLE),
m_bdaBuffer(VK_NULL_HANDLE),
m_inCircleMemory(VK_NULL_HANDLE),
m_xyPositionMemory(VK_NULL_HANDLE),
m_bdaMemory(VK_NULL_HANDLE),
m_sim(num_points),
m_ubo(),
m_stream(0),
@ -123,6 +125,12 @@ class VulkanCudaPi : public VulkanBaseApp {
if (m_inCircleMemory != VK_NULL_HANDLE) {
vkFreeMemory(m_device, m_inCircleMemory, nullptr);
}
if (m_bdaBuffer != VK_NULL_HANDLE) {
vkDestroyBuffer(m_device, m_bdaBuffer, nullptr);
}
if (m_bdaMemory != VK_NULL_HANDLE) {
vkFreeMemory(m_device, m_bdaMemory, nullptr);
}
}
void fillRenderingCommandBuffer(VkCommandBuffer& commandBuffer) {
@ -226,14 +234,21 @@ class VulkanCudaPi : public VulkanBaseApp {
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, m_inCircleBuffer,
m_inCircleMemory);
importExternalBuffer(
(void*)(uintptr_t)m_sim.getBDAShareableHandle(),
getDefaultMemHandleType(), sizeof(float) * 2,
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, m_bdaBuffer,
m_bdaMemory);
// (SE) get function ptr
auto* vkGetBufferDeviceAddressKHR = (PFN_vkGetBufferDeviceAddressKHR)vkGetDeviceProcAddr(m_device, "vkGetBufferDeviceAddressKHR");
std::cout << "DEBUG: vkGetBufferDeviceAddressKHR = " << (void*)vkGetBufferDeviceAddressKHR << std::endl;
// get BDA for the circle buffer
// get BDA
VkBufferDeviceAddressInfoKHR bda_info{VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR,
nullptr,
m_inCircleBuffer};
m_bdaBuffer};
auto bda = vkGetBufferDeviceAddressKHR(m_device, &bda_info);
std::cout << "DEBUG: BDA = " << (void*)bda << std::endl;

Binary file not shown.