/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define GLFW_INCLUDE_VULKAN #ifdef _WIN64 #include #include #include #include #define _USE_MATH_DEFINES #endif #include #include #ifdef _WIN64 #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "linmath.h" #define WIDTH 800 #define HEIGHT 600 const int MAX_FRAMES = 4; const std::vector validationLayers = { "VK_LAYER_KHRONOS_validation"}; #ifdef NDEBUG const bool enableValidationLayers = true; #else const bool enableValidationLayers = false; #endif std::string execution_path; VkResult CreateDebugUtilsMessengerEXT( VkInstance instance, const VkDebugUtilsMessengerCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDebugUtilsMessengerEXT* pDebugMessenger) { auto func = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr( instance, "vkCreateDebugUtilsMessengerEXT"); if (func != nullptr) { return func(instance, pCreateInfo, pAllocator, pDebugMessenger); } else { return VK_ERROR_EXTENSION_NOT_PRESENT; } }; const std::vector deviceExtensions = { VK_KHR_SWAPCHAIN_EXTENSION_NAME, VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME, VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME, #ifdef _WIN64 VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, #else VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, #endif }; #ifdef _WIN64 class WindowsSecurityAttributes { protected: SECURITY_ATTRIBUTES m_winSecurityAttributes; PSECURITY_DESCRIPTOR m_winPSecurityDescriptor; public: WindowsSecurityAttributes(); SECURITY_ATTRIBUTES* operator&(); ~WindowsSecurityAttributes(); }; WindowsSecurityAttributes::WindowsSecurityAttributes() { m_winPSecurityDescriptor = (PSECURITY_DESCRIPTOR)calloc( 1, SECURITY_DESCRIPTOR_MIN_LENGTH + 2 * sizeof(void**)); PSID* ppSID = (PSID*)((PBYTE)m_winPSecurityDescriptor + SECURITY_DESCRIPTOR_MIN_LENGTH); PACL* ppACL = (PACL*)((PBYTE)ppSID + sizeof(PSID*)); InitializeSecurityDescriptor(m_winPSecurityDescriptor, SECURITY_DESCRIPTOR_REVISION); SID_IDENTIFIER_AUTHORITY sidIdentifierAuthority = SECURITY_WORLD_SID_AUTHORITY; AllocateAndInitializeSid(&sidIdentifierAuthority, 1, SECURITY_WORLD_RID, 0, 0, 0, 0, 0, 0, 0, ppSID); EXPLICIT_ACCESS explicitAccess; ZeroMemory(&explicitAccess, sizeof(EXPLICIT_ACCESS)); explicitAccess.grfAccessPermissions = STANDARD_RIGHTS_ALL | SPECIFIC_RIGHTS_ALL; explicitAccess.grfAccessMode = SET_ACCESS; explicitAccess.grfInheritance = INHERIT_ONLY; explicitAccess.Trustee.TrusteeForm = TRUSTEE_IS_SID; explicitAccess.Trustee.TrusteeType = TRUSTEE_IS_WELL_KNOWN_GROUP; explicitAccess.Trustee.ptstrName = (LPTSTR)*ppSID; SetEntriesInAcl(1, &explicitAccess, NULL, ppACL); SetSecurityDescriptorDacl(m_winPSecurityDescriptor, TRUE, *ppACL, FALSE); m_winSecurityAttributes.nLength = sizeof(m_winSecurityAttributes); m_winSecurityAttributes.lpSecurityDescriptor = m_winPSecurityDescriptor; m_winSecurityAttributes.bInheritHandle = TRUE; } SECURITY_ATTRIBUTES* WindowsSecurityAttributes::operator&() { return &m_winSecurityAttributes; } WindowsSecurityAttributes::~WindowsSecurityAttributes() { PSID* ppSID = (PSID*)((PBYTE)m_winPSecurityDescriptor + SECURITY_DESCRIPTOR_MIN_LENGTH); PACL* ppACL = (PACL*)((PBYTE)ppSID + sizeof(PSID*)); if (*ppSID) { FreeSid(*ppSID); } if (*ppACL) { LocalFree(*ppACL); } free(m_winPSecurityDescriptor); } #endif void DestroyDebugUtilsMessengerEXT(VkInstance instance, VkDebugUtilsMessengerEXT debugMessenger, const VkAllocationCallbacks* pAllocator) { auto func = (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr( instance, "vkDestroyDebugUtilsMessengerEXT"); if (func != nullptr) { func(instance, debugMessenger, pAllocator); } } struct QueueFamilyIndices { int graphicsFamily = -1; int presentFamily = -1; bool isComplete() { return graphicsFamily >= 0 && presentFamily >= 0; } }; struct SwapChainSupportDetails { VkSurfaceCapabilitiesKHR capabilities; std::vector formats; std::vector presentModes; }; typedef float vec2[2]; struct Vertex { vec4 pos; vec3 color; vec2 texCoord; static VkVertexInputBindingDescription getBindingDescription() { VkVertexInputBindingDescription bindingDescription = {}; bindingDescription.binding = 0; bindingDescription.stride = sizeof(Vertex); bindingDescription.inputRate = VK_VERTEX_INPUT_RATE_VERTEX; return bindingDescription; } static std::array getAttributeDescriptions() { std::array attributeDescriptions = {}; attributeDescriptions[0].binding = 0; attributeDescriptions[0].location = 0; attributeDescriptions[0].format = VK_FORMAT_R32G32B32A32_SFLOAT; attributeDescriptions[0].offset = offsetof(Vertex, pos); attributeDescriptions[1].binding = 0; attributeDescriptions[1].location = 1; attributeDescriptions[1].format = VK_FORMAT_R32G32B32_SFLOAT; attributeDescriptions[1].offset = offsetof(Vertex, color); attributeDescriptions[2].binding = 0; attributeDescriptions[2].location = 2; attributeDescriptions[2].format = VK_FORMAT_R32G32_SFLOAT; attributeDescriptions[2].offset = offsetof(Vertex, texCoord); return attributeDescriptions; } }; struct UniformBufferObject { alignas(16) mat4x4 model; alignas(16) mat4x4 view; alignas(16) mat4x4 proj; }; const std::vector vertices = { {{-1.0f, -1.0f, 0.0f, 1.0f}, {1.0f, 0.0f, 0.0f}, {0.0f, 0.0f}}, {{1.0f, -1.0f, 0.0f, 1.0f}, {0.0f, 1.0f, 0.0f}, {1.0f, 0.0f}}, {{1.0f, 1.0f, 0.0f, 1.0f}, {0.0f, 0.0f, 1.0f}, {1.0f, 1.0f}}, {{-1.0f, 1.0f, 0.0f, 1.0f}, {1.0f, 1.0f, 1.0f}, {0.0f, 1.0f}}}; const std::vector indices = {0, 1, 2, 2, 3, 0}; // convert floating point rgba color to 32-bit integer __device__ unsigned int rgbaFloatToInt(float4 rgba) { rgba.x = __saturatef(rgba.x); // clamp to [0.0, 1.0] rgba.y = __saturatef(rgba.y); rgba.z = __saturatef(rgba.z); rgba.w = __saturatef(rgba.w); return ((unsigned int)(rgba.w * 255.0f) << 24) | ((unsigned int)(rgba.z * 255.0f) << 16) | ((unsigned int)(rgba.y * 255.0f) << 8) | ((unsigned int)(rgba.x * 255.0f)); } __device__ float4 rgbaIntToFloat(unsigned int c) { float4 rgba; rgba.x = (c & 0xff) * 0.003921568627f; // /255.0f; rgba.y = ((c >> 8) & 0xff) * 0.003921568627f; // /255.0f; rgba.z = ((c >> 16) & 0xff) * 0.003921568627f; // /255.0f; rgba.w = ((c >> 24) & 0xff) * 0.003921568627f; // /255.0f; return rgba; } int filter_radius = 14; int g_nFilterSign = 1; // This varies the filter radius, so we can see automatic animation void varySigma() { filter_radius += g_nFilterSign; if (filter_radius > 64) { filter_radius = 64; // clamp to 64 and then negate sign g_nFilterSign = -1; } else if (filter_radius < 0) { filter_radius = 0; g_nFilterSign = 1; } } // row pass using texture lookups __global__ void d_boxfilter_rgba_x(cudaSurfaceObject_t* dstSurfMipMapArray, cudaTextureObject_t textureMipMapInput, size_t baseWidth, size_t baseHeight, size_t mipLevels, int filter_radius) { float scale = 1.0f / (float)((filter_radius << 1) + 1); unsigned int y = blockIdx.x * blockDim.x + threadIdx.x; if (y < baseHeight) { for (uint32_t mipLevelIdx = 0; mipLevelIdx < mipLevels; mipLevelIdx++) { uint32_t width = (baseWidth >> mipLevelIdx) ? (baseWidth >> mipLevelIdx) : 1; uint32_t height = (baseHeight >> mipLevelIdx) ? (baseHeight >> mipLevelIdx) : 1; if (y < height && filter_radius < width) { float px = 1.0 / width; float py = 1.0 / height; float4 t = make_float4(0.0f); for (int x = -filter_radius; x <= filter_radius; x++) { t += tex2DLod(textureMipMapInput, x * px, y * py, (float)mipLevelIdx); } unsigned int dataB = rgbaFloatToInt(t * scale); surf2Dwrite(dataB, dstSurfMipMapArray[mipLevelIdx], 0, y); for (int x = 1; x < width; x++) { t += tex2DLod(textureMipMapInput, (x + filter_radius) * px, y * py, (float)mipLevelIdx); t -= tex2DLod(textureMipMapInput, (x - filter_radius - 1) * px, y * py, (float)mipLevelIdx); unsigned int dataB = rgbaFloatToInt(t * scale); surf2Dwrite(dataB, dstSurfMipMapArray[mipLevelIdx], x * sizeof(uchar4), y); } } } } } // column pass using coalesced global memory reads __global__ void d_boxfilter_rgba_y(cudaSurfaceObject_t* dstSurfMipMapArray, cudaSurfaceObject_t* srcSurfMipMapArray, size_t baseWidth, size_t baseHeight, size_t mipLevels, int filter_radius) { unsigned int x = blockIdx.x * blockDim.x + threadIdx.x; float scale = 1.0f / (float)((filter_radius << 1) + 1); for (uint32_t mipLevelIdx = 0; mipLevelIdx < mipLevels; mipLevelIdx++) { uint32_t width = (baseWidth >> mipLevelIdx) ? (baseWidth >> mipLevelIdx) : 1; uint32_t height = (baseHeight >> mipLevelIdx) ? (baseHeight >> mipLevelIdx) : 1; if (x < width && height > filter_radius) { float4 t; // do left edge int colInBytes = x * sizeof(uchar4); unsigned int pixFirst = surf2Dread( srcSurfMipMapArray[mipLevelIdx], colInBytes, 0); t = rgbaIntToFloat(pixFirst) * filter_radius; for (int y = 0; (y < (filter_radius + 1)) && (y < height); y++) { unsigned int pix = surf2Dread( srcSurfMipMapArray[mipLevelIdx], colInBytes, y); t += rgbaIntToFloat(pix); } unsigned int dataB = rgbaFloatToInt(t * scale); surf2Dwrite(dataB, dstSurfMipMapArray[mipLevelIdx], colInBytes, 0); for (int y = 1; (y < filter_radius + 1) && ((y + filter_radius) < height); y++) { unsigned int pix = surf2Dread( srcSurfMipMapArray[mipLevelIdx], colInBytes, y + filter_radius); t += rgbaIntToFloat(pix); t -= rgbaIntToFloat(pixFirst); dataB = rgbaFloatToInt(t * scale); surf2Dwrite(dataB, dstSurfMipMapArray[mipLevelIdx], colInBytes, y); } // main loop for (int y = (filter_radius + 1); y < (height - filter_radius); y++) { unsigned int pix = surf2Dread( srcSurfMipMapArray[mipLevelIdx], colInBytes, y + filter_radius); t += rgbaIntToFloat(pix); pix = surf2Dread(srcSurfMipMapArray[mipLevelIdx], colInBytes, y - filter_radius - 1); t -= rgbaIntToFloat(pix); dataB = rgbaFloatToInt(t * scale); surf2Dwrite(dataB, dstSurfMipMapArray[mipLevelIdx], colInBytes, y); } // do right edge unsigned int pixLast = surf2Dread( srcSurfMipMapArray[mipLevelIdx], colInBytes, height - 1); for (int y = height - filter_radius; (y < height) && ((y - filter_radius - 1) > 1); y++) { t += rgbaIntToFloat(pixLast); unsigned int pix = surf2Dread( srcSurfMipMapArray[mipLevelIdx], colInBytes, y - filter_radius - 1); t -= rgbaIntToFloat(pix); dataB = rgbaFloatToInt(t * scale); surf2Dwrite(dataB, dstSurfMipMapArray[mipLevelIdx], colInBytes, y); } } } } class vulkanImageCUDA { public: void loadImageData(const std::string& filename) { // load image (needed so we can get the width and height before we create // the window char* image_path = sdkFindFilePath(filename.c_str(), execution_path.c_str()); if (image_path == 0) { printf("Error finding image file '%s'\n", filename.c_str()); exit(EXIT_FAILURE); } sdkLoadPPM4(image_path, (unsigned char**)&image_data, &imageWidth, &imageHeight); if (!image_data) { printf("Error opening file '%s'\n", image_path); exit(EXIT_FAILURE); } printf("Loaded '%s', %d x %d pixels\n", image_path, imageWidth, imageHeight); } void run() { initWindow(); initVulkan(); initCuda(); mainLoop(); cleanup(); } private: GLFWwindow* window; VkInstance instance; VkDebugUtilsMessengerEXT debugMessenger; VkSurfaceKHR surface; VkPhysicalDevice physicalDevice = VK_NULL_HANDLE; VkDevice device; uint8_t vkDeviceUUID[VK_UUID_SIZE]; VkQueue graphicsQueue; VkQueue presentQueue; VkSwapchainKHR swapChain; std::vector swapChainImages; VkFormat swapChainImageFormat; VkExtent2D swapChainExtent; std::vector swapChainImageViews; std::vector swapChainFramebuffers; VkRenderPass renderPass; VkDescriptorSetLayout descriptorSetLayout; VkPipelineLayout pipelineLayout; VkPipeline graphicsPipeline; VkCommandPool commandPool; VkImage textureImage; VkDeviceMemory textureImageMemory; VkImageView textureImageView; VkSampler textureSampler; VkBuffer vertexBuffer; VkDeviceMemory vertexBufferMemory; VkBuffer indexBuffer; VkDeviceMemory indexBufferMemory; std::vector uniformBuffers; std::vector uniformBuffersMemory; VkDescriptorPool descriptorPool; std::vector descriptorSets; std::vector commandBuffers; std::vector imageAvailableSemaphores; std::vector renderFinishedSemaphores; VkSemaphore cudaUpdateVkSemaphore, vkUpdateCudaSemaphore; std::vector inFlightFences; size_t currentFrame = 0; bool framebufferResized = false; #ifdef _WIN64 PFN_vkGetMemoryWin32HandleKHR fpGetMemoryWin32HandleKHR; PFN_vkGetSemaphoreWin32HandleKHR fpGetSemaphoreWin32HandleKHR; #else PFN_vkGetMemoryFdKHR fpGetMemoryFdKHR = NULL; PFN_vkGetSemaphoreFdKHR fpGetSemaphoreFdKHR = NULL; #endif PFN_vkGetPhysicalDeviceProperties2 fpGetPhysicalDeviceProperties2; unsigned int* image_data = NULL; unsigned int imageWidth, imageHeight; unsigned int mipLevels = 1; size_t totalImageMemSize; // CUDA objects cudaExternalMemory_t cudaExtMemImageBuffer; cudaMipmappedArray_t cudaMipmappedImageArray, cudaMipmappedImageArrayTemp, cudaMipmappedImageArrayOrig; std::vector surfaceObjectList, surfaceObjectListTemp; cudaSurfaceObject_t *d_surfaceObjectList, *d_surfaceObjectListTemp; cudaTextureObject_t textureObjMipMapInput; cudaExternalSemaphore_t cudaExtCudaUpdateVkSemaphore; cudaExternalSemaphore_t cudaExtVkUpdateCudaSemaphore; cudaStream_t streamToRun; void initWindow() { glfwInit(); glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API); window = glfwCreateWindow(WIDTH, HEIGHT, "Vulkan Image CUDA Box Filter", nullptr, nullptr); glfwSetWindowUserPointer(window, this); glfwSetFramebufferSizeCallback(window, framebufferResizeCallback); } static void framebufferResizeCallback(GLFWwindow* window, int width, int height) { auto app = reinterpret_cast(glfwGetWindowUserPointer(window)); app->framebufferResized = true; } void initVulkan() { createInstance(); setupDebugMessenger(); createSurface(); pickPhysicalDevice(); createLogicalDevice(); getKhrExtensionsFn(); createSwapChain(); createImageViews(); createRenderPass(); createDescriptorSetLayout(); createGraphicsPipeline(); createFramebuffers(); createCommandPool(); createTextureImage(); createTextureImageView(); createTextureSampler(); createVertexBuffer(); createIndexBuffer(); createUniformBuffers(); createDescriptorPool(); createDescriptorSets(); createCommandBuffers(); createSyncObjects(); createSyncObjectsExt(); } void initCuda() { setCudaVkDevice(); checkCudaErrors(cudaStreamCreate(&streamToRun)); cudaVkImportImageMem(); cudaVkImportSemaphore(); } void mainLoop() { updateUniformBuffer(); while (!glfwWindowShouldClose(window)) { glfwPollEvents(); drawFrame(); } vkDeviceWaitIdle(device); } void cleanupSwapChain() { for (auto framebuffer : swapChainFramebuffers) { vkDestroyFramebuffer(device, framebuffer, nullptr); } vkFreeCommandBuffers(device, commandPool, static_cast(commandBuffers.size()), commandBuffers.data()); vkDestroyPipeline(device, graphicsPipeline, nullptr); vkDestroyPipelineLayout(device, pipelineLayout, nullptr); vkDestroyRenderPass(device, renderPass, nullptr); for (auto imageView : swapChainImageViews) { vkDestroyImageView(device, imageView, nullptr); } vkDestroySwapchainKHR(device, swapChain, nullptr); for (size_t i = 0; i < swapChainImages.size(); i++) { vkDestroyBuffer(device, uniformBuffers[i], nullptr); vkFreeMemory(device, uniformBuffersMemory[i], nullptr); } vkDestroyDescriptorPool(device, descriptorPool, nullptr); } void cleanup() { cleanupSwapChain(); vkDestroySampler(device, textureSampler, nullptr); vkDestroyImageView(device, textureImageView, nullptr); for (int i = 0; i < mipLevels; i++) { checkCudaErrors(cudaDestroySurfaceObject(surfaceObjectList[i])); checkCudaErrors(cudaDestroySurfaceObject(surfaceObjectListTemp[i])); } checkCudaErrors(cudaFree(d_surfaceObjectList)); checkCudaErrors(cudaFree(d_surfaceObjectListTemp)); checkCudaErrors(cudaFreeMipmappedArray(cudaMipmappedImageArrayTemp)); checkCudaErrors(cudaFreeMipmappedArray(cudaMipmappedImageArrayOrig)); checkCudaErrors(cudaFreeMipmappedArray(cudaMipmappedImageArray)); checkCudaErrors(cudaDestroyTextureObject(textureObjMipMapInput)); checkCudaErrors(cudaDestroyExternalMemory(cudaExtMemImageBuffer)); checkCudaErrors(cudaDestroyExternalSemaphore(cudaExtCudaUpdateVkSemaphore)); checkCudaErrors(cudaDestroyExternalSemaphore(cudaExtVkUpdateCudaSemaphore)); vkDestroyImage(device, textureImage, nullptr); vkFreeMemory(device, textureImageMemory, nullptr); vkDestroyDescriptorSetLayout(device, descriptorSetLayout, nullptr); vkDestroyBuffer(device, indexBuffer, nullptr); vkFreeMemory(device, indexBufferMemory, nullptr); vkDestroyBuffer(device, vertexBuffer, nullptr); vkFreeMemory(device, vertexBufferMemory, nullptr); vkDestroySemaphore(device, cudaUpdateVkSemaphore, nullptr); vkDestroySemaphore(device, vkUpdateCudaSemaphore, nullptr); for (size_t i = 0; i < MAX_FRAMES; i++) { vkDestroySemaphore(device, renderFinishedSemaphores[i], nullptr); vkDestroySemaphore(device, imageAvailableSemaphores[i], nullptr); vkDestroyFence(device, inFlightFences[i], nullptr); } vkDestroyCommandPool(device, commandPool, nullptr); vkDestroyDevice(device, nullptr); if (enableValidationLayers) { DestroyDebugUtilsMessengerEXT(instance, debugMessenger, nullptr); } vkDestroySurfaceKHR(instance, surface, nullptr); vkDestroyInstance(instance, nullptr); glfwDestroyWindow(window); glfwTerminate(); } void recreateSwapChain() { int width = 0, height = 0; while (width == 0 || height == 0) { glfwGetFramebufferSize(window, &width, &height); glfwWaitEvents(); } vkDeviceWaitIdle(device); cleanupSwapChain(); createSwapChain(); createImageViews(); createRenderPass(); createGraphicsPipeline(); createFramebuffers(); createUniformBuffers(); createDescriptorPool(); createDescriptorSets(); createCommandBuffers(); } void createInstance() { if (enableValidationLayers && !checkValidationLayerSupport()) { throw std::runtime_error( "validation layers requested, but not available!"); } VkApplicationInfo appInfo = {}; appInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; appInfo.pApplicationName = "Vulkan Image CUDA Interop"; appInfo.applicationVersion = VK_MAKE_VERSION(1, 0, 0); appInfo.pEngineName = "No Engine"; appInfo.engineVersion = VK_MAKE_VERSION(1, 0, 0); appInfo.apiVersion = VK_API_VERSION_1_1; VkInstanceCreateInfo createInfo = {}; createInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; createInfo.pApplicationInfo = &appInfo; auto extensions = getRequiredExtensions(); createInfo.enabledExtensionCount = static_cast(extensions.size()); createInfo.ppEnabledExtensionNames = extensions.data(); VkDebugUtilsMessengerCreateInfoEXT debugCreateInfo; if (enableValidationLayers) { createInfo.enabledLayerCount = static_cast(validationLayers.size()); createInfo.ppEnabledLayerNames = validationLayers.data(); populateDebugMessengerCreateInfo(debugCreateInfo); createInfo.pNext = (VkDebugUtilsMessengerCreateInfoEXT*)&debugCreateInfo; } else { createInfo.enabledLayerCount = 0; createInfo.pNext = nullptr; } if (vkCreateInstance(&createInfo, nullptr, &instance) != VK_SUCCESS) { throw std::runtime_error("failed to create instance!"); } fpGetPhysicalDeviceProperties2 = (PFN_vkGetPhysicalDeviceProperties2)vkGetInstanceProcAddr( instance, "vkGetPhysicalDeviceProperties2"); if (fpGetPhysicalDeviceProperties2 == NULL) { throw std::runtime_error( "Vulkan: Proc address for \"vkGetPhysicalDeviceProperties2KHR\" not " "found.\n"); } #ifdef _WIN64 fpGetMemoryWin32HandleKHR = (PFN_vkGetMemoryWin32HandleKHR)vkGetInstanceProcAddr( instance, "vkGetMemoryWin32HandleKHR"); if (fpGetMemoryWin32HandleKHR == NULL) { throw std::runtime_error( "Vulkan: Proc address for \"vkGetMemoryWin32HandleKHR\" not " "found.\n"); } #else fpGetMemoryFdKHR = (PFN_vkGetMemoryFdKHR)vkGetInstanceProcAddr( instance, "vkGetMemoryFdKHR"); if (fpGetMemoryFdKHR == NULL) { throw std::runtime_error( "Vulkan: Proc address for \"vkGetMemoryFdKHR\" not found.\n"); } else { std::cout << "Vulkan proc address for vkGetMemoryFdKHR - " << fpGetMemoryFdKHR << std::endl; } #endif } void populateDebugMessengerCreateInfo( VkDebugUtilsMessengerCreateInfoEXT& createInfo) { createInfo = {}; createInfo.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; createInfo.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; createInfo.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; createInfo.pfnUserCallback = debugCallback; } void setupDebugMessenger() { if (!enableValidationLayers) return; VkDebugUtilsMessengerCreateInfoEXT createInfo; populateDebugMessengerCreateInfo(createInfo); if (CreateDebugUtilsMessengerEXT(instance, &createInfo, nullptr, &debugMessenger) != VK_SUCCESS) { throw std::runtime_error("failed to set up debug messenger!"); } } void createSurface() { if (glfwCreateWindowSurface(instance, window, nullptr, &surface) != VK_SUCCESS) { throw std::runtime_error("failed to create window surface!"); } } void pickPhysicalDevice() { uint32_t deviceCount = 0; vkEnumeratePhysicalDevices(instance, &deviceCount, nullptr); if (deviceCount == 0) { throw std::runtime_error("failed to find GPUs with Vulkan support!"); } std::vector devices(deviceCount); vkEnumeratePhysicalDevices(instance, &deviceCount, devices.data()); for (const auto& device : devices) { if (isDeviceSuitable(device)) { physicalDevice = device; break; } } if (physicalDevice == VK_NULL_HANDLE) { throw std::runtime_error("failed to find a suitable GPU!"); } std::cout << "Selected physical device = " << physicalDevice << std::endl; VkPhysicalDeviceIDProperties vkPhysicalDeviceIDProperties = {}; vkPhysicalDeviceIDProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES; vkPhysicalDeviceIDProperties.pNext = NULL; VkPhysicalDeviceProperties2 vkPhysicalDeviceProperties2 = {}; vkPhysicalDeviceProperties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; vkPhysicalDeviceProperties2.pNext = &vkPhysicalDeviceIDProperties; fpGetPhysicalDeviceProperties2(physicalDevice, &vkPhysicalDeviceProperties2); memcpy(vkDeviceUUID, vkPhysicalDeviceIDProperties.deviceUUID, sizeof(vkDeviceUUID)); } void getKhrExtensionsFn() { #ifdef _WIN64 fpGetSemaphoreWin32HandleKHR = (PFN_vkGetSemaphoreWin32HandleKHR)vkGetDeviceProcAddr( device, "vkGetSemaphoreWin32HandleKHR"); if (fpGetSemaphoreWin32HandleKHR == NULL) { throw std::runtime_error( "Vulkan: Proc address for \"vkGetSemaphoreWin32HandleKHR\" not " "found.\n"); } #else fpGetSemaphoreFdKHR = (PFN_vkGetSemaphoreFdKHR)vkGetDeviceProcAddr( device, "vkGetSemaphoreFdKHR"); if (fpGetSemaphoreFdKHR == NULL) { throw std::runtime_error( "Vulkan: Proc address for \"vkGetSemaphoreFdKHR\" not found.\n"); } #endif } int setCudaVkDevice() { int current_device = 0; int device_count = 0; int devices_prohibited = 0; cudaDeviceProp deviceProp; checkCudaErrors(cudaGetDeviceCount(&device_count)); if (device_count == 0) { fprintf(stderr, "CUDA error: no devices supporting CUDA.\n"); exit(EXIT_FAILURE); } // Find the GPU which is selected by Vulkan while (current_device < device_count) { cudaGetDeviceProperties(&deviceProp, current_device); if ((deviceProp.computeMode != cudaComputeModeProhibited)) { // Compare the cuda device UUID with vulkan UUID int ret = memcmp(&deviceProp.uuid, &vkDeviceUUID, VK_UUID_SIZE); if (ret == 0) { checkCudaErrors(cudaSetDevice(current_device)); checkCudaErrors(cudaGetDeviceProperties(&deviceProp, current_device)); printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", current_device, deviceProp.name, deviceProp.major, deviceProp.minor); return current_device; } } else { devices_prohibited++; } current_device++; } if (devices_prohibited == device_count) { fprintf(stderr, "CUDA error:" " No Vulkan-CUDA Interop capable GPU found.\n"); exit(EXIT_FAILURE); } return -1; } void createLogicalDevice() { QueueFamilyIndices indices = findQueueFamilies(physicalDevice); std::vector queueCreateInfos; std::set uniqueQueueFamilies = {indices.graphicsFamily, indices.presentFamily}; float queuePriority = 1.0f; for (int queueFamily : uniqueQueueFamilies) { VkDeviceQueueCreateInfo queueCreateInfo = {}; queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; queueCreateInfo.queueFamilyIndex = queueFamily; queueCreateInfo.queueCount = 1; queueCreateInfo.pQueuePriorities = &queuePriority; queueCreateInfos.push_back(queueCreateInfo); } VkPhysicalDeviceFeatures deviceFeatures = {}; deviceFeatures.samplerAnisotropy = VK_TRUE; VkDeviceCreateInfo createInfo = {}; createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; createInfo.pQueueCreateInfos = queueCreateInfos.data(); createInfo.queueCreateInfoCount = queueCreateInfos.size(); createInfo.pEnabledFeatures = &deviceFeatures; std::vector enabledExtensionNameList; for (int i = 0; i < deviceExtensions.size(); i++) { enabledExtensionNameList.push_back(deviceExtensions[i]); } if (enableValidationLayers) { createInfo.enabledLayerCount = static_cast(validationLayers.size()); createInfo.ppEnabledLayerNames = validationLayers.data(); } else { createInfo.enabledLayerCount = 0; } createInfo.enabledExtensionCount = static_cast(enabledExtensionNameList.size()); createInfo.ppEnabledExtensionNames = enabledExtensionNameList.data(); if (vkCreateDevice(physicalDevice, &createInfo, nullptr, &device) != VK_SUCCESS) { throw std::runtime_error("failed to create logical device!"); } vkGetDeviceQueue(device, indices.graphicsFamily, 0, &graphicsQueue); vkGetDeviceQueue(device, indices.presentFamily, 0, &presentQueue); } void createSwapChain() { SwapChainSupportDetails swapChainSupport = querySwapChainSupport(physicalDevice); VkSurfaceFormatKHR surfaceFormat = chooseSwapSurfaceFormat(swapChainSupport.formats); VkPresentModeKHR presentMode = chooseSwapPresentMode(swapChainSupport.presentModes); VkExtent2D extent = chooseSwapExtent(swapChainSupport.capabilities); uint32_t imageCount = swapChainSupport.capabilities.minImageCount + 1; if (swapChainSupport.capabilities.maxImageCount > 0 && imageCount > swapChainSupport.capabilities.maxImageCount) { imageCount = swapChainSupport.capabilities.maxImageCount; } VkSwapchainCreateInfoKHR createInfo = {}; createInfo.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR; createInfo.surface = surface; createInfo.minImageCount = imageCount; createInfo.imageFormat = surfaceFormat.format; createInfo.imageColorSpace = surfaceFormat.colorSpace; createInfo.imageExtent = extent; createInfo.imageArrayLayers = 1; createInfo.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; QueueFamilyIndices indices = findQueueFamilies(physicalDevice); uint32_t queueFamilyIndices[] = {(uint32_t)indices.graphicsFamily, (uint32_t)indices.presentFamily}; if (indices.graphicsFamily != indices.presentFamily) { createInfo.imageSharingMode = VK_SHARING_MODE_CONCURRENT; createInfo.queueFamilyIndexCount = 2; createInfo.pQueueFamilyIndices = queueFamilyIndices; } else { createInfo.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; } createInfo.preTransform = swapChainSupport.capabilities.currentTransform; createInfo.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; createInfo.presentMode = presentMode; createInfo.clipped = VK_TRUE; if (vkCreateSwapchainKHR(device, &createInfo, nullptr, &swapChain) != VK_SUCCESS) { throw std::runtime_error("failed to create swap chain!"); } vkGetSwapchainImagesKHR(device, swapChain, &imageCount, nullptr); swapChainImages.resize(imageCount); vkGetSwapchainImagesKHR(device, swapChain, &imageCount, swapChainImages.data()); swapChainImageFormat = surfaceFormat.format; swapChainExtent = extent; } void createImageViews() { swapChainImageViews.resize(swapChainImages.size()); for (size_t i = 0; i < swapChainImages.size(); i++) { swapChainImageViews[i] = createImageView(swapChainImages[i], swapChainImageFormat); } } void createRenderPass() { VkAttachmentDescription colorAttachment = {}; colorAttachment.format = swapChainImageFormat; colorAttachment.samples = VK_SAMPLE_COUNT_1_BIT; colorAttachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; colorAttachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; colorAttachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; colorAttachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; colorAttachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; colorAttachment.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; VkAttachmentReference colorAttachmentRef = {}; colorAttachmentRef.attachment = 0; colorAttachmentRef.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; VkSubpassDescription subpass = {}; subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; subpass.colorAttachmentCount = 1; subpass.pColorAttachments = &colorAttachmentRef; VkSubpassDependency dependency = {}; dependency.srcSubpass = VK_SUBPASS_EXTERNAL; dependency.dstSubpass = 0; dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; dependency.srcAccessMask = 0; dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; dependency.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; VkRenderPassCreateInfo renderPassInfo = {}; renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; renderPassInfo.attachmentCount = 1; renderPassInfo.pAttachments = &colorAttachment; renderPassInfo.subpassCount = 1; renderPassInfo.pSubpasses = &subpass; renderPassInfo.dependencyCount = 1; renderPassInfo.pDependencies = &dependency; if (vkCreateRenderPass(device, &renderPassInfo, nullptr, &renderPass) != VK_SUCCESS) { throw std::runtime_error("failed to create render pass!"); } } void createDescriptorSetLayout() { VkDescriptorSetLayoutBinding uboLayoutBinding = {}; uboLayoutBinding.binding = 0; uboLayoutBinding.descriptorCount = 1; uboLayoutBinding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; uboLayoutBinding.pImmutableSamplers = nullptr; uboLayoutBinding.stageFlags = VK_SHADER_STAGE_VERTEX_BIT; VkDescriptorSetLayoutBinding samplerLayoutBinding = {}; samplerLayoutBinding.binding = 1; samplerLayoutBinding.descriptorCount = 1; samplerLayoutBinding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; samplerLayoutBinding.pImmutableSamplers = nullptr; samplerLayoutBinding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; std::array bindings = { uboLayoutBinding, samplerLayoutBinding}; VkDescriptorSetLayoutCreateInfo layoutInfo = {}; layoutInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; layoutInfo.bindingCount = static_cast(bindings.size()); layoutInfo.pBindings = bindings.data(); if (vkCreateDescriptorSetLayout(device, &layoutInfo, nullptr, &descriptorSetLayout) != VK_SUCCESS) { throw std::runtime_error("failed to create descriptor set layout!"); } } void createGraphicsPipeline() { auto vertShaderCode = readFile("vert.spv"); auto fragShaderCode = readFile("frag.spv"); VkShaderModule vertShaderModule = createShaderModule(vertShaderCode); VkShaderModule fragShaderModule = createShaderModule(fragShaderCode); VkPipelineShaderStageCreateInfo vertShaderStageInfo = {}; vertShaderStageInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; vertShaderStageInfo.stage = VK_SHADER_STAGE_VERTEX_BIT; vertShaderStageInfo.module = vertShaderModule; vertShaderStageInfo.pName = "main"; VkPipelineShaderStageCreateInfo fragShaderStageInfo = {}; fragShaderStageInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; fragShaderStageInfo.stage = VK_SHADER_STAGE_FRAGMENT_BIT; fragShaderStageInfo.module = fragShaderModule; fragShaderStageInfo.pName = "main"; VkPipelineShaderStageCreateInfo shaderStages[] = {vertShaderStageInfo, fragShaderStageInfo}; VkPipelineVertexInputStateCreateInfo vertexInputInfo = {}; vertexInputInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; auto bindingDescription = Vertex::getBindingDescription(); auto attributeDescriptions = Vertex::getAttributeDescriptions(); vertexInputInfo.vertexBindingDescriptionCount = 1; vertexInputInfo.vertexAttributeDescriptionCount = static_cast(attributeDescriptions.size()); vertexInputInfo.pVertexBindingDescriptions = &bindingDescription; vertexInputInfo.pVertexAttributeDescriptions = attributeDescriptions.data(); VkPipelineInputAssemblyStateCreateInfo inputAssembly = {}; inputAssembly.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; inputAssembly.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; inputAssembly.primitiveRestartEnable = VK_FALSE; VkViewport viewport = {}; viewport.x = 0.0f; viewport.y = 0.0f; viewport.width = (float)swapChainExtent.width; viewport.height = (float)swapChainExtent.height; viewport.minDepth = 0.0f; viewport.maxDepth = 1.0f; VkRect2D scissor = {}; scissor.offset = {0, 0}; scissor.extent = swapChainExtent; VkPipelineViewportStateCreateInfo viewportState = {}; viewportState.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; viewportState.viewportCount = 1; viewportState.pViewports = &viewport; viewportState.scissorCount = 1; viewportState.pScissors = &scissor; VkPipelineRasterizationStateCreateInfo rasterizer = {}; rasterizer.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; rasterizer.depthClampEnable = VK_FALSE; rasterizer.rasterizerDiscardEnable = VK_FALSE; rasterizer.polygonMode = VK_POLYGON_MODE_FILL; rasterizer.lineWidth = 1.0f; rasterizer.cullMode = VK_CULL_MODE_BACK_BIT; rasterizer.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; rasterizer.depthBiasEnable = VK_FALSE; VkPipelineMultisampleStateCreateInfo multisampling = {}; multisampling.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; multisampling.sampleShadingEnable = VK_FALSE; multisampling.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; VkPipelineColorBlendAttachmentState colorBlendAttachment = {}; colorBlendAttachment.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; colorBlendAttachment.blendEnable = VK_FALSE; VkPipelineColorBlendStateCreateInfo colorBlending = {}; colorBlending.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; colorBlending.logicOpEnable = VK_FALSE; colorBlending.logicOp = VK_LOGIC_OP_COPY; colorBlending.attachmentCount = 1; colorBlending.pAttachments = &colorBlendAttachment; colorBlending.blendConstants[0] = 0.0f; colorBlending.blendConstants[1] = 0.0f; colorBlending.blendConstants[2] = 0.0f; colorBlending.blendConstants[3] = 0.0f; VkPipelineLayoutCreateInfo pipelineLayoutInfo = {}; pipelineLayoutInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; pipelineLayoutInfo.setLayoutCount = 1; pipelineLayoutInfo.pSetLayouts = &descriptorSetLayout; if (vkCreatePipelineLayout(device, &pipelineLayoutInfo, nullptr, &pipelineLayout) != VK_SUCCESS) { throw std::runtime_error("failed to create pipeline layout!"); } VkGraphicsPipelineCreateInfo pipelineInfo = {}; pipelineInfo.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; pipelineInfo.stageCount = 2; pipelineInfo.pStages = shaderStages; pipelineInfo.pVertexInputState = &vertexInputInfo; pipelineInfo.pInputAssemblyState = &inputAssembly; pipelineInfo.pViewportState = &viewportState; pipelineInfo.pRasterizationState = &rasterizer; pipelineInfo.pMultisampleState = &multisampling; pipelineInfo.pColorBlendState = &colorBlending; pipelineInfo.layout = pipelineLayout; pipelineInfo.renderPass = renderPass; pipelineInfo.subpass = 0; pipelineInfo.basePipelineHandle = VK_NULL_HANDLE; if (vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &pipelineInfo, nullptr, &graphicsPipeline) != VK_SUCCESS) { throw std::runtime_error("failed to create graphics pipeline!"); } vkDestroyShaderModule(device, fragShaderModule, nullptr); vkDestroyShaderModule(device, vertShaderModule, nullptr); } void createFramebuffers() { swapChainFramebuffers.resize(swapChainImageViews.size()); for (size_t i = 0; i < swapChainImageViews.size(); i++) { VkImageView attachments[] = {swapChainImageViews[i]}; VkFramebufferCreateInfo framebufferInfo = {}; framebufferInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; framebufferInfo.renderPass = renderPass; framebufferInfo.attachmentCount = 1; framebufferInfo.pAttachments = attachments; framebufferInfo.width = swapChainExtent.width; framebufferInfo.height = swapChainExtent.height; framebufferInfo.layers = 1; if (vkCreateFramebuffer(device, &framebufferInfo, nullptr, &swapChainFramebuffers[i]) != VK_SUCCESS) { throw std::runtime_error("failed to create framebuffer!"); } } } void createCommandPool() { QueueFamilyIndices queueFamilyIndices = findQueueFamilies(physicalDevice); VkCommandPoolCreateInfo poolInfo = {}; poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; poolInfo.queueFamilyIndex = queueFamilyIndices.graphicsFamily; if (vkCreateCommandPool(device, &poolInfo, nullptr, &commandPool) != VK_SUCCESS) { throw std::runtime_error("failed to create graphics command pool!"); } } void createTextureImage() { VkDeviceSize imageSize = imageWidth * imageHeight * 4; mipLevels = static_cast( std::floor(std::log2(std::max(imageWidth, imageHeight)))) + 1; printf("mipLevels = %d\n", mipLevels); if (!image_data) { throw std::runtime_error("failed to load texture image!"); } VkBuffer stagingBuffer; VkDeviceMemory stagingBufferMemory; createBuffer(imageSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, stagingBuffer, stagingBufferMemory); void* data; vkMapMemory(device, stagingBufferMemory, 0, imageSize, 0, &data); memcpy(data, image_data, static_cast(imageSize)); vkUnmapMemory(device, stagingBufferMemory); // VK_FORMAT_R8G8B8A8_UNORM changed to VK_FORMAT_R8G8B8A8_UINT createImage( imageWidth, imageHeight, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, textureImage, textureImageMemory); transitionImageLayout(textureImage, VK_FORMAT_R8G8B8A8_UINT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); copyBufferToImage(stagingBuffer, textureImage, static_cast(imageWidth), static_cast(imageHeight)); vkDestroyBuffer(device, stagingBuffer, nullptr); vkFreeMemory(device, stagingBufferMemory, nullptr); generateMipmaps(textureImage, VK_FORMAT_R8G8B8A8_UNORM); } void generateMipmaps(VkImage image, VkFormat imageFormat) { VkFormatProperties formatProperties; vkGetPhysicalDeviceFormatProperties(physicalDevice, imageFormat, &formatProperties); if (!(formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT)) { throw std::runtime_error( "texture image format does not support linear blitting!"); } VkCommandBuffer commandBuffer = beginSingleTimeCommands(); VkImageMemoryBarrier barrier = {}; barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; barrier.image = image; barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; barrier.subresourceRange.baseArrayLayer = 0; barrier.subresourceRange.layerCount = 1; barrier.subresourceRange.levelCount = 1; int32_t mipWidth = imageWidth; int32_t mipHeight = imageHeight; for (uint32_t i = 1; i < mipLevels; i++) { barrier.subresourceRange.baseMipLevel = i - 1; barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier); VkImageBlit blit = {}; blit.srcOffsets[0] = {0, 0, 0}; blit.srcOffsets[1] = {mipWidth, mipHeight, 1}; blit.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; blit.srcSubresource.mipLevel = i - 1; blit.srcSubresource.baseArrayLayer = 0; blit.srcSubresource.layerCount = 1; blit.dstOffsets[0] = {0, 0, 0}; blit.dstOffsets[1] = {mipWidth > 1 ? mipWidth / 2 : 1, mipHeight > 1 ? mipHeight / 2 : 1, 1}; blit.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; blit.dstSubresource.mipLevel = i; blit.dstSubresource.baseArrayLayer = 0; blit.dstSubresource.layerCount = 1; vkCmdBlitImage(commandBuffer, image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &blit, VK_FILTER_LINEAR); barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier); if (mipWidth > 1) mipWidth /= 2; if (mipHeight > 1) mipHeight /= 2; } barrier.subresourceRange.baseMipLevel = mipLevels - 1; barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier); endSingleTimeCommands(commandBuffer); } #ifdef _WIN64 // For windows HANDLE getVkImageMemHandle( VkExternalMemoryHandleTypeFlagsKHR externalMemoryHandleType) { HANDLE handle; VkMemoryGetWin32HandleInfoKHR vkMemoryGetWin32HandleInfoKHR = {}; vkMemoryGetWin32HandleInfoKHR.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR; vkMemoryGetWin32HandleInfoKHR.pNext = NULL; vkMemoryGetWin32HandleInfoKHR.memory = textureImageMemory; vkMemoryGetWin32HandleInfoKHR.handleType = (VkExternalMemoryHandleTypeFlagBitsKHR)externalMemoryHandleType; fpGetMemoryWin32HandleKHR(device, &vkMemoryGetWin32HandleInfoKHR, &handle); return handle; } HANDLE getVkSemaphoreHandle( VkExternalSemaphoreHandleTypeFlagBitsKHR externalSemaphoreHandleType, VkSemaphore& semVkCuda) { HANDLE handle; VkSemaphoreGetWin32HandleInfoKHR vulkanSemaphoreGetWin32HandleInfoKHR = {}; vulkanSemaphoreGetWin32HandleInfoKHR.sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR; vulkanSemaphoreGetWin32HandleInfoKHR.pNext = NULL; vulkanSemaphoreGetWin32HandleInfoKHR.semaphore = semVkCuda; vulkanSemaphoreGetWin32HandleInfoKHR.handleType = externalSemaphoreHandleType; fpGetSemaphoreWin32HandleKHR(device, &vulkanSemaphoreGetWin32HandleInfoKHR, &handle); return handle; } #else int getVkImageMemHandle( VkExternalMemoryHandleTypeFlagsKHR externalMemoryHandleType) { if (externalMemoryHandleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) { int fd; VkMemoryGetFdInfoKHR vkMemoryGetFdInfoKHR = {}; vkMemoryGetFdInfoKHR.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR; vkMemoryGetFdInfoKHR.pNext = NULL; vkMemoryGetFdInfoKHR.memory = textureImageMemory; vkMemoryGetFdInfoKHR.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; fpGetMemoryFdKHR(device, &vkMemoryGetFdInfoKHR, &fd); return fd; } return -1; } int getVkSemaphoreHandle( VkExternalSemaphoreHandleTypeFlagBitsKHR externalSemaphoreHandleType, VkSemaphore& semVkCuda) { if (externalSemaphoreHandleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) { int fd; VkSemaphoreGetFdInfoKHR vulkanSemaphoreGetFdInfoKHR = {}; vulkanSemaphoreGetFdInfoKHR.sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR; vulkanSemaphoreGetFdInfoKHR.pNext = NULL; vulkanSemaphoreGetFdInfoKHR.semaphore = semVkCuda; vulkanSemaphoreGetFdInfoKHR.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; fpGetSemaphoreFdKHR(device, &vulkanSemaphoreGetFdInfoKHR, &fd); return fd; } return -1; } #endif void createTextureImageView() { textureImageView = createImageView(textureImage, VK_FORMAT_R8G8B8A8_UNORM); } void createTextureSampler() { VkSamplerCreateInfo samplerInfo = {}; samplerInfo.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; samplerInfo.magFilter = VK_FILTER_LINEAR; samplerInfo.minFilter = VK_FILTER_LINEAR; samplerInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; samplerInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT; samplerInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; samplerInfo.anisotropyEnable = VK_TRUE; samplerInfo.maxAnisotropy = 16; samplerInfo.borderColor = VK_BORDER_COLOR_INT_OPAQUE_BLACK; samplerInfo.unnormalizedCoordinates = VK_FALSE; samplerInfo.compareEnable = VK_FALSE; samplerInfo.compareOp = VK_COMPARE_OP_ALWAYS; samplerInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; samplerInfo.minLod = 0; // Optional samplerInfo.maxLod = static_cast(mipLevels); samplerInfo.mipLodBias = 0; // Optional if (vkCreateSampler(device, &samplerInfo, nullptr, &textureSampler) != VK_SUCCESS) { throw std::runtime_error("failed to create texture sampler!"); } } VkImageView createImageView(VkImage image, VkFormat format) { VkImageViewCreateInfo viewInfo = {}; viewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; viewInfo.image = image; viewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D; viewInfo.format = format; viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; viewInfo.subresourceRange.baseMipLevel = 0; viewInfo.subresourceRange.levelCount = mipLevels; viewInfo.subresourceRange.baseArrayLayer = 0; viewInfo.subresourceRange.layerCount = 1; VkImageView imageView; if (vkCreateImageView(device, &viewInfo, nullptr, &imageView) != VK_SUCCESS) { throw std::runtime_error("failed to create texture image view!"); } return imageView; } void createImage(uint32_t width, uint32_t height, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage, VkMemoryPropertyFlags properties, VkImage& image, VkDeviceMemory& imageMemory) { VkImageCreateInfo imageInfo = {}; imageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; imageInfo.imageType = VK_IMAGE_TYPE_2D; imageInfo.extent.width = width; imageInfo.extent.height = height; imageInfo.extent.depth = 1; imageInfo.mipLevels = mipLevels; imageInfo.arrayLayers = 1; imageInfo.format = format; imageInfo.tiling = tiling; imageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; imageInfo.usage = usage; imageInfo.samples = VK_SAMPLE_COUNT_1_BIT; imageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; VkExternalMemoryImageCreateInfo vkExternalMemImageCreateInfo = {}; vkExternalMemImageCreateInfo.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO; vkExternalMemImageCreateInfo.pNext = NULL; #ifdef _WIN64 vkExternalMemImageCreateInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT; #else vkExternalMemImageCreateInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; #endif imageInfo.pNext = &vkExternalMemImageCreateInfo; if (vkCreateImage(device, &imageInfo, nullptr, &image) != VK_SUCCESS) { throw std::runtime_error("failed to create image!"); } VkMemoryRequirements memRequirements; vkGetImageMemoryRequirements(device, image, &memRequirements); #ifdef _WIN64 WindowsSecurityAttributes winSecurityAttributes; VkExportMemoryWin32HandleInfoKHR vulkanExportMemoryWin32HandleInfoKHR = {}; vulkanExportMemoryWin32HandleInfoKHR.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_WIN32_HANDLE_INFO_KHR; vulkanExportMemoryWin32HandleInfoKHR.pNext = NULL; vulkanExportMemoryWin32HandleInfoKHR.pAttributes = &winSecurityAttributes; vulkanExportMemoryWin32HandleInfoKHR.dwAccess = DXGI_SHARED_RESOURCE_READ | DXGI_SHARED_RESOURCE_WRITE; vulkanExportMemoryWin32HandleInfoKHR.name = (LPCWSTR)NULL; #endif VkExportMemoryAllocateInfoKHR vulkanExportMemoryAllocateInfoKHR = {}; vulkanExportMemoryAllocateInfoKHR.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR; #ifdef _WIN64 vulkanExportMemoryAllocateInfoKHR.pNext = IsWindows8OrGreater() ? &vulkanExportMemoryWin32HandleInfoKHR : NULL; vulkanExportMemoryAllocateInfoKHR.handleTypes = IsWindows8OrGreater() ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT; #else vulkanExportMemoryAllocateInfoKHR.pNext = NULL; vulkanExportMemoryAllocateInfoKHR.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; #endif VkMemoryAllocateInfo allocInfo = {}; allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; allocInfo.allocationSize = memRequirements.size; allocInfo.pNext = &vulkanExportMemoryAllocateInfoKHR; allocInfo.memoryTypeIndex = findMemoryType(memRequirements.memoryTypeBits, properties); VkMemoryRequirements vkMemoryRequirements = {}; vkGetImageMemoryRequirements(device, image, &vkMemoryRequirements); totalImageMemSize = vkMemoryRequirements.size; if (vkAllocateMemory(device, &allocInfo, nullptr, &textureImageMemory) != VK_SUCCESS) { throw std::runtime_error("failed to allocate image memory!"); } vkBindImageMemory(device, image, textureImageMemory, 0); } void cudaVkImportSemaphore() { cudaExternalSemaphoreHandleDesc externalSemaphoreHandleDesc; memset(&externalSemaphoreHandleDesc, 0, sizeof(externalSemaphoreHandleDesc)); #ifdef _WIN64 externalSemaphoreHandleDesc.type = IsWindows8OrGreater() ? cudaExternalSemaphoreHandleTypeOpaqueWin32 : cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt; externalSemaphoreHandleDesc.handle.win32.handle = getVkSemaphoreHandle( IsWindows8OrGreater() ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT, cudaUpdateVkSemaphore); #else externalSemaphoreHandleDesc.type = cudaExternalSemaphoreHandleTypeOpaqueFd; externalSemaphoreHandleDesc.handle.fd = getVkSemaphoreHandle( VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, cudaUpdateVkSemaphore); #endif externalSemaphoreHandleDesc.flags = 0; checkCudaErrors(cudaImportExternalSemaphore(&cudaExtCudaUpdateVkSemaphore, &externalSemaphoreHandleDesc)); memset(&externalSemaphoreHandleDesc, 0, sizeof(externalSemaphoreHandleDesc)); #ifdef _WIN64 externalSemaphoreHandleDesc.type = IsWindows8OrGreater() ? cudaExternalSemaphoreHandleTypeOpaqueWin32 : cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt; ; externalSemaphoreHandleDesc.handle.win32.handle = getVkSemaphoreHandle( IsWindows8OrGreater() ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT, vkUpdateCudaSemaphore); #else externalSemaphoreHandleDesc.type = cudaExternalSemaphoreHandleTypeOpaqueFd; externalSemaphoreHandleDesc.handle.fd = getVkSemaphoreHandle( VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, vkUpdateCudaSemaphore); #endif externalSemaphoreHandleDesc.flags = 0; checkCudaErrors(cudaImportExternalSemaphore(&cudaExtVkUpdateCudaSemaphore, &externalSemaphoreHandleDesc)); printf("CUDA Imported Vulkan semaphore\n"); } void cudaVkImportImageMem() { cudaExternalMemoryHandleDesc cudaExtMemHandleDesc; memset(&cudaExtMemHandleDesc, 0, sizeof(cudaExtMemHandleDesc)); #ifdef _WIN64 cudaExtMemHandleDesc.type = IsWindows8OrGreater() ? cudaExternalMemoryHandleTypeOpaqueWin32 : cudaExternalMemoryHandleTypeOpaqueWin32Kmt; cudaExtMemHandleDesc.handle.win32.handle = getVkImageMemHandle( IsWindows8OrGreater() ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT); #else cudaExtMemHandleDesc.type = cudaExternalMemoryHandleTypeOpaqueFd; cudaExtMemHandleDesc.handle.fd = getVkImageMemHandle(VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR); #endif cudaExtMemHandleDesc.size = totalImageMemSize; checkCudaErrors(cudaImportExternalMemory(&cudaExtMemImageBuffer, &cudaExtMemHandleDesc)); cudaExternalMemoryMipmappedArrayDesc externalMemoryMipmappedArrayDesc; memset(&externalMemoryMipmappedArrayDesc, 0, sizeof(externalMemoryMipmappedArrayDesc)); cudaExtent extent = make_cudaExtent(imageWidth, imageHeight, 0); cudaChannelFormatDesc formatDesc; formatDesc.x = 8; formatDesc.y = 8; formatDesc.z = 8; formatDesc.w = 8; formatDesc.f = cudaChannelFormatKindUnsigned; externalMemoryMipmappedArrayDesc.offset = 0; externalMemoryMipmappedArrayDesc.formatDesc = formatDesc; externalMemoryMipmappedArrayDesc.extent = extent; externalMemoryMipmappedArrayDesc.flags = 0; externalMemoryMipmappedArrayDesc.numLevels = mipLevels; checkCudaErrors(cudaExternalMemoryGetMappedMipmappedArray( &cudaMipmappedImageArray, cudaExtMemImageBuffer, &externalMemoryMipmappedArrayDesc)); checkCudaErrors(cudaMallocMipmappedArray(&cudaMipmappedImageArrayTemp, &formatDesc, extent, mipLevels)); checkCudaErrors(cudaMallocMipmappedArray(&cudaMipmappedImageArrayOrig, &formatDesc, extent, mipLevels)); for (int mipLevelIdx = 0; mipLevelIdx < mipLevels; mipLevelIdx++) { cudaArray_t cudaMipLevelArray, cudaMipLevelArrayTemp, cudaMipLevelArrayOrig; cudaResourceDesc resourceDesc; checkCudaErrors(cudaGetMipmappedArrayLevel( &cudaMipLevelArray, cudaMipmappedImageArray, mipLevelIdx)); checkCudaErrors(cudaGetMipmappedArrayLevel( &cudaMipLevelArrayTemp, cudaMipmappedImageArrayTemp, mipLevelIdx)); checkCudaErrors(cudaGetMipmappedArrayLevel( &cudaMipLevelArrayOrig, cudaMipmappedImageArrayOrig, mipLevelIdx)); uint32_t width = (imageWidth >> mipLevelIdx) ? (imageWidth >> mipLevelIdx) : 1; uint32_t height = (imageHeight >> mipLevelIdx) ? (imageHeight >> mipLevelIdx) : 1; checkCudaErrors(cudaMemcpy2DArrayToArray( cudaMipLevelArrayOrig, 0, 0, cudaMipLevelArray, 0, 0, width * sizeof(uchar4), height, cudaMemcpyDeviceToDevice)); memset(&resourceDesc, 0, sizeof(resourceDesc)); resourceDesc.resType = cudaResourceTypeArray; resourceDesc.res.array.array = cudaMipLevelArray; cudaSurfaceObject_t surfaceObject; checkCudaErrors(cudaCreateSurfaceObject(&surfaceObject, &resourceDesc)); surfaceObjectList.push_back(surfaceObject); memset(&resourceDesc, 0, sizeof(resourceDesc)); resourceDesc.resType = cudaResourceTypeArray; resourceDesc.res.array.array = cudaMipLevelArrayTemp; cudaSurfaceObject_t surfaceObjectTemp; checkCudaErrors( cudaCreateSurfaceObject(&surfaceObjectTemp, &resourceDesc)); surfaceObjectListTemp.push_back(surfaceObjectTemp); } cudaResourceDesc resDescr; memset(&resDescr, 0, sizeof(cudaResourceDesc)); resDescr.resType = cudaResourceTypeMipmappedArray; resDescr.res.mipmap.mipmap = cudaMipmappedImageArrayOrig; cudaTextureDesc texDescr; memset(&texDescr, 0, sizeof(cudaTextureDesc)); texDescr.normalizedCoords = true; texDescr.filterMode = cudaFilterModeLinear; texDescr.mipmapFilterMode = cudaFilterModeLinear; texDescr.addressMode[0] = cudaAddressModeWrap; texDescr.addressMode[1] = cudaAddressModeWrap; texDescr.maxMipmapLevelClamp = float(mipLevels - 1); texDescr.readMode = cudaReadModeNormalizedFloat; checkCudaErrors(cudaCreateTextureObject(&textureObjMipMapInput, &resDescr, &texDescr, NULL)); checkCudaErrors(cudaMalloc((void**)&d_surfaceObjectList, sizeof(cudaSurfaceObject_t) * mipLevels)); checkCudaErrors(cudaMalloc((void**)&d_surfaceObjectListTemp, sizeof(cudaSurfaceObject_t) * mipLevels)); checkCudaErrors(cudaMemcpy(d_surfaceObjectList, surfaceObjectList.data(), sizeof(cudaSurfaceObject_t) * mipLevels, cudaMemcpyHostToDevice)); checkCudaErrors(cudaMemcpy( d_surfaceObjectListTemp, surfaceObjectListTemp.data(), sizeof(cudaSurfaceObject_t) * mipLevels, cudaMemcpyHostToDevice)); printf("CUDA Kernel Vulkan image buffer\n"); } void cudaUpdateVkImage() { cudaVkSemaphoreWait(cudaExtVkUpdateCudaSemaphore); int nthreads = 128; /*Perform 2D box filter on image using CUDA */ d_boxfilter_rgba_x<<>>( d_surfaceObjectListTemp, textureObjMipMapInput, imageWidth, imageHeight, mipLevels, filter_radius); d_boxfilter_rgba_y<<>>( d_surfaceObjectList, d_surfaceObjectListTemp, imageWidth, imageHeight, mipLevels, filter_radius); varySigma(); cudaVkSemaphoreSignal(cudaExtCudaUpdateVkSemaphore); } void transitionImageLayout(VkImage image, VkFormat format, VkImageLayout oldLayout, VkImageLayout newLayout) { VkCommandBuffer commandBuffer = beginSingleTimeCommands(); VkImageMemoryBarrier barrier = {}; barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; barrier.oldLayout = oldLayout; barrier.newLayout = newLayout; barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.image = image; barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; barrier.subresourceRange.baseMipLevel = 0; barrier.subresourceRange.levelCount = mipLevels; barrier.subresourceRange.baseArrayLayer = 0; barrier.subresourceRange.layerCount = 1; VkPipelineStageFlags sourceStage; VkPipelineStageFlags destinationStage; if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED && newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { barrier.srcAccessMask = 0; barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && newLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) { barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; destinationStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; } else { throw std::invalid_argument("unsupported layout transition!"); } vkCmdPipelineBarrier(commandBuffer, sourceStage, destinationStage, 0, 0, nullptr, 0, nullptr, 1, &barrier); endSingleTimeCommands(commandBuffer); } void copyBufferToImage(VkBuffer buffer, VkImage image, uint32_t width, uint32_t height) { VkCommandBuffer commandBuffer = beginSingleTimeCommands(); VkBufferImageCopy region = {}; region.bufferOffset = 0; region.bufferRowLength = 0; region.bufferImageHeight = 0; region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; region.imageSubresource.mipLevel = 0; region.imageSubresource.baseArrayLayer = 0; region.imageSubresource.layerCount = 1; region.imageOffset = {0, 0, 0}; region.imageExtent = {width, height, 1}; vkCmdCopyBufferToImage(commandBuffer, buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion); endSingleTimeCommands(commandBuffer); } void createVertexBuffer() { VkDeviceSize bufferSize = sizeof(vertices[0]) * vertices.size(); VkBuffer stagingBuffer; VkDeviceMemory stagingBufferMemory; createBuffer(bufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, stagingBuffer, stagingBufferMemory); void* data; vkMapMemory(device, stagingBufferMemory, 0, bufferSize, 0, &data); memcpy(data, vertices.data(), (size_t)bufferSize); vkUnmapMemory(device, stagingBufferMemory); createBuffer( bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, vertexBuffer, vertexBufferMemory); copyBuffer(stagingBuffer, vertexBuffer, bufferSize); vkDestroyBuffer(device, stagingBuffer, nullptr); vkFreeMemory(device, stagingBufferMemory, nullptr); } void createIndexBuffer() { VkDeviceSize bufferSize = sizeof(indices[0]) * indices.size(); VkBuffer stagingBuffer; VkDeviceMemory stagingBufferMemory; createBuffer(bufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, stagingBuffer, stagingBufferMemory); void* data; vkMapMemory(device, stagingBufferMemory, 0, bufferSize, 0, &data); memcpy(data, indices.data(), (size_t)bufferSize); vkUnmapMemory(device, stagingBufferMemory); createBuffer( bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, indexBuffer, indexBufferMemory); copyBuffer(stagingBuffer, indexBuffer, bufferSize); vkDestroyBuffer(device, stagingBuffer, nullptr); vkFreeMemory(device, stagingBufferMemory, nullptr); } void createUniformBuffers() { VkDeviceSize bufferSize = sizeof(UniformBufferObject); uniformBuffers.resize(swapChainImages.size()); uniformBuffersMemory.resize(swapChainImages.size()); for (size_t i = 0; i < swapChainImages.size(); i++) { createBuffer(bufferSize, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, uniformBuffers[i], uniformBuffersMemory[i]); } } void createDescriptorPool() { std::array poolSizes = {}; poolSizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; poolSizes[0].descriptorCount = static_cast(swapChainImages.size()); poolSizes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; poolSizes[1].descriptorCount = static_cast(swapChainImages.size()); VkDescriptorPoolCreateInfo poolInfo = {}; poolInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; poolInfo.poolSizeCount = static_cast(poolSizes.size()); poolInfo.pPoolSizes = poolSizes.data(); poolInfo.maxSets = static_cast(swapChainImages.size()); if (vkCreateDescriptorPool(device, &poolInfo, nullptr, &descriptorPool) != VK_SUCCESS) { throw std::runtime_error("failed to create descriptor pool!"); } } void createDescriptorSets() { std::vector layouts(swapChainImages.size(), descriptorSetLayout); VkDescriptorSetAllocateInfo allocInfo = {}; allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; allocInfo.descriptorPool = descriptorPool; allocInfo.descriptorSetCount = static_cast(swapChainImages.size()); allocInfo.pSetLayouts = layouts.data(); descriptorSets.resize(swapChainImages.size()); if (vkAllocateDescriptorSets(device, &allocInfo, descriptorSets.data()) != VK_SUCCESS) { throw std::runtime_error("failed to allocate descriptor sets!"); } for (size_t i = 0; i < swapChainImages.size(); i++) { VkDescriptorBufferInfo bufferInfo = {}; bufferInfo.buffer = uniformBuffers[i]; bufferInfo.offset = 0; bufferInfo.range = sizeof(UniformBufferObject); VkDescriptorImageInfo imageInfo = {}; imageInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; imageInfo.imageView = textureImageView; imageInfo.sampler = textureSampler; std::array descriptorWrites = {}; descriptorWrites[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; descriptorWrites[0].dstSet = descriptorSets[i]; descriptorWrites[0].dstBinding = 0; descriptorWrites[0].dstArrayElement = 0; descriptorWrites[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; descriptorWrites[0].descriptorCount = 1; descriptorWrites[0].pBufferInfo = &bufferInfo; descriptorWrites[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; descriptorWrites[1].dstSet = descriptorSets[i]; descriptorWrites[1].dstBinding = 1; descriptorWrites[1].dstArrayElement = 0; descriptorWrites[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; descriptorWrites[1].descriptorCount = 1; descriptorWrites[1].pImageInfo = &imageInfo; vkUpdateDescriptorSets(device, static_cast(descriptorWrites.size()), descriptorWrites.data(), 0, nullptr); } } void createBuffer(VkDeviceSize size, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties, VkBuffer& buffer, VkDeviceMemory& bufferMemory) { VkBufferCreateInfo bufferInfo = {}; bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; bufferInfo.size = size; bufferInfo.usage = usage; bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; if (vkCreateBuffer(device, &bufferInfo, nullptr, &buffer) != VK_SUCCESS) { throw std::runtime_error("failed to create buffer!"); } VkMemoryRequirements memRequirements; vkGetBufferMemoryRequirements(device, buffer, &memRequirements); VkMemoryAllocateInfo allocInfo = {}; allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; allocInfo.allocationSize = memRequirements.size; allocInfo.memoryTypeIndex = findMemoryType(memRequirements.memoryTypeBits, properties); if (vkAllocateMemory(device, &allocInfo, nullptr, &bufferMemory) != VK_SUCCESS) { throw std::runtime_error("failed to allocate buffer memory!"); } vkBindBufferMemory(device, buffer, bufferMemory, 0); } VkCommandBuffer beginSingleTimeCommands() { VkCommandBufferAllocateInfo allocInfo = {}; allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; allocInfo.commandPool = commandPool; allocInfo.commandBufferCount = 1; VkCommandBuffer commandBuffer; vkAllocateCommandBuffers(device, &allocInfo, &commandBuffer); VkCommandBufferBeginInfo beginInfo = {}; beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; vkBeginCommandBuffer(commandBuffer, &beginInfo); return commandBuffer; } void endSingleTimeCommands(VkCommandBuffer commandBuffer) { vkEndCommandBuffer(commandBuffer); VkSubmitInfo submitInfo = {}; submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; submitInfo.commandBufferCount = 1; submitInfo.pCommandBuffers = &commandBuffer; vkQueueSubmit(graphicsQueue, 1, &submitInfo, VK_NULL_HANDLE); vkQueueWaitIdle(graphicsQueue); vkFreeCommandBuffers(device, commandPool, 1, &commandBuffer); } void copyBuffer(VkBuffer srcBuffer, VkBuffer dstBuffer, VkDeviceSize size) { VkCommandBuffer commandBuffer = beginSingleTimeCommands(); VkBufferCopy copyRegion = {}; copyRegion.size = size; vkCmdCopyBuffer(commandBuffer, srcBuffer, dstBuffer, 1, ©Region); endSingleTimeCommands(commandBuffer); } uint32_t findMemoryType(uint32_t typeFilter, VkMemoryPropertyFlags properties) { VkPhysicalDeviceMemoryProperties memProperties; vkGetPhysicalDeviceMemoryProperties(physicalDevice, &memProperties); for (uint32_t i = 0; i < memProperties.memoryTypeCount; i++) { if ((typeFilter & (1 << i)) && (memProperties.memoryTypes[i].propertyFlags & properties) == properties) { return i; } } throw std::runtime_error("failed to find suitable memory type!"); } void createCommandBuffers() { commandBuffers.resize(swapChainFramebuffers.size()); VkCommandBufferAllocateInfo allocInfo = {}; allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; allocInfo.commandPool = commandPool; allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; allocInfo.commandBufferCount = (uint32_t)commandBuffers.size(); if (vkAllocateCommandBuffers(device, &allocInfo, commandBuffers.data()) != VK_SUCCESS) { throw std::runtime_error("failed to allocate command buffers!"); } for (size_t i = 0; i < commandBuffers.size(); i++) { VkCommandBufferBeginInfo beginInfo = {}; beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; beginInfo.flags = VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT; if (vkBeginCommandBuffer(commandBuffers[i], &beginInfo) != VK_SUCCESS) { throw std::runtime_error("failed to begin recording command buffer!"); } VkRenderPassBeginInfo renderPassInfo = {}; renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; renderPassInfo.renderPass = renderPass; renderPassInfo.framebuffer = swapChainFramebuffers[i]; renderPassInfo.renderArea.offset = {0, 0}; renderPassInfo.renderArea.extent = swapChainExtent; VkClearValue clearColor = {0.0f, 0.0f, 0.0f, 1.0f}; renderPassInfo.clearValueCount = 1; renderPassInfo.pClearValues = &clearColor; vkCmdBeginRenderPass(commandBuffers[i], &renderPassInfo, VK_SUBPASS_CONTENTS_INLINE); vkCmdBindPipeline(commandBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, graphicsPipeline); VkBuffer vertexBuffers[] = {vertexBuffer}; VkDeviceSize offsets[] = {0}; vkCmdBindVertexBuffers(commandBuffers[i], 0, 1, vertexBuffers, offsets); vkCmdBindIndexBuffer(commandBuffers[i], indexBuffer, 0, VK_INDEX_TYPE_UINT16); vkCmdBindDescriptorSets(commandBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, &descriptorSets[i], 0, nullptr); vkCmdDrawIndexed(commandBuffers[i], static_cast(indices.size()), 1, 0, 0, 0); // vkCmdDraw(commandBuffers[i], static_cast(vertices.size()), 1, // 0, 0); vkCmdEndRenderPass(commandBuffers[i]); if (vkEndCommandBuffer(commandBuffers[i]) != VK_SUCCESS) { throw std::runtime_error("failed to record command buffer!"); } } } void createSyncObjects() { imageAvailableSemaphores.resize(MAX_FRAMES); renderFinishedSemaphores.resize(MAX_FRAMES); inFlightFences.resize(MAX_FRAMES); VkSemaphoreCreateInfo semaphoreInfo = {}; semaphoreInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; VkFenceCreateInfo fenceInfo = {}; fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; fenceInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT; for (size_t i = 0; i < MAX_FRAMES; i++) { if (vkCreateSemaphore(device, &semaphoreInfo, nullptr, &imageAvailableSemaphores[i]) != VK_SUCCESS || vkCreateSemaphore(device, &semaphoreInfo, nullptr, &renderFinishedSemaphores[i]) != VK_SUCCESS || vkCreateFence(device, &fenceInfo, nullptr, &inFlightFences[i]) != VK_SUCCESS) { throw std::runtime_error( "failed to create synchronization objects for a frame!"); } } } void createSyncObjectsExt() { VkSemaphoreCreateInfo semaphoreInfo = {}; semaphoreInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; memset(&semaphoreInfo, 0, sizeof(semaphoreInfo)); semaphoreInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; #ifdef _WIN64 WindowsSecurityAttributes winSecurityAttributes; VkExportSemaphoreWin32HandleInfoKHR vulkanExportSemaphoreWin32HandleInfoKHR = {}; vulkanExportSemaphoreWin32HandleInfoKHR.sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_WIN32_HANDLE_INFO_KHR; vulkanExportSemaphoreWin32HandleInfoKHR.pNext = NULL; vulkanExportSemaphoreWin32HandleInfoKHR.pAttributes = &winSecurityAttributes; vulkanExportSemaphoreWin32HandleInfoKHR.dwAccess = DXGI_SHARED_RESOURCE_READ | DXGI_SHARED_RESOURCE_WRITE; vulkanExportSemaphoreWin32HandleInfoKHR.name = (LPCWSTR)NULL; #endif VkExportSemaphoreCreateInfoKHR vulkanExportSemaphoreCreateInfo = {}; vulkanExportSemaphoreCreateInfo.sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO_KHR; #ifdef _WIN64 vulkanExportSemaphoreCreateInfo.pNext = IsWindows8OrGreater() ? &vulkanExportSemaphoreWin32HandleInfoKHR : NULL; vulkanExportSemaphoreCreateInfo.handleTypes = IsWindows8OrGreater() ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT; #else vulkanExportSemaphoreCreateInfo.pNext = NULL; vulkanExportSemaphoreCreateInfo.handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT; #endif semaphoreInfo.pNext = &vulkanExportSemaphoreCreateInfo; if (vkCreateSemaphore(device, &semaphoreInfo, nullptr, &cudaUpdateVkSemaphore) != VK_SUCCESS || vkCreateSemaphore(device, &semaphoreInfo, nullptr, &vkUpdateCudaSemaphore) != VK_SUCCESS) { throw std::runtime_error( "failed to create synchronization objects for a CUDA-Vulkan!"); } } void updateUniformBuffer() { UniformBufferObject ubo = {}; mat4x4_identity(ubo.model); mat4x4 Model; mat4x4_dup(Model, ubo.model); mat4x4_rotate(ubo.model, Model, 0.0f, 0.0f, 1.0f, degreesToRadians(135.0f)); vec3 eye = {2.0f, 2.0f, 2.0f}; vec3 center = {0.0f, 0.0f, 0.0f}; vec3 up = {0.0f, 0.0f, 1.0f}; mat4x4_look_at(ubo.view, eye, center, up); mat4x4_perspective(ubo.proj, degreesToRadians(45.0f), swapChainExtent.width / (float)swapChainExtent.height, 0.1f, 10.0f); ubo.proj[1][1] *= -1; for (size_t i = 0; i < swapChainImages.size(); i++) { void* data; vkMapMemory(device, uniformBuffersMemory[i], 0, sizeof(ubo), 0, &data); memcpy(data, &ubo, sizeof(ubo)); vkUnmapMemory(device, uniformBuffersMemory[i]); } } void drawFrame() { static int startSubmit = 0; vkWaitForFences(device, 1, &inFlightFences[currentFrame], VK_TRUE, std::numeric_limits::max()); uint32_t imageIndex; VkResult result = vkAcquireNextImageKHR( device, swapChain, std::numeric_limits::max(), imageAvailableSemaphores[currentFrame], VK_NULL_HANDLE, &imageIndex); if (result == VK_ERROR_OUT_OF_DATE_KHR) { recreateSwapChain(); return; } else if (result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR) { throw std::runtime_error("failed to acquire swap chain image!"); } vkResetFences(device, 1, &inFlightFences[currentFrame]); if (!startSubmit) { submitVulkan(imageIndex); startSubmit = 1; } else { submitVulkanCuda(imageIndex); } VkPresentInfoKHR presentInfo = {}; presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; VkSemaphore signalSemaphores[] = {renderFinishedSemaphores[currentFrame]}; presentInfo.waitSemaphoreCount = 1; presentInfo.pWaitSemaphores = signalSemaphores; VkSwapchainKHR swapChains[] = {swapChain}; presentInfo.swapchainCount = 1; presentInfo.pSwapchains = swapChains; presentInfo.pImageIndices = &imageIndex; presentInfo.pResults = nullptr; // Optional result = vkQueuePresentKHR(presentQueue, &presentInfo); if (result == VK_ERROR_OUT_OF_DATE_KHR || result == VK_SUBOPTIMAL_KHR || framebufferResized) { framebufferResized = false; recreateSwapChain(); } else if (result != VK_SUCCESS) { throw std::runtime_error("failed to present swap chain image!"); } cudaUpdateVkImage(); currentFrame = (currentFrame + 1) % MAX_FRAMES; // Added sleep of 10 millisecs so that CPU does not submit too much work to // GPU std::this_thread::sleep_for(std::chrono::microseconds(10000)); char title[256]; sprintf(title, "Vulkan Image CUDA Box Filter (radius=%d)", filter_radius); glfwSetWindowTitle(window, title); } void cudaVkSemaphoreSignal(cudaExternalSemaphore_t& extSemaphore) { cudaExternalSemaphoreSignalParams extSemaphoreSignalParams; memset(&extSemaphoreSignalParams, 0, sizeof(extSemaphoreSignalParams)); extSemaphoreSignalParams.params.fence.value = 0; extSemaphoreSignalParams.flags = 0; checkCudaErrors(cudaSignalExternalSemaphoresAsync( &extSemaphore, &extSemaphoreSignalParams, 1, streamToRun)); } void cudaVkSemaphoreWait(cudaExternalSemaphore_t& extSemaphore) { cudaExternalSemaphoreWaitParams extSemaphoreWaitParams; memset(&extSemaphoreWaitParams, 0, sizeof(extSemaphoreWaitParams)); extSemaphoreWaitParams.params.fence.value = 0; extSemaphoreWaitParams.flags = 0; checkCudaErrors(cudaWaitExternalSemaphoresAsync( &extSemaphore, &extSemaphoreWaitParams, 1, streamToRun)); } void submitVulkan(uint32_t imageIndex) { VkSubmitInfo submitInfo = {}; submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; VkSemaphore waitSemaphores[] = {imageAvailableSemaphores[currentFrame]}; VkPipelineStageFlags waitStages[] = { VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT}; submitInfo.waitSemaphoreCount = 1; submitInfo.pWaitSemaphores = waitSemaphores; submitInfo.pWaitDstStageMask = waitStages; submitInfo.commandBufferCount = 1; submitInfo.pCommandBuffers = &commandBuffers[imageIndex]; VkSemaphore signalSemaphores[] = {renderFinishedSemaphores[currentFrame], vkUpdateCudaSemaphore}; submitInfo.signalSemaphoreCount = 2; submitInfo.pSignalSemaphores = signalSemaphores; if (vkQueueSubmit(graphicsQueue, 1, &submitInfo, inFlightFences[currentFrame]) != VK_SUCCESS) { throw std::runtime_error("failed to submit draw command buffer!"); } } void submitVulkanCuda(uint32_t imageIndex) { VkSubmitInfo submitInfo = {}; submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; VkSemaphore waitSemaphores[] = {imageAvailableSemaphores[currentFrame], cudaUpdateVkSemaphore}; VkPipelineStageFlags waitStages[] = { VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT}; submitInfo.waitSemaphoreCount = 2; submitInfo.pWaitSemaphores = waitSemaphores; submitInfo.pWaitDstStageMask = waitStages; submitInfo.commandBufferCount = 1; submitInfo.pCommandBuffers = &commandBuffers[imageIndex]; VkSemaphore signalSemaphores[] = {renderFinishedSemaphores[currentFrame], vkUpdateCudaSemaphore}; submitInfo.signalSemaphoreCount = 2; submitInfo.pSignalSemaphores = signalSemaphores; if (vkQueueSubmit(graphicsQueue, 1, &submitInfo, inFlightFences[currentFrame]) != VK_SUCCESS) { throw std::runtime_error("failed to submit draw command buffer!"); } } VkShaderModule createShaderModule(const std::vector& code) { VkShaderModuleCreateInfo createInfo = {}; createInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; createInfo.codeSize = code.size(); createInfo.pCode = reinterpret_cast(code.data()); VkShaderModule shaderModule; if (vkCreateShaderModule(device, &createInfo, nullptr, &shaderModule) != VK_SUCCESS) { throw std::runtime_error("failed to create shader module!"); } return shaderModule; } VkSurfaceFormatKHR chooseSwapSurfaceFormat( const std::vector& availableFormats) { if (availableFormats.size() == 1 && availableFormats[0].format == VK_FORMAT_UNDEFINED) { return {VK_FORMAT_B8G8R8A8_UNORM, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR}; } for (const auto& availableFormat : availableFormats) { if (availableFormat.format == VK_FORMAT_B8G8R8A8_UNORM && availableFormat.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) { return availableFormat; } } return availableFormats[0]; } VkPresentModeKHR chooseSwapPresentMode( const std::vector& availablePresentModes) { VkPresentModeKHR bestMode = VK_PRESENT_MODE_FIFO_KHR; for (const auto& availablePresentMode : availablePresentModes) { if (availablePresentMode == VK_PRESENT_MODE_MAILBOX_KHR) { return availablePresentMode; } else if (availablePresentMode == VK_PRESENT_MODE_IMMEDIATE_KHR) { bestMode = availablePresentMode; } } return bestMode; } VkExtent2D chooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities) { if (capabilities.currentExtent.width != std::numeric_limits::max()) { return capabilities.currentExtent; } else { int width, height; glfwGetFramebufferSize(window, &width, &height); VkExtent2D actualExtent = {static_cast(width), static_cast(height)}; actualExtent.width = std::max( capabilities.minImageExtent.width, std::min(capabilities.maxImageExtent.width, actualExtent.width)); actualExtent.height = std::max( capabilities.minImageExtent.height, std::min(capabilities.maxImageExtent.height, actualExtent.height)); return actualExtent; } } SwapChainSupportDetails querySwapChainSupport(VkPhysicalDevice device) { SwapChainSupportDetails details; vkGetPhysicalDeviceSurfaceCapabilitiesKHR(device, surface, &details.capabilities); uint32_t formatCount; vkGetPhysicalDeviceSurfaceFormatsKHR(device, surface, &formatCount, nullptr); if (formatCount != 0) { details.formats.resize(formatCount); vkGetPhysicalDeviceSurfaceFormatsKHR(device, surface, &formatCount, details.formats.data()); } uint32_t presentModeCount; vkGetPhysicalDeviceSurfacePresentModesKHR(device, surface, &presentModeCount, nullptr); if (presentModeCount != 0) { details.presentModes.resize(presentModeCount); vkGetPhysicalDeviceSurfacePresentModesKHR( device, surface, &presentModeCount, details.presentModes.data()); } return details; } bool isDeviceSuitable(VkPhysicalDevice device) { QueueFamilyIndices indices = findQueueFamilies(device); bool extensionsSupported = checkDeviceExtensionSupport(device); bool swapChainAdequate = false; if (extensionsSupported) { SwapChainSupportDetails swapChainSupport = querySwapChainSupport(device); swapChainAdequate = !swapChainSupport.formats.empty() && !swapChainSupport.presentModes.empty(); } VkPhysicalDeviceFeatures supportedFeatures; vkGetPhysicalDeviceFeatures(device, &supportedFeatures); return indices.isComplete() && extensionsSupported && swapChainAdequate && supportedFeatures.samplerAnisotropy; } bool checkDeviceExtensionSupport(VkPhysicalDevice device) { uint32_t extensionCount; vkEnumerateDeviceExtensionProperties(device, nullptr, &extensionCount, nullptr); std::vector availableExtensions(extensionCount); vkEnumerateDeviceExtensionProperties(device, nullptr, &extensionCount, availableExtensions.data()); std::set requiredExtensions(deviceExtensions.begin(), deviceExtensions.end()); for (const auto& extension : availableExtensions) { requiredExtensions.erase(extension.extensionName); } return requiredExtensions.empty(); } QueueFamilyIndices findQueueFamilies(VkPhysicalDevice device) { QueueFamilyIndices indices; uint32_t queueFamilyCount = 0; vkGetPhysicalDeviceQueueFamilyProperties(device, &queueFamilyCount, nullptr); std::vector queueFamilies(queueFamilyCount); vkGetPhysicalDeviceQueueFamilyProperties(device, &queueFamilyCount, queueFamilies.data()); int i = 0; for (const auto& queueFamily : queueFamilies) { if (queueFamily.queueCount > 0 && queueFamily.queueFlags & VK_QUEUE_GRAPHICS_BIT) { indices.graphicsFamily = i; } VkBool32 presentSupport = false; vkGetPhysicalDeviceSurfaceSupportKHR(device, i, surface, &presentSupport); if (queueFamily.queueCount > 0 && presentSupport) { indices.presentFamily = i; } if (indices.isComplete()) { break; } i++; } return indices; } std::vector getRequiredExtensions() { uint32_t glfwExtensionCount = 0; const char** glfwExtensions; glfwExtensions = glfwGetRequiredInstanceExtensions(&glfwExtensionCount); std::vector extensions(glfwExtensions, glfwExtensions + glfwExtensionCount); if (enableValidationLayers) { extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); } return extensions; } bool checkValidationLayerSupport() { uint32_t layerCount; vkEnumerateInstanceLayerProperties(&layerCount, nullptr); std::vector availableLayers(layerCount); vkEnumerateInstanceLayerProperties(&layerCount, availableLayers.data()); for (const char* layerName : validationLayers) { bool layerFound = false; for (const auto& layerProperties : availableLayers) { if (strcmp(layerName, layerProperties.layerName) == 0) { layerFound = true; break; } } if (!layerFound) { return false; } } return true; } static std::vector readFile(const std::string& filename) { char* file_path = sdkFindFilePath(filename.c_str(), execution_path.c_str()); std::ifstream file(file_path, std::ios::ate | std::ios::binary); if (!file.is_open()) { throw std::runtime_error("failed to open file!"); } size_t fileSize = (size_t)file.tellg(); std::vector buffer(fileSize); file.seekg(0); file.read(buffer.data(), fileSize); file.close(); return buffer; } static VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, VkDebugUtilsMessageTypeFlagsEXT messageType, const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData, void* pUserData) { std::cerr << "validation layer: " << pCallbackData->pMessage << std::endl; return VK_FALSE; } }; int main(int argc, char** argv) { execution_path = argv[0]; std::string image_filename = "lenaRGB.ppm"; if (checkCmdLineFlag(argc, (const char**)argv, "file")) { getCmdLineArgumentString(argc, (const char**)argv, "file", (char**)&image_filename); } vulkanImageCUDA app; try { // This app only works on ppm images app.loadImageData(image_filename); app.run(); } catch (const std::exception& e) { std::cerr << e.what() << std::endl; return EXIT_FAILURE; } return EXIT_SUCCESS; }