/* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define GLFW_INCLUDE_VULKAN #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef _WIN64 #include #include #include #include #include #define _USE_MATH_DEFINES #endif #include #include #include #include "linmath.h" #define WIDTH 800 #define HEIGHT 600 #define VULKAN_VALIDATION 0 const std::vector validationLayers = { "VK_LAYER_LUNARG_standard_validation"}; #if VULKAN_VALIDATION const bool enableValidationLayers = true; #else const bool enableValidationLayers = false; #endif struct QueueFamilyIndices { int graphicsFamily = -1; int presentFamily = -1; bool isComplete() { return graphicsFamily >= 0 && presentFamily >= 0; } }; const std::vector deviceExtensions = { VK_KHR_SWAPCHAIN_EXTENSION_NAME, VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME, VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME, #ifdef _WIN64 VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, #else VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, #endif }; #ifdef _WIN64 class WindowsSecurityAttributes { protected: SECURITY_ATTRIBUTES m_winSecurityAttributes; PSECURITY_DESCRIPTOR m_winPSecurityDescriptor; public: WindowsSecurityAttributes(); SECURITY_ATTRIBUTES* operator&(); ~WindowsSecurityAttributes(); }; WindowsSecurityAttributes::WindowsSecurityAttributes() { m_winPSecurityDescriptor = (PSECURITY_DESCRIPTOR)calloc( 1, SECURITY_DESCRIPTOR_MIN_LENGTH + 2 * sizeof(void**)); // CHECK_NEQ(m_winPSecurityDescriptor, (PSECURITY_DESCRIPTOR)NULL); PSID* ppSID = (PSID*)((PBYTE)m_winPSecurityDescriptor + SECURITY_DESCRIPTOR_MIN_LENGTH); PACL* ppACL = (PACL*)((PBYTE)ppSID + sizeof(PSID*)); InitializeSecurityDescriptor(m_winPSecurityDescriptor, SECURITY_DESCRIPTOR_REVISION); SID_IDENTIFIER_AUTHORITY sidIdentifierAuthority = SECURITY_WORLD_SID_AUTHORITY; AllocateAndInitializeSid(&sidIdentifierAuthority, 1, SECURITY_WORLD_RID, 0, 0, 0, 0, 0, 0, 0, ppSID); EXPLICIT_ACCESS explicitAccess; ZeroMemory(&explicitAccess, sizeof(EXPLICIT_ACCESS)); explicitAccess.grfAccessPermissions = STANDARD_RIGHTS_ALL | SPECIFIC_RIGHTS_ALL; explicitAccess.grfAccessMode = SET_ACCESS; explicitAccess.grfInheritance = INHERIT_ONLY; explicitAccess.Trustee.TrusteeForm = TRUSTEE_IS_SID; explicitAccess.Trustee.TrusteeType = TRUSTEE_IS_WELL_KNOWN_GROUP; explicitAccess.Trustee.ptstrName = (LPTSTR)*ppSID; SetEntriesInAcl(1, &explicitAccess, NULL, ppACL); SetSecurityDescriptorDacl(m_winPSecurityDescriptor, TRUE, *ppACL, FALSE); m_winSecurityAttributes.nLength = sizeof(m_winSecurityAttributes); m_winSecurityAttributes.lpSecurityDescriptor = m_winPSecurityDescriptor; m_winSecurityAttributes.bInheritHandle = TRUE; } SECURITY_ATTRIBUTES* WindowsSecurityAttributes::operator&() { return &m_winSecurityAttributes; } WindowsSecurityAttributes::~WindowsSecurityAttributes() { PSID* ppSID = (PSID*)((PBYTE)m_winPSecurityDescriptor + SECURITY_DESCRIPTOR_MIN_LENGTH); PACL* ppACL = (PACL*)((PBYTE)ppSID + sizeof(PSID*)); if (*ppSID) { FreeSid(*ppSID); } if (*ppACL) { LocalFree(*ppACL); } free(m_winPSecurityDescriptor); } #endif struct UniformBufferObject { mat4x4 model; mat4x4 view; mat4x4 proj; }; struct SwapChainSupportDetails { VkSurfaceCapabilitiesKHR capabilities; std::vector formats; std::vector presentModes; }; struct Vertex { vec4 pos; vec3 color; static VkVertexInputBindingDescription getBindingDescription() { VkVertexInputBindingDescription bindingDescription = {}; bindingDescription.binding = 0; bindingDescription.stride = sizeof(Vertex); bindingDescription.inputRate = VK_VERTEX_INPUT_RATE_VERTEX; return bindingDescription; } static std::array getAttributeDescriptions() { std::array attributeDescriptions = {}; attributeDescriptions[0].binding = 0; attributeDescriptions[0].location = 0; attributeDescriptions[0].format = VK_FORMAT_R32G32B32A32_SFLOAT; attributeDescriptions[0].offset = offsetof(Vertex, pos); attributeDescriptions[1].binding = 0; attributeDescriptions[1].location = 1; attributeDescriptions[1].format = VK_FORMAT_R32G32B32_SFLOAT; attributeDescriptions[1].offset = offsetof(Vertex, color); return attributeDescriptions; } }; size_t mesh_width = 0, mesh_height = 0; std::string execution_path; __global__ void sinewave_gen_kernel(Vertex* vertices, unsigned int width, unsigned int height, float time) { unsigned int x = blockIdx.x * blockDim.x + threadIdx.x; unsigned int y = blockIdx.y * blockDim.y + threadIdx.y; // calculate uv coordinates float u = x / (float)width; float v = y / (float)height; u = u * 2.0f - 1.0f; v = v * 2.0f - 1.0f; // calculate simple sine wave pattern float freq = 4.0f; float w = sinf(u * freq + time) * cosf(v * freq + time) * 0.5f; if (y < height && x < width) { // write output vertex vertices[y * width + x].pos[0] = u; vertices[y * width + x].pos[1] = w; vertices[y * width + x].pos[2] = v; vertices[y * width + x].pos[3] = 1.0f; vertices[y * width + x].color[0] = 1.0f; vertices[y * width + x].color[1] = 0.0f; vertices[y * width + x].color[2] = 0.0f; } } class vulkanCudaApp { public: void run() { initWindow(); initVulkan(); initCuda(); mainLoop(); cleanup(); } private: GLFWwindow* window; VkInstance instance; VkPhysicalDevice physicalDevice = VK_NULL_HANDLE; uint8_t vkDeviceUUID[VK_UUID_SIZE]; VkDevice device; VkQueue graphicsQueue; VkQueue presentQueue; VkSurfaceKHR surface; VkSwapchainKHR swapChain; std::vector swapChainImages; VkFormat swapChainImageFormat; VkExtent2D swapChainExtent; std::vector swapChainImageViews; VkDescriptorSetLayout descriptorSetLayout; VkDescriptorPool descriptorPool; VkDescriptorSet descriptorSet; VkPipelineLayout pipelineLayout; VkRenderPass renderPass; VkPipeline graphicsPipeline; std::vector swapChainFramebuffers; VkCommandPool commandPool; VkBuffer vertexBuffer; VkDeviceMemory vertexBufferMemory; VkBuffer uniformBuffer; VkDeviceMemory uniformBufferMemory; std::vector commandBuffers; VkSemaphore imageAvailableSemaphore; VkSemaphore renderFinishedSemaphore; VkSemaphore cudaUpdateVkVertexBufSemaphore; VkSemaphore vkUpdateCudaVertexBufSemaphore; size_t vertexBufSize = 0; bool startSubmit = 0; double AnimTime = 1.0f; VkDebugReportCallbackEXT callback; #ifdef _WIN64 PFN_vkGetMemoryWin32HandleKHR fpGetMemoryWin32HandleKHR; PFN_vkGetSemaphoreWin32HandleKHR fpGetSemaphoreWin32HandleKHR; #else PFN_vkGetMemoryFdKHR fpGetMemoryFdKHR; PFN_vkGetSemaphoreFdKHR fpGetSemaphoreFdKHR; #endif PFN_vkGetPhysicalDeviceProperties2 fpGetPhysicalDeviceProperties2; // CUDA stuff cudaExternalMemory_t cudaExtMemVertexBuffer; cudaExternalSemaphore_t cudaExtCudaUpdateVkVertexBufSemaphore; cudaExternalSemaphore_t cudaExtVkUpdateCudaVertexBufSemaphore; void* cudaDevVertptr = NULL; cudaStream_t streamToRun; bool checkValidationLayerSupport() { uint32_t layerCount; vkEnumerateInstanceLayerProperties(&layerCount, nullptr); std::vector availableLayers(layerCount); vkEnumerateInstanceLayerProperties(&layerCount, availableLayers.data()); for (const char* layerName : validationLayers) { bool layerFound = false; for (const auto& layerProperties : availableLayers) { if (strcmp(layerName, layerProperties.layerName) == 0) { layerFound = true; break; } } if (!layerFound) { return false; } } return true; } static VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback(VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT objType, uint64_t obj, size_t location, int32_t code, const char* layerPrefix, const char* msg, void* userData) { std::cerr << "validation layer: " << msg << std::endl; return VK_FALSE; } VkResult CreateDebugReportCallbackEXT( VkInstance instance, const VkDebugReportCallbackCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDebugReportCallbackEXT* pCallback) { auto func = (PFN_vkCreateDebugReportCallbackEXT)vkGetInstanceProcAddr( instance, "vkCreateDebugReportCallbackEXT"); if (func != nullptr) { return func(instance, pCreateInfo, pAllocator, pCallback); } else { return VK_ERROR_EXTENSION_NOT_PRESENT; } } void DestroyDebugReportCallbackEXT(VkInstance instance, VkDebugReportCallbackEXT callback, const VkAllocationCallbacks* pAllocator) { auto func = (PFN_vkDestroyDebugReportCallbackEXT)vkGetInstanceProcAddr( instance, "vkDestroyDebugReportCallbackEXT"); if (func != nullptr) { func(instance, callback, pAllocator); } } void setupDebugCallback() { if (!enableValidationLayers) return; VkDebugReportCallbackCreateInfoEXT createInfo = {}; createInfo.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT; createInfo.flags = VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT; createInfo.pfnCallback = debugCallback; if (CreateDebugReportCallbackEXT(instance, &createInfo, nullptr, &callback) != VK_SUCCESS) { throw std::runtime_error("failed to set up debug callback!"); } } void initWindow() { glfwInit(); glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API); glfwWindowHint(GLFW_RESIZABLE, GLFW_FALSE); window = glfwCreateWindow(WIDTH, HEIGHT, "Vulkan-CUDA Interop Sinewave", nullptr, nullptr); } void createInstance() { if (enableValidationLayers && !checkValidationLayerSupport()) { throw std::runtime_error( "validation layers requested, but not available!"); } VkApplicationInfo appInfo = {}; appInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; appInfo.pApplicationName = "Vulkan CUDA Sinewave"; appInfo.applicationVersion = VK_MAKE_VERSION(1, 0, 0); appInfo.pEngineName = "No Engine"; appInfo.engineVersion = VK_MAKE_VERSION(1, 0, 0); appInfo.apiVersion = VK_API_VERSION_1_0; VkInstanceCreateInfo createInfo = {}; createInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; createInfo.pApplicationInfo = &appInfo; uint32_t glfwExtensionCount = 0; const char** glfwExtensions; glfwExtensions = glfwGetRequiredInstanceExtensions(&glfwExtensionCount); std::vector enabledExtensionNameList; enabledExtensionNameList.push_back( VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); enabledExtensionNameList.push_back( VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME); enabledExtensionNameList.push_back( VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME); for (int i = 0; i < glfwExtensionCount; i++) { enabledExtensionNameList.push_back(glfwExtensions[i]); } if (enableValidationLayers) { enabledExtensionNameList.push_back(VK_EXT_DEBUG_REPORT_EXTENSION_NAME); createInfo.enabledLayerCount = static_cast(validationLayers.size()); createInfo.ppEnabledLayerNames = validationLayers.data(); } else { createInfo.enabledLayerCount = 0; } createInfo.enabledExtensionCount = enabledExtensionNameList.size(); createInfo.ppEnabledExtensionNames = enabledExtensionNameList.data(); if (vkCreateInstance(&createInfo, nullptr, &instance) != VK_SUCCESS) { throw std::runtime_error("failed to create instance!"); } else { std::cout << "Instance created successfully!!\n"; } fpGetPhysicalDeviceProperties2 = (PFN_vkGetPhysicalDeviceProperties2)vkGetInstanceProcAddr( instance, "vkGetPhysicalDeviceProperties2"); if (fpGetPhysicalDeviceProperties2 == NULL) { throw std::runtime_error( "Vulkan: Proc address for \"vkGetPhysicalDeviceProperties2KHR\" not " "found.\n"); } #ifdef _WIN64 fpGetMemoryWin32HandleKHR = (PFN_vkGetMemoryWin32HandleKHR)vkGetInstanceProcAddr( instance, "vkGetMemoryWin32HandleKHR"); if (fpGetMemoryWin32HandleKHR == NULL) { throw std::runtime_error( "Vulkan: Proc address for \"vkGetMemoryWin32HandleKHR\" not " "found.\n"); } #else fpGetMemoryFdKHR = (PFN_vkGetMemoryFdKHR)vkGetInstanceProcAddr( instance, "vkGetMemoryFdKHR"); if (fpGetMemoryFdKHR == NULL) { throw std::runtime_error( "Vulkan: Proc address for \"vkGetMemoryFdKHR\" not found.\n"); } #endif } void initVulkan() { createInstance(); setupDebugCallback(); createSurface(); pickPhysicalDevice(); createLogicalDevice(); getKhrExtensionsFn(); createSwapChain(); createImageViews(); createRenderPass(); createDescriptorSetLayout(); createGraphicsPipeline(); createFramebuffers(); createCommandPool(); createVertexBuffer(); createUniformBuffer(); createDescriptorPool(); createDescriptorSet(); createCommandBuffers(); createSyncObjects(); createSyncObjectsExt(); } void initCuda() { setCudaVkDevice(); cudaVkImportVertexMem(); cudaInitVertexMem(); cudaVkImportSemaphore(); } void createSurface() { if (glfwCreateWindowSurface(instance, window, nullptr, &surface) != VK_SUCCESS) { throw std::runtime_error("failed to create window surface!"); } } void pickPhysicalDevice() { uint32_t deviceCount = 0; vkEnumeratePhysicalDevices(instance, &deviceCount, nullptr); if (deviceCount == 0) { throw std::runtime_error("failed to find GPUs with Vulkan support!"); } std::vector devices(deviceCount); vkEnumeratePhysicalDevices(instance, &deviceCount, devices.data()); for (const auto& device : devices) { if (isDeviceSuitable(device)) { physicalDevice = device; break; } } if (physicalDevice == VK_NULL_HANDLE) { throw std::runtime_error("failed to find a suitable GPU!"); } std::cout << "Selected physical device = " << physicalDevice << std::endl; VkPhysicalDeviceIDProperties vkPhysicalDeviceIDProperties = {}; vkPhysicalDeviceIDProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES; vkPhysicalDeviceIDProperties.pNext = NULL; VkPhysicalDeviceProperties2 vkPhysicalDeviceProperties2 = {}; vkPhysicalDeviceProperties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; vkPhysicalDeviceProperties2.pNext = &vkPhysicalDeviceIDProperties; fpGetPhysicalDeviceProperties2(physicalDevice, &vkPhysicalDeviceProperties2); memcpy(vkDeviceUUID, vkPhysicalDeviceIDProperties.deviceUUID, sizeof(vkDeviceUUID)); } int setCudaVkDevice() { int current_device = 0; int device_count = 0; int devices_prohibited = 0; cudaDeviceProp deviceProp; checkCudaErrors(cudaGetDeviceCount(&device_count)); if (device_count == 0) { fprintf(stderr, "CUDA error: no devices supporting CUDA.\n"); exit(EXIT_FAILURE); } // Find the GPU which is selected by Vulkan while (current_device < device_count) { cudaGetDeviceProperties(&deviceProp, current_device); if ((deviceProp.computeMode != cudaComputeModeProhibited)) { // Compare the cuda device UUID with vulkan UUID int ret = memcmp(&deviceProp.uuid, &vkDeviceUUID, VK_UUID_SIZE); if (ret == 0) { checkCudaErrors(cudaSetDevice(current_device)); checkCudaErrors(cudaGetDeviceProperties(&deviceProp, current_device)); printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", current_device, deviceProp.name, deviceProp.major, deviceProp.minor); return current_device; } } else { devices_prohibited++; } current_device++; } if (devices_prohibited == device_count) { fprintf(stderr, "CUDA error:" " No Vulkan-CUDA Interop capable GPU found.\n"); exit(EXIT_FAILURE); } return -1; } bool isDeviceSuitable(VkPhysicalDevice device) { QueueFamilyIndices indices = findQueueFamilies(device); bool extensionsSupported = checkDeviceExtensionSupport(device); bool swapChainAdequate = false; if (extensionsSupported) { SwapChainSupportDetails swapChainSupport = querySwapChainSupport(device); swapChainAdequate = !swapChainSupport.formats.empty() && !swapChainSupport.presentModes.empty(); } return indices.isComplete() && extensionsSupported && swapChainAdequate; } bool checkDeviceExtensionSupport(VkPhysicalDevice device) { uint32_t extensionCount; vkEnumerateDeviceExtensionProperties(device, nullptr, &extensionCount, nullptr); std::vector availableExtensions(extensionCount); vkEnumerateDeviceExtensionProperties(device, nullptr, &extensionCount, availableExtensions.data()); std::set requiredExtensions(deviceExtensions.begin(), deviceExtensions.end()); for (const auto& extension : availableExtensions) { requiredExtensions.erase(extension.extensionName); } return requiredExtensions.empty(); } QueueFamilyIndices findQueueFamilies(VkPhysicalDevice device) { QueueFamilyIndices indices; uint32_t queueFamilyCount = 0; vkGetPhysicalDeviceQueueFamilyProperties(device, &queueFamilyCount, nullptr); std::vector queueFamilies(queueFamilyCount); vkGetPhysicalDeviceQueueFamilyProperties(device, &queueFamilyCount, queueFamilies.data()); int i = 0; for (const auto& queueFamily : queueFamilies) { if (queueFamily.queueCount > 0 && queueFamily.queueFlags & VK_QUEUE_GRAPHICS_BIT) { indices.graphicsFamily = i; } VkBool32 presentSupport = false; vkGetPhysicalDeviceSurfaceSupportKHR(device, i, surface, &presentSupport); if (queueFamily.queueCount > 0 && presentSupport) { indices.presentFamily = i; } if (indices.isComplete()) { break; } i++; } return indices; } SwapChainSupportDetails querySwapChainSupport(VkPhysicalDevice device) { SwapChainSupportDetails details; vkGetPhysicalDeviceSurfaceCapabilitiesKHR(device, surface, &details.capabilities); uint32_t formatCount; vkGetPhysicalDeviceSurfaceFormatsKHR(device, surface, &formatCount, nullptr); if (formatCount != 0) { details.formats.resize(formatCount); vkGetPhysicalDeviceSurfaceFormatsKHR(device, surface, &formatCount, details.formats.data()); } uint32_t presentModeCount; vkGetPhysicalDeviceSurfacePresentModesKHR(device, surface, &presentModeCount, nullptr); if (presentModeCount != 0) { details.presentModes.resize(presentModeCount); vkGetPhysicalDeviceSurfacePresentModesKHR( device, surface, &presentModeCount, details.presentModes.data()); } return details; } VkSurfaceFormatKHR chooseSwapSurfaceFormat( const std::vector& availableFormats) { if (availableFormats.size() == 1 && availableFormats[0].format == VK_FORMAT_UNDEFINED) { return {VK_FORMAT_B8G8R8A8_UNORM, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR}; } for (const auto& availableFormat : availableFormats) { if (availableFormat.format == VK_FORMAT_B8G8R8A8_UNORM && availableFormat.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) { return availableFormat; } } return availableFormats[0]; } VkPresentModeKHR chooseSwapPresentMode( const std::vector availablePresentModes) { VkPresentModeKHR bestMode = VK_PRESENT_MODE_FIFO_KHR; for (const auto& availablePresentMode : availablePresentModes) { if (availablePresentMode == VK_PRESENT_MODE_MAILBOX_KHR) { return availablePresentMode; } else if (availablePresentMode == VK_PRESENT_MODE_IMMEDIATE_KHR) { bestMode = availablePresentMode; } } return bestMode; } VkExtent2D chooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities) { if (capabilities.currentExtent.width != std::numeric_limits::max()) { return capabilities.currentExtent; } else { VkExtent2D actualExtent = {WIDTH, HEIGHT}; actualExtent.width = std::max( capabilities.minImageExtent.width, std::min(capabilities.maxImageExtent.width, actualExtent.width)); actualExtent.height = std::max( capabilities.minImageExtent.height, std::min(capabilities.maxImageExtent.height, actualExtent.height)); return actualExtent; } } void createLogicalDevice() { QueueFamilyIndices indices = findQueueFamilies(physicalDevice); std::vector queueCreateInfos; std::set uniqueQueueFamilies = {indices.graphicsFamily, indices.presentFamily}; float queuePriority = 1.0f; for (int queueFamily : uniqueQueueFamilies) { VkDeviceQueueCreateInfo queueCreateInfo = {}; queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; queueCreateInfo.queueFamilyIndex = queueFamily; queueCreateInfo.queueCount = 1; queueCreateInfo.pQueuePriorities = &queuePriority; queueCreateInfos.push_back(queueCreateInfo); } VkPhysicalDeviceFeatures deviceFeatures = {}; VkDeviceCreateInfo createInfo = {}; createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; createInfo.pQueueCreateInfos = queueCreateInfos.data(); createInfo.queueCreateInfoCount = queueCreateInfos.size(); createInfo.pEnabledFeatures = &deviceFeatures; std::vector enabledExtensionNameList; for (int i = 0; i < deviceExtensions.size(); i++) { enabledExtensionNameList.push_back(deviceExtensions[i]); } if (enableValidationLayers) { createInfo.enabledLayerCount = static_cast(validationLayers.size()); createInfo.ppEnabledLayerNames = validationLayers.data(); } else { createInfo.enabledLayerCount = 0; } createInfo.enabledExtensionCount = static_cast(enabledExtensionNameList.size()); createInfo.ppEnabledExtensionNames = enabledExtensionNameList.data(); if (vkCreateDevice(physicalDevice, &createInfo, nullptr, &device) != VK_SUCCESS) { throw std::runtime_error("failed to create logical device!"); } vkGetDeviceQueue(device, indices.graphicsFamily, 0, &graphicsQueue); vkGetDeviceQueue(device, indices.presentFamily, 0, &presentQueue); } void createSwapChain() { SwapChainSupportDetails swapChainSupport = querySwapChainSupport(physicalDevice); VkSurfaceFormatKHR surfaceFormat = chooseSwapSurfaceFormat(swapChainSupport.formats); VkPresentModeKHR presentMode = chooseSwapPresentMode(swapChainSupport.presentModes); VkExtent2D extent = chooseSwapExtent(swapChainSupport.capabilities); uint32_t imageCount = swapChainSupport.capabilities.minImageCount + 1; if (swapChainSupport.capabilities.maxImageCount > 0 && imageCount > swapChainSupport.capabilities.maxImageCount) { imageCount = swapChainSupport.capabilities.maxImageCount; } VkSwapchainCreateInfoKHR createInfo = {}; createInfo.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR; createInfo.surface = surface; createInfo.minImageCount = imageCount; createInfo.imageFormat = surfaceFormat.format; createInfo.imageColorSpace = surfaceFormat.colorSpace; createInfo.imageExtent = extent; createInfo.imageArrayLayers = 1; createInfo.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; QueueFamilyIndices indices = findQueueFamilies(physicalDevice); uint32_t queueFamilyIndices[] = {(uint32_t)indices.graphicsFamily, (uint32_t)indices.presentFamily}; if (indices.graphicsFamily != indices.presentFamily) { createInfo.imageSharingMode = VK_SHARING_MODE_CONCURRENT; createInfo.queueFamilyIndexCount = 2; createInfo.pQueueFamilyIndices = queueFamilyIndices; } else { createInfo.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; createInfo.queueFamilyIndexCount = 0; // Optional createInfo.pQueueFamilyIndices = nullptr; // Optional } createInfo.preTransform = swapChainSupport.capabilities.currentTransform; createInfo.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; createInfo.presentMode = presentMode; createInfo.clipped = VK_TRUE; createInfo.oldSwapchain = VK_NULL_HANDLE; if (vkCreateSwapchainKHR(device, &createInfo, nullptr, &swapChain) != VK_SUCCESS) { throw std::runtime_error("failed to create swap chain!"); } else { std::cout << "Swapchain created!!\n"; } vkGetSwapchainImagesKHR(device, swapChain, &imageCount, nullptr); swapChainImages.resize(imageCount); vkGetSwapchainImagesKHR(device, swapChain, &imageCount, swapChainImages.data()); swapChainImageFormat = surfaceFormat.format; swapChainExtent = extent; } void createImageViews() { swapChainImageViews.resize(swapChainImages.size()); for (size_t i = 0; i < swapChainImages.size(); i++) { VkImageViewCreateInfo createInfo = {}; createInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; createInfo.image = swapChainImages[i]; createInfo.viewType = VK_IMAGE_VIEW_TYPE_2D; createInfo.format = swapChainImageFormat; createInfo.components.r = VK_COMPONENT_SWIZZLE_IDENTITY; createInfo.components.g = VK_COMPONENT_SWIZZLE_IDENTITY; createInfo.components.b = VK_COMPONENT_SWIZZLE_IDENTITY; createInfo.components.a = VK_COMPONENT_SWIZZLE_IDENTITY; createInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; createInfo.subresourceRange.baseMipLevel = 0; createInfo.subresourceRange.levelCount = 1; createInfo.subresourceRange.baseArrayLayer = 0; createInfo.subresourceRange.layerCount = 1; if (vkCreateImageView(device, &createInfo, nullptr, &swapChainImageViews[i]) != VK_SUCCESS) { throw std::runtime_error("failed to create image views!"); } } } void createDescriptorSetLayout() { VkDescriptorSetLayoutBinding uboLayoutBinding = {}; uboLayoutBinding.binding = 0; uboLayoutBinding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; uboLayoutBinding.descriptorCount = 1; uboLayoutBinding.stageFlags = VK_SHADER_STAGE_VERTEX_BIT; uboLayoutBinding.pImmutableSamplers = nullptr; // Optional VkDescriptorSetLayoutCreateInfo layoutInfo = {}; layoutInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; layoutInfo.bindingCount = 1; layoutInfo.pBindings = &uboLayoutBinding; if (vkCreateDescriptorSetLayout(device, &layoutInfo, nullptr, &descriptorSetLayout) != VK_SUCCESS) { throw std::runtime_error("failed to create descriptor set layout!"); } } void createGraphicsPipeline() { auto vertShaderCode = readFile("vert.spv"); auto fragShaderCode = readFile("frag.spv"); VkShaderModule vertShaderModule; VkShaderModule fragShaderModule; vertShaderModule = createShaderModule(vertShaderCode); fragShaderModule = createShaderModule(fragShaderCode); VkPipelineShaderStageCreateInfo vertShaderStageInfo = {}; vertShaderStageInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; vertShaderStageInfo.stage = VK_SHADER_STAGE_VERTEX_BIT; vertShaderStageInfo.module = vertShaderModule; vertShaderStageInfo.pName = "main"; VkPipelineShaderStageCreateInfo fragShaderStageInfo = {}; fragShaderStageInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; fragShaderStageInfo.stage = VK_SHADER_STAGE_FRAGMENT_BIT; fragShaderStageInfo.module = fragShaderModule; fragShaderStageInfo.pName = "main"; VkPipelineShaderStageCreateInfo shaderStages[] = {vertShaderStageInfo, fragShaderStageInfo}; VkPipelineVertexInputStateCreateInfo vertexInputInfo = {}; vertexInputInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; auto bindingDescription = Vertex::getBindingDescription(); auto attributeDescriptions = Vertex::getAttributeDescriptions(); vertexInputInfo.vertexBindingDescriptionCount = 1; vertexInputInfo.pVertexBindingDescriptions = &bindingDescription; vertexInputInfo.vertexAttributeDescriptionCount = static_cast(attributeDescriptions.size()); vertexInputInfo.pVertexAttributeDescriptions = attributeDescriptions.data(); VkPipelineInputAssemblyStateCreateInfo inputAssembly = {}; inputAssembly.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; inputAssembly.topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST; inputAssembly.primitiveRestartEnable = VK_FALSE; VkViewport viewport = {}; viewport.x = 0.0f; viewport.y = 0.0f; viewport.width = (float)swapChainExtent.width; viewport.height = (float)swapChainExtent.height; viewport.minDepth = 0.0f; viewport.maxDepth = 1.0f; VkRect2D scissor = {}; scissor.offset = {0, 0}; scissor.extent = swapChainExtent; VkPipelineViewportStateCreateInfo viewportState = {}; viewportState.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; viewportState.viewportCount = 1; viewportState.pViewports = &viewport; viewportState.scissorCount = 1; viewportState.pScissors = &scissor; VkPipelineRasterizationStateCreateInfo rasterizer = {}; rasterizer.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; rasterizer.depthClampEnable = VK_FALSE; rasterizer.rasterizerDiscardEnable = VK_FALSE; rasterizer.polygonMode = VK_POLYGON_MODE_FILL; rasterizer.lineWidth = 1.0f; rasterizer.cullMode = VK_CULL_MODE_BACK_BIT; rasterizer.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; rasterizer.depthBiasEnable = VK_FALSE; rasterizer.depthBiasConstantFactor = 0.0f; // Optional rasterizer.depthBiasClamp = 0.0f; // Optional rasterizer.depthBiasSlopeFactor = 0.0f; // Optional VkPipelineMultisampleStateCreateInfo multisampling = {}; multisampling.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; multisampling.sampleShadingEnable = VK_FALSE; multisampling.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; multisampling.minSampleShading = 1.0f; // Optional multisampling.pSampleMask = nullptr; // Optional multisampling.alphaToCoverageEnable = VK_FALSE; // Optional multisampling.alphaToOneEnable = VK_FALSE; // Optional VkPipelineColorBlendAttachmentState colorBlendAttachment = {}; colorBlendAttachment.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; colorBlendAttachment.blendEnable = VK_FALSE; colorBlendAttachment.srcColorBlendFactor = VK_BLEND_FACTOR_ONE; // Optional colorBlendAttachment.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO; // Optional colorBlendAttachment.colorBlendOp = VK_BLEND_OP_ADD; // Optional colorBlendAttachment.srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE; // Optional colorBlendAttachment.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; // Optional colorBlendAttachment.alphaBlendOp = VK_BLEND_OP_ADD; // Optional VkPipelineColorBlendStateCreateInfo colorBlending = {}; colorBlending.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; colorBlending.logicOpEnable = VK_FALSE; colorBlending.logicOp = VK_LOGIC_OP_COPY; // Optional colorBlending.attachmentCount = 1; colorBlending.pAttachments = &colorBlendAttachment; colorBlending.blendConstants[0] = 0.0f; // Optional colorBlending.blendConstants[1] = 0.0f; // Optional colorBlending.blendConstants[2] = 0.0f; // Optional colorBlending.blendConstants[3] = 0.0f; // Optional #if 0 VkDynamicState dynamicStates[] = { VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_LINE_WIDTH }; VkPipelineDynamicStateCreateInfo dynamicState = {}; dynamicState.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; dynamicState.dynamicStateCount = 2; dynamicState.pDynamicStates = dynamicStates; #endif VkPipelineLayoutCreateInfo pipelineLayoutInfo = {}; pipelineLayoutInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; pipelineLayoutInfo.setLayoutCount = 1; // Optional pipelineLayoutInfo.pSetLayouts = &descriptorSetLayout; // Optional pipelineLayoutInfo.pushConstantRangeCount = 0; // Optional pipelineLayoutInfo.pPushConstantRanges = nullptr; // Optional if (vkCreatePipelineLayout(device, &pipelineLayoutInfo, nullptr, &pipelineLayout) != VK_SUCCESS) { throw std::runtime_error("failed to create pipeline layout!"); } VkGraphicsPipelineCreateInfo pipelineInfo = {}; pipelineInfo.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; pipelineInfo.stageCount = 2; pipelineInfo.pStages = shaderStages; pipelineInfo.pVertexInputState = &vertexInputInfo; pipelineInfo.pInputAssemblyState = &inputAssembly; pipelineInfo.pViewportState = &viewportState; pipelineInfo.pRasterizationState = &rasterizer; pipelineInfo.pMultisampleState = &multisampling; pipelineInfo.pDepthStencilState = nullptr; // Optional pipelineInfo.pColorBlendState = &colorBlending; pipelineInfo.pDynamicState = nullptr; // Optional pipelineInfo.layout = pipelineLayout; pipelineInfo.renderPass = renderPass; pipelineInfo.subpass = 0; pipelineInfo.basePipelineHandle = VK_NULL_HANDLE; // Optional pipelineInfo.basePipelineIndex = -1; // Optional if (vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &pipelineInfo, nullptr, &graphicsPipeline) != VK_SUCCESS) { throw std::runtime_error("failed to create graphics pipeline!"); } else { std::cout << "Pipeline created successfully!!\n"; } vkDestroyShaderModule(device, fragShaderModule, nullptr); vkDestroyShaderModule(device, vertShaderModule, nullptr); } void createRenderPass() { VkAttachmentDescription colorAttachment = {}; colorAttachment.format = swapChainImageFormat; colorAttachment.samples = VK_SAMPLE_COUNT_1_BIT; colorAttachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; colorAttachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; colorAttachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; colorAttachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; colorAttachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; colorAttachment.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; VkAttachmentReference colorAttachmentRef = {}; colorAttachmentRef.attachment = 0; colorAttachmentRef.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; VkSubpassDescription subpass = {}; subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; subpass.colorAttachmentCount = 1; subpass.pColorAttachments = &colorAttachmentRef; VkRenderPassCreateInfo renderPassInfo = {}; renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; renderPassInfo.attachmentCount = 1; renderPassInfo.pAttachments = &colorAttachment; renderPassInfo.subpassCount = 1; renderPassInfo.pSubpasses = &subpass; VkSubpassDependency dependency = {}; dependency.srcSubpass = VK_SUBPASS_EXTERNAL; dependency.dstSubpass = 0; dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; dependency.srcAccessMask = 0; dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; dependency.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; renderPassInfo.dependencyCount = 1; renderPassInfo.pDependencies = &dependency; if (vkCreateRenderPass(device, &renderPassInfo, nullptr, &renderPass) != VK_SUCCESS) { throw std::runtime_error("failed to create render pass!"); } } void createFramebuffers() { swapChainFramebuffers.resize(swapChainImageViews.size()); for (size_t i = 0; i < swapChainImageViews.size(); i++) { VkImageView attachments[] = {swapChainImageViews[i]}; VkFramebufferCreateInfo framebufferInfo = {}; framebufferInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; framebufferInfo.renderPass = renderPass; framebufferInfo.attachmentCount = 1; framebufferInfo.pAttachments = attachments; framebufferInfo.width = swapChainExtent.width; framebufferInfo.height = swapChainExtent.height; framebufferInfo.layers = 1; if (vkCreateFramebuffer(device, &framebufferInfo, nullptr, &swapChainFramebuffers[i]) != VK_SUCCESS) { throw std::runtime_error("failed to create framebuffer!"); } } } void createCommandPool() { QueueFamilyIndices queueFamilyIndices = findQueueFamilies(physicalDevice); VkCommandPoolCreateInfo poolInfo = {}; poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; poolInfo.queueFamilyIndex = queueFamilyIndices.graphicsFamily; poolInfo.flags = 0; // Optional if (vkCreateCommandPool(device, &poolInfo, nullptr, &commandPool) != VK_SUCCESS) { throw std::runtime_error("failed to create command pool!"); } } void createBuffer(VkDeviceSize size, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties, VkBuffer& buffer, VkDeviceMemory& bufferMemory) { VkBufferCreateInfo bufferInfo = {}; bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; bufferInfo.size = size; bufferInfo.usage = usage; bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; if (vkCreateBuffer(device, &bufferInfo, nullptr, &buffer) != VK_SUCCESS) { throw std::runtime_error("failed to create buffer!"); } VkMemoryRequirements memRequirements; vkGetBufferMemoryRequirements(device, buffer, &memRequirements); VkMemoryAllocateInfo allocInfo = {}; allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; allocInfo.allocationSize = memRequirements.size; allocInfo.memoryTypeIndex = findMemoryType(memRequirements.memoryTypeBits, properties); if (vkAllocateMemory(device, &allocInfo, nullptr, &bufferMemory) != VK_SUCCESS) { throw std::runtime_error("failed to allocate buffer memory!"); } vkBindBufferMemory(device, buffer, bufferMemory, 0); } void createBufferExtMem(VkDeviceSize size, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties, VkExternalMemoryHandleTypeFlagsKHR extMemHandleType, VkBuffer& buffer, VkDeviceMemory& bufferMemory) { VkBufferCreateInfo bufferInfo = {}; bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; bufferInfo.size = size; bufferInfo.usage = usage; bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; if (vkCreateBuffer(device, &bufferInfo, nullptr, &buffer) != VK_SUCCESS) { throw std::runtime_error("failed to create buffer!"); } VkMemoryRequirements memRequirements; vkGetBufferMemoryRequirements(device, buffer, &memRequirements); #ifdef _WIN64 WindowsSecurityAttributes winSecurityAttributes; VkExportMemoryWin32HandleInfoKHR vulkanExportMemoryWin32HandleInfoKHR = {}; vulkanExportMemoryWin32HandleInfoKHR.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_WIN32_HANDLE_INFO_KHR; vulkanExportMemoryWin32HandleInfoKHR.pNext = NULL; vulkanExportMemoryWin32HandleInfoKHR.pAttributes = &winSecurityAttributes; vulkanExportMemoryWin32HandleInfoKHR.dwAccess = DXGI_SHARED_RESOURCE_READ | DXGI_SHARED_RESOURCE_WRITE; vulkanExportMemoryWin32HandleInfoKHR.name = (LPCWSTR)NULL; #endif VkExportMemoryAllocateInfoKHR vulkanExportMemoryAllocateInfoKHR = {}; vulkanExportMemoryAllocateInfoKHR.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR; #ifdef _WIN64 vulkanExportMemoryAllocateInfoKHR.pNext = extMemHandleType & VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR ? &vulkanExportMemoryWin32HandleInfoKHR : NULL; vulkanExportMemoryAllocateInfoKHR.handleTypes = extMemHandleType; #else vulkanExportMemoryAllocateInfoKHR.pNext = NULL; vulkanExportMemoryAllocateInfoKHR.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT; #endif VkMemoryAllocateInfo allocInfo = {}; allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; allocInfo.pNext = &vulkanExportMemoryAllocateInfoKHR; allocInfo.allocationSize = memRequirements.size; allocInfo.memoryTypeIndex = findMemoryType(memRequirements.memoryTypeBits, properties); if (vkAllocateMemory(device, &allocInfo, nullptr, &bufferMemory) != VK_SUCCESS) { throw std::runtime_error("failed to allocate external buffer memory!"); } vkBindBufferMemory(device, buffer, bufferMemory, 0); } void createVertexBuffer() { mesh_width = swapChainExtent.width / 2; mesh_height = swapChainExtent.height / 2; vertexBufSize = mesh_height * mesh_width; VkDeviceSize bufferSize = sizeof(Vertex) * vertexBufSize; #ifdef _WIN64 if (IsWindows8OrGreater()) { createBufferExtMem(bufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT, vertexBuffer, vertexBufferMemory); } else { createBufferExtMem(bufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT, vertexBuffer, vertexBufferMemory); } #else createBufferExtMem(bufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, vertexBuffer, vertexBufferMemory); #endif } void cudaInitVertexMem() { checkCudaErrors(cudaStreamCreate(&streamToRun)); dim3 block(16, 16, 1); dim3 grid(mesh_width / 16, mesh_height / 16, 1); Vertex* vertices = (Vertex*)cudaDevVertptr; sinewave_gen_kernel<<>>(vertices, mesh_width, mesh_height, 1.0); checkCudaErrors(cudaStreamSynchronize(streamToRun)); } void createUniformBuffer() { VkDeviceSize bufferSize = sizeof(UniformBufferObject); createBuffer(bufferSize, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, uniformBuffer, uniformBufferMemory); } uint32_t findMemoryType(uint32_t typeFilter, VkMemoryPropertyFlags properties) { VkPhysicalDeviceMemoryProperties memProperties; vkGetPhysicalDeviceMemoryProperties(physicalDevice, &memProperties); for (uint32_t i = 0; i < memProperties.memoryTypeCount; i++) { if (typeFilter & (1 << i) && (memProperties.memoryTypes[i].propertyFlags & properties) == properties) { return i; } } throw std::runtime_error("failed to find suitable memory type!"); } void getKhrExtensionsFn() { #ifdef _WIN64 fpGetSemaphoreWin32HandleKHR = (PFN_vkGetSemaphoreWin32HandleKHR)vkGetDeviceProcAddr( device, "vkGetSemaphoreWin32HandleKHR"); if (fpGetSemaphoreWin32HandleKHR == NULL) { throw std::runtime_error( "Vulkan: Proc address for \"vkGetSemaphoreWin32HandleKHR\" not " "found.\n"); } #else fpGetSemaphoreFdKHR = (PFN_vkGetSemaphoreFdKHR)vkGetDeviceProcAddr( device, "vkGetSemaphoreFdKHR"); if (fpGetSemaphoreFdKHR == NULL) { throw std::runtime_error( "Vulkan: Proc address for \"vkGetSemaphoreFdKHR\" not found.\n"); } #endif } void createCommandBuffers() { commandBuffers.resize(swapChainFramebuffers.size()); VkCommandBufferAllocateInfo allocInfo = {}; allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; allocInfo.commandPool = commandPool; allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; allocInfo.commandBufferCount = (uint32_t)commandBuffers.size(); if (vkAllocateCommandBuffers(device, &allocInfo, commandBuffers.data()) != VK_SUCCESS) { throw std::runtime_error("failed to allocate command buffers!"); } for (size_t i = 0; i < commandBuffers.size(); i++) { VkCommandBufferBeginInfo beginInfo = {}; beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; beginInfo.flags = VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT; beginInfo.pInheritanceInfo = nullptr; // Optional if (vkBeginCommandBuffer(commandBuffers[i], &beginInfo) != VK_SUCCESS) { throw std::runtime_error("failed to begin recording command buffer!"); } VkRenderPassBeginInfo renderPassInfo = {}; renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; renderPassInfo.renderPass = renderPass; renderPassInfo.framebuffer = swapChainFramebuffers[i]; renderPassInfo.renderArea.offset = {0, 0}; renderPassInfo.renderArea.extent = swapChainExtent; VkClearValue clearColor = {0.0f, 0.0f, 0.0f, 1.0f}; renderPassInfo.clearValueCount = 1; renderPassInfo.pClearValues = &clearColor; vkCmdBeginRenderPass(commandBuffers[i], &renderPassInfo, VK_SUBPASS_CONTENTS_INLINE); vkCmdBindPipeline(commandBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, graphicsPipeline); VkBuffer vertexBuffers[] = {vertexBuffer}; VkDeviceSize offsets[] = {0}; vkCmdBindVertexBuffers(commandBuffers[i], 0, 1, vertexBuffers, offsets); vkCmdBindDescriptorSets(commandBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, &descriptorSet, 0, nullptr); vkCmdDraw(commandBuffers[i], static_cast(vertexBufSize), 1, 0, 0); vkCmdEndRenderPass(commandBuffers[i]); if (vkEndCommandBuffer(commandBuffers[i]) != VK_SUCCESS) { throw std::runtime_error("failed to record command buffer!"); } } } VkShaderModule createShaderModule(const std::vector& code) { VkShaderModuleCreateInfo createInfo = {}; createInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; createInfo.codeSize = code.size(); createInfo.pCode = reinterpret_cast(code.data()); VkShaderModule shaderModule; if (vkCreateShaderModule(device, &createInfo, nullptr, &shaderModule) != VK_SUCCESS) { throw std::runtime_error("failed to create shader module!"); } return shaderModule; } static std::vector readFile(const std::string& filename) { char* file_path = sdkFindFilePath(filename.c_str(), execution_path.c_str()); std::ifstream file(file_path, std::ios::ate | std::ios::binary); if (!file.is_open()) { throw std::runtime_error("failed to open shader spv file!\n"); } size_t fileSize = (size_t)file.tellg(); std::vector buffer(fileSize); file.seekg(0); file.read(buffer.data(), fileSize); file.close(); return buffer; } void mainLoop() { updateUniformBuffer(); while (!glfwWindowShouldClose(window)) { glfwPollEvents(); drawFrame(); } vkDeviceWaitIdle(device); } void updateUniformBuffer() { UniformBufferObject ubo = {}; mat4x4_identity(ubo.model); mat4x4 Model; mat4x4_dup(Model, ubo.model); mat4x4_rotate(ubo.model, Model, 1.0f, 0.0f, 1.0f, degreesToRadians(45.0f)); vec3 eye = {2.0f, 2.0f, 2.0f}; vec3 center = {0.0f, 0.0f, 0.0f}; vec3 up = {0.0f, 0.0f, 1.0f}; mat4x4_look_at(ubo.view, eye, center, up); mat4x4_perspective(ubo.proj, degreesToRadians(45.0f), swapChainExtent.width / (float)swapChainExtent.height, 0.1f, 10.0f); ubo.proj[1][1] *= -1; void* data; vkMapMemory(device, uniformBufferMemory, 0, sizeof(ubo), 0, &data); memcpy(data, &ubo, sizeof(ubo)); vkUnmapMemory(device, uniformBufferMemory); } void createDescriptorPool() { VkDescriptorPoolSize poolSize = {}; poolSize.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; poolSize.descriptorCount = 1; VkDescriptorPoolCreateInfo poolInfo = {}; poolInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; poolInfo.poolSizeCount = 1; poolInfo.pPoolSizes = &poolSize; poolInfo.maxSets = 1; if (vkCreateDescriptorPool(device, &poolInfo, nullptr, &descriptorPool) != VK_SUCCESS) { throw std::runtime_error("failed to create descriptor pool!"); } } void createDescriptorSet() { VkDescriptorSetLayout layouts[] = {descriptorSetLayout}; VkDescriptorSetAllocateInfo allocInfo = {}; allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; allocInfo.descriptorPool = descriptorPool; allocInfo.descriptorSetCount = 1; allocInfo.pSetLayouts = layouts; if (vkAllocateDescriptorSets(device, &allocInfo, &descriptorSet) != VK_SUCCESS) { throw std::runtime_error("failed to allocate descriptor set!"); } VkDescriptorBufferInfo bufferInfo = {}; bufferInfo.buffer = uniformBuffer; bufferInfo.offset = 0; bufferInfo.range = sizeof(UniformBufferObject); VkWriteDescriptorSet descriptorWrite = {}; descriptorWrite.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; descriptorWrite.dstSet = descriptorSet; descriptorWrite.dstBinding = 0; descriptorWrite.dstArrayElement = 0; descriptorWrite.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; descriptorWrite.descriptorCount = 1; descriptorWrite.pBufferInfo = &bufferInfo; descriptorWrite.pImageInfo = nullptr; // Optional descriptorWrite.pTexelBufferView = nullptr; // Optional vkUpdateDescriptorSets(device, 1, &descriptorWrite, 0, nullptr); } void drawFrame() { uint32_t imageIndex; vkAcquireNextImageKHR(device, swapChain, std::numeric_limits::max(), imageAvailableSemaphore, VK_NULL_HANDLE, &imageIndex); if (!startSubmit) { submitVulkan(imageIndex); startSubmit = 1; } else { submitVulkanCuda(imageIndex); } VkPresentInfoKHR presentInfo = {}; presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; VkSemaphore signalSemaphores[] = {renderFinishedSemaphore}; presentInfo.waitSemaphoreCount = 1; presentInfo.pWaitSemaphores = signalSemaphores; VkSwapchainKHR swapChains[] = {swapChain}; presentInfo.swapchainCount = 1; presentInfo.pSwapchains = swapChains; presentInfo.pImageIndices = &imageIndex; presentInfo.pResults = nullptr; // Optional vkQueuePresentKHR(presentQueue, &presentInfo); cudaUpdateVertexBuffer(); // Added sleep of 5 millisecs so that CPU does not submit too much work to // GPU std::this_thread::sleep_for(std::chrono::microseconds(5000)); } void submitVulkan(uint32_t imageIndex) { VkSubmitInfo submitInfo = {}; submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; VkSemaphore waitSemaphores[] = {imageAvailableSemaphore}; VkPipelineStageFlags waitStages[] = { VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT}; submitInfo.waitSemaphoreCount = 1; submitInfo.pWaitSemaphores = waitSemaphores; submitInfo.pWaitDstStageMask = waitStages; submitInfo.commandBufferCount = 1; submitInfo.pCommandBuffers = &commandBuffers[imageIndex]; VkSemaphore signalSemaphores[] = {renderFinishedSemaphore, vkUpdateCudaVertexBufSemaphore}; submitInfo.signalSemaphoreCount = 2; submitInfo.pSignalSemaphores = signalSemaphores; if (vkQueueSubmit(graphicsQueue, 1, &submitInfo, VK_NULL_HANDLE) != VK_SUCCESS) { throw std::runtime_error("failed to submit draw command buffer!"); } } void submitVulkanCuda(uint32_t imageIndex) { VkSubmitInfo submitInfo = {}; submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; VkSemaphore waitSemaphores[] = {imageAvailableSemaphore, cudaUpdateVkVertexBufSemaphore}; VkPipelineStageFlags waitStages[] = { VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT}; submitInfo.waitSemaphoreCount = 2; submitInfo.pWaitSemaphores = waitSemaphores; submitInfo.pWaitDstStageMask = waitStages; submitInfo.commandBufferCount = 1; submitInfo.pCommandBuffers = &commandBuffers[imageIndex]; VkSemaphore signalSemaphores[] = {renderFinishedSemaphore, vkUpdateCudaVertexBufSemaphore}; submitInfo.signalSemaphoreCount = 2; submitInfo.pSignalSemaphores = signalSemaphores; if (vkQueueSubmit(graphicsQueue, 1, &submitInfo, VK_NULL_HANDLE) != VK_SUCCESS) { throw std::runtime_error("failed to submit draw command buffer!"); } } void createSyncObjects() { VkSemaphoreCreateInfo semaphoreInfo = {}; semaphoreInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; if (vkCreateSemaphore(device, &semaphoreInfo, nullptr, &imageAvailableSemaphore) != VK_SUCCESS || vkCreateSemaphore(device, &semaphoreInfo, nullptr, &renderFinishedSemaphore) != VK_SUCCESS) { throw std::runtime_error( "failed to create synchronization objects for a frame!"); } } void createSyncObjectsExt() { VkSemaphoreCreateInfo semaphoreInfo = {}; semaphoreInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; memset(&semaphoreInfo, 0, sizeof(semaphoreInfo)); semaphoreInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; #ifdef _WIN64 WindowsSecurityAttributes winSecurityAttributes; VkExportSemaphoreWin32HandleInfoKHR vulkanExportSemaphoreWin32HandleInfoKHR = {}; vulkanExportSemaphoreWin32HandleInfoKHR.sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_WIN32_HANDLE_INFO_KHR; vulkanExportSemaphoreWin32HandleInfoKHR.pNext = NULL; vulkanExportSemaphoreWin32HandleInfoKHR.pAttributes = &winSecurityAttributes; vulkanExportSemaphoreWin32HandleInfoKHR.dwAccess = DXGI_SHARED_RESOURCE_READ | DXGI_SHARED_RESOURCE_WRITE; vulkanExportSemaphoreWin32HandleInfoKHR.name = (LPCWSTR)NULL; #endif VkExportSemaphoreCreateInfoKHR vulkanExportSemaphoreCreateInfo = {}; vulkanExportSemaphoreCreateInfo.sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO_KHR; #ifdef _WIN64 vulkanExportSemaphoreCreateInfo.pNext = IsWindows8OrGreater() ? &vulkanExportSemaphoreWin32HandleInfoKHR : NULL; vulkanExportSemaphoreCreateInfo.handleTypes = IsWindows8OrGreater() ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT; #else vulkanExportSemaphoreCreateInfo.pNext = NULL; vulkanExportSemaphoreCreateInfo.handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT; #endif semaphoreInfo.pNext = &vulkanExportSemaphoreCreateInfo; if (vkCreateSemaphore(device, &semaphoreInfo, nullptr, &cudaUpdateVkVertexBufSemaphore) != VK_SUCCESS || vkCreateSemaphore(device, &semaphoreInfo, nullptr, &vkUpdateCudaVertexBufSemaphore) != VK_SUCCESS) { throw std::runtime_error( "failed to create synchronization objects for a CUDA-Vulkan!"); } } void cudaVkImportVertexMem() { cudaExternalMemoryHandleDesc cudaExtMemHandleDesc; memset(&cudaExtMemHandleDesc, 0, sizeof(cudaExtMemHandleDesc)); #ifdef _WIN64 cudaExtMemHandleDesc.type = IsWindows8OrGreater() ? cudaExternalMemoryHandleTypeOpaqueWin32 : cudaExternalMemoryHandleTypeOpaqueWin32Kmt; cudaExtMemHandleDesc.handle.win32.handle = getVkMemHandle( IsWindows8OrGreater() ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT); #else cudaExtMemHandleDesc.type = cudaExternalMemoryHandleTypeOpaqueFd; cudaExtMemHandleDesc.handle.fd = getVkMemHandle(VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT); #endif cudaExtMemHandleDesc.size = sizeof(Vertex) * vertexBufSize; checkCudaErrors(cudaImportExternalMemory(&cudaExtMemVertexBuffer, &cudaExtMemHandleDesc)); cudaExternalMemoryBufferDesc cudaExtBufferDesc; cudaExtBufferDesc.offset = 0; cudaExtBufferDesc.size = sizeof(Vertex) * vertexBufSize; cudaExtBufferDesc.flags = 0; checkCudaErrors(cudaExternalMemoryGetMappedBuffer( &cudaDevVertptr, cudaExtMemVertexBuffer, &cudaExtBufferDesc)); printf("CUDA Imported Vulkan vertex buffer\n"); } void cudaVkImportSemaphore() { cudaExternalSemaphoreHandleDesc externalSemaphoreHandleDesc; memset(&externalSemaphoreHandleDesc, 0, sizeof(externalSemaphoreHandleDesc)); #ifdef _WIN64 externalSemaphoreHandleDesc.type = IsWindows8OrGreater() ? cudaExternalSemaphoreHandleTypeOpaqueWin32 : cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt; externalSemaphoreHandleDesc.handle.win32.handle = getVkSemaphoreHandle( IsWindows8OrGreater() ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT, cudaUpdateVkVertexBufSemaphore); #else externalSemaphoreHandleDesc.type = cudaExternalSemaphoreHandleTypeOpaqueFd; externalSemaphoreHandleDesc.handle.fd = getVkSemaphoreHandle(VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, cudaUpdateVkVertexBufSemaphore); #endif externalSemaphoreHandleDesc.flags = 0; checkCudaErrors(cudaImportExternalSemaphore( &cudaExtCudaUpdateVkVertexBufSemaphore, &externalSemaphoreHandleDesc)); memset(&externalSemaphoreHandleDesc, 0, sizeof(externalSemaphoreHandleDesc)); #ifdef _WIN64 externalSemaphoreHandleDesc.type = IsWindows8OrGreater() ? cudaExternalSemaphoreHandleTypeOpaqueWin32 : cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt; ; externalSemaphoreHandleDesc.handle.win32.handle = getVkSemaphoreHandle( IsWindows8OrGreater() ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT, vkUpdateCudaVertexBufSemaphore); #else externalSemaphoreHandleDesc.type = cudaExternalSemaphoreHandleTypeOpaqueFd; externalSemaphoreHandleDesc.handle.fd = getVkSemaphoreHandle(VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, vkUpdateCudaVertexBufSemaphore); #endif externalSemaphoreHandleDesc.flags = 0; checkCudaErrors(cudaImportExternalSemaphore( &cudaExtVkUpdateCudaVertexBufSemaphore, &externalSemaphoreHandleDesc)); printf("CUDA Imported Vulkan semaphore\n"); } #ifdef _WIN64 // For windows HANDLE getVkMemHandle( VkExternalMemoryHandleTypeFlagsKHR externalMemoryHandleType) { HANDLE handle; VkMemoryGetWin32HandleInfoKHR vkMemoryGetWin32HandleInfoKHR = {}; vkMemoryGetWin32HandleInfoKHR.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR; vkMemoryGetWin32HandleInfoKHR.pNext = NULL; vkMemoryGetWin32HandleInfoKHR.memory = vertexBufferMemory; vkMemoryGetWin32HandleInfoKHR.handleType = (VkExternalMemoryHandleTypeFlagBitsKHR)externalMemoryHandleType; fpGetMemoryWin32HandleKHR(device, &vkMemoryGetWin32HandleInfoKHR, &handle); return handle; } #else int getVkMemHandle( VkExternalMemoryHandleTypeFlagsKHR externalMemoryHandleType) { if (externalMemoryHandleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT) { int fd; VkMemoryGetFdInfoKHR vkMemoryGetFdInfoKHR = {}; vkMemoryGetFdInfoKHR.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR; vkMemoryGetFdInfoKHR.pNext = NULL; vkMemoryGetFdInfoKHR.memory = vertexBufferMemory; vkMemoryGetFdInfoKHR.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; fpGetMemoryFdKHR(device, &vkMemoryGetFdInfoKHR, &fd); return fd; } return -1; } #endif #ifdef _WIN64 HANDLE getVkSemaphoreHandle( VkExternalSemaphoreHandleTypeFlagBitsKHR externalSemaphoreHandleType, VkSemaphore& semVkCuda) { HANDLE handle; VkSemaphoreGetWin32HandleInfoKHR vulkanSemaphoreGetWin32HandleInfoKHR = {}; vulkanSemaphoreGetWin32HandleInfoKHR.sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR; vulkanSemaphoreGetWin32HandleInfoKHR.pNext = NULL; vulkanSemaphoreGetWin32HandleInfoKHR.semaphore = semVkCuda; vulkanSemaphoreGetWin32HandleInfoKHR.handleType = externalSemaphoreHandleType; fpGetSemaphoreWin32HandleKHR(device, &vulkanSemaphoreGetWin32HandleInfoKHR, &handle); return handle; } #else int getVkSemaphoreHandle( VkExternalSemaphoreHandleTypeFlagBitsKHR externalSemaphoreHandleType, VkSemaphore& semVkCuda) { if (externalSemaphoreHandleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) { int fd; VkSemaphoreGetFdInfoKHR vulkanSemaphoreGetFdInfoKHR = {}; vulkanSemaphoreGetFdInfoKHR.sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR; vulkanSemaphoreGetFdInfoKHR.pNext = NULL; vulkanSemaphoreGetFdInfoKHR.semaphore = semVkCuda; vulkanSemaphoreGetFdInfoKHR.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; fpGetSemaphoreFdKHR(device, &vulkanSemaphoreGetFdInfoKHR, &fd); return fd; } return -1; } #endif void cudaVkSemaphoreSignal(cudaExternalSemaphore_t& extSemaphore) { cudaExternalSemaphoreSignalParams extSemaphoreSignalParams; memset(&extSemaphoreSignalParams, 0, sizeof(extSemaphoreSignalParams)); extSemaphoreSignalParams.params.fence.value = 0; extSemaphoreSignalParams.flags = 0; checkCudaErrors(cudaSignalExternalSemaphoresAsync( &extSemaphore, &extSemaphoreSignalParams, 1, streamToRun)); } void cudaVkSemaphoreWait(cudaExternalSemaphore_t& extSemaphore) { cudaExternalSemaphoreWaitParams extSemaphoreWaitParams; memset(&extSemaphoreWaitParams, 0, sizeof(extSemaphoreWaitParams)); extSemaphoreWaitParams.params.fence.value = 0; extSemaphoreWaitParams.flags = 0; checkCudaErrors(cudaWaitExternalSemaphoresAsync( &extSemaphore, &extSemaphoreWaitParams, 1, streamToRun)); } void cudaUpdateVertexBuffer() { cudaVkSemaphoreWait(cudaExtVkUpdateCudaVertexBufSemaphore); dim3 block(16, 16, 1); dim3 grid(mesh_width / block.x, mesh_height / block.y, 1); Vertex* pos = (Vertex*)cudaDevVertptr; AnimTime += 0.01f; sinewave_gen_kernel<<>>(pos, mesh_width, mesh_height, AnimTime); cudaVkSemaphoreSignal(cudaExtCudaUpdateVkVertexBufSemaphore); } void cleanup() { if (enableValidationLayers) { DestroyDebugReportCallbackEXT(instance, callback, nullptr); } vkDestroySemaphore(device, renderFinishedSemaphore, nullptr); vkDestroySemaphore(device, imageAvailableSemaphore, nullptr); checkCudaErrors( cudaDestroyExternalSemaphore(cudaExtCudaUpdateVkVertexBufSemaphore)); vkDestroySemaphore(device, cudaUpdateVkVertexBufSemaphore, nullptr); checkCudaErrors( cudaDestroyExternalSemaphore(cudaExtVkUpdateCudaVertexBufSemaphore)); vkDestroySemaphore(device, vkUpdateCudaVertexBufSemaphore, nullptr); vkDestroyCommandPool(device, commandPool, nullptr); for (auto framebuffer : swapChainFramebuffers) { vkDestroyFramebuffer(device, framebuffer, nullptr); } for (auto imageView : swapChainImageViews) { vkDestroyImageView(device, imageView, nullptr); } vkDestroyPipeline(device, graphicsPipeline, nullptr); vkDestroyPipelineLayout(device, pipelineLayout, nullptr); vkDestroyDescriptorSetLayout(device, descriptorSetLayout, nullptr); vkDestroyBuffer(device, uniformBuffer, nullptr); vkFreeMemory(device, uniformBufferMemory, nullptr); vkDestroyRenderPass(device, renderPass, nullptr); vkDestroySwapchainKHR(device, swapChain, nullptr); checkCudaErrors(cudaDestroyExternalMemory(cudaExtMemVertexBuffer)); vkDestroyBuffer(device, vertexBuffer, nullptr); vkFreeMemory(device, vertexBufferMemory, nullptr); vkDestroyDescriptorPool(device, descriptorPool, nullptr); vkDestroyDevice(device, nullptr); vkDestroySurfaceKHR(instance, surface, nullptr); vkDestroyInstance(instance, nullptr); glfwDestroyWindow(window); glfwTerminate(); } }; int main(int argc, char* argv[]) { execution_path = argv[0]; vulkanCudaApp app; try { app.run(); } catch (const std::runtime_error& e) { std::cerr << e.what() << std::endl; return EXIT_FAILURE; } return EXIT_SUCCESS; }