cuda-samples/Samples/simpleVulkan/vulkanCUDASinewave.cu
2018-08-24 22:35:15 +05:30

1863 lines
68 KiB
Plaintext

/* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define GLFW_INCLUDE_VULKAN
#include <GLFW/glfw3.h>
#include <vulkan/vulkan.h>
#include <algorithm>
#include <array>
#include <chrono>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <functional>
#include <iostream>
#include <set>
#include <stdexcept>
#include <thread>
#include <vector>
#ifdef _WIN64
#include <VersionHelpers.h>
#include <aclapi.h>
#include <dxgi1_2.h>
#include <vulkan/vulkan_win32.h>
#include <windows.h>
#define _USE_MATH_DEFINES
#endif
#include <cuda.h>
#include <cuda_runtime.h>
#include <helper_cuda.h>
#include "linmath.h"
#define WIDTH 800
#define HEIGHT 600
#define VULKAN_VALIDATION 0
const std::vector<const char*> validationLayers = {
"VK_LAYER_LUNARG_standard_validation"};
#if VULKAN_VALIDATION
const bool enableValidationLayers = true;
#else
const bool enableValidationLayers = false;
#endif
struct QueueFamilyIndices {
int graphicsFamily = -1;
int presentFamily = -1;
bool isComplete() { return graphicsFamily >= 0 && presentFamily >= 0; }
};
const std::vector<const char*> deviceExtensions = {
VK_KHR_SWAPCHAIN_EXTENSION_NAME,
VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME,
VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME,
#ifdef _WIN64
VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME,
#else
VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME,
#endif
};
#ifdef _WIN64
class WindowsSecurityAttributes {
protected:
SECURITY_ATTRIBUTES m_winSecurityAttributes;
PSECURITY_DESCRIPTOR m_winPSecurityDescriptor;
public:
WindowsSecurityAttributes();
SECURITY_ATTRIBUTES* operator&();
~WindowsSecurityAttributes();
};
WindowsSecurityAttributes::WindowsSecurityAttributes() {
m_winPSecurityDescriptor = (PSECURITY_DESCRIPTOR)calloc(
1, SECURITY_DESCRIPTOR_MIN_LENGTH + 2 * sizeof(void**));
// CHECK_NEQ(m_winPSecurityDescriptor, (PSECURITY_DESCRIPTOR)NULL);
PSID* ppSID =
(PSID*)((PBYTE)m_winPSecurityDescriptor + SECURITY_DESCRIPTOR_MIN_LENGTH);
PACL* ppACL = (PACL*)((PBYTE)ppSID + sizeof(PSID*));
InitializeSecurityDescriptor(m_winPSecurityDescriptor,
SECURITY_DESCRIPTOR_REVISION);
SID_IDENTIFIER_AUTHORITY sidIdentifierAuthority =
SECURITY_WORLD_SID_AUTHORITY;
AllocateAndInitializeSid(&sidIdentifierAuthority, 1, SECURITY_WORLD_RID, 0, 0,
0, 0, 0, 0, 0, ppSID);
EXPLICIT_ACCESS explicitAccess;
ZeroMemory(&explicitAccess, sizeof(EXPLICIT_ACCESS));
explicitAccess.grfAccessPermissions =
STANDARD_RIGHTS_ALL | SPECIFIC_RIGHTS_ALL;
explicitAccess.grfAccessMode = SET_ACCESS;
explicitAccess.grfInheritance = INHERIT_ONLY;
explicitAccess.Trustee.TrusteeForm = TRUSTEE_IS_SID;
explicitAccess.Trustee.TrusteeType = TRUSTEE_IS_WELL_KNOWN_GROUP;
explicitAccess.Trustee.ptstrName = (LPTSTR)*ppSID;
SetEntriesInAcl(1, &explicitAccess, NULL, ppACL);
SetSecurityDescriptorDacl(m_winPSecurityDescriptor, TRUE, *ppACL, FALSE);
m_winSecurityAttributes.nLength = sizeof(m_winSecurityAttributes);
m_winSecurityAttributes.lpSecurityDescriptor = m_winPSecurityDescriptor;
m_winSecurityAttributes.bInheritHandle = TRUE;
}
SECURITY_ATTRIBUTES* WindowsSecurityAttributes::operator&() {
return &m_winSecurityAttributes;
}
WindowsSecurityAttributes::~WindowsSecurityAttributes() {
PSID* ppSID =
(PSID*)((PBYTE)m_winPSecurityDescriptor + SECURITY_DESCRIPTOR_MIN_LENGTH);
PACL* ppACL = (PACL*)((PBYTE)ppSID + sizeof(PSID*));
if (*ppSID) {
FreeSid(*ppSID);
}
if (*ppACL) {
LocalFree(*ppACL);
}
free(m_winPSecurityDescriptor);
}
#endif
struct UniformBufferObject {
mat4x4 model;
mat4x4 view;
mat4x4 proj;
};
struct SwapChainSupportDetails {
VkSurfaceCapabilitiesKHR capabilities;
std::vector<VkSurfaceFormatKHR> formats;
std::vector<VkPresentModeKHR> presentModes;
};
struct Vertex {
vec4 pos;
vec3 color;
static VkVertexInputBindingDescription getBindingDescription() {
VkVertexInputBindingDescription bindingDescription = {};
bindingDescription.binding = 0;
bindingDescription.stride = sizeof(Vertex);
bindingDescription.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
return bindingDescription;
}
static std::array<VkVertexInputAttributeDescription, 2>
getAttributeDescriptions() {
std::array<VkVertexInputAttributeDescription, 2> attributeDescriptions = {};
attributeDescriptions[0].binding = 0;
attributeDescriptions[0].location = 0;
attributeDescriptions[0].format = VK_FORMAT_R32G32B32A32_SFLOAT;
attributeDescriptions[0].offset = offsetof(Vertex, pos);
attributeDescriptions[1].binding = 0;
attributeDescriptions[1].location = 1;
attributeDescriptions[1].format = VK_FORMAT_R32G32B32_SFLOAT;
attributeDescriptions[1].offset = offsetof(Vertex, color);
return attributeDescriptions;
}
};
size_t mesh_width = 0, mesh_height = 0;
std::string execution_path;
__global__ void sinewave_gen_kernel(Vertex* vertices, unsigned int width,
unsigned int height, float time) {
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
// calculate uv coordinates
float u = x / (float)width;
float v = y / (float)height;
u = u * 2.0f - 1.0f;
v = v * 2.0f - 1.0f;
// calculate simple sine wave pattern
float freq = 4.0f;
float w = sinf(u * freq + time) * cosf(v * freq + time) * 0.5f;
if (y < height && x < width) {
// write output vertex
vertices[y * width + x].pos[0] = u;
vertices[y * width + x].pos[1] = w;
vertices[y * width + x].pos[2] = v;
vertices[y * width + x].pos[3] = 1.0f;
vertices[y * width + x].color[0] = 1.0f;
vertices[y * width + x].color[1] = 0.0f;
vertices[y * width + x].color[2] = 0.0f;
}
}
class vulkanCudaApp {
public:
void run() {
initWindow();
initVulkan();
initCuda();
mainLoop();
cleanup();
}
private:
GLFWwindow* window;
VkInstance instance;
VkPhysicalDevice physicalDevice = VK_NULL_HANDLE;
uint8_t vkDeviceUUID[VK_UUID_SIZE];
VkDevice device;
VkQueue graphicsQueue;
VkQueue presentQueue;
VkSurfaceKHR surface;
VkSwapchainKHR swapChain;
std::vector<VkImage> swapChainImages;
VkFormat swapChainImageFormat;
VkExtent2D swapChainExtent;
std::vector<VkImageView> swapChainImageViews;
VkDescriptorSetLayout descriptorSetLayout;
VkDescriptorPool descriptorPool;
VkDescriptorSet descriptorSet;
VkPipelineLayout pipelineLayout;
VkRenderPass renderPass;
VkPipeline graphicsPipeline;
std::vector<VkFramebuffer> swapChainFramebuffers;
VkCommandPool commandPool;
VkBuffer vertexBuffer;
VkDeviceMemory vertexBufferMemory;
VkBuffer uniformBuffer;
VkDeviceMemory uniformBufferMemory;
std::vector<VkCommandBuffer> commandBuffers;
VkSemaphore imageAvailableSemaphore;
VkSemaphore renderFinishedSemaphore;
VkSemaphore cudaUpdateVkVertexBufSemaphore;
VkSemaphore vkUpdateCudaVertexBufSemaphore;
size_t vertexBufSize = 0;
bool startSubmit = 0;
double AnimTime = 1.0f;
VkDebugReportCallbackEXT callback;
#ifdef _WIN64
PFN_vkGetMemoryWin32HandleKHR fpGetMemoryWin32HandleKHR;
PFN_vkGetSemaphoreWin32HandleKHR fpGetSemaphoreWin32HandleKHR;
#else
PFN_vkGetMemoryFdKHR fpGetMemoryFdKHR;
PFN_vkGetSemaphoreFdKHR fpGetSemaphoreFdKHR;
#endif
PFN_vkGetPhysicalDeviceProperties2 fpGetPhysicalDeviceProperties2;
// CUDA stuff
cudaExternalMemory_t cudaExtMemVertexBuffer;
cudaExternalSemaphore_t cudaExtCudaUpdateVkVertexBufSemaphore;
cudaExternalSemaphore_t cudaExtVkUpdateCudaVertexBufSemaphore;
void* cudaDevVertptr = NULL;
cudaStream_t streamToRun;
bool checkValidationLayerSupport() {
uint32_t layerCount;
vkEnumerateInstanceLayerProperties(&layerCount, nullptr);
std::vector<VkLayerProperties> availableLayers(layerCount);
vkEnumerateInstanceLayerProperties(&layerCount, availableLayers.data());
for (const char* layerName : validationLayers) {
bool layerFound = false;
for (const auto& layerProperties : availableLayers) {
if (strcmp(layerName, layerProperties.layerName) == 0) {
layerFound = true;
break;
}
}
if (!layerFound) {
return false;
}
}
return true;
}
static VKAPI_ATTR VkBool32 VKAPI_CALL
debugCallback(VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT objType,
uint64_t obj, size_t location, int32_t code,
const char* layerPrefix, const char* msg, void* userData) {
std::cerr << "validation layer: " << msg << std::endl;
return VK_FALSE;
}
VkResult CreateDebugReportCallbackEXT(
VkInstance instance,
const VkDebugReportCallbackCreateInfoEXT* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkDebugReportCallbackEXT* pCallback) {
auto func = (PFN_vkCreateDebugReportCallbackEXT)vkGetInstanceProcAddr(
instance, "vkCreateDebugReportCallbackEXT");
if (func != nullptr) {
return func(instance, pCreateInfo, pAllocator, pCallback);
} else {
return VK_ERROR_EXTENSION_NOT_PRESENT;
}
}
void DestroyDebugReportCallbackEXT(VkInstance instance,
VkDebugReportCallbackEXT callback,
const VkAllocationCallbacks* pAllocator) {
auto func = (PFN_vkDestroyDebugReportCallbackEXT)vkGetInstanceProcAddr(
instance, "vkDestroyDebugReportCallbackEXT");
if (func != nullptr) {
func(instance, callback, pAllocator);
}
}
void setupDebugCallback() {
if (!enableValidationLayers) return;
VkDebugReportCallbackCreateInfoEXT createInfo = {};
createInfo.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT;
createInfo.flags =
VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT;
createInfo.pfnCallback = debugCallback;
if (CreateDebugReportCallbackEXT(instance, &createInfo, nullptr,
&callback) != VK_SUCCESS) {
throw std::runtime_error("failed to set up debug callback!");
}
}
void initWindow() {
glfwInit();
glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API);
glfwWindowHint(GLFW_RESIZABLE, GLFW_FALSE);
window = glfwCreateWindow(WIDTH, HEIGHT, "Vulkan-CUDA Interop Sinewave",
nullptr, nullptr);
}
void createInstance() {
if (enableValidationLayers && !checkValidationLayerSupport()) {
throw std::runtime_error(
"validation layers requested, but not available!");
}
VkApplicationInfo appInfo = {};
appInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
appInfo.pApplicationName = "Vulkan CUDA Sinewave";
appInfo.applicationVersion = VK_MAKE_VERSION(1, 0, 0);
appInfo.pEngineName = "No Engine";
appInfo.engineVersion = VK_MAKE_VERSION(1, 0, 0);
appInfo.apiVersion = VK_API_VERSION_1_0;
VkInstanceCreateInfo createInfo = {};
createInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
createInfo.pApplicationInfo = &appInfo;
uint32_t glfwExtensionCount = 0;
const char** glfwExtensions;
glfwExtensions = glfwGetRequiredInstanceExtensions(&glfwExtensionCount);
std::vector<const char*> enabledExtensionNameList;
enabledExtensionNameList.push_back(
VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
enabledExtensionNameList.push_back(
VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME);
enabledExtensionNameList.push_back(
VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME);
for (int i = 0; i < glfwExtensionCount; i++) {
enabledExtensionNameList.push_back(glfwExtensions[i]);
}
if (enableValidationLayers) {
enabledExtensionNameList.push_back(VK_EXT_DEBUG_REPORT_EXTENSION_NAME);
createInfo.enabledLayerCount =
static_cast<uint32_t>(validationLayers.size());
createInfo.ppEnabledLayerNames = validationLayers.data();
} else {
createInfo.enabledLayerCount = 0;
}
createInfo.enabledExtensionCount = enabledExtensionNameList.size();
createInfo.ppEnabledExtensionNames = enabledExtensionNameList.data();
if (vkCreateInstance(&createInfo, nullptr, &instance) != VK_SUCCESS) {
throw std::runtime_error("failed to create instance!");
} else {
std::cout << "Instance created successfully!!\n";
}
fpGetPhysicalDeviceProperties2 =
(PFN_vkGetPhysicalDeviceProperties2)vkGetInstanceProcAddr(
instance, "vkGetPhysicalDeviceProperties2");
if (fpGetPhysicalDeviceProperties2 == NULL) {
throw std::runtime_error(
"Vulkan: Proc address for \"vkGetPhysicalDeviceProperties2KHR\" not "
"found.\n");
}
#ifdef _WIN64
fpGetMemoryWin32HandleKHR =
(PFN_vkGetMemoryWin32HandleKHR)vkGetInstanceProcAddr(
instance, "vkGetMemoryWin32HandleKHR");
if (fpGetMemoryWin32HandleKHR == NULL) {
throw std::runtime_error(
"Vulkan: Proc address for \"vkGetMemoryWin32HandleKHR\" not "
"found.\n");
}
#else
fpGetMemoryFdKHR = (PFN_vkGetMemoryFdKHR)vkGetInstanceProcAddr(
instance, "vkGetMemoryFdKHR");
if (fpGetMemoryFdKHR == NULL) {
throw std::runtime_error(
"Vulkan: Proc address for \"vkGetMemoryFdKHR\" not found.\n");
}
#endif
}
void initVulkan() {
createInstance();
setupDebugCallback();
createSurface();
pickPhysicalDevice();
createLogicalDevice();
getKhrExtensionsFn();
createSwapChain();
createImageViews();
createRenderPass();
createDescriptorSetLayout();
createGraphicsPipeline();
createFramebuffers();
createCommandPool();
createVertexBuffer();
createUniformBuffer();
createDescriptorPool();
createDescriptorSet();
createCommandBuffers();
createSyncObjects();
createSyncObjectsExt();
}
void initCuda() {
setCudaVkDevice();
cudaVkImportVertexMem();
cudaInitVertexMem();
cudaVkImportSemaphore();
}
void createSurface() {
if (glfwCreateWindowSurface(instance, window, nullptr, &surface) !=
VK_SUCCESS) {
throw std::runtime_error("failed to create window surface!");
}
}
void pickPhysicalDevice() {
uint32_t deviceCount = 0;
vkEnumeratePhysicalDevices(instance, &deviceCount, nullptr);
if (deviceCount == 0) {
throw std::runtime_error("failed to find GPUs with Vulkan support!");
}
std::vector<VkPhysicalDevice> devices(deviceCount);
vkEnumeratePhysicalDevices(instance, &deviceCount, devices.data());
for (const auto& device : devices) {
if (isDeviceSuitable(device)) {
physicalDevice = device;
break;
}
}
if (physicalDevice == VK_NULL_HANDLE) {
throw std::runtime_error("failed to find a suitable GPU!");
}
std::cout << "Selected physical device = " << physicalDevice << std::endl;
VkPhysicalDeviceIDProperties vkPhysicalDeviceIDProperties = {};
vkPhysicalDeviceIDProperties.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
vkPhysicalDeviceIDProperties.pNext = NULL;
VkPhysicalDeviceProperties2 vkPhysicalDeviceProperties2 = {};
vkPhysicalDeviceProperties2.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
vkPhysicalDeviceProperties2.pNext = &vkPhysicalDeviceIDProperties;
fpGetPhysicalDeviceProperties2(physicalDevice,
&vkPhysicalDeviceProperties2);
memcpy(vkDeviceUUID, vkPhysicalDeviceIDProperties.deviceUUID,
sizeof(vkDeviceUUID));
}
int setCudaVkDevice() {
int current_device = 0;
int device_count = 0;
int devices_prohibited = 0;
cudaDeviceProp deviceProp;
checkCudaErrors(cudaGetDeviceCount(&device_count));
if (device_count == 0) {
fprintf(stderr, "CUDA error: no devices supporting CUDA.\n");
exit(EXIT_FAILURE);
}
// Find the GPU which is selected by Vulkan
while (current_device < device_count) {
cudaGetDeviceProperties(&deviceProp, current_device);
if ((deviceProp.computeMode != cudaComputeModeProhibited)) {
// Compare the cuda device UUID with vulkan UUID
int ret = memcmp(&deviceProp.uuid, &vkDeviceUUID, VK_UUID_SIZE);
if (ret == 0) {
checkCudaErrors(cudaSetDevice(current_device));
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, current_device));
printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n",
current_device, deviceProp.name, deviceProp.major,
deviceProp.minor);
return current_device;
}
} else {
devices_prohibited++;
}
current_device++;
}
if (devices_prohibited == device_count) {
fprintf(stderr,
"CUDA error:"
" No Vulkan-CUDA Interop capable GPU found.\n");
exit(EXIT_FAILURE);
}
return -1;
}
bool isDeviceSuitable(VkPhysicalDevice device) {
QueueFamilyIndices indices = findQueueFamilies(device);
bool extensionsSupported = checkDeviceExtensionSupport(device);
bool swapChainAdequate = false;
if (extensionsSupported) {
SwapChainSupportDetails swapChainSupport = querySwapChainSupport(device);
swapChainAdequate = !swapChainSupport.formats.empty() &&
!swapChainSupport.presentModes.empty();
}
return indices.isComplete() && extensionsSupported && swapChainAdequate;
}
bool checkDeviceExtensionSupport(VkPhysicalDevice device) {
uint32_t extensionCount;
vkEnumerateDeviceExtensionProperties(device, nullptr, &extensionCount,
nullptr);
std::vector<VkExtensionProperties> availableExtensions(extensionCount);
vkEnumerateDeviceExtensionProperties(device, nullptr, &extensionCount,
availableExtensions.data());
std::set<std::string> requiredExtensions(deviceExtensions.begin(),
deviceExtensions.end());
for (const auto& extension : availableExtensions) {
requiredExtensions.erase(extension.extensionName);
}
return requiredExtensions.empty();
}
QueueFamilyIndices findQueueFamilies(VkPhysicalDevice device) {
QueueFamilyIndices indices;
uint32_t queueFamilyCount = 0;
vkGetPhysicalDeviceQueueFamilyProperties(device, &queueFamilyCount,
nullptr);
std::vector<VkQueueFamilyProperties> queueFamilies(queueFamilyCount);
vkGetPhysicalDeviceQueueFamilyProperties(device, &queueFamilyCount,
queueFamilies.data());
int i = 0;
for (const auto& queueFamily : queueFamilies) {
if (queueFamily.queueCount > 0 &&
queueFamily.queueFlags & VK_QUEUE_GRAPHICS_BIT) {
indices.graphicsFamily = i;
}
VkBool32 presentSupport = false;
vkGetPhysicalDeviceSurfaceSupportKHR(device, i, surface, &presentSupport);
if (queueFamily.queueCount > 0 && presentSupport) {
indices.presentFamily = i;
}
if (indices.isComplete()) {
break;
}
i++;
}
return indices;
}
SwapChainSupportDetails querySwapChainSupport(VkPhysicalDevice device) {
SwapChainSupportDetails details;
vkGetPhysicalDeviceSurfaceCapabilitiesKHR(device, surface,
&details.capabilities);
uint32_t formatCount;
vkGetPhysicalDeviceSurfaceFormatsKHR(device, surface, &formatCount,
nullptr);
if (formatCount != 0) {
details.formats.resize(formatCount);
vkGetPhysicalDeviceSurfaceFormatsKHR(device, surface, &formatCount,
details.formats.data());
}
uint32_t presentModeCount;
vkGetPhysicalDeviceSurfacePresentModesKHR(device, surface,
&presentModeCount, nullptr);
if (presentModeCount != 0) {
details.presentModes.resize(presentModeCount);
vkGetPhysicalDeviceSurfacePresentModesKHR(
device, surface, &presentModeCount, details.presentModes.data());
}
return details;
}
VkSurfaceFormatKHR chooseSwapSurfaceFormat(
const std::vector<VkSurfaceFormatKHR>& availableFormats) {
if (availableFormats.size() == 1 &&
availableFormats[0].format == VK_FORMAT_UNDEFINED) {
return {VK_FORMAT_B8G8R8A8_UNORM, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR};
}
for (const auto& availableFormat : availableFormats) {
if (availableFormat.format == VK_FORMAT_B8G8R8A8_UNORM &&
availableFormat.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) {
return availableFormat;
}
}
return availableFormats[0];
}
VkPresentModeKHR chooseSwapPresentMode(
const std::vector<VkPresentModeKHR> availablePresentModes) {
VkPresentModeKHR bestMode = VK_PRESENT_MODE_FIFO_KHR;
for (const auto& availablePresentMode : availablePresentModes) {
if (availablePresentMode == VK_PRESENT_MODE_MAILBOX_KHR) {
return availablePresentMode;
} else if (availablePresentMode == VK_PRESENT_MODE_IMMEDIATE_KHR) {
bestMode = availablePresentMode;
}
}
return bestMode;
}
VkExtent2D chooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities) {
if (capabilities.currentExtent.width !=
std::numeric_limits<uint32_t>::max()) {
return capabilities.currentExtent;
} else {
VkExtent2D actualExtent = {WIDTH, HEIGHT};
actualExtent.width = std::max(
capabilities.minImageExtent.width,
std::min(capabilities.maxImageExtent.width, actualExtent.width));
actualExtent.height = std::max(
capabilities.minImageExtent.height,
std::min(capabilities.maxImageExtent.height, actualExtent.height));
return actualExtent;
}
}
void createLogicalDevice() {
QueueFamilyIndices indices = findQueueFamilies(physicalDevice);
std::vector<VkDeviceQueueCreateInfo> queueCreateInfos;
std::set<int> uniqueQueueFamilies = {indices.graphicsFamily,
indices.presentFamily};
float queuePriority = 1.0f;
for (int queueFamily : uniqueQueueFamilies) {
VkDeviceQueueCreateInfo queueCreateInfo = {};
queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
queueCreateInfo.queueFamilyIndex = queueFamily;
queueCreateInfo.queueCount = 1;
queueCreateInfo.pQueuePriorities = &queuePriority;
queueCreateInfos.push_back(queueCreateInfo);
}
VkPhysicalDeviceFeatures deviceFeatures = {};
VkDeviceCreateInfo createInfo = {};
createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
createInfo.pQueueCreateInfos = queueCreateInfos.data();
createInfo.queueCreateInfoCount = queueCreateInfos.size();
createInfo.pEnabledFeatures = &deviceFeatures;
std::vector<const char*> enabledExtensionNameList;
for (int i = 0; i < deviceExtensions.size(); i++) {
enabledExtensionNameList.push_back(deviceExtensions[i]);
}
if (enableValidationLayers) {
createInfo.enabledLayerCount =
static_cast<uint32_t>(validationLayers.size());
createInfo.ppEnabledLayerNames = validationLayers.data();
} else {
createInfo.enabledLayerCount = 0;
}
createInfo.enabledExtensionCount =
static_cast<uint32_t>(enabledExtensionNameList.size());
createInfo.ppEnabledExtensionNames = enabledExtensionNameList.data();
if (vkCreateDevice(physicalDevice, &createInfo, nullptr, &device) !=
VK_SUCCESS) {
throw std::runtime_error("failed to create logical device!");
}
vkGetDeviceQueue(device, indices.graphicsFamily, 0, &graphicsQueue);
vkGetDeviceQueue(device, indices.presentFamily, 0, &presentQueue);
}
void createSwapChain() {
SwapChainSupportDetails swapChainSupport =
querySwapChainSupport(physicalDevice);
VkSurfaceFormatKHR surfaceFormat =
chooseSwapSurfaceFormat(swapChainSupport.formats);
VkPresentModeKHR presentMode =
chooseSwapPresentMode(swapChainSupport.presentModes);
VkExtent2D extent = chooseSwapExtent(swapChainSupport.capabilities);
uint32_t imageCount = swapChainSupport.capabilities.minImageCount + 1;
if (swapChainSupport.capabilities.maxImageCount > 0 &&
imageCount > swapChainSupport.capabilities.maxImageCount) {
imageCount = swapChainSupport.capabilities.maxImageCount;
}
VkSwapchainCreateInfoKHR createInfo = {};
createInfo.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR;
createInfo.surface = surface;
createInfo.minImageCount = imageCount;
createInfo.imageFormat = surfaceFormat.format;
createInfo.imageColorSpace = surfaceFormat.colorSpace;
createInfo.imageExtent = extent;
createInfo.imageArrayLayers = 1;
createInfo.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
QueueFamilyIndices indices = findQueueFamilies(physicalDevice);
uint32_t queueFamilyIndices[] = {(uint32_t)indices.graphicsFamily,
(uint32_t)indices.presentFamily};
if (indices.graphicsFamily != indices.presentFamily) {
createInfo.imageSharingMode = VK_SHARING_MODE_CONCURRENT;
createInfo.queueFamilyIndexCount = 2;
createInfo.pQueueFamilyIndices = queueFamilyIndices;
} else {
createInfo.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
createInfo.queueFamilyIndexCount = 0; // Optional
createInfo.pQueueFamilyIndices = nullptr; // Optional
}
createInfo.preTransform = swapChainSupport.capabilities.currentTransform;
createInfo.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
createInfo.presentMode = presentMode;
createInfo.clipped = VK_TRUE;
createInfo.oldSwapchain = VK_NULL_HANDLE;
if (vkCreateSwapchainKHR(device, &createInfo, nullptr, &swapChain) !=
VK_SUCCESS) {
throw std::runtime_error("failed to create swap chain!");
} else {
std::cout << "Swapchain created!!\n";
}
vkGetSwapchainImagesKHR(device, swapChain, &imageCount, nullptr);
swapChainImages.resize(imageCount);
vkGetSwapchainImagesKHR(device, swapChain, &imageCount,
swapChainImages.data());
swapChainImageFormat = surfaceFormat.format;
swapChainExtent = extent;
}
void createImageViews() {
swapChainImageViews.resize(swapChainImages.size());
for (size_t i = 0; i < swapChainImages.size(); i++) {
VkImageViewCreateInfo createInfo = {};
createInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
createInfo.image = swapChainImages[i];
createInfo.viewType = VK_IMAGE_VIEW_TYPE_2D;
createInfo.format = swapChainImageFormat;
createInfo.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
createInfo.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
createInfo.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
createInfo.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
createInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
createInfo.subresourceRange.baseMipLevel = 0;
createInfo.subresourceRange.levelCount = 1;
createInfo.subresourceRange.baseArrayLayer = 0;
createInfo.subresourceRange.layerCount = 1;
if (vkCreateImageView(device, &createInfo, nullptr,
&swapChainImageViews[i]) != VK_SUCCESS) {
throw std::runtime_error("failed to create image views!");
}
}
}
void createDescriptorSetLayout() {
VkDescriptorSetLayoutBinding uboLayoutBinding = {};
uboLayoutBinding.binding = 0;
uboLayoutBinding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
uboLayoutBinding.descriptorCount = 1;
uboLayoutBinding.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
uboLayoutBinding.pImmutableSamplers = nullptr; // Optional
VkDescriptorSetLayoutCreateInfo layoutInfo = {};
layoutInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
layoutInfo.bindingCount = 1;
layoutInfo.pBindings = &uboLayoutBinding;
if (vkCreateDescriptorSetLayout(device, &layoutInfo, nullptr,
&descriptorSetLayout) != VK_SUCCESS) {
throw std::runtime_error("failed to create descriptor set layout!");
}
}
void createGraphicsPipeline() {
auto vertShaderCode = readFile("vert.spv");
auto fragShaderCode = readFile("frag.spv");
VkShaderModule vertShaderModule;
VkShaderModule fragShaderModule;
vertShaderModule = createShaderModule(vertShaderCode);
fragShaderModule = createShaderModule(fragShaderCode);
VkPipelineShaderStageCreateInfo vertShaderStageInfo = {};
vertShaderStageInfo.sType =
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
vertShaderStageInfo.stage = VK_SHADER_STAGE_VERTEX_BIT;
vertShaderStageInfo.module = vertShaderModule;
vertShaderStageInfo.pName = "main";
VkPipelineShaderStageCreateInfo fragShaderStageInfo = {};
fragShaderStageInfo.sType =
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
fragShaderStageInfo.stage = VK_SHADER_STAGE_FRAGMENT_BIT;
fragShaderStageInfo.module = fragShaderModule;
fragShaderStageInfo.pName = "main";
VkPipelineShaderStageCreateInfo shaderStages[] = {vertShaderStageInfo,
fragShaderStageInfo};
VkPipelineVertexInputStateCreateInfo vertexInputInfo = {};
vertexInputInfo.sType =
VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
auto bindingDescription = Vertex::getBindingDescription();
auto attributeDescriptions = Vertex::getAttributeDescriptions();
vertexInputInfo.vertexBindingDescriptionCount = 1;
vertexInputInfo.pVertexBindingDescriptions = &bindingDescription;
vertexInputInfo.vertexAttributeDescriptionCount =
static_cast<uint32_t>(attributeDescriptions.size());
vertexInputInfo.pVertexAttributeDescriptions = attributeDescriptions.data();
VkPipelineInputAssemblyStateCreateInfo inputAssembly = {};
inputAssembly.sType =
VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
inputAssembly.topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
inputAssembly.primitiveRestartEnable = VK_FALSE;
VkViewport viewport = {};
viewport.x = 0.0f;
viewport.y = 0.0f;
viewport.width = (float)swapChainExtent.width;
viewport.height = (float)swapChainExtent.height;
viewport.minDepth = 0.0f;
viewport.maxDepth = 1.0f;
VkRect2D scissor = {};
scissor.offset = {0, 0};
scissor.extent = swapChainExtent;
VkPipelineViewportStateCreateInfo viewportState = {};
viewportState.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
viewportState.viewportCount = 1;
viewportState.pViewports = &viewport;
viewportState.scissorCount = 1;
viewportState.pScissors = &scissor;
VkPipelineRasterizationStateCreateInfo rasterizer = {};
rasterizer.sType =
VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
rasterizer.depthClampEnable = VK_FALSE;
rasterizer.rasterizerDiscardEnable = VK_FALSE;
rasterizer.polygonMode = VK_POLYGON_MODE_FILL;
rasterizer.lineWidth = 1.0f;
rasterizer.cullMode = VK_CULL_MODE_BACK_BIT;
rasterizer.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
rasterizer.depthBiasEnable = VK_FALSE;
rasterizer.depthBiasConstantFactor = 0.0f; // Optional
rasterizer.depthBiasClamp = 0.0f; // Optional
rasterizer.depthBiasSlopeFactor = 0.0f; // Optional
VkPipelineMultisampleStateCreateInfo multisampling = {};
multisampling.sType =
VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
multisampling.sampleShadingEnable = VK_FALSE;
multisampling.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
multisampling.minSampleShading = 1.0f; // Optional
multisampling.pSampleMask = nullptr; // Optional
multisampling.alphaToCoverageEnable = VK_FALSE; // Optional
multisampling.alphaToOneEnable = VK_FALSE; // Optional
VkPipelineColorBlendAttachmentState colorBlendAttachment = {};
colorBlendAttachment.colorWriteMask =
VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
colorBlendAttachment.blendEnable = VK_FALSE;
colorBlendAttachment.srcColorBlendFactor = VK_BLEND_FACTOR_ONE; // Optional
colorBlendAttachment.dstColorBlendFactor =
VK_BLEND_FACTOR_ZERO; // Optional
colorBlendAttachment.colorBlendOp = VK_BLEND_OP_ADD; // Optional
colorBlendAttachment.srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE; // Optional
colorBlendAttachment.dstAlphaBlendFactor =
VK_BLEND_FACTOR_ZERO; // Optional
colorBlendAttachment.alphaBlendOp = VK_BLEND_OP_ADD; // Optional
VkPipelineColorBlendStateCreateInfo colorBlending = {};
colorBlending.sType =
VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
colorBlending.logicOpEnable = VK_FALSE;
colorBlending.logicOp = VK_LOGIC_OP_COPY; // Optional
colorBlending.attachmentCount = 1;
colorBlending.pAttachments = &colorBlendAttachment;
colorBlending.blendConstants[0] = 0.0f; // Optional
colorBlending.blendConstants[1] = 0.0f; // Optional
colorBlending.blendConstants[2] = 0.0f; // Optional
colorBlending.blendConstants[3] = 0.0f; // Optional
#if 0
VkDynamicState dynamicStates[] = {
VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_LINE_WIDTH
};
VkPipelineDynamicStateCreateInfo dynamicState = {};
dynamicState.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
dynamicState.dynamicStateCount = 2;
dynamicState.pDynamicStates = dynamicStates;
#endif
VkPipelineLayoutCreateInfo pipelineLayoutInfo = {};
pipelineLayoutInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
pipelineLayoutInfo.setLayoutCount = 1; // Optional
pipelineLayoutInfo.pSetLayouts = &descriptorSetLayout; // Optional
pipelineLayoutInfo.pushConstantRangeCount = 0; // Optional
pipelineLayoutInfo.pPushConstantRanges = nullptr; // Optional
if (vkCreatePipelineLayout(device, &pipelineLayoutInfo, nullptr,
&pipelineLayout) != VK_SUCCESS) {
throw std::runtime_error("failed to create pipeline layout!");
}
VkGraphicsPipelineCreateInfo pipelineInfo = {};
pipelineInfo.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
pipelineInfo.stageCount = 2;
pipelineInfo.pStages = shaderStages;
pipelineInfo.pVertexInputState = &vertexInputInfo;
pipelineInfo.pInputAssemblyState = &inputAssembly;
pipelineInfo.pViewportState = &viewportState;
pipelineInfo.pRasterizationState = &rasterizer;
pipelineInfo.pMultisampleState = &multisampling;
pipelineInfo.pDepthStencilState = nullptr; // Optional
pipelineInfo.pColorBlendState = &colorBlending;
pipelineInfo.pDynamicState = nullptr; // Optional
pipelineInfo.layout = pipelineLayout;
pipelineInfo.renderPass = renderPass;
pipelineInfo.subpass = 0;
pipelineInfo.basePipelineHandle = VK_NULL_HANDLE; // Optional
pipelineInfo.basePipelineIndex = -1; // Optional
if (vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &pipelineInfo,
nullptr, &graphicsPipeline) != VK_SUCCESS) {
throw std::runtime_error("failed to create graphics pipeline!");
} else {
std::cout << "Pipeline created successfully!!\n";
}
vkDestroyShaderModule(device, fragShaderModule, nullptr);
vkDestroyShaderModule(device, vertShaderModule, nullptr);
}
void createRenderPass() {
VkAttachmentDescription colorAttachment = {};
colorAttachment.format = swapChainImageFormat;
colorAttachment.samples = VK_SAMPLE_COUNT_1_BIT;
colorAttachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
colorAttachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
colorAttachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
colorAttachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
colorAttachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
colorAttachment.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
VkAttachmentReference colorAttachmentRef = {};
colorAttachmentRef.attachment = 0;
colorAttachmentRef.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
VkSubpassDescription subpass = {};
subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
subpass.colorAttachmentCount = 1;
subpass.pColorAttachments = &colorAttachmentRef;
VkRenderPassCreateInfo renderPassInfo = {};
renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
renderPassInfo.attachmentCount = 1;
renderPassInfo.pAttachments = &colorAttachment;
renderPassInfo.subpassCount = 1;
renderPassInfo.pSubpasses = &subpass;
VkSubpassDependency dependency = {};
dependency.srcSubpass = VK_SUBPASS_EXTERNAL;
dependency.dstSubpass = 0;
dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
dependency.srcAccessMask = 0;
dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
dependency.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
renderPassInfo.dependencyCount = 1;
renderPassInfo.pDependencies = &dependency;
if (vkCreateRenderPass(device, &renderPassInfo, nullptr, &renderPass) !=
VK_SUCCESS) {
throw std::runtime_error("failed to create render pass!");
}
}
void createFramebuffers() {
swapChainFramebuffers.resize(swapChainImageViews.size());
for (size_t i = 0; i < swapChainImageViews.size(); i++) {
VkImageView attachments[] = {swapChainImageViews[i]};
VkFramebufferCreateInfo framebufferInfo = {};
framebufferInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
framebufferInfo.renderPass = renderPass;
framebufferInfo.attachmentCount = 1;
framebufferInfo.pAttachments = attachments;
framebufferInfo.width = swapChainExtent.width;
framebufferInfo.height = swapChainExtent.height;
framebufferInfo.layers = 1;
if (vkCreateFramebuffer(device, &framebufferInfo, nullptr,
&swapChainFramebuffers[i]) != VK_SUCCESS) {
throw std::runtime_error("failed to create framebuffer!");
}
}
}
void createCommandPool() {
QueueFamilyIndices queueFamilyIndices = findQueueFamilies(physicalDevice);
VkCommandPoolCreateInfo poolInfo = {};
poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
poolInfo.queueFamilyIndex = queueFamilyIndices.graphicsFamily;
poolInfo.flags = 0; // Optional
if (vkCreateCommandPool(device, &poolInfo, nullptr, &commandPool) !=
VK_SUCCESS) {
throw std::runtime_error("failed to create command pool!");
}
}
void createBuffer(VkDeviceSize size, VkBufferUsageFlags usage,
VkMemoryPropertyFlags properties, VkBuffer& buffer,
VkDeviceMemory& bufferMemory) {
VkBufferCreateInfo bufferInfo = {};
bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
bufferInfo.size = size;
bufferInfo.usage = usage;
bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
if (vkCreateBuffer(device, &bufferInfo, nullptr, &buffer) != VK_SUCCESS) {
throw std::runtime_error("failed to create buffer!");
}
VkMemoryRequirements memRequirements;
vkGetBufferMemoryRequirements(device, buffer, &memRequirements);
VkMemoryAllocateInfo allocInfo = {};
allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
allocInfo.allocationSize = memRequirements.size;
allocInfo.memoryTypeIndex =
findMemoryType(memRequirements.memoryTypeBits, properties);
if (vkAllocateMemory(device, &allocInfo, nullptr, &bufferMemory) !=
VK_SUCCESS) {
throw std::runtime_error("failed to allocate buffer memory!");
}
vkBindBufferMemory(device, buffer, bufferMemory, 0);
}
void createBufferExtMem(VkDeviceSize size, VkBufferUsageFlags usage,
VkMemoryPropertyFlags properties,
VkExternalMemoryHandleTypeFlagsKHR extMemHandleType,
VkBuffer& buffer, VkDeviceMemory& bufferMemory) {
VkBufferCreateInfo bufferInfo = {};
bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
bufferInfo.size = size;
bufferInfo.usage = usage;
bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
if (vkCreateBuffer(device, &bufferInfo, nullptr, &buffer) != VK_SUCCESS) {
throw std::runtime_error("failed to create buffer!");
}
VkMemoryRequirements memRequirements;
vkGetBufferMemoryRequirements(device, buffer, &memRequirements);
#ifdef _WIN64
WindowsSecurityAttributes winSecurityAttributes;
VkExportMemoryWin32HandleInfoKHR vulkanExportMemoryWin32HandleInfoKHR = {};
vulkanExportMemoryWin32HandleInfoKHR.sType =
VK_STRUCTURE_TYPE_EXPORT_MEMORY_WIN32_HANDLE_INFO_KHR;
vulkanExportMemoryWin32HandleInfoKHR.pNext = NULL;
vulkanExportMemoryWin32HandleInfoKHR.pAttributes = &winSecurityAttributes;
vulkanExportMemoryWin32HandleInfoKHR.dwAccess =
DXGI_SHARED_RESOURCE_READ | DXGI_SHARED_RESOURCE_WRITE;
vulkanExportMemoryWin32HandleInfoKHR.name = (LPCWSTR)NULL;
#endif
VkExportMemoryAllocateInfoKHR vulkanExportMemoryAllocateInfoKHR = {};
vulkanExportMemoryAllocateInfoKHR.sType =
VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR;
#ifdef _WIN64
vulkanExportMemoryAllocateInfoKHR.pNext =
extMemHandleType & VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR
? &vulkanExportMemoryWin32HandleInfoKHR
: NULL;
vulkanExportMemoryAllocateInfoKHR.handleTypes = extMemHandleType;
#else
vulkanExportMemoryAllocateInfoKHR.pNext = NULL;
vulkanExportMemoryAllocateInfoKHR.handleTypes =
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
#endif
VkMemoryAllocateInfo allocInfo = {};
allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
allocInfo.pNext = &vulkanExportMemoryAllocateInfoKHR;
allocInfo.allocationSize = memRequirements.size;
allocInfo.memoryTypeIndex =
findMemoryType(memRequirements.memoryTypeBits, properties);
if (vkAllocateMemory(device, &allocInfo, nullptr, &bufferMemory) !=
VK_SUCCESS) {
throw std::runtime_error("failed to allocate external buffer memory!");
}
vkBindBufferMemory(device, buffer, bufferMemory, 0);
}
void createVertexBuffer() {
mesh_width = swapChainExtent.width / 2;
mesh_height = swapChainExtent.height / 2;
vertexBufSize = mesh_height * mesh_width;
VkDeviceSize bufferSize = sizeof(Vertex) * vertexBufSize;
#ifdef _WIN64
if (IsWindows8OrGreater()) {
createBufferExtMem(bufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT,
vertexBuffer, vertexBufferMemory);
} else {
createBufferExtMem(bufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
vertexBuffer, vertexBufferMemory);
}
#else
createBufferExtMem(bufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
vertexBuffer, vertexBufferMemory);
#endif
}
void cudaInitVertexMem() {
checkCudaErrors(cudaStreamCreate(&streamToRun));
dim3 block(16, 16, 1);
dim3 grid(mesh_width / 16, mesh_height / 16, 1);
Vertex* vertices = (Vertex*)cudaDevVertptr;
sinewave_gen_kernel<<<grid, block, 0, streamToRun>>>(vertices, mesh_width,
mesh_height, 1.0);
checkCudaErrors(cudaStreamSynchronize(streamToRun));
}
void createUniformBuffer() {
VkDeviceSize bufferSize = sizeof(UniformBufferObject);
createBuffer(bufferSize, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
uniformBuffer, uniformBufferMemory);
}
uint32_t findMemoryType(uint32_t typeFilter,
VkMemoryPropertyFlags properties) {
VkPhysicalDeviceMemoryProperties memProperties;
vkGetPhysicalDeviceMemoryProperties(physicalDevice, &memProperties);
for (uint32_t i = 0; i < memProperties.memoryTypeCount; i++) {
if (typeFilter & (1 << i) && (memProperties.memoryTypes[i].propertyFlags &
properties) == properties) {
return i;
}
}
throw std::runtime_error("failed to find suitable memory type!");
}
void getKhrExtensionsFn() {
#ifdef _WIN64
fpGetSemaphoreWin32HandleKHR =
(PFN_vkGetSemaphoreWin32HandleKHR)vkGetDeviceProcAddr(
device, "vkGetSemaphoreWin32HandleKHR");
if (fpGetSemaphoreWin32HandleKHR == NULL) {
throw std::runtime_error(
"Vulkan: Proc address for \"vkGetSemaphoreWin32HandleKHR\" not "
"found.\n");
}
#else
fpGetSemaphoreFdKHR = (PFN_vkGetSemaphoreFdKHR)vkGetDeviceProcAddr(
device, "vkGetSemaphoreFdKHR");
if (fpGetSemaphoreFdKHR == NULL) {
throw std::runtime_error(
"Vulkan: Proc address for \"vkGetSemaphoreFdKHR\" not found.\n");
}
#endif
}
void createCommandBuffers() {
commandBuffers.resize(swapChainFramebuffers.size());
VkCommandBufferAllocateInfo allocInfo = {};
allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
allocInfo.commandPool = commandPool;
allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
allocInfo.commandBufferCount = (uint32_t)commandBuffers.size();
if (vkAllocateCommandBuffers(device, &allocInfo, commandBuffers.data()) !=
VK_SUCCESS) {
throw std::runtime_error("failed to allocate command buffers!");
}
for (size_t i = 0; i < commandBuffers.size(); i++) {
VkCommandBufferBeginInfo beginInfo = {};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;
beginInfo.pInheritanceInfo = nullptr; // Optional
if (vkBeginCommandBuffer(commandBuffers[i], &beginInfo) != VK_SUCCESS) {
throw std::runtime_error("failed to begin recording command buffer!");
}
VkRenderPassBeginInfo renderPassInfo = {};
renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
renderPassInfo.renderPass = renderPass;
renderPassInfo.framebuffer = swapChainFramebuffers[i];
renderPassInfo.renderArea.offset = {0, 0};
renderPassInfo.renderArea.extent = swapChainExtent;
VkClearValue clearColor = {0.0f, 0.0f, 0.0f, 1.0f};
renderPassInfo.clearValueCount = 1;
renderPassInfo.pClearValues = &clearColor;
vkCmdBeginRenderPass(commandBuffers[i], &renderPassInfo,
VK_SUBPASS_CONTENTS_INLINE);
vkCmdBindPipeline(commandBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS,
graphicsPipeline);
VkBuffer vertexBuffers[] = {vertexBuffer};
VkDeviceSize offsets[] = {0};
vkCmdBindVertexBuffers(commandBuffers[i], 0, 1, vertexBuffers, offsets);
vkCmdBindDescriptorSets(commandBuffers[i],
VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout,
0, 1, &descriptorSet, 0, nullptr);
vkCmdDraw(commandBuffers[i], static_cast<uint32_t>(vertexBufSize), 1, 0,
0);
vkCmdEndRenderPass(commandBuffers[i]);
if (vkEndCommandBuffer(commandBuffers[i]) != VK_SUCCESS) {
throw std::runtime_error("failed to record command buffer!");
}
}
}
VkShaderModule createShaderModule(const std::vector<char>& code) {
VkShaderModuleCreateInfo createInfo = {};
createInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
createInfo.codeSize = code.size();
createInfo.pCode = reinterpret_cast<const uint32_t*>(code.data());
VkShaderModule shaderModule;
if (vkCreateShaderModule(device, &createInfo, nullptr, &shaderModule) !=
VK_SUCCESS) {
throw std::runtime_error("failed to create shader module!");
}
return shaderModule;
}
static std::vector<char> readFile(const std::string& filename) {
char* file_path = sdkFindFilePath(filename.c_str(), execution_path.c_str());
std::ifstream file(file_path, std::ios::ate | std::ios::binary);
if (!file.is_open()) {
throw std::runtime_error("failed to open shader spv file!\n");
}
size_t fileSize = (size_t)file.tellg();
std::vector<char> buffer(fileSize);
file.seekg(0);
file.read(buffer.data(), fileSize);
file.close();
return buffer;
}
void mainLoop() {
updateUniformBuffer();
while (!glfwWindowShouldClose(window)) {
glfwPollEvents();
drawFrame();
}
vkDeviceWaitIdle(device);
}
void updateUniformBuffer() {
UniformBufferObject ubo = {};
mat4x4_identity(ubo.model);
mat4x4 Model;
mat4x4_dup(Model, ubo.model);
mat4x4_rotate(ubo.model, Model, 1.0f, 0.0f, 1.0f, degreesToRadians(45.0f));
vec3 eye = {2.0f, 2.0f, 2.0f};
vec3 center = {0.0f, 0.0f, 0.0f};
vec3 up = {0.0f, 0.0f, 1.0f};
mat4x4_look_at(ubo.view, eye, center, up);
mat4x4_perspective(ubo.proj, degreesToRadians(45.0f),
swapChainExtent.width / (float)swapChainExtent.height,
0.1f, 10.0f);
ubo.proj[1][1] *= -1;
void* data;
vkMapMemory(device, uniformBufferMemory, 0, sizeof(ubo), 0, &data);
memcpy(data, &ubo, sizeof(ubo));
vkUnmapMemory(device, uniformBufferMemory);
}
void createDescriptorPool() {
VkDescriptorPoolSize poolSize = {};
poolSize.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
poolSize.descriptorCount = 1;
VkDescriptorPoolCreateInfo poolInfo = {};
poolInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
poolInfo.poolSizeCount = 1;
poolInfo.pPoolSizes = &poolSize;
poolInfo.maxSets = 1;
if (vkCreateDescriptorPool(device, &poolInfo, nullptr, &descriptorPool) !=
VK_SUCCESS) {
throw std::runtime_error("failed to create descriptor pool!");
}
}
void createDescriptorSet() {
VkDescriptorSetLayout layouts[] = {descriptorSetLayout};
VkDescriptorSetAllocateInfo allocInfo = {};
allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
allocInfo.descriptorPool = descriptorPool;
allocInfo.descriptorSetCount = 1;
allocInfo.pSetLayouts = layouts;
if (vkAllocateDescriptorSets(device, &allocInfo, &descriptorSet) !=
VK_SUCCESS) {
throw std::runtime_error("failed to allocate descriptor set!");
}
VkDescriptorBufferInfo bufferInfo = {};
bufferInfo.buffer = uniformBuffer;
bufferInfo.offset = 0;
bufferInfo.range = sizeof(UniformBufferObject);
VkWriteDescriptorSet descriptorWrite = {};
descriptorWrite.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
descriptorWrite.dstSet = descriptorSet;
descriptorWrite.dstBinding = 0;
descriptorWrite.dstArrayElement = 0;
descriptorWrite.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
descriptorWrite.descriptorCount = 1;
descriptorWrite.pBufferInfo = &bufferInfo;
descriptorWrite.pImageInfo = nullptr; // Optional
descriptorWrite.pTexelBufferView = nullptr; // Optional
vkUpdateDescriptorSets(device, 1, &descriptorWrite, 0, nullptr);
}
void drawFrame() {
uint32_t imageIndex;
vkAcquireNextImageKHR(device, swapChain,
std::numeric_limits<uint64_t>::max(),
imageAvailableSemaphore, VK_NULL_HANDLE, &imageIndex);
if (!startSubmit) {
submitVulkan(imageIndex);
startSubmit = 1;
} else {
submitVulkanCuda(imageIndex);
}
VkPresentInfoKHR presentInfo = {};
presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
VkSemaphore signalSemaphores[] = {renderFinishedSemaphore};
presentInfo.waitSemaphoreCount = 1;
presentInfo.pWaitSemaphores = signalSemaphores;
VkSwapchainKHR swapChains[] = {swapChain};
presentInfo.swapchainCount = 1;
presentInfo.pSwapchains = swapChains;
presentInfo.pImageIndices = &imageIndex;
presentInfo.pResults = nullptr; // Optional
vkQueuePresentKHR(presentQueue, &presentInfo);
cudaUpdateVertexBuffer();
// Added sleep of 5 millisecs so that CPU does not submit too much work to
// GPU
std::this_thread::sleep_for(std::chrono::microseconds(5000));
}
void submitVulkan(uint32_t imageIndex) {
VkSubmitInfo submitInfo = {};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
VkSemaphore waitSemaphores[] = {imageAvailableSemaphore};
VkPipelineStageFlags waitStages[] = {
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT};
submitInfo.waitSemaphoreCount = 1;
submitInfo.pWaitSemaphores = waitSemaphores;
submitInfo.pWaitDstStageMask = waitStages;
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &commandBuffers[imageIndex];
VkSemaphore signalSemaphores[] = {renderFinishedSemaphore,
vkUpdateCudaVertexBufSemaphore};
submitInfo.signalSemaphoreCount = 2;
submitInfo.pSignalSemaphores = signalSemaphores;
if (vkQueueSubmit(graphicsQueue, 1, &submitInfo, VK_NULL_HANDLE) !=
VK_SUCCESS) {
throw std::runtime_error("failed to submit draw command buffer!");
}
}
void submitVulkanCuda(uint32_t imageIndex) {
VkSubmitInfo submitInfo = {};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
VkSemaphore waitSemaphores[] = {imageAvailableSemaphore,
cudaUpdateVkVertexBufSemaphore};
VkPipelineStageFlags waitStages[] = {
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT};
submitInfo.waitSemaphoreCount = 2;
submitInfo.pWaitSemaphores = waitSemaphores;
submitInfo.pWaitDstStageMask = waitStages;
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &commandBuffers[imageIndex];
VkSemaphore signalSemaphores[] = {renderFinishedSemaphore,
vkUpdateCudaVertexBufSemaphore};
submitInfo.signalSemaphoreCount = 2;
submitInfo.pSignalSemaphores = signalSemaphores;
if (vkQueueSubmit(graphicsQueue, 1, &submitInfo, VK_NULL_HANDLE) !=
VK_SUCCESS) {
throw std::runtime_error("failed to submit draw command buffer!");
}
}
void createSyncObjects() {
VkSemaphoreCreateInfo semaphoreInfo = {};
semaphoreInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
if (vkCreateSemaphore(device, &semaphoreInfo, nullptr,
&imageAvailableSemaphore) != VK_SUCCESS ||
vkCreateSemaphore(device, &semaphoreInfo, nullptr,
&renderFinishedSemaphore) != VK_SUCCESS) {
throw std::runtime_error(
"failed to create synchronization objects for a frame!");
}
}
void createSyncObjectsExt() {
VkSemaphoreCreateInfo semaphoreInfo = {};
semaphoreInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
memset(&semaphoreInfo, 0, sizeof(semaphoreInfo));
semaphoreInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
#ifdef _WIN64
WindowsSecurityAttributes winSecurityAttributes;
VkExportSemaphoreWin32HandleInfoKHR
vulkanExportSemaphoreWin32HandleInfoKHR = {};
vulkanExportSemaphoreWin32HandleInfoKHR.sType =
VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_WIN32_HANDLE_INFO_KHR;
vulkanExportSemaphoreWin32HandleInfoKHR.pNext = NULL;
vulkanExportSemaphoreWin32HandleInfoKHR.pAttributes =
&winSecurityAttributes;
vulkanExportSemaphoreWin32HandleInfoKHR.dwAccess =
DXGI_SHARED_RESOURCE_READ | DXGI_SHARED_RESOURCE_WRITE;
vulkanExportSemaphoreWin32HandleInfoKHR.name = (LPCWSTR)NULL;
#endif
VkExportSemaphoreCreateInfoKHR vulkanExportSemaphoreCreateInfo = {};
vulkanExportSemaphoreCreateInfo.sType =
VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO_KHR;
#ifdef _WIN64
vulkanExportSemaphoreCreateInfo.pNext =
IsWindows8OrGreater() ? &vulkanExportSemaphoreWin32HandleInfoKHR : NULL;
vulkanExportSemaphoreCreateInfo.handleTypes =
IsWindows8OrGreater()
? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
: VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT;
#else
vulkanExportSemaphoreCreateInfo.pNext = NULL;
vulkanExportSemaphoreCreateInfo.handleTypes =
VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
#endif
semaphoreInfo.pNext = &vulkanExportSemaphoreCreateInfo;
if (vkCreateSemaphore(device, &semaphoreInfo, nullptr,
&cudaUpdateVkVertexBufSemaphore) != VK_SUCCESS ||
vkCreateSemaphore(device, &semaphoreInfo, nullptr,
&vkUpdateCudaVertexBufSemaphore) != VK_SUCCESS) {
throw std::runtime_error(
"failed to create synchronization objects for a CUDA-Vulkan!");
}
}
void cudaVkImportVertexMem() {
cudaExternalMemoryHandleDesc cudaExtMemHandleDesc;
memset(&cudaExtMemHandleDesc, 0, sizeof(cudaExtMemHandleDesc));
#ifdef _WIN64
cudaExtMemHandleDesc.type =
IsWindows8OrGreater() ? cudaExternalMemoryHandleTypeOpaqueWin32
: cudaExternalMemoryHandleTypeOpaqueWin32Kmt;
cudaExtMemHandleDesc.handle.win32.handle = getVkMemHandle(
IsWindows8OrGreater()
? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
: VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT);
#else
cudaExtMemHandleDesc.type = cudaExternalMemoryHandleTypeOpaqueFd;
cudaExtMemHandleDesc.handle.fd =
getVkMemHandle(VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);
#endif
cudaExtMemHandleDesc.size = sizeof(Vertex) * vertexBufSize;
checkCudaErrors(cudaImportExternalMemory(&cudaExtMemVertexBuffer,
&cudaExtMemHandleDesc));
cudaExternalMemoryBufferDesc cudaExtBufferDesc;
cudaExtBufferDesc.offset = 0;
cudaExtBufferDesc.size = sizeof(Vertex) * vertexBufSize;
cudaExtBufferDesc.flags = 0;
checkCudaErrors(cudaExternalMemoryGetMappedBuffer(
&cudaDevVertptr, cudaExtMemVertexBuffer, &cudaExtBufferDesc));
printf("CUDA Imported Vulkan vertex buffer\n");
}
void cudaVkImportSemaphore() {
cudaExternalSemaphoreHandleDesc externalSemaphoreHandleDesc;
memset(&externalSemaphoreHandleDesc, 0,
sizeof(externalSemaphoreHandleDesc));
#ifdef _WIN64
externalSemaphoreHandleDesc.type =
IsWindows8OrGreater() ? cudaExternalSemaphoreHandleTypeOpaqueWin32
: cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt;
externalSemaphoreHandleDesc.handle.win32.handle = getVkSemaphoreHandle(
IsWindows8OrGreater()
? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
: VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
cudaUpdateVkVertexBufSemaphore);
#else
externalSemaphoreHandleDesc.type = cudaExternalSemaphoreHandleTypeOpaqueFd;
externalSemaphoreHandleDesc.handle.fd =
getVkSemaphoreHandle(VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
cudaUpdateVkVertexBufSemaphore);
#endif
externalSemaphoreHandleDesc.flags = 0;
checkCudaErrors(cudaImportExternalSemaphore(
&cudaExtCudaUpdateVkVertexBufSemaphore, &externalSemaphoreHandleDesc));
memset(&externalSemaphoreHandleDesc, 0,
sizeof(externalSemaphoreHandleDesc));
#ifdef _WIN64
externalSemaphoreHandleDesc.type =
IsWindows8OrGreater() ? cudaExternalSemaphoreHandleTypeOpaqueWin32
: cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt;
;
externalSemaphoreHandleDesc.handle.win32.handle = getVkSemaphoreHandle(
IsWindows8OrGreater()
? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
: VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
vkUpdateCudaVertexBufSemaphore);
#else
externalSemaphoreHandleDesc.type = cudaExternalSemaphoreHandleTypeOpaqueFd;
externalSemaphoreHandleDesc.handle.fd =
getVkSemaphoreHandle(VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
vkUpdateCudaVertexBufSemaphore);
#endif
externalSemaphoreHandleDesc.flags = 0;
checkCudaErrors(cudaImportExternalSemaphore(
&cudaExtVkUpdateCudaVertexBufSemaphore, &externalSemaphoreHandleDesc));
printf("CUDA Imported Vulkan semaphore\n");
}
#ifdef _WIN64 // For windows
HANDLE getVkMemHandle(
VkExternalMemoryHandleTypeFlagsKHR externalMemoryHandleType) {
HANDLE handle;
VkMemoryGetWin32HandleInfoKHR vkMemoryGetWin32HandleInfoKHR = {};
vkMemoryGetWin32HandleInfoKHR.sType =
VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR;
vkMemoryGetWin32HandleInfoKHR.pNext = NULL;
vkMemoryGetWin32HandleInfoKHR.memory = vertexBufferMemory;
vkMemoryGetWin32HandleInfoKHR.handleType =
(VkExternalMemoryHandleTypeFlagBitsKHR)externalMemoryHandleType;
fpGetMemoryWin32HandleKHR(device, &vkMemoryGetWin32HandleInfoKHR, &handle);
return handle;
}
#else
int getVkMemHandle(
VkExternalMemoryHandleTypeFlagsKHR externalMemoryHandleType) {
if (externalMemoryHandleType ==
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT) {
int fd;
VkMemoryGetFdInfoKHR vkMemoryGetFdInfoKHR = {};
vkMemoryGetFdInfoKHR.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR;
vkMemoryGetFdInfoKHR.pNext = NULL;
vkMemoryGetFdInfoKHR.memory = vertexBufferMemory;
vkMemoryGetFdInfoKHR.handleType =
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
fpGetMemoryFdKHR(device, &vkMemoryGetFdInfoKHR, &fd);
return fd;
}
return -1;
}
#endif
#ifdef _WIN64
HANDLE getVkSemaphoreHandle(
VkExternalSemaphoreHandleTypeFlagBitsKHR externalSemaphoreHandleType,
VkSemaphore& semVkCuda) {
HANDLE handle;
VkSemaphoreGetWin32HandleInfoKHR vulkanSemaphoreGetWin32HandleInfoKHR = {};
vulkanSemaphoreGetWin32HandleInfoKHR.sType =
VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR;
vulkanSemaphoreGetWin32HandleInfoKHR.pNext = NULL;
vulkanSemaphoreGetWin32HandleInfoKHR.semaphore = semVkCuda;
vulkanSemaphoreGetWin32HandleInfoKHR.handleType =
externalSemaphoreHandleType;
fpGetSemaphoreWin32HandleKHR(device, &vulkanSemaphoreGetWin32HandleInfoKHR,
&handle);
return handle;
}
#else
int getVkSemaphoreHandle(
VkExternalSemaphoreHandleTypeFlagBitsKHR externalSemaphoreHandleType,
VkSemaphore& semVkCuda) {
if (externalSemaphoreHandleType ==
VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
int fd;
VkSemaphoreGetFdInfoKHR vulkanSemaphoreGetFdInfoKHR = {};
vulkanSemaphoreGetFdInfoKHR.sType =
VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR;
vulkanSemaphoreGetFdInfoKHR.pNext = NULL;
vulkanSemaphoreGetFdInfoKHR.semaphore = semVkCuda;
vulkanSemaphoreGetFdInfoKHR.handleType =
VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
fpGetSemaphoreFdKHR(device, &vulkanSemaphoreGetFdInfoKHR, &fd);
return fd;
}
return -1;
}
#endif
void cudaVkSemaphoreSignal(cudaExternalSemaphore_t& extSemaphore) {
cudaExternalSemaphoreSignalParams extSemaphoreSignalParams;
memset(&extSemaphoreSignalParams, 0, sizeof(extSemaphoreSignalParams));
extSemaphoreSignalParams.params.fence.value = 0;
extSemaphoreSignalParams.flags = 0;
checkCudaErrors(cudaSignalExternalSemaphoresAsync(
&extSemaphore, &extSemaphoreSignalParams, 1, streamToRun));
}
void cudaVkSemaphoreWait(cudaExternalSemaphore_t& extSemaphore) {
cudaExternalSemaphoreWaitParams extSemaphoreWaitParams;
memset(&extSemaphoreWaitParams, 0, sizeof(extSemaphoreWaitParams));
extSemaphoreWaitParams.params.fence.value = 0;
extSemaphoreWaitParams.flags = 0;
checkCudaErrors(cudaWaitExternalSemaphoresAsync(
&extSemaphore, &extSemaphoreWaitParams, 1, streamToRun));
}
void cudaUpdateVertexBuffer() {
cudaVkSemaphoreWait(cudaExtVkUpdateCudaVertexBufSemaphore);
dim3 block(16, 16, 1);
dim3 grid(mesh_width / block.x, mesh_height / block.y, 1);
Vertex* pos = (Vertex*)cudaDevVertptr;
AnimTime += 0.01f;
sinewave_gen_kernel<<<grid, block, 0, streamToRun>>>(pos, mesh_width,
mesh_height, AnimTime);
cudaVkSemaphoreSignal(cudaExtCudaUpdateVkVertexBufSemaphore);
}
void cleanup() {
if (enableValidationLayers) {
DestroyDebugReportCallbackEXT(instance, callback, nullptr);
}
vkDestroySemaphore(device, renderFinishedSemaphore, nullptr);
vkDestroySemaphore(device, imageAvailableSemaphore, nullptr);
checkCudaErrors(
cudaDestroyExternalSemaphore(cudaExtCudaUpdateVkVertexBufSemaphore));
vkDestroySemaphore(device, cudaUpdateVkVertexBufSemaphore, nullptr);
checkCudaErrors(
cudaDestroyExternalSemaphore(cudaExtVkUpdateCudaVertexBufSemaphore));
vkDestroySemaphore(device, vkUpdateCudaVertexBufSemaphore, nullptr);
vkDestroyCommandPool(device, commandPool, nullptr);
for (auto framebuffer : swapChainFramebuffers) {
vkDestroyFramebuffer(device, framebuffer, nullptr);
}
for (auto imageView : swapChainImageViews) {
vkDestroyImageView(device, imageView, nullptr);
}
vkDestroyPipeline(device, graphicsPipeline, nullptr);
vkDestroyPipelineLayout(device, pipelineLayout, nullptr);
vkDestroyDescriptorSetLayout(device, descriptorSetLayout, nullptr);
vkDestroyBuffer(device, uniformBuffer, nullptr);
vkFreeMemory(device, uniformBufferMemory, nullptr);
vkDestroyRenderPass(device, renderPass, nullptr);
vkDestroySwapchainKHR(device, swapChain, nullptr);
checkCudaErrors(cudaDestroyExternalMemory(cudaExtMemVertexBuffer));
vkDestroyBuffer(device, vertexBuffer, nullptr);
vkFreeMemory(device, vertexBufferMemory, nullptr);
vkDestroyDescriptorPool(device, descriptorPool, nullptr);
vkDestroyDevice(device, nullptr);
vkDestroySurfaceKHR(instance, surface, nullptr);
vkDestroyInstance(instance, nullptr);
glfwDestroyWindow(window);
glfwTerminate();
}
};
int main(int argc, char* argv[]) {
execution_path = argv[0];
vulkanCudaApp app;
try {
app.run();
} catch (const std::runtime_error& e) {
std::cerr << e.what() << std::endl;
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}