mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2024-11-28 18:49:14 +08:00
2637 lines
95 KiB
Plaintext
2637 lines
95 KiB
Plaintext
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* * Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
* contributors may be used to endorse or promote products derived
|
|
* from this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#define GLFW_INCLUDE_VULKAN
|
|
#ifdef _WIN64
|
|
#include <aclapi.h>
|
|
#include <dxgi1_2.h>
|
|
#include <windows.h>
|
|
#include <VersionHelpers.h>
|
|
#define _USE_MATH_DEFINES
|
|
#endif
|
|
|
|
#include <GLFW/glfw3.h>
|
|
#include <vulkan/vulkan.h>
|
|
#ifdef _WIN64
|
|
#include <vulkan/vulkan_win32.h>
|
|
#endif
|
|
|
|
#include <algorithm>
|
|
#include <array>
|
|
#include <chrono>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <fstream>
|
|
#include <iostream>
|
|
#include <set>
|
|
#include <stdexcept>
|
|
#include <thread>
|
|
#include <vector>
|
|
|
|
#include <cuda.h>
|
|
#include <cuda_runtime.h>
|
|
#include <helper_cuda.h>
|
|
#include <helper_image.h>
|
|
#include <helper_math.h>
|
|
|
|
#include "linmath.h"
|
|
|
|
#define WIDTH 800
|
|
#define HEIGHT 600
|
|
|
|
const int MAX_FRAMES = 4;
|
|
|
|
const std::vector<const char*> validationLayers = {
|
|
"VK_LAYER_KHRONOS_validation"};
|
|
|
|
#ifdef NDEBUG
|
|
const bool enableValidationLayers = true;
|
|
#else
|
|
const bool enableValidationLayers = false;
|
|
#endif
|
|
|
|
std::string execution_path;
|
|
|
|
VkResult CreateDebugUtilsMessengerEXT(
|
|
VkInstance instance, const VkDebugUtilsMessengerCreateInfoEXT* pCreateInfo,
|
|
const VkAllocationCallbacks* pAllocator,
|
|
VkDebugUtilsMessengerEXT* pDebugMessenger) {
|
|
auto func = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr(
|
|
instance, "vkCreateDebugUtilsMessengerEXT");
|
|
if (func != nullptr) {
|
|
return func(instance, pCreateInfo, pAllocator, pDebugMessenger);
|
|
} else {
|
|
return VK_ERROR_EXTENSION_NOT_PRESENT;
|
|
}
|
|
};
|
|
|
|
const std::vector<const char*> deviceExtensions = {
|
|
VK_KHR_SWAPCHAIN_EXTENSION_NAME,
|
|
VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME,
|
|
VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME,
|
|
#ifdef _WIN64
|
|
VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
|
|
VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME,
|
|
#else
|
|
VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
|
|
VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME,
|
|
#endif
|
|
};
|
|
|
|
#ifdef _WIN64
|
|
class WindowsSecurityAttributes {
|
|
protected:
|
|
SECURITY_ATTRIBUTES m_winSecurityAttributes;
|
|
PSECURITY_DESCRIPTOR m_winPSecurityDescriptor;
|
|
|
|
public:
|
|
WindowsSecurityAttributes();
|
|
SECURITY_ATTRIBUTES* operator&();
|
|
~WindowsSecurityAttributes();
|
|
};
|
|
|
|
WindowsSecurityAttributes::WindowsSecurityAttributes() {
|
|
m_winPSecurityDescriptor = (PSECURITY_DESCRIPTOR)calloc(
|
|
1, SECURITY_DESCRIPTOR_MIN_LENGTH + 2 * sizeof(void**));
|
|
|
|
PSID* ppSID =
|
|
(PSID*)((PBYTE)m_winPSecurityDescriptor + SECURITY_DESCRIPTOR_MIN_LENGTH);
|
|
PACL* ppACL = (PACL*)((PBYTE)ppSID + sizeof(PSID*));
|
|
|
|
InitializeSecurityDescriptor(m_winPSecurityDescriptor,
|
|
SECURITY_DESCRIPTOR_REVISION);
|
|
|
|
SID_IDENTIFIER_AUTHORITY sidIdentifierAuthority =
|
|
SECURITY_WORLD_SID_AUTHORITY;
|
|
AllocateAndInitializeSid(&sidIdentifierAuthority, 1, SECURITY_WORLD_RID, 0, 0,
|
|
0, 0, 0, 0, 0, ppSID);
|
|
|
|
EXPLICIT_ACCESS explicitAccess;
|
|
ZeroMemory(&explicitAccess, sizeof(EXPLICIT_ACCESS));
|
|
explicitAccess.grfAccessPermissions =
|
|
STANDARD_RIGHTS_ALL | SPECIFIC_RIGHTS_ALL;
|
|
explicitAccess.grfAccessMode = SET_ACCESS;
|
|
explicitAccess.grfInheritance = INHERIT_ONLY;
|
|
explicitAccess.Trustee.TrusteeForm = TRUSTEE_IS_SID;
|
|
explicitAccess.Trustee.TrusteeType = TRUSTEE_IS_WELL_KNOWN_GROUP;
|
|
explicitAccess.Trustee.ptstrName = (LPTSTR)*ppSID;
|
|
|
|
SetEntriesInAcl(1, &explicitAccess, NULL, ppACL);
|
|
|
|
SetSecurityDescriptorDacl(m_winPSecurityDescriptor, TRUE, *ppACL, FALSE);
|
|
|
|
m_winSecurityAttributes.nLength = sizeof(m_winSecurityAttributes);
|
|
m_winSecurityAttributes.lpSecurityDescriptor = m_winPSecurityDescriptor;
|
|
m_winSecurityAttributes.bInheritHandle = TRUE;
|
|
}
|
|
|
|
SECURITY_ATTRIBUTES* WindowsSecurityAttributes::operator&() {
|
|
return &m_winSecurityAttributes;
|
|
}
|
|
|
|
WindowsSecurityAttributes::~WindowsSecurityAttributes() {
|
|
PSID* ppSID =
|
|
(PSID*)((PBYTE)m_winPSecurityDescriptor + SECURITY_DESCRIPTOR_MIN_LENGTH);
|
|
PACL* ppACL = (PACL*)((PBYTE)ppSID + sizeof(PSID*));
|
|
|
|
if (*ppSID) {
|
|
FreeSid(*ppSID);
|
|
}
|
|
if (*ppACL) {
|
|
LocalFree(*ppACL);
|
|
}
|
|
free(m_winPSecurityDescriptor);
|
|
}
|
|
#endif
|
|
|
|
void DestroyDebugUtilsMessengerEXT(VkInstance instance,
|
|
VkDebugUtilsMessengerEXT debugMessenger,
|
|
const VkAllocationCallbacks* pAllocator) {
|
|
auto func = (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr(
|
|
instance, "vkDestroyDebugUtilsMessengerEXT");
|
|
if (func != nullptr) {
|
|
func(instance, debugMessenger, pAllocator);
|
|
}
|
|
}
|
|
|
|
struct QueueFamilyIndices {
|
|
int graphicsFamily = -1;
|
|
int presentFamily = -1;
|
|
|
|
bool isComplete() { return graphicsFamily >= 0 && presentFamily >= 0; }
|
|
};
|
|
|
|
struct SwapChainSupportDetails {
|
|
VkSurfaceCapabilitiesKHR capabilities;
|
|
std::vector<VkSurfaceFormatKHR> formats;
|
|
std::vector<VkPresentModeKHR> presentModes;
|
|
};
|
|
|
|
typedef float vec2[2];
|
|
|
|
struct Vertex {
|
|
vec4 pos;
|
|
vec3 color;
|
|
vec2 texCoord;
|
|
|
|
static VkVertexInputBindingDescription getBindingDescription() {
|
|
VkVertexInputBindingDescription bindingDescription = {};
|
|
bindingDescription.binding = 0;
|
|
bindingDescription.stride = sizeof(Vertex);
|
|
bindingDescription.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
|
|
|
|
return bindingDescription;
|
|
}
|
|
|
|
static std::array<VkVertexInputAttributeDescription, 3>
|
|
getAttributeDescriptions() {
|
|
std::array<VkVertexInputAttributeDescription, 3> attributeDescriptions = {};
|
|
|
|
attributeDescriptions[0].binding = 0;
|
|
attributeDescriptions[0].location = 0;
|
|
attributeDescriptions[0].format = VK_FORMAT_R32G32B32A32_SFLOAT;
|
|
attributeDescriptions[0].offset = offsetof(Vertex, pos);
|
|
|
|
attributeDescriptions[1].binding = 0;
|
|
attributeDescriptions[1].location = 1;
|
|
attributeDescriptions[1].format = VK_FORMAT_R32G32B32_SFLOAT;
|
|
attributeDescriptions[1].offset = offsetof(Vertex, color);
|
|
|
|
attributeDescriptions[2].binding = 0;
|
|
attributeDescriptions[2].location = 2;
|
|
attributeDescriptions[2].format = VK_FORMAT_R32G32_SFLOAT;
|
|
attributeDescriptions[2].offset = offsetof(Vertex, texCoord);
|
|
|
|
return attributeDescriptions;
|
|
}
|
|
};
|
|
|
|
struct UniformBufferObject {
|
|
alignas(16) mat4x4 model;
|
|
alignas(16) mat4x4 view;
|
|
alignas(16) mat4x4 proj;
|
|
};
|
|
|
|
const std::vector<Vertex> vertices = {
|
|
{{-1.0f, -1.0f, 0.0f, 1.0f}, {1.0f, 0.0f, 0.0f}, {0.0f, 0.0f}},
|
|
{{1.0f, -1.0f, 0.0f, 1.0f}, {0.0f, 1.0f, 0.0f}, {1.0f, 0.0f}},
|
|
{{1.0f, 1.0f, 0.0f, 1.0f}, {0.0f, 0.0f, 1.0f}, {1.0f, 1.0f}},
|
|
{{-1.0f, 1.0f, 0.0f, 1.0f}, {1.0f, 1.0f, 1.0f}, {0.0f, 1.0f}}};
|
|
|
|
const std::vector<uint16_t> indices = {0, 1, 2, 2, 3, 0};
|
|
|
|
// convert floating point rgba color to 32-bit integer
|
|
__device__ unsigned int rgbaFloatToInt(float4 rgba) {
|
|
rgba.x = __saturatef(rgba.x); // clamp to [0.0, 1.0]
|
|
rgba.y = __saturatef(rgba.y);
|
|
rgba.z = __saturatef(rgba.z);
|
|
rgba.w = __saturatef(rgba.w);
|
|
return ((unsigned int)(rgba.w * 255.0f) << 24) |
|
|
((unsigned int)(rgba.z * 255.0f) << 16) |
|
|
((unsigned int)(rgba.y * 255.0f) << 8) |
|
|
((unsigned int)(rgba.x * 255.0f));
|
|
}
|
|
|
|
__device__ float4 rgbaIntToFloat(unsigned int c) {
|
|
float4 rgba;
|
|
rgba.x = (c & 0xff) * 0.003921568627f; // /255.0f;
|
|
rgba.y = ((c >> 8) & 0xff) * 0.003921568627f; // /255.0f;
|
|
rgba.z = ((c >> 16) & 0xff) * 0.003921568627f; // /255.0f;
|
|
rgba.w = ((c >> 24) & 0xff) * 0.003921568627f; // /255.0f;
|
|
return rgba;
|
|
}
|
|
|
|
int filter_radius = 14;
|
|
int g_nFilterSign = 1;
|
|
|
|
// This varies the filter radius, so we can see automatic animation
|
|
void varySigma() {
|
|
filter_radius += g_nFilterSign;
|
|
|
|
if (filter_radius > 64) {
|
|
filter_radius = 64; // clamp to 64 and then negate sign
|
|
g_nFilterSign = -1;
|
|
} else if (filter_radius < 0) {
|
|
filter_radius = 0;
|
|
g_nFilterSign = 1;
|
|
}
|
|
}
|
|
|
|
// row pass using texture lookups
|
|
__global__ void d_boxfilter_rgba_x(cudaSurfaceObject_t* dstSurfMipMapArray,
|
|
cudaTextureObject_t textureMipMapInput,
|
|
size_t baseWidth, size_t baseHeight,
|
|
size_t mipLevels, int filter_radius) {
|
|
float scale = 1.0f / (float)((filter_radius << 1) + 1);
|
|
unsigned int y = blockIdx.x * blockDim.x + threadIdx.x;
|
|
|
|
if (y < baseHeight) {
|
|
for (uint32_t mipLevelIdx = 0; mipLevelIdx < mipLevels; mipLevelIdx++) {
|
|
uint32_t width =
|
|
(baseWidth >> mipLevelIdx) ? (baseWidth >> mipLevelIdx) : 1;
|
|
uint32_t height =
|
|
(baseHeight >> mipLevelIdx) ? (baseHeight >> mipLevelIdx) : 1;
|
|
if (y < height && filter_radius < width) {
|
|
float px = 1.0 / width;
|
|
float py = 1.0 / height;
|
|
float4 t = make_float4(0.0f);
|
|
for (int x = -filter_radius; x <= filter_radius; x++) {
|
|
t += tex2DLod<float4>(textureMipMapInput, x * px, y * py,
|
|
(float)mipLevelIdx);
|
|
}
|
|
|
|
unsigned int dataB = rgbaFloatToInt(t * scale);
|
|
surf2Dwrite(dataB, dstSurfMipMapArray[mipLevelIdx], 0, y);
|
|
|
|
for (int x = 1; x < width; x++) {
|
|
t += tex2DLod<float4>(textureMipMapInput, (x + filter_radius) * px,
|
|
y * py, (float)mipLevelIdx);
|
|
t -=
|
|
tex2DLod<float4>(textureMipMapInput, (x - filter_radius - 1) * px,
|
|
y * py, (float)mipLevelIdx);
|
|
unsigned int dataB = rgbaFloatToInt(t * scale);
|
|
surf2Dwrite(dataB, dstSurfMipMapArray[mipLevelIdx],
|
|
x * sizeof(uchar4), y);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// column pass using coalesced global memory reads
|
|
__global__ void d_boxfilter_rgba_y(cudaSurfaceObject_t* dstSurfMipMapArray,
|
|
cudaSurfaceObject_t* srcSurfMipMapArray,
|
|
size_t baseWidth, size_t baseHeight,
|
|
size_t mipLevels, int filter_radius) {
|
|
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
|
|
float scale = 1.0f / (float)((filter_radius << 1) + 1);
|
|
|
|
for (uint32_t mipLevelIdx = 0; mipLevelIdx < mipLevels; mipLevelIdx++) {
|
|
uint32_t width =
|
|
(baseWidth >> mipLevelIdx) ? (baseWidth >> mipLevelIdx) : 1;
|
|
uint32_t height =
|
|
(baseHeight >> mipLevelIdx) ? (baseHeight >> mipLevelIdx) : 1;
|
|
|
|
if (x < width && height > filter_radius) {
|
|
float4 t;
|
|
// do left edge
|
|
int colInBytes = x * sizeof(uchar4);
|
|
unsigned int pixFirst = surf2Dread<unsigned int>(
|
|
srcSurfMipMapArray[mipLevelIdx], colInBytes, 0);
|
|
t = rgbaIntToFloat(pixFirst) * filter_radius;
|
|
|
|
for (int y = 0; (y < (filter_radius + 1)) && (y < height); y++) {
|
|
unsigned int pix = surf2Dread<unsigned int>(
|
|
srcSurfMipMapArray[mipLevelIdx], colInBytes, y);
|
|
t += rgbaIntToFloat(pix);
|
|
}
|
|
|
|
unsigned int dataB = rgbaFloatToInt(t * scale);
|
|
surf2Dwrite(dataB, dstSurfMipMapArray[mipLevelIdx], colInBytes, 0);
|
|
|
|
for (int y = 1; (y < filter_radius + 1) && ((y + filter_radius) < height);
|
|
y++) {
|
|
unsigned int pix = surf2Dread<unsigned int>(
|
|
srcSurfMipMapArray[mipLevelIdx], colInBytes, y + filter_radius);
|
|
t += rgbaIntToFloat(pix);
|
|
t -= rgbaIntToFloat(pixFirst);
|
|
|
|
dataB = rgbaFloatToInt(t * scale);
|
|
surf2Dwrite(dataB, dstSurfMipMapArray[mipLevelIdx], colInBytes, y);
|
|
}
|
|
|
|
// main loop
|
|
for (int y = (filter_radius + 1); y < (height - filter_radius); y++) {
|
|
unsigned int pix = surf2Dread<unsigned int>(
|
|
srcSurfMipMapArray[mipLevelIdx], colInBytes, y + filter_radius);
|
|
t += rgbaIntToFloat(pix);
|
|
|
|
pix = surf2Dread<unsigned int>(srcSurfMipMapArray[mipLevelIdx],
|
|
colInBytes, y - filter_radius - 1);
|
|
t -= rgbaIntToFloat(pix);
|
|
|
|
dataB = rgbaFloatToInt(t * scale);
|
|
surf2Dwrite(dataB, dstSurfMipMapArray[mipLevelIdx], colInBytes, y);
|
|
}
|
|
|
|
// do right edge
|
|
unsigned int pixLast = surf2Dread<unsigned int>(
|
|
srcSurfMipMapArray[mipLevelIdx], colInBytes, height - 1);
|
|
for (int y = height - filter_radius;
|
|
(y < height) && ((y - filter_radius - 1) > 1); y++) {
|
|
t += rgbaIntToFloat(pixLast);
|
|
unsigned int pix = surf2Dread<unsigned int>(
|
|
srcSurfMipMapArray[mipLevelIdx], colInBytes, y - filter_radius - 1);
|
|
t -= rgbaIntToFloat(pix);
|
|
dataB = rgbaFloatToInt(t * scale);
|
|
surf2Dwrite(dataB, dstSurfMipMapArray[mipLevelIdx], colInBytes, y);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
class vulkanImageCUDA {
|
|
public:
|
|
void loadImageData(const std::string& filename) {
|
|
// load image (needed so we can get the width and height before we create
|
|
// the window
|
|
char* image_path =
|
|
sdkFindFilePath(filename.c_str(), execution_path.c_str());
|
|
|
|
if (image_path == 0) {
|
|
printf("Error finding image file '%s'\n", filename.c_str());
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
sdkLoadPPM4(image_path, (unsigned char**)&image_data, &imageWidth,
|
|
&imageHeight);
|
|
|
|
if (!image_data) {
|
|
printf("Error opening file '%s'\n", image_path);
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
printf("Loaded '%s', %d x %d pixels\n", image_path, imageWidth,
|
|
imageHeight);
|
|
}
|
|
|
|
void run() {
|
|
initWindow();
|
|
initVulkan();
|
|
initCuda();
|
|
mainLoop();
|
|
cleanup();
|
|
}
|
|
|
|
private:
|
|
GLFWwindow* window;
|
|
|
|
VkInstance instance;
|
|
VkDebugUtilsMessengerEXT debugMessenger;
|
|
VkSurfaceKHR surface;
|
|
|
|
VkPhysicalDevice physicalDevice = VK_NULL_HANDLE;
|
|
VkDevice device;
|
|
uint8_t vkDeviceUUID[VK_UUID_SIZE];
|
|
|
|
VkQueue graphicsQueue;
|
|
VkQueue presentQueue;
|
|
|
|
VkSwapchainKHR swapChain;
|
|
std::vector<VkImage> swapChainImages;
|
|
VkFormat swapChainImageFormat;
|
|
VkExtent2D swapChainExtent;
|
|
std::vector<VkImageView> swapChainImageViews;
|
|
std::vector<VkFramebuffer> swapChainFramebuffers;
|
|
|
|
VkRenderPass renderPass;
|
|
VkDescriptorSetLayout descriptorSetLayout;
|
|
VkPipelineLayout pipelineLayout;
|
|
VkPipeline graphicsPipeline;
|
|
|
|
VkCommandPool commandPool;
|
|
|
|
VkImage textureImage;
|
|
VkDeviceMemory textureImageMemory;
|
|
VkImageView textureImageView;
|
|
VkSampler textureSampler;
|
|
|
|
VkBuffer vertexBuffer;
|
|
VkDeviceMemory vertexBufferMemory;
|
|
VkBuffer indexBuffer;
|
|
VkDeviceMemory indexBufferMemory;
|
|
|
|
std::vector<VkBuffer> uniformBuffers;
|
|
std::vector<VkDeviceMemory> uniformBuffersMemory;
|
|
|
|
VkDescriptorPool descriptorPool;
|
|
std::vector<VkDescriptorSet> descriptorSets;
|
|
|
|
std::vector<VkCommandBuffer> commandBuffers;
|
|
|
|
std::vector<VkSemaphore> imageAvailableSemaphores;
|
|
std::vector<VkSemaphore> renderFinishedSemaphores;
|
|
VkSemaphore cudaUpdateVkSemaphore, vkUpdateCudaSemaphore;
|
|
std::vector<VkFence> inFlightFences;
|
|
|
|
size_t currentFrame = 0;
|
|
|
|
bool framebufferResized = false;
|
|
|
|
#ifdef _WIN64
|
|
PFN_vkGetMemoryWin32HandleKHR fpGetMemoryWin32HandleKHR;
|
|
PFN_vkGetSemaphoreWin32HandleKHR fpGetSemaphoreWin32HandleKHR;
|
|
#else
|
|
PFN_vkGetMemoryFdKHR fpGetMemoryFdKHR = NULL;
|
|
PFN_vkGetSemaphoreFdKHR fpGetSemaphoreFdKHR = NULL;
|
|
#endif
|
|
|
|
PFN_vkGetPhysicalDeviceProperties2 fpGetPhysicalDeviceProperties2;
|
|
|
|
unsigned int* image_data = NULL;
|
|
unsigned int imageWidth, imageHeight;
|
|
unsigned int mipLevels = 1;
|
|
size_t totalImageMemSize;
|
|
|
|
// CUDA objects
|
|
cudaExternalMemory_t cudaExtMemImageBuffer;
|
|
cudaMipmappedArray_t cudaMipmappedImageArray, cudaMipmappedImageArrayTemp,
|
|
cudaMipmappedImageArrayOrig;
|
|
std::vector<cudaSurfaceObject_t> surfaceObjectList, surfaceObjectListTemp;
|
|
cudaSurfaceObject_t *d_surfaceObjectList, *d_surfaceObjectListTemp;
|
|
cudaTextureObject_t textureObjMipMapInput;
|
|
|
|
cudaExternalSemaphore_t cudaExtCudaUpdateVkSemaphore;
|
|
cudaExternalSemaphore_t cudaExtVkUpdateCudaSemaphore;
|
|
cudaStream_t streamToRun;
|
|
|
|
void initWindow() {
|
|
glfwInit();
|
|
|
|
glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API);
|
|
|
|
window = glfwCreateWindow(WIDTH, HEIGHT, "Vulkan Image CUDA Box Filter",
|
|
nullptr, nullptr);
|
|
glfwSetWindowUserPointer(window, this);
|
|
glfwSetFramebufferSizeCallback(window, framebufferResizeCallback);
|
|
}
|
|
|
|
static void framebufferResizeCallback(GLFWwindow* window, int width,
|
|
int height) {
|
|
auto app =
|
|
reinterpret_cast<vulkanImageCUDA*>(glfwGetWindowUserPointer(window));
|
|
app->framebufferResized = true;
|
|
}
|
|
|
|
void initVulkan() {
|
|
createInstance();
|
|
setupDebugMessenger();
|
|
createSurface();
|
|
pickPhysicalDevice();
|
|
createLogicalDevice();
|
|
getKhrExtensionsFn();
|
|
createSwapChain();
|
|
createImageViews();
|
|
createRenderPass();
|
|
createDescriptorSetLayout();
|
|
createGraphicsPipeline();
|
|
createFramebuffers();
|
|
createCommandPool();
|
|
createTextureImage();
|
|
createTextureImageView();
|
|
createTextureSampler();
|
|
createVertexBuffer();
|
|
createIndexBuffer();
|
|
createUniformBuffers();
|
|
createDescriptorPool();
|
|
createDescriptorSets();
|
|
createCommandBuffers();
|
|
createSyncObjects();
|
|
createSyncObjectsExt();
|
|
}
|
|
|
|
void initCuda() {
|
|
setCudaVkDevice();
|
|
checkCudaErrors(cudaStreamCreate(&streamToRun));
|
|
cudaVkImportImageMem();
|
|
cudaVkImportSemaphore();
|
|
}
|
|
|
|
void mainLoop() {
|
|
updateUniformBuffer();
|
|
while (!glfwWindowShouldClose(window)) {
|
|
glfwPollEvents();
|
|
drawFrame();
|
|
}
|
|
|
|
vkDeviceWaitIdle(device);
|
|
}
|
|
|
|
void cleanupSwapChain() {
|
|
for (auto framebuffer : swapChainFramebuffers) {
|
|
vkDestroyFramebuffer(device, framebuffer, nullptr);
|
|
}
|
|
|
|
vkFreeCommandBuffers(device, commandPool,
|
|
static_cast<uint32_t>(commandBuffers.size()),
|
|
commandBuffers.data());
|
|
|
|
vkDestroyPipeline(device, graphicsPipeline, nullptr);
|
|
vkDestroyPipelineLayout(device, pipelineLayout, nullptr);
|
|
vkDestroyRenderPass(device, renderPass, nullptr);
|
|
|
|
for (auto imageView : swapChainImageViews) {
|
|
vkDestroyImageView(device, imageView, nullptr);
|
|
}
|
|
|
|
vkDestroySwapchainKHR(device, swapChain, nullptr);
|
|
|
|
for (size_t i = 0; i < swapChainImages.size(); i++) {
|
|
vkDestroyBuffer(device, uniformBuffers[i], nullptr);
|
|
vkFreeMemory(device, uniformBuffersMemory[i], nullptr);
|
|
}
|
|
|
|
vkDestroyDescriptorPool(device, descriptorPool, nullptr);
|
|
}
|
|
|
|
void cleanup() {
|
|
cleanupSwapChain();
|
|
|
|
vkDestroySampler(device, textureSampler, nullptr);
|
|
vkDestroyImageView(device, textureImageView, nullptr);
|
|
|
|
for (int i = 0; i < mipLevels; i++) {
|
|
checkCudaErrors(cudaDestroySurfaceObject(surfaceObjectList[i]));
|
|
checkCudaErrors(cudaDestroySurfaceObject(surfaceObjectListTemp[i]));
|
|
}
|
|
|
|
checkCudaErrors(cudaFree(d_surfaceObjectList));
|
|
checkCudaErrors(cudaFree(d_surfaceObjectListTemp));
|
|
checkCudaErrors(cudaFreeMipmappedArray(cudaMipmappedImageArrayTemp));
|
|
checkCudaErrors(cudaFreeMipmappedArray(cudaMipmappedImageArrayOrig));
|
|
checkCudaErrors(cudaFreeMipmappedArray(cudaMipmappedImageArray));
|
|
checkCudaErrors(cudaDestroyTextureObject(textureObjMipMapInput));
|
|
checkCudaErrors(cudaDestroyExternalMemory(cudaExtMemImageBuffer));
|
|
checkCudaErrors(cudaDestroyExternalSemaphore(cudaExtCudaUpdateVkSemaphore));
|
|
checkCudaErrors(cudaDestroyExternalSemaphore(cudaExtVkUpdateCudaSemaphore));
|
|
|
|
vkDestroyImage(device, textureImage, nullptr);
|
|
vkFreeMemory(device, textureImageMemory, nullptr);
|
|
|
|
vkDestroyDescriptorSetLayout(device, descriptorSetLayout, nullptr);
|
|
|
|
vkDestroyBuffer(device, indexBuffer, nullptr);
|
|
vkFreeMemory(device, indexBufferMemory, nullptr);
|
|
|
|
vkDestroyBuffer(device, vertexBuffer, nullptr);
|
|
vkFreeMemory(device, vertexBufferMemory, nullptr);
|
|
|
|
vkDestroySemaphore(device, cudaUpdateVkSemaphore, nullptr);
|
|
vkDestroySemaphore(device, vkUpdateCudaSemaphore, nullptr);
|
|
|
|
for (size_t i = 0; i < MAX_FRAMES; i++) {
|
|
vkDestroySemaphore(device, renderFinishedSemaphores[i], nullptr);
|
|
vkDestroySemaphore(device, imageAvailableSemaphores[i], nullptr);
|
|
vkDestroyFence(device, inFlightFences[i], nullptr);
|
|
}
|
|
|
|
vkDestroyCommandPool(device, commandPool, nullptr);
|
|
|
|
vkDestroyDevice(device, nullptr);
|
|
|
|
if (enableValidationLayers) {
|
|
DestroyDebugUtilsMessengerEXT(instance, debugMessenger, nullptr);
|
|
}
|
|
|
|
vkDestroySurfaceKHR(instance, surface, nullptr);
|
|
vkDestroyInstance(instance, nullptr);
|
|
|
|
glfwDestroyWindow(window);
|
|
|
|
glfwTerminate();
|
|
}
|
|
|
|
void recreateSwapChain() {
|
|
int width = 0, height = 0;
|
|
while (width == 0 || height == 0) {
|
|
glfwGetFramebufferSize(window, &width, &height);
|
|
glfwWaitEvents();
|
|
}
|
|
|
|
vkDeviceWaitIdle(device);
|
|
|
|
cleanupSwapChain();
|
|
|
|
createSwapChain();
|
|
createImageViews();
|
|
createRenderPass();
|
|
createGraphicsPipeline();
|
|
createFramebuffers();
|
|
createUniformBuffers();
|
|
createDescriptorPool();
|
|
createDescriptorSets();
|
|
createCommandBuffers();
|
|
}
|
|
|
|
void createInstance() {
|
|
if (enableValidationLayers && !checkValidationLayerSupport()) {
|
|
throw std::runtime_error(
|
|
"validation layers requested, but not available!");
|
|
}
|
|
|
|
VkApplicationInfo appInfo = {};
|
|
appInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
|
|
appInfo.pApplicationName = "Vulkan Image CUDA Interop";
|
|
appInfo.applicationVersion = VK_MAKE_VERSION(1, 0, 0);
|
|
appInfo.pEngineName = "No Engine";
|
|
appInfo.engineVersion = VK_MAKE_VERSION(1, 0, 0);
|
|
appInfo.apiVersion = VK_API_VERSION_1_1;
|
|
|
|
VkInstanceCreateInfo createInfo = {};
|
|
createInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
|
|
createInfo.pApplicationInfo = &appInfo;
|
|
|
|
auto extensions = getRequiredExtensions();
|
|
createInfo.enabledExtensionCount = static_cast<uint32_t>(extensions.size());
|
|
createInfo.ppEnabledExtensionNames = extensions.data();
|
|
|
|
VkDebugUtilsMessengerCreateInfoEXT debugCreateInfo;
|
|
if (enableValidationLayers) {
|
|
createInfo.enabledLayerCount =
|
|
static_cast<uint32_t>(validationLayers.size());
|
|
createInfo.ppEnabledLayerNames = validationLayers.data();
|
|
|
|
populateDebugMessengerCreateInfo(debugCreateInfo);
|
|
createInfo.pNext = (VkDebugUtilsMessengerCreateInfoEXT*)&debugCreateInfo;
|
|
} else {
|
|
createInfo.enabledLayerCount = 0;
|
|
|
|
createInfo.pNext = nullptr;
|
|
}
|
|
|
|
if (vkCreateInstance(&createInfo, nullptr, &instance) != VK_SUCCESS) {
|
|
throw std::runtime_error("failed to create instance!");
|
|
}
|
|
|
|
fpGetPhysicalDeviceProperties2 =
|
|
(PFN_vkGetPhysicalDeviceProperties2)vkGetInstanceProcAddr(
|
|
instance, "vkGetPhysicalDeviceProperties2");
|
|
if (fpGetPhysicalDeviceProperties2 == NULL) {
|
|
throw std::runtime_error(
|
|
"Vulkan: Proc address for \"vkGetPhysicalDeviceProperties2KHR\" not "
|
|
"found.\n");
|
|
}
|
|
|
|
#ifdef _WIN64
|
|
fpGetMemoryWin32HandleKHR =
|
|
(PFN_vkGetMemoryWin32HandleKHR)vkGetInstanceProcAddr(
|
|
instance, "vkGetMemoryWin32HandleKHR");
|
|
if (fpGetMemoryWin32HandleKHR == NULL) {
|
|
throw std::runtime_error(
|
|
"Vulkan: Proc address for \"vkGetMemoryWin32HandleKHR\" not "
|
|
"found.\n");
|
|
}
|
|
#else
|
|
fpGetMemoryFdKHR = (PFN_vkGetMemoryFdKHR)vkGetInstanceProcAddr(
|
|
instance, "vkGetMemoryFdKHR");
|
|
if (fpGetMemoryFdKHR == NULL) {
|
|
throw std::runtime_error(
|
|
"Vulkan: Proc address for \"vkGetMemoryFdKHR\" not found.\n");
|
|
} else {
|
|
std::cout << "Vulkan proc address for vkGetMemoryFdKHR - "
|
|
<< fpGetMemoryFdKHR << std::endl;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
void populateDebugMessengerCreateInfo(
|
|
VkDebugUtilsMessengerCreateInfoEXT& createInfo) {
|
|
createInfo = {};
|
|
createInfo.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT;
|
|
createInfo.messageSeverity =
|
|
VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
|
|
VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
|
|
VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT;
|
|
createInfo.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
|
|
VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
|
|
VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT;
|
|
createInfo.pfnUserCallback = debugCallback;
|
|
}
|
|
|
|
void setupDebugMessenger() {
|
|
if (!enableValidationLayers) return;
|
|
|
|
VkDebugUtilsMessengerCreateInfoEXT createInfo;
|
|
populateDebugMessengerCreateInfo(createInfo);
|
|
|
|
if (CreateDebugUtilsMessengerEXT(instance, &createInfo, nullptr,
|
|
&debugMessenger) != VK_SUCCESS) {
|
|
throw std::runtime_error("failed to set up debug messenger!");
|
|
}
|
|
}
|
|
|
|
void createSurface() {
|
|
if (glfwCreateWindowSurface(instance, window, nullptr, &surface) !=
|
|
VK_SUCCESS) {
|
|
throw std::runtime_error("failed to create window surface!");
|
|
}
|
|
}
|
|
|
|
void pickPhysicalDevice() {
|
|
uint32_t deviceCount = 0;
|
|
vkEnumeratePhysicalDevices(instance, &deviceCount, nullptr);
|
|
|
|
if (deviceCount == 0) {
|
|
throw std::runtime_error("failed to find GPUs with Vulkan support!");
|
|
}
|
|
|
|
std::vector<VkPhysicalDevice> devices(deviceCount);
|
|
vkEnumeratePhysicalDevices(instance, &deviceCount, devices.data());
|
|
|
|
for (const auto& device : devices) {
|
|
if (isDeviceSuitable(device)) {
|
|
physicalDevice = device;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (physicalDevice == VK_NULL_HANDLE) {
|
|
throw std::runtime_error("failed to find a suitable GPU!");
|
|
}
|
|
|
|
std::cout << "Selected physical device = " << physicalDevice << std::endl;
|
|
|
|
VkPhysicalDeviceIDProperties vkPhysicalDeviceIDProperties = {};
|
|
vkPhysicalDeviceIDProperties.sType =
|
|
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
|
|
vkPhysicalDeviceIDProperties.pNext = NULL;
|
|
|
|
VkPhysicalDeviceProperties2 vkPhysicalDeviceProperties2 = {};
|
|
vkPhysicalDeviceProperties2.sType =
|
|
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
|
|
vkPhysicalDeviceProperties2.pNext = &vkPhysicalDeviceIDProperties;
|
|
|
|
fpGetPhysicalDeviceProperties2(physicalDevice,
|
|
&vkPhysicalDeviceProperties2);
|
|
|
|
memcpy(vkDeviceUUID, vkPhysicalDeviceIDProperties.deviceUUID,
|
|
sizeof(vkDeviceUUID));
|
|
}
|
|
|
|
void getKhrExtensionsFn() {
|
|
#ifdef _WIN64
|
|
|
|
fpGetSemaphoreWin32HandleKHR =
|
|
(PFN_vkGetSemaphoreWin32HandleKHR)vkGetDeviceProcAddr(
|
|
device, "vkGetSemaphoreWin32HandleKHR");
|
|
if (fpGetSemaphoreWin32HandleKHR == NULL) {
|
|
throw std::runtime_error(
|
|
"Vulkan: Proc address for \"vkGetSemaphoreWin32HandleKHR\" not "
|
|
"found.\n");
|
|
}
|
|
#else
|
|
fpGetSemaphoreFdKHR = (PFN_vkGetSemaphoreFdKHR)vkGetDeviceProcAddr(
|
|
device, "vkGetSemaphoreFdKHR");
|
|
if (fpGetSemaphoreFdKHR == NULL) {
|
|
throw std::runtime_error(
|
|
"Vulkan: Proc address for \"vkGetSemaphoreFdKHR\" not found.\n");
|
|
}
|
|
#endif
|
|
}
|
|
|
|
int setCudaVkDevice() {
|
|
int current_device = 0;
|
|
int device_count = 0;
|
|
int devices_prohibited = 0;
|
|
|
|
cudaDeviceProp deviceProp;
|
|
checkCudaErrors(cudaGetDeviceCount(&device_count));
|
|
|
|
if (device_count == 0) {
|
|
fprintf(stderr, "CUDA error: no devices supporting CUDA.\n");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
// Find the GPU which is selected by Vulkan
|
|
while (current_device < device_count) {
|
|
cudaGetDeviceProperties(&deviceProp, current_device);
|
|
|
|
if ((deviceProp.computeMode != cudaComputeModeProhibited)) {
|
|
// Compare the cuda device UUID with vulkan UUID
|
|
int ret = memcmp(&deviceProp.uuid, &vkDeviceUUID, VK_UUID_SIZE);
|
|
if (ret == 0) {
|
|
checkCudaErrors(cudaSetDevice(current_device));
|
|
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, current_device));
|
|
printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n",
|
|
current_device, deviceProp.name, deviceProp.major,
|
|
deviceProp.minor);
|
|
|
|
return current_device;
|
|
}
|
|
|
|
} else {
|
|
devices_prohibited++;
|
|
}
|
|
|
|
current_device++;
|
|
}
|
|
|
|
if (devices_prohibited == device_count) {
|
|
fprintf(stderr,
|
|
"CUDA error:"
|
|
" No Vulkan-CUDA Interop capable GPU found.\n");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
return -1;
|
|
}
|
|
|
|
void createLogicalDevice() {
|
|
QueueFamilyIndices indices = findQueueFamilies(physicalDevice);
|
|
|
|
std::vector<VkDeviceQueueCreateInfo> queueCreateInfos;
|
|
std::set<int> uniqueQueueFamilies = {indices.graphicsFamily,
|
|
indices.presentFamily};
|
|
|
|
float queuePriority = 1.0f;
|
|
for (int queueFamily : uniqueQueueFamilies) {
|
|
VkDeviceQueueCreateInfo queueCreateInfo = {};
|
|
queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
|
|
queueCreateInfo.queueFamilyIndex = queueFamily;
|
|
queueCreateInfo.queueCount = 1;
|
|
queueCreateInfo.pQueuePriorities = &queuePriority;
|
|
queueCreateInfos.push_back(queueCreateInfo);
|
|
}
|
|
|
|
VkPhysicalDeviceFeatures deviceFeatures = {};
|
|
deviceFeatures.samplerAnisotropy = VK_TRUE;
|
|
|
|
VkDeviceCreateInfo createInfo = {};
|
|
createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
|
|
|
|
createInfo.pQueueCreateInfos = queueCreateInfos.data();
|
|
createInfo.queueCreateInfoCount = queueCreateInfos.size();
|
|
|
|
createInfo.pEnabledFeatures = &deviceFeatures;
|
|
std::vector<const char*> enabledExtensionNameList;
|
|
|
|
for (int i = 0; i < deviceExtensions.size(); i++) {
|
|
enabledExtensionNameList.push_back(deviceExtensions[i]);
|
|
}
|
|
if (enableValidationLayers) {
|
|
createInfo.enabledLayerCount =
|
|
static_cast<uint32_t>(validationLayers.size());
|
|
createInfo.ppEnabledLayerNames = validationLayers.data();
|
|
} else {
|
|
createInfo.enabledLayerCount = 0;
|
|
}
|
|
createInfo.enabledExtensionCount =
|
|
static_cast<uint32_t>(enabledExtensionNameList.size());
|
|
createInfo.ppEnabledExtensionNames = enabledExtensionNameList.data();
|
|
|
|
if (vkCreateDevice(physicalDevice, &createInfo, nullptr, &device) !=
|
|
VK_SUCCESS) {
|
|
throw std::runtime_error("failed to create logical device!");
|
|
}
|
|
vkGetDeviceQueue(device, indices.graphicsFamily, 0, &graphicsQueue);
|
|
vkGetDeviceQueue(device, indices.presentFamily, 0, &presentQueue);
|
|
}
|
|
|
|
void createSwapChain() {
|
|
SwapChainSupportDetails swapChainSupport =
|
|
querySwapChainSupport(physicalDevice);
|
|
|
|
VkSurfaceFormatKHR surfaceFormat =
|
|
chooseSwapSurfaceFormat(swapChainSupport.formats);
|
|
VkPresentModeKHR presentMode =
|
|
chooseSwapPresentMode(swapChainSupport.presentModes);
|
|
VkExtent2D extent = chooseSwapExtent(swapChainSupport.capabilities);
|
|
|
|
uint32_t imageCount = swapChainSupport.capabilities.minImageCount + 1;
|
|
if (swapChainSupport.capabilities.maxImageCount > 0 &&
|
|
imageCount > swapChainSupport.capabilities.maxImageCount) {
|
|
imageCount = swapChainSupport.capabilities.maxImageCount;
|
|
}
|
|
|
|
VkSwapchainCreateInfoKHR createInfo = {};
|
|
createInfo.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR;
|
|
createInfo.surface = surface;
|
|
|
|
createInfo.minImageCount = imageCount;
|
|
createInfo.imageFormat = surfaceFormat.format;
|
|
createInfo.imageColorSpace = surfaceFormat.colorSpace;
|
|
createInfo.imageExtent = extent;
|
|
createInfo.imageArrayLayers = 1;
|
|
createInfo.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
|
|
|
|
QueueFamilyIndices indices = findQueueFamilies(physicalDevice);
|
|
uint32_t queueFamilyIndices[] = {(uint32_t)indices.graphicsFamily,
|
|
(uint32_t)indices.presentFamily};
|
|
|
|
if (indices.graphicsFamily != indices.presentFamily) {
|
|
createInfo.imageSharingMode = VK_SHARING_MODE_CONCURRENT;
|
|
createInfo.queueFamilyIndexCount = 2;
|
|
createInfo.pQueueFamilyIndices = queueFamilyIndices;
|
|
} else {
|
|
createInfo.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
|
|
}
|
|
|
|
createInfo.preTransform = swapChainSupport.capabilities.currentTransform;
|
|
createInfo.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
|
|
createInfo.presentMode = presentMode;
|
|
createInfo.clipped = VK_TRUE;
|
|
|
|
if (vkCreateSwapchainKHR(device, &createInfo, nullptr, &swapChain) !=
|
|
VK_SUCCESS) {
|
|
throw std::runtime_error("failed to create swap chain!");
|
|
}
|
|
|
|
vkGetSwapchainImagesKHR(device, swapChain, &imageCount, nullptr);
|
|
swapChainImages.resize(imageCount);
|
|
vkGetSwapchainImagesKHR(device, swapChain, &imageCount,
|
|
swapChainImages.data());
|
|
|
|
swapChainImageFormat = surfaceFormat.format;
|
|
swapChainExtent = extent;
|
|
}
|
|
|
|
void createImageViews() {
|
|
swapChainImageViews.resize(swapChainImages.size());
|
|
|
|
for (size_t i = 0; i < swapChainImages.size(); i++) {
|
|
swapChainImageViews[i] =
|
|
createImageView(swapChainImages[i], swapChainImageFormat);
|
|
}
|
|
}
|
|
|
|
void createRenderPass() {
|
|
VkAttachmentDescription colorAttachment = {};
|
|
colorAttachment.format = swapChainImageFormat;
|
|
colorAttachment.samples = VK_SAMPLE_COUNT_1_BIT;
|
|
colorAttachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
|
|
colorAttachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
|
|
colorAttachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
|
|
colorAttachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
|
|
colorAttachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
|
colorAttachment.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
|
|
|
|
VkAttachmentReference colorAttachmentRef = {};
|
|
colorAttachmentRef.attachment = 0;
|
|
colorAttachmentRef.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
|
|
|
|
VkSubpassDescription subpass = {};
|
|
subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
|
|
subpass.colorAttachmentCount = 1;
|
|
subpass.pColorAttachments = &colorAttachmentRef;
|
|
|
|
VkSubpassDependency dependency = {};
|
|
dependency.srcSubpass = VK_SUBPASS_EXTERNAL;
|
|
dependency.dstSubpass = 0;
|
|
dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
|
|
dependency.srcAccessMask = 0;
|
|
dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
|
|
dependency.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
|
|
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
|
|
|
|
VkRenderPassCreateInfo renderPassInfo = {};
|
|
renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
|
|
renderPassInfo.attachmentCount = 1;
|
|
renderPassInfo.pAttachments = &colorAttachment;
|
|
renderPassInfo.subpassCount = 1;
|
|
renderPassInfo.pSubpasses = &subpass;
|
|
renderPassInfo.dependencyCount = 1;
|
|
renderPassInfo.pDependencies = &dependency;
|
|
|
|
if (vkCreateRenderPass(device, &renderPassInfo, nullptr, &renderPass) !=
|
|
VK_SUCCESS) {
|
|
throw std::runtime_error("failed to create render pass!");
|
|
}
|
|
}
|
|
|
|
void createDescriptorSetLayout() {
|
|
VkDescriptorSetLayoutBinding uboLayoutBinding = {};
|
|
uboLayoutBinding.binding = 0;
|
|
uboLayoutBinding.descriptorCount = 1;
|
|
uboLayoutBinding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
|
uboLayoutBinding.pImmutableSamplers = nullptr;
|
|
uboLayoutBinding.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
|
|
|
|
VkDescriptorSetLayoutBinding samplerLayoutBinding = {};
|
|
samplerLayoutBinding.binding = 1;
|
|
samplerLayoutBinding.descriptorCount = 1;
|
|
samplerLayoutBinding.descriptorType =
|
|
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
|
samplerLayoutBinding.pImmutableSamplers = nullptr;
|
|
samplerLayoutBinding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
|
|
|
|
std::array<VkDescriptorSetLayoutBinding, 2> bindings = {
|
|
uboLayoutBinding, samplerLayoutBinding};
|
|
VkDescriptorSetLayoutCreateInfo layoutInfo = {};
|
|
layoutInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
|
|
layoutInfo.bindingCount = static_cast<uint32_t>(bindings.size());
|
|
layoutInfo.pBindings = bindings.data();
|
|
|
|
if (vkCreateDescriptorSetLayout(device, &layoutInfo, nullptr,
|
|
&descriptorSetLayout) != VK_SUCCESS) {
|
|
throw std::runtime_error("failed to create descriptor set layout!");
|
|
}
|
|
}
|
|
|
|
void createGraphicsPipeline() {
|
|
auto vertShaderCode = readFile("vert.spv");
|
|
auto fragShaderCode = readFile("frag.spv");
|
|
|
|
VkShaderModule vertShaderModule = createShaderModule(vertShaderCode);
|
|
VkShaderModule fragShaderModule = createShaderModule(fragShaderCode);
|
|
|
|
VkPipelineShaderStageCreateInfo vertShaderStageInfo = {};
|
|
vertShaderStageInfo.sType =
|
|
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
|
|
vertShaderStageInfo.stage = VK_SHADER_STAGE_VERTEX_BIT;
|
|
vertShaderStageInfo.module = vertShaderModule;
|
|
vertShaderStageInfo.pName = "main";
|
|
|
|
VkPipelineShaderStageCreateInfo fragShaderStageInfo = {};
|
|
fragShaderStageInfo.sType =
|
|
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
|
|
fragShaderStageInfo.stage = VK_SHADER_STAGE_FRAGMENT_BIT;
|
|
fragShaderStageInfo.module = fragShaderModule;
|
|
fragShaderStageInfo.pName = "main";
|
|
|
|
VkPipelineShaderStageCreateInfo shaderStages[] = {vertShaderStageInfo,
|
|
fragShaderStageInfo};
|
|
|
|
VkPipelineVertexInputStateCreateInfo vertexInputInfo = {};
|
|
vertexInputInfo.sType =
|
|
VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
|
|
|
|
auto bindingDescription = Vertex::getBindingDescription();
|
|
auto attributeDescriptions = Vertex::getAttributeDescriptions();
|
|
|
|
vertexInputInfo.vertexBindingDescriptionCount = 1;
|
|
vertexInputInfo.vertexAttributeDescriptionCount =
|
|
static_cast<uint32_t>(attributeDescriptions.size());
|
|
vertexInputInfo.pVertexBindingDescriptions = &bindingDescription;
|
|
vertexInputInfo.pVertexAttributeDescriptions = attributeDescriptions.data();
|
|
|
|
VkPipelineInputAssemblyStateCreateInfo inputAssembly = {};
|
|
inputAssembly.sType =
|
|
VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
|
|
inputAssembly.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
|
|
inputAssembly.primitiveRestartEnable = VK_FALSE;
|
|
|
|
VkViewport viewport = {};
|
|
viewport.x = 0.0f;
|
|
viewport.y = 0.0f;
|
|
viewport.width = (float)swapChainExtent.width;
|
|
viewport.height = (float)swapChainExtent.height;
|
|
viewport.minDepth = 0.0f;
|
|
viewport.maxDepth = 1.0f;
|
|
|
|
VkRect2D scissor = {};
|
|
scissor.offset = {0, 0};
|
|
scissor.extent = swapChainExtent;
|
|
|
|
VkPipelineViewportStateCreateInfo viewportState = {};
|
|
viewportState.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
|
|
viewportState.viewportCount = 1;
|
|
viewportState.pViewports = &viewport;
|
|
viewportState.scissorCount = 1;
|
|
viewportState.pScissors = &scissor;
|
|
|
|
VkPipelineRasterizationStateCreateInfo rasterizer = {};
|
|
rasterizer.sType =
|
|
VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
|
|
rasterizer.depthClampEnable = VK_FALSE;
|
|
rasterizer.rasterizerDiscardEnable = VK_FALSE;
|
|
rasterizer.polygonMode = VK_POLYGON_MODE_FILL;
|
|
rasterizer.lineWidth = 1.0f;
|
|
rasterizer.cullMode = VK_CULL_MODE_BACK_BIT;
|
|
rasterizer.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
|
|
rasterizer.depthBiasEnable = VK_FALSE;
|
|
|
|
VkPipelineMultisampleStateCreateInfo multisampling = {};
|
|
multisampling.sType =
|
|
VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
|
|
multisampling.sampleShadingEnable = VK_FALSE;
|
|
multisampling.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
|
|
|
|
VkPipelineColorBlendAttachmentState colorBlendAttachment = {};
|
|
colorBlendAttachment.colorWriteMask =
|
|
VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
|
|
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
|
|
colorBlendAttachment.blendEnable = VK_FALSE;
|
|
|
|
VkPipelineColorBlendStateCreateInfo colorBlending = {};
|
|
colorBlending.sType =
|
|
VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
|
|
colorBlending.logicOpEnable = VK_FALSE;
|
|
colorBlending.logicOp = VK_LOGIC_OP_COPY;
|
|
colorBlending.attachmentCount = 1;
|
|
colorBlending.pAttachments = &colorBlendAttachment;
|
|
colorBlending.blendConstants[0] = 0.0f;
|
|
colorBlending.blendConstants[1] = 0.0f;
|
|
colorBlending.blendConstants[2] = 0.0f;
|
|
colorBlending.blendConstants[3] = 0.0f;
|
|
|
|
VkPipelineLayoutCreateInfo pipelineLayoutInfo = {};
|
|
pipelineLayoutInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
|
|
pipelineLayoutInfo.setLayoutCount = 1;
|
|
pipelineLayoutInfo.pSetLayouts = &descriptorSetLayout;
|
|
|
|
if (vkCreatePipelineLayout(device, &pipelineLayoutInfo, nullptr,
|
|
&pipelineLayout) != VK_SUCCESS) {
|
|
throw std::runtime_error("failed to create pipeline layout!");
|
|
}
|
|
|
|
VkGraphicsPipelineCreateInfo pipelineInfo = {};
|
|
pipelineInfo.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
|
|
pipelineInfo.stageCount = 2;
|
|
pipelineInfo.pStages = shaderStages;
|
|
pipelineInfo.pVertexInputState = &vertexInputInfo;
|
|
pipelineInfo.pInputAssemblyState = &inputAssembly;
|
|
pipelineInfo.pViewportState = &viewportState;
|
|
pipelineInfo.pRasterizationState = &rasterizer;
|
|
pipelineInfo.pMultisampleState = &multisampling;
|
|
pipelineInfo.pColorBlendState = &colorBlending;
|
|
pipelineInfo.layout = pipelineLayout;
|
|
pipelineInfo.renderPass = renderPass;
|
|
pipelineInfo.subpass = 0;
|
|
pipelineInfo.basePipelineHandle = VK_NULL_HANDLE;
|
|
|
|
if (vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &pipelineInfo,
|
|
nullptr, &graphicsPipeline) != VK_SUCCESS) {
|
|
throw std::runtime_error("failed to create graphics pipeline!");
|
|
}
|
|
|
|
vkDestroyShaderModule(device, fragShaderModule, nullptr);
|
|
vkDestroyShaderModule(device, vertShaderModule, nullptr);
|
|
}
|
|
|
|
void createFramebuffers() {
|
|
swapChainFramebuffers.resize(swapChainImageViews.size());
|
|
|
|
for (size_t i = 0; i < swapChainImageViews.size(); i++) {
|
|
VkImageView attachments[] = {swapChainImageViews[i]};
|
|
|
|
VkFramebufferCreateInfo framebufferInfo = {};
|
|
framebufferInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
|
|
framebufferInfo.renderPass = renderPass;
|
|
framebufferInfo.attachmentCount = 1;
|
|
framebufferInfo.pAttachments = attachments;
|
|
framebufferInfo.width = swapChainExtent.width;
|
|
framebufferInfo.height = swapChainExtent.height;
|
|
framebufferInfo.layers = 1;
|
|
|
|
if (vkCreateFramebuffer(device, &framebufferInfo, nullptr,
|
|
&swapChainFramebuffers[i]) != VK_SUCCESS) {
|
|
throw std::runtime_error("failed to create framebuffer!");
|
|
}
|
|
}
|
|
}
|
|
|
|
void createCommandPool() {
|
|
QueueFamilyIndices queueFamilyIndices = findQueueFamilies(physicalDevice);
|
|
|
|
VkCommandPoolCreateInfo poolInfo = {};
|
|
poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
|
|
poolInfo.queueFamilyIndex = queueFamilyIndices.graphicsFamily;
|
|
|
|
if (vkCreateCommandPool(device, &poolInfo, nullptr, &commandPool) !=
|
|
VK_SUCCESS) {
|
|
throw std::runtime_error("failed to create graphics command pool!");
|
|
}
|
|
}
|
|
|
|
void createTextureImage() {
|
|
VkDeviceSize imageSize = imageWidth * imageHeight * 4;
|
|
mipLevels = static_cast<uint32_t>(
|
|
std::floor(std::log2(std::max(imageWidth, imageHeight)))) +
|
|
1;
|
|
printf("mipLevels = %d\n", mipLevels);
|
|
|
|
if (!image_data) {
|
|
throw std::runtime_error("failed to load texture image!");
|
|
}
|
|
|
|
VkBuffer stagingBuffer;
|
|
VkDeviceMemory stagingBufferMemory;
|
|
createBuffer(imageSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
|
|
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
|
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
|
|
stagingBuffer, stagingBufferMemory);
|
|
|
|
void* data;
|
|
vkMapMemory(device, stagingBufferMemory, 0, imageSize, 0, &data);
|
|
memcpy(data, image_data, static_cast<size_t>(imageSize));
|
|
vkUnmapMemory(device, stagingBufferMemory);
|
|
|
|
// VK_FORMAT_R8G8B8A8_UNORM changed to VK_FORMAT_R8G8B8A8_UINT
|
|
createImage(
|
|
imageWidth, imageHeight, VK_FORMAT_R8G8B8A8_UNORM,
|
|
VK_IMAGE_TILING_OPTIMAL,
|
|
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
|
|
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
|
|
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, textureImage, textureImageMemory);
|
|
|
|
transitionImageLayout(textureImage, VK_FORMAT_R8G8B8A8_UINT,
|
|
VK_IMAGE_LAYOUT_UNDEFINED,
|
|
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
|
|
copyBufferToImage(stagingBuffer, textureImage,
|
|
static_cast<uint32_t>(imageWidth),
|
|
static_cast<uint32_t>(imageHeight));
|
|
|
|
vkDestroyBuffer(device, stagingBuffer, nullptr);
|
|
vkFreeMemory(device, stagingBufferMemory, nullptr);
|
|
|
|
generateMipmaps(textureImage, VK_FORMAT_R8G8B8A8_UNORM);
|
|
}
|
|
|
|
void generateMipmaps(VkImage image, VkFormat imageFormat) {
|
|
VkFormatProperties formatProperties;
|
|
vkGetPhysicalDeviceFormatProperties(physicalDevice, imageFormat,
|
|
&formatProperties);
|
|
|
|
if (!(formatProperties.optimalTilingFeatures &
|
|
VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT)) {
|
|
throw std::runtime_error(
|
|
"texture image format does not support linear blitting!");
|
|
}
|
|
|
|
VkCommandBuffer commandBuffer = beginSingleTimeCommands();
|
|
|
|
VkImageMemoryBarrier barrier = {};
|
|
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
|
|
barrier.image = image;
|
|
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
|
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
|
barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
|
|
barrier.subresourceRange.baseArrayLayer = 0;
|
|
barrier.subresourceRange.layerCount = 1;
|
|
barrier.subresourceRange.levelCount = 1;
|
|
|
|
int32_t mipWidth = imageWidth;
|
|
int32_t mipHeight = imageHeight;
|
|
|
|
for (uint32_t i = 1; i < mipLevels; i++) {
|
|
barrier.subresourceRange.baseMipLevel = i - 1;
|
|
barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
|
|
barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
|
|
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
|
|
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
|
|
|
|
vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
|
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0,
|
|
nullptr, 1, &barrier);
|
|
|
|
VkImageBlit blit = {};
|
|
blit.srcOffsets[0] = {0, 0, 0};
|
|
blit.srcOffsets[1] = {mipWidth, mipHeight, 1};
|
|
blit.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
|
|
blit.srcSubresource.mipLevel = i - 1;
|
|
blit.srcSubresource.baseArrayLayer = 0;
|
|
blit.srcSubresource.layerCount = 1;
|
|
blit.dstOffsets[0] = {0, 0, 0};
|
|
blit.dstOffsets[1] = {mipWidth > 1 ? mipWidth / 2 : 1,
|
|
mipHeight > 1 ? mipHeight / 2 : 1, 1};
|
|
blit.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
|
|
blit.dstSubresource.mipLevel = i;
|
|
blit.dstSubresource.baseArrayLayer = 0;
|
|
blit.dstSubresource.layerCount = 1;
|
|
|
|
vkCmdBlitImage(commandBuffer, image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
|
|
image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &blit,
|
|
VK_FILTER_LINEAR);
|
|
|
|
barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
|
|
barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
|
|
barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
|
|
barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
|
|
|
|
vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
|
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr,
|
|
0, nullptr, 1, &barrier);
|
|
|
|
if (mipWidth > 1) mipWidth /= 2;
|
|
if (mipHeight > 1) mipHeight /= 2;
|
|
}
|
|
|
|
barrier.subresourceRange.baseMipLevel = mipLevels - 1;
|
|
barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
|
|
barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
|
|
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
|
|
barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
|
|
|
|
vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
|
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr,
|
|
0, nullptr, 1, &barrier);
|
|
|
|
endSingleTimeCommands(commandBuffer);
|
|
}
|
|
|
|
#ifdef _WIN64 // For windows
|
|
HANDLE getVkImageMemHandle(
|
|
VkExternalMemoryHandleTypeFlagsKHR externalMemoryHandleType) {
|
|
HANDLE handle;
|
|
|
|
VkMemoryGetWin32HandleInfoKHR vkMemoryGetWin32HandleInfoKHR = {};
|
|
vkMemoryGetWin32HandleInfoKHR.sType =
|
|
VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR;
|
|
vkMemoryGetWin32HandleInfoKHR.pNext = NULL;
|
|
vkMemoryGetWin32HandleInfoKHR.memory = textureImageMemory;
|
|
vkMemoryGetWin32HandleInfoKHR.handleType =
|
|
(VkExternalMemoryHandleTypeFlagBitsKHR)externalMemoryHandleType;
|
|
|
|
fpGetMemoryWin32HandleKHR(device, &vkMemoryGetWin32HandleInfoKHR, &handle);
|
|
return handle;
|
|
}
|
|
HANDLE getVkSemaphoreHandle(
|
|
VkExternalSemaphoreHandleTypeFlagBitsKHR externalSemaphoreHandleType,
|
|
VkSemaphore& semVkCuda) {
|
|
HANDLE handle;
|
|
|
|
VkSemaphoreGetWin32HandleInfoKHR vulkanSemaphoreGetWin32HandleInfoKHR = {};
|
|
vulkanSemaphoreGetWin32HandleInfoKHR.sType =
|
|
VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR;
|
|
vulkanSemaphoreGetWin32HandleInfoKHR.pNext = NULL;
|
|
vulkanSemaphoreGetWin32HandleInfoKHR.semaphore = semVkCuda;
|
|
vulkanSemaphoreGetWin32HandleInfoKHR.handleType =
|
|
externalSemaphoreHandleType;
|
|
|
|
fpGetSemaphoreWin32HandleKHR(device, &vulkanSemaphoreGetWin32HandleInfoKHR,
|
|
&handle);
|
|
|
|
return handle;
|
|
}
|
|
#else
|
|
int getVkImageMemHandle(
|
|
VkExternalMemoryHandleTypeFlagsKHR externalMemoryHandleType) {
|
|
if (externalMemoryHandleType ==
|
|
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) {
|
|
int fd;
|
|
|
|
VkMemoryGetFdInfoKHR vkMemoryGetFdInfoKHR = {};
|
|
vkMemoryGetFdInfoKHR.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR;
|
|
vkMemoryGetFdInfoKHR.pNext = NULL;
|
|
vkMemoryGetFdInfoKHR.memory = textureImageMemory;
|
|
vkMemoryGetFdInfoKHR.handleType =
|
|
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
|
|
|
|
fpGetMemoryFdKHR(device, &vkMemoryGetFdInfoKHR, &fd);
|
|
|
|
return fd;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
int getVkSemaphoreHandle(
|
|
VkExternalSemaphoreHandleTypeFlagBitsKHR externalSemaphoreHandleType,
|
|
VkSemaphore& semVkCuda) {
|
|
if (externalSemaphoreHandleType ==
|
|
VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
|
|
int fd;
|
|
|
|
VkSemaphoreGetFdInfoKHR vulkanSemaphoreGetFdInfoKHR = {};
|
|
vulkanSemaphoreGetFdInfoKHR.sType =
|
|
VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR;
|
|
vulkanSemaphoreGetFdInfoKHR.pNext = NULL;
|
|
vulkanSemaphoreGetFdInfoKHR.semaphore = semVkCuda;
|
|
vulkanSemaphoreGetFdInfoKHR.handleType =
|
|
VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
|
|
|
|
fpGetSemaphoreFdKHR(device, &vulkanSemaphoreGetFdInfoKHR, &fd);
|
|
|
|
return fd;
|
|
}
|
|
return -1;
|
|
}
|
|
#endif
|
|
|
|
void createTextureImageView() {
|
|
textureImageView = createImageView(textureImage, VK_FORMAT_R8G8B8A8_UNORM);
|
|
}
|
|
|
|
void createTextureSampler() {
|
|
VkSamplerCreateInfo samplerInfo = {};
|
|
samplerInfo.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
|
|
samplerInfo.magFilter = VK_FILTER_LINEAR;
|
|
samplerInfo.minFilter = VK_FILTER_LINEAR;
|
|
samplerInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT;
|
|
samplerInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT;
|
|
samplerInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT;
|
|
samplerInfo.anisotropyEnable = VK_TRUE;
|
|
samplerInfo.maxAnisotropy = 16;
|
|
samplerInfo.borderColor = VK_BORDER_COLOR_INT_OPAQUE_BLACK;
|
|
samplerInfo.unnormalizedCoordinates = VK_FALSE;
|
|
samplerInfo.compareEnable = VK_FALSE;
|
|
samplerInfo.compareOp = VK_COMPARE_OP_ALWAYS;
|
|
samplerInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
|
|
samplerInfo.minLod = 0; // Optional
|
|
samplerInfo.maxLod = static_cast<float>(mipLevels);
|
|
samplerInfo.mipLodBias = 0; // Optional
|
|
|
|
if (vkCreateSampler(device, &samplerInfo, nullptr, &textureSampler) !=
|
|
VK_SUCCESS) {
|
|
throw std::runtime_error("failed to create texture sampler!");
|
|
}
|
|
}
|
|
|
|
VkImageView createImageView(VkImage image, VkFormat format) {
|
|
VkImageViewCreateInfo viewInfo = {};
|
|
viewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
|
|
viewInfo.image = image;
|
|
viewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D;
|
|
viewInfo.format = format;
|
|
viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
|
|
viewInfo.subresourceRange.baseMipLevel = 0;
|
|
viewInfo.subresourceRange.levelCount = mipLevels;
|
|
viewInfo.subresourceRange.baseArrayLayer = 0;
|
|
viewInfo.subresourceRange.layerCount = 1;
|
|
|
|
VkImageView imageView;
|
|
if (vkCreateImageView(device, &viewInfo, nullptr, &imageView) !=
|
|
VK_SUCCESS) {
|
|
throw std::runtime_error("failed to create texture image view!");
|
|
}
|
|
|
|
return imageView;
|
|
}
|
|
|
|
void createImage(uint32_t width, uint32_t height, VkFormat format,
|
|
VkImageTiling tiling, VkImageUsageFlags usage,
|
|
VkMemoryPropertyFlags properties, VkImage& image,
|
|
VkDeviceMemory& imageMemory) {
|
|
VkImageCreateInfo imageInfo = {};
|
|
imageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
|
|
imageInfo.imageType = VK_IMAGE_TYPE_2D;
|
|
imageInfo.extent.width = width;
|
|
imageInfo.extent.height = height;
|
|
imageInfo.extent.depth = 1;
|
|
imageInfo.mipLevels = mipLevels;
|
|
imageInfo.arrayLayers = 1;
|
|
imageInfo.format = format;
|
|
imageInfo.tiling = tiling;
|
|
imageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
|
imageInfo.usage = usage;
|
|
imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
|
|
imageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
|
|
|
|
VkExternalMemoryImageCreateInfo vkExternalMemImageCreateInfo = {};
|
|
vkExternalMemImageCreateInfo.sType =
|
|
VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO;
|
|
vkExternalMemImageCreateInfo.pNext = NULL;
|
|
#ifdef _WIN64
|
|
vkExternalMemImageCreateInfo.handleTypes =
|
|
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
|
|
#else
|
|
vkExternalMemImageCreateInfo.handleTypes =
|
|
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
|
|
#endif
|
|
|
|
imageInfo.pNext = &vkExternalMemImageCreateInfo;
|
|
|
|
if (vkCreateImage(device, &imageInfo, nullptr, &image) != VK_SUCCESS) {
|
|
throw std::runtime_error("failed to create image!");
|
|
}
|
|
|
|
VkMemoryRequirements memRequirements;
|
|
vkGetImageMemoryRequirements(device, image, &memRequirements);
|
|
|
|
#ifdef _WIN64
|
|
WindowsSecurityAttributes winSecurityAttributes;
|
|
|
|
VkExportMemoryWin32HandleInfoKHR vulkanExportMemoryWin32HandleInfoKHR = {};
|
|
vulkanExportMemoryWin32HandleInfoKHR.sType =
|
|
VK_STRUCTURE_TYPE_EXPORT_MEMORY_WIN32_HANDLE_INFO_KHR;
|
|
vulkanExportMemoryWin32HandleInfoKHR.pNext = NULL;
|
|
vulkanExportMemoryWin32HandleInfoKHR.pAttributes = &winSecurityAttributes;
|
|
vulkanExportMemoryWin32HandleInfoKHR.dwAccess =
|
|
DXGI_SHARED_RESOURCE_READ | DXGI_SHARED_RESOURCE_WRITE;
|
|
vulkanExportMemoryWin32HandleInfoKHR.name = (LPCWSTR)NULL;
|
|
#endif
|
|
VkExportMemoryAllocateInfoKHR vulkanExportMemoryAllocateInfoKHR = {};
|
|
vulkanExportMemoryAllocateInfoKHR.sType =
|
|
VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR;
|
|
#ifdef _WIN64
|
|
vulkanExportMemoryAllocateInfoKHR.pNext =
|
|
IsWindows8OrGreater() ? &vulkanExportMemoryWin32HandleInfoKHR : NULL;
|
|
vulkanExportMemoryAllocateInfoKHR.handleTypes =
|
|
IsWindows8OrGreater()
|
|
? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
|
|
: VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT;
|
|
#else
|
|
vulkanExportMemoryAllocateInfoKHR.pNext = NULL;
|
|
vulkanExportMemoryAllocateInfoKHR.handleTypes =
|
|
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
|
|
#endif
|
|
|
|
VkMemoryAllocateInfo allocInfo = {};
|
|
allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
|
|
allocInfo.allocationSize = memRequirements.size;
|
|
allocInfo.pNext = &vulkanExportMemoryAllocateInfoKHR;
|
|
allocInfo.memoryTypeIndex =
|
|
findMemoryType(memRequirements.memoryTypeBits, properties);
|
|
|
|
VkMemoryRequirements vkMemoryRequirements = {};
|
|
vkGetImageMemoryRequirements(device, image, &vkMemoryRequirements);
|
|
totalImageMemSize = vkMemoryRequirements.size;
|
|
|
|
if (vkAllocateMemory(device, &allocInfo, nullptr, &textureImageMemory) !=
|
|
VK_SUCCESS) {
|
|
throw std::runtime_error("failed to allocate image memory!");
|
|
}
|
|
|
|
vkBindImageMemory(device, image, textureImageMemory, 0);
|
|
}
|
|
|
|
void cudaVkImportSemaphore() {
|
|
cudaExternalSemaphoreHandleDesc externalSemaphoreHandleDesc;
|
|
memset(&externalSemaphoreHandleDesc, 0,
|
|
sizeof(externalSemaphoreHandleDesc));
|
|
#ifdef _WIN64
|
|
externalSemaphoreHandleDesc.type =
|
|
IsWindows8OrGreater() ? cudaExternalSemaphoreHandleTypeOpaqueWin32
|
|
: cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt;
|
|
externalSemaphoreHandleDesc.handle.win32.handle = getVkSemaphoreHandle(
|
|
IsWindows8OrGreater()
|
|
? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
|
|
: VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
|
|
cudaUpdateVkSemaphore);
|
|
#else
|
|
externalSemaphoreHandleDesc.type = cudaExternalSemaphoreHandleTypeOpaqueFd;
|
|
externalSemaphoreHandleDesc.handle.fd = getVkSemaphoreHandle(
|
|
VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, cudaUpdateVkSemaphore);
|
|
#endif
|
|
externalSemaphoreHandleDesc.flags = 0;
|
|
|
|
checkCudaErrors(cudaImportExternalSemaphore(&cudaExtCudaUpdateVkSemaphore,
|
|
&externalSemaphoreHandleDesc));
|
|
|
|
memset(&externalSemaphoreHandleDesc, 0,
|
|
sizeof(externalSemaphoreHandleDesc));
|
|
#ifdef _WIN64
|
|
externalSemaphoreHandleDesc.type =
|
|
IsWindows8OrGreater() ? cudaExternalSemaphoreHandleTypeOpaqueWin32
|
|
: cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt;
|
|
;
|
|
externalSemaphoreHandleDesc.handle.win32.handle = getVkSemaphoreHandle(
|
|
IsWindows8OrGreater()
|
|
? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
|
|
: VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
|
|
vkUpdateCudaSemaphore);
|
|
#else
|
|
externalSemaphoreHandleDesc.type = cudaExternalSemaphoreHandleTypeOpaqueFd;
|
|
externalSemaphoreHandleDesc.handle.fd = getVkSemaphoreHandle(
|
|
VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, vkUpdateCudaSemaphore);
|
|
#endif
|
|
externalSemaphoreHandleDesc.flags = 0;
|
|
checkCudaErrors(cudaImportExternalSemaphore(&cudaExtVkUpdateCudaSemaphore,
|
|
&externalSemaphoreHandleDesc));
|
|
printf("CUDA Imported Vulkan semaphore\n");
|
|
}
|
|
|
|
void cudaVkImportImageMem() {
|
|
cudaExternalMemoryHandleDesc cudaExtMemHandleDesc;
|
|
memset(&cudaExtMemHandleDesc, 0, sizeof(cudaExtMemHandleDesc));
|
|
#ifdef _WIN64
|
|
cudaExtMemHandleDesc.type =
|
|
IsWindows8OrGreater() ? cudaExternalMemoryHandleTypeOpaqueWin32
|
|
: cudaExternalMemoryHandleTypeOpaqueWin32Kmt;
|
|
cudaExtMemHandleDesc.handle.win32.handle = getVkImageMemHandle(
|
|
IsWindows8OrGreater()
|
|
? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
|
|
: VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT);
|
|
#else
|
|
cudaExtMemHandleDesc.type = cudaExternalMemoryHandleTypeOpaqueFd;
|
|
|
|
cudaExtMemHandleDesc.handle.fd =
|
|
getVkImageMemHandle(VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
|
|
#endif
|
|
cudaExtMemHandleDesc.size = totalImageMemSize;
|
|
|
|
checkCudaErrors(cudaImportExternalMemory(&cudaExtMemImageBuffer,
|
|
&cudaExtMemHandleDesc));
|
|
|
|
cudaExternalMemoryMipmappedArrayDesc externalMemoryMipmappedArrayDesc;
|
|
|
|
memset(&externalMemoryMipmappedArrayDesc, 0,
|
|
sizeof(externalMemoryMipmappedArrayDesc));
|
|
|
|
cudaExtent extent = make_cudaExtent(imageWidth, imageHeight, 0);
|
|
cudaChannelFormatDesc formatDesc;
|
|
formatDesc.x = 8;
|
|
formatDesc.y = 8;
|
|
formatDesc.z = 8;
|
|
formatDesc.w = 8;
|
|
formatDesc.f = cudaChannelFormatKindUnsigned;
|
|
|
|
externalMemoryMipmappedArrayDesc.offset = 0;
|
|
externalMemoryMipmappedArrayDesc.formatDesc = formatDesc;
|
|
externalMemoryMipmappedArrayDesc.extent = extent;
|
|
externalMemoryMipmappedArrayDesc.flags = 0;
|
|
externalMemoryMipmappedArrayDesc.numLevels = mipLevels;
|
|
|
|
checkCudaErrors(cudaExternalMemoryGetMappedMipmappedArray(
|
|
&cudaMipmappedImageArray, cudaExtMemImageBuffer,
|
|
&externalMemoryMipmappedArrayDesc));
|
|
|
|
checkCudaErrors(cudaMallocMipmappedArray(&cudaMipmappedImageArrayTemp,
|
|
&formatDesc, extent, mipLevels));
|
|
checkCudaErrors(cudaMallocMipmappedArray(&cudaMipmappedImageArrayOrig,
|
|
&formatDesc, extent, mipLevels));
|
|
|
|
for (int mipLevelIdx = 0; mipLevelIdx < mipLevels; mipLevelIdx++) {
|
|
cudaArray_t cudaMipLevelArray, cudaMipLevelArrayTemp,
|
|
cudaMipLevelArrayOrig;
|
|
cudaResourceDesc resourceDesc;
|
|
|
|
checkCudaErrors(cudaGetMipmappedArrayLevel(
|
|
&cudaMipLevelArray, cudaMipmappedImageArray, mipLevelIdx));
|
|
checkCudaErrors(cudaGetMipmappedArrayLevel(
|
|
&cudaMipLevelArrayTemp, cudaMipmappedImageArrayTemp, mipLevelIdx));
|
|
checkCudaErrors(cudaGetMipmappedArrayLevel(
|
|
&cudaMipLevelArrayOrig, cudaMipmappedImageArrayOrig, mipLevelIdx));
|
|
|
|
uint32_t width =
|
|
(imageWidth >> mipLevelIdx) ? (imageWidth >> mipLevelIdx) : 1;
|
|
uint32_t height =
|
|
(imageHeight >> mipLevelIdx) ? (imageHeight >> mipLevelIdx) : 1;
|
|
checkCudaErrors(cudaMemcpy2DArrayToArray(
|
|
cudaMipLevelArrayOrig, 0, 0, cudaMipLevelArray, 0, 0,
|
|
width * sizeof(uchar4), height, cudaMemcpyDeviceToDevice));
|
|
|
|
memset(&resourceDesc, 0, sizeof(resourceDesc));
|
|
resourceDesc.resType = cudaResourceTypeArray;
|
|
resourceDesc.res.array.array = cudaMipLevelArray;
|
|
|
|
cudaSurfaceObject_t surfaceObject;
|
|
checkCudaErrors(cudaCreateSurfaceObject(&surfaceObject, &resourceDesc));
|
|
|
|
surfaceObjectList.push_back(surfaceObject);
|
|
|
|
memset(&resourceDesc, 0, sizeof(resourceDesc));
|
|
resourceDesc.resType = cudaResourceTypeArray;
|
|
resourceDesc.res.array.array = cudaMipLevelArrayTemp;
|
|
|
|
cudaSurfaceObject_t surfaceObjectTemp;
|
|
checkCudaErrors(
|
|
cudaCreateSurfaceObject(&surfaceObjectTemp, &resourceDesc));
|
|
surfaceObjectListTemp.push_back(surfaceObjectTemp);
|
|
}
|
|
|
|
cudaResourceDesc resDescr;
|
|
memset(&resDescr, 0, sizeof(cudaResourceDesc));
|
|
|
|
resDescr.resType = cudaResourceTypeMipmappedArray;
|
|
resDescr.res.mipmap.mipmap = cudaMipmappedImageArrayOrig;
|
|
|
|
cudaTextureDesc texDescr;
|
|
memset(&texDescr, 0, sizeof(cudaTextureDesc));
|
|
|
|
texDescr.normalizedCoords = true;
|
|
texDescr.filterMode = cudaFilterModeLinear;
|
|
texDescr.mipmapFilterMode = cudaFilterModeLinear;
|
|
|
|
texDescr.addressMode[0] = cudaAddressModeWrap;
|
|
texDescr.addressMode[1] = cudaAddressModeWrap;
|
|
|
|
texDescr.maxMipmapLevelClamp = float(mipLevels - 1);
|
|
|
|
texDescr.readMode = cudaReadModeNormalizedFloat;
|
|
|
|
checkCudaErrors(cudaCreateTextureObject(&textureObjMipMapInput, &resDescr,
|
|
&texDescr, NULL));
|
|
|
|
checkCudaErrors(cudaMalloc((void**)&d_surfaceObjectList,
|
|
sizeof(cudaSurfaceObject_t) * mipLevels));
|
|
checkCudaErrors(cudaMalloc((void**)&d_surfaceObjectListTemp,
|
|
sizeof(cudaSurfaceObject_t) * mipLevels));
|
|
|
|
checkCudaErrors(cudaMemcpy(d_surfaceObjectList, surfaceObjectList.data(),
|
|
sizeof(cudaSurfaceObject_t) * mipLevels,
|
|
cudaMemcpyHostToDevice));
|
|
checkCudaErrors(cudaMemcpy(
|
|
d_surfaceObjectListTemp, surfaceObjectListTemp.data(),
|
|
sizeof(cudaSurfaceObject_t) * mipLevels, cudaMemcpyHostToDevice));
|
|
|
|
printf("CUDA Kernel Vulkan image buffer\n");
|
|
}
|
|
|
|
void cudaUpdateVkImage() {
|
|
cudaVkSemaphoreWait(cudaExtVkUpdateCudaSemaphore);
|
|
|
|
int nthreads = 128;
|
|
|
|
/*Perform 2D box filter on image using CUDA */
|
|
d_boxfilter_rgba_x<<<imageHeight / nthreads, nthreads, 0, streamToRun>>>(
|
|
d_surfaceObjectListTemp, textureObjMipMapInput, imageWidth, imageHeight,
|
|
mipLevels, filter_radius);
|
|
|
|
d_boxfilter_rgba_y<<<imageWidth / nthreads, nthreads, 0, streamToRun>>>(
|
|
d_surfaceObjectList, d_surfaceObjectListTemp, imageWidth, imageHeight,
|
|
mipLevels, filter_radius);
|
|
|
|
varySigma();
|
|
|
|
cudaVkSemaphoreSignal(cudaExtCudaUpdateVkSemaphore);
|
|
}
|
|
|
|
void transitionImageLayout(VkImage image, VkFormat format,
|
|
VkImageLayout oldLayout, VkImageLayout newLayout) {
|
|
VkCommandBuffer commandBuffer = beginSingleTimeCommands();
|
|
|
|
VkImageMemoryBarrier barrier = {};
|
|
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
|
|
barrier.oldLayout = oldLayout;
|
|
barrier.newLayout = newLayout;
|
|
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
|
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
|
barrier.image = image;
|
|
barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
|
|
barrier.subresourceRange.baseMipLevel = 0;
|
|
barrier.subresourceRange.levelCount = mipLevels;
|
|
barrier.subresourceRange.baseArrayLayer = 0;
|
|
barrier.subresourceRange.layerCount = 1;
|
|
|
|
VkPipelineStageFlags sourceStage;
|
|
VkPipelineStageFlags destinationStage;
|
|
|
|
if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED &&
|
|
newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
|
|
barrier.srcAccessMask = 0;
|
|
barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
|
|
|
|
sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
|
|
destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
|
|
} else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
|
|
newLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) {
|
|
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
|
|
barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
|
|
|
|
sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
|
|
destinationStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
|
|
} else {
|
|
throw std::invalid_argument("unsupported layout transition!");
|
|
}
|
|
|
|
vkCmdPipelineBarrier(commandBuffer, sourceStage, destinationStage, 0, 0,
|
|
nullptr, 0, nullptr, 1, &barrier);
|
|
|
|
endSingleTimeCommands(commandBuffer);
|
|
}
|
|
|
|
void copyBufferToImage(VkBuffer buffer, VkImage image, uint32_t width,
|
|
uint32_t height) {
|
|
VkCommandBuffer commandBuffer = beginSingleTimeCommands();
|
|
|
|
VkBufferImageCopy region = {};
|
|
region.bufferOffset = 0;
|
|
region.bufferRowLength = 0;
|
|
region.bufferImageHeight = 0;
|
|
region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
|
|
region.imageSubresource.mipLevel = 0;
|
|
region.imageSubresource.baseArrayLayer = 0;
|
|
region.imageSubresource.layerCount = 1;
|
|
region.imageOffset = {0, 0, 0};
|
|
region.imageExtent = {width, height, 1};
|
|
|
|
vkCmdCopyBufferToImage(commandBuffer, buffer, image,
|
|
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion);
|
|
|
|
endSingleTimeCommands(commandBuffer);
|
|
}
|
|
|
|
void createVertexBuffer() {
|
|
VkDeviceSize bufferSize = sizeof(vertices[0]) * vertices.size();
|
|
|
|
VkBuffer stagingBuffer;
|
|
VkDeviceMemory stagingBufferMemory;
|
|
createBuffer(bufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
|
|
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
|
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
|
|
stagingBuffer, stagingBufferMemory);
|
|
|
|
void* data;
|
|
vkMapMemory(device, stagingBufferMemory, 0, bufferSize, 0, &data);
|
|
memcpy(data, vertices.data(), (size_t)bufferSize);
|
|
vkUnmapMemory(device, stagingBufferMemory);
|
|
|
|
createBuffer(
|
|
bufferSize,
|
|
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
|
|
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, vertexBuffer, vertexBufferMemory);
|
|
|
|
copyBuffer(stagingBuffer, vertexBuffer, bufferSize);
|
|
|
|
vkDestroyBuffer(device, stagingBuffer, nullptr);
|
|
vkFreeMemory(device, stagingBufferMemory, nullptr);
|
|
}
|
|
|
|
void createIndexBuffer() {
|
|
VkDeviceSize bufferSize = sizeof(indices[0]) * indices.size();
|
|
|
|
VkBuffer stagingBuffer;
|
|
VkDeviceMemory stagingBufferMemory;
|
|
createBuffer(bufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
|
|
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
|
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
|
|
stagingBuffer, stagingBufferMemory);
|
|
|
|
void* data;
|
|
vkMapMemory(device, stagingBufferMemory, 0, bufferSize, 0, &data);
|
|
memcpy(data, indices.data(), (size_t)bufferSize);
|
|
vkUnmapMemory(device, stagingBufferMemory);
|
|
|
|
createBuffer(
|
|
bufferSize,
|
|
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
|
|
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, indexBuffer, indexBufferMemory);
|
|
|
|
copyBuffer(stagingBuffer, indexBuffer, bufferSize);
|
|
|
|
vkDestroyBuffer(device, stagingBuffer, nullptr);
|
|
vkFreeMemory(device, stagingBufferMemory, nullptr);
|
|
}
|
|
|
|
void createUniformBuffers() {
|
|
VkDeviceSize bufferSize = sizeof(UniformBufferObject);
|
|
|
|
uniformBuffers.resize(swapChainImages.size());
|
|
uniformBuffersMemory.resize(swapChainImages.size());
|
|
|
|
for (size_t i = 0; i < swapChainImages.size(); i++) {
|
|
createBuffer(bufferSize, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
|
|
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
|
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
|
|
uniformBuffers[i], uniformBuffersMemory[i]);
|
|
}
|
|
}
|
|
|
|
void createDescriptorPool() {
|
|
std::array<VkDescriptorPoolSize, 2> poolSizes = {};
|
|
poolSizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
|
poolSizes[0].descriptorCount =
|
|
static_cast<uint32_t>(swapChainImages.size());
|
|
poolSizes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
|
poolSizes[1].descriptorCount =
|
|
static_cast<uint32_t>(swapChainImages.size());
|
|
|
|
VkDescriptorPoolCreateInfo poolInfo = {};
|
|
poolInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
|
|
poolInfo.poolSizeCount = static_cast<uint32_t>(poolSizes.size());
|
|
poolInfo.pPoolSizes = poolSizes.data();
|
|
poolInfo.maxSets = static_cast<uint32_t>(swapChainImages.size());
|
|
|
|
if (vkCreateDescriptorPool(device, &poolInfo, nullptr, &descriptorPool) !=
|
|
VK_SUCCESS) {
|
|
throw std::runtime_error("failed to create descriptor pool!");
|
|
}
|
|
}
|
|
|
|
void createDescriptorSets() {
|
|
std::vector<VkDescriptorSetLayout> layouts(swapChainImages.size(),
|
|
descriptorSetLayout);
|
|
VkDescriptorSetAllocateInfo allocInfo = {};
|
|
allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
|
|
allocInfo.descriptorPool = descriptorPool;
|
|
allocInfo.descriptorSetCount =
|
|
static_cast<uint32_t>(swapChainImages.size());
|
|
allocInfo.pSetLayouts = layouts.data();
|
|
|
|
descriptorSets.resize(swapChainImages.size());
|
|
if (vkAllocateDescriptorSets(device, &allocInfo, descriptorSets.data()) !=
|
|
VK_SUCCESS) {
|
|
throw std::runtime_error("failed to allocate descriptor sets!");
|
|
}
|
|
|
|
for (size_t i = 0; i < swapChainImages.size(); i++) {
|
|
VkDescriptorBufferInfo bufferInfo = {};
|
|
bufferInfo.buffer = uniformBuffers[i];
|
|
bufferInfo.offset = 0;
|
|
bufferInfo.range = sizeof(UniformBufferObject);
|
|
|
|
VkDescriptorImageInfo imageInfo = {};
|
|
imageInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
|
|
imageInfo.imageView = textureImageView;
|
|
imageInfo.sampler = textureSampler;
|
|
|
|
std::array<VkWriteDescriptorSet, 2> descriptorWrites = {};
|
|
|
|
descriptorWrites[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
|
descriptorWrites[0].dstSet = descriptorSets[i];
|
|
descriptorWrites[0].dstBinding = 0;
|
|
descriptorWrites[0].dstArrayElement = 0;
|
|
descriptorWrites[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
|
descriptorWrites[0].descriptorCount = 1;
|
|
descriptorWrites[0].pBufferInfo = &bufferInfo;
|
|
|
|
descriptorWrites[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
|
descriptorWrites[1].dstSet = descriptorSets[i];
|
|
descriptorWrites[1].dstBinding = 1;
|
|
descriptorWrites[1].dstArrayElement = 0;
|
|
descriptorWrites[1].descriptorType =
|
|
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
|
descriptorWrites[1].descriptorCount = 1;
|
|
descriptorWrites[1].pImageInfo = &imageInfo;
|
|
|
|
vkUpdateDescriptorSets(device,
|
|
static_cast<uint32_t>(descriptorWrites.size()),
|
|
descriptorWrites.data(), 0, nullptr);
|
|
}
|
|
}
|
|
|
|
void createBuffer(VkDeviceSize size, VkBufferUsageFlags usage,
|
|
VkMemoryPropertyFlags properties, VkBuffer& buffer,
|
|
VkDeviceMemory& bufferMemory) {
|
|
VkBufferCreateInfo bufferInfo = {};
|
|
bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
|
|
bufferInfo.size = size;
|
|
bufferInfo.usage = usage;
|
|
bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
|
|
|
|
if (vkCreateBuffer(device, &bufferInfo, nullptr, &buffer) != VK_SUCCESS) {
|
|
throw std::runtime_error("failed to create buffer!");
|
|
}
|
|
|
|
VkMemoryRequirements memRequirements;
|
|
vkGetBufferMemoryRequirements(device, buffer, &memRequirements);
|
|
|
|
VkMemoryAllocateInfo allocInfo = {};
|
|
allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
|
|
allocInfo.allocationSize = memRequirements.size;
|
|
allocInfo.memoryTypeIndex =
|
|
findMemoryType(memRequirements.memoryTypeBits, properties);
|
|
|
|
if (vkAllocateMemory(device, &allocInfo, nullptr, &bufferMemory) !=
|
|
VK_SUCCESS) {
|
|
throw std::runtime_error("failed to allocate buffer memory!");
|
|
}
|
|
|
|
vkBindBufferMemory(device, buffer, bufferMemory, 0);
|
|
}
|
|
|
|
VkCommandBuffer beginSingleTimeCommands() {
|
|
VkCommandBufferAllocateInfo allocInfo = {};
|
|
allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
|
|
allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
|
|
allocInfo.commandPool = commandPool;
|
|
allocInfo.commandBufferCount = 1;
|
|
|
|
VkCommandBuffer commandBuffer;
|
|
vkAllocateCommandBuffers(device, &allocInfo, &commandBuffer);
|
|
|
|
VkCommandBufferBeginInfo beginInfo = {};
|
|
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
|
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
|
|
|
|
vkBeginCommandBuffer(commandBuffer, &beginInfo);
|
|
|
|
return commandBuffer;
|
|
}
|
|
|
|
void endSingleTimeCommands(VkCommandBuffer commandBuffer) {
|
|
vkEndCommandBuffer(commandBuffer);
|
|
|
|
VkSubmitInfo submitInfo = {};
|
|
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
|
|
submitInfo.commandBufferCount = 1;
|
|
submitInfo.pCommandBuffers = &commandBuffer;
|
|
|
|
vkQueueSubmit(graphicsQueue, 1, &submitInfo, VK_NULL_HANDLE);
|
|
vkQueueWaitIdle(graphicsQueue);
|
|
|
|
vkFreeCommandBuffers(device, commandPool, 1, &commandBuffer);
|
|
}
|
|
|
|
void copyBuffer(VkBuffer srcBuffer, VkBuffer dstBuffer, VkDeviceSize size) {
|
|
VkCommandBuffer commandBuffer = beginSingleTimeCommands();
|
|
|
|
VkBufferCopy copyRegion = {};
|
|
copyRegion.size = size;
|
|
vkCmdCopyBuffer(commandBuffer, srcBuffer, dstBuffer, 1, ©Region);
|
|
|
|
endSingleTimeCommands(commandBuffer);
|
|
}
|
|
|
|
uint32_t findMemoryType(uint32_t typeFilter,
|
|
VkMemoryPropertyFlags properties) {
|
|
VkPhysicalDeviceMemoryProperties memProperties;
|
|
vkGetPhysicalDeviceMemoryProperties(physicalDevice, &memProperties);
|
|
|
|
for (uint32_t i = 0; i < memProperties.memoryTypeCount; i++) {
|
|
if ((typeFilter & (1 << i)) &&
|
|
(memProperties.memoryTypes[i].propertyFlags & properties) ==
|
|
properties) {
|
|
return i;
|
|
}
|
|
}
|
|
|
|
throw std::runtime_error("failed to find suitable memory type!");
|
|
}
|
|
|
|
void createCommandBuffers() {
|
|
commandBuffers.resize(swapChainFramebuffers.size());
|
|
|
|
VkCommandBufferAllocateInfo allocInfo = {};
|
|
allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
|
|
allocInfo.commandPool = commandPool;
|
|
allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
|
|
allocInfo.commandBufferCount = (uint32_t)commandBuffers.size();
|
|
|
|
if (vkAllocateCommandBuffers(device, &allocInfo, commandBuffers.data()) !=
|
|
VK_SUCCESS) {
|
|
throw std::runtime_error("failed to allocate command buffers!");
|
|
}
|
|
|
|
for (size_t i = 0; i < commandBuffers.size(); i++) {
|
|
VkCommandBufferBeginInfo beginInfo = {};
|
|
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
|
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;
|
|
|
|
if (vkBeginCommandBuffer(commandBuffers[i], &beginInfo) != VK_SUCCESS) {
|
|
throw std::runtime_error("failed to begin recording command buffer!");
|
|
}
|
|
|
|
VkRenderPassBeginInfo renderPassInfo = {};
|
|
renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
|
|
renderPassInfo.renderPass = renderPass;
|
|
renderPassInfo.framebuffer = swapChainFramebuffers[i];
|
|
renderPassInfo.renderArea.offset = {0, 0};
|
|
renderPassInfo.renderArea.extent = swapChainExtent;
|
|
|
|
VkClearValue clearColor = {0.0f, 0.0f, 0.0f, 1.0f};
|
|
renderPassInfo.clearValueCount = 1;
|
|
renderPassInfo.pClearValues = &clearColor;
|
|
|
|
vkCmdBeginRenderPass(commandBuffers[i], &renderPassInfo,
|
|
VK_SUBPASS_CONTENTS_INLINE);
|
|
|
|
vkCmdBindPipeline(commandBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS,
|
|
graphicsPipeline);
|
|
|
|
VkBuffer vertexBuffers[] = {vertexBuffer};
|
|
VkDeviceSize offsets[] = {0};
|
|
vkCmdBindVertexBuffers(commandBuffers[i], 0, 1, vertexBuffers, offsets);
|
|
|
|
vkCmdBindIndexBuffer(commandBuffers[i], indexBuffer, 0,
|
|
VK_INDEX_TYPE_UINT16);
|
|
|
|
vkCmdBindDescriptorSets(commandBuffers[i],
|
|
VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout,
|
|
0, 1, &descriptorSets[i], 0, nullptr);
|
|
|
|
vkCmdDrawIndexed(commandBuffers[i], static_cast<uint32_t>(indices.size()),
|
|
1, 0, 0, 0);
|
|
// vkCmdDraw(commandBuffers[i], static_cast<uint32_t>(vertices.size()), 1,
|
|
// 0, 0);
|
|
|
|
vkCmdEndRenderPass(commandBuffers[i]);
|
|
|
|
if (vkEndCommandBuffer(commandBuffers[i]) != VK_SUCCESS) {
|
|
throw std::runtime_error("failed to record command buffer!");
|
|
}
|
|
}
|
|
}
|
|
|
|
void createSyncObjects() {
|
|
imageAvailableSemaphores.resize(MAX_FRAMES);
|
|
renderFinishedSemaphores.resize(MAX_FRAMES);
|
|
inFlightFences.resize(MAX_FRAMES);
|
|
|
|
VkSemaphoreCreateInfo semaphoreInfo = {};
|
|
semaphoreInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
|
|
|
|
VkFenceCreateInfo fenceInfo = {};
|
|
fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
|
|
fenceInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT;
|
|
|
|
for (size_t i = 0; i < MAX_FRAMES; i++) {
|
|
if (vkCreateSemaphore(device, &semaphoreInfo, nullptr,
|
|
&imageAvailableSemaphores[i]) != VK_SUCCESS ||
|
|
vkCreateSemaphore(device, &semaphoreInfo, nullptr,
|
|
&renderFinishedSemaphores[i]) != VK_SUCCESS ||
|
|
vkCreateFence(device, &fenceInfo, nullptr, &inFlightFences[i]) !=
|
|
VK_SUCCESS) {
|
|
throw std::runtime_error(
|
|
"failed to create synchronization objects for a frame!");
|
|
}
|
|
}
|
|
}
|
|
|
|
void createSyncObjectsExt() {
|
|
VkSemaphoreCreateInfo semaphoreInfo = {};
|
|
semaphoreInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
|
|
|
|
memset(&semaphoreInfo, 0, sizeof(semaphoreInfo));
|
|
semaphoreInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
|
|
|
|
#ifdef _WIN64
|
|
WindowsSecurityAttributes winSecurityAttributes;
|
|
|
|
VkExportSemaphoreWin32HandleInfoKHR
|
|
vulkanExportSemaphoreWin32HandleInfoKHR = {};
|
|
vulkanExportSemaphoreWin32HandleInfoKHR.sType =
|
|
VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_WIN32_HANDLE_INFO_KHR;
|
|
vulkanExportSemaphoreWin32HandleInfoKHR.pNext = NULL;
|
|
vulkanExportSemaphoreWin32HandleInfoKHR.pAttributes =
|
|
&winSecurityAttributes;
|
|
vulkanExportSemaphoreWin32HandleInfoKHR.dwAccess =
|
|
DXGI_SHARED_RESOURCE_READ | DXGI_SHARED_RESOURCE_WRITE;
|
|
vulkanExportSemaphoreWin32HandleInfoKHR.name = (LPCWSTR)NULL;
|
|
#endif
|
|
VkExportSemaphoreCreateInfoKHR vulkanExportSemaphoreCreateInfo = {};
|
|
vulkanExportSemaphoreCreateInfo.sType =
|
|
VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO_KHR;
|
|
#ifdef _WIN64
|
|
vulkanExportSemaphoreCreateInfo.pNext =
|
|
IsWindows8OrGreater() ? &vulkanExportSemaphoreWin32HandleInfoKHR : NULL;
|
|
vulkanExportSemaphoreCreateInfo.handleTypes =
|
|
IsWindows8OrGreater()
|
|
? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
|
|
: VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT;
|
|
#else
|
|
vulkanExportSemaphoreCreateInfo.pNext = NULL;
|
|
vulkanExportSemaphoreCreateInfo.handleTypes =
|
|
VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
|
|
#endif
|
|
semaphoreInfo.pNext = &vulkanExportSemaphoreCreateInfo;
|
|
|
|
if (vkCreateSemaphore(device, &semaphoreInfo, nullptr,
|
|
&cudaUpdateVkSemaphore) != VK_SUCCESS ||
|
|
vkCreateSemaphore(device, &semaphoreInfo, nullptr,
|
|
&vkUpdateCudaSemaphore) != VK_SUCCESS) {
|
|
throw std::runtime_error(
|
|
"failed to create synchronization objects for a CUDA-Vulkan!");
|
|
}
|
|
}
|
|
|
|
void updateUniformBuffer() {
|
|
UniformBufferObject ubo = {};
|
|
|
|
mat4x4_identity(ubo.model);
|
|
mat4x4 Model;
|
|
mat4x4_dup(Model, ubo.model);
|
|
mat4x4_rotate(ubo.model, Model, 0.0f, 0.0f, 1.0f, degreesToRadians(135.0f));
|
|
|
|
vec3 eye = {2.0f, 2.0f, 2.0f};
|
|
vec3 center = {0.0f, 0.0f, 0.0f};
|
|
vec3 up = {0.0f, 0.0f, 1.0f};
|
|
mat4x4_look_at(ubo.view, eye, center, up);
|
|
|
|
mat4x4_perspective(ubo.proj, degreesToRadians(45.0f),
|
|
swapChainExtent.width / (float)swapChainExtent.height,
|
|
0.1f, 10.0f);
|
|
ubo.proj[1][1] *= -1;
|
|
|
|
for (size_t i = 0; i < swapChainImages.size(); i++) {
|
|
void* data;
|
|
vkMapMemory(device, uniformBuffersMemory[i], 0, sizeof(ubo), 0, &data);
|
|
memcpy(data, &ubo, sizeof(ubo));
|
|
vkUnmapMemory(device, uniformBuffersMemory[i]);
|
|
}
|
|
}
|
|
|
|
void drawFrame() {
|
|
static int startSubmit = 0;
|
|
|
|
vkWaitForFences(device, 1, &inFlightFences[currentFrame], VK_TRUE,
|
|
std::numeric_limits<uint64_t>::max());
|
|
|
|
uint32_t imageIndex;
|
|
VkResult result = vkAcquireNextImageKHR(
|
|
device, swapChain, std::numeric_limits<uint64_t>::max(),
|
|
imageAvailableSemaphores[currentFrame], VK_NULL_HANDLE, &imageIndex);
|
|
|
|
if (result == VK_ERROR_OUT_OF_DATE_KHR) {
|
|
recreateSwapChain();
|
|
return;
|
|
} else if (result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR) {
|
|
throw std::runtime_error("failed to acquire swap chain image!");
|
|
}
|
|
|
|
vkResetFences(device, 1, &inFlightFences[currentFrame]);
|
|
|
|
if (!startSubmit) {
|
|
submitVulkan(imageIndex);
|
|
startSubmit = 1;
|
|
} else {
|
|
submitVulkanCuda(imageIndex);
|
|
}
|
|
|
|
VkPresentInfoKHR presentInfo = {};
|
|
presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
|
|
|
|
VkSemaphore signalSemaphores[] = {renderFinishedSemaphores[currentFrame]};
|
|
|
|
presentInfo.waitSemaphoreCount = 1;
|
|
presentInfo.pWaitSemaphores = signalSemaphores;
|
|
|
|
VkSwapchainKHR swapChains[] = {swapChain};
|
|
presentInfo.swapchainCount = 1;
|
|
presentInfo.pSwapchains = swapChains;
|
|
presentInfo.pImageIndices = &imageIndex;
|
|
presentInfo.pResults = nullptr; // Optional
|
|
|
|
result = vkQueuePresentKHR(presentQueue, &presentInfo);
|
|
|
|
if (result == VK_ERROR_OUT_OF_DATE_KHR || result == VK_SUBOPTIMAL_KHR ||
|
|
framebufferResized) {
|
|
framebufferResized = false;
|
|
recreateSwapChain();
|
|
} else if (result != VK_SUCCESS) {
|
|
throw std::runtime_error("failed to present swap chain image!");
|
|
}
|
|
|
|
cudaUpdateVkImage();
|
|
|
|
currentFrame = (currentFrame + 1) % MAX_FRAMES;
|
|
// Added sleep of 10 millisecs so that CPU does not submit too much work to
|
|
// GPU
|
|
std::this_thread::sleep_for(std::chrono::microseconds(10000));
|
|
char title[256];
|
|
sprintf(title, "Vulkan Image CUDA Box Filter (radius=%d)", filter_radius);
|
|
glfwSetWindowTitle(window, title);
|
|
}
|
|
|
|
void cudaVkSemaphoreSignal(cudaExternalSemaphore_t& extSemaphore) {
|
|
cudaExternalSemaphoreSignalParams extSemaphoreSignalParams;
|
|
memset(&extSemaphoreSignalParams, 0, sizeof(extSemaphoreSignalParams));
|
|
|
|
extSemaphoreSignalParams.params.fence.value = 0;
|
|
extSemaphoreSignalParams.flags = 0;
|
|
checkCudaErrors(cudaSignalExternalSemaphoresAsync(
|
|
&extSemaphore, &extSemaphoreSignalParams, 1, streamToRun));
|
|
}
|
|
|
|
void cudaVkSemaphoreWait(cudaExternalSemaphore_t& extSemaphore) {
|
|
cudaExternalSemaphoreWaitParams extSemaphoreWaitParams;
|
|
|
|
memset(&extSemaphoreWaitParams, 0, sizeof(extSemaphoreWaitParams));
|
|
|
|
extSemaphoreWaitParams.params.fence.value = 0;
|
|
extSemaphoreWaitParams.flags = 0;
|
|
|
|
checkCudaErrors(cudaWaitExternalSemaphoresAsync(
|
|
&extSemaphore, &extSemaphoreWaitParams, 1, streamToRun));
|
|
}
|
|
|
|
void submitVulkan(uint32_t imageIndex) {
|
|
VkSubmitInfo submitInfo = {};
|
|
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
|
|
|
|
VkSemaphore waitSemaphores[] = {imageAvailableSemaphores[currentFrame]};
|
|
VkPipelineStageFlags waitStages[] = {
|
|
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT};
|
|
submitInfo.waitSemaphoreCount = 1;
|
|
submitInfo.pWaitSemaphores = waitSemaphores;
|
|
submitInfo.pWaitDstStageMask = waitStages;
|
|
submitInfo.commandBufferCount = 1;
|
|
submitInfo.pCommandBuffers = &commandBuffers[imageIndex];
|
|
|
|
VkSemaphore signalSemaphores[] = {renderFinishedSemaphores[currentFrame],
|
|
vkUpdateCudaSemaphore};
|
|
|
|
submitInfo.signalSemaphoreCount = 2;
|
|
submitInfo.pSignalSemaphores = signalSemaphores;
|
|
|
|
if (vkQueueSubmit(graphicsQueue, 1, &submitInfo,
|
|
inFlightFences[currentFrame]) != VK_SUCCESS) {
|
|
throw std::runtime_error("failed to submit draw command buffer!");
|
|
}
|
|
}
|
|
|
|
void submitVulkanCuda(uint32_t imageIndex) {
|
|
VkSubmitInfo submitInfo = {};
|
|
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
|
|
|
|
VkSemaphore waitSemaphores[] = {imageAvailableSemaphores[currentFrame],
|
|
cudaUpdateVkSemaphore};
|
|
VkPipelineStageFlags waitStages[] = {
|
|
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
|
|
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT};
|
|
submitInfo.waitSemaphoreCount = 2;
|
|
submitInfo.pWaitSemaphores = waitSemaphores;
|
|
submitInfo.pWaitDstStageMask = waitStages;
|
|
submitInfo.commandBufferCount = 1;
|
|
submitInfo.pCommandBuffers = &commandBuffers[imageIndex];
|
|
|
|
VkSemaphore signalSemaphores[] = {renderFinishedSemaphores[currentFrame],
|
|
vkUpdateCudaSemaphore};
|
|
|
|
submitInfo.signalSemaphoreCount = 2;
|
|
submitInfo.pSignalSemaphores = signalSemaphores;
|
|
|
|
if (vkQueueSubmit(graphicsQueue, 1, &submitInfo,
|
|
inFlightFences[currentFrame]) != VK_SUCCESS) {
|
|
throw std::runtime_error("failed to submit draw command buffer!");
|
|
}
|
|
}
|
|
|
|
VkShaderModule createShaderModule(const std::vector<char>& code) {
|
|
VkShaderModuleCreateInfo createInfo = {};
|
|
createInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
|
|
createInfo.codeSize = code.size();
|
|
createInfo.pCode = reinterpret_cast<const uint32_t*>(code.data());
|
|
|
|
VkShaderModule shaderModule;
|
|
if (vkCreateShaderModule(device, &createInfo, nullptr, &shaderModule) !=
|
|
VK_SUCCESS) {
|
|
throw std::runtime_error("failed to create shader module!");
|
|
}
|
|
|
|
return shaderModule;
|
|
}
|
|
|
|
VkSurfaceFormatKHR chooseSwapSurfaceFormat(
|
|
const std::vector<VkSurfaceFormatKHR>& availableFormats) {
|
|
if (availableFormats.size() == 1 &&
|
|
availableFormats[0].format == VK_FORMAT_UNDEFINED) {
|
|
return {VK_FORMAT_B8G8R8A8_UNORM, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR};
|
|
}
|
|
|
|
for (const auto& availableFormat : availableFormats) {
|
|
if (availableFormat.format == VK_FORMAT_B8G8R8A8_UNORM &&
|
|
availableFormat.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) {
|
|
return availableFormat;
|
|
}
|
|
}
|
|
|
|
return availableFormats[0];
|
|
}
|
|
|
|
VkPresentModeKHR chooseSwapPresentMode(
|
|
const std::vector<VkPresentModeKHR>& availablePresentModes) {
|
|
VkPresentModeKHR bestMode = VK_PRESENT_MODE_FIFO_KHR;
|
|
|
|
for (const auto& availablePresentMode : availablePresentModes) {
|
|
if (availablePresentMode == VK_PRESENT_MODE_MAILBOX_KHR) {
|
|
return availablePresentMode;
|
|
} else if (availablePresentMode == VK_PRESENT_MODE_IMMEDIATE_KHR) {
|
|
bestMode = availablePresentMode;
|
|
}
|
|
}
|
|
|
|
return bestMode;
|
|
}
|
|
|
|
VkExtent2D chooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities) {
|
|
if (capabilities.currentExtent.width !=
|
|
std::numeric_limits<uint32_t>::max()) {
|
|
return capabilities.currentExtent;
|
|
} else {
|
|
int width, height;
|
|
glfwGetFramebufferSize(window, &width, &height);
|
|
|
|
VkExtent2D actualExtent = {static_cast<uint32_t>(width),
|
|
static_cast<uint32_t>(height)};
|
|
|
|
actualExtent.width = std::max(
|
|
capabilities.minImageExtent.width,
|
|
std::min(capabilities.maxImageExtent.width, actualExtent.width));
|
|
actualExtent.height = std::max(
|
|
capabilities.minImageExtent.height,
|
|
std::min(capabilities.maxImageExtent.height, actualExtent.height));
|
|
|
|
return actualExtent;
|
|
}
|
|
}
|
|
|
|
SwapChainSupportDetails querySwapChainSupport(VkPhysicalDevice device) {
|
|
SwapChainSupportDetails details;
|
|
|
|
vkGetPhysicalDeviceSurfaceCapabilitiesKHR(device, surface,
|
|
&details.capabilities);
|
|
|
|
uint32_t formatCount;
|
|
vkGetPhysicalDeviceSurfaceFormatsKHR(device, surface, &formatCount,
|
|
nullptr);
|
|
|
|
if (formatCount != 0) {
|
|
details.formats.resize(formatCount);
|
|
vkGetPhysicalDeviceSurfaceFormatsKHR(device, surface, &formatCount,
|
|
details.formats.data());
|
|
}
|
|
|
|
uint32_t presentModeCount;
|
|
vkGetPhysicalDeviceSurfacePresentModesKHR(device, surface,
|
|
&presentModeCount, nullptr);
|
|
|
|
if (presentModeCount != 0) {
|
|
details.presentModes.resize(presentModeCount);
|
|
vkGetPhysicalDeviceSurfacePresentModesKHR(
|
|
device, surface, &presentModeCount, details.presentModes.data());
|
|
}
|
|
|
|
return details;
|
|
}
|
|
|
|
bool isDeviceSuitable(VkPhysicalDevice device) {
|
|
QueueFamilyIndices indices = findQueueFamilies(device);
|
|
|
|
bool extensionsSupported = checkDeviceExtensionSupport(device);
|
|
|
|
bool swapChainAdequate = false;
|
|
if (extensionsSupported) {
|
|
SwapChainSupportDetails swapChainSupport = querySwapChainSupport(device);
|
|
swapChainAdequate = !swapChainSupport.formats.empty() &&
|
|
!swapChainSupport.presentModes.empty();
|
|
}
|
|
|
|
VkPhysicalDeviceFeatures supportedFeatures;
|
|
vkGetPhysicalDeviceFeatures(device, &supportedFeatures);
|
|
|
|
return indices.isComplete() && extensionsSupported && swapChainAdequate &&
|
|
supportedFeatures.samplerAnisotropy;
|
|
}
|
|
|
|
bool checkDeviceExtensionSupport(VkPhysicalDevice device) {
|
|
uint32_t extensionCount;
|
|
vkEnumerateDeviceExtensionProperties(device, nullptr, &extensionCount,
|
|
nullptr);
|
|
|
|
std::vector<VkExtensionProperties> availableExtensions(extensionCount);
|
|
vkEnumerateDeviceExtensionProperties(device, nullptr, &extensionCount,
|
|
availableExtensions.data());
|
|
|
|
std::set<std::string> requiredExtensions(deviceExtensions.begin(),
|
|
deviceExtensions.end());
|
|
|
|
for (const auto& extension : availableExtensions) {
|
|
requiredExtensions.erase(extension.extensionName);
|
|
}
|
|
|
|
return requiredExtensions.empty();
|
|
}
|
|
|
|
QueueFamilyIndices findQueueFamilies(VkPhysicalDevice device) {
|
|
QueueFamilyIndices indices;
|
|
|
|
uint32_t queueFamilyCount = 0;
|
|
vkGetPhysicalDeviceQueueFamilyProperties(device, &queueFamilyCount,
|
|
nullptr);
|
|
|
|
std::vector<VkQueueFamilyProperties> queueFamilies(queueFamilyCount);
|
|
vkGetPhysicalDeviceQueueFamilyProperties(device, &queueFamilyCount,
|
|
queueFamilies.data());
|
|
|
|
int i = 0;
|
|
for (const auto& queueFamily : queueFamilies) {
|
|
if (queueFamily.queueCount > 0 &&
|
|
queueFamily.queueFlags & VK_QUEUE_GRAPHICS_BIT) {
|
|
indices.graphicsFamily = i;
|
|
}
|
|
|
|
VkBool32 presentSupport = false;
|
|
vkGetPhysicalDeviceSurfaceSupportKHR(device, i, surface, &presentSupport);
|
|
|
|
if (queueFamily.queueCount > 0 && presentSupport) {
|
|
indices.presentFamily = i;
|
|
}
|
|
|
|
if (indices.isComplete()) {
|
|
break;
|
|
}
|
|
|
|
i++;
|
|
}
|
|
|
|
return indices;
|
|
}
|
|
|
|
std::vector<const char*> getRequiredExtensions() {
|
|
uint32_t glfwExtensionCount = 0;
|
|
const char** glfwExtensions;
|
|
glfwExtensions = glfwGetRequiredInstanceExtensions(&glfwExtensionCount);
|
|
|
|
std::vector<const char*> extensions(glfwExtensions,
|
|
glfwExtensions + glfwExtensionCount);
|
|
|
|
if (enableValidationLayers) {
|
|
extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
|
|
}
|
|
|
|
return extensions;
|
|
}
|
|
|
|
bool checkValidationLayerSupport() {
|
|
uint32_t layerCount;
|
|
vkEnumerateInstanceLayerProperties(&layerCount, nullptr);
|
|
|
|
std::vector<VkLayerProperties> availableLayers(layerCount);
|
|
vkEnumerateInstanceLayerProperties(&layerCount, availableLayers.data());
|
|
|
|
for (const char* layerName : validationLayers) {
|
|
bool layerFound = false;
|
|
|
|
for (const auto& layerProperties : availableLayers) {
|
|
if (strcmp(layerName, layerProperties.layerName) == 0) {
|
|
layerFound = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!layerFound) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static std::vector<char> readFile(const std::string& filename) {
|
|
char* file_path = sdkFindFilePath(filename.c_str(), execution_path.c_str());
|
|
std::ifstream file(file_path, std::ios::ate | std::ios::binary);
|
|
|
|
if (!file.is_open()) {
|
|
throw std::runtime_error("failed to open file!");
|
|
}
|
|
|
|
size_t fileSize = (size_t)file.tellg();
|
|
std::vector<char> buffer(fileSize);
|
|
|
|
file.seekg(0);
|
|
file.read(buffer.data(), fileSize);
|
|
|
|
file.close();
|
|
|
|
return buffer;
|
|
}
|
|
|
|
static VKAPI_ATTR VkBool32 VKAPI_CALL
|
|
debugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity,
|
|
VkDebugUtilsMessageTypeFlagsEXT messageType,
|
|
const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData,
|
|
void* pUserData) {
|
|
std::cerr << "validation layer: " << pCallbackData->pMessage << std::endl;
|
|
|
|
return VK_FALSE;
|
|
}
|
|
};
|
|
|
|
int main(int argc, char** argv) {
|
|
execution_path = argv[0];
|
|
std::string image_filename = "lenaRGB.ppm";
|
|
|
|
if (checkCmdLineFlag(argc, (const char**)argv, "file")) {
|
|
getCmdLineArgumentString(argc, (const char**)argv, "file",
|
|
(char**)&image_filename);
|
|
}
|
|
|
|
vulkanImageCUDA app;
|
|
|
|
try {
|
|
// This app only works on ppm images
|
|
app.loadImageData(image_filename);
|
|
app.run();
|
|
} catch (const std::exception& e) {
|
|
std::cerr << e.what() << std::endl;
|
|
return EXIT_FAILURE;
|
|
}
|
|
|
|
return EXIT_SUCCESS;
|
|
}
|