diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c1f791d..784a15a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ ## Changelog +### CUDA 13.2 +* Added the MSVC compile flag `-Xcompiler=/Zc:preprocessor` in CMakeLists.txt to comply with CUDA13.2 CCCL. Previously, using the traditional preprocessor triggered the warning “MSVC/cl.exe with traditional preprocessor is used…”, which now leads to a build error. + ### CUDA 13.1 * Minor bug fixes and enhancements, no structural or functional changes diff --git a/CMakeLists.txt b/CMakeLists.txt index 92e580e2..49d69f9c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,6 +22,11 @@ endif() set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --extended-lambda") +# Add MSVC-specific flags for standard-conforming preprocessor (required for CCCL) +if(MSVC) + add_compile_options($<$:-Xcompiler=/Zc:preprocessor>) +endif() + # Include installation configuration before processing samples include(cmake/InstallSamples.cmake) diff --git a/README.md b/README.md index 4686e11d..258daaa2 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # CUDA Samples -Samples for CUDA Developers which demonstrates features in CUDA Toolkit. This version supports [CUDA Toolkit 13.1](https://developer.nvidia.com/cuda-downloads). +Samples for CUDA Developers which demonstrates features in CUDA Toolkit. This version supports [CUDA Toolkit 13.2](https://developer.nvidia.com/cuda-downloads). ## Release Notes diff --git a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy.cu b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy.cu index 81d0b08d..0411006c 100644 --- a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy.cu +++ b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy.cu @@ -25,6 +25,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include #include // helper functions for CUDA error check #include @@ -38,7 +39,7 @@ const int manualBlockSize = 32; // execution configuration, including anything the launch configurator // API suggests. //////////////////////////////////////////////////////////////////////////////// -__global__ void square(int *array, int arrayCount) +__global__ void square(uint32_t *array, int arrayCount) { extern __shared__ int dynamicSmem[]; int idx = threadIdx.x + blockIdx.x * blockDim.x; @@ -99,7 +100,7 @@ static double reportPotentialOccupancy(void *kernel, int blockSize, size_t dynam // This function configures the launch based on the "automatic" // argument, records the runtime, and reports occupancy and runtime. //////////////////////////////////////////////////////////////////////////////// -static int launchConfig(int *array, int arrayCount, bool automatic) +static int launchConfig(uint32_t *array, int arrayCount, bool automatic) { int blockSize; int minGridSize; @@ -166,20 +167,20 @@ static int launchConfig(int *array, int arrayCount, bool automatic) //////////////////////////////////////////////////////////////////////////////// static int test(bool automaticLaunchConfig, const int count = 1000000) { - int *array; - int *dArray; - int size = count * sizeof(int); + uint32_t *array; + uint32_t *dArray; + int size = count * sizeof(uint32_t); - array = new int[count]; + array = new uint32_t[count]; - for (int i = 0; i < count; i += 1) { + for (uint32_t i = 0; i < count; i += 1) { array[i] = i; } checkCudaErrors(cudaMalloc(&dArray, size)); checkCudaErrors(cudaMemcpy(dArray, array, size, cudaMemcpyHostToDevice)); - for (int i = 0; i < count; i += 1) { + for (uint32_t i = 0; i < count; i += 1) { array[i] = 0; } @@ -189,8 +190,9 @@ static int test(bool automaticLaunchConfig, const int count = 1000000) checkCudaErrors(cudaFree(dArray)); // Verify the return data + // Both GPU and CPU use uint32_t * uint32_t, which has well-defined overflow behavior (modulo 2^32) // - for (int i = 0; i < count; i += 1) { + for (uint32_t i = 0; i < count; i += 1) { if (array[i] != i * i) { std::cout << "element " << i << " expected " << i * i << " actual " << array[i] << std::endl; return 1; diff --git a/Samples/2_Concepts_and_Techniques/particles/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/particles/CMakeLists.txt index 7441b7d8..f81cb309 100644 --- a/Samples/2_Concepts_and_Techniques/particles/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/particles/CMakeLists.txt @@ -39,10 +39,7 @@ if(${OpenGL_FOUND}) # Add target for particles add_executable(particles particleSystem.cpp particleSystem_cuda.cu particles.cpp render_particles.cpp shaders.cpp) -target_compile_options(particles PRIVATE - $<$:--extended-lambda> - $<$,$>:-Xcompiler="/Zc:preprocessor"> -) +target_compile_options(particles PRIVATE $<$:--extended-lambda>) target_compile_features(particles PRIVATE cxx_std_17 cuda_std_17) diff --git a/Samples/2_Concepts_and_Techniques/particles/particleSystem_cuda.cu b/Samples/2_Concepts_and_Techniques/particles/particleSystem_cuda.cu index ce9e82a8..dbfa1296 100644 --- a/Samples/2_Concepts_and_Techniques/particles/particleSystem_cuda.cu +++ b/Samples/2_Concepts_and_Techniques/particles/particleSystem_cuda.cu @@ -135,8 +135,8 @@ extern "C" thrust::device_ptr d_pos4((float4 *)pos); thrust::device_ptr d_vel4((float4 *)vel); - thrust::for_each(thrust::make_zip_iterator(thrust::make_tuple(d_pos4, d_vel4)), - thrust::make_zip_iterator(thrust::make_tuple(d_pos4 + numParticles, d_vel4 + numParticles)), + thrust::for_each(thrust::make_zip_iterator(d_pos4, d_vel4), + thrust::make_zip_iterator(d_pos4 + numParticles, d_vel4 + numParticles), integrate_functor(deltaTime)); } diff --git a/Samples/2_Concepts_and_Techniques/particles/particles_kernel_impl.cuh b/Samples/2_Concepts_and_Techniques/particles/particles_kernel_impl.cuh index b855b7c4..6e47f0a5 100644 --- a/Samples/2_Concepts_and_Techniques/particles/particles_kernel_impl.cuh +++ b/Samples/2_Concepts_and_Techniques/particles/particles_kernel_impl.cuh @@ -41,6 +41,9 @@ #include "thrust/iterator/zip_iterator.h" #include "thrust/sort.h" +// for cuda::std::get +#include + namespace cg = cooperative_groups; #include "helper_math.h" #include "math_constants.h" @@ -60,8 +63,8 @@ struct integrate_functor template __device__ void operator()(Tuple t) { - volatile float4 posData = thrust::get<0>(t); - volatile float4 velData = thrust::get<1>(t); + volatile float4 posData = cuda::std::get<0>(t); + volatile float4 velData = cuda::std::get<1>(t); float3 pos = make_float3(posData.x, posData.y, posData.z); float3 vel = make_float3(velData.x, velData.y, velData.z); @@ -107,8 +110,8 @@ struct integrate_functor } // store new position and velocity - thrust::get<0>(t) = make_float4(pos, posData.w); - thrust::get<1>(t) = make_float4(vel, velData.w); + cuda::std::get<0>(t) = make_float4(pos, posData.w); + cuda::std::get<1>(t) = make_float4(vel, velData.w); } }; diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTree.cu b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTree.cu index b0c63605..0a265e00 100644 --- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTree.cu +++ b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTree.cu @@ -505,13 +505,12 @@ private: thrust::device_ptr dMinScannedEdges = pools.uintEdges.get(); thrust::device_ptr dMinScannedWeights = pools.floatEdges.get(); - thrust::inclusive_scan_by_key( - dEdgesFlags, - dEdgesFlags + edgesCount_, - thrust::make_zip_iterator(thrust::make_tuple(dWeights_, dEdges_)), - thrust::make_zip_iterator(thrust::make_tuple(dMinScannedWeights, dMinScannedEdges)), - thrust::greater_equal(), - thrust::minimum>()); + thrust::inclusive_scan_by_key(dEdgesFlags, + dEdgesFlags + edgesCount_, + thrust::make_zip_iterator(dWeights_, dEdges_), + thrust::make_zip_iterator(dMinScannedWeights, dMinScannedEdges), + thrust::greater_equal(), + thrust::minimum>()); // To make things clear. // Let "dEdgesFlags" denote groups of edges that @@ -562,11 +561,10 @@ private: thrust::sequence(dClusteredVerticesIDs, dClusteredVerticesIDs + verticesCount_); - thrust::sort(thrust::make_zip_iterator(thrust::make_tuple(thrust::device_ptr(dSuccessors), - thrust::device_ptr(dClusteredVerticesIDs))), - thrust::make_zip_iterator( - thrust::make_tuple(thrust::device_ptr(dSuccessors + verticesCount_), - thrust::device_ptr(dClusteredVerticesIDs + verticesCount_)))); + thrust::sort(thrust::make_zip_iterator(thrust::device_ptr(dSuccessors), + thrust::device_ptr(dClusteredVerticesIDs)), + thrust::make_zip_iterator(thrust::device_ptr(dSuccessors + verticesCount_), + thrust::device_ptr(dClusteredVerticesIDs + verticesCount_))); // Mark those groups. thrust::device_ptr dVerticesFlags_ = pools.uintVertices.get(); @@ -653,9 +651,8 @@ private: // the step of the algorithm to that one. Hence we need to // preserve the structure of the original graph: neighbours and // weights should be grouped by vertex. - thrust::sort(thrust::make_zip_iterator(thrust::make_tuple(dNewStartpoints, dSurvivedEdgesIDs)), - thrust::make_zip_iterator( - thrust::make_tuple(dNewStartpoints + edgesCount_, dSurvivedEdgesIDs + edgesCount_))); + thrust::sort(thrust::make_zip_iterator(dNewStartpoints, dSurvivedEdgesIDs), + thrust::make_zip_iterator(dNewStartpoints + edgesCount_, dSurvivedEdgesIDs + edgesCount_)); // Find the group of contracted edges. uint *invalidEdgesPtr = diff --git a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree.cu b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree.cu index 60112fda..804d2550 100644 --- a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree.cu +++ b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree.cu @@ -30,6 +30,9 @@ #include #include +// for cuda::std::tuple +#include + namespace cg = cooperative_groups; #include @@ -610,7 +613,7 @@ struct Random_generator return a; } - __host__ __device__ __forceinline__ thrust::tuple operator()() + __host__ __device__ __forceinline__ cuda::std::tuple operator()() { #ifdef __CUDA_ARCH__ unsigned seed = hash(blockIdx.x * blockDim.x + threadIdx.x + count); @@ -622,7 +625,7 @@ struct Random_generator #endif thrust::default_random_engine rng(seed); thrust::random::uniform_real_distribution distrib; - return thrust::make_tuple(distrib(rng), distrib(rng)); + return cuda::std::make_tuple(distrib(rng), distrib(rng)); } }; @@ -644,9 +647,8 @@ bool cdpQuadtree(int warp_size) // Generate random points. Random_generator rnd; - thrust::generate(thrust::make_zip_iterator(thrust::make_tuple(x_d0.begin(), y_d0.begin())), - thrust::make_zip_iterator(thrust::make_tuple(x_d0.end(), y_d0.end())), - rnd); + thrust::generate( + thrust::make_zip_iterator(x_d0.begin(), y_d0.begin()), thrust::make_zip_iterator(x_d0.end(), y_d0.end()), rnd); // Host structures to analyze the device ones. Points points_init[2]; diff --git a/Samples/5_Domain_Specific/simpleVulkan/VulkanBaseApp.cpp b/Samples/5_Domain_Specific/simpleVulkan/VulkanBaseApp.cpp index a5436c75..779a5fd2 100644 --- a/Samples/5_Domain_Specific/simpleVulkan/VulkanBaseApp.cpp +++ b/Samples/5_Domain_Specific/simpleVulkan/VulkanBaseApp.cpp @@ -144,8 +144,9 @@ VulkanBaseApp::~VulkanBaseApp() } #ifdef _VK_TIMELINE_SEMAPHORE - if (m_vkPresentationSemaphore != VK_NULL_HANDLE) { - vkDestroySemaphore(m_device, m_vkPresentationSemaphore, nullptr); + for (size_t i = 0; i < m_vkImageAcquiredSemaphores.size(); i++) { + vkDestroySemaphore(m_device, m_vkImageAcquiredSemaphores[i], nullptr); + vkDestroySemaphore(m_device, m_vkRenderCompleteSemaphores[i], nullptr); } #endif /* _VK_TIMELINE_SEMAPHORE */ @@ -1351,8 +1352,15 @@ void VulkanBaseApp::createSyncObjects() } #ifdef _VK_TIMELINE_SEMAPHORE - if (vkCreateSemaphore(m_device, &semaphoreInfo, nullptr, &m_vkPresentationSemaphore) != VK_SUCCESS) { - throw std::runtime_error("Failed to create binary semaphore!"); + m_vkImageAcquiredSemaphores.resize(MAX_FRAMES_IN_FLIGHT); + m_vkRenderCompleteSemaphores.resize(MAX_FRAMES_IN_FLIGHT); + for (size_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) { + if (vkCreateSemaphore(m_device, &semaphoreInfo, nullptr, &m_vkImageAcquiredSemaphores[i]) != VK_SUCCESS) { + throw std::runtime_error("Failed to create image acquired semaphore!"); + } + if (vkCreateSemaphore(m_device, &semaphoreInfo, nullptr, &m_vkRenderCompleteSemaphores[i]) != VK_SUCCESS) { + throw std::runtime_error("Failed to create render complete semaphore!"); + } } #endif /* _VK_TIMELINE_SEMAPHORE */ } @@ -1634,6 +1642,12 @@ void VulkanBaseApp::drawFrame() static uint64_t waitValue = 0; static uint64_t signalValue = 1; + size_t currentFrameIdx = m_currentFrame % MAX_FRAMES_IN_FLIGHT; + + // Wait for this frame's fence to avoid VUID-vkAcquireNextImageKHR-semaphore-01779 + // Ensures previous frame's GPU work completes before reusing per-frame semaphores + vkWaitForFences(m_device, 1, &m_inFlightFences[currentFrameIdx], VK_TRUE, std::numeric_limits::max()); + VkSemaphoreWaitInfo semaphoreWaitInfo = {}; semaphoreWaitInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO; semaphoreWaitInfo.pSemaphores = &m_vkTimelineSemaphore; @@ -1641,20 +1655,26 @@ void VulkanBaseApp::drawFrame() semaphoreWaitInfo.pValues = &waitValue; vkWaitSemaphores(m_device, &semaphoreWaitInfo, std::numeric_limits::max()); + // Use separate binary semaphores for swapchain sync to avoid VUID-01779 + // Timeline semaphore (m_vkTimelineSemaphore) is dedicated to Vulkan-CUDA interop uint32_t imageIndex; VkResult result = vkAcquireNextImageKHR(m_device, m_swapChain, std::numeric_limits::max(), - m_vkPresentationSemaphore, + m_vkImageAcquiredSemaphores[currentFrameIdx], VK_NULL_HANDLE, &imageIndex); if (result == VK_ERROR_OUT_OF_DATE_KHR) { recreateSwapChain(); + return; } else if (result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR) { throw std::runtime_error("Failed to acquire swap chain image!"); } + // Reset the fence for this frame + vkResetFences(m_device, 1, &m_inFlightFences[currentFrameIdx]); + updateUniformBuffer(imageIndex); VkSubmitInfo submitInfo = {}; @@ -1663,6 +1683,8 @@ void VulkanBaseApp::drawFrame() std::vector waitSemaphores; std::vector waitStages; + waitSemaphores.push_back(m_vkImageAcquiredSemaphores[currentFrameIdx]); + waitStages.push_back(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT); waitSemaphores.push_back(m_vkTimelineSemaphore); waitStages.push_back(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT); @@ -1675,26 +1697,30 @@ void VulkanBaseApp::drawFrame() std::vector signalSemaphores; signalSemaphores.push_back(m_vkTimelineSemaphore); + signalSemaphores.push_back(m_vkRenderCompleteSemaphores[currentFrameIdx]); submitInfo.signalSemaphoreCount = (uint32_t)signalSemaphores.size(); submitInfo.pSignalSemaphores = signalSemaphores.data(); VkTimelineSemaphoreSubmitInfo timelineInfo = {}; timelineInfo.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO; - timelineInfo.waitSemaphoreValueCount = 1; - timelineInfo.pWaitSemaphoreValues = &waitValue; - timelineInfo.signalSemaphoreValueCount = 1; - timelineInfo.pSignalSemaphoreValues = &signalValue; + timelineInfo.waitSemaphoreValueCount = 2; + uint64_t waitValues[] = {0, waitValue}; + timelineInfo.pWaitSemaphoreValues = waitValues; + timelineInfo.signalSemaphoreValueCount = 2; + uint64_t signalValues[] = {signalValue, 0}; + timelineInfo.pSignalSemaphoreValues = signalValues; submitInfo.pNext = &timelineInfo; - if (vkQueueSubmit(m_graphicsQueue, 1, &submitInfo, VK_NULL_HANDLE) != VK_SUCCESS) { + // Submit with fence to track when this frame completes + if (vkQueueSubmit(m_graphicsQueue, 1, &submitInfo, m_inFlightFences[currentFrameIdx]) != VK_SUCCESS) { throw std::runtime_error("failed to submit draw command buffer!"); } VkPresentInfoKHR presentInfo = {}; presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; presentInfo.waitSemaphoreCount = 1; - presentInfo.pWaitSemaphores = &m_vkPresentationSemaphore; + presentInfo.pWaitSemaphores = &m_vkRenderCompleteSemaphores[currentFrameIdx]; VkSwapchainKHR swapChains[] = {m_swapChain}; presentInfo.swapchainCount = 1; diff --git a/Samples/5_Domain_Specific/simpleVulkan/VulkanBaseApp.h b/Samples/5_Domain_Specific/simpleVulkan/VulkanBaseApp.h index 7354881d..d42c89f3 100644 --- a/Samples/5_Domain_Specific/simpleVulkan/VulkanBaseApp.h +++ b/Samples/5_Domain_Specific/simpleVulkan/VulkanBaseApp.h @@ -107,7 +107,8 @@ protected: std::vector m_inFlightFences; std::vector m_uniformBuffers; std::vector m_uniformMemory; - VkSemaphore m_vkPresentationSemaphore; + std::vector m_vkImageAcquiredSemaphores; + std::vector m_vkRenderCompleteSemaphores; VkSemaphore m_vkTimelineSemaphore; VkDescriptorSetLayout m_descriptorSetLayout; VkDescriptorPool m_descriptorPool; diff --git a/Samples/5_Domain_Specific/simpleVulkan/main.cpp b/Samples/5_Domain_Specific/simpleVulkan/main.cpp index 2419220e..ecce8868 100644 --- a/Samples/5_Domain_Specific/simpleVulkan/main.cpp +++ b/Samples/5_Domain_Specific/simpleVulkan/main.cpp @@ -38,7 +38,7 @@ typedef float vec2[2]; std::string execution_path; -#ifndef NDEBUG +#ifdef NDEBUG #define ENABLE_VALIDATION (false) #else #define ENABLE_VALIDATION (true) diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/main.cpp b/Samples/5_Domain_Specific/simpleVulkanMMAP/main.cpp index 3f84d5ea..879559f7 100644 --- a/Samples/5_Domain_Specific/simpleVulkanMMAP/main.cpp +++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/main.cpp @@ -44,7 +44,7 @@ #include "helper_multiprocess.h" // #define DEBUG -#ifndef DEBUG +#ifdef NDEBUG #define ENABLE_VALIDATION (false) #else #define ENABLE_VALIDATION (true) diff --git a/Samples/5_Domain_Specific/smokeParticles/ParticleSystem_cuda.cu b/Samples/5_Domain_Specific/smokeParticles/ParticleSystem_cuda.cu index 69af1466..ab06d847 100644 --- a/Samples/5_Domain_Specific/smokeParticles/ParticleSystem_cuda.cu +++ b/Samples/5_Domain_Specific/smokeParticles/ParticleSystem_cuda.cu @@ -127,9 +127,9 @@ extern "C" thrust::device_ptr d_oldVel(oldVel); thrust::for_each( - thrust::make_zip_iterator(thrust::make_tuple(d_newPos, d_newVel, d_oldPos, d_oldVel)), - thrust::make_zip_iterator(thrust::make_tuple( - d_newPos + numParticles, d_newVel + numParticles, d_oldPos + numParticles, d_oldVel + numParticles)), + thrust::make_zip_iterator(d_newPos, d_newVel, d_oldPos, d_oldVel), + thrust::make_zip_iterator( + d_newPos + numParticles, d_newVel + numParticles, d_oldPos + numParticles, d_oldVel + numParticles), integrate_functor(deltaTime, noiseTex)); } @@ -143,8 +143,8 @@ extern "C" thrust::device_ptr d_keys(keys); thrust::device_ptr d_indices(indices); - thrust::for_each(thrust::make_zip_iterator(thrust::make_tuple(d_pos, d_keys)), - thrust::make_zip_iterator(thrust::make_tuple(d_pos + numParticles, d_keys + numParticles)), + thrust::for_each(thrust::make_zip_iterator(d_pos, d_keys), + thrust::make_zip_iterator(d_pos + numParticles, d_keys + numParticles), calcDepth_functor(sortVector)); thrust::sequence(d_indices, d_indices + numParticles); diff --git a/Samples/5_Domain_Specific/smokeParticles/particles_kernel_device.cuh b/Samples/5_Domain_Specific/smokeParticles/particles_kernel_device.cuh index 8e6e449b..f43fcd15 100644 --- a/Samples/5_Domain_Specific/smokeParticles/particles_kernel_device.cuh +++ b/Samples/5_Domain_Specific/smokeParticles/particles_kernel_device.cuh @@ -40,6 +40,9 @@ #include "thrust/iterator/zip_iterator.h" #include "thrust/sort.h" +// for cuda::std::get +#include + cudaTextureObject_t noiseTex; // simulation parameters __constant__ SimParams cudaParams; @@ -65,8 +68,8 @@ struct integrate_functor template __device__ void operator()(Tuple t) { - volatile float4 posData = thrust::get<2>(t); - volatile float4 velData = thrust::get<3>(t); + volatile float4 posData = cuda::std::get<2>(t); + volatile float4 velData = cuda::std::get<3>(t); float3 pos = make_float3(posData.x, posData.y, posData.z); float3 vel = make_float3(velData.x, velData.y, velData.z); @@ -95,8 +98,8 @@ struct integrate_functor vel *= cudaParams.globalDamping; // store new position and velocity - thrust::get<0>(t) = make_float4(pos, age); - thrust::get<1>(t) = make_float4(vel, velData.w); + cuda::std::get<0>(t) = make_float4(pos, age); + cuda::std::get<1>(t) = make_float4(vel, velData.w); } }; @@ -111,10 +114,10 @@ struct calcDepth_functor template __host__ __device__ void operator()(Tuple t) { - volatile float4 p = thrust::get<0>(t); - float key = -dot(make_float3(p.x, p.y, p.z), + volatile float4 p = cuda::std::get<0>(t); + float key = -dot(make_float3(p.x, p.y, p.z), sortVector); // project onto sort vector - thrust::get<1>(t) = key; + cuda::std::get<1>(t) = key; } }; diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA.cu b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA.cu index f991da41..51920a77 100644 --- a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA.cu +++ b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA.cu @@ -69,9 +69,9 @@ const int MAX_FRAMES = 4; const std::vector validationLayers = {"VK_LAYER_KHRONOS_validation"}; #ifdef NDEBUG -const bool enableValidationLayers = true; -#else const bool enableValidationLayers = false; +#else +const bool enableValidationLayers = true; #endif std::string execution_path;