Merge branch 'shawnz_bug_fix' into 'master'

Fixing Bug 3570274, 3570274, 5952280, 5970525 and 5970583

See merge request cuda-samples/cuda-samples!150
This commit is contained in:
Rob Armstrong 2026-03-30 08:09:12 -07:00
commit a4526d5229
63 changed files with 85 additions and 64 deletions

View File

@ -604,6 +604,11 @@ NVJPEG library provides high-performance, GPU accelerated JPEG decoding function
NVRTC (CUDA RunTime Compilation) is a runtime compilation library for CUDA C++.
#### NVJITLINK
NVJITLINK is a CUDA runtime library that links multiple GPU device code objects at runtime and enables just-in-time link-time optimization (JIT LTO).
It can take device code from tools such as NVCC and NVRTC, link and optimize it on the fly, and produce the final GPU binary. For more details, see: https://docs.nvidia.com/cuda/nvjitlink/index.html
#### Stream Priorities
Stream Priorities allows the creation of streams with specified priorities. Stream Priorities is only available on GPUs with SM architecture of 3.5 or above.

View File

@ -24,7 +24,7 @@ x86_64, armv7l
cudaStreamDestroy, cudaFree, cudaMallocManaged, cudaStreamAttachMemAsync, cudaSetDevice, cudaDeviceSynchronize, cudaStreamSynchronize, cudaStreamCreate, cudaGetDeviceProperties
## Dependencies needed to build/run
[OpenMP](../../../README.md#openmp), [UVM](../../../README.md#uvm), [CUBLAS](../../../README.md#cublas)
[OpenMP](../../../README.md#openmp), [UVM](../../../README.md#unified-virtual-memory), [CUBLAS](../../../README.md#cublas)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, armv7l
cudaMemcpy, cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaGetDeviceProperties
## Dependencies needed to build/run
[FP16](../../../README.md#fp16)
[FP16](../../../README.md#16-bit-floating-point)
## Prerequisites

View File

@ -94,7 +94,7 @@ scalarProductKernel_intrinsics(half2 const *const a, half2 const *const b, float
shArray[threadIdx.x] = __float2half2_rn(0.f);
half2 value = __float2half2_rn(0.f);
for (int i = threadIdx.x + blockDim.x + blockIdx.x; i < size; i += stride) {
for (int i = threadIdx.x + blockDim.x * blockIdx.x; i < size; i += stride) {
value = __hfma2(a[i], b[i], value);
}
@ -118,7 +118,7 @@ scalarProductKernel_native(half2 const *const a, half2 const *const b, float *co
half2 value(0.f, 0.f);
shArray[threadIdx.x] = value;
for (int i = threadIdx.x + blockDim.x + blockIdx.x; i < size; i += stride) {
for (int i = threadIdx.x + blockDim.x * blockIdx.x; i < size; i += stride) {
value = a[i] * b[i] + value;
}

View File

@ -255,6 +255,7 @@ void runTest(int argc, char **argv)
checkCudaErrors(cuMemFree(d_A));
checkCudaErrors(cuMemFree(d_B));
checkCudaErrors(cuMemFree(d_C));
checkCudaErrors(cuModuleUnload(cuModule));
checkCudaErrors(cuCtxDestroy(cuContext));
}

View File

@ -68,6 +68,7 @@ extern "C" void computeGold(float *, const float *, const float *, unsigned int,
// Globals
////////////////////////////////////////////////////////////////////////////////
CUcontext g_cuContext;
CUmodule g_cuModule;
bool noprompt = false;
static const char *sSDKsample = "matrixMulDynlinkJIT (CUDA dynamic linking)";
@ -90,7 +91,6 @@ CUresult initCUDA(int argc, char **argv, CUfunction *pMatrixMul, int *block_size
{
CUresult status;
CUdevice cuDevice;
CUmodule cuModule;
CUfunction cuFunction;
int major, minor, block_size, devID = 0;
char deviceName[256];
@ -184,11 +184,11 @@ CUresult initCUDA(int argc, char **argv, CUfunction *pMatrixMul, int *block_size
printf("> Compiling CUDA module\n");
#if defined(_WIN64) || defined(__LP64__)
status =
cuModuleLoadDataEx(&cuModule, matrixMul_kernel_64_ptxdump, jitNumOptions, jitOptions, (void **)jitOptVals);
status = cuModuleLoadDataEx(
&g_cuModule, matrixMul_kernel_64_ptxdump, jitNumOptions, jitOptions, (void **)jitOptVals);
#else
status =
cuModuleLoadDataEx(&cuModule, matrixMul_kernel_32_ptxdump, jitNumOptions, jitOptions, (void **)jitOptVals);
status = cuModuleLoadDataEx(
&g_cuModule, matrixMul_kernel_32_ptxdump, jitNumOptions, jitOptions, (void **)jitOptVals);
#endif
printf("> PTX JIT log:\n%s\n", jitLogBuffer);
@ -206,9 +206,10 @@ CUresult initCUDA(int argc, char **argv, CUfunction *pMatrixMul, int *block_size
// retrieve CUDA function from the compiled module
status = cuModuleGetFunction(
&cuFunction, cuModule, (block_size == 16) ? "matrixMul_bs16_32bit" : "matrixMul_bs32_32bit");
&cuFunction, g_cuModule, (block_size == 16) ? "matrixMul_bs16_32bit" : "matrixMul_bs32_32bit");
if (CUDA_SUCCESS != status) {
cuModuleUnload(g_cuModule);
cuCtxDestroy(g_cuContext);
exit(EXIT_FAILURE);
}
@ -337,6 +338,7 @@ int main(int argc, char **argv)
checkCudaErrors(cuMemFree(d_A));
checkCudaErrors(cuMemFree(d_B));
checkCudaErrors(cuMemFree(d_C));
checkCudaErrors(cuModuleUnload(g_cuModule));
checkCudaErrors(cuCtxDestroy(g_cuContext));
printf("Test run %s\n", (1 == res) ? "success!" : "failed!");

View File

@ -26,7 +26,7 @@ x86_64, armv7l, aarch64
cudaStreamCreateWithFlags, cudaFree, cudaDeviceGetAttribute, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaLaunchCooperativeKernel, cudaMalloc, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaMemcpyAsync, cudaOccupancyMaxPotentialBlockSize
## Dependencies needed to build/run
[CPP11](../../../README.md#cpp11), [MBCG](../../../README.md#mbcg)
[C++11 CUDA](../../../README.md#c11-cuda), [MBCG](../../../README.md#multi-block-cooperative-groups)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64
cudaHostAlloc, cudaGraphicsUnmapResources, cudaMalloc, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaMemcpyToArray, cudaDeviceSynchronize, cudaProcess, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsGLRegisterImage
## Dependencies needed to build/run
[X11](../../../README.md#x11), [GL](../../../README.md#gl)
[X11](../../../README.md#x11), [OpenGL](../../../README.md#opengl), [Freeglut](../../../README.md#freeglut), [GLEW](../../../README.md#glew)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, ppc64le
cudaSetDevice, cudaIpcCloseMemHandle, cudaEventDestroy, cudaGetDeviceCount, cudaMemcpyAsync, cudaDeviceCanAccessPeer, cudaStreamCreateWithFlags, cudaStreamDestroy, cudaGetLastError, cudaIpcOpenEventHandle, cudaIpcOpenMemHandle, cudaIpcGetEventHandle, cudaStreamWaitEvent, cudaEventCreate, cudaFree, cudaEventSynchronize, cudaEventRecord, cudaIpcGetMemHandle, cudaStreamSynchronize, cudaDeviceEnablePeerAccess, cudaMalloc, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties
## Dependencies needed to build/run
[IPC](../../../README.md#ipc)
[IPC](../../../README.md#cuda-interprocess-communication)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, aarch64
cudaMalloc, cudaGetLastError, cudaMemcpy, cudaFree
## Dependencies needed to build/run
[MPI](../../../README.md#mpi)
[MPI](../../../README.md#message-passing-interface)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, armv7l
cudaGraphicsUnmapResources, cudaMemcpy, cudaFreeArray, cudaFree, cudaPitchedPtr, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaExtent, cudaDeviceSynchronize, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
## Dependencies needed to build/run
[X11](../../../README.md#x11), [GL](../../../README.md#gl)
[X11](../../../README.md#x11), [OpenGL](../../../README.md#opengl), [Freeglut](../../../README.md#freeglut), [GLEW](../../../README.md#glew)
## Prerequisites

View File

@ -297,6 +297,7 @@ void runTest(int argc, char **argv)
free(image_path);
free(ref_path);
checkCudaErrors(cuModuleUnload(cuModule));
checkCudaErrors(cuCtxDestroy(cuContext));
exit(bTestResults ? EXIT_SUCCESS : EXIT_FAILURE);

View File

@ -26,7 +26,7 @@ x86_64, ppc64le
cudaDeviceSynchronize, cudaMallocManaged, cudaGetDeviceProperties, cudaFree
## Dependencies needed to build/run
[UVM](../../../README.md#uvm)
[UVM](../../../README.md#unified-virtual-memory)
## Prerequisites

View File

@ -211,6 +211,7 @@ int CleanupNoFailure()
free(h_C);
}
checkCudaErrors(cuModuleUnload(cuModule));
checkCudaErrors(cuCtxDestroy(cuContext));
return EXIT_SUCCESS;

View File

@ -247,6 +247,7 @@ int CleanupNoFailure()
free(h_C);
}
checkCudaErrors(cuModuleUnload(cuModule));
checkCudaErrors(cuCtxDestroy(cuContext));
return EXIT_SUCCESS;

View File

@ -26,7 +26,7 @@ x86_64, armv7l
cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaMemcpyFromSymbol, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMemcpyToSymbol, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
## Dependencies needed to build/run
[X11](../../../README.md#x11), [GL](../../../README.md#gl)
[X11](../../../README.md#x11), [OpenGL](../../../README.md#opengl), [Freeglut](../../../README.md#freeglut), [GLEW](../../../README.md#glew)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, armv7l
cudaGraphicsUnmapResources, cudaCreateChannelDesc, cudaMallocArray, cudaFreeArray, cudaFree, cudaGetErrorString, cudaMemcpy, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
## Dependencies needed to build/run
[X11](../../../README.md#x11), [GL](../../../README.md#gl)
[X11](../../../README.md#x11), [OpenGL](../../../README.md#opengl), [Freeglut](../../../README.md#freeglut), [GLEW](../../../README.md#glew)
## Prerequisites

View File

@ -26,8 +26,7 @@ x86_64, armv7l
cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaCreateTextureObject, cudaGLRegisterBufferObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
## Dependencies needed to build/run
[X11](../../../README.md#x11), [GL](../../../README.md#gl)
[X11](../../../README.md#x11), [OpenGL](../../../README.md#opengl), [Freeglut](../../../README.md#freeglut), [GLEW](../../../README.md#glew)
## Prerequisites
Download and install the [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.

View File

@ -26,7 +26,7 @@ x86_64, armv7l
cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaMemset, cudaMemcpyToSymbol, cudaGraphicsGLRegisterBuffer, cudaGraphicsUnregisterResource, cudaMalloc, cudaInit, cudaGLInit
## Dependencies needed to build/run
[X11](../../../README.md#x11), [GL](../../../README.md#gl)
[X11](../../../README.md#x11), [OpenGL](../../../README.md#opengl), [Freeglut](../../../README.md#freeglut), [GLEW](../../../README.md#glew)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, armv7l
cudaMemcpy, cudaFree, cudaSetDevice, cudaDeviceSynchronize, cudaGetDevice, cudaMalloc, cudaGetDeviceProperties
## Dependencies needed to build/run
[CPP11](../../../README.md#cpp11)
[C++11 CUDA](../../../README.md#c11-cuda)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, aarch64
cudaMemcpy, cudaFree, cudaSetDevice, cudaDeviceSynchronize, cudaLaunchCooperativeKernel, cudaMalloc, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties, cudaOccupancyMaxPotentialBlockSize
## Dependencies needed to build/run
[MBCG](../../../README.md#mbcg), [CPP11](../../../README.md#cpp11)
[MBCG](../../../README.md#multi-block-cooperative-groups), [C++11 CUDA](../../../README.md#c11-cuda)
## Prerequisites

View File

@ -24,7 +24,7 @@ x86_64, armv7l, aarch64
cudaMemcpy, cudaFree, cudaMallocHost, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaGetDevice, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
## Dependencies needed to build/run
[CPP11](../../../README.md#cpp11)
[C++11 CUDA](../../../README.md#c11-cuda)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, aarch64
cudaMemcpy, cudaFree, cudaGetErrorString, cudaGetLastError, cudaEventSynchronize, cudaFuncSetAttribute, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
## Dependencies needed to build/run
[CPP11](../../../README.md#cpp11)
[C++11 CUDA](../../../README.md#c11-cuda)
## Prerequisites

View File

@ -24,7 +24,7 @@ x86_64, armv7l
cudaMemcpy, cudaGetMipmappedArrayLevel, cudaGraphicsMapResources, cudaDestroySurfaceObject, cudaExtent, cudaDeviceSynchronize, cudaCreateSurfaceObject, cudaMallocMipmappedArray, cudaPitchedPtr, cudaGraphicsResourceGetMappedPointer, cudaCreateTextureObject, cudaGraphicsUnmapResources, cudaMallocArray, cudaFreeArray, cudaArrayGetInfo, cudaGetLastError, cudaDestroyTextureObject, cudaGraphicsGLRegisterBuffer, cudaFreeMipmappedArray, cudaFree, cudaGraphicsUnregisterResource, cudaMalloc
## Dependencies needed to build/run
[X11](../../../README.md#x11), [GL](../../../README.md#gl)
[X11](../../../README.md#x11), [OpenGL](../../../README.md#opengl), [Freeglut](../../../README.md#freeglut), [GLEW](../../../README.md#glew)
## Prerequisites

View File

@ -24,7 +24,7 @@ x86_64, armv7l
cudaStreamCreateWithFlags, cudaMemcpy, cudaMemcpyAsync, cudaFree, cudaGetErrorString, cudaGetLastError, cudaPeekAtLastError, cudaDeviceSynchronize, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
## Dependencies needed to build/run
[CDP](../../../README.md#cdp)
[CDP](../../../README.md#cuda-dynamic-parallellism)
## Prerequisites

View File

@ -24,7 +24,7 @@ x86_64, armv7l
cudaMemcpy, cudaFree, cudaGetDeviceCount, cudaMalloc, cudaGetDeviceProperties
## Dependencies needed to build/run
[CDP](../../../README.md#cdp)
[CDP](../../../README.md#cuda-dynamic-parallellism)
## Prerequisites

View File

@ -24,7 +24,7 @@ x86_64, armv7l
cudaMemcpy, cudaFree, cudaGetLastError, cudaDeviceSetLimit, cudaMalloc, cudaGetDeviceProperties
## Dependencies needed to build/run
[CDP](../../../README.md#cdp)
[CDP](../../../README.md#cuda-dynamic-parallellism)
## Prerequisites

View File

@ -24,7 +24,7 @@ x86_64, armv7l
cudaDeviceSynchronize, cudaGetLastError, cudaGetDeviceProperties, cudaDeviceSetLimit
## Dependencies needed to build/run
[CDP](../../../README.md#cdp)
[CDP](../../../README.md#cuda-dynamic-parallellism)
## Prerequisites

View File

@ -24,7 +24,7 @@ x86_64, armv7l
cudaStreamCreateWithFlags, cudaMemcpy, cudaStreamDestroy, cudaFree, cudaDeviceSynchronize, cudaDeviceSetLimit, cudaMalloc, cudaGetDeviceProperties
## Dependencies needed to build/run
[CDP](../../../README.md#cdp)
[CDP](../../../README.md#cuda-dynamic-parallellism)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, aarch64
cudaMemcpy, cudaFree, cudaGetErrorString, cudaGetLastError, cudaEventSynchronize, cudaFuncSetAttribute, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
## Dependencies needed to build/run
[CPP11](../../../README.md#cpp11)
[C++11 CUDA](../../../README.md#c11-cuda)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, armv7l, aarch64
cudaStreamCreateWithFlags, cudaMalloc, cudaDeviceGetAttribute, cudaFree, cudaMallocHost, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaStreamSynchronize, cudaEventDestroy, cudaEventElapsedTime, cudaMemsetAsync, cudaMemcpyAsync, cudaEventCreate
## Dependencies needed to build/run
[CPP11](../../../README.md#cpp11)
[C++11 CUDA](../../../README.md#c11-cuda)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, armv7l, aarch64
cuDeviceCanAccessPeer, cuMemImportFromShareableHandle, cuModuleLoadDataEx, cuModuleGetFunction, cuMemSetAccess, cuModuleLoad, cuStreamCreate, cuMemRelease, cuInit, cuLaunchKernel, cuMemcpyDtoHAsync, cuMemCreate, cuDeviceGet, cuCtxDestroy, cuDeviceGetCount, cuMemMap, cuMemExportToShareableHandle, cuStreamSynchronize, cuCtxEnablePeerAccess, cuDeviceGetAttribute, cuOccupancyMaxActiveBlocksPerMultiprocessor, cuCtxSetCurrent, cuMemGetAllocationGranularity, cuMemAddressFree, cuMemUnmap, cuCtxCreate, cuStreamDestroy, cuMemAddressReserve
## Dependencies needed to build/run
[IPC](../../../README.md#ipc)
[IPC](../../../README.md#cuda-interprocess-communication)
## Prerequisites

View File

@ -419,6 +419,7 @@ static void childProcess(int devId, int id, char **argv)
// Clean up!
checkCudaErrors(cuStreamDestroy(stream));
checkCudaErrors(cuModuleUnload(cuModule));
checkCudaErrors(cuCtxDestroy(ctx));
// Unmap the allocations from our address space. Unmapping will also free the

View File

@ -26,7 +26,7 @@ x86_64, aarch64
cudaMemcpy, cudaFree, cudaGetErrorString, cudaGetLastError, cudaEventSynchronize, cudaFuncSetAttribute, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
## Dependencies needed to build/run
[CPP11](../../../README.md#cpp11)
[C++11 CUDA](../../../README.md#c11-cuda)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, aarch64
cudaFree, cudaMallocManaged, cudaDeviceSynchronize, cudaEventRecord, cudaLaunchCooperativeKernel, cudaEventDestroy, cudaEventElapsedTime, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties, cudaEventCreate
## Dependencies needed to build/run
[UVM](../../../README.md#uvm), [MBCG](../../../README.md#mbcg)
[UVM](../../../README.md#unified-virtual-memory), [MBCG](../../../README.md#multi-block-cooperative-groups)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, aarch64
cudaHostAlloc, cudaMemPrefetchAsync, cudaFree, cudaLaunchCooperativeKernel, cudaMallocManaged, cudaSetDevice, cudaGetDeviceCount, cudaGetDeviceProperties, cudaFreeHost, cudaMemset, cudaStreamCreate, cudaStreamSynchronize, cudaDeviceEnablePeerAccess, cudaMemAdvise, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaDeviceCanAccessPeer
## Dependencies needed to build/run
[UVM](../../../README.md#uvm), [MDCG](../../../README.md#mdcg), [CPP11](../../../README.md#cpp11)
[UVM](../../../README.md#unified-virtual-memory), [MDCG](../../../README.md#multi-device-cooperative-groups), [C++11 CUDA](../../../README.md#c11-cuda)
## Prerequisites

View File

@ -24,7 +24,7 @@ x86_64, armv7l
cudaFree, cudaMallocManaged, cudaDeviceSynchronize, cudaMalloc, cudaGetDeviceProperties
## Dependencies needed to build/run
[UVM](../../../README.md#uvm), [CUBLAS](../../../README.md#cublas), [CUSPARSE](../../../README.md#cusparse)
[UVM](../../../README.md#unified-virtual-memory), [CUBLAS](../../../README.md#cublas), [CUSPARSE](../../../README.md#cusparse)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, armv7l
cudaGraphicsUnmapResources, cudaMemcpy, cudaMalloc, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaCalculateSlopeKernel, cudaGraphicsMapResources, cudaUpdateHeightmapKernel, cudaGraphicsUnregisterResource, cudaGenerateSpectrumKernel, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties
## Dependencies needed to build/run
[X11](../../../README.md#x11), [GL](../../../README.md#gl), [CUFFT](../../../README.md#cufft)
[X11](../../../README.md#x11), [OpenGL](../../../README.md#opengl), [Freeglut](../../../README.md#freeglut), [GLEW](../../../README.md#glew), [CUFFT](../../../README.md#cufft)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, armv7l
cudaMalloc, cudaGetErrorString, cudaMemcpy, cudaFree
## Dependencies needed to build/run
[X11](../../../README.md#x11), [GL](../../../README.md#gl), [CURAND](../../../README.md#curand)
[X11](../../../README.md#x11), [OpenGL](../../../README.md#opengl), [Freeglut](../../../README.md#freeglut), [GLEW](../../../README.md#glew), [CURAND](../../../README.md#curand)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, aarch64
cudaMemcpy, cudaFree, cudaMemcpyFromSymbol, cudaGetDevice, cudaMalloc, cudaGetDeviceProperties
## Dependencies needed to build/run
[callback](../../../README.md#callback), [CUFFT](../../../README.md#cufft)
[CUFFT Callback Routines](../../../README.md#cufft-callback-routines), [CUFFT](../../../README.md#cufft)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, armv7l
cudaGLUnmapBufferObject, cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGLMapBufferObject, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaGLUnregisterBufferObject, cudaDeviceSynchronize, cudaGLRegisterBufferObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties
## Dependencies needed to build/run
[X11](../../../README.md#x11), [GL](../../../README.md#gl)
[X11](../../../README.md#x11), [OpenGL](../../../README.md#opengl), [Freeglut](../../../README.md#freeglut), [GLEW](../../../README.md#glew)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, armv7l
cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaGetErrorString, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
## Dependencies needed to build/run
[X11](../../../README.md#x11), [GL](../../../README.md#gl)
[X11](../../../README.md#x11), [OpenGL](../../../README.md#opengl), [Freeglut](../../../README.md#freeglut), [GLEW](../../../README.md#glew)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, armv7l
cudaGraphicsUnmapResources, cudaCreateChannelDesc, cudaMallocArray, cudaFreeArray, cudaFree, cudaMemcpy, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties
## Dependencies needed to build/run
[X11](../../../README.md#x11), [GL](../../../README.md#gl)
[X11](../../../README.md#x11), [OpenGL](../../../README.md#opengl), [Freeglut](../../../README.md#freeglut), [GLEW](../../../README.md#glew)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, armv7l
cudaRuntimeGetVersion, cudaGraphicsUnmapResources, cudaMallocPitch, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMemcpyToSymbol, cudaGraphicsUnregisterResource, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties
## Dependencies needed to build/run
[X11](../../../README.md#x11), [GL](../../../README.md#gl)
[X11](../../../README.md#x11), [OpenGL](../../../README.md#opengl), [Freeglut](../../../README.md#freeglut), [GLEW](../../../README.md#glew)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, armv7l
cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaMallocPitch, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties
## Dependencies needed to build/run
[X11](../../../README.md#x11), [GL](../../../README.md#gl), [CUFFT](../../../README.md#cufft)
[X11](../../../README.md#x11), [OpenGL](../../../README.md#opengl), [Freeglut](../../../README.md#freeglut), [GLEW](../../../README.md#glew), [CUFFT](../../../README.md#cufft)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, armv7l
cudaGLUnmapBufferObject, cudaGraphicsUnmapResources, cudaCreateChannelDesc, cudaMemcpy, cudaFree, cudaGLMapBufferObject, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaGLUnregisterBufferObject, cudaCreateTextureObject, cudaGLRegisterBufferObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
## Dependencies needed to build/run
[X11](../../../README.md#x11), [GL](../../../README.md#gl)
[X11](../../../README.md#x11), [OpenGL](../../../README.md#opengl), [Freeglut](../../../README.md#freeglut), [GLEW](../../../README.md#glew)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, armv7l
cudaGraphicsUnmapResources, cudaSetDeviceFlags, cudaGraphicsResourceSetMapFlags, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaSetDevice, cudaEventSynchronize, cudaGetDeviceCount, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaGetDevice, cudaMemcpyToSymbol, cudaStreamQuery, cudaEventDestroy, cudaEventElapsedTime, cudaDeviceCanAccessPeer, cudaEventCreate
## Dependencies needed to build/run
[X11](../../../README.md#x11), [GL](../../../README.md#gl)
[X11](../../../README.md#x11), [OpenGL](../../../README.md#opengl), [Freeglut](../../../README.md#freeglut), [GLEW](../../../README.md#glew)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, armv7l
cudaHostAlloc, cudaGraphicsUnmapResources, cudaMalloc, cudaFree, cudaGetChannelDesc, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaMemcpyToArray, cudaDeviceSynchronize, cudaCreateTextureObject, cudaProcess, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsGLRegisterImage
## Dependencies needed to build/run
[X11](../../../README.md#x11), [GL](../../../README.md#gl)
[X11](../../../README.md#x11), [OpenGL](../../../README.md#opengl), [Freeglut](../../../README.md#freeglut), [GLEW](../../../README.md#glew)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, armv7l
cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGetDevice, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties
## Dependencies needed to build/run
[X11](../../../README.md#x11), [GL](../../../README.md#gl)
[X11](../../../README.md#x11), [OpenGL](../../../README.md#opengl), [Freeglut](../../../README.md#freeglut), [GLEW](../../../README.md#glew)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, armv7l
cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
## Dependencies needed to build/run
[X11](../../../README.md#x11), [GL](../../../README.md#gl)
[X11](../../../README.md#x11), [OpenGL](../../../README.md#opengl), [Freeglut](../../../README.md#freeglut), [GLEW](../../../README.md#glew)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, aarch64
cudaStreamCreateWithFlags, cudaExternalMemoryGetMappedBuffer, cudaSignalSemaphore, cudaWaitExternalSemaphoresAsync, cudaVertMem, cudaImportExternalSemaphore, cudaWaitSemaphore, cudaHeightMap, cudaSetDevice, cudaGetDeviceCount, cudaSignalExternalSemaphoresAsync, cudaTimelineSemaphore, cudaStreamSynchronize, cudaDestroyExternalMemory, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaImportExternalMemory, cudaGetDeviceProperties, cudaDestroyExternalSemaphore
## Dependencies needed to build/run
[X11](../../../README.md#x11), [VULKAN](../../../README.md#vulkan)
[X11](../../../README.md#x11), [VULKAN](../../../README.md#vulkan), [GLFW](../../../README.md#glfw)
## Prerequisites

View File

@ -29,7 +29,7 @@ cuMemCreate, cuMemAddressReserve, cuMemGetAllocationGranularity, cuMemAddressFre
cudaWaitExternalSemaphoresAsync, cudaImportExternalSemaphore, cudaDeviceGetAttribute, cudaSetDevice, cudaLaunchHostFunc, cudaMallocHost, cudaSignalExternalSemaphoresAsync, cudaFreeHost, cudaMemsetAsync, cudaMemcpyAsync, cudaGetDeviceCount, cudaStreamCreateWithFlags, cudaStreamDestroy, cudaDestroyExternalSemaphore, cudaSignalSemaphore, cudaWaitSemaphore, cudaFree, cudaStreamSynchronize, cudaMalloc, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties
## Dependencies needed to build/run
[X11](../../../README.md#x11), [VULKAN](../../../README.md#vulkan)
[X11](../../../README.md#x11), [VULKAN](../../../README.md#vulkan), [GLFW](../../../README.md#glfw)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, armv7l
cudaExtent, cudaPitchedPtr, cudaCreateTextureObject, cudaMemcpyToSymbol
## Dependencies needed to build/run
[X11](../../../README.md#x11), [GL](../../../README.md#gl)
[X11](../../../README.md#x11), [OpenGL](../../../README.md#opengl), [Freeglut](../../../README.md#freeglut), [GLEW](../../../README.md#glew)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, armv7l
cudaMemcpy, cudaGraphicsMapResources, cudaDestroySurfaceObject, cudaExtent, cudaDeviceSynchronize, cudaCreateSurfaceObject, cudaMemcpyToSymbol, cudaPitchedPtr, cudaGraphicsResourceGetMappedPointer, cudaCreateTextureObject, cudaGraphicsUnmapResources, cudaMallocArray, cudaFreeArray, cudaDestroyTextureObject, cudaMemset, cudaGraphicsGLRegisterBuffer, cudaFree, cudaGraphicsUnregisterResource, cudaMalloc
## Dependencies needed to build/run
[X11](../../../README.md#x11), [GL](../../../README.md#gl)
[X11](../../../README.md#x11), [OpenGL](../../../README.md#opengl), [Freeglut](../../../README.md#freeglut), [GLEW](../../../README.md#glew)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, armv7l
cudaProfilerStop, cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaPitchedPtr, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaExtent, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMemset, cudaMemcpyToSymbol, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
## Dependencies needed to build/run
[X11](../../../README.md#x11), [GL](../../../README.md#gl)
[X11](../../../README.md#x11), [OpenGL](../../../README.md#opengl), [Freeglut](../../../README.md#freeglut), [GLEW](../../../README.md#glew)
## Prerequisites

View File

@ -26,7 +26,7 @@ x86_64, aarch64
cudaVkSemaphoreSignal, cudaWaitExternalSemaphoresAsync, cudaMemcpy, cudaVkImportSemaphore, cudaImportExternalSemaphore, cudaGetMipmappedArrayLevel, cudaSetDevice, cudaDestroySurfaceObject, cudaExtent, cudaMallocMipmappedArray, cudaCreateSurfaceObject, cudaStreamCreate, cudaSignalExternalSemaphoresAsync, cudaUpdateVkImage, cudaCreateTextureObject, cudaGetDeviceCount, cudaExternalMemoryGetMappedMipmappedArray, cudaDestroyTextureObject, cudaVkImportImageMem, cudaDestroyExternalMemory, cudaVkSemaphoreWait, cudaImportExternalMemory, cudaDestroyExternalSemaphore, cudaFreeMipmappedArray, cudaFree, cudaMalloc, cudaGetDeviceProperties
## Dependencies needed to build/run
[X11](../../../README.md#x11), [VULKAN](../../../README.md#vulkan)
[X11](../../../README.md#x11), [VULKAN](../../../README.md#vulkan), [GLFW](../../../README.md#glfw)
## Prerequisites

View File

@ -24,7 +24,7 @@ x86_64, armv7l, aarch64
cudaMemcpy, cudaStreamDestroy, cudaMemPrefetchAsync, cudaFree, cudaMallocHost, cudaMallocManaged, cudaStreamAttachMemAsync, cudaHostGetDevicePointer, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync, cudaStreamCreate, cudaGetDeviceProperties
## Dependencies needed to build/run
[UVM](../../../README.md#uvm)
[UVM](../../../README.md#unified-virtual-memory)
## Prerequisites

View File

@ -664,7 +664,17 @@ int main(int argc, char **argv)
verboseResults = 1;
}
int device_id = findCudaDevice(argc, (const char **)argv);
// set device
cudaDeviceProp device_prop;
int device_id = findCudaDevice(argc, (const char **)argv);
checkCudaErrors(cudaGetDeviceProperties(&device_prop, device_id));
if (!device_prop.managedMemory) {
// This sample requires being run on a device that supports Unified Memory
fprintf(stderr, "Unified Memory not supported on this device\n");
exit(EXIT_WAIVED);
}
matrixMultiplyPerfRunner(reportAsBandwidth, print_launch_transfer_results, print_std_deviation, device_id);

View File

@ -29,7 +29,7 @@ cuEventRecord, cuDeviceGetAttribute, cuEventCreate, cuCtxSynchronize, cuEventDes
cudaMemcpy, cudaGetErrorString, cudaFree, cudaDeviceSynchronize, cudaGetValueMismatch, cudaMalloc
## Dependencies needed to build/run
[EGL](../../../README.md#egl), [EGLSync](../../../README.md#eglsync), [X11](../../../README.md#x11), [GLES](../../../README.md#gles)
[EGL](../../../README.md#egl), [EGLSync](../../../README.md#eglsync), [X11](../../../README.md#x11), [OpenGL ES](../../../README.md#opengl-es)
## Prerequisites

View File

@ -26,7 +26,7 @@ armv7l
cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaMallocPitch, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties
## Dependencies needed to build/run
[X11](../../../README.md#x11), [GLES](../../../README.md#gles), [CUFFT](../../../README.md#cufft)
[X11](../../../README.md#x11), [OpenGL ES](../../../README.md#opengl-es), [CUFFT](../../../README.md#cufft)
## Prerequisites

View File

@ -26,7 +26,7 @@ armv7l
cudaGraphicsUnmapResources, cudaSetDeviceFlags, cudaGraphicsResourceSetMapFlags, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaSetDevice, cudaEventSynchronize, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaGetDevice, cudaMemcpyToSymbol, cudaStreamQuery, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceCount, cudaEventCreate
## Dependencies needed to build/run
[X11](../../../README.md#x11), [GLES](../../../README.md#gles)
[X11](../../../README.md#x11), [OpenGL ES](../../../README.md#opengl-es)
## Prerequisites

View File

@ -26,7 +26,7 @@ armv7l
cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
## Dependencies needed to build/run
[X11](../../../README.md#x11), [GLES](../../../README.md#gles)
[X11](../../../README.md#x11), [OpenGL ES](../../../README.md#opengl-es)
## Prerequisites

View File

@ -31,7 +31,7 @@ armv7l
cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
## Dependencies needed to build/run
[EGLOutput](../../../README.md#egloutput), [GLES](../../../README.md#gles)
[EGLOutput](../../../README.md#egloutput), [OpenGL ES](../../../README.md#opengl-es)
## Prerequisites