Compare commits

..

No commits in common. "master" and "v12.9" have entirely different histories.

5 changed files with 9 additions and 27 deletions

View File

@ -1,5 +1,9 @@
# 1. Utilities
### [bandwidthTest](./bandwidthTest)
This is a simple test program to measure the memcpy bandwidth of the GPU and the memcpy bandwidth across PCIe. This test application is capable of measuring device-to-device copy bandwidth, host-to-device copy bandwidth for pageable and page-locked memory, and device-to-host copy bandwidth for pageable and page-locked memory.
### [deviceQuery](./deviceQuery)
This sample enumerates the properties of the CUDA devices present in the system.
@ -8,8 +12,3 @@ This sample enumerates the properties of the CUDA devices present using CUDA Dri
### [topologyQuery](./topologyQuery)
A simple example of how to query the topology of a system with multiple GPUs.
## Note
### bandwidthTest
The bandwidthTest sample was out-of-date and has been removed as of the CUDA Samples 12.9 release (see the [change log](../../CHANGELOG.md)). For up-to-date bandwidth measurements, refer instead to the [NVBandwidth](https://github.com/nvidia/nvbandwidth) utility.

View File

@ -34,10 +34,8 @@
#include <vulkan/vulkan.h>
#ifdef _WIN64
#define NOMINMAX
// Include windows.h
#include <windows.h>
// Include vulkan_win32.h
#include <vulkan/vulkan_win32.h>
#include <windows.h>
#endif /* _WIN64 */
/* remove _VK_TIMELINE_SEMAPHORE to use binary semaphores */

View File

@ -34,10 +34,8 @@
#include <vulkan/vulkan.h>
#ifdef _WIN64
#define NOMINMAX
// Include windows.h first; the other Windows headers depend on it
#include <windows.h>
// Add other Windows headers
#include <vulkan/vulkan_win32.h>
#include <windows.h>
#endif /* _WIN64 */
struct GLFWwindow;

View File

@ -27,12 +27,10 @@
#define GLFW_INCLUDE_VULKAN
#ifdef _WIN64
// Include windows.h first; the other Windows headers depend on it
#include <windows.h>
// Add other Windows headers
#include <VersionHelpers.h>
#include <aclapi.h>
#include <dxgi1_2.h>
#include <windows.h>
#define _USE_MATH_DEFINES
#endif

View File

@ -103,7 +103,7 @@ __global__ void copySharedMem(float *odata, float *idata, int width, int height)
for (int i = 0; i < TILE_DIM; i += BLOCK_ROWS) {
if (xIndex < width && yIndex < height) {
tile[threadIdx.y + i][threadIdx.x] = idata[index + i * width];
tile[threadIdx.y][threadIdx.x] = idata[index];
}
}
@ -111,7 +111,7 @@ __global__ void copySharedMem(float *odata, float *idata, int width, int height)
for (int i = 0; i < TILE_DIM; i += BLOCK_ROWS) {
if (xIndex < height && yIndex < width) {
odata[index + i * width] = tile[threadIdx.y + i][threadIdx.x];
odata[index] = tile[threadIdx.y][threadIdx.x];
}
}
}
@ -596,17 +596,6 @@ int main(int argc, char **argv)
(size_x * size_y),
1,
TILE_DIM * BLOCK_ROWS);
// Reset d_odata to zero before starting the next loop iteration to avoid
// carrying over results from previous kernels. Without this reset, residual
// data from a prior kernel (e.g., 'copy') could make a subsequent
// kernel (e.g., 'copySharedMem') appear correct even if it performs no work,
// leading to false positives in compareData.
for (int i = 0; i < (size_x * size_y); ++i) {
h_odata[i] = 0;
}
// copy host data to device
checkCudaErrors(cudaMemcpy(d_odata, h_odata, mem_size, cudaMemcpyHostToDevice));
}
// cleanup