mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2026-01-09 11:27:49 +08:00
Resolve merge between 13.0 and 13.1 branches
This commit is contained in:
parent
22fafb0a7c
commit
320c7e6392
@ -104,6 +104,7 @@ __global__ void copySharedMem(float *odata, float *idata, int width, int height)
|
|||||||
for (int i = 0; i < TILE_DIM; i += BLOCK_ROWS) {
|
for (int i = 0; i < TILE_DIM; i += BLOCK_ROWS) {
|
||||||
if (xIndex < width && yIndex < height) {
|
if (xIndex < width && yIndex < height) {
|
||||||
tile[threadIdx.y + i][threadIdx.x] = idata[index + i * width];
|
tile[threadIdx.y + i][threadIdx.x] = idata[index + i * width];
|
||||||
|
tile[threadIdx.y + i][threadIdx.x] = idata[index + i * width];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -112,6 +113,7 @@ __global__ void copySharedMem(float *odata, float *idata, int width, int height)
|
|||||||
for (int i = 0; i < TILE_DIM; i += BLOCK_ROWS) {
|
for (int i = 0; i < TILE_DIM; i += BLOCK_ROWS) {
|
||||||
if (xIndex < height && yIndex < width) {
|
if (xIndex < height && yIndex < width) {
|
||||||
odata[index + i * width] = tile[threadIdx.y + i][threadIdx.x];
|
odata[index + i * width] = tile[threadIdx.y + i][threadIdx.x];
|
||||||
|
odata[index + i * width] = tile[threadIdx.y + i][threadIdx.x];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -607,6 +609,17 @@ int main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
// copy host data to device
|
// copy host data to device
|
||||||
checkCudaErrors(cudaMemcpy(d_odata, h_odata, mem_size, cudaMemcpyHostToDevice));
|
checkCudaErrors(cudaMemcpy(d_odata, h_odata, mem_size, cudaMemcpyHostToDevice));
|
||||||
|
|
||||||
|
// Reset d_odata to zero before starting the next loop iteration to avoid
|
||||||
|
// carrying over results from previous kernels. Without this reset, residual
|
||||||
|
// data from a prior kernel (e.g., 'copy') could make a subsequent
|
||||||
|
// kernel (e.g., 'copySharedMem') appear correct even if it performs no work,
|
||||||
|
// leading to false positives in compareData.
|
||||||
|
for (int i = 0; i < (size_x * size_y); ++i) {
|
||||||
|
h_odata[i] = 0;
|
||||||
|
}
|
||||||
|
// copy host data to device
|
||||||
|
checkCudaErrors(cudaMemcpy(d_odata, h_odata, mem_size, cudaMemcpyHostToDevice));
|
||||||
}
|
}
|
||||||
|
|
||||||
// cleanup
|
// cleanup
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user