Fixing copySharedMem in transpose.cu

This commit is contained in:
Tailing Yuan 2024-05-30 02:01:28 +08:00 committed by GitHub
parent 5f97d7d0df
commit 8baefc7ae8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -103,7 +103,7 @@ __global__ void copySharedMem(float *odata, float *idata, int width,
for (int i = 0; i < TILE_DIM; i += BLOCK_ROWS) {
if (xIndex < width && yIndex < height) {
-tile[threadIdx.y][threadIdx.x] = idata[index];
+tile[threadIdx.y + i][threadIdx.x] = idata[index + i * width];
}
}
@@ -111,7 +111,7 @@ __global__ void copySharedMem(float *odata, float *idata, int width,
for (int i = 0; i < TILE_DIM; i += BLOCK_ROWS) {
if (xIndex < height && yIndex < width) {
-odata[index] = tile[threadIdx.y][threadIdx.x];
+odata[index + i * width] = tile[threadIdx.y + i][threadIdx.x];
}
}
}