From 8baefc7ae8ecca99e72e9a1f766824666bce7508 Mon Sep 17 00:00:00 2001
From: Tailing Yuan
Date: Thu, 30 May 2024 02:01:28 +0800
Subject: [PATCH] Fixing copySharedMem in transpose.cu

---
 Samples/6_Performance/transpose/transpose.cu | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Samples/6_Performance/transpose/transpose.cu b/Samples/6_Performance/transpose/transpose.cu
index a8b6fe8d..8a034aa1 100644
--- a/Samples/6_Performance/transpose/transpose.cu
+++ b/Samples/6_Performance/transpose/transpose.cu
@@ -103,7 +103,7 @@ __global__ void copySharedMem(float *odata, float *idata, int width,
 
   for (int i = 0; i < TILE_DIM; i += BLOCK_ROWS) {
     if (xIndex < width && yIndex < height) {
-      tile[threadIdx.y][threadIdx.x] = idata[index];
+      tile[threadIdx.y + i][threadIdx.x] = idata[index + i * width];
     }
   }
 
@@ -111,7 +111,7 @@ __global__ void copySharedMem(float *odata, float *idata, int width,
 
   for (int i = 0; i < TILE_DIM; i += BLOCK_ROWS) {
     if (xIndex < height && yIndex < width) {
-      odata[index] = tile[threadIdx.y][threadIdx.x];
+      odata[index + i * width] = tile[threadIdx.y + i][threadIdx.x];
     }
   }
 }