From b38ed29c95dc7da7f071431526b0717f6224527c Mon Sep 17 00:00:00 2001 From: shawnz Date: Fri, 25 Jul 2025 15:16:12 +0800 Subject: [PATCH] Bug 5412815: Fix block_tile_j to use BLOCK_ROW_TILES in cudaTensorCoreGemm.cu --- .../3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm.cu b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm.cu index d6431f01..34dc96c9 100644 --- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm.cu +++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm.cu @@ -224,7 +224,7 @@ __global__ void compute_gemm(const half *A, const half *B, const float *C, float // there's no such tile, all warps in this CTA exit. for (unsigned int block_pos = blockIdx.x;; block_pos += gridDim.x) { const unsigned int block_tile_i = ((block_pos * BLOCK_ROW_TILES) / N_TILES) * (BLOCK_COL_TILES); - const unsigned int block_tile_j = (block_pos * BLOCK_COL_TILES) % N_TILES; + const unsigned int block_tile_j = (block_pos * BLOCK_ROW_TILES) % N_TILES; // Stop when there are no more D matrix tiles to compute in this CTA. if (block_tile_i >= M_TILES) {