From fb9f77575a08918a682d7eb0781c6e5f1d4f0bf3 Mon Sep 17 00:00:00 2001 From: Tailing Yuan Date: Sun, 26 May 2024 18:30:30 +0800 Subject: [PATCH] Fixing correctness of bf16TensorCoreGemm --- .../3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm.cu b/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm.cu index a89e4096..dc42ecbb 100644 --- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm.cu +++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm.cu @@ -592,8 +592,8 @@ __global__ void simple_wmma_bf16gemm(__nv_bfloat16 *a, __nv_bfloat16 *b, float * int aCol = i; int aRow = warpM * M; - int bCol = i; - int bRow = warpN * N; + int bCol = warpN * N; + int bRow = i; // Bounds checking if (aRow < m_ld && aCol < k_ld && bRow < k_ld && bCol < n_ld) {