Fixing correctness of bf16TensorCoreGemm

This commit is contained in:
Tailing Yuan 2024-05-26 18:30:30 +08:00 committed by GitHub
parent 5f97d7d0df
commit fb9f77575a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -592,8 +592,8 @@ __global__ void simple_wmma_bf16gemm(__nv_bfloat16 *a, __nv_bfloat16 *b, float *
 int aCol = i;
 int aRow = warpM * M;
-int bCol = i;
-int bRow = warpN * N;
+int bCol = warpN * N;
+int bRow = i;
 // Bounds checking
 if (aRow < m_ld && aCol < k_ld && bRow < k_ld && bCol < n_ld) {