mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2024-11-24 18:39:16 +08:00
Fixing correctness of bf16TensorCoreGemm
This commit is contained in:
parent
5f97d7d0df
commit
fb9f77575a
|
@@ -592,8 +592,8 @@ __global__ void simple_wmma_bf16gemm(__nv_bfloat16 *a, __nv_bfloat16 *b, float *
|
||||||
int aCol = i;
|
int aCol = i;
|
||||||
int aRow = warpM * M;
|
int aRow = warpM * M;
|
||||||
|
|
||||||
int bCol = i;
|
int bCol = warpN * N;
|
||||||
int bRow = warpN * N;
|
int bRow = i;
|
||||||
|
|
||||||
// Bounds checking
|
// Bounds checking
|
||||||
if (aRow < m_ld && aCol < k_ld && bRow < k_ld && bCol < n_ld) {
|
if (aRow < m_ld && aCol < k_ld && bRow < k_ld && bCol < n_ld) {
|
||||||
|
|
Loading…
Reference in New Issue
Block a user