mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2024-11-24 16:49:18 +08:00
Update sample bf16TensorCoreGemm to add an explicit cast to float before converting to __nv_bfloat16
This commit is contained in:
parent
1f76a2d110
commit
01789304f0
|
@ -184,13 +184,13 @@ __host__ void init_host_matrices(__nv_bfloat16 *a, __nv_bfloat16 *b, float *c)
|
||||||
{
|
{
|
||||||
for (int i = 0; i < M_GLOBAL; i++) {
|
for (int i = 0; i < M_GLOBAL; i++) {
|
||||||
for (int j = 0; j < K_GLOBAL; j++) {
|
for (int j = 0; j < K_GLOBAL; j++) {
|
||||||
a[i*K_GLOBAL+j] = (__nv_bfloat16)(rand() % 3);
|
a[i*K_GLOBAL+j] = (__nv_bfloat16)(float)(rand() % 3);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < N_GLOBAL; i++) {
|
for (int i = 0; i < N_GLOBAL; i++) {
|
||||||
for (int j = 0; j < K_GLOBAL; j++) {
|
for (int j = 0; j < K_GLOBAL; j++) {
|
||||||
b[i*K_GLOBAL+j] = (__nv_bfloat16)(rand() % 3);
|
b[i*K_GLOBAL+j] = (__nv_bfloat16)(float)(rand() % 3);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user