mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2026-04-02 23:55:41 +08:00
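Bug 5970525: Fix the grid-stride loop start index in Samples/0_Introduction/fp16ScalarProduct (use `blockDim.x * blockIdx.x` instead of `blockDim.x + blockIdx.x` in both scalar-product kernels)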
This commit is contained in:
parent
e32d17e67e
commit
722c5dd069
@@ -94,7 +94,7 @@ scalarProductKernel_intrinsics(half2 const *const a, half2 const *const b, float
|
|||||||
shArray[threadIdx.x] = __float2half2_rn(0.f);
|
shArray[threadIdx.x] = __float2half2_rn(0.f);
|
||||||
half2 value = __float2half2_rn(0.f);
|
half2 value = __float2half2_rn(0.f);
|
||||||
|
|
||||||
for (int i = threadIdx.x + blockDim.x + blockIdx.x; i < size; i += stride) {
|
for (int i = threadIdx.x + blockDim.x * blockIdx.x; i < size; i += stride) {
|
||||||
value = __hfma2(a[i], b[i], value);
|
value = __hfma2(a[i], b[i], value);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -118,7 +118,7 @@ scalarProductKernel_native(half2 const *const a, half2 const *const b, float *co
|
|||||||
half2 value(0.f, 0.f);
|
half2 value(0.f, 0.f);
|
||||||
shArray[threadIdx.x] = value;
|
shArray[threadIdx.x] = value;
|
||||||
|
|
||||||
for (int i = threadIdx.x + blockDim.x + blockIdx.x; i < size; i += stride) {
|
for (int i = threadIdx.x + blockDim.x * blockIdx.x; i < size; i += stride) {
|
||||||
value = a[i] * b[i] + value;
|
value = a[i] * b[i] + value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user