diff --git a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct.cu b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct.cu index 297095fd..7f1fdb7a 100644 --- a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct.cu +++ b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct.cu @@ -94,7 +94,7 @@ scalarProductKernel_intrinsics(half2 const *const a, half2 const *const b, float shArray[threadIdx.x] = __float2half2_rn(0.f); half2 value = __float2half2_rn(0.f); - for (int i = threadIdx.x + blockDim.x + blockIdx.x; i < size; i += stride) { + for (int i = threadIdx.x + blockDim.x * blockIdx.x; i < size; i += stride) { value = __hfma2(a[i], b[i], value); } @@ -118,7 +118,7 @@ scalarProductKernel_native(half2 const *const a, half2 const *const b, float *co half2 value(0.f, 0.f); shArray[threadIdx.x] = value; - for (int i = threadIdx.x + blockDim.x + blockIdx.x; i < size; i += stride) { + for (int i = threadIdx.x + blockDim.x * blockIdx.x; i < size; i += stride) { value = a[i] * b[i] + value; }