diff --git a/README.md b/README.md
index 71aaba56..f2b67574 100644
--- a/README.md
+++ b/README.md
@@ -133,6 +133,58 @@ $ cd build
 $ cmake -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_LIBRARY_PATH=/usr/local/cuda/orin/lib64/ -DCMAKE_INCLUDE_PATH=/usr/local/cuda/orin/include -DBUILD_TEGRA=True ..
 ```
 
+### Cross Building for Automotive Linux Platforms from the DriveOS Docker containers
+
+To build the CUDA samples for the target platform from within the DriveOS Docker containers, use the following instructions.
+
+Mount the target Root Filesystem (RFS) in the container so that the CUDA cmake process has the correct paths to CUDA and other system libraries required to build the samples.
+
+Create a temporary directory to use as the mount point, where `<temp>` is a directory name of your choosing (the commands below use `/drive/temp`):
+
+```
+$ mkdir /drive/<temp>
+```
+
+Mount the filesystem by running the following command:
+
+```
+$ mount /drive/drive-linux/filesystem/targetfs-images/dev_nsr_desktop_ubuntu-24.04_thor_rfs.img /drive/temp
+```
+
+Configure the project by running the following cmake command:
+
+```
+$ mkdir build && cd build
+$ cmake .. -DBUILD_TEGRA=True \
+  -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
+  -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/toolchain-aarch64-linux.cmake \
+  -DTARGET_FS=/drive/temp \
+  -DCMAKE_LIBRARY_PATH=/drive/temp/usr/local/cuda-13.0/thor/lib64/ \
+  -DCMAKE_INCLUDE_PATH=/drive/temp/usr/local/cuda-13.0/thor/include/
+```
+
+Please note that the following libraries are not pre-installed in the DriveOS dev-nsr target filesystem:
+* libdrm-dev
+* Vulkan
+
+This causes the cmake command to throw errors related to the missing files, and as a result, the related samples will not build in later steps. This issue will be addressed in a future DriveOS release.
+
+To build the samples while ignoring the errors mentioned above, you can use `--ignore-errors`/`--keep-going`, or comment out the corresponding `add_subdirectory` commands in the CMakeLists.txt in the parent folder for the samples requiring Vulkan and libdrm-dev:
+
+```
+$ make -j$(nproc) --ignore-errors # or --keep-going
+```
+
+```
+# In Samples/5_Domain_Specific/CMakeLists.txt
+# add_subdirectory(simpleGL)
+# add_subdirectory(simpleVulkan)
+# add_subdirectory(simpleVulkanMMAP)
+
+# In Samples/8_Platform_Specific/Tegra/CMakeLists.txt
+# add_subdirectory(simpleGLES_EGLOutput)
+```
+
 ### QNX
 
 Cross-compilation for QNX with CMake is supported in the CUDA 13.0 samples release and newer. An example build for
diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm.cu b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm.cu
index d6431f01..34dc96c9 100644
--- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm.cu
+++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm.cu
@@ -224,7 +224,7 @@ __global__ void compute_gemm(const half *A, const half *B, const float *C, float
     // there's no such tile, all warps in this CTA exit.
     for (unsigned int block_pos = blockIdx.x;; block_pos += gridDim.x) {
         const unsigned int block_tile_i = ((block_pos * BLOCK_ROW_TILES) / N_TILES) * (BLOCK_COL_TILES);
-        const unsigned int block_tile_j = (block_pos * BLOCK_COL_TILES) % N_TILES;
+        const unsigned int block_tile_j = (block_pos * BLOCK_ROW_TILES) % N_TILES;
 
         // Stop when there are no more D matrix tiles to compute in this CTA.
         if (block_tile_i >= M_TILES) {
diff --git a/Samples/7_libNVVM/README.md b/Samples/7_libNVVM/README.md
index 09fbe98c..cdabae0b 100644
--- a/Samples/7_libNVVM/README.md
+++ b/Samples/7_libNVVM/README.md
@@ -4,7 +4,9 @@ libNVVM and NVVM IR Samples
 Introduction
 ------------
 
-The following samples illustrate the use of libNVVM and NVVM IR.
+The following samples illustrate the use of libNVVM and NVVM IR. Running and
+testing these samples requires an NVIDIA driver compatible with the CUDA
+Toolkit being used for compilation.
 
 - cuda-shared-memory - A directory containing NVVM IR programs that demonstrate
   CUDA 'shared' memory usage.
@@ -71,7 +73,7 @@ A Note About the cuda-c-linking Sample
 
 This sample requires a development package (or locally-built) LLVM library
 between versions 7 to 14 inclusive.  LLVM 15 defaults to using opaque pointers,
-which are currently not supported in libNVVM.
+which are not supported in libNVVM for pre-Blackwell architectures.
 
 The LLVM_HOME environment variable is required for users who wish to build the
 cuda-c-linking sample and have a locally built copy of LLVM that they wish to
diff --git a/Samples/7_libNVVM/cuda-c-linking/CMakeLists.txt b/Samples/7_libNVVM/cuda-c-linking/CMakeLists.txt
index 072b6aa5..0a6dea2e 100644
--- a/Samples/7_libNVVM/cuda-c-linking/CMakeLists.txt
+++ b/Samples/7_libNVVM/cuda-c-linking/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (c) 1993-2023, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 1993-2025, NVIDIA CORPORATION. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -29,7 +29,7 @@ if (LLVM_PACKAGE_VERSION VERSION_GREATER_EQUAL "15" OR
     LLVM_PACKAGE_VERSION VERSION_LESS "7")
   message(STATUS "The cuda-c-linking sample is expected to build with "
                  "LLVM development libraries v7 to v14, opaque pointers are "
-                 "not yet supported in libNVVM.")
+                 "not supported in libNVVM for pre-Blackwell architectures.")
   return()
 endif ()
 
diff --git a/Samples/7_libNVVM/cuda-c-linking/cuda-c-linking.cpp b/Samples/7_libNVVM/cuda-c-linking/cuda-c-linking.cpp
index 3717c413..de7d1be1 100644
--- a/Samples/7_libNVVM/cuda-c-linking/cuda-c-linking.cpp
+++ b/Samples/7_libNVVM/cuda-c-linking/cuda-c-linking.cpp
@@ -1,4 +1,4 @@
-// Copyright (c) 1993-2023, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 1993-2025, NVIDIA CORPORATION. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -216,8 +216,8 @@ int main(int argc, char **argv)
     checkCudaErrors(cuDeviceGetAttribute(&devMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device));
     checkCudaErrors(cuDeviceGetAttribute(&devMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device));
     outs() << "Device Compute Capability: " << devMajor << "." << devMinor << "\n";
-    if (devMajor < 5) {
-        errs() << "ERROR: Device 0 is not sm_50 or later.\n";
+    if (devMajor < 7 || (devMajor == 7 && devMinor < 5)) {  // reject every capability below 7.5
+        errs() << "ERROR: Device 0 is not sm_75 or later.\n";
         return 1;
     }
 
diff --git a/Samples/7_libNVVM/device-side-launch/dsl.c b/Samples/7_libNVVM/device-side-launch/dsl.c
index 17cfe835..3dbce390 100644
--- a/Samples/7_libNVVM/device-side-launch/dsl.c
+++ b/Samples/7_libNVVM/device-side-launch/dsl.c
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2023, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 2014-2025, NVIDIA CORPORATION. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -181,8 +181,8 @@ static CUdevice cudaDeviceInit(int *major, int *minor)
     checkCudaErrors(cuDeviceGetAttribute(major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevice));
     checkCudaErrors(cuDeviceGetAttribute(minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevice));
     printf("compute capability = %d.%d\n", *major, *minor);
-    if (*major < 5) {
-        fprintf(stderr, "Device 0 is not sm_50 or later\n");
+    if (*major < 7 || (*major == 7 && *minor < 5)) {  // reject every capability below 7.5
+        fprintf(stderr, "Device 0 is not sm_75 or later\n");
         exit(EXIT_FAILURE);
     }
     return cuDevice;
diff --git a/Samples/7_libNVVM/ptxgen/README.md b/Samples/7_libNVVM/ptxgen/README.md
index 8d838d29..d4ee4257 100644
--- a/Samples/7_libNVVM/ptxgen/README.md
+++ b/Samples/7_libNVVM/ptxgen/README.md
@@ -22,6 +22,6 @@ interleaved.
 
 For example,
 
-    $ ptxgen a.ll -arch=compute_50 b.bc
+    $ ptxgen a.ll -arch=compute_75 b.bc
 
-links a.ll and b.bc, and generates PTX code for the compute_50 architecture.
+links a.ll and b.bc, and generates PTX code for the compute_75 architecture.
diff --git a/Samples/7_libNVVM/simple/simple.c b/Samples/7_libNVVM/simple/simple.c
index 58c31d5b..12b8dcc2 100644
--- a/Samples/7_libNVVM/simple/simple.c
+++ b/Samples/7_libNVVM/simple/simple.c
@@ -1,4 +1,4 @@
-// Copyright (c) 1993-2023, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 1993-2025, NVIDIA CORPORATION. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -74,8 +74,8 @@ static CUdevice cudaDeviceInit(int *devMajor, int *devMinor)
 
     // Obtain the device's compute capability.
     checkCudaErrors(cuDeviceGetAttribute(devMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevice));
-    if (*devMajor < 5) {
-        fprintf(stderr, "Device 0 is not sm_50 or later\n");
+    checkCudaErrors(cuDeviceGetAttribute(devMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevice));
+    if (*devMajor < 7 || (*devMajor == 7 && *devMinor < 5)) {  // minor is queried above so it is valid here
+        fprintf(stderr, "Device 0 is not sm_75 or later\n");
         exit(EXIT_FAILURE);
     }
-    checkCudaErrors(cuDeviceGetAttribute(devMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevice));
diff --git a/Samples/7_libNVVM/uvmlite/uvmlite.c b/Samples/7_libNVVM/uvmlite/uvmlite.c
index f977e092..cfe61cd9 100644
--- a/Samples/7_libNVVM/uvmlite/uvmlite.c
+++ b/Samples/7_libNVVM/uvmlite/uvmlite.c
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2023, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 2014-2025, NVIDIA CORPORATION. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -172,8 +172,8 @@ static CUdevice cudaDeviceInit(int *major, int *minor)
     checkCudaErrors(cuDeviceGetAttribute(major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevice));
     checkCudaErrors(cuDeviceGetAttribute(minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevice));
     printf("compute capability = %d.%d\n", *major, *minor);
-    if (*major < 5) {
-        fprintf(stderr, "Device 0 is not sm_50 or later\n");
+    if (*major < 7 || (*major == 7 && *minor < 5)) {  // reject every capability below 7.5
+        fprintf(stderr, "Device 0 is not sm_75 or later\n");
         exit(EXIT_FAILURE);
     }