shawnz 
							
						 
					 
					
						
						
						
						
							
						
						
							5987a9e9fa 
							
						 
					 
					
						
						
							
							Update transpose for code format check  
						
						 
						
						
						
						
					 
					
						2025-05-19 17:38:42 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Francesco Rizzi 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							b530f1cf42 
							
						 
					 
					
						
						
							
							Fix bug in 6_Performance/transpose: copy sharedmem kernel ( #363 )  
						
						 
						
						... 
						
						
						
						Update kernel loop bounds handling, main loop data copy to avoid incorrect reuse of output results.
---------
Authored-by: Francesco Rizzi <francesco.rizzi@ng-analytics.com> 
						
						
					 
					
						2025-05-05 08:43:23 -07:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Rob Armstrong 
							
						 
					 
					
						
						
						
						
							
						
						
							14b1bfdcc4 
							
						 
					 
					
						
						
							
							Replace README references to "CUDA Toolkit 12.5" with general "CUDA Toolkit"  
						
						 
						
						
						
						
					 
					
						2025-04-30 09:46:45 -07:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Rob Armstrong 
							
						 
					 
					
						
						
						
						
							
						
						
							ceab6e8bcc 
							
						 
					 
					
						
						
							
							Apply consistent code formatting across the repo. Add clang-format and pre-commit hooks.  
						
						 
						
						
						
						
					 
					
						2025-03-27 10:30:07 -07:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Rob Armstrong 
							
						 
					 
					
						
						
						
						
							
						
						
							c0ab53f986 
							
						 
					 
					
						
						
							
							Update all sample CMakeLists.txt to include ENABLE_CUDA_DEBUG flag to enable cuda-gdb  
						
						 
						
						
						
						
					 
					
						2025-03-26 10:08:59 -07:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Rob Armstrong 
							
						 
					 
					
						
						
						
						
							
						
						
							b87c243bbb 
							
						 
					 
					
						
						
							
							Add -lineinfo flag to all targets to include line information for developer tools  
						
						 
						
						
						
						
					 
					
						2025-03-26 09:44:20 -07:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Jonathan Bentz 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							efb46383e0 
							
						 
					 
					
						
						
							
							Transpose: Change TILE_DIM to 32 to fix bank conflicts  
						
						 
						
						... 
						
						
						
						Fixes  #175  
						
						
					 
					
						2025-02-20 15:46:44 -08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Rob Armstrong 
							
						 
					 
					
						
						
						
						
							
						
						
							95308ffc23 
							
						 
					 
					
						
						
							
							Add missing build targets to general samples  
						
						 
						
						
						
						
					 
					
						2025-02-08 13:04:26 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Rob Armstrong 
							
						 
					 
					
						
						
						
						
							
						
						
							c631850c15 
							
						 
					 
					
						
						
							
							Remove compute capability 8.7 build target for non-Tegra builds  
						
						 
						
						
						
						
					 
					
						2025-02-04 12:01:38 -08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Rob Armstrong 
							
						 
					 
					
						
						
						
						
							
						
						
							9539ca5fa3 
							
						 
					 
					
						
						
							
							Remove compute capability 7.2 build target for non-Tegra builds  
						
						 
						
						
						
						
					 
					
						2025-02-04 11:58:25 -08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Rob Armstrong 
							
						 
					 
					
						
						
						
						
							
						
						
							0f4bdfad99 
							
						 
					 
					
						
						
							
							CMake: Add '-Wno-deprecated-gpu-targets' to suppress warning messages during build about Maxwell, Pascal, and Volta  
						
						 
						
						
						
						
					 
					
						2025-02-04 11:51:17 -08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								shawnz 
							
						 
					 
					
						
						
						
						
							
						
						
							08fae276b4 
							
						 
					 
					
						
						
							
							Add Tegra SMs in CMakeLists.txt general samples  
						
						 
						
						
						
						
					 
					
						2025-01-23 11:02:56 +08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Rob Armstrong 
							
						 
					 
					
						
						
						
						
							
						
						
							b203467419 
							
						 
					 
					
						
						
							
							Update CUDA architectures list  
						
						 
						
						
						
						
					 
					
						2025-01-22 17:49:44 -08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Rob Armstrong 
							
						 
					 
					
						
						
						
						
							
						
						
							f8fbd04007 
							
						 
					 
					
						
						
							
							Update CMake module search path  
						
						 
						
						
						
						
					 
					
						2025-01-14 09:14:29 -08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Rob Armstrong 
							
						 
					 
					
						
						
						
						
							
						
						
							f753e86e7a 
							
						 
					 
					
						
						
							
							Update all samples to build position-independent code  
						
						 
						
						
						
						
					 
					
						2025-01-09 09:59:36 -08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Rob Armstrong 
							
						 
					 
					
						
						
						
						
							
						
						
							bfd956bc5e 
							
						 
					 
					
						
						
							
							Remove README references to PPC processors (no longer supported)  
						
						 
						
						
						
						
					 
					
						2024-12-18 10:54:37 -08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Rob Armstrong 
							
						 
					 
					
						
						
						
						
							
						
						
							0f5821a8c8 
							
						 
					 
					
						
						
							
							Remove outdated build instructions from README.md  
						
						 
						
						
						
						
					 
					
						2024-12-18 10:52:24 -08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Rob Armstrong 
							
						 
					 
					
						
						
						
						
							
						
						
							03719b7623 
							
						 
					 
					
						
						
							
							Change remaining build targets to specify active SM variants  
						
						 
						
						
						
						
					 
					
						2024-12-16 16:17:14 -08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Rob Armstrong 
							
						 
					 
					
						
						
						
						
							
						
						
							cbfab74480 
							
						 
					 
					
						
						
							
							Refactor CMakeLists.txt under 6_Performance  
						
						 
						
						
						
						
					 
					
						2024-12-16 14:52:10 -08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Rob Armstrong 
							
						 
					 
					
						
						
						
						
							
						
						
							fb1eaa8323 
							
						 
					 
					
						
						
							
							Update alignedTypes, cudaGraphsPerfScaling, LargeKernelParameter, transpose  
						
						 
						
						
						
						
					 
					
						2024-12-12 11:48:07 -08:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Rob Armstrong 
							
						 
					 
					
						
						
						
						
							
						
						
							82bcada84c 
							
						 
					 
					
						
						
							
							Remove now-unnecessary Visual Studio project files  
						
						 
						
						
						
						
					 
					
						2024-12-11 16:25:06 +00:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Rob Nertney 
							
						 
					 
					
						
						
						
						
							
						
						
							9c688d7ff7 
							
						 
					 
					
						
						
							
							Updating samples for CUDA 12.5  
						
						 
						
						
						
						
					 
					
						2024-07-25 16:30:13 +00:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Rob Nertney 
							
						 
					 
					
						
						
						
						
							
						
						
							cd3bc1fa8e 
							
						 
					 
					
						
						
							
							Updating samples for CUDA 12.4  
						
						 
						
						
						
						
					 
					
						2024-03-05 20:53:50 +00:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Rob Nertney 
							
						 
					 
					
						
						
						
						
							
						
						
							e8568c4173 
							
						 
					 
					
						
						
							
							Fixing jitlto regression, including missing cuDLA source files for bug  #235 , and updating changelogs  
						
						 
						
						
						
						
					 
					
						2023-11-09 16:52:00 +00:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Rob Nertney 
							
						 
					 
					
						
						
						
						
							
						
						
							b5c84e6996 
							
						 
					 
					
						
						
							
							Updating Samples for 12.3 and updating props files  
						
						 
						
						
						
						
					 
					
						2023-10-23 18:44:49 +00:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Rob Nertney 
							
						 
					 
					
						
						
						
						
							
						
						
							03309a2d42 
							
						 
					 
					
						
						
							
							Changelog updates  
						
						 
						
						
						
						
					 
					
						2023-06-29 19:33:40 +00:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Rob Nertney 
							
						 
					 
					
						
						
						
						
							
						
						
							81cf058e30 
							
						 
					 
					
						
						
							
							Updating Samples for 12.1  
						
						 
						
						
						
						
					 
					
						2023-03-01 01:41:29 +00:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Rob Nertney 
							
						 
					 
					
						
						
						
						
							
						
						
							00bb9bc367 
							
						 
					 
					
						
						
							
							Updating files for Ada architecture  
						
						 
						
						
						
						
					 
					
						2023-02-27 22:33:19 +00:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Rob Nertney 
							
						 
					 
					
						
						
						
						
							
						
						
							2b689228b7 
							
						 
					 
					
						
						
							
							Updating samples for 12.0  
						
						 
						
						
						
						
					 
					
						2022-12-08 20:19:55 +00:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Rob Nertney 
							
						 
					 
					
						
						
						
						
							
						
						
							81992093d2 
							
						 
					 
					
						
						
							
							Update samples for CUDA 11.8 with correct props  
						
						 
						
						
						
						
					 
					
						2022-10-14 17:43:37 -07:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Rutwik Choughule 
							
						 
					 
					
						
						
						
						
							
						
						
							2e41896e1b 
							
						 
					 
					
						
						
							
							add and update samples for CUDA 11.6  
						
						 
						
						
						
						
					 
					
						2022-01-13 11:35:24 +05:30