Compare commits

..

No commits in common. "master" and "v12.0" have entirely different histories.

1493 changed files with 2217 additions and 168539 deletions

View File

@ -1,22 +1,5 @@
## Changelog ## Changelog
### CUDA 12.5
### CUDA 12.4
* Added graphConditionalNodes Sample
### CUDA 12.3
* Added cuDLA samples
* Fixed jitLto regression
### CUDA 12.2
* libNVVM samples received updates
* Fixed jitLto Case issues
* Enabled the HOST_COMPILER flag in the makefiles for GCC; this is untested but may still work.
### CUDA 12.1
* Added new sample for Large Kernels
### CUDA 12.0 ### CUDA 12.0
* Added new flags for JIT compiling * Added new flags for JIT compiling
* Removed deprecated APIs in Hopper Architecture * Removed deprecated APIs in Hopper Architecture

View File

@ -666,7 +666,6 @@ inline int _ConvertSMVer2Cores(int major, int minor) {
{0x80, 64}, {0x80, 64},
{0x86, 128}, {0x86, 128},
{0x87, 128}, {0x87, 128},
{0x89, 128},
{0x90, 128}, {0x90, 128},
{-1, -1}}; {-1, -1}};
@ -715,7 +714,6 @@ inline const char* _ConvertSMVer2ArchName(int major, int minor) {
{0x80, "Ampere"}, {0x80, "Ampere"},
{0x86, "Ampere"}, {0x86, "Ampere"},
{0x87, "Ampere"}, {0x87, "Ampere"},
{0x89, "Ada"},
{0x90, "Hopper"}, {0x90, "Hopper"},
{-1, "Graphics Device"}}; {-1, "Graphics Device"}};

View File

@ -114,7 +114,6 @@ inline int _ConvertSMVer2CoresDRV(int major, int minor) {
{0x80, 64}, {0x80, 64},
{0x86, 128}, {0x86, 128},
{0x87, 128}, {0x87, 128},
{0x89, 128},
{0x90, 128}, {0x90, 128},
{-1, -1}}; {-1, -1}};

View File

@ -168,7 +168,7 @@ int waitProcess(Process *process) {
#endif #endif
} }
#if defined(__linux__) || defined(__QNX__) #if defined(__linux__)
int ipcCreateSocket(ipcHandle *&handle, const char *name, int ipcCreateSocket(ipcHandle *&handle, const char *name,
const std::vector<Process> &processes) { const std::vector<Process> &processes) {
int server_fd; int server_fd;
@ -262,48 +262,41 @@ int ipcRecvShareableHandle(ipcHandle *handle, ShareableHandle *shHandle) {
// Union to guarantee alignment requirements for control array // Union to guarantee alignment requirements for control array
union { union {
struct cmsghdr cm; struct cmsghdr cm;
// This will not work on QNX as QNX CMSG_SPACE calls __cmsg_alignbytes char control[CMSG_SPACE(sizeof(int))];
// And __cmsg_alignbytes is a runtime function instead of compile-time macros
// char control[CMSG_SPACE(sizeof(int))]
char* control;
} control_un; } control_un;
size_t sizeof_control = CMSG_SPACE(sizeof(int)) * sizeof(char);
control_un.control = (char*) malloc(sizeof_control);
struct cmsghdr *cmptr; struct cmsghdr *cmptr;
ssize_t n; ssize_t n;
int receivedfd; int receivedfd;
char dummy_buffer[1]; char dummy_buffer[1];
ssize_t sendResult; ssize_t sendResult;
msg.msg_control = control_un.control; msg.msg_control = control_un.control;
msg.msg_controllen = sizeof_control; msg.msg_controllen = sizeof(control_un.control);
iov[0].iov_base = (void *)dummy_buffer; iov[0].iov_base = (void *)dummy_buffer;
iov[0].iov_len = sizeof(dummy_buffer); iov[0].iov_len = sizeof(dummy_buffer);
msg.msg_iov = iov; msg.msg_iov = iov;
msg.msg_iovlen = 1; msg.msg_iovlen = 1;
if ((n = recvmsg(handle->socket, &msg, 0)) <= 0) { if ((n = recvmsg(handle->socket, &msg, 0)) <= 0) {
perror("IPC failure: Receiving data over socket failed"); perror("IPC failure: Receiving data over socket failed");
free(control_un.control);
return -1; return -1;
} }
if (((cmptr = CMSG_FIRSTHDR(&msg)) != NULL) && if (((cmptr = CMSG_FIRSTHDR(&msg)) != NULL) &&
(cmptr->cmsg_len == CMSG_LEN(sizeof(int)))) { (cmptr->cmsg_len == CMSG_LEN(sizeof(int)))) {
if ((cmptr->cmsg_level != SOL_SOCKET) || (cmptr->cmsg_type != SCM_RIGHTS)) { if ((cmptr->cmsg_level != SOL_SOCKET) || (cmptr->cmsg_type != SCM_RIGHTS)) {
free(control_un.control);
return -1; return -1;
} }
memmove(&receivedfd, CMSG_DATA(cmptr), sizeof(receivedfd)); memmove(&receivedfd, CMSG_DATA(cmptr), sizeof(receivedfd));
*(int *)shHandle = receivedfd; *(int *)shHandle = receivedfd;
} else { } else {
free(control_un.control);
return -1; return -1;
} }
free(control_un.control);
return 0; return 0;
} }
@ -347,12 +340,9 @@ int ipcSendShareableHandle(ipcHandle *handle,
union { union {
struct cmsghdr cm; struct cmsghdr cm;
char* control; char control[CMSG_SPACE(sizeof(int))];
} control_un; } control_un;
size_t sizeof_control = CMSG_SPACE(sizeof(int)) * sizeof(char);
control_un.control = (char*) malloc(sizeof_control);
struct cmsghdr *cmptr; struct cmsghdr *cmptr;
ssize_t readResult; ssize_t readResult;
struct sockaddr_un cliaddr; struct sockaddr_un cliaddr;
@ -370,7 +360,7 @@ int ipcSendShareableHandle(ipcHandle *handle,
int sendfd = (int)shareableHandles[data]; int sendfd = (int)shareableHandles[data];
msg.msg_control = control_un.control; msg.msg_control = control_un.control;
msg.msg_controllen = sizeof_control; msg.msg_controllen = sizeof(control_un.control);
cmptr = CMSG_FIRSTHDR(&msg); cmptr = CMSG_FIRSTHDR(&msg);
cmptr->cmsg_len = CMSG_LEN(sizeof(int)); cmptr->cmsg_len = CMSG_LEN(sizeof(int));
@ -390,11 +380,9 @@ int ipcSendShareableHandle(ipcHandle *handle,
ssize_t sendResult = sendmsg(handle->socket, &msg, 0); ssize_t sendResult = sendmsg(handle->socket, &msg, 0);
if (sendResult <= 0) { if (sendResult <= 0) {
perror("IPC failure: Sending data over socket failed"); perror("IPC failure: Sending data over socket failed");
free(control_un.control);
return -1; return -1;
} }
free(control_un.control);
return 0; return 0;
} }

View File

@ -84,7 +84,7 @@ int waitProcess(Process *process);
#define checkIpcErrors(ipcFuncResult) \ #define checkIpcErrors(ipcFuncResult) \
if (ipcFuncResult == -1) { fprintf(stderr, "Failure at %u %s\n", __LINE__, __FILE__); exit(EXIT_FAILURE); } if (ipcFuncResult == -1) { fprintf(stderr, "Failure at %u %s\n", __LINE__, __FILE__); exit(EXIT_FAILURE); }
#if defined(__linux__) || defined(__QNX__) #if defined(__linux__)
struct ipcHandle_st { struct ipcHandle_st {
int socket; int socket;
char *socketName; char *socketName;

View File

@ -1,12 +1,14 @@
# CUDA Samples # CUDA Samples
Samples for CUDA developers that demonstrate features in the CUDA Toolkit. This version supports [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads). Samples for CUDA developers that demonstrate features in the CUDA Toolkit. This version supports [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads).
## Release Notes ## Release Notes
This section describes the release notes for the CUDA Samples on GitHub only. This section describes the release notes for the CUDA Samples on GitHub only.
### CUDA 12.5 ### CUDA 12.0
* Added new flags for JIT compiling
* Removed deprecated APIs in Hopper Architecture
### [older versions...](./CHANGELOG.md) ### [older versions...](./CHANGELOG.md)
@ -14,7 +16,7 @@ This section describes the release notes for the CUDA Samples on GitHub only.
### Prerequisites ### Prerequisites
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
For system requirements and installation instructions for the CUDA Toolkit, please refer to the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/) and the [Windows Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html). For system requirements and installation instructions for the CUDA Toolkit, please refer to the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/) and the [Windows Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html).
### Getting the CUDA Samples ### Getting the CUDA Samples
@ -89,9 +91,6 @@ Samples that are specific to domain (Graphics, Finance, Image Processing).
### [6. Performance](./Samples/6_Performance/README.md) ### [6. Performance](./Samples/6_Performance/README.md)
Samples that demonstrate performance optimization. Samples that demonstrate performance optimization.
### [7. libNVVM](./Samples/7_libNVVM/README.md)
Samples that demonstrate the use of libNVVM and NVVM IR.
## Dependencies ## Dependencies
Some CUDA Samples rely on third-party applications and/or libraries, or features provided by the CUDA Toolkit and Driver, to either build or execute. These dependencies are listed below. Some CUDA Samples rely on third-party applications and/or libraries, or features provided by the CUDA Toolkit and Driver, to either build or execute. These dependencies are listed below.
@ -246,10 +245,6 @@ FP16 is a 16-bit floating-point format. One bit is used for the sign, five bits
NVCC support of [C++11 features](https://en.wikipedia.org/wiki/C++11). NVCC support of [C++11 features](https://en.wikipedia.org/wiki/C++11).
#### CMake
The libNVVM samples are built using [CMake](https://cmake.org/) 3.10 or later.
## Contributors Guide ## Contributors Guide
We welcome your input on issues and suggestions for samples. At this time we are not accepting contributions from the public; check back here as we evolve our contribution model. We welcome your input on issues and suggestions for samples. At this time we are not accepting contributions from the public; check back here as we evolve our contribution model.

View File

@ -113,10 +113,6 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
endif endif
# host compiler # host compiler
ifdef HOST_COMPILER
CUSTOM_HOST_COMPILER = 1
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1) ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
HOST_COMPILER ?= clang++ HOST_COMPILER ?= clang++
@ -169,19 +165,6 @@ CCFLAGS :=
LDFLAGS := LDFLAGS :=
# build flags # build flags
# Link flag for customized HOST_COMPILER with gcc realpath
GCC_PATH := $(shell which gcc)
ifeq ($(CUSTOM_HOST_COMPILER),1)
ifneq ($(filter /%,$(HOST_COMPILER)),)
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
ifneq ($(GCC_PATH),$(HOST_COMPILER))
LDFLAGS += -lstdc++
endif
endif
endif
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
LDFLAGS += -rpath $(CUDA_PATH)/lib LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(HOST_ARCH) CCFLAGS += -arch $(HOST_ARCH)
@ -337,7 +320,7 @@ endif
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
SMS ?= 53 61 70 72 75 80 86 87 90 SMS ?= 53 61 70 72 75 80 86 87 90
else else
SMS ?= 50 52 60 61 70 75 80 86 89 90 SMS ?= 50 52 60 61 70 75 80 86 90
endif endif
ifeq ($(SMS),) ifeq ($(SMS),)

View File

@ -28,7 +28,7 @@ cudaStreamDestroy, cudaFree, cudaMallocManaged, cudaStreamAttachMemAsync, cudaSe
## Prerequisites ## Prerequisites
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed. Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run ## Build and Run

View File

@ -38,7 +38,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -67,7 +67,7 @@
<OutputFile>$(OutDir)/UnifiedMemoryStreams.exe</OutputFile> <OutputFile>$(OutDir)/UnifiedMemoryStreams.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -108,6 +108,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -63,7 +63,7 @@
<OutputFile>$(OutDir)/UnifiedMemoryStreams.exe</OutputFile> <OutputFile>$(OutDir)/UnifiedMemoryStreams.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -104,6 +104,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -63,7 +63,7 @@
<OutputFile>$(OutDir)/UnifiedMemoryStreams.exe</OutputFile> <OutputFile>$(OutDir)/UnifiedMemoryStreams.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -104,6 +104,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -113,10 +113,6 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
endif endif
# host compiler # host compiler
ifdef HOST_COMPILER
CUSTOM_HOST_COMPILER = 1
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1) ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
HOST_COMPILER ?= clang++ HOST_COMPILER ?= clang++
@ -169,19 +165,6 @@ CCFLAGS :=
LDFLAGS := LDFLAGS :=
# build flags # build flags
# Link flag for customized HOST_COMPILER with gcc realpath
GCC_PATH := $(shell which gcc)
ifeq ($(CUSTOM_HOST_COMPILER),1)
ifneq ($(filter /%,$(HOST_COMPILER)),)
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
ifneq ($(GCC_PATH),$(HOST_COMPILER))
LDFLAGS += -lstdc++
endif
endif
endif
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
LDFLAGS += -rpath $(CUDA_PATH)/lib LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(HOST_ARCH) CCFLAGS += -arch $(HOST_ARCH)
@ -298,7 +281,7 @@ LIBRARIES :=
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
SMS ?= 53 61 70 72 75 80 86 87 90 SMS ?= 53 61 70 72 75 80 86 87 90
else else
SMS ?= 50 52 60 61 70 75 80 86 89 90 SMS ?= 50 52 60 61 70 75 80 86 90
endif endif
ifeq ($(SMS),) ifeq ($(SMS),)

View File

@ -57,7 +57,6 @@
<sm-arch>sm80</sm-arch> <sm-arch>sm80</sm-arch>
<sm-arch>sm86</sm-arch> <sm-arch>sm86</sm-arch>
<sm-arch>sm87</sm-arch> <sm-arch>sm87</sm-arch>
<sm-arch>sm89</sm-arch>
<sm-arch>sm90</sm-arch> <sm-arch>sm90</sm-arch>
<supported_envs> <supported_envs>
<env> <env>

View File

@ -10,7 +10,7 @@ Asynchronous Data Transfers, CUDA Streams and Events
## Supported SM Architectures ## Supported SM Architectures
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes ## Supported OSes
@ -27,7 +27,7 @@ cudaProfilerStop, cudaMalloc, cudaMemcpyAsync, cudaFree, cudaMallocHost, cudaPro
## Prerequisites ## Prerequisites
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run ## Build and Run

View File

@ -38,7 +38,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -67,7 +67,7 @@
<OutputFile>$(OutDir)/asyncAPI.exe</OutputFile> <OutputFile>$(OutDir)/asyncAPI.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -107,6 +107,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -63,7 +63,7 @@
<OutputFile>$(OutDir)/asyncAPI.exe</OutputFile> <OutputFile>$(OutDir)/asyncAPI.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -103,6 +103,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -63,7 +63,7 @@
<OutputFile>$(OutDir)/asyncAPI.exe</OutputFile> <OutputFile>$(OutDir)/asyncAPI.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -103,6 +103,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -113,10 +113,6 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
endif endif
# host compiler # host compiler
ifdef HOST_COMPILER
CUSTOM_HOST_COMPILER = 1
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1) ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
HOST_COMPILER ?= clang++ HOST_COMPILER ?= clang++
@ -169,19 +165,6 @@ CCFLAGS :=
LDFLAGS := LDFLAGS :=
# build flags # build flags
# Link flag for customized HOST_COMPILER with gcc realpath
GCC_PATH := $(shell which gcc)
ifeq ($(CUSTOM_HOST_COMPILER),1)
ifneq ($(filter /%,$(HOST_COMPILER)),)
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
ifneq ($(GCC_PATH),$(HOST_COMPILER))
LDFLAGS += -lstdc++
endif
endif
endif
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
LDFLAGS += -rpath $(CUDA_PATH)/lib LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(HOST_ARCH) CCFLAGS += -arch $(HOST_ARCH)
@ -316,23 +299,20 @@ ifeq ($(TARGET_OS),linux)
#$(warning $(GCCVERSION)) #$(warning $(GCCVERSION))
IS_MIN_VERSION := $(shell expr `echo $(GCCVERSION)` \>= 47000) IS_MIN_VERSION := $(shell expr `echo $(GCCVERSION)` \>= 47000)
ifneq ($(CUSTOM_HOST_COMPILER), 1)
ifeq ($(IS_MIN_VERSION), 1) ifeq ($(IS_MIN_VERSION), 1)
$(info >>> GCC Version is greater or equal to 4.7.0 <<<) $(info >>> GCC Version is greater or equal to 4.7.0 <<<)
else else
$(info >>> Waiving build. Minimum GCC version required is 4.7.0<<<) $(info >>> Waiving build. Minimum GCC version required is 4.7.0<<<)
SAMPLE_ENABLED := 0 SAMPLE_ENABLED := 0
endif endif
else
$(warning >>> Custom HOST_COMPILER set; skipping GCC version check. This may lead to unintended behavior. Please note the minimum equivalent GCC version is 4.7.0 <<<)
endif
endif endif
# Gencode arguments # Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
SMS ?= 53 61 70 72 75 80 86 87 90 SMS ?= 53 61 70 72 75 80 86 87 90
else else
SMS ?= 50 52 60 61 70 75 80 86 89 90 SMS ?= 50 52 60 61 70 75 80 86 90
endif endif
ifeq ($(SMS),) ifeq ($(SMS),)

View File

@ -49,7 +49,6 @@
<sm-arch>sm80</sm-arch> <sm-arch>sm80</sm-arch>
<sm-arch>sm86</sm-arch> <sm-arch>sm86</sm-arch>
<sm-arch>sm87</sm-arch> <sm-arch>sm87</sm-arch>
<sm-arch>sm89</sm-arch>
<sm-arch>sm90</sm-arch> <sm-arch>sm90</sm-arch>
<supported_envs> <supported_envs>
<env> <env>

View File

@ -10,7 +10,7 @@ CPP11 CUDA
## Supported SM Architectures ## Supported SM Architectures
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes ## Supported OSes
@ -30,7 +30,7 @@ cudaMalloc, cudaMemcpy, cudaMemset, cudaFree
## Prerequisites ## Prerequisites
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed. Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run ## Build and Run

View File

@ -38,7 +38,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -67,7 +67,7 @@
<OutputFile>$(OutDir)/c++11_cuda.exe</OutputFile> <OutputFile>$(OutDir)/c++11_cuda.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -107,6 +107,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -63,7 +63,7 @@
<OutputFile>$(OutDir)/c++11_cuda.exe</OutputFile> <OutputFile>$(OutDir)/c++11_cuda.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -103,6 +103,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -63,7 +63,7 @@
<OutputFile>$(OutDir)/c++11_cuda.exe</OutputFile> <OutputFile>$(OutDir)/c++11_cuda.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -103,6 +103,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -113,10 +113,6 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
endif endif
# host compiler # host compiler
ifdef HOST_COMPILER
CUSTOM_HOST_COMPILER = 1
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1) ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
HOST_COMPILER ?= clang++ HOST_COMPILER ?= clang++
@ -169,19 +165,6 @@ CCFLAGS :=
LDFLAGS := LDFLAGS :=
# build flags # build flags
# Link flag for customized HOST_COMPILER with gcc realpath
GCC_PATH := $(shell which gcc)
ifeq ($(CUSTOM_HOST_COMPILER),1)
ifneq ($(filter /%,$(HOST_COMPILER)),)
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
ifneq ($(GCC_PATH),$(HOST_COMPILER))
LDFLAGS += -lstdc++
endif
endif
endif
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
LDFLAGS += -rpath $(CUDA_PATH)/lib LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(HOST_ARCH) CCFLAGS += -arch $(HOST_ARCH)
@ -298,7 +281,7 @@ LIBRARIES :=
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
SMS ?= 53 61 70 72 75 80 86 87 90 SMS ?= 53 61 70 72 75 80 86 87 90
else else
SMS ?= 50 52 60 61 70 75 80 86 89 90 SMS ?= 50 52 60 61 70 75 80 86 90
endif endif
ifeq ($(SMS),) ifeq ($(SMS),)

View File

@ -45,7 +45,6 @@
<sm-arch>sm80</sm-arch> <sm-arch>sm80</sm-arch>
<sm-arch>sm86</sm-arch> <sm-arch>sm86</sm-arch>
<sm-arch>sm87</sm-arch> <sm-arch>sm87</sm-arch>
<sm-arch>sm89</sm-arch>
<sm-arch>sm90</sm-arch> <sm-arch>sm90</sm-arch>
<supported_envs> <supported_envs>
<env> <env>

View File

@ -10,7 +10,7 @@ Performance Strategies
## Supported SM Architectures ## Supported SM Architectures
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes ## Supported OSes
@ -27,7 +27,7 @@ cudaMalloc, cudaMemcpy, cudaFree
## Prerequisites ## Prerequisites
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run ## Build and Run

View File

@ -38,7 +38,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -67,7 +67,7 @@
<OutputFile>$(OutDir)/clock.exe</OutputFile> <OutputFile>$(OutDir)/clock.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -107,6 +107,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -63,7 +63,7 @@
<OutputFile>$(OutDir)/clock.exe</OutputFile> <OutputFile>$(OutDir)/clock.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -103,6 +103,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -63,7 +63,7 @@
<OutputFile>$(OutDir)/clock.exe</OutputFile> <OutputFile>$(OutDir)/clock.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -103,6 +103,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -113,10 +113,6 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
endif endif
# host compiler # host compiler
ifdef HOST_COMPILER
CUSTOM_HOST_COMPILER = 1
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1) ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
HOST_COMPILER ?= clang++ HOST_COMPILER ?= clang++
@ -169,19 +165,6 @@ CCFLAGS :=
LDFLAGS := LDFLAGS :=
# build flags # build flags
# Link flag for customized HOST_COMPILER with gcc realpath
GCC_PATH := $(shell which gcc)
ifeq ($(CUSTOM_HOST_COMPILER),1)
ifneq ($(filter /%,$(HOST_COMPILER)),)
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
ifneq ($(GCC_PATH),$(HOST_COMPILER))
LDFLAGS += -lstdc++
endif
endif
endif
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
LDFLAGS += -rpath $(CUDA_PATH)/lib LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(HOST_ARCH) CCFLAGS += -arch $(HOST_ARCH)

View File

@ -10,7 +10,7 @@ Performance Strategies, Runtime Compilation
## Supported SM Architectures ## Supported SM Architectures
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes ## Supported OSes
@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize
## Prerequisites ## Prerequisites
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed. Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run ## Build and Run

View File

@ -38,7 +38,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -107,6 +107,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -103,6 +103,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -103,6 +103,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -113,10 +113,6 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
endif endif
# host compiler # host compiler
ifdef HOST_COMPILER
CUSTOM_HOST_COMPILER = 1
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1) ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
HOST_COMPILER ?= clang++ HOST_COMPILER ?= clang++
@ -169,19 +165,6 @@ CCFLAGS :=
LDFLAGS := LDFLAGS :=
# build flags # build flags
# Link flag for customized HOST_COMPILER with gcc realpath
GCC_PATH := $(shell which gcc)
ifeq ($(CUSTOM_HOST_COMPILER),1)
ifneq ($(filter /%,$(HOST_COMPILER)),)
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
ifneq ($(GCC_PATH),$(HOST_COMPILER))
LDFLAGS += -lstdc++
endif
endif
endif
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
LDFLAGS += -rpath $(CUDA_PATH)/lib LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(HOST_ARCH) CCFLAGS += -arch $(HOST_ARCH)
@ -298,7 +281,7 @@ LIBRARIES :=
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
SMS ?= 53 61 70 72 75 80 86 87 90 SMS ?= 53 61 70 72 75 80 86 87 90
else else
SMS ?= 50 52 60 61 70 75 80 86 89 90 SMS ?= 50 52 60 61 70 75 80 86 90
endif endif
ifeq ($(SMS),) ifeq ($(SMS),)

View File

@ -55,7 +55,6 @@
<sm-arch>sm80</sm-arch> <sm-arch>sm80</sm-arch>
<sm-arch>sm86</sm-arch> <sm-arch>sm86</sm-arch>
<sm-arch>sm87</sm-arch> <sm-arch>sm87</sm-arch>
<sm-arch>sm89</sm-arch>
<sm-arch>sm90</sm-arch> <sm-arch>sm90</sm-arch>
<supported_envs> <supported_envs>
<env> <env>

View File

@ -10,7 +10,7 @@ Performance Strategies
## Supported SM Architectures ## Supported SM Architectures
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes ## Supported OSes
@ -27,7 +27,7 @@ cudaStreamDestroy, cudaMalloc, cudaMemcpyAsync, cudaFree, cudaMallocHost, cudaEv
## Prerequisites ## Prerequisites
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run ## Build and Run

View File

@ -38,7 +38,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -67,7 +67,7 @@
<OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile> <OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -107,6 +107,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -63,7 +63,7 @@
<OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile> <OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -103,6 +103,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -63,7 +63,7 @@
<OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile> <OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -103,6 +103,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -113,10 +113,6 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
endif endif
# host compiler # host compiler
ifdef HOST_COMPILER
CUSTOM_HOST_COMPILER = 1
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1) ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
HOST_COMPILER ?= clang++ HOST_COMPILER ?= clang++
@ -169,19 +165,6 @@ CCFLAGS :=
LDFLAGS := LDFLAGS :=
# build flags # build flags
# Link flag for customized HOST_COMPILER with gcc realpath
GCC_PATH := $(shell which gcc)
ifeq ($(CUSTOM_HOST_COMPILER),1)
ifneq ($(filter /%,$(HOST_COMPILER)),)
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
ifneq ($(GCC_PATH),$(HOST_COMPILER))
LDFLAGS += -lstdc++
endif
endif
endif
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
LDFLAGS += -rpath $(CUDA_PATH)/lib LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(HOST_ARCH) CCFLAGS += -arch $(HOST_ARCH)
@ -298,7 +281,7 @@ LIBRARIES :=
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
SMS ?= 53 61 70 72 75 80 86 87 90 SMS ?= 53 61 70 72 75 80 86 87 90
else else
SMS ?= 50 52 60 61 70 75 80 86 89 90 SMS ?= 50 52 60 61 70 75 80 86 90
endif endif
ifeq ($(SMS),) ifeq ($(SMS),)

View File

@ -39,7 +39,6 @@
<sm-arch>sm80</sm-arch> <sm-arch>sm80</sm-arch>
<sm-arch>sm86</sm-arch> <sm-arch>sm86</sm-arch>
<sm-arch>sm87</sm-arch> <sm-arch>sm87</sm-arch>
<sm-arch>sm89</sm-arch>
<sm-arch>sm90</sm-arch> <sm-arch>sm90</sm-arch>
<supported_envs> <supported_envs>
<env> <env>

View File

@ -10,7 +10,7 @@ CPP-CUDA Integration
## Supported SM Architectures ## Supported SM Architectures
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes ## Supported OSes
@ -27,7 +27,7 @@ cudaMalloc, cudaMemcpy, cudaFree
## Prerequisites ## Prerequisites
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run ## Build and Run

View File

@ -38,7 +38,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -67,7 +67,7 @@
<OutputFile>$(OutDir)/cppIntegration.exe</OutputFile> <OutputFile>$(OutDir)/cppIntegration.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -109,6 +109,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -63,7 +63,7 @@
<OutputFile>$(OutDir)/cppIntegration.exe</OutputFile> <OutputFile>$(OutDir)/cppIntegration.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -105,6 +105,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -63,7 +63,7 @@
<OutputFile>$(OutDir)/cppIntegration.exe</OutputFile> <OutputFile>$(OutDir)/cppIntegration.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -105,6 +105,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -113,10 +113,6 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
endif endif
# host compiler # host compiler
ifdef HOST_COMPILER
CUSTOM_HOST_COMPILER = 1
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1) ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
HOST_COMPILER ?= clang++ HOST_COMPILER ?= clang++
@ -169,19 +165,6 @@ CCFLAGS :=
LDFLAGS := LDFLAGS :=
# build flags # build flags
# Link flag for customized HOST_COMPILER with gcc realpath
GCC_PATH := $(shell which gcc)
ifeq ($(CUSTOM_HOST_COMPILER),1)
ifneq ($(filter /%,$(HOST_COMPILER)),)
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
ifneq ($(GCC_PATH),$(HOST_COMPILER))
LDFLAGS += -lstdc++
endif
endif
endif
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
LDFLAGS += -rpath $(CUDA_PATH)/lib LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(HOST_ARCH) CCFLAGS += -arch $(HOST_ARCH)
@ -298,7 +281,7 @@ LIBRARIES :=
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
SMS ?= 53 61 70 72 75 80 86 87 90 SMS ?= 53 61 70 72 75 80 86 87 90
else else
SMS ?= 50 52 60 61 70 75 80 86 89 90 SMS ?= 50 52 60 61 70 75 80 86 90
endif endif
ifeq ($(SMS),) ifeq ($(SMS),)

View File

@ -50,7 +50,6 @@
<sm-arch>sm80</sm-arch> <sm-arch>sm80</sm-arch>
<sm-arch>sm86</sm-arch> <sm-arch>sm86</sm-arch>
<sm-arch>sm87</sm-arch> <sm-arch>sm87</sm-arch>
<sm-arch>sm89</sm-arch>
<sm-arch>sm90</sm-arch> <sm-arch>sm90</sm-arch>
<supported_envs> <supported_envs>
<env> <env>

View File

@ -10,7 +10,7 @@ C++ Function Overloading, CUDA Streams and Events
## Supported SM Architectures ## Supported SM Architectures
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes ## Supported OSes
@ -27,7 +27,7 @@ cudaMemcpy, cudaFuncSetCacheConfig, cudaFree, cudaMallocHost, cudaSetDevice, cud
## Prerequisites ## Prerequisites
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run ## Build and Run

View File

@ -38,7 +38,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -67,7 +67,7 @@
<OutputFile>$(OutDir)/cppOverload.exe</OutputFile> <OutputFile>$(OutDir)/cppOverload.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -107,6 +107,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -63,7 +63,7 @@
<OutputFile>$(OutDir)/cppOverload.exe</OutputFile> <OutputFile>$(OutDir)/cppOverload.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -103,6 +103,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -63,7 +63,7 @@
<OutputFile>$(OutDir)/cppOverload.exe</OutputFile> <OutputFile>$(OutDir)/cppOverload.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -103,6 +103,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -113,10 +113,6 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
endif endif
# host compiler # host compiler
ifdef HOST_COMPILER
CUSTOM_HOST_COMPILER = 1
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1) ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
HOST_COMPILER ?= clang++ HOST_COMPILER ?= clang++
@ -169,19 +165,6 @@ CCFLAGS :=
LDFLAGS := LDFLAGS :=
# build flags # build flags
# Link flag for customized HOST_COMPILER with gcc realpath
GCC_PATH := $(shell which gcc)
ifeq ($(CUSTOM_HOST_COMPILER),1)
ifneq ($(filter /%,$(HOST_COMPILER)),)
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
ifneq ($(GCC_PATH),$(HOST_COMPILER))
LDFLAGS += -lstdc++
endif
endif
endif
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
LDFLAGS += -rpath $(CUDA_PATH)/lib LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(HOST_ARCH) CCFLAGS += -arch $(HOST_ARCH)
@ -340,7 +323,7 @@ endif
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
SMS ?= 53 61 70 72 75 80 86 87 90 SMS ?= 53 61 70 72 75 80 86 87 90
else else
SMS ?= 50 52 60 61 70 75 80 86 89 90 SMS ?= 50 52 60 61 70 75 80 86 90
endif endif
ifeq ($(SMS),) ifeq ($(SMS),)

View File

@ -10,7 +10,7 @@ CUDA Systems Integration, OpenMP, Multithreading
## Supported SM Architectures ## Supported SM Architectures
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes ## Supported OSes
@ -30,7 +30,7 @@ cudaMemcpy, cudaGetErrorString, cudaFree, cudaGetLastError, cudaSetDevice, cudaG
## Prerequisites ## Prerequisites
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed. Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run ## Build and Run

View File

@ -38,7 +38,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -67,7 +67,7 @@
<OutputFile>$(OutDir)/cudaOpenMP.exe</OutputFile> <OutputFile>$(OutDir)/cudaOpenMP.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -108,6 +108,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -63,7 +63,7 @@
<OutputFile>$(OutDir)/cudaOpenMP.exe</OutputFile> <OutputFile>$(OutDir)/cudaOpenMP.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -104,6 +104,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -63,7 +63,7 @@
<OutputFile>$(OutDir)/cudaOpenMP.exe</OutputFile> <OutputFile>$(OutDir)/cudaOpenMP.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -104,6 +104,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -113,10 +113,6 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
endif endif
# host compiler # host compiler
ifdef HOST_COMPILER
CUSTOM_HOST_COMPILER = 1
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1) ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
HOST_COMPILER ?= clang++ HOST_COMPILER ?= clang++
@ -169,19 +165,6 @@ CCFLAGS :=
LDFLAGS := LDFLAGS :=
# build flags # build flags
# Link flag for customized HOST_COMPILER with gcc realpath
GCC_PATH := $(shell which gcc)
ifeq ($(CUSTOM_HOST_COMPILER),1)
ifneq ($(filter /%,$(HOST_COMPILER)),)
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
ifneq ($(GCC_PATH),$(HOST_COMPILER))
LDFLAGS += -lstdc++
endif
endif
endif
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
LDFLAGS += -rpath $(CUDA_PATH)/lib LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(HOST_ARCH) CCFLAGS += -arch $(HOST_ARCH)
@ -304,7 +287,7 @@ LIBRARIES :=
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
SMS ?= 53 61 70 72 75 80 86 87 90 SMS ?= 53 61 70 72 75 80 86 87 90
else else
SMS ?= 60 61 70 75 80 86 89 90 SMS ?= 60 61 70 75 80 86 90
endif endif
ifeq ($(SMS),) ifeq ($(SMS),)

View File

@ -44,7 +44,6 @@
<sm-arch>sm80</sm-arch> <sm-arch>sm80</sm-arch>
<sm-arch>sm86</sm-arch> <sm-arch>sm86</sm-arch>
<sm-arch>sm87</sm-arch> <sm-arch>sm87</sm-arch>
<sm-arch>sm89</sm-arch>
<sm-arch>sm90</sm-arch> <sm-arch>sm90</sm-arch>
<supported_envs> <supported_envs>
<env> <env>

View File

@ -10,7 +10,7 @@ CUDA Runtime API
## Supported SM Architectures ## Supported SM Architectures
[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes ## Supported OSes
@ -30,7 +30,7 @@ cudaMemcpy, cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaGetDevicePro
## Prerequisites ## Prerequisites
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed. Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run ## Build and Run

View File

@ -38,7 +38,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -67,7 +67,7 @@
<OutputFile>$(OutDir)/fp16ScalarProduct.exe</OutputFile> <OutputFile>$(OutDir)/fp16ScalarProduct.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -107,6 +107,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -63,7 +63,7 @@
<OutputFile>$(OutDir)/fp16ScalarProduct.exe</OutputFile> <OutputFile>$(OutDir)/fp16ScalarProduct.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -103,6 +103,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -63,7 +63,7 @@
<OutputFile>$(OutDir)/fp16ScalarProduct.exe</OutputFile> <OutputFile>$(OutDir)/fp16ScalarProduct.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -103,6 +103,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -113,10 +113,6 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
endif endif
# host compiler # host compiler
ifdef HOST_COMPILER
CUSTOM_HOST_COMPILER = 1
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1) ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
HOST_COMPILER ?= clang++ HOST_COMPILER ?= clang++
@ -169,19 +165,6 @@ CCFLAGS :=
LDFLAGS := LDFLAGS :=
# build flags # build flags
# Link flag for customized HOST_COMPILER with gcc realpath
GCC_PATH := $(shell which gcc)
ifeq ($(CUSTOM_HOST_COMPILER),1)
ifneq ($(filter /%,$(HOST_COMPILER)),)
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
ifneq ($(GCC_PATH),$(HOST_COMPILER))
LDFLAGS += -lstdc++
endif
endif
endif
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
LDFLAGS += -rpath $(CUDA_PATH)/lib LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(HOST_ARCH) CCFLAGS += -arch $(HOST_ARCH)
@ -298,7 +281,7 @@ LIBRARIES :=
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
SMS ?= 53 61 70 72 75 80 86 87 90 SMS ?= 53 61 70 72 75 80 86 87 90
else else
SMS ?= 50 52 60 61 70 75 80 86 89 90 SMS ?= 50 52 60 61 70 75 80 86 90
endif endif
ifeq ($(SMS),) ifeq ($(SMS),)

View File

@ -54,7 +54,6 @@
<sm-arch>sm80</sm-arch> <sm-arch>sm80</sm-arch>
<sm-arch>sm86</sm-arch> <sm-arch>sm86</sm-arch>
<sm-arch>sm87</sm-arch> <sm-arch>sm87</sm-arch>
<sm-arch>sm89</sm-arch>
<sm-arch>sm90</sm-arch> <sm-arch>sm90</sm-arch>
<supported_envs> <supported_envs>
<env> <env>

View File

@ -10,7 +10,7 @@ CUDA Runtime API, Linear Algebra
## Supported SM Architectures ## Supported SM Architectures
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes ## Supported OSes
@ -27,7 +27,7 @@ cudaStreamCreateWithFlags, cudaProfilerStop, cudaMalloc, cudaFree, cudaMallocHos
## Prerequisites ## Prerequisites
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run ## Build and Run

View File

@ -38,7 +38,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -67,7 +67,7 @@
<OutputFile>$(OutDir)/matrixMul.exe</OutputFile> <OutputFile>$(OutDir)/matrixMul.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -107,6 +107,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -63,7 +63,7 @@
<OutputFile>$(OutDir)/matrixMul.exe</OutputFile> <OutputFile>$(OutDir)/matrixMul.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -103,6 +103,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -63,7 +63,7 @@
<OutputFile>$(OutDir)/matrixMul.exe</OutputFile> <OutputFile>$(OutDir)/matrixMul.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -103,6 +103,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -113,10 +113,6 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
endif endif
# host compiler # host compiler
ifdef HOST_COMPILER
CUSTOM_HOST_COMPILER = 1
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1) ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
HOST_COMPILER ?= clang++ HOST_COMPILER ?= clang++
@ -169,19 +165,6 @@ CCFLAGS :=
LDFLAGS := LDFLAGS :=
# build flags # build flags
# Link flag for customized HOST_COMPILER with gcc realpath
GCC_PATH := $(shell which gcc)
ifeq ($(CUSTOM_HOST_COMPILER),1)
ifneq ($(filter /%,$(HOST_COMPILER)),)
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
ifneq ($(GCC_PATH),$(HOST_COMPILER))
LDFLAGS += -lstdc++
endif
endif
endif
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
LDFLAGS += -rpath $(CUDA_PATH)/lib LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(HOST_ARCH) CCFLAGS += -arch $(HOST_ARCH)
@ -302,7 +285,7 @@ FATBIN_FILE := matrixMul_kernel${TARGET_SIZE}.fatbin
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
SMS ?= 53 61 70 72 75 80 86 87 90 SMS ?= 53 61 70 72 75 80 86 87 90
else else
SMS ?= 50 52 60 61 70 75 80 86 89 90 SMS ?= 50 52 60 61 70 75 80 86 90
endif endif
ifeq ($(SMS),) ifeq ($(SMS),)

View File

@ -10,7 +10,7 @@ CUDA Driver API, Matrix Multiply
## Supported SM Architectures ## Supported SM Architectures
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes ## Supported OSes
@ -27,7 +27,7 @@ cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuDeviceGetName, cuDeviceTotalMem, c
## Prerequisites ## Prerequisites
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run ## Build and Run

View File

@ -38,7 +38,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -67,7 +67,7 @@
<OutputFile>$(OutDir)/matrixMulDrv.exe</OutputFile> <OutputFile>$(OutDir)/matrixMulDrv.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -111,6 +111,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -63,7 +63,7 @@
<OutputFile>$(OutDir)/matrixMulDrv.exe</OutputFile> <OutputFile>$(OutDir)/matrixMulDrv.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -107,6 +107,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -63,7 +63,7 @@
<OutputFile>$(OutDir)/matrixMulDrv.exe</OutputFile> <OutputFile>$(OutDir)/matrixMulDrv.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -107,6 +107,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -113,10 +113,6 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
endif endif
# host compiler # host compiler
ifdef HOST_COMPILER
CUSTOM_HOST_COMPILER = 1
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1) ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
HOST_COMPILER ?= clang++ HOST_COMPILER ?= clang++
@ -169,19 +165,6 @@ CCFLAGS :=
LDFLAGS := LDFLAGS :=
# build flags # build flags
# Link flag for customized HOST_COMPILER with gcc realpath
GCC_PATH := $(shell which gcc)
ifeq ($(CUSTOM_HOST_COMPILER),1)
ifneq ($(filter /%,$(HOST_COMPILER)),)
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
ifneq ($(GCC_PATH),$(HOST_COMPILER))
LDFLAGS += -lstdc++
endif
endif
endif
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
LDFLAGS += -rpath $(CUDA_PATH)/lib LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(HOST_ARCH) CCFLAGS += -arch $(HOST_ARCH)

View File

@ -10,7 +10,7 @@ CUDA Driver API, CUDA Dynamically Linked Library
## Supported SM Architectures ## Supported SM Architectures
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes ## Supported OSes
@ -27,7 +27,7 @@ cuMemcpyDtoH, cuDeviceGetName, cuParamSeti, cuModuleLoadDataEx, cuModuleGetFunct
## Prerequisites ## Prerequisites
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run ## Build and Run

View File

@ -38,7 +38,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -116,6 +116,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -112,6 +112,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -112,6 +112,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -113,10 +113,6 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
endif endif
# host compiler # host compiler
ifdef HOST_COMPILER
CUSTOM_HOST_COMPILER = 1
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1) ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
HOST_COMPILER ?= clang++ HOST_COMPILER ?= clang++
@ -169,19 +165,6 @@ CCFLAGS :=
LDFLAGS := LDFLAGS :=
# build flags # build flags
# Link flag for customized HOST_COMPILER with gcc realpath
GCC_PATH := $(shell which gcc)
ifeq ($(CUSTOM_HOST_COMPILER),1)
ifneq ($(filter /%,$(HOST_COMPILER)),)
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
ifneq ($(GCC_PATH),$(HOST_COMPILER))
LDFLAGS += -lstdc++
endif
endif
endif
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
LDFLAGS += -rpath $(CUDA_PATH)/lib LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(HOST_ARCH) CCFLAGS += -arch $(HOST_ARCH)

View File

@ -10,7 +10,7 @@ CUDA Runtime API, Linear Algebra, Runtime Compilation
## Supported SM Architectures ## Supported SM Architectures
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes ## Supported OSes
@ -30,7 +30,7 @@ cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemF
## Prerequisites ## Prerequisites
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed. Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run ## Build and Run

View File

@ -38,7 +38,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -113,6 +113,6 @@ xcopy /y /e /s "$(CudaToolkitDir)include\cooperative_groups" .\cooperative_group
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -109,6 +109,6 @@ xcopy /y /e /s "$(CudaToolkitDir)include\cooperative_groups" .\cooperative_group
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -109,6 +109,6 @@ xcopy /y /e /s "$(CudaToolkitDir)include\cooperative_groups" .\cooperative_group
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -113,10 +113,6 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
endif endif
# host compiler # host compiler
ifdef HOST_COMPILER
CUSTOM_HOST_COMPILER = 1
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1) ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
HOST_COMPILER ?= clang++ HOST_COMPILER ?= clang++
@ -169,19 +165,6 @@ CCFLAGS :=
LDFLAGS := LDFLAGS :=
# build flags # build flags
# Link flag for customized HOST_COMPILER with gcc realpath
GCC_PATH := $(shell which gcc)
ifeq ($(CUSTOM_HOST_COMPILER),1)
ifneq ($(filter /%,$(HOST_COMPILER)),)
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
ifneq ($(GCC_PATH),$(HOST_COMPILER))
LDFLAGS += -lstdc++
endif
endif
endif
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
LDFLAGS += -rpath $(CUDA_PATH)/lib LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(HOST_ARCH) CCFLAGS += -arch $(HOST_ARCH)
@ -298,7 +281,7 @@ LIBRARIES :=
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
SMS ?= 53 61 70 72 75 80 86 87 90 SMS ?= 53 61 70 72 75 80 86 87 90
else else
SMS ?= 50 52 60 61 70 75 80 86 89 90 SMS ?= 50 52 60 61 70 75 80 86 90
endif endif
ifeq ($(SMS),) ifeq ($(SMS),)

View File

@ -44,7 +44,6 @@
<sm-arch>sm80</sm-arch> <sm-arch>sm80</sm-arch>
<sm-arch>sm86</sm-arch> <sm-arch>sm86</sm-arch>
<sm-arch>sm87</sm-arch> <sm-arch>sm87</sm-arch>
<sm-arch>sm89</sm-arch>
<sm-arch>sm90</sm-arch> <sm-arch>sm90</sm-arch>
<supported_envs> <supported_envs>
<env> <env>

View File

@ -10,7 +10,7 @@ Data-Parallel Algorithms
## Supported SM Architectures ## Supported SM Architectures
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes ## Supported OSes
@ -27,7 +27,7 @@ cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree
## Prerequisites ## Prerequisites
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run ## Build and Run

View File

@ -38,7 +38,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -67,7 +67,7 @@
<OutputFile>$(OutDir)/mergeSort.exe</OutputFile> <OutputFile>$(OutDir)/mergeSort.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -111,6 +111,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -63,7 +63,7 @@
<OutputFile>$(OutDir)/mergeSort.exe</OutputFile> <OutputFile>$(OutDir)/mergeSort.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -107,6 +107,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -63,7 +63,7 @@
<OutputFile>$(OutDir)/mergeSort.exe</OutputFile> <OutputFile>$(OutDir)/mergeSort.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -107,6 +107,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -113,10 +113,6 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
endif endif
# host compiler # host compiler
ifdef HOST_COMPILER
CUSTOM_HOST_COMPILER = 1
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1) ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
HOST_COMPILER ?= clang++ HOST_COMPILER ?= clang++
@ -169,19 +165,6 @@ CCFLAGS :=
LDFLAGS := LDFLAGS :=
# build flags # build flags
# Link flag for customized HOST_COMPILER with gcc realpath
GCC_PATH := $(shell which gcc)
ifeq ($(CUSTOM_HOST_COMPILER),1)
ifneq ($(filter /%,$(HOST_COMPILER)),)
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
ifneq ($(GCC_PATH),$(HOST_COMPILER))
LDFLAGS += -lstdc++
endif
endif
endif
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
LDFLAGS += -rpath $(CUDA_PATH)/lib LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(HOST_ARCH) CCFLAGS += -arch $(HOST_ARCH)
@ -322,23 +305,20 @@ ifeq ($(TARGET_OS),linux)
#$(warning $(GCCVERSION)) #$(warning $(GCCVERSION))
IS_MIN_VERSION := $(shell expr `echo $(GCCVERSION)` \>= 51000) IS_MIN_VERSION := $(shell expr `echo $(GCCVERSION)` \>= 51000)
ifneq ($(CUSTOM_HOST_COMPILER), 1)
ifeq ($(IS_MIN_VERSION), 1) ifeq ($(IS_MIN_VERSION), 1)
$(info >>> GCC Version is greater or equal to 5.1.0 <<<) $(info >>> GCC Version is greater or equal to 5.1.0 <<<)
else else
$(info >>> Waiving build. Minimum GCC version required is 5.1.0<<<) $(info >>> Waiving build. Minimum GCC version required is 5.1.0<<<)
SAMPLE_ENABLED := 0 SAMPLE_ENABLED := 0
endif endif
else
$(warning >>> Custom HOST_COMPILER set; skipping GCC version check. This may lead to unintended behavior. Please note the minimum equivalent GCC version is 5.1.0 <<<)
endif
endif endif
# Gencode arguments # Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
SMS ?= 70 72 75 80 86 87 90 SMS ?= 70 72 75 80 86 87 90
else else
SMS ?= 70 75 80 86 89 90 SMS ?= 70 75 80 86 90
endif endif
ifeq ($(SMS),) ifeq ($(SMS),)

View File

@ -53,7 +53,6 @@
<sm-arch>sm80</sm-arch> <sm-arch>sm80</sm-arch>
<sm-arch>sm86</sm-arch> <sm-arch>sm86</sm-arch>
<sm-arch>sm87</sm-arch> <sm-arch>sm87</sm-arch>
<sm-arch>sm89</sm-arch>
<sm-arch>sm90</sm-arch> <sm-arch>sm90</sm-arch>
<supported_envs> <supported_envs>
<env> <env>

View File

@ -10,7 +10,7 @@ Arrive Wait Barrier
## Supported SM Architectures ## Supported SM Architectures
[SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes ## Supported OSes
@ -30,7 +30,7 @@ cudaStreamCreateWithFlags, cudaFree, cudaDeviceGetAttribute, cudaMallocHost, cud
## Prerequisites ## Prerequisites
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed. Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run ## Build and Run

View File

@ -38,7 +38,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -67,7 +67,7 @@
<OutputFile>$(OutDir)/simpleAWBarrier.exe</OutputFile> <OutputFile>$(OutDir)/simpleAWBarrier.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -107,6 +107,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -63,7 +63,7 @@
<OutputFile>$(OutDir)/simpleAWBarrier.exe</OutputFile> <OutputFile>$(OutDir)/simpleAWBarrier.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -103,6 +103,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -63,7 +63,7 @@
<OutputFile>$(OutDir)/simpleAWBarrier.exe</OutputFile> <OutputFile>$(OutDir)/simpleAWBarrier.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -103,6 +103,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View File

@ -113,10 +113,6 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
endif endif
# host compiler # host compiler
ifdef HOST_COMPILER
CUSTOM_HOST_COMPILER = 1
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1) ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
HOST_COMPILER ?= clang++ HOST_COMPILER ?= clang++
@ -169,19 +165,6 @@ CCFLAGS :=
LDFLAGS := LDFLAGS :=
# build flags # build flags
# Link flag for customized HOST_COMPILER with gcc realpath
GCC_PATH := $(shell which gcc)
ifeq ($(CUSTOM_HOST_COMPILER),1)
ifneq ($(filter /%,$(HOST_COMPILER)),)
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
ifneq ($(GCC_PATH),$(HOST_COMPILER))
LDFLAGS += -lstdc++
endif
endif
endif
endif
ifeq ($(TARGET_OS),darwin) ifeq ($(TARGET_OS),darwin)
LDFLAGS += -rpath $(CUDA_PATH)/lib LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(HOST_ARCH) CCFLAGS += -arch $(HOST_ARCH)
@ -304,7 +287,7 @@ LIBRARIES :=
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
SMS ?= 53 61 70 72 75 80 86 87 90 SMS ?= 53 61 70 72 75 80 86 87 90
else else
SMS ?= 50 52 60 61 70 75 80 86 89 90 SMS ?= 50 52 60 61 70 75 80 86 90
endif endif
ifeq ($(SMS),) ifeq ($(SMS),)

View File

@ -39,7 +39,6 @@
<sm-arch>sm80</sm-arch> <sm-arch>sm80</sm-arch>
<sm-arch>sm86</sm-arch> <sm-arch>sm86</sm-arch>
<sm-arch>sm87</sm-arch> <sm-arch>sm87</sm-arch>
<sm-arch>sm89</sm-arch>
<sm-arch>sm90</sm-arch> <sm-arch>sm90</sm-arch>
<supported_envs> <supported_envs>
<env> <env>

View File

@ -10,7 +10,7 @@ Assert
## Supported SM Architectures ## Supported SM Architectures
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes ## Supported OSes
@ -27,7 +27,7 @@ cudaDeviceSynchronize, cudaGetErrorString
## Prerequisites ## Prerequisites
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run ## Build and Run

View File

@ -38,7 +38,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets"> <ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" /> <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -67,7 +67,7 @@
<OutputFile>$(OutDir)/simpleAssert.exe</OutputFile> <OutputFile>$(OutDir)/simpleAssert.exe</OutputFile>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration> <CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions> <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
<Include>./;../../../Common</Include> <Include>./;../../../Common</Include>
<Defines>WIN32</Defines> <Defines>WIN32</Defines>
@ -107,6 +107,6 @@
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" /> <Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

Some files were not shown because too many files have changed in this diff Show More