mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2025-04-05 12:08:31 +08:00
Compare commits
18 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
9c688d7ff7 | ||
|
5f97d7d0df | ||
|
3559ca4d08 | ||
|
cd3bc1fa8e | ||
|
e8568c4173 | ||
|
b5c84e6996 | ||
|
c46754b877 | ||
|
03309a2d42 | ||
|
5688ee0013 | ||
|
8004ad59ab | ||
|
e612904184 | ||
|
81cf058e30 | ||
|
26665bf33b | ||
|
00bb9bc367 | ||
|
e4789153d5 | ||
|
1c2efac7c8 | ||
|
3d553b2ea1 | ||
|
9316529638 |
17
CHANGELOG.md
17
CHANGELOG.md
|
@ -1,5 +1,22 @@
|
|||
## Changelog
|
||||
|
||||
### CUDA 12.5
|
||||
|
||||
### CUDA 12.4
|
||||
* Added graphConditionalNodes Sample
|
||||
|
||||
### CUDA 12.3
|
||||
* Added cuDLA samples
|
||||
* Fixed jitLto regression
|
||||
|
||||
### CUDA 12.2
|
||||
* libNVVM samples received updates
|
||||
* Fixed jitLto Case issues
|
||||
* Enabled the HOST_COMPILER flag in the makefiles for GCC; this is untested but may still work.
|
||||
|
||||
### CUDA 12.1
|
||||
* Added new sample for Large Kernels
|
||||
|
||||
### CUDA 12.0
|
||||
* Added new flags for JIT compiling
|
||||
* Removed deprecated APIs in Hopper Architecture
|
||||
|
|
|
@ -666,6 +666,7 @@ inline int _ConvertSMVer2Cores(int major, int minor) {
|
|||
{0x80, 64},
|
||||
{0x86, 128},
|
||||
{0x87, 128},
|
||||
{0x89, 128},
|
||||
{0x90, 128},
|
||||
{-1, -1}};
|
||||
|
||||
|
@ -714,6 +715,7 @@ inline const char* _ConvertSMVer2ArchName(int major, int minor) {
|
|||
{0x80, "Ampere"},
|
||||
{0x86, "Ampere"},
|
||||
{0x87, "Ampere"},
|
||||
{0x89, "Ada"},
|
||||
{0x90, "Hopper"},
|
||||
{-1, "Graphics Device"}};
|
||||
|
||||
|
|
|
@ -114,6 +114,7 @@ inline int _ConvertSMVer2CoresDRV(int major, int minor) {
|
|||
{0x80, 64},
|
||||
{0x86, 128},
|
||||
{0x87, 128},
|
||||
{0x89, 128},
|
||||
{0x90, 128},
|
||||
{-1, -1}};
|
||||
|
||||
|
|
|
@ -168,7 +168,7 @@ int waitProcess(Process *process) {
|
|||
#endif
|
||||
}
|
||||
|
||||
#if defined(__linux__)
|
||||
#if defined(__linux__) || defined(__QNX__)
|
||||
int ipcCreateSocket(ipcHandle *&handle, const char *name,
|
||||
const std::vector<Process> &processes) {
|
||||
int server_fd;
|
||||
|
@ -262,41 +262,48 @@ int ipcRecvShareableHandle(ipcHandle *handle, ShareableHandle *shHandle) {
|
|||
// Union to guarantee alignment requirements for control array
|
||||
union {
|
||||
struct cmsghdr cm;
|
||||
char control[CMSG_SPACE(sizeof(int))];
|
||||
// This will not work on QNX as QNX CMSG_SPACE calls __cmsg_alignbytes
|
||||
// And __cmsg_alignbytes is a runtime function instead of compile-time macros
|
||||
// char control[CMSG_SPACE(sizeof(int))]
|
||||
char* control;
|
||||
} control_un;
|
||||
|
||||
size_t sizeof_control = CMSG_SPACE(sizeof(int)) * sizeof(char);
|
||||
control_un.control = (char*) malloc(sizeof_control);
|
||||
struct cmsghdr *cmptr;
|
||||
ssize_t n;
|
||||
int receivedfd;
|
||||
char dummy_buffer[1];
|
||||
ssize_t sendResult;
|
||||
|
||||
msg.msg_control = control_un.control;
|
||||
msg.msg_controllen = sizeof(control_un.control);
|
||||
msg.msg_controllen = sizeof_control;
|
||||
|
||||
iov[0].iov_base = (void *)dummy_buffer;
|
||||
iov[0].iov_len = sizeof(dummy_buffer);
|
||||
|
||||
msg.msg_iov = iov;
|
||||
msg.msg_iovlen = 1;
|
||||
|
||||
if ((n = recvmsg(handle->socket, &msg, 0)) <= 0) {
|
||||
perror("IPC failure: Receiving data over socket failed");
|
||||
free(control_un.control);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (((cmptr = CMSG_FIRSTHDR(&msg)) != NULL) &&
|
||||
(cmptr->cmsg_len == CMSG_LEN(sizeof(int)))) {
|
||||
if ((cmptr->cmsg_level != SOL_SOCKET) || (cmptr->cmsg_type != SCM_RIGHTS)) {
|
||||
free(control_un.control);
|
||||
return -1;
|
||||
}
|
||||
|
||||
memmove(&receivedfd, CMSG_DATA(cmptr), sizeof(receivedfd));
|
||||
*(int *)shHandle = receivedfd;
|
||||
} else {
|
||||
free(control_un.control);
|
||||
return -1;
|
||||
}
|
||||
|
||||
free(control_un.control);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -340,9 +347,12 @@ int ipcSendShareableHandle(ipcHandle *handle,
|
|||
|
||||
union {
|
||||
struct cmsghdr cm;
|
||||
char control[CMSG_SPACE(sizeof(int))];
|
||||
char* control;
|
||||
} control_un;
|
||||
|
||||
size_t sizeof_control = CMSG_SPACE(sizeof(int)) * sizeof(char);
|
||||
control_un.control = (char*) malloc(sizeof_control);
|
||||
|
||||
struct cmsghdr *cmptr;
|
||||
ssize_t readResult;
|
||||
struct sockaddr_un cliaddr;
|
||||
|
@ -360,7 +370,7 @@ int ipcSendShareableHandle(ipcHandle *handle,
|
|||
int sendfd = (int)shareableHandles[data];
|
||||
|
||||
msg.msg_control = control_un.control;
|
||||
msg.msg_controllen = sizeof(control_un.control);
|
||||
msg.msg_controllen = sizeof_control;
|
||||
|
||||
cmptr = CMSG_FIRSTHDR(&msg);
|
||||
cmptr->cmsg_len = CMSG_LEN(sizeof(int));
|
||||
|
@ -380,9 +390,11 @@ int ipcSendShareableHandle(ipcHandle *handle,
|
|||
ssize_t sendResult = sendmsg(handle->socket, &msg, 0);
|
||||
if (sendResult <= 0) {
|
||||
perror("IPC failure: Sending data over socket failed");
|
||||
free(control_un.control);
|
||||
return -1;
|
||||
}
|
||||
|
||||
free(control_un.control);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -84,7 +84,7 @@ int waitProcess(Process *process);
|
|||
#define checkIpcErrors(ipcFuncResult) \
|
||||
if (ipcFuncResult == -1) { fprintf(stderr, "Failure at %u %s\n", __LINE__, __FILE__); exit(EXIT_FAILURE); }
|
||||
|
||||
#if defined(__linux__)
|
||||
#if defined(__linux__) || defined(__QNX__)
|
||||
struct ipcHandle_st {
|
||||
int socket;
|
||||
char *socketName;
|
||||
|
|
15
README.md
15
README.md
|
@ -1,14 +1,12 @@
|
|||
# CUDA Samples
|
||||
|
||||
Samples for CUDA Developers which demonstrates features in CUDA Toolkit. This version supports [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads).
|
||||
Samples for CUDA developers that demonstrate features in the CUDA Toolkit. This version supports [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads).
|
||||
|
||||
## Release Notes
|
||||
|
||||
This section describes the release notes for the CUDA Samples on GitHub only.
|
||||
|
||||
### CUDA 12.0
|
||||
* Added new flags for JIT compiling
|
||||
* Removed deprecated APIs in Hopper Architecture
|
||||
### CUDA 12.5
|
||||
|
||||
### [older versions...](./CHANGELOG.md)
|
||||
|
||||
|
@ -16,7 +14,7 @@ This section describes the release notes for the CUDA Samples on GitHub only.
|
|||
|
||||
### Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
For system requirements and installation instructions for the CUDA Toolkit, please refer to the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/), and the [Windows Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html).
|
||||
|
||||
### Getting the CUDA Samples
|
||||
|
@ -91,6 +89,9 @@ Samples that are specific to domain (Graphics, Finance, Image Processing).
|
|||
### [6. Performance](./Samples/6_Performance/README.md)
|
||||
Samples that demonstrate performance optimization.
|
||||
|
||||
### [7. libNVVM](./Samples/7_libNVVM/README.md)
|
||||
Samples that demonstrate the use of libNVVM and NVVM IR.
|
||||
|
||||
## Dependencies
|
||||
|
||||
Some CUDA Samples rely on third-party applications and/or libraries, or features provided by the CUDA Toolkit and Driver, to either build or execute. These dependencies are listed below.
|
||||
|
@ -245,6 +246,10 @@ FP16 is a 16-bit floating-point format. One bit is used for the sign, five bits
|
|||
|
||||
NVCC support of [C++11 features](https://en.wikipedia.org/wiki/C++11).
|
||||
|
||||
#### CMake
|
||||
|
||||
The libNVVM samples are built using [CMake](https://cmake.org/) 3.10 or later.
|
||||
|
||||
## Contributors Guide
|
||||
|
||||
We welcome your input on issues and suggestions for samples. At this time we are not accepting contributions from the public; check back here as we evolve our contribution model.
|
||||
|
|
|
@ -113,6 +113,10 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
|||
endif
|
||||
|
||||
# host compiler
|
||||
ifdef HOST_COMPILER
|
||||
CUSTOM_HOST_COMPILER = 1
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
|
@ -165,6 +169,19 @@ CCFLAGS :=
|
|||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
|
||||
# Link flag for customized HOST_COMPILER with gcc realpath
|
||||
GCC_PATH := $(shell which gcc)
|
||||
ifeq ($(CUSTOM_HOST_COMPILER),1)
|
||||
ifneq ($(filter /%,$(HOST_COMPILER)),)
|
||||
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
|
||||
ifneq ($(GCC_PATH),$(HOST_COMPILER))
|
||||
LDFLAGS += -lstdc++
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
|
@ -320,7 +337,7 @@ endif
|
|||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 50 52 60 61 70 75 80 86 90
|
||||
SMS ?= 50 52 60 61 70 75 80 86 89 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -28,7 +28,7 @@ cudaStreamDestroy, cudaFree, cudaMallocManaged, cudaStreamAttachMemAsync, cudaSe
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/UnifiedMemoryStreams.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -108,6 +108,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/UnifiedMemoryStreams.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -104,6 +104,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/UnifiedMemoryStreams.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -104,6 +104,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -113,6 +113,10 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
|||
endif
|
||||
|
||||
# host compiler
|
||||
ifdef HOST_COMPILER
|
||||
CUSTOM_HOST_COMPILER = 1
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
|
@ -165,6 +169,19 @@ CCFLAGS :=
|
|||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
|
||||
# Link flag for customized HOST_COMPILER with gcc realpath
|
||||
GCC_PATH := $(shell which gcc)
|
||||
ifeq ($(CUSTOM_HOST_COMPILER),1)
|
||||
ifneq ($(filter /%,$(HOST_COMPILER)),)
|
||||
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
|
||||
ifneq ($(GCC_PATH),$(HOST_COMPILER))
|
||||
LDFLAGS += -lstdc++
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
|
@ -281,7 +298,7 @@ LIBRARIES :=
|
|||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 50 52 60 61 70 75 80 86 90
|
||||
SMS ?= 50 52 60 61 70 75 80 86 89 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -57,6 +57,7 @@
|
|||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<sm-arch>sm87</sm-arch>
|
||||
<sm-arch>sm89</sm-arch>
|
||||
<sm-arch>sm90</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
|
|
|
@ -10,7 +10,7 @@ Asynchronous Data Transfers, CUDA Streams and Events
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -27,7 +27,7 @@ cudaProfilerStop, cudaMalloc, cudaMemcpyAsync, cudaFree, cudaMallocHost, cudaPro
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/asyncAPI.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/asyncAPI.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/asyncAPI.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -113,6 +113,10 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
|||
endif
|
||||
|
||||
# host compiler
|
||||
ifdef HOST_COMPILER
|
||||
CUSTOM_HOST_COMPILER = 1
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
|
@ -165,6 +169,19 @@ CCFLAGS :=
|
|||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
|
||||
# Link flag for customized HOST_COMPILER with gcc realpath
|
||||
GCC_PATH := $(shell which gcc)
|
||||
ifeq ($(CUSTOM_HOST_COMPILER),1)
|
||||
ifneq ($(filter /%,$(HOST_COMPILER)),)
|
||||
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
|
||||
ifneq ($(GCC_PATH),$(HOST_COMPILER))
|
||||
LDFLAGS += -lstdc++
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
|
@ -299,20 +316,23 @@ ifeq ($(TARGET_OS),linux)
|
|||
#$(warning $(GCCVERSION))
|
||||
|
||||
IS_MIN_VERSION := $(shell expr `echo $(GCCVERSION)` \>= 47000)
|
||||
|
||||
ifneq ($(CUSTOM_HOST_COMPILER), 1)
|
||||
ifeq ($(IS_MIN_VERSION), 1)
|
||||
$(info >>> GCC Version is greater or equal to 4.7.0 <<<)
|
||||
else
|
||||
$(info >>> Waiving build. Minimum GCC version required is 4.7.0<<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
else
|
||||
$(warning >>> Custom HOST_COMPILER set; skipping GCC version check. This may lead to unintended behavior. Please note the minimum equivalent GCC version is 4.7.0 <<<)
|
||||
endif
|
||||
endif
|
||||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 50 52 60 61 70 75 80 86 90
|
||||
SMS ?= 50 52 60 61 70 75 80 86 89 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -49,6 +49,7 @@
|
|||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<sm-arch>sm87</sm-arch>
|
||||
<sm-arch>sm89</sm-arch>
|
||||
<sm-arch>sm90</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
|
|
|
@ -10,7 +10,7 @@ CPP11 CUDA
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -30,7 +30,7 @@ cudaMalloc, cudaMemcpy, cudaMemset, cudaFree
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/c++11_cuda.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/c++11_cuda.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/c++11_cuda.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -113,6 +113,10 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
|||
endif
|
||||
|
||||
# host compiler
|
||||
ifdef HOST_COMPILER
|
||||
CUSTOM_HOST_COMPILER = 1
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
|
@ -165,6 +169,19 @@ CCFLAGS :=
|
|||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
|
||||
# Link flag for customized HOST_COMPILER with gcc realpath
|
||||
GCC_PATH := $(shell which gcc)
|
||||
ifeq ($(CUSTOM_HOST_COMPILER),1)
|
||||
ifneq ($(filter /%,$(HOST_COMPILER)),)
|
||||
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
|
||||
ifneq ($(GCC_PATH),$(HOST_COMPILER))
|
||||
LDFLAGS += -lstdc++
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
|
@ -281,7 +298,7 @@ LIBRARIES :=
|
|||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 50 52 60 61 70 75 80 86 90
|
||||
SMS ?= 50 52 60 61 70 75 80 86 89 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -45,6 +45,7 @@
|
|||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<sm-arch>sm87</sm-arch>
|
||||
<sm-arch>sm89</sm-arch>
|
||||
<sm-arch>sm90</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
|
|
|
@ -10,7 +10,7 @@ Performance Strategies
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -27,7 +27,7 @@ cudaMalloc, cudaMemcpy, cudaFree
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/clock.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/clock.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/clock.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -113,6 +113,10 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
|||
endif
|
||||
|
||||
# host compiler
|
||||
ifdef HOST_COMPILER
|
||||
CUSTOM_HOST_COMPILER = 1
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
|
@ -165,6 +169,19 @@ CCFLAGS :=
|
|||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
|
||||
# Link flag for customized HOST_COMPILER with gcc realpath
|
||||
GCC_PATH := $(shell which gcc)
|
||||
ifeq ($(CUSTOM_HOST_COMPILER),1)
|
||||
ifneq ($(filter /%,$(HOST_COMPILER)),)
|
||||
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
|
||||
ifneq ($(GCC_PATH),$(HOST_COMPILER))
|
||||
LDFLAGS += -lstdc++
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
|
|
|
@ -10,7 +10,7 @@ Performance Strategies, Runtime Compilation
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -113,6 +113,10 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
|||
endif
|
||||
|
||||
# host compiler
|
||||
ifdef HOST_COMPILER
|
||||
CUSTOM_HOST_COMPILER = 1
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
|
@ -165,6 +169,19 @@ CCFLAGS :=
|
|||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
|
||||
# Link flag for customized HOST_COMPILER with gcc realpath
|
||||
GCC_PATH := $(shell which gcc)
|
||||
ifeq ($(CUSTOM_HOST_COMPILER),1)
|
||||
ifneq ($(filter /%,$(HOST_COMPILER)),)
|
||||
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
|
||||
ifneq ($(GCC_PATH),$(HOST_COMPILER))
|
||||
LDFLAGS += -lstdc++
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
|
@ -281,7 +298,7 @@ LIBRARIES :=
|
|||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 50 52 60 61 70 75 80 86 90
|
||||
SMS ?= 50 52 60 61 70 75 80 86 89 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -55,6 +55,7 @@
|
|||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<sm-arch>sm87</sm-arch>
|
||||
<sm-arch>sm89</sm-arch>
|
||||
<sm-arch>sm90</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
|
|
|
@ -10,7 +10,7 @@ Performance Strategies
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -27,7 +27,7 @@ cudaStreamDestroy, cudaMalloc, cudaMemcpyAsync, cudaFree, cudaMallocHost, cudaEv
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -113,6 +113,10 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
|||
endif
|
||||
|
||||
# host compiler
|
||||
ifdef HOST_COMPILER
|
||||
CUSTOM_HOST_COMPILER = 1
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
|
@ -165,6 +169,19 @@ CCFLAGS :=
|
|||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
|
||||
# Link flag for customized HOST_COMPILER with gcc realpath
|
||||
GCC_PATH := $(shell which gcc)
|
||||
ifeq ($(CUSTOM_HOST_COMPILER),1)
|
||||
ifneq ($(filter /%,$(HOST_COMPILER)),)
|
||||
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
|
||||
ifneq ($(GCC_PATH),$(HOST_COMPILER))
|
||||
LDFLAGS += -lstdc++
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
|
@ -281,7 +298,7 @@ LIBRARIES :=
|
|||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 50 52 60 61 70 75 80 86 90
|
||||
SMS ?= 50 52 60 61 70 75 80 86 89 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -39,6 +39,7 @@
|
|||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<sm-arch>sm87</sm-arch>
|
||||
<sm-arch>sm89</sm-arch>
|
||||
<sm-arch>sm90</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
|
|
|
@ -10,7 +10,7 @@ CPP-CUDA Integration
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -27,7 +27,7 @@ cudaMalloc, cudaMemcpy, cudaFree
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/cppIntegration.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -109,6 +109,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/cppIntegration.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -105,6 +105,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/cppIntegration.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -105,6 +105,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -113,6 +113,10 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
|||
endif
|
||||
|
||||
# host compiler
|
||||
ifdef HOST_COMPILER
|
||||
CUSTOM_HOST_COMPILER = 1
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
|
@ -165,6 +169,19 @@ CCFLAGS :=
|
|||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
|
||||
# Link flag for customized HOST_COMPILER with gcc realpath
|
||||
GCC_PATH := $(shell which gcc)
|
||||
ifeq ($(CUSTOM_HOST_COMPILER),1)
|
||||
ifneq ($(filter /%,$(HOST_COMPILER)),)
|
||||
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
|
||||
ifneq ($(GCC_PATH),$(HOST_COMPILER))
|
||||
LDFLAGS += -lstdc++
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
|
@ -281,7 +298,7 @@ LIBRARIES :=
|
|||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 50 52 60 61 70 75 80 86 90
|
||||
SMS ?= 50 52 60 61 70 75 80 86 89 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -50,6 +50,7 @@
|
|||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<sm-arch>sm87</sm-arch>
|
||||
<sm-arch>sm89</sm-arch>
|
||||
<sm-arch>sm90</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
|
|
|
@ -10,7 +10,7 @@ C++ Function Overloading, CUDA Streams and Events
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -27,7 +27,7 @@ cudaMemcpy, cudaFuncSetCacheConfig, cudaFree, cudaMallocHost, cudaSetDevice, cud
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/cppOverload.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/cppOverload.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/cppOverload.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -113,6 +113,10 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
|||
endif
|
||||
|
||||
# host compiler
|
||||
ifdef HOST_COMPILER
|
||||
CUSTOM_HOST_COMPILER = 1
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
|
@ -165,6 +169,19 @@ CCFLAGS :=
|
|||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
|
||||
# Link flag for customized HOST_COMPILER with gcc realpath
|
||||
GCC_PATH := $(shell which gcc)
|
||||
ifeq ($(CUSTOM_HOST_COMPILER),1)
|
||||
ifneq ($(filter /%,$(HOST_COMPILER)),)
|
||||
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
|
||||
ifneq ($(GCC_PATH),$(HOST_COMPILER))
|
||||
LDFLAGS += -lstdc++
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
|
@ -323,7 +340,7 @@ endif
|
|||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 50 52 60 61 70 75 80 86 90
|
||||
SMS ?= 50 52 60 61 70 75 80 86 89 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -10,7 +10,7 @@ CUDA Systems Integration, OpenMP, Multithreading
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -30,7 +30,7 @@ cudaMemcpy, cudaGetErrorString, cudaFree, cudaGetLastError, cudaSetDevice, cudaG
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in the Dependencies section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/cudaOpenMP.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -108,6 +108,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/cudaOpenMP.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -104,6 +104,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/cudaOpenMP.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -104,6 +104,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -113,6 +113,10 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
|||
endif
|
||||
|
||||
# host compiler
|
||||
ifdef HOST_COMPILER
|
||||
CUSTOM_HOST_COMPILER = 1
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
|
@ -165,6 +169,19 @@ CCFLAGS :=
|
|||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
|
||||
# Link flag for customized HOST_COMPILER with gcc realpath
|
||||
GCC_PATH := $(shell which gcc)
|
||||
ifeq ($(CUSTOM_HOST_COMPILER),1)
|
||||
ifneq ($(filter /%,$(HOST_COMPILER)),)
|
||||
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
|
||||
ifneq ($(GCC_PATH),$(HOST_COMPILER))
|
||||
LDFLAGS += -lstdc++
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
|
@ -287,7 +304,7 @@ LIBRARIES :=
|
|||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 60 61 70 75 80 86 90
|
||||
SMS ?= 60 61 70 75 80 86 89 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -44,6 +44,7 @@
|
|||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<sm-arch>sm87</sm-arch>
|
||||
<sm-arch>sm89</sm-arch>
|
||||
<sm-arch>sm90</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
|
|
|
@ -10,7 +10,7 @@ CUDA Runtime API
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -30,7 +30,7 @@ cudaMemcpy, cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaGetDevicePro
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in the Dependencies section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/fp16ScalarProduct.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/fp16ScalarProduct.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/fp16ScalarProduct.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -113,6 +113,10 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
|||
endif
|
||||
|
||||
# host compiler
|
||||
ifdef HOST_COMPILER
|
||||
CUSTOM_HOST_COMPILER = 1
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
|
@ -165,6 +169,19 @@ CCFLAGS :=
|
|||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
|
||||
# Link flag for customized HOST_COMPILER with gcc realpath
|
||||
GCC_PATH := $(shell which gcc)
|
||||
ifeq ($(CUSTOM_HOST_COMPILER),1)
|
||||
ifneq ($(filter /%,$(HOST_COMPILER)),)
|
||||
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
|
||||
ifneq ($(GCC_PATH),$(HOST_COMPILER))
|
||||
LDFLAGS += -lstdc++
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
|
@ -281,7 +298,7 @@ LIBRARIES :=
|
|||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 50 52 60 61 70 75 80 86 90
|
||||
SMS ?= 50 52 60 61 70 75 80 86 89 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -54,6 +54,7 @@
|
|||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<sm-arch>sm87</sm-arch>
|
||||
<sm-arch>sm89</sm-arch>
|
||||
<sm-arch>sm90</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
|
|
|
@ -10,7 +10,7 @@ CUDA Runtime API, Linear Algebra
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -27,7 +27,7 @@ cudaStreamCreateWithFlags, cudaProfilerStop, cudaMalloc, cudaFree, cudaMallocHos
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/matrixMul.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/matrixMul.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/matrixMul.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -113,6 +113,10 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
|||
endif
|
||||
|
||||
# host compiler
|
||||
ifdef HOST_COMPILER
|
||||
CUSTOM_HOST_COMPILER = 1
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
|
@ -165,6 +169,19 @@ CCFLAGS :=
|
|||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
|
||||
# Link flag for customized HOST_COMPILER with gcc realpath
|
||||
GCC_PATH := $(shell which gcc)
|
||||
ifeq ($(CUSTOM_HOST_COMPILER),1)
|
||||
ifneq ($(filter /%,$(HOST_COMPILER)),)
|
||||
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
|
||||
ifneq ($(GCC_PATH),$(HOST_COMPILER))
|
||||
LDFLAGS += -lstdc++
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
|
@ -285,7 +302,7 @@ FATBIN_FILE := matrixMul_kernel${TARGET_SIZE}.fatbin
|
|||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 50 52 60 61 70 75 80 86 90
|
||||
SMS ?= 50 52 60 61 70 75 80 86 89 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -10,7 +10,7 @@ CUDA Driver API, Matrix Multiply
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -27,7 +27,7 @@ cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuDeviceGetName, cuDeviceTotalMem, c
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/matrixMulDrv.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -111,6 +111,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/matrixMulDrv.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/matrixMulDrv.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -113,6 +113,10 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
|||
endif
|
||||
|
||||
# host compiler
|
||||
ifdef HOST_COMPILER
|
||||
CUSTOM_HOST_COMPILER = 1
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
|
@ -165,6 +169,19 @@ CCFLAGS :=
|
|||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
|
||||
# Link flag for customized HOST_COMPILER with gcc realpath
|
||||
GCC_PATH := $(shell which gcc)
|
||||
ifeq ($(CUSTOM_HOST_COMPILER),1)
|
||||
ifneq ($(filter /%,$(HOST_COMPILER)),)
|
||||
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
|
||||
ifneq ($(GCC_PATH),$(HOST_COMPILER))
|
||||
LDFLAGS += -lstdc++
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
|
|
|
@ -10,7 +10,7 @@ CUDA Driver API, CUDA Dynamically Linked Library
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -27,7 +27,7 @@ cuMemcpyDtoH, cuDeviceGetName, cuParamSeti, cuModuleLoadDataEx, cuModuleGetFunct
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -116,6 +116,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -112,6 +112,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -112,6 +112,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -113,6 +113,10 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
|||
endif
|
||||
|
||||
# host compiler
|
||||
ifdef HOST_COMPILER
|
||||
CUSTOM_HOST_COMPILER = 1
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
|
@ -165,6 +169,19 @@ CCFLAGS :=
|
|||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
|
||||
# Link flag for customized HOST_COMPILER with gcc realpath
|
||||
GCC_PATH := $(shell which gcc)
|
||||
ifeq ($(CUSTOM_HOST_COMPILER),1)
|
||||
ifneq ($(filter /%,$(HOST_COMPILER)),)
|
||||
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
|
||||
ifneq ($(GCC_PATH),$(HOST_COMPILER))
|
||||
LDFLAGS += -lstdc++
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
|
|
|
@ -10,7 +10,7 @@ CUDA Runtime API, Linear Algebra, Runtime Compilation
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -30,7 +30,7 @@ cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemF
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -113,6 +113,6 @@ xcopy /y /e /s "$(CudaToolkitDir)include\cooperative_groups" .\cooperative_group
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -109,6 +109,6 @@ xcopy /y /e /s "$(CudaToolkitDir)include\cooperative_groups" .\cooperative_group
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -109,6 +109,6 @@ xcopy /y /e /s "$(CudaToolkitDir)include\cooperative_groups" .\cooperative_group
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -113,6 +113,10 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
|||
endif
|
||||
|
||||
# host compiler
|
||||
ifdef HOST_COMPILER
|
||||
CUSTOM_HOST_COMPILER = 1
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
|
@ -165,6 +169,19 @@ CCFLAGS :=
|
|||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
|
||||
# Link flag for customized HOST_COMPILER with gcc realpath
|
||||
GCC_PATH := $(shell which gcc)
|
||||
ifeq ($(CUSTOM_HOST_COMPILER),1)
|
||||
ifneq ($(filter /%,$(HOST_COMPILER)),)
|
||||
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
|
||||
ifneq ($(GCC_PATH),$(HOST_COMPILER))
|
||||
LDFLAGS += -lstdc++
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
|
@ -281,7 +298,7 @@ LIBRARIES :=
|
|||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 50 52 60 61 70 75 80 86 90
|
||||
SMS ?= 50 52 60 61 70 75 80 86 89 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -44,6 +44,7 @@
|
|||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<sm-arch>sm87</sm-arch>
|
||||
<sm-arch>sm89</sm-arch>
|
||||
<sm-arch>sm90</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
|
|
|
@ -10,7 +10,7 @@ Data-Parallel Algorithms
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -27,7 +27,7 @@ cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/mergeSort.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -111,6 +111,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/mergeSort.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/mergeSort.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -113,6 +113,10 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
|||
endif
|
||||
|
||||
# host compiler
|
||||
ifdef HOST_COMPILER
|
||||
CUSTOM_HOST_COMPILER = 1
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
|
@ -165,6 +169,19 @@ CCFLAGS :=
|
|||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
|
||||
# Link flag for customized HOST_COMPILER with gcc realpath
|
||||
GCC_PATH := $(shell which gcc)
|
||||
ifeq ($(CUSTOM_HOST_COMPILER),1)
|
||||
ifneq ($(filter /%,$(HOST_COMPILER)),)
|
||||
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
|
||||
ifneq ($(GCC_PATH),$(HOST_COMPILER))
|
||||
LDFLAGS += -lstdc++
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
|
@ -305,20 +322,23 @@ ifeq ($(TARGET_OS),linux)
|
|||
#$(warning $(GCCVERSION))
|
||||
|
||||
IS_MIN_VERSION := $(shell expr `echo $(GCCVERSION)` \>= 51000)
|
||||
|
||||
ifneq ($(CUSTOM_HOST_COMPILER), 1)
|
||||
ifeq ($(IS_MIN_VERSION), 1)
|
||||
$(info >>> GCC Version is greater or equal to 5.1.0 <<<)
|
||||
else
|
||||
$(info >>> Waiving build. Minimum GCC version required is 5.1.0<<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
else
|
||||
$(warning >>> Custom HOST_COMPILER set; skipping GCC version check. This may lead to unintended behavior. Please note the minimum equivalent GCC version is 5.1.0 <<<)
|
||||
endif
|
||||
endif
|
||||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 70 75 80 86 90
|
||||
SMS ?= 70 75 80 86 89 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -53,6 +53,7 @@
|
|||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<sm-arch>sm87</sm-arch>
|
||||
<sm-arch>sm89</sm-arch>
|
||||
<sm-arch>sm90</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
|
|
|
@ -10,7 +10,7 @@ Arrive Wait Barrier
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -30,7 +30,7 @@ cudaStreamCreateWithFlags, cudaFree, cudaDeviceGetAttribute, cudaMallocHost, cud
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/simpleAWBarrier.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/simpleAWBarrier.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/simpleAWBarrier.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -113,6 +113,10 @@ ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
|||
endif
|
||||
|
||||
# host compiler
|
||||
ifdef HOST_COMPILER
|
||||
CUSTOM_HOST_COMPILER = 1
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
|
@ -165,6 +169,19 @@ CCFLAGS :=
|
|||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
|
||||
# Link flag for customized HOST_COMPILER with gcc realpath
|
||||
GCC_PATH := $(shell which gcc)
|
||||
ifeq ($(CUSTOM_HOST_COMPILER),1)
|
||||
ifneq ($(filter /%,$(HOST_COMPILER)),)
|
||||
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
|
||||
ifneq ($(GCC_PATH),$(HOST_COMPILER))
|
||||
LDFLAGS += -lstdc++
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
|
@ -287,7 +304,7 @@ LIBRARIES :=
|
|||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 50 52 60 61 70 75 80 86 90
|
||||
SMS ?= 50 52 60 61 70 75 80 86 89 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -39,6 +39,7 @@
|
|||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<sm-arch>sm87</sm-arch>
|
||||
<sm-arch>sm89</sm-arch>
|
||||
<sm-arch>sm90</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
|
|
|
@ -10,7 +10,7 @@ Assert
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -27,7 +27,7 @@ cudaDeviceSynchronize, cudaGetErrorString
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/simpleAssert.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user