mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2024-12-01 13:39:17 +08:00
189 lines
7.7 KiB
C++
189 lines
7.7 KiB
C++
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* * Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
* contributors may be used to endorse or promote products derived
|
|
* from this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
// This sample demonstrates a simple library to interpose CUDA symbols
|
|
|
|
#define __USE_GNU
|
|
#include <dlfcn.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <unistd.h>
|
|
#include <stdlib.h>
|
|
|
|
#include <cuda.h>
|
|
#include "libcuhook.h"
|
|
|
|
// Private libc entry points used while this library interposes dlsym():
//   __libc_dlopen_mode - internal dlopen interface
//   __libc_dlsym       - internal dlsym interface
// See elf/dl-libc.c for both internal interface functions.
extern "C" {
void* __libc_dlopen_mode(const char* name, int mode);
void* __libc_dlsym(void* map, const char* name);
}
|
|
|
|
// Two-level stringification. Going through CUDA_SYMBOL_STRING gives the
// pre-processor a chance to replace a function name first, such as:
//   cuMemAlloc => cuMemAlloc_v2
// and only then does STRINGIFY turn the result into a string literal.
#define STRINGIFY(token) #token
#define CUDA_SYMBOL_STRING(symbol) STRINGIFY(symbol)
|
|
|
|
// We need to interpose dlsym since anyone using dlopen+dlsym to get the CUDA
|
|
// driver symbols will bypass the hooking mechanism (this includes the CUDA
|
|
// runtime). Its tricky though, since if we replace the real dlsym with ours, we
|
|
// can't dlsym() the real dlsym. To get around that, call the 'private' libc
|
|
// interface called __libc_dlsym to get the real dlsym.
|
|
typedef void* (*fnDlsym)(void*, const char*);
|
|
|
|
static void* real_dlsym(void* handle, const char* symbol) {
|
|
static fnDlsym internal_dlsym = (fnDlsym)__libc_dlsym(
|
|
__libc_dlopen_mode("libdl.so.2", RTLD_LAZY), "dlsym");
|
|
return (*internal_dlsym)(handle, symbol);
|
|
}
|
|
|
|
// Main structure that gets initialized at library load time
|
|
// Choose a unique name, or it can clash with other preloaded libraries.
|
|
struct cuHookInfo {
|
|
void* handle;
|
|
void* preHooks[CU_HOOK_SYMBOLS];
|
|
void* postHooks[CU_HOOK_SYMBOLS];
|
|
|
|
// Debugging/Stats Info
|
|
int bDebugEnabled;
|
|
int hookedFunctionCalls[CU_HOOK_SYMBOLS];
|
|
|
|
cuHookInfo() {
|
|
const char* envHookDebug;
|
|
|
|
// Check environment for CU_HOOK_DEBUG to facilitate debugging
|
|
envHookDebug = getenv("CU_HOOK_DEBUG");
|
|
if (envHookDebug && envHookDebug[0] == '1') {
|
|
bDebugEnabled = 1;
|
|
fprintf(stderr, "* %6d >> CUDA HOOK Library loaded.\n", getpid());
|
|
}
|
|
}
|
|
|
|
~cuHookInfo() {
|
|
if (bDebugEnabled) {
|
|
pid_t pid = getpid();
|
|
// You can gather statistics, timings, etc.
|
|
fprintf(stderr, "* %6d >> CUDA HOOK Library Unloaded - Statistics:\n",
|
|
pid);
|
|
fprintf(stderr, "* %6d >> %20s ... %d\n", pid,
|
|
CUDA_SYMBOL_STRING(cuMemAlloc),
|
|
hookedFunctionCalls[CU_HOOK_MEM_ALLOC]);
|
|
fprintf(stderr, "* %6d >> %20s ... %d\n", pid,
|
|
CUDA_SYMBOL_STRING(cuMemFree),
|
|
hookedFunctionCalls[CU_HOOK_MEM_FREE]);
|
|
fprintf(stderr, "* %6d >> %20s ... %d\n", pid,
|
|
CUDA_SYMBOL_STRING(cuCtxGetCurrent),
|
|
hookedFunctionCalls[CU_HOOK_CTX_GET_CURRENT]);
|
|
fprintf(stderr, "* %6d >> %20s ... %d\n", pid,
|
|
CUDA_SYMBOL_STRING(cuCtxSetCurrent),
|
|
hookedFunctionCalls[CU_HOOK_CTX_SET_CURRENT]);
|
|
fprintf(stderr, "* %6d >> %20s ... %d\n", pid,
|
|
CUDA_SYMBOL_STRING(cuCtxDestroy),
|
|
hookedFunctionCalls[CU_HOOK_CTX_DESTROY]);
|
|
}
|
|
if (handle) {
|
|
dlclose(handle);
|
|
}
|
|
}
|
|
};
|
|
|
|
static struct cuHookInfo cuhl;
|
|
|
|
// Exposed API
|
|
void cuHookRegisterCallback(HookSymbols symbol, HookTypes type,
|
|
void* callback) {
|
|
if (type == PRE_CALL_HOOK) {
|
|
cuhl.preHooks[symbol] = callback;
|
|
} else if (type == POST_CALL_HOOK) {
|
|
cuhl.postHooks[symbol] = callback;
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Interposed Functions
|
|
*/
|
|
void* dlsym(void* handle, const char* symbol) {
|
|
// Early out if not a CUDA driver symbol
|
|
if (strncmp(symbol, "cu", 2) != 0) {
|
|
return (real_dlsym(handle, symbol));
|
|
}
|
|
|
|
if (strcmp(symbol, CUDA_SYMBOL_STRING(cuMemAlloc)) == 0) {
|
|
return (void*)(&cuMemAlloc);
|
|
} else if (strcmp(symbol, CUDA_SYMBOL_STRING(cuMemFree)) == 0) {
|
|
return (void*)(&cuMemFree);
|
|
} else if (strcmp(symbol, CUDA_SYMBOL_STRING(cuCtxGetCurrent)) == 0) {
|
|
return (void*)(&cuCtxGetCurrent);
|
|
} else if (strcmp(symbol, CUDA_SYMBOL_STRING(cuCtxSetCurrent)) == 0) {
|
|
return (void*)(&cuCtxSetCurrent);
|
|
} else if (strcmp(symbol, CUDA_SYMBOL_STRING(cuCtxDestroy)) == 0) {
|
|
return (void*)(&cuCtxDestroy);
|
|
}
|
|
return (real_dlsym(handle, symbol));
|
|
}
|
|
|
|
/*
|
|
** If the user of this library does not wish to include CUDA specific
|
|
*code/headers in the code,
|
|
** then all the parameters can be changed and/or simply casted before calling
|
|
*the callback.
|
|
*/
|
|
#define CU_HOOK_GENERATE_INTERCEPT(hooksymbol, funcname, params, ...) \
|
|
CUresult CUDAAPI funcname params { \
|
|
static void* real_func = \
|
|
(void*)real_dlsym(RTLD_NEXT, CUDA_SYMBOL_STRING(funcname)); \
|
|
CUresult result = CUDA_SUCCESS; \
|
|
\
|
|
if (cuhl.bDebugEnabled) { \
|
|
cuhl.hookedFunctionCalls[hooksymbol]++; \
|
|
} \
|
|
if (cuhl.preHooks[hooksymbol]) { \
|
|
((CUresult CUDAAPI(*) params)cuhl.preHooks[hooksymbol])(__VA_ARGS__); \
|
|
} \
|
|
result = ((CUresult CUDAAPI(*) params)real_func)(__VA_ARGS__); \
|
|
if (cuhl.postHooks[hooksymbol] && result == CUDA_SUCCESS) { \
|
|
((CUresult CUDAAPI(*) params)cuhl.postHooks[hooksymbol])(__VA_ARGS__); \
|
|
} \
|
|
return (result); \
|
|
}
|
|
|
|
CU_HOOK_GENERATE_INTERCEPT(CU_HOOK_MEM_ALLOC, cuMemAlloc,
|
|
(CUdeviceptr * dptr, size_t bytesize), dptr,
|
|
bytesize)
|
|
CU_HOOK_GENERATE_INTERCEPT(CU_HOOK_MEM_FREE, cuMemFree, (CUdeviceptr dptr),
|
|
dptr)
|
|
CU_HOOK_GENERATE_INTERCEPT(CU_HOOK_CTX_GET_CURRENT, cuCtxGetCurrent,
|
|
(CUcontext * pctx), pctx)
|
|
CU_HOOK_GENERATE_INTERCEPT(CU_HOOK_CTX_SET_CURRENT, cuCtxSetCurrent,
|
|
(CUcontext ctx), ctx)
|
|
CU_HOOK_GENERATE_INTERCEPT(CU_HOOK_CTX_DESTROY, cuCtxDestroy, (CUcontext ctx),
|
|
ctx)
|