[DRIVER] Add CUDA P2P support (#209)

This commit is contained in:
milesial
2021-08-21 06:00:54 +02:00
committed by GitHub
parent 6aa5720d75
commit 5b29da719d
7 changed files with 147 additions and 21 deletions

View File

@@ -67,6 +67,7 @@ public:
// Clock-related members. NOTE(review): definitions are not visible in this hunk; units and side effects are not shown here — confirm in the implementation.
size_t max_sm_clock() const;
size_t max_mem_clock() const;
void set_max_clock();
// Peer-to-peer (P2P) access. NOTE(review): presumably enables access to the context that owns peer_mem_ptr — confirm against the definition.
void enable_peer_access(CUdeviceptr peer_mem_ptr) const;
// Target
std::unique_ptr<codegen::target> make_target() const;

View File

@@ -108,8 +108,9 @@ public:
static CUresult cuCtxGetDevice(CUdevice* result);
static CUresult cuMemsetD8Async(CUdeviceptr dst, unsigned char x, size_t N, CUstream stream);
static CUresult cuFuncGetAttribute(int* pi, CUfunction_attribute attrib, CUfunction hfunc);
// NOTE(review): the next four lines declare the same two functions twice, differing only in the
// spacing before one parameter list. This looks like the old and new sides of a diff rendered
// without +/- markers — confirm that only one copy of each exists in the real header.
static CUresult cuFuncSetAttribute(CUfunction hfunc, CUfunction_attribute attrib, int value);
static CUresult cuFuncSetCacheConfig (CUfunction hfunc, CUfunc_cache config);
static CUresult cuFuncSetAttribute(CUfunction hfunc, CUfunction_attribute attrib, int value);
static CUresult cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
// Same parameter list as the CUDA driver API function of this name; the driver API documents
// the flags argument as reserved (must be 0).
static CUresult cuCtxEnablePeerAccess(CUcontext peerContext, unsigned int flags);
// NVML
static nvmlReturn_t nvmlDeviceGetHandleByPciBusId_v2( const char* pciBusId, nvmlDevice_t* device);
static nvmlReturn_t nvmlDeviceGetClockInfo(nvmlDevice_t device, nvmlClockType_t type, unsigned int *clock);
@@ -178,6 +179,7 @@ private:
// Untyped static pointer slots, each named after a wrapped function with a trailing underscore.
// NOTE(review): presumably each caches the resolved address of the corresponding driver symbol;
// the code that fills them is not shown here — confirm.
static void* cuFuncGetAttribute_;
static void* cuFuncSetAttribute_;
static void* cuFuncSetCacheConfig_;
static void* cuCtxEnablePeerAccess_;
// NVML
static void* nvmlInit_v2_;
static void* nvmlDeviceGetHandleByPciBusId_v2_;