[driver] fixed some bugs
This commit is contained in:
@@ -40,7 +40,7 @@ class Platform
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
//Accessors
|
//Accessors
|
||||||
std::string name() const;
|
std::string name() const { return "CUDA"; }
|
||||||
std::string version() const;
|
std::string version() const;
|
||||||
std::vector<Device> devices() const;
|
std::vector<Device> devices() const;
|
||||||
private:
|
private:
|
||||||
|
@@ -85,18 +85,6 @@ namespace driver
|
|||||||
#define CUDA_DEFINE10(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) DEFINE10(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10)
|
#define CUDA_DEFINE10(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) DEFINE10(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10)
|
||||||
#define CUDA_DEFINE11(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) DEFINE11(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11)
|
#define CUDA_DEFINE11(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) DEFINE11(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11)
|
||||||
|
|
||||||
#define NVRTC_DEFINE1(ret, fname, t1) DEFINE1(nvrtcinit, nvrtc_, ret, fname, t1)
|
|
||||||
#define NVRTC_DEFINE2(ret, fname, t1, t2) DEFINE2(nvrtcinit, nvrtc_, ret, fname, t1, t2)
|
|
||||||
#define NVRTC_DEFINE3(ret, fname, t1, t2, t3) DEFINE3(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3)
|
|
||||||
#define NVRTC_DEFINE4(ret, fname, t1, t2, t3, t4) DEFINE4(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4)
|
|
||||||
#define NVRTC_DEFINE5(ret, fname, t1, t2, t3, t4, t5) DEFINE5(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5)
|
|
||||||
#define NVRTC_DEFINE6(ret, fname, t1, t2, t3, t4, t5, t6) DEFINE6(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6)
|
|
||||||
#define NVRTC_DEFINE7(ret, fname, t1, t2, t3, t4, t5, t6, t7) DEFINE7(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7)
|
|
||||||
#define NVRTC_DEFINE8(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) DEFINE8(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8)
|
|
||||||
#define NVRTC_DEFINE9(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) DEFINE9(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9)
|
|
||||||
#define NVRTC_DEFINE10(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) DEFINE10(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10)
|
|
||||||
#define NVRTC_DEFINE11(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) DEFINE11(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11)
|
|
||||||
|
|
||||||
#define NVML_DEFINE0(ret, fname) DEFINE0(nvmlinit, nvml_, ret, fname)
|
#define NVML_DEFINE0(ret, fname) DEFINE0(nvmlinit, nvml_, ret, fname)
|
||||||
#define NVML_DEFINE1(ret, fname, t1) DEFINE1(nvmlinit, nvml_, ret, fname, t1)
|
#define NVML_DEFINE1(ret, fname, t1) DEFINE1(nvmlinit, nvml_, ret, fname, t1)
|
||||||
#define NVML_DEFINE2(ret, fname, t1, t2) DEFINE2(nvmlinit, nvml_, ret, fname, t1, t2)
|
#define NVML_DEFINE2(ret, fname, t1, t2) DEFINE2(nvmlinit, nvml_, ret, fname, t1, t2)
|
||||||
@@ -127,12 +115,6 @@ bool dispatch::cuinit(){
|
|||||||
return cuda_ != nullptr;
|
return cuda_ != nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool dispatch::nvrtcinit(){
|
|
||||||
if(nvrtc_==nullptr)
|
|
||||||
nvrtc_ = dlopen("libnvrtc.so", RTLD_LAZY);
|
|
||||||
return nvrtc_ != nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool dispatch::nvmlinit(){
|
bool dispatch::nvmlinit(){
|
||||||
if(nvml_==nullptr)
|
if(nvml_==nullptr)
|
||||||
nvml_ = dlopen("libnvidia-ml.so", RTLD_LAZY);
|
nvml_ = dlopen("libnvidia-ml.so", RTLD_LAZY);
|
||||||
@@ -194,13 +176,6 @@ CUDA_DEFINE4(CUresult, cuMemsetD8Async, CUdeviceptr, unsigned char, size_t, CUst
|
|||||||
CUDA_DEFINE1(CUresult, cuCtxPushCurrent_v2, CUcontext)
|
CUDA_DEFINE1(CUresult, cuCtxPushCurrent_v2, CUcontext)
|
||||||
CUDA_DEFINE1(CUresult, cuCtxPopCurrent_v2, CUcontext*)
|
CUDA_DEFINE1(CUresult, cuCtxPopCurrent_v2, CUcontext*)
|
||||||
|
|
||||||
NVRTC_DEFINE3(nvrtcResult, nvrtcCompileProgram, nvrtcProgram, int, const char **)
|
|
||||||
NVRTC_DEFINE2(nvrtcResult, nvrtcGetProgramLogSize, nvrtcProgram, size_t *)
|
|
||||||
NVRTC_DEFINE2(nvrtcResult, nvrtcGetPTX, nvrtcProgram, char *)
|
|
||||||
NVRTC_DEFINE2(nvrtcResult, nvrtcGetPTXSize, nvrtcProgram, size_t *)
|
|
||||||
NVRTC_DEFINE6(nvrtcResult, nvrtcCreateProgram, nvrtcProgram *, const char *, const char *, int, const char **, const char **)
|
|
||||||
NVRTC_DEFINE2(nvrtcResult, nvrtcGetProgramLog, nvrtcProgram, char *)
|
|
||||||
|
|
||||||
NVML_DEFINE2(nvmlReturn_t, nvmlDeviceGetHandleByPciBusId_v2, const char *, nvmlDevice_t*)
|
NVML_DEFINE2(nvmlReturn_t, nvmlDeviceGetHandleByPciBusId_v2, const char *, nvmlDevice_t*)
|
||||||
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetClockInfo, nvmlDevice_t, nvmlClockType_t, unsigned int*)
|
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetClockInfo, nvmlDevice_t, nvmlClockType_t, unsigned int*)
|
||||||
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetMaxClockInfo, nvmlDevice_t, nvmlClockType_t, unsigned int*)
|
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetMaxClockInfo, nvmlDevice_t, nvmlClockType_t, unsigned int*)
|
||||||
|
@@ -94,23 +94,6 @@ void check(CUresult err)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void check(nvrtcResult err){
|
|
||||||
using namespace exception::nvrtc;
|
|
||||||
|
|
||||||
switch(err)
|
|
||||||
{
|
|
||||||
case NVRTC_SUCCESS: break;
|
|
||||||
case NVRTC_ERROR_OUT_OF_MEMORY: throw out_of_memory();
|
|
||||||
case NVRTC_ERROR_PROGRAM_CREATION_FAILURE: throw program_creation_failure();
|
|
||||||
case NVRTC_ERROR_INVALID_INPUT: throw invalid_input();
|
|
||||||
case NVRTC_ERROR_INVALID_PROGRAM: throw invalid_program();
|
|
||||||
case NVRTC_ERROR_INVALID_OPTION: throw invalid_option();
|
|
||||||
case NVRTC_ERROR_COMPILATION: throw compilation();
|
|
||||||
case NVRTC_ERROR_BUILTIN_OPERATION_FAILURE: throw builtin_operation_failure();
|
|
||||||
default: throw unknown_error();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void check(cublasStatus_t err){
|
void check(cublasStatus_t err){
|
||||||
using namespace exception::cublas;
|
using namespace exception::cublas;
|
||||||
switch(err)
|
switch(err)
|
||||||
|
@@ -34,68 +34,19 @@ namespace tdl
|
|||||||
namespace driver
|
namespace driver
|
||||||
{
|
{
|
||||||
|
|
||||||
CUjit_target_enum cutarget(Device::Architecture arch){
|
Module::Module(Context const & context, std::string const & source) : context_(context), source_(source){
|
||||||
switch(arch){
|
|
||||||
case Device::Architecture::SM_2_0: return CU_TARGET_COMPUTE_20;
|
|
||||||
case Device::Architecture::SM_2_1: return CU_TARGET_COMPUTE_21;
|
|
||||||
case Device::Architecture::SM_3_0: return CU_TARGET_COMPUTE_30;
|
|
||||||
case Device::Architecture::SM_3_5: return CU_TARGET_COMPUTE_35;
|
|
||||||
case Device::Architecture::SM_3_7: return CU_TARGET_COMPUTE_37;
|
|
||||||
case Device::Architecture::SM_5_0: return CU_TARGET_COMPUTE_50;
|
|
||||||
case Device::Architecture::SM_5_2: return CU_TARGET_COMPUTE_52;
|
|
||||||
case Device::Architecture::SM_6_0: return CU_TARGET_COMPUTE_60;
|
|
||||||
case Device::Architecture::SM_6_1: return CU_TARGET_COMPUTE_61;
|
|
||||||
default: throw;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline std::pair<int, int> ptx(std::pair<int, int> sm){
|
|
||||||
if(sm.first == 7) return {6, 0};
|
|
||||||
if(sm.first == 6) return {5, 0};
|
|
||||||
if(sm.first == 5) return {4, 3};
|
|
||||||
throw;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string Module::header(Device const & device){
|
|
||||||
auto cc = device.compute_capability();
|
|
||||||
auto vptx = ptx(cc);
|
|
||||||
std::string header;
|
|
||||||
header += ".version " + std::to_string(vptx.first) + "." + std::to_string(vptx.second) + "\n";
|
|
||||||
header += ".target sm_" + std::to_string(cc.first) + std::to_string(cc.second) + "\n";
|
|
||||||
header += ".address_size 64\n";
|
|
||||||
return header;
|
|
||||||
}
|
|
||||||
|
|
||||||
Module::Module(Context const & context, std::string const & source) : context_(context), source_(header(context.device()) + source){
|
|
||||||
ContextSwitcher ctx_switch(context_);
|
ContextSwitcher ctx_switch(context_);
|
||||||
|
// JIT compile source-code
|
||||||
//Path to custom PTX compiler
|
CUjit_option opt[] = {CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, CU_JIT_ERROR_LOG_BUFFER};
|
||||||
std::string compiler = tools::getenv("ISAAC_PTXAS");
|
unsigned int errbufsize = 8096;
|
||||||
if(compiler.size()){
|
std::string errbuf(errbufsize, 0);
|
||||||
auto cc = context.device().compute_capability();
|
void* optval[] = {(void*)(uintptr_t)errbufsize, (void*)errbuf.data()};
|
||||||
std::string out = context.cache_path() + "tmp.o";
|
try{
|
||||||
std::string opt = " --gpu-name sm_" + std::to_string(cc.first) + std::to_string(cc.second)
|
dispatch::cuModuleLoadDataEx(&*cu_, source_.data(), 2, opt, optval);
|
||||||
+ " -o " + out
|
}catch(exception::cuda::base const &){
|
||||||
+ " -ias \"" + source_ + "\"";
|
std::cerr << "Compilation Failed! Log: " << std::endl;
|
||||||
std::string cmd = compiler + opt;
|
std::cerr << errbuf << std::endl;
|
||||||
if(std::system(cmd.c_str()) != 0)
|
throw;
|
||||||
throw;
|
|
||||||
dispatch::cuModuleLoad(&*cu_, out.c_str());
|
|
||||||
}
|
|
||||||
//JIT Compilation
|
|
||||||
else{
|
|
||||||
CUjit_option opt[] = {CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, CU_JIT_ERROR_LOG_BUFFER};
|
|
||||||
unsigned int errbufsize = 8096;
|
|
||||||
std::string errbuf(errbufsize, 0);
|
|
||||||
//CUjit_target_enum target = cutarget(context.device().architecture());
|
|
||||||
void* optval[] = {(void*)(uintptr_t)errbufsize, (void*)errbuf.data()};
|
|
||||||
try{
|
|
||||||
dispatch::cuModuleLoadDataEx(&*cu_, source_.data(), 2, opt, optval);
|
|
||||||
}catch(exception::cuda::base const &){
|
|
||||||
std::cerr << "Compilation Failed! Log: " << std::endl;
|
|
||||||
std::cerr << errbuf << std::endl;
|
|
||||||
throw;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -37,9 +37,6 @@ std::string Platform::version() const{
|
|||||||
return std::to_string(version);
|
return std::to_string(version);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string Platform::name() const
|
|
||||||
{ return (std::string)"CUDA"; }
|
|
||||||
|
|
||||||
std::vector<Device> Platform::devices() const{
|
std::vector<Device> Platform::devices() const{
|
||||||
std::vector<Device> devices;
|
std::vector<Device> devices;
|
||||||
int N;
|
int N;
|
||||||
|
Reference in New Issue
Block a user