diff --git a/include/driver/platform.h b/include/driver/platform.h index 2a3b8fcdb..add506e82 100755 --- a/include/driver/platform.h +++ b/include/driver/platform.h @@ -40,7 +40,7 @@ class Platform { public: //Accessors - std::string name() const; + std::string name() const { return "CUDA"; } std::string version() const; std::vector devices() const; private: diff --git a/lib/driver/dispatch.cpp b/lib/driver/dispatch.cpp index 4551bf072..2d0cd5232 100755 --- a/lib/driver/dispatch.cpp +++ b/lib/driver/dispatch.cpp @@ -85,18 +85,6 @@ namespace driver #define CUDA_DEFINE10(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) DEFINE10(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) #define CUDA_DEFINE11(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) DEFINE11(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) -#define NVRTC_DEFINE1(ret, fname, t1) DEFINE1(nvrtcinit, nvrtc_, ret, fname, t1) -#define NVRTC_DEFINE2(ret, fname, t1, t2) DEFINE2(nvrtcinit, nvrtc_, ret, fname, t1, t2) -#define NVRTC_DEFINE3(ret, fname, t1, t2, t3) DEFINE3(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3) -#define NVRTC_DEFINE4(ret, fname, t1, t2, t3, t4) DEFINE4(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4) -#define NVRTC_DEFINE5(ret, fname, t1, t2, t3, t4, t5) DEFINE5(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5) -#define NVRTC_DEFINE6(ret, fname, t1, t2, t3, t4, t5, t6) DEFINE6(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6) -#define NVRTC_DEFINE7(ret, fname, t1, t2, t3, t4, t5, t6, t7) DEFINE7(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7) -#define NVRTC_DEFINE8(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) DEFINE8(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) -#define NVRTC_DEFINE9(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) DEFINE9(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) -#define NVRTC_DEFINE10(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) DEFINE10(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) -#define NVRTC_DEFINE11(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) DEFINE11(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) - #define NVML_DEFINE0(ret, fname) DEFINE0(nvmlinit, nvml_, ret, fname) #define NVML_DEFINE1(ret, fname, t1) DEFINE1(nvmlinit, nvml_, ret, fname, t1) #define NVML_DEFINE2(ret, fname, t1, t2) DEFINE2(nvmlinit, nvml_, ret, fname, t1, t2) @@ -127,12 +115,6 @@ bool dispatch::cuinit(){ return cuda_ != nullptr; } -bool dispatch::nvrtcinit(){ - if(nvrtc_==nullptr) - nvrtc_ = dlopen("libnvrtc.so", RTLD_LAZY); - return nvrtc_ != nullptr; -} - bool dispatch::nvmlinit(){ if(nvml_==nullptr) nvml_ = dlopen("libnvidia-ml.so", RTLD_LAZY); @@ -194,13 +176,6 @@ CUDA_DEFINE4(CUresult, cuMemsetD8Async, CUdeviceptr, unsigned char, size_t, CUst CUDA_DEFINE1(CUresult, cuCtxPushCurrent_v2, CUcontext) CUDA_DEFINE1(CUresult, cuCtxPopCurrent_v2, CUcontext*) -NVRTC_DEFINE3(nvrtcResult, nvrtcCompileProgram, nvrtcProgram, int, const char **) -NVRTC_DEFINE2(nvrtcResult, nvrtcGetProgramLogSize, nvrtcProgram, size_t *) -NVRTC_DEFINE2(nvrtcResult, nvrtcGetPTX, nvrtcProgram, char *) -NVRTC_DEFINE2(nvrtcResult, nvrtcGetPTXSize, nvrtcProgram, size_t *) -NVRTC_DEFINE6(nvrtcResult, nvrtcCreateProgram, nvrtcProgram *, const char *, const char *, int, const char **, const char **) -NVRTC_DEFINE2(nvrtcResult, nvrtcGetProgramLog, nvrtcProgram, char *) - NVML_DEFINE2(nvmlReturn_t, nvmlDeviceGetHandleByPciBusId_v2, const char *, nvmlDevice_t*) NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetClockInfo, nvmlDevice_t, nvmlClockType_t, unsigned int*) NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetMaxClockInfo, nvmlDevice_t, nvmlClockType_t, unsigned int*) diff --git a/lib/driver/error.cpp b/lib/driver/error.cpp index 42d3a780d..7e7dc9d75 100755 --- a/lib/driver/error.cpp +++ b/lib/driver/error.cpp @@ -94,23 +94,6 @@ void check(CUresult err) } } -void check(nvrtcResult err){ - using namespace exception::nvrtc; - - switch(err) - { - case NVRTC_SUCCESS: break; - case NVRTC_ERROR_OUT_OF_MEMORY: throw out_of_memory(); - case NVRTC_ERROR_PROGRAM_CREATION_FAILURE: throw program_creation_failure(); - case NVRTC_ERROR_INVALID_INPUT: throw invalid_input(); - case NVRTC_ERROR_INVALID_PROGRAM: throw invalid_program(); - case NVRTC_ERROR_INVALID_OPTION: throw invalid_option(); - case NVRTC_ERROR_COMPILATION: throw compilation(); - case NVRTC_ERROR_BUILTIN_OPERATION_FAILURE: throw builtin_operation_failure(); - default: throw unknown_error(); - } -} - void check(cublasStatus_t err){ using namespace exception::cublas; switch(err) diff --git a/lib/driver/module.cpp b/lib/driver/module.cpp index 06a6ff437..c61482cbc 100755 --- a/lib/driver/module.cpp +++ b/lib/driver/module.cpp @@ -34,68 +34,19 @@ namespace tdl namespace driver { -CUjit_target_enum cutarget(Device::Architecture arch){ - switch(arch){ - case Device::Architecture::SM_2_0: return CU_TARGET_COMPUTE_20; - case Device::Architecture::SM_2_1: return CU_TARGET_COMPUTE_21; - case Device::Architecture::SM_3_0: return CU_TARGET_COMPUTE_30; - case Device::Architecture::SM_3_5: return CU_TARGET_COMPUTE_35; - case Device::Architecture::SM_3_7: return CU_TARGET_COMPUTE_37; - case Device::Architecture::SM_5_0: return CU_TARGET_COMPUTE_50; - case Device::Architecture::SM_5_2: return CU_TARGET_COMPUTE_52; - case Device::Architecture::SM_6_0: return CU_TARGET_COMPUTE_60; - case Device::Architecture::SM_6_1: return CU_TARGET_COMPUTE_61; - default: throw; - } -} - -inline std::pair ptx(std::pair sm){ - if(sm.first == 7) return {6, 0}; - if(sm.first == 6) return {5, 0}; - if(sm.first == 5) return {4, 3}; - throw; -} - -std::string Module::header(Device const & device){ - auto cc = device.compute_capability(); - auto vptx = ptx(cc); - std::string header; - header += ".version " + std::to_string(vptx.first) + "." + std::to_string(vptx.second) + "\n"; - header += ".target sm_" + std::to_string(cc.first) + std::to_string(cc.second) + "\n"; - header += ".address_size 64\n"; - return header; -} - -Module::Module(Context const & context, std::string const & source) : context_(context), source_(header(context.device()) + source){ +Module::Module(Context const & context, std::string const & source) : context_(context), source_(source){ ContextSwitcher ctx_switch(context_); - - //Path to custom PTX compiler - std::string compiler = tools::getenv("ISAAC_PTXAS"); - if(compiler.size()){ - auto cc = context.device().compute_capability(); - std::string out = context.cache_path() + "tmp.o"; - std::string opt = " --gpu-name sm_" + std::to_string(cc.first) + std::to_string(cc.second) - + " -o " + out - + " -ias \"" + source_ + "\""; - std::string cmd = compiler + opt; - if(std::system(cmd.c_str()) != 0) - throw; - dispatch::cuModuleLoad(&*cu_, out.c_str()); - } - //JIT Compilation - else{ - CUjit_option opt[] = {CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, CU_JIT_ERROR_LOG_BUFFER}; - unsigned int errbufsize = 8096; - std::string errbuf(errbufsize, 0); - //CUjit_target_enum target = cutarget(context.device().architecture()); - void* optval[] = {(void*)(uintptr_t)errbufsize, (void*)errbuf.data()}; - try{ - dispatch::cuModuleLoadDataEx(&*cu_, source_.data(), 2, opt, optval); - }catch(exception::cuda::base const &){ - std::cerr << "Compilation Failed! Log: " << std::endl; - std::cerr << errbuf << std::endl; - throw; - } + // JIT compile source-code + CUjit_option opt[] = {CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, CU_JIT_ERROR_LOG_BUFFER}; + unsigned int errbufsize = 8096; + std::string errbuf(errbufsize, 0); + void* optval[] = {(void*)(uintptr_t)errbufsize, (void*)errbuf.data()}; + try{ + dispatch::cuModuleLoadDataEx(&*cu_, source_.data(), 2, opt, optval); + }catch(exception::cuda::base const &){ + std::cerr << "Compilation Failed! Log: " << std::endl; + std::cerr << errbuf << std::endl; + throw; } } diff --git a/lib/driver/platform.cpp b/lib/driver/platform.cpp index b9722933d..2fa9933d3 100755 --- a/lib/driver/platform.cpp +++ b/lib/driver/platform.cpp @@ -37,9 +37,6 @@ std::string Platform::version() const{ return std::to_string(version); } -std::string Platform::name() const -{ return (std::string)"CUDA"; } - std::vector Platform::devices() const{ std::vector devices; int N;