[driver] fixed some bugs
This commit is contained in:
@@ -40,7 +40,7 @@ class Platform
|
||||
{
|
||||
public:
|
||||
//Accessors
|
||||
std::string name() const;
|
||||
std::string name() const { return "CUDA"; }
|
||||
std::string version() const;
|
||||
std::vector<Device> devices() const;
|
||||
private:
|
||||
|
@@ -85,18 +85,6 @@ namespace driver
|
||||
#define CUDA_DEFINE10(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) DEFINE10(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10)
|
||||
#define CUDA_DEFINE11(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) DEFINE11(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11)
|
||||
|
||||
// NVRTC_DEFINE1 .. NVRTC_DEFINE11: one macro per parameter count (1 to 11).
// Each forwards to the DEFINEn macro of the same arity, passing `nvrtcinit` and
// `nvrtc_` as the first two arguments, followed by the return type, the
// function name and the parameter types unchanged.
// NOTE(review): DEFINEn is not visible here; presumably it generates a
// dispatch wrapper that runs the init function and resolves `fname` from the
// given library handle. Confirm against its definition.
#define NVRTC_DEFINE1(ret, fname, t1) DEFINE1(nvrtcinit, nvrtc_, ret, fname, t1)
|
||||
#define NVRTC_DEFINE2(ret, fname, t1, t2) DEFINE2(nvrtcinit, nvrtc_, ret, fname, t1, t2)
|
||||
#define NVRTC_DEFINE3(ret, fname, t1, t2, t3) DEFINE3(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3)
|
||||
#define NVRTC_DEFINE4(ret, fname, t1, t2, t3, t4) DEFINE4(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4)
|
||||
#define NVRTC_DEFINE5(ret, fname, t1, t2, t3, t4, t5) DEFINE5(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5)
|
||||
#define NVRTC_DEFINE6(ret, fname, t1, t2, t3, t4, t5, t6) DEFINE6(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6)
|
||||
#define NVRTC_DEFINE7(ret, fname, t1, t2, t3, t4, t5, t6, t7) DEFINE7(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7)
|
||||
#define NVRTC_DEFINE8(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) DEFINE8(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8)
|
||||
#define NVRTC_DEFINE9(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) DEFINE9(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9)
|
||||
#define NVRTC_DEFINE10(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) DEFINE10(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10)
|
||||
#define NVRTC_DEFINE11(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) DEFINE11(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11)
|
||||
|
||||
#define NVML_DEFINE0(ret, fname) DEFINE0(nvmlinit, nvml_, ret, fname)
|
||||
#define NVML_DEFINE1(ret, fname, t1) DEFINE1(nvmlinit, nvml_, ret, fname, t1)
|
||||
#define NVML_DEFINE2(ret, fname, t1, t2) DEFINE2(nvmlinit, nvml_, ret, fname, t1, t2)
|
||||
@@ -127,12 +115,6 @@ bool dispatch::cuinit(){
|
||||
return cuda_ != nullptr;
|
||||
}
|
||||
|
||||
bool dispatch::nvrtcinit(){
|
||||
if(nvrtc_==nullptr)
|
||||
nvrtc_ = dlopen("libnvrtc.so", RTLD_LAZY);
|
||||
return nvrtc_ != nullptr;
|
||||
}
|
||||
|
||||
bool dispatch::nvmlinit(){
|
||||
if(nvml_==nullptr)
|
||||
nvml_ = dlopen("libnvidia-ml.so", RTLD_LAZY);
|
||||
@@ -194,13 +176,6 @@ CUDA_DEFINE4(CUresult, cuMemsetD8Async, CUdeviceptr, unsigned char, size_t, CUst
|
||||
CUDA_DEFINE1(CUresult, cuCtxPushCurrent_v2, CUcontext)
|
||||
CUDA_DEFINE1(CUresult, cuCtxPopCurrent_v2, CUcontext*)
|
||||
|
||||
// One NVRTC_DEFINEn instantiation per NVRTC entry point used by the driver.
// Arguments are, in order: the return type, the NVRTC function name, and the
// function's parameter types; n is the number of parameters.
// NOTE(review): what each line expands to depends on DEFINEn, which is not
// visible here; presumably a dispatch:: wrapper of the same name. Confirm.
NVRTC_DEFINE3(nvrtcResult, nvrtcCompileProgram, nvrtcProgram, int, const char **)
|
||||
NVRTC_DEFINE2(nvrtcResult, nvrtcGetProgramLogSize, nvrtcProgram, size_t *)
|
||||
NVRTC_DEFINE2(nvrtcResult, nvrtcGetPTX, nvrtcProgram, char *)
|
||||
NVRTC_DEFINE2(nvrtcResult, nvrtcGetPTXSize, nvrtcProgram, size_t *)
|
||||
NVRTC_DEFINE6(nvrtcResult, nvrtcCreateProgram, nvrtcProgram *, const char *, const char *, int, const char **, const char **)
|
||||
NVRTC_DEFINE2(nvrtcResult, nvrtcGetProgramLog, nvrtcProgram, char *)
|
||||
|
||||
NVML_DEFINE2(nvmlReturn_t, nvmlDeviceGetHandleByPciBusId_v2, const char *, nvmlDevice_t*)
|
||||
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetClockInfo, nvmlDevice_t, nvmlClockType_t, unsigned int*)
|
||||
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetMaxClockInfo, nvmlDevice_t, nvmlClockType_t, unsigned int*)
|
||||
|
@@ -94,23 +94,6 @@ void check(CUresult err)
|
||||
}
|
||||
}
|
||||
|
||||
// Converts an NVRTC status code into an exception from exception::nvrtc.
// NVRTC_SUCCESS returns normally; each listed error code throws the exception
// type of the matching name; any other code throws unknown_error.
void check(nvrtcResult err){
  using namespace exception::nvrtc;

  if(err == NVRTC_SUCCESS)
    return;

  if(err == NVRTC_ERROR_OUT_OF_MEMORY)
    throw out_of_memory();
  if(err == NVRTC_ERROR_PROGRAM_CREATION_FAILURE)
    throw program_creation_failure();
  if(err == NVRTC_ERROR_INVALID_INPUT)
    throw invalid_input();
  if(err == NVRTC_ERROR_INVALID_PROGRAM)
    throw invalid_program();
  if(err == NVRTC_ERROR_INVALID_OPTION)
    throw invalid_option();
  if(err == NVRTC_ERROR_COMPILATION)
    throw compilation();
  if(err == NVRTC_ERROR_BUILTIN_OPERATION_FAILURE)
    throw builtin_operation_failure();

  // Every code not handled above.
  throw unknown_error();
}
|
||||
|
||||
void check(cublasStatus_t err){
|
||||
using namespace exception::cublas;
|
||||
switch(err)
|
||||
|
@@ -34,68 +34,19 @@ namespace tdl
|
||||
namespace driver
|
||||
{
|
||||
|
||||
// Maps a Device::Architecture value (SM_x_y) to the CU_TARGET_COMPUTE_xy
// constant carrying the same version number.
//
// Throws std::runtime_error (declared in <stdexcept>) when `arch` has no entry
// in the table below. A bare `throw;` must not be used for this case: with no
// exception in flight it calls std::terminate() rather than raising an error
// the caller can handle.
CUjit_target_enum cutarget(Device::Architecture arch){
  switch(arch){
    case Device::Architecture::SM_2_0: return CU_TARGET_COMPUTE_20;
    case Device::Architecture::SM_2_1: return CU_TARGET_COMPUTE_21;
    case Device::Architecture::SM_3_0: return CU_TARGET_COMPUTE_30;
    case Device::Architecture::SM_3_5: return CU_TARGET_COMPUTE_35;
    case Device::Architecture::SM_3_7: return CU_TARGET_COMPUTE_37;
    case Device::Architecture::SM_5_0: return CU_TARGET_COMPUTE_50;
    case Device::Architecture::SM_5_2: return CU_TARGET_COMPUTE_52;
    case Device::Architecture::SM_6_0: return CU_TARGET_COMPUTE_60;
    case Device::Architecture::SM_6_1: return CU_TARGET_COMPUTE_61;
    default:
      throw std::runtime_error("cutarget: unsupported device architecture");
  }
}
|
||||
|
||||
// Selects a (major, minor) version pair from a compute capability `sm`,
// given as (major, minor). Only the major number is consulted:
//   7 -> {6, 0},  6 -> {5, 0},  5 -> {4, 3}.
//
// Throws std::runtime_error (declared in <stdexcept>) for any other major
// number. A bare `throw;` must not be used for this case: with no exception in
// flight it calls std::terminate() rather than raising a catchable error.
inline std::pair<int, int> ptx(std::pair<int, int> sm){
  switch(sm.first){
    case 7: return {6, 0};
    case 6: return {5, 0};
    case 5: return {4, 3};
    default:
      throw std::runtime_error("ptx: unsupported compute capability "
                               + std::to_string(sm.first) + "." + std::to_string(sm.second));
  }
}
|
||||
|
||||
// Builds the three-line preamble for `device`:
//   .version <a>.<b>      where (a, b) = ptx(compute capability)
//   .target sm_<major><minor>
//   .address_size 64
// Every line, including the last, ends with '\n'.
std::string Module::header(Device const & device){
  const auto capability = device.compute_capability();
  const auto isa = ptx(capability);

  std::string text = ".version ";
  text += std::to_string(isa.first);
  text += ".";
  text += std::to_string(isa.second);
  text += "\n";

  text += ".target sm_";
  text += std::to_string(capability.first);
  text += std::to_string(capability.second);
  text += "\n";

  text += ".address_size 64\n";
  return text;
}
|
||||
|
||||
Module::Module(Context const & context, std::string const & source) : context_(context), source_(header(context.device()) + source){
|
||||
Module::Module(Context const & context, std::string const & source) : context_(context), source_(source){
|
||||
ContextSwitcher ctx_switch(context_);
|
||||
|
||||
//Path to custom PTX compiler
|
||||
std::string compiler = tools::getenv("ISAAC_PTXAS");
|
||||
if(compiler.size()){
|
||||
auto cc = context.device().compute_capability();
|
||||
std::string out = context.cache_path() + "tmp.o";
|
||||
std::string opt = " --gpu-name sm_" + std::to_string(cc.first) + std::to_string(cc.second)
|
||||
+ " -o " + out
|
||||
+ " -ias \"" + source_ + "\"";
|
||||
std::string cmd = compiler + opt;
|
||||
if(std::system(cmd.c_str()) != 0)
|
||||
throw;
|
||||
dispatch::cuModuleLoad(&*cu_, out.c_str());
|
||||
}
|
||||
//JIT Compilation
|
||||
else{
|
||||
CUjit_option opt[] = {CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, CU_JIT_ERROR_LOG_BUFFER};
|
||||
unsigned int errbufsize = 8096;
|
||||
std::string errbuf(errbufsize, 0);
|
||||
//CUjit_target_enum target = cutarget(context.device().architecture());
|
||||
void* optval[] = {(void*)(uintptr_t)errbufsize, (void*)errbuf.data()};
|
||||
try{
|
||||
dispatch::cuModuleLoadDataEx(&*cu_, source_.data(), 2, opt, optval);
|
||||
}catch(exception::cuda::base const &){
|
||||
std::cerr << "Compilation Failed! Log: " << std::endl;
|
||||
std::cerr << errbuf << std::endl;
|
||||
throw;
|
||||
}
|
||||
// JIT compile source-code
|
||||
CUjit_option opt[] = {CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, CU_JIT_ERROR_LOG_BUFFER};
|
||||
unsigned int errbufsize = 8096;
|
||||
std::string errbuf(errbufsize, 0);
|
||||
void* optval[] = {(void*)(uintptr_t)errbufsize, (void*)errbuf.data()};
|
||||
try{
|
||||
dispatch::cuModuleLoadDataEx(&*cu_, source_.data(), 2, opt, optval);
|
||||
}catch(exception::cuda::base const &){
|
||||
std::cerr << "Compilation Failed! Log: " << std::endl;
|
||||
std::cerr << errbuf << std::endl;
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -37,9 +37,6 @@ std::string Platform::version() const{
|
||||
return std::to_string(version);
|
||||
}
|
||||
|
||||
std::string Platform::name() const
|
||||
{ return (std::string)"CUDA"; }
|
||||
|
||||
std::vector<Device> Platform::devices() const{
|
||||
std::vector<Device> devices;
|
||||
int N;
|
||||
|
Reference in New Issue
Block a user