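[driver] fixed some bugs: removed the NVRTC bindings, the PTX header generation and the external ptxas path; defined Platform::name() inline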

This commit is contained in:
Philippe Tillet
2019-02-08 18:05:43 -05:00
parent a9d219cdf5
commit f697fcb887
5 changed files with 13 additions and 107 deletions

View File

@@ -40,7 +40,7 @@ class Platform
{
public:
//Accessors
std::string name() const;
std::string name() const { return "CUDA"; }
std::string version() const;
std::vector<Device> devices() const;
private:

View File

@@ -85,18 +85,6 @@ namespace driver
// Per-library shorthands for the generic DEFINEn macros (defined outside this view).
// CUDA_DEFINE10/11 forward their arguments to DEFINE10/11 with `cuinit` and `cuda_` prepended.
#define CUDA_DEFINE10(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) DEFINE10(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10)
#define CUDA_DEFINE11(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) DEFINE11(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11)
// NVRTC_DEFINE1..11: same forwarding, with `nvrtcinit` and `nvrtc_` prepended.
#define NVRTC_DEFINE1(ret, fname, t1) DEFINE1(nvrtcinit, nvrtc_, ret, fname, t1)
#define NVRTC_DEFINE2(ret, fname, t1, t2) DEFINE2(nvrtcinit, nvrtc_, ret, fname, t1, t2)
#define NVRTC_DEFINE3(ret, fname, t1, t2, t3) DEFINE3(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3)
#define NVRTC_DEFINE4(ret, fname, t1, t2, t3, t4) DEFINE4(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4)
#define NVRTC_DEFINE5(ret, fname, t1, t2, t3, t4, t5) DEFINE5(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5)
#define NVRTC_DEFINE6(ret, fname, t1, t2, t3, t4, t5, t6) DEFINE6(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6)
#define NVRTC_DEFINE7(ret, fname, t1, t2, t3, t4, t5, t6, t7) DEFINE7(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7)
#define NVRTC_DEFINE8(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) DEFINE8(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8)
#define NVRTC_DEFINE9(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) DEFINE9(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9)
#define NVRTC_DEFINE10(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) DEFINE10(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10)
#define NVRTC_DEFINE11(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) DEFINE11(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11)
// NVML_DEFINEn: same forwarding, with `nvmlinit` and `nvml_` prepended (this family also has a zero-argument form).
#define NVML_DEFINE0(ret, fname) DEFINE0(nvmlinit, nvml_, ret, fname)
#define NVML_DEFINE1(ret, fname, t1) DEFINE1(nvmlinit, nvml_, ret, fname, t1)
#define NVML_DEFINE2(ret, fname, t1, t2) DEFINE2(nvmlinit, nvml_, ret, fname, t1, t2)
@@ -127,12 +115,6 @@ bool dispatch::cuinit(){
return cuda_ != nullptr;
}
// Open libnvrtc.so the first time this is called and keep the handle in `nvrtc_`;
// later calls reuse the stored handle. Returns true when a handle is available.
bool dispatch::nvrtcinit(){
  const bool not_loaded_yet = (nvrtc_ == nullptr);
  if(not_loaded_yet){
    // RTLD_LAZY: same dlopen mode as the original call.
    nvrtc_ = dlopen("libnvrtc.so", RTLD_LAZY);
  }
  return nullptr != nvrtc_;
}
bool dispatch::nvmlinit(){
if(nvml_==nullptr)
nvml_ = dlopen("libnvidia-ml.so", RTLD_LAZY);
@@ -194,13 +176,6 @@ CUDA_DEFINE4(CUresult, cuMemsetD8Async, CUdeviceptr, unsigned char, size_t, CUst
// Each line below instantiates one of the *_DEFINEn macros with, in order:
// the return type, the entry-point name, and the n parameter types.
// CUDA driver entries (CUDA_DEFINE1 is presumably defined next to CUDA_DEFINE10/11 -- confirm).
CUDA_DEFINE1(CUresult, cuCtxPushCurrent_v2, CUcontext)
CUDA_DEFINE1(CUresult, cuCtxPopCurrent_v2, CUcontext*)
// NVRTC entries; these go through nvrtcinit / nvrtc_ via the NVRTC_DEFINEn macros.
NVRTC_DEFINE3(nvrtcResult, nvrtcCompileProgram, nvrtcProgram, int, const char **)
NVRTC_DEFINE2(nvrtcResult, nvrtcGetProgramLogSize, nvrtcProgram, size_t *)
NVRTC_DEFINE2(nvrtcResult, nvrtcGetPTX, nvrtcProgram, char *)
NVRTC_DEFINE2(nvrtcResult, nvrtcGetPTXSize, nvrtcProgram, size_t *)
NVRTC_DEFINE6(nvrtcResult, nvrtcCreateProgram, nvrtcProgram *, const char *, const char *, int, const char **, const char **)
NVRTC_DEFINE2(nvrtcResult, nvrtcGetProgramLog, nvrtcProgram, char *)
// NVML entries; these go through nvmlinit / nvml_ via the NVML_DEFINEn macros.
NVML_DEFINE2(nvmlReturn_t, nvmlDeviceGetHandleByPciBusId_v2, const char *, nvmlDevice_t*)
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetClockInfo, nvmlDevice_t, nvmlClockType_t, unsigned int*)
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetMaxClockInfo, nvmlDevice_t, nvmlClockType_t, unsigned int*)

View File

@@ -94,23 +94,6 @@ void check(CUresult err)
}
}
// Turn an NVRTC status code into an exception from exception::nvrtc.
// NVRTC_SUCCESS returns normally; every listed error code throws its matching
// exception type, and any other value throws unknown_error.
void check(nvrtcResult err){
  // Success is the only non-throwing outcome, so handle it up front.
  if(err == NVRTC_SUCCESS)
    return;

  using namespace exception::nvrtc;
  switch(err)
  {
    case NVRTC_ERROR_OUT_OF_MEMORY:
      throw out_of_memory();
    case NVRTC_ERROR_PROGRAM_CREATION_FAILURE:
      throw program_creation_failure();
    case NVRTC_ERROR_INVALID_INPUT:
      throw invalid_input();
    case NVRTC_ERROR_INVALID_PROGRAM:
      throw invalid_program();
    case NVRTC_ERROR_INVALID_OPTION:
      throw invalid_option();
    case NVRTC_ERROR_COMPILATION:
      throw compilation();
    case NVRTC_ERROR_BUILTIN_OPERATION_FAILURE:
      throw builtin_operation_failure();
    default:
      throw unknown_error();
  }
}
void check(cublasStatus_t err){
using namespace exception::cublas;
switch(err)

View File

@@ -34,68 +34,19 @@ namespace tdl
namespace driver
{
// Map a Device::Architecture value onto the CUjit_target_enum constant with the
// same major/minor digits (SM_x_y -> CU_TARGET_COMPUTE_xy).
// NOTE: for an architecture that is not listed, control reaches a bare `throw;`.
// A bare `throw;` rethrows the exception currently being handled and calls
// std::terminate when there is none.
CUjit_target_enum cutarget(Device::Architecture arch){
  using Arch = Device::Architecture;
  switch(arch){
    case Arch::SM_2_0:
      return CU_TARGET_COMPUTE_20;
    case Arch::SM_2_1:
      return CU_TARGET_COMPUTE_21;
    case Arch::SM_3_0:
      return CU_TARGET_COMPUTE_30;
    case Arch::SM_3_5:
      return CU_TARGET_COMPUTE_35;
    case Arch::SM_3_7:
      return CU_TARGET_COMPUTE_37;
    case Arch::SM_5_0:
      return CU_TARGET_COMPUTE_50;
    case Arch::SM_5_2:
      return CU_TARGET_COMPUTE_52;
    case Arch::SM_6_0:
      return CU_TARGET_COMPUTE_60;
    case Arch::SM_6_1:
      return CU_TARGET_COMPUTE_61;
    default:
      break;
  }
  // Unlisted architecture.
  throw;
}
// Pick the PTX ISA version (major, minor) for a compute capability `sm`.
// Only sm.first (the major number) is inspected: 7 -> 6.0, 6 -> 5.0, 5 -> 4.3.
// NOTE: any other major number reaches a bare `throw;`, which rethrows the
// exception currently being handled and calls std::terminate when there is none.
inline std::pair<int, int> ptx(std::pair<int, int> sm){
  switch(sm.first){
    case 7:
      return std::pair<int, int>(6, 0);
    case 6:
      return std::pair<int, int>(5, 0);
    case 5:
      return std::pair<int, int>(4, 3);
    default:
      break;
  }
  throw;
}
// Build the three-line PTX preamble for `device`:
//   .version <major>.<minor>   (from ptx() applied to the compute capability)
//   .target sm_<major><minor>  (the compute capability digits, concatenated)
//   .address_size 64
std::string Module::header(Device const & device){
  auto capability = device.compute_capability();
  auto isa = ptx(capability);
  const std::string version_line =
      ".version " + std::to_string(isa.first) + "." + std::to_string(isa.second) + "\n";
  const std::string target_line =
      ".target sm_" + std::to_string(capability.first) + std::to_string(capability.second) + "\n";
  return version_line + target_line + ".address_size 64\n";
}
// Module constructor: stores the context and the source text, then loads the
// module into `cu_`, either from an object file written by an external compiler
// or by handing the source to cuModuleLoadDataEx.
Module::Module(Context const & context, std::string const & source) : context_(context), source_(header(context.device()) + source){
Module::Module(Context const & context, std::string const & source) : context_(context), source_(source){
// Presumably makes context_ current for the rest of the constructor -- confirm against ContextSwitcher.
ContextSwitcher ctx_switch(context_);
//Path to custom PTX compiler, obtained through tools::getenv("ISAAC_PTXAS")
std::string compiler = tools::getenv("ISAAC_PTXAS");
// A non-empty value selects the external-compiler path.
if(compiler.size()){
auto cc = context.device().compute_capability();
// The compiler output goes to a fixed file name under the context's cache path.
std::string out = context.cache_path() + "tmp.o";
// Command-line options: target name sm_<major><minor>, output file, and the source text itself.
// NOTE(review): source_ is spliced between double quotes without any escaping and the
// resulting string is run through std::system -- quotes or shell metacharacters in the
// source will alter the command.
std::string opt = " --gpu-name sm_" + std::to_string(cc.first) + std::to_string(cc.second)
+ " -o " + out
+ " -ias \"" + source_ + "\"";
std::string cmd = compiler + opt;
// NOTE(review): no exception is being handled at this point, so this bare `throw;`
// calls std::terminate instead of raising a catchable error.
if(std::system(cmd.c_str()) != 0)
throw;
dispatch::cuModuleLoad(&*cu_, out.c_str());
}
//JIT Compilation
else{
// Two JIT options: the size of the error-log buffer and the buffer itself.
CUjit_option opt[] = {CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, CU_JIT_ERROR_LOG_BUFFER};
unsigned int errbufsize = 8096;
std::string errbuf(errbufsize, 0);
//CUjit_target_enum target = cutarget(context.device().architecture());
// Option values, in the same order as `opt`.
void* optval[] = {(void*)(uintptr_t)errbufsize, (void*)errbuf.data()};
try{
dispatch::cuModuleLoadDataEx(&*cu_, source_.data(), 2, opt, optval);
}catch(exception::cuda::base const &){
// Print the contents of the log buffer, then rethrow the caught exception.
std::cerr << "Compilation Failed! Log: " << std::endl;
std::cerr << errbuf << std::endl;
throw;
}
// JIT compile source-code
// Two JIT options: the size of the error-log buffer and the buffer itself.
CUjit_option opt[] = {CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, CU_JIT_ERROR_LOG_BUFFER};
unsigned int errbufsize = 8096;
std::string errbuf(errbufsize, 0);
// Option values, in the same order as `opt`.
void* optval[] = {(void*)(uintptr_t)errbufsize, (void*)errbuf.data()};
try{
dispatch::cuModuleLoadDataEx(&*cu_, source_.data(), 2, opt, optval);
}catch(exception::cuda::base const &){
// Print the contents of the log buffer, then rethrow the caught exception.
std::cerr << "Compilation Failed! Log: " << std::endl;
std::cerr << errbuf << std::endl;
throw;
}
}

View File

@@ -37,9 +37,6 @@ std::string Platform::version() const{
return std::to_string(version);
}
// Name of this platform; always the fixed string "CUDA".
std::string Platform::name() const{
  return std::string("CUDA");
}
std::vector<Device> Platform::devices() const{
std::vector<Device> devices;
int N;