[CI] run clang-format (#24)
This commit is contained in:
408
lib/driver/dispatch.cc
Executable file → Normal file
408
lib/driver/dispatch.cc
Executable file → Normal file
@@ -1,107 +1,152 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "triton/driver/dispatch.h"
|
||||
|
||||
namespace triton
|
||||
{
|
||||
namespace driver
|
||||
{
|
||||
namespace triton {
|
||||
namespace driver {
|
||||
|
||||
//Helpers for function definition
|
||||
#define DEFINE0(init, hlib, ret, fname) ret dispatch::fname()\
|
||||
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname); }\
|
||||
void* dispatch::fname ## _;
|
||||
// Helpers for function definition
|
||||
#define DEFINE0(init, hlib, ret, fname) \
|
||||
ret dispatch::fname() { \
|
||||
return f_impl<dispatch::init>(hlib, fname, fname##_, #fname); \
|
||||
} \
|
||||
void *dispatch::fname##_;
|
||||
|
||||
#define DEFINE1(init, hlib, ret, fname, t1) ret dispatch::fname(t1 a)\
|
||||
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a); }\
|
||||
void* dispatch::fname ## _;
|
||||
#define DEFINE1(init, hlib, ret, fname, t1) \
|
||||
ret dispatch::fname(t1 a) { \
|
||||
return f_impl<dispatch::init>(hlib, fname, fname##_, #fname, a); \
|
||||
} \
|
||||
void *dispatch::fname##_;
|
||||
|
||||
#define DEFINE2(init, hlib, ret, fname, t1, t2) ret dispatch::fname(t1 a, t2 b)\
|
||||
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b); }\
|
||||
void* dispatch::fname ## _;
|
||||
#define DEFINE2(init, hlib, ret, fname, t1, t2) \
|
||||
ret dispatch::fname(t1 a, t2 b) { \
|
||||
return f_impl<dispatch::init>(hlib, fname, fname##_, #fname, a, b); \
|
||||
} \
|
||||
void *dispatch::fname##_;
|
||||
|
||||
#define DEFINE3(init, hlib, ret, fname, t1, t2, t3) ret dispatch::fname(t1 a, t2 b, t3 c)\
|
||||
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c); }\
|
||||
void* dispatch::fname ## _;
|
||||
#define DEFINE3(init, hlib, ret, fname, t1, t2, t3) \
|
||||
ret dispatch::fname(t1 a, t2 b, t3 c) { \
|
||||
return f_impl<dispatch::init>(hlib, fname, fname##_, #fname, a, b, c); \
|
||||
} \
|
||||
void *dispatch::fname##_;
|
||||
|
||||
#define DEFINE4(init, hlib, ret, fname, t1, t2, t3, t4) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d)\
|
||||
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d); }\
|
||||
void* dispatch::fname ## _;
|
||||
#define DEFINE4(init, hlib, ret, fname, t1, t2, t3, t4) \
|
||||
ret dispatch::fname(t1 a, t2 b, t3 c, t4 d) { \
|
||||
return f_impl<dispatch::init>(hlib, fname, fname##_, #fname, a, b, c, d); \
|
||||
} \
|
||||
void *dispatch::fname##_;
|
||||
|
||||
#define DEFINE5(init, hlib, ret, fname, t1, t2, t3, t4, t5) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e)\
|
||||
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e); }\
|
||||
void* dispatch::fname ## _;
|
||||
#define DEFINE5(init, hlib, ret, fname, t1, t2, t3, t4, t5) \
|
||||
ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e) { \
|
||||
return f_impl<dispatch::init>(hlib, fname, fname##_, #fname, a, b, c, d, \
|
||||
e); \
|
||||
} \
|
||||
void *dispatch::fname##_;
|
||||
|
||||
#define DEFINE6(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f)\
|
||||
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f); }\
|
||||
void* dispatch::fname ## _;
|
||||
#define DEFINE6(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6) \
|
||||
ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f) { \
|
||||
return f_impl<dispatch::init>(hlib, fname, fname##_, #fname, a, b, c, d, \
|
||||
e, f); \
|
||||
} \
|
||||
void *dispatch::fname##_;
|
||||
|
||||
#define DEFINE7(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g)\
|
||||
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g); }\
|
||||
void* dispatch::fname ## _;
|
||||
#define DEFINE7(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7) \
|
||||
ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g) { \
|
||||
return f_impl<dispatch::init>(hlib, fname, fname##_, #fname, a, b, c, d, \
|
||||
e, f, g); \
|
||||
} \
|
||||
void *dispatch::fname##_;
|
||||
|
||||
#define DEFINE8(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h)\
|
||||
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h); }\
|
||||
void* dispatch::fname ## _;
|
||||
#define DEFINE8(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) \
|
||||
ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h) { \
|
||||
return f_impl<dispatch::init>(hlib, fname, fname##_, #fname, a, b, c, d, \
|
||||
e, f, g, h); \
|
||||
} \
|
||||
void *dispatch::fname##_;
|
||||
|
||||
#define DEFINE9(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i)\
|
||||
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h, i); }\
|
||||
void* dispatch::fname ## _;
|
||||
#define DEFINE9(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) \
|
||||
ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i) { \
|
||||
return f_impl<dispatch::init>(hlib, fname, fname##_, #fname, a, b, c, d, \
|
||||
e, f, g, h, i); \
|
||||
} \
|
||||
void *dispatch::fname##_;
|
||||
|
||||
#define DEFINE10(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i, t10 j)\
|
||||
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h, i, j); }\
|
||||
void* dispatch::fname ## _;
|
||||
#define DEFINE10(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, \
|
||||
t10) \
|
||||
ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i, \
|
||||
t10 j) { \
|
||||
return f_impl<dispatch::init>(hlib, fname, fname##_, #fname, a, b, c, d, \
|
||||
e, f, g, h, i, j); \
|
||||
} \
|
||||
void *dispatch::fname##_;
|
||||
|
||||
#define DEFINE11(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i, t10 j, t11 k)\
|
||||
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h, i, j, k); }\
|
||||
void* dispatch::fname ## _;
|
||||
#define DEFINE11(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, \
|
||||
t10, t11) \
|
||||
ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i, \
|
||||
t10 j, t11 k) { \
|
||||
return f_impl<dispatch::init>(hlib, fname, fname##_, #fname, a, b, c, d, \
|
||||
e, f, g, h, i, j, k); \
|
||||
} \
|
||||
void *dispatch::fname##_;
|
||||
|
||||
#define DEFINE13(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i, t10 j, t11 k, t12 l, t13 m)\
|
||||
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h, i, j, k, l, m); }\
|
||||
void* dispatch::fname ## _;
|
||||
|
||||
#define DEFINE19(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15, t16, t17, t18, t19) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i, t10 j, t11 k, t12 l, t13 m, t14 n, t15 o, t16 p, t17 q, t18 r, t19 s)\
|
||||
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s); }\
|
||||
void* dispatch::fname ## _;
|
||||
#define DEFINE13(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, \
|
||||
t10, t11, t12, t13) \
|
||||
ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i, \
|
||||
t10 j, t11 k, t12 l, t13 m) { \
|
||||
return f_impl<dispatch::init>(hlib, fname, fname##_, #fname, a, b, c, d, \
|
||||
e, f, g, h, i, j, k, l, m); \
|
||||
} \
|
||||
void *dispatch::fname##_;
|
||||
|
||||
#define DEFINE19(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, \
|
||||
t10, t11, t12, t13, t14, t15, t16, t17, t18, t19) \
|
||||
ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i, \
|
||||
t10 j, t11 k, t12 l, t13 m, t14 n, t15 o, t16 p, t17 q, \
|
||||
t18 r, t19 s) { \
|
||||
return f_impl<dispatch::init>(hlib, fname, fname##_, #fname, a, b, c, d, \
|
||||
e, f, g, h, i, j, k, l, m, n, o, p, q, r, \
|
||||
s); \
|
||||
} \
|
||||
void *dispatch::fname##_;
|
||||
|
||||
/* ------------------- *
|
||||
* CUDA
|
||||
* ------------------- */
|
||||
|
||||
bool dispatch::cuinit(){
|
||||
if(cuda_==nullptr){
|
||||
#ifdef _WIN32
|
||||
bool dispatch::cuinit() {
|
||||
if (cuda_ == nullptr) {
|
||||
#ifdef _WIN32
|
||||
cuda_ = dlopen("cudart64_110.dll", RTLD_LAZY);
|
||||
#else
|
||||
#else
|
||||
cuda_ = dlopen("libcuda.so", RTLD_LAZY);
|
||||
if(!cuda_)
|
||||
if (!cuda_)
|
||||
cuda_ = dlopen("libcuda.so.1", RTLD_LAZY);
|
||||
#endif
|
||||
if(!cuda_)
|
||||
throw std::runtime_error("Could not find `libcuda.so`. Make sure it is in your LD_LIBRARY_PATH.");
|
||||
#endif
|
||||
if (!cuda_)
|
||||
throw std::runtime_error("Could not find `libcuda.so`. Make sure it is "
|
||||
"in your LD_LIBRARY_PATH.");
|
||||
}
|
||||
if(cuda_ == nullptr)
|
||||
if (cuda_ == nullptr)
|
||||
return false;
|
||||
CUresult (*fptr)(unsigned int);
|
||||
cuInit_ = dlsym(cuda_, "cuInit");
|
||||
@@ -112,21 +157,33 @@ bool dispatch::cuinit(){
|
||||
}
|
||||
|
||||
#define CUDA_DEFINE1(ret, fname, t1) DEFINE1(cuinit, cuda_, ret, fname, t1)
|
||||
#define CUDA_DEFINE2(ret, fname, t1, t2) DEFINE2(cuinit, cuda_, ret, fname, t1, t2)
|
||||
#define CUDA_DEFINE3(ret, fname, t1, t2, t3) DEFINE3(cuinit, cuda_, ret, fname, t1, t2, t3)
|
||||
#define CUDA_DEFINE4(ret, fname, t1, t2, t3, t4) DEFINE4(cuinit, cuda_, ret, fname, t1, t2, t3, t4)
|
||||
#define CUDA_DEFINE5(ret, fname, t1, t2, t3, t4, t5) DEFINE5(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5)
|
||||
#define CUDA_DEFINE6(ret, fname, t1, t2, t3, t4, t5, t6) DEFINE6(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6)
|
||||
#define CUDA_DEFINE7(ret, fname, t1, t2, t3, t4, t5, t6, t7) DEFINE7(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7)
|
||||
#define CUDA_DEFINE8(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) DEFINE8(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8)
|
||||
#define CUDA_DEFINE9(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) DEFINE9(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9)
|
||||
#define CUDA_DEFINE10(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) DEFINE10(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10)
|
||||
#define CUDA_DEFINE11(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) DEFINE11(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11)
|
||||
#define CUDA_DEFINE2(ret, fname, t1, t2) \
|
||||
DEFINE2(cuinit, cuda_, ret, fname, t1, t2)
|
||||
#define CUDA_DEFINE3(ret, fname, t1, t2, t3) \
|
||||
DEFINE3(cuinit, cuda_, ret, fname, t1, t2, t3)
|
||||
#define CUDA_DEFINE4(ret, fname, t1, t2, t3, t4) \
|
||||
DEFINE4(cuinit, cuda_, ret, fname, t1, t2, t3, t4)
|
||||
#define CUDA_DEFINE5(ret, fname, t1, t2, t3, t4, t5) \
|
||||
DEFINE5(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5)
|
||||
#define CUDA_DEFINE6(ret, fname, t1, t2, t3, t4, t5, t6) \
|
||||
DEFINE6(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6)
|
||||
#define CUDA_DEFINE7(ret, fname, t1, t2, t3, t4, t5, t6, t7) \
|
||||
DEFINE7(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7)
|
||||
#define CUDA_DEFINE8(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) \
|
||||
DEFINE8(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8)
|
||||
#define CUDA_DEFINE9(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) \
|
||||
DEFINE9(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9)
|
||||
#define CUDA_DEFINE10(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) \
|
||||
DEFINE10(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10)
|
||||
#define CUDA_DEFINE11(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, \
|
||||
t11) \
|
||||
DEFINE11(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, \
|
||||
t11)
|
||||
|
||||
// context management
|
||||
CUDA_DEFINE1(CUresult, cuCtxDestroy_v2, CUcontext)
|
||||
CUDA_DEFINE3(CUresult, cuCtxCreate_v2, CUcontext *, unsigned int, CUdevice)
|
||||
CUDA_DEFINE1(CUresult, cuCtxGetDevice, CUdevice*)
|
||||
CUDA_DEFINE1(CUresult, cuCtxGetDevice, CUdevice *)
|
||||
CUDA_DEFINE2(CUresult, cuCtxEnablePeerAccess, CUcontext, unsigned int)
|
||||
CUDA_DEFINE1(CUresult, cuInit, unsigned int)
|
||||
CUDA_DEFINE1(CUresult, cuDriverGetVersion, int *)
|
||||
@@ -134,59 +191,71 @@ CUDA_DEFINE1(CUresult, cuDriverGetVersion, int *)
|
||||
CUDA_DEFINE2(CUresult, cuDeviceGet, CUdevice *, int)
|
||||
CUDA_DEFINE3(CUresult, cuDeviceGetName, char *, int, CUdevice)
|
||||
CUDA_DEFINE3(CUresult, cuDeviceGetPCIBusId, char *, int, CUdevice)
|
||||
CUDA_DEFINE3(CUresult, cuDeviceGetAttribute, int *, CUdevice_attribute, CUdevice)
|
||||
CUDA_DEFINE1(CUresult, cuDeviceGetCount, int*)
|
||||
CUDA_DEFINE3(CUresult, cuDeviceGetAttribute, int *, CUdevice_attribute,
|
||||
CUdevice)
|
||||
CUDA_DEFINE1(CUresult, cuDeviceGetCount, int *)
|
||||
|
||||
// link management
|
||||
CUDA_DEFINE8(CUresult, cuLinkAddData_v2, CUlinkState, CUjitInputType, void*, size_t, const char*, unsigned int, CUjit_option*, void**);
|
||||
CUDA_DEFINE4(CUresult, cuLinkCreate_v2, unsigned int, CUjit_option*, void**, CUlinkState*);
|
||||
CUDA_DEFINE8(CUresult, cuLinkAddData_v2, CUlinkState, CUjitInputType, void *,
|
||||
size_t, const char *, unsigned int, CUjit_option *, void **);
|
||||
CUDA_DEFINE4(CUresult, cuLinkCreate_v2, unsigned int, CUjit_option *, void **,
|
||||
CUlinkState *);
|
||||
CUDA_DEFINE1(CUresult, cuLinkDestroy, CUlinkState);
|
||||
CUDA_DEFINE3(CUresult, cuLinkComplete, CUlinkState, void**, size_t*);
|
||||
CUDA_DEFINE3(CUresult, cuLinkComplete, CUlinkState, void **, size_t *);
|
||||
// module management
|
||||
CUDA_DEFINE4(CUresult, cuModuleGetGlobal_v2, CUdeviceptr*, size_t*, CUmodule, const char*)
|
||||
CUDA_DEFINE4(CUresult, cuModuleGetGlobal_v2, CUdeviceptr *, size_t *, CUmodule,
|
||||
const char *)
|
||||
CUDA_DEFINE2(CUresult, cuModuleLoad, CUmodule *, const char *)
|
||||
CUDA_DEFINE1(CUresult, cuModuleUnload, CUmodule)
|
||||
CUDA_DEFINE2(CUresult, cuModuleLoadData, CUmodule *, const void *)
|
||||
CUDA_DEFINE5(CUresult, cuModuleLoadDataEx, CUmodule *, const void *, unsigned int, CUjit_option *, void **)
|
||||
CUDA_DEFINE3(CUresult, cuModuleGetFunction, CUfunction *, CUmodule, const char *)
|
||||
CUDA_DEFINE5(CUresult, cuModuleLoadDataEx, CUmodule *, const void *,
|
||||
unsigned int, CUjit_option *, void **)
|
||||
CUDA_DEFINE3(CUresult, cuModuleGetFunction, CUfunction *, CUmodule,
|
||||
const char *)
|
||||
// stream management
|
||||
CUDA_DEFINE2(CUresult, cuStreamCreate, CUstream *, unsigned int)
|
||||
CUDA_DEFINE1(CUresult, cuStreamSynchronize, CUstream)
|
||||
CUDA_DEFINE1(CUresult, cuStreamDestroy_v2, CUstream)
|
||||
CUDA_DEFINE2(CUresult, cuStreamGetCtx, CUstream, CUcontext*)
|
||||
CUDA_DEFINE11(CUresult, cuLaunchKernel, CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, CUstream, void **, void **)
|
||||
CUDA_DEFINE2(CUresult, cuStreamGetCtx, CUstream, CUcontext *)
|
||||
CUDA_DEFINE11(CUresult, cuLaunchKernel, CUfunction, unsigned int, unsigned int,
|
||||
unsigned int, unsigned int, unsigned int, unsigned int,
|
||||
unsigned int, CUstream, void **, void **)
|
||||
// function management
|
||||
CUDA_DEFINE3(CUresult, cuFuncGetAttribute, int*, CUfunction_attribute, CUfunction)
|
||||
CUDA_DEFINE3(CUresult, cuFuncSetAttribute, CUfunction, CUfunction_attribute, int)
|
||||
CUDA_DEFINE3(CUresult, cuFuncGetAttribute, int *, CUfunction_attribute,
|
||||
CUfunction)
|
||||
CUDA_DEFINE3(CUresult, cuFuncSetAttribute, CUfunction, CUfunction_attribute,
|
||||
int)
|
||||
CUDA_DEFINE2(CUresult, cuFuncSetCacheConfig, CUfunction, CUfunc_cache)
|
||||
// memory management
|
||||
CUDA_DEFINE3(CUresult, cuMemcpyDtoH_v2, void *, CUdeviceptr, size_t)
|
||||
CUDA_DEFINE1(CUresult, cuMemFree_v2, CUdeviceptr)
|
||||
CUDA_DEFINE4(CUresult, cuMemcpyDtoHAsync_v2, void *, CUdeviceptr, size_t, CUstream)
|
||||
CUDA_DEFINE4(CUresult, cuMemcpyHtoDAsync_v2, CUdeviceptr, const void *, size_t, CUstream)
|
||||
CUDA_DEFINE3(CUresult, cuMemcpyHtoD_v2, CUdeviceptr, const void *, size_t )
|
||||
CUDA_DEFINE2(CUresult, cuMemAlloc_v2, CUdeviceptr*, size_t)
|
||||
CUDA_DEFINE3(CUresult, cuPointerGetAttribute, void*, CUpointer_attribute, CUdeviceptr)
|
||||
CUDA_DEFINE4(CUresult, cuMemsetD8Async, CUdeviceptr, unsigned char, size_t, CUstream)
|
||||
CUDA_DEFINE4(CUresult, cuMemcpyDtoHAsync_v2, void *, CUdeviceptr, size_t,
|
||||
CUstream)
|
||||
CUDA_DEFINE4(CUresult, cuMemcpyHtoDAsync_v2, CUdeviceptr, const void *, size_t,
|
||||
CUstream)
|
||||
CUDA_DEFINE3(CUresult, cuMemcpyHtoD_v2, CUdeviceptr, const void *, size_t)
|
||||
CUDA_DEFINE2(CUresult, cuMemAlloc_v2, CUdeviceptr *, size_t)
|
||||
CUDA_DEFINE3(CUresult, cuPointerGetAttribute, void *, CUpointer_attribute,
|
||||
CUdeviceptr)
|
||||
CUDA_DEFINE4(CUresult, cuMemsetD8Async, CUdeviceptr, unsigned char, size_t,
|
||||
CUstream)
|
||||
// event management
|
||||
CUDA_DEFINE2(CUresult, cuEventCreate, CUevent *, unsigned int)
|
||||
CUDA_DEFINE3(CUresult, cuEventElapsedTime, float *, CUevent, CUevent)
|
||||
CUDA_DEFINE2(CUresult, cuEventRecord, CUevent, CUstream)
|
||||
CUDA_DEFINE1(CUresult, cuEventDestroy_v2, CUevent)
|
||||
|
||||
|
||||
|
||||
/* ------------------- *
|
||||
* NVML
|
||||
* ------------------- */
|
||||
bool dispatch::nvmlinit(){
|
||||
#ifdef _WIN32
|
||||
if(nvml_==nullptr)
|
||||
bool dispatch::nvmlinit() {
|
||||
#ifdef _WIN32
|
||||
if (nvml_ == nullptr)
|
||||
nvml_ = dlopen("nvml.dll", RTLD_LAZY);
|
||||
#else
|
||||
if(nvml_==nullptr)
|
||||
#else
|
||||
if (nvml_ == nullptr)
|
||||
nvml_ = dlopen("libnvidia-ml.so", RTLD_LAZY);
|
||||
#endif
|
||||
#endif
|
||||
nvmlReturn_t (*fptr)();
|
||||
nvmlInit_v2_ = dlsym(nvml_, "nvmlInit_v2");
|
||||
*reinterpret_cast<void **>(&fptr) = nvmlInit_v2_;
|
||||
@@ -197,21 +266,27 @@ bool dispatch::nvmlinit(){
|
||||
|
||||
#define NVML_DEFINE0(ret, fname) DEFINE0(nvmlinit, nvml_, ret, fname)
|
||||
#define NVML_DEFINE1(ret, fname, t1) DEFINE1(nvmlinit, nvml_, ret, fname, t1)
|
||||
#define NVML_DEFINE2(ret, fname, t1, t2) DEFINE2(nvmlinit, nvml_, ret, fname, t1, t2)
|
||||
#define NVML_DEFINE3(ret, fname, t1, t2, t3) DEFINE3(nvmlinit, nvml_, ret, fname, t1, t2, t3)
|
||||
#define NVML_DEFINE2(ret, fname, t1, t2) \
|
||||
DEFINE2(nvmlinit, nvml_, ret, fname, t1, t2)
|
||||
#define NVML_DEFINE3(ret, fname, t1, t2, t3) \
|
||||
DEFINE3(nvmlinit, nvml_, ret, fname, t1, t2, t3)
|
||||
|
||||
NVML_DEFINE2(nvmlReturn_t, nvmlDeviceGetHandleByPciBusId_v2, const char *, nvmlDevice_t*)
|
||||
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetClockInfo, nvmlDevice_t, nvmlClockType_t, unsigned int*)
|
||||
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetMaxClockInfo, nvmlDevice_t, nvmlClockType_t, unsigned int*)
|
||||
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceSetApplicationsClocks, nvmlDevice_t, unsigned int, unsigned int)
|
||||
NVML_DEFINE2(nvmlReturn_t, nvmlDeviceGetHandleByPciBusId_v2, const char *,
|
||||
nvmlDevice_t *)
|
||||
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetClockInfo, nvmlDevice_t,
|
||||
nvmlClockType_t, unsigned int *)
|
||||
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetMaxClockInfo, nvmlDevice_t,
|
||||
nvmlClockType_t, unsigned int *)
|
||||
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceSetApplicationsClocks, nvmlDevice_t,
|
||||
unsigned int, unsigned int)
|
||||
|
||||
/* ------------------- *
|
||||
* HIP
|
||||
* ------------------- */
|
||||
bool dispatch::hipinit(){
|
||||
if(hip_==nullptr)
|
||||
bool dispatch::hipinit() {
|
||||
if (hip_ == nullptr)
|
||||
hip_ = dlopen("libamdhip64.so", RTLD_LAZY);
|
||||
if(hip_ == nullptr)
|
||||
if (hip_ == nullptr)
|
||||
return false;
|
||||
hipError_t (*fptr)();
|
||||
hipInit_ = dlsym(hip_, "hipInit");
|
||||
@@ -222,23 +297,34 @@ bool dispatch::hipinit(){
|
||||
}
|
||||
|
||||
#define HIP_DEFINE1(ret, fname, t1) DEFINE1(hipinit, hip_, ret, fname, t1)
|
||||
#define HIP_DEFINE2(ret, fname, t1, t2) DEFINE2(hipinit, hip_, ret, fname, t1, t2)
|
||||
#define HIP_DEFINE3(ret, fname, t1, t2, t3) DEFINE3(hipinit, hip_, ret, fname, t1, t2, t3)
|
||||
#define HIP_DEFINE4(ret, fname, t1, t2, t3, t4) DEFINE4(hipinit, hip_, ret, fname, t1, t2, t3, t4)
|
||||
#define HIP_DEFINE5(ret, fname, t1, t2, t3, t4, t5) DEFINE5(hipinit, hip_, ret, fname, t1, t2, t3, t4, t5)
|
||||
#define HIP_DEFINE6(ret, fname, t1, t2, t3, t4, t5, t6) DEFINE6(hipinit, hip_, ret, fname, t1, t2, t3, t4, t5, t6)
|
||||
#define HIP_DEFINE7(ret, fname, t1, t2, t3, t4, t5, t6, t7) DEFINE7(hipinit, hip_, ret, fname, t1, t2, t3, t4, t5, t6, t7)
|
||||
#define HIP_DEFINE8(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) DEFINE8(hipinit, hip_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8)
|
||||
#define HIP_DEFINE9(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) DEFINE9(hipinit, hip_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9)
|
||||
#define HIP_DEFINE10(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) DEFINE10(hipinit, hip_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10)
|
||||
#define HIP_DEFINE11(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) DEFINE11(hipinit, hip_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11)
|
||||
#define HIP_DEFINE2(ret, fname, t1, t2) \
|
||||
DEFINE2(hipinit, hip_, ret, fname, t1, t2)
|
||||
#define HIP_DEFINE3(ret, fname, t1, t2, t3) \
|
||||
DEFINE3(hipinit, hip_, ret, fname, t1, t2, t3)
|
||||
#define HIP_DEFINE4(ret, fname, t1, t2, t3, t4) \
|
||||
DEFINE4(hipinit, hip_, ret, fname, t1, t2, t3, t4)
|
||||
#define HIP_DEFINE5(ret, fname, t1, t2, t3, t4, t5) \
|
||||
DEFINE5(hipinit, hip_, ret, fname, t1, t2, t3, t4, t5)
|
||||
#define HIP_DEFINE6(ret, fname, t1, t2, t3, t4, t5, t6) \
|
||||
DEFINE6(hipinit, hip_, ret, fname, t1, t2, t3, t4, t5, t6)
|
||||
#define HIP_DEFINE7(ret, fname, t1, t2, t3, t4, t5, t6, t7) \
|
||||
DEFINE7(hipinit, hip_, ret, fname, t1, t2, t3, t4, t5, t6, t7)
|
||||
#define HIP_DEFINE8(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) \
|
||||
DEFINE8(hipinit, hip_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8)
|
||||
#define HIP_DEFINE9(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) \
|
||||
DEFINE9(hipinit, hip_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9)
|
||||
#define HIP_DEFINE10(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) \
|
||||
DEFINE10(hipinit, hip_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10)
|
||||
#define HIP_DEFINE11(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) \
|
||||
DEFINE11(hipinit, hip_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, \
|
||||
t11)
|
||||
|
||||
// context management
|
||||
HIP_DEFINE1(hipError_t, hipCtxDestroy, hipCtx_t)
|
||||
HIP_DEFINE3(hipError_t, hipCtxCreate, hipCtx_t *, unsigned int, hipDevice_t)
|
||||
HIP_DEFINE1(hipError_t, hipCtxGetDevice, hipDevice_t*)
|
||||
HIP_DEFINE1(hipError_t, hipCtxGetDevice, hipDevice_t *)
|
||||
HIP_DEFINE1(hipError_t, hipCtxPushCurrent, hipCtx_t)
|
||||
HIP_DEFINE1(hipError_t, hipCtxPopCurrent, hipCtx_t*)
|
||||
HIP_DEFINE1(hipError_t, hipCtxPopCurrent, hipCtx_t *)
|
||||
HIP_DEFINE2(hipError_t, hipCtxEnablePeerAccess, hipCtx_t, unsigned int)
|
||||
HIP_DEFINE1(hipError_t, hipInit, unsigned int)
|
||||
HIP_DEFINE1(hipError_t, hipDriverGetVersion, int *)
|
||||
@@ -246,56 +332,64 @@ HIP_DEFINE1(hipError_t, hipDriverGetVersion, int *)
|
||||
HIP_DEFINE2(hipError_t, hipGetDevice, hipDevice_t *, int)
|
||||
HIP_DEFINE3(hipError_t, hipDeviceGetName, char *, int, hipDevice_t)
|
||||
HIP_DEFINE3(hipError_t, hipDeviceGetPCIBusId, char *, int, hipDevice_t)
|
||||
HIP_DEFINE3(hipError_t, hipDeviceGetAttribute, int *, hipDeviceAttribute_t, hipDevice_t)
|
||||
HIP_DEFINE3(hipError_t, hipDeviceGetAttribute, int *, hipDeviceAttribute_t,
|
||||
hipDevice_t)
|
||||
HIP_DEFINE1(hipError_t, hipGetDeviceCount, int *)
|
||||
// module management
|
||||
HIP_DEFINE4(hipError_t, hipModuleGetGlobal, hipDeviceptr_t*, size_t*, hipModule_t, const char*)
|
||||
HIP_DEFINE4(hipError_t, hipModuleGetGlobal, hipDeviceptr_t *, size_t *,
|
||||
hipModule_t, const char *)
|
||||
HIP_DEFINE2(hipError_t, hipModuleLoad, hipModule_t *, const char *)
|
||||
HIP_DEFINE1(hipError_t, hipModuleUnload, hipModule_t)
|
||||
HIP_DEFINE2(hipError_t, hipModuleLoadData, hipModule_t *, const void *)
|
||||
HIP_DEFINE5(hipError_t, hipModuleLoadDataEx, hipModule_t *, const void *, unsigned int, hipJitOption *, void **)
|
||||
HIP_DEFINE3(hipError_t, hipModuleGetFunction, hipFunction_t *, hipModule_t, const char *)
|
||||
HIP_DEFINE5(hipError_t, hipModuleLoadDataEx, hipModule_t *, const void *,
|
||||
unsigned int, hipJitOption *, void **)
|
||||
HIP_DEFINE3(hipError_t, hipModuleGetFunction, hipFunction_t *, hipModule_t,
|
||||
const char *)
|
||||
// stream management
|
||||
HIP_DEFINE2(hipError_t, hipStreamCreate, hipStream_t *, unsigned int)
|
||||
HIP_DEFINE1(hipError_t, hipStreamSynchronize, hipStream_t)
|
||||
HIP_DEFINE1(hipError_t, hipStreamDestroy, hipStream_t)
|
||||
HIP_DEFINE11(hipError_t, hipModuleLaunchKernel, hipFunction_t, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, hipStream_t, void **, void **)
|
||||
HIP_DEFINE11(hipError_t, hipModuleLaunchKernel, hipFunction_t, unsigned int,
|
||||
unsigned int, unsigned int, unsigned int, unsigned int,
|
||||
unsigned int, unsigned int, hipStream_t, void **, void **)
|
||||
// function management
|
||||
HIP_DEFINE2(hipError_t, hipFuncGetAttributes, hipFuncAttributes*, void*)
|
||||
HIP_DEFINE2(hipError_t, hipFuncGetAttributes, hipFuncAttributes *, void *)
|
||||
HIP_DEFINE2(hipError_t, hipFuncSetCacheConfig, hipFunction_t, hipFuncCache_t)
|
||||
// memory management
|
||||
HIP_DEFINE3(hipError_t, hipMemcpyDtoH, void *, hipDeviceptr_t, size_t)
|
||||
HIP_DEFINE1(hipError_t, hipFree, hipDeviceptr_t)
|
||||
HIP_DEFINE4(hipError_t, hipMemcpyDtoHAsync, void *, hipDeviceptr_t, size_t, hipStream_t)
|
||||
HIP_DEFINE4(hipError_t, hipMemcpyHtoDAsync, hipDeviceptr_t, const void *, size_t, hipStream_t)
|
||||
HIP_DEFINE3(hipError_t, hipMemcpyHtoD, hipDeviceptr_t, const void *, size_t )
|
||||
HIP_DEFINE2(hipError_t, hipMalloc, hipDeviceptr_t*, size_t)
|
||||
HIP_DEFINE3(hipError_t, hipPointerGetAttribute, void*, CUpointer_attribute, hipDeviceptr_t)
|
||||
HIP_DEFINE4(hipError_t, hipMemsetD8Async, hipDeviceptr_t, unsigned char, size_t, hipStream_t)
|
||||
HIP_DEFINE4(hipError_t, hipMemcpyDtoHAsync, void *, hipDeviceptr_t, size_t,
|
||||
hipStream_t)
|
||||
HIP_DEFINE4(hipError_t, hipMemcpyHtoDAsync, hipDeviceptr_t, const void *,
|
||||
size_t, hipStream_t)
|
||||
HIP_DEFINE3(hipError_t, hipMemcpyHtoD, hipDeviceptr_t, const void *, size_t)
|
||||
HIP_DEFINE2(hipError_t, hipMalloc, hipDeviceptr_t *, size_t)
|
||||
HIP_DEFINE3(hipError_t, hipPointerGetAttribute, void *, CUpointer_attribute,
|
||||
hipDeviceptr_t)
|
||||
HIP_DEFINE4(hipError_t, hipMemsetD8Async, hipDeviceptr_t, unsigned char, size_t,
|
||||
hipStream_t)
|
||||
// event management
|
||||
HIP_DEFINE2(hipError_t, hipEventCreate, hipEvent_t *, unsigned int)
|
||||
HIP_DEFINE3(hipError_t, hipEventElapsedTime, float *, hipEvent_t, hipEvent_t)
|
||||
HIP_DEFINE2(hipError_t, hipEventRecord, hipEvent_t, hipStream_t)
|
||||
HIP_DEFINE1(hipError_t, hipEventDestroy, hipEvent_t)
|
||||
|
||||
|
||||
/* ------------------- *
|
||||
* COMMON
|
||||
* ------------------- */
|
||||
|
||||
// Release
|
||||
void dispatch::release(){
|
||||
if(cuda_){
|
||||
void dispatch::release() {
|
||||
if (cuda_) {
|
||||
dlclose(cuda_);
|
||||
cuda_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void* dispatch::cuda_;
|
||||
void* dispatch::nvml_;
|
||||
void* dispatch::nvmlInit_v2_;
|
||||
void* dispatch::hip_;
|
||||
void *dispatch::cuda_;
|
||||
void *dispatch::nvml_;
|
||||
void *dispatch::nvmlInit_v2_;
|
||||
void *dispatch::hip_;
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
} // namespace driver
|
||||
} // namespace triton
|
||||
|
410
lib/driver/error.cc
Executable file → Normal file
410
lib/driver/error.cc
Executable file → Normal file
@@ -1,166 +1,270 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "triton/driver/error.h"
|
||||
|
||||
namespace triton
|
||||
{
|
||||
namespace driver
|
||||
{
|
||||
namespace triton {
|
||||
namespace driver {
|
||||
|
||||
// Translates a CUDA driver status code into the matching exception from
// exception::cuda. Returns normally only for CUDA_SUCCESS; every other value
// throws, and codes without a dedicated exception type throw unknown().
void check(CUresult err) {
  using namespace exception::cuda;
  switch (err) {
  case CUDA_SUCCESS: return;
  case CUDA_ERROR_INVALID_VALUE: throw invalid_value();
  case CUDA_ERROR_OUT_OF_MEMORY: throw out_of_memory();
  case CUDA_ERROR_NOT_INITIALIZED: throw not_initialized();
  case CUDA_ERROR_DEINITIALIZED: throw deinitialized();
  case CUDA_ERROR_PROFILER_DISABLED: throw profiler_disabled();
  case CUDA_ERROR_PROFILER_NOT_INITIALIZED: throw profiler_not_initialized();
  case CUDA_ERROR_PROFILER_ALREADY_STARTED: throw profiler_already_started();
  case CUDA_ERROR_PROFILER_ALREADY_STOPPED: throw profiler_already_stopped();
  case CUDA_ERROR_NO_DEVICE: throw no_device();
  case CUDA_ERROR_INVALID_DEVICE: throw invalid_device();
  case CUDA_ERROR_INVALID_IMAGE: throw invalid_image();
  case CUDA_ERROR_INVALID_CONTEXT: throw invalid_context();
  case CUDA_ERROR_CONTEXT_ALREADY_CURRENT: throw context_already_current();
  case CUDA_ERROR_MAP_FAILED: throw map_failed();
  case CUDA_ERROR_UNMAP_FAILED: throw unmap_failed();
  case CUDA_ERROR_ARRAY_IS_MAPPED: throw array_is_mapped();
  case CUDA_ERROR_ALREADY_MAPPED: throw already_mapped();
  case CUDA_ERROR_NO_BINARY_FOR_GPU: throw no_binary_for_gpu();
  case CUDA_ERROR_ALREADY_ACQUIRED: throw already_acquired();
  case CUDA_ERROR_NOT_MAPPED: throw not_mapped();
  case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: throw not_mapped_as_array();
  case CUDA_ERROR_NOT_MAPPED_AS_POINTER: throw not_mapped_as_pointer();
  case CUDA_ERROR_ECC_UNCORRECTABLE: throw ecc_uncorrectable();
  case CUDA_ERROR_UNSUPPORTED_LIMIT: throw unsupported_limit();
  case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: throw context_already_in_use();
  case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED: throw peer_access_unsupported();
  case CUDA_ERROR_INVALID_PTX: throw invalid_ptx();
  case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT: throw invalid_graphics_context();
  case CUDA_ERROR_INVALID_SOURCE: throw invalid_source();
  case CUDA_ERROR_FILE_NOT_FOUND: throw file_not_found();
  case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: throw shared_object_symbol_not_found();
  case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: throw shared_object_init_failed();
  case CUDA_ERROR_OPERATING_SYSTEM: throw operating_system();
  case CUDA_ERROR_INVALID_HANDLE: throw invalid_handle();
  case CUDA_ERROR_NOT_FOUND: throw not_found();
  case CUDA_ERROR_NOT_READY: throw not_ready();
  case CUDA_ERROR_ILLEGAL_ADDRESS: throw illegal_address();
  case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: throw launch_out_of_resources();
  case CUDA_ERROR_LAUNCH_TIMEOUT: throw launch_timeout();
  case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: throw launch_incompatible_texturing();
  case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: throw peer_access_already_enabled();
  case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: throw peer_access_not_enabled();
  case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: throw primary_context_active();
  case CUDA_ERROR_CONTEXT_IS_DESTROYED: throw context_is_destroyed();
  case CUDA_ERROR_ASSERT: throw assert_error();
  case CUDA_ERROR_TOO_MANY_PEERS: throw too_many_peers();
  case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: throw host_memory_already_registered();
  case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: throw host_memory_not_registered();
  case CUDA_ERROR_HARDWARE_STACK_ERROR: throw hardware_stack_error();
  case CUDA_ERROR_ILLEGAL_INSTRUCTION: throw illegal_instruction();
  case CUDA_ERROR_MISALIGNED_ADDRESS: throw misaligned_address();
  case CUDA_ERROR_INVALID_ADDRESS_SPACE: throw invalid_address_space();
  case CUDA_ERROR_INVALID_PC: throw invalid_pc();
  case CUDA_ERROR_LAUNCH_FAILED: throw launch_failed();
  case CUDA_ERROR_NOT_PERMITTED: throw not_permitted();
  case CUDA_ERROR_NOT_SUPPORTED: throw not_supported();
  case CUDA_ERROR_UNKNOWN: throw unknown();
  // Any code not listed above is reported the same way as CUDA_ERROR_UNKNOWN.
  default: throw unknown();
  }
}
|
||||
|
||||
// Translates a HIP status code into the matching exception from
// exception::hip. Returns normally only for hipSuccess; every other value
// throws, and codes without an active case below throw unknown().
// The commented-out cases were already disabled in the original; presumably
// the corresponding enumerators are not provided by the HIP headers in use --
// TODO confirm before re-enabling.
void check(hipError_t error) {
  using namespace exception::hip;
  switch (error) {
  case hipSuccess: return;
  case hipErrorInvalidValue: throw invalid_value();
  case hipErrorMemoryAllocation: throw out_of_memory();
  case hipErrorNotInitialized: throw not_initialized();
  case hipErrorDeinitialized: throw deinitialized();
  case hipErrorProfilerDisabled: throw profiler_disabled();
  case hipErrorProfilerNotInitialized: throw profiler_not_initialized();
  case hipErrorProfilerAlreadyStarted: throw profiler_already_started();
  case hipErrorProfilerAlreadyStopped: throw profiler_already_stopped();
  case hipErrorNoDevice: throw no_device();
  case hipErrorInvalidSymbol: throw invalid_symbol();
  case hipErrorInvalidDevice: throw invalid_device();
  case hipErrorInvalidImage: throw invalid_image();
  case hipErrorInvalidContext: throw invalid_context();
  case hipErrorContextAlreadyCurrent: throw context_already_current();
  case hipErrorMapFailed: throw map_failed();
  case hipErrorUnmapFailed: throw unmap_failed();
  case hipErrorArrayIsMapped: throw array_is_mapped();
  case hipErrorAlreadyMapped: throw already_mapped();
  case hipErrorNoBinaryForGpu: throw no_binary_for_gpu();
  case hipErrorAlreadyAcquired: throw already_acquired();
  case hipErrorNotMapped: throw not_mapped();
  case hipErrorNotMappedAsArray: throw not_mapped_as_array();
  case hipErrorNotMappedAsPointer: throw not_mapped_as_pointer();
  case hipErrorECCNotCorrectable: throw ecc_uncorrectable();
  case hipErrorUnsupportedLimit: throw unsupported_limit();
  case hipErrorContextAlreadyInUse: throw context_already_in_use();
  case hipErrorPeerAccessUnsupported: throw peer_access_unsupported();
  case hipErrorInvalidKernelFile: throw invalid_ptx();
  case hipErrorInvalidGraphicsContext: throw invalid_graphics_context();
  case hipErrorInvalidSource: throw invalid_source();
  case hipErrorFileNotFound: throw file_not_found();
  case hipErrorSharedObjectSymbolNotFound: throw shared_object_symbol_not_found();
  case hipErrorSharedObjectInitFailed: throw shared_object_init_failed();
  case hipErrorOperatingSystem: throw operating_system();
  case hipErrorInvalidResourceHandle: throw invalid_handle();
  case hipErrorNotFound: throw not_found();
  case hipErrorNotReady: throw not_ready();
  case hipErrorIllegalAddress: throw illegal_address();
  case hipErrorLaunchOutOfResources: throw launch_out_of_resources();
  case hipErrorLaunchTimeOut: throw launch_timeout();
  // case hipErrorLaunchIncompatibleTexturing : throw launch_incompatible_texturing();
  case hipErrorPeerAccessAlreadyEnabled: throw peer_access_already_enabled();
  case hipErrorPeerAccessNotEnabled: throw peer_access_not_enabled();
  // case hipErrorPrimaryContextActive : throw primary_context_active();
  // case hipErrorContextIsDestroyed : throw context_is_destroyed();
  case hipErrorAssert: throw assert_error();
  // case hipErrorTooManyPeers : throw too_many_peers();
  case hipErrorHostMemoryAlreadyRegistered: throw host_memory_already_registered();
  case hipErrorHostMemoryNotRegistered: throw host_memory_not_registered();
  // case hipErrorHardwareStackError : throw hardware_stack_error();
  // case hipErrorIllegalInstruction : throw illegal_instruction();
  // case hipErrorMisalignedAddress : throw misaligned_address();
  // case hipErrorInvalidAddressSpace : throw invalid_address_space();
  // case hipErrorInvalidPc : throw invalid_pc();
  case hipErrorLaunchFailure: throw launch_failed();
  // case hipErrorNotPermitted : throw not_permitted();
  case hipErrorNotSupported: throw not_supported();
  case hipErrorUnknown: throw unknown();
  // Any code not listed above is reported the same way as hipErrorUnknown.
  default: throw unknown();
  }
}
|
||||
|
||||
} // namespace driver
|
||||
} // namespace triton
|
||||
|
@@ -1,73 +1,73 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#include <fstream>
|
||||
#if __has_include(<unistd.h>)
|
||||
#include <unistd.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include <memory>
|
||||
#include <regex>
|
||||
#include "triton/driver/llvm.h"
|
||||
#include "triton/driver/dispatch.h"
|
||||
#include "triton/driver/error.h"
|
||||
#include "triton/driver/llvm.h"
|
||||
#include "triton/tools/sha1.hpp"
|
||||
#include "triton/tools/sys/exec.hpp"
|
||||
#include "triton/tools/sys/getenv.hpp"
|
||||
#include "triton/tools/sys/mkdir.hpp"
|
||||
#include "triton/tools/sys/exec.hpp"
|
||||
#include "llvm/MC/TargetRegistry.h"
|
||||
#include "llvm/ExecutionEngine/ExecutionEngine.h"
|
||||
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/Verifier.h"
|
||||
#include "llvm/IR/IRPrintingPasses.h"
|
||||
#include "llvm/IR/LegacyPassManager.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IR/Verifier.h"
|
||||
#include "llvm/MC/TargetRegistry.h"
|
||||
#include "llvm/Support/CodeGen.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/SourceMgr.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Support/TargetSelect.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
#include "llvm/Target/TargetOptions.h"
|
||||
#include "llvm/IR/LegacyPassManager.h"
|
||||
#include "llvm/ExecutionEngine/ExecutionEngine.h"
|
||||
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
|
||||
#include "llvm/Transforms/Utils/Cloning.h"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/Transforms/Utils/Cloning.h"
|
||||
#include <memory>
|
||||
#include <regex>
|
||||
|
||||
// begin AMD stuff
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||||
#include "llvm/Support/FileSystem.h"
|
||||
#include "llvm/Support/FormattedStream.h"
|
||||
#include "llvm/Support/Program.h"
|
||||
#include "llvm/Support/ToolOutputFile.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||||
// end AMD stuff
|
||||
|
||||
// No-op definitions of four terminfo entry points with C linkage. Each one
// ignores its arguments and returns 0.
// NOTE(review): presumably these exist so that terminfo symbols referenced by
// LLVM resolve without linking the real library -- confirm.
extern "C" {
int set_curterm(char * /*nterm*/) { return 0; }
int del_curterm(char * /*nterm*/) { return 0; }
int tigetnum(char * /*capname*/) { return 0; }
int setupterm(char * /*term*/, int /*fildes*/, int * /*errret*/) { return 0; }
}
|
||||
|
||||
namespace triton{
|
||||
namespace driver{
|
||||
namespace triton {
|
||||
namespace driver {
|
||||
|
||||
void init_llvm() {
|
||||
LLVMInitializeNVPTXTargetInfo();
|
||||
@@ -80,82 +80,93 @@ void init_llvm() {
|
||||
LLVMInitializeAMDGPUAsmPrinter();
|
||||
}
|
||||
|
||||
|
||||
/* ------------------------ */
|
||||
// CUDA //
|
||||
/* ------------------------ */
|
||||
// Replaces the first region of `str` that starts at the first occurrence of
// `begin` and runs through the first character of the next occurrence of `end`
// with `target`.
//
// @param str    string edited in place
// @param begin  marker that opens the region to replace
// @param end    marker that closes the region; only its first character is
//               consumed, so callers pass a one-character terminator ("\n")
// @param target replacement text
// @return true if `begin` was found (and a replacement happened), false if
//         `str` was left untouched
//
// If `end` does not occur after `begin`, everything from `begin` to the end of
// the string is replaced. The count is computed explicitly for that case: the
// previous `end_replace + 1 - start_replace` relied on unsigned wrap-around and
// evaluated to 0 when `begin` matched at index 0, which inserted `target`
// without removing anything.
static bool find_and_replace(std::string &str, const std::string &begin,
                             const std::string &end,
                             const std::string &target) {
  const size_t start_replace = str.find(begin);
  if (start_replace == std::string::npos)
    return false;
  const size_t end_replace = str.find(end, start_replace);
  // npos as a count means "through the end of the string" for replace().
  const size_t count = (end_replace == std::string::npos)
                           ? std::string::npos
                           : end_replace + 1 - start_replace;
  str.replace(start_replace, count, target);
  return true;
}
|
||||
|
||||
std::string path_to_ptxas(int& version) {
|
||||
std::string path_to_ptxas(int &version) {
|
||||
std::vector<std::string> rets;
|
||||
std::string ret;
|
||||
// search pathes for ptxas
|
||||
std::vector<std::string> ptxas_prefixes = {"", "/usr/local/cuda/bin/"};
|
||||
std::string triton_ptxas = tools::getenv("TRITON_PTXAS_PATH");
|
||||
if(!triton_ptxas.empty())
|
||||
if (!triton_ptxas.empty())
|
||||
ptxas_prefixes.insert(ptxas_prefixes.begin(), triton_ptxas);
|
||||
// see what path for ptxas are valid
|
||||
std::vector<std::string> working_ptxas;
|
||||
for(std::string prefix: ptxas_prefixes){
|
||||
for (std::string prefix : ptxas_prefixes) {
|
||||
std::string ptxas = prefix + "ptxas";
|
||||
bool works = tools::exec(ptxas + " --version 2>&1", ret) == 0;
|
||||
if(works) {
|
||||
if (works) {
|
||||
working_ptxas.push_back(ptxas);
|
||||
rets.push_back(ret);
|
||||
}
|
||||
}
|
||||
// error if no working ptxas was found
|
||||
if(working_ptxas.empty())
|
||||
throw std::runtime_error("`ptxas` was searched in TRITON_PTXAS_PATH, /usr/local/cuda/bin/ or PATH"
|
||||
if (working_ptxas.empty())
|
||||
throw std::runtime_error("`ptxas` was searched in TRITON_PTXAS_PATH, "
|
||||
"/usr/local/cuda/bin/ or PATH"
|
||||
" but a working version could not be found.");
|
||||
std::string ptxas = working_ptxas.front();
|
||||
// parse version
|
||||
std::regex version_regex("release (\\d+)\\.(\\d+)");
|
||||
std::smatch match;
|
||||
bool found = false;
|
||||
// currently choosing the first ptxas. Other logics can be implemented in future
|
||||
for(std::string ret : rets) {
|
||||
if(std::regex_search(ret, match, version_regex)){
|
||||
// currently choosing the first ptxas. Other logics can be implemented in
|
||||
// future
|
||||
for (std::string ret : rets) {
|
||||
if (std::regex_search(ret, match, version_regex)) {
|
||||
int major = std::stoi(match[1]);
|
||||
int minor = std::stoi(match[2]);
|
||||
version = major*1000 + minor*10;
|
||||
version = major * 1000 + minor * 10;
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ( not found) {
|
||||
if (not found) {
|
||||
throw std::runtime_error("Error in parsing version");
|
||||
}
|
||||
return ptxas;
|
||||
}
|
||||
|
||||
|
||||
// Maps an encoded CUDA toolkit version to a two-digit PTX version number
// (e.g. 74).
//
// @param version  toolkit version encoded as major * 1000 + minor * 10
//                 (the encoding produced by path_to_ptxas), e.g. 11040
// @return the PTX version of the highest table entry not exceeding `version`
// @throws std::runtime_error if `version` is below 10000
int vptx(int version) {
  struct Entry {
    int min_version; // lowest encoded toolkit version for this row
    int ptx;         // PTX version returned for it
  };
  // Ordered from newest to oldest so that the first match is the best one.
  static constexpr Entry kTable[] = {
      {11040, 74}, {11030, 73}, {11020, 72}, {11010, 71},
      {11000, 70}, {10020, 65}, {10010, 64}, {10000, 63},
  };
  for (const Entry &entry : kTable) {
    if (version >= entry.min_version)
      return entry.ptx;
  }
  throw std::runtime_error("Triton requires CUDA 10+");
}
|
||||
|
||||
std::string llir_to_ptx(llvm::Module* module, int cc, int version){
|
||||
std::string llir_to_ptx(llvm::Module *module, int cc, int version) {
|
||||
// LLVM version in use may not officially support target hardware
|
||||
int max_nvvm_cc = 75;
|
||||
int max_nvvm_ptx = 74;
|
||||
// options
|
||||
auto options = llvm::cl::getRegisteredOptions();
|
||||
auto* short_ptr = static_cast<llvm::cl::opt<bool>*>(options["nvptx-short-ptr"]);
|
||||
auto *short_ptr =
|
||||
static_cast<llvm::cl::opt<bool> *>(options["nvptx-short-ptr"]);
|
||||
assert(short_ptr);
|
||||
short_ptr->setValue(true);
|
||||
// compute capability
|
||||
@@ -170,7 +181,8 @@ std::string llir_to_ptx(llvm::Module* module, int cc, int version){
|
||||
std::string proc = "sm_" + std::to_string(std::min(cc, max_nvvm_cc));
|
||||
std::string layout = "";
|
||||
std::string features = "";
|
||||
// std::string features = "+ptx" + std::to_string(std::min(ptx, max_nvvm_ptx));
|
||||
// std::string features = "+ptx" + std::to_string(std::min(ptx,
|
||||
// max_nvvm_ptx));
|
||||
init_llvm();
|
||||
// verify and store llvm
|
||||
llvm::legacy::PassManager pm;
|
||||
@@ -181,16 +193,18 @@ std::string llir_to_ptx(llvm::Module* module, int cc, int version){
|
||||
// create machine
|
||||
module->setTargetTriple(triple);
|
||||
std::string error;
|
||||
auto target = llvm::TargetRegistry::lookupTarget(module->getTargetTriple(), error);
|
||||
auto target =
|
||||
llvm::TargetRegistry::lookupTarget(module->getTargetTriple(), error);
|
||||
llvm::TargetOptions opt;
|
||||
opt.AllowFPOpFusion = llvm::FPOpFusion::Fast;
|
||||
opt.UnsafeFPMath = false;
|
||||
opt.NoInfsFPMath = false;
|
||||
opt.NoNaNsFPMath = true;
|
||||
llvm::TargetMachine *machine = target->createTargetMachine(module->getTargetTriple(), proc, features, opt,
|
||||
llvm::Reloc::PIC_, llvm::None, llvm::CodeGenOpt::Aggressive);
|
||||
llvm::TargetMachine *machine = target->createTargetMachine(
|
||||
module->getTargetTriple(), proc, features, opt, llvm::Reloc::PIC_,
|
||||
llvm::None, llvm::CodeGenOpt::Aggressive);
|
||||
// set data layout
|
||||
if(layout.empty())
|
||||
if (layout.empty())
|
||||
module->setDataLayout(machine->createDataLayout());
|
||||
else
|
||||
module->setDataLayout(layout);
|
||||
@@ -200,19 +214,25 @@ std::string llir_to_ptx(llvm::Module* module, int cc, int version){
|
||||
llvm::legacy::PassManager pass;
|
||||
llvm::raw_svector_ostream stream(buffer);
|
||||
// emit
|
||||
machine->addPassesToEmitFile(pass, stream, nullptr, llvm::CodeGenFileType::CGFT_AssemblyFile);
|
||||
machine->addPassesToEmitFile(pass, stream, nullptr,
|
||||
llvm::CodeGenFileType::CGFT_AssemblyFile);
|
||||
pass.run(*module);
|
||||
|
||||
// post-process
|
||||
std::string result(buffer.begin(), buffer.end());
|
||||
find_and_replace(result, ".version", "\n", ".version " + std::to_string(ptx_major) + "." + std::to_string(ptx_minor) + "\n");
|
||||
find_and_replace(result, ".version", "\n",
|
||||
".version " + std::to_string(ptx_major) + "." +
|
||||
std::to_string(ptx_minor) + "\n");
|
||||
find_and_replace(result, ".target", "\n", ".target " + sm + "\n");
|
||||
while(find_and_replace(result, "\t// begin inline asm", "\n", ""));
|
||||
while(find_and_replace(result, "\t// end inline asm", "\n", ""));
|
||||
while (find_and_replace(result, "\t// begin inline asm", "\n", ""))
|
||||
;
|
||||
while (find_and_replace(result, "\t// end inline asm", "\n", ""))
|
||||
;
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string ptx_to_cubin(const std::string& ptx, const std::string& ptxas, int cc) {
|
||||
std::string ptx_to_cubin(const std::string &ptx, const std::string &ptxas,
|
||||
int cc) {
|
||||
// compile ptx with ptxas
|
||||
char _fsrc[L_tmpnam];
|
||||
char _flog[L_tmpnam];
|
||||
@@ -221,15 +241,16 @@ std::string ptx_to_cubin(const std::string& ptx, const std::string& ptxas, int c
|
||||
std::string fsrc = _fsrc;
|
||||
std::string flog = _flog;
|
||||
std::string fbin = fsrc + ".o";
|
||||
const char* _fbin = fbin.c_str();
|
||||
const char *_fbin = fbin.c_str();
|
||||
std::ofstream ofs(fsrc);
|
||||
ofs << ptx << std::endl;
|
||||
ofs.close();
|
||||
std::string cmd;
|
||||
int err;
|
||||
cmd = ptxas + " -v --gpu-name=sm_" + std::to_string(cc) + " " + fsrc + " -o " + fsrc + ".o 2> " + flog;
|
||||
cmd = ptxas + " -v --gpu-name=sm_" + std::to_string(cc) + " " + fsrc +
|
||||
" -o " + fsrc + ".o 2> " + flog;
|
||||
err = system(cmd.c_str());
|
||||
if(err != 0){
|
||||
if (err != 0) {
|
||||
std::ifstream _log(_flog);
|
||||
std::string log(std::istreambuf_iterator<char>(_log), {});
|
||||
unlink(_fsrc);
|
||||
@@ -237,7 +258,7 @@ std::string ptx_to_cubin(const std::string& ptx, const std::string& ptxas, int c
|
||||
throw std::runtime_error("Internal Triton PTX codegen error: \n" + log);
|
||||
}
|
||||
CUmodule ret;
|
||||
std::ifstream _cubin(_fbin, std::ios::binary );
|
||||
std::ifstream _cubin(_fbin, std::ios::binary);
|
||||
std::string cubin(std::istreambuf_iterator<char>(_cubin), {});
|
||||
_cubin.close();
|
||||
unlink(_fsrc);
|
||||
@@ -251,11 +272,11 @@ std::string ptx_to_cubin(const std::string& ptx, const std::string& ptxas, int c
|
||||
// HIP //
|
||||
/* ------------------------ */
|
||||
|
||||
std::string llir_to_amdgpu(llvm::Module* module, const std::string& _proc) {
|
||||
std::string llir_to_amdgpu(llvm::Module *module, const std::string &_proc) {
|
||||
init_llvm();
|
||||
|
||||
// proc = std::get<0>(GetFeatureStrFromGCNArchName(rocminfo));
|
||||
// features = std::get<1>(GetFeatureStrFromGCNArchName(rocminfo));
|
||||
// proc = std::get<0>(GetFeatureStrFromGCNArchName(rocminfo));
|
||||
// features = std::get<1>(GetFeatureStrFromGCNArchName(rocminfo));
|
||||
|
||||
// create
|
||||
llvm::SmallVector<char, 0> buffer;
|
||||
@@ -270,17 +291,18 @@ std::string llir_to_amdgpu(llvm::Module* module, const std::string& _proc) {
|
||||
// create machine
|
||||
module->setTargetTriple(triple);
|
||||
std::string error;
|
||||
auto target = llvm::TargetRegistry::lookupTarget(module->getTargetTriple(), error);
|
||||
auto target =
|
||||
llvm::TargetRegistry::lookupTarget(module->getTargetTriple(), error);
|
||||
llvm::TargetOptions opt;
|
||||
opt.AllowFPOpFusion = llvm::FPOpFusion::Fast;
|
||||
opt.UnsafeFPMath = false;
|
||||
opt.NoInfsFPMath = false;
|
||||
opt.NoNaNsFPMath = true;
|
||||
llvm::TargetMachine *machine = target->createTargetMachine(module->getTargetTriple(), proc, features, opt,
|
||||
llvm::Reloc::PIC_, llvm::None,
|
||||
llvm::CodeGenOpt::Aggressive);
|
||||
llvm::TargetMachine *machine = target->createTargetMachine(
|
||||
module->getTargetTriple(), proc, features, opt, llvm::Reloc::PIC_,
|
||||
llvm::None, llvm::CodeGenOpt::Aggressive);
|
||||
// set data layout
|
||||
if(layout.empty())
|
||||
if (layout.empty())
|
||||
module->setDataLayout(machine->createDataLayout());
|
||||
else
|
||||
module->setDataLayout(layout);
|
||||
@@ -295,33 +317,37 @@ std::string llir_to_amdgpu(llvm::Module* module, const std::string& _proc) {
|
||||
std::error_code ec;
|
||||
|
||||
// Save GCN ISA binary.
|
||||
std::string isabin_path = std::string("/tmp/") + module_name + std::string(".o");
|
||||
std::string isabin_path =
|
||||
std::string("/tmp/") + module_name + std::string(".o");
|
||||
std::unique_ptr<llvm::raw_fd_ostream> isabin_fs(
|
||||
new llvm::raw_fd_ostream(isabin_path, ec, llvm::sys::fs::OF_Text));
|
||||
if (ec)
|
||||
{
|
||||
std::cout << isabin_path << " was not created. error code: " << ec << std::endl;
|
||||
if (ec) {
|
||||
std::cout << isabin_path << " was not created. error code: " << ec
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
// emit
|
||||
machine->addPassesToEmitFile(pass, *isabin_fs, nullptr, llvm::CGFT_ObjectFile);
|
||||
machine->addPassesToEmitFile(pass, *isabin_fs, nullptr,
|
||||
llvm::CGFT_ObjectFile);
|
||||
pass.run(*module);
|
||||
// Save GCN ISA.
|
||||
std::string amdgcn_path = std::string("/tmp/") + module_name + std::string(".gcn");
|
||||
std::string amdgcn_path =
|
||||
std::string("/tmp/") + module_name + std::string(".gcn");
|
||||
std::string result(buffer.begin(), buffer.end());
|
||||
std::ofstream amdgcn(amdgcn_path);
|
||||
amdgcn << result;
|
||||
amdgcn.close();
|
||||
|
||||
// generate HSACO file
|
||||
std::string hsaco_path = std::string("/tmp/") + module_name + std::string(".hsaco");
|
||||
std::string hsaco_path =
|
||||
std::string("/tmp/") + module_name + std::string(".hsaco");
|
||||
std::string error_message;
|
||||
int lld_result =
|
||||
llvm::sys::ExecuteAndWait("/opt/rocm/llvm/bin/ld.lld",
|
||||
{"/opt/rocm/llvm/bin/ld.lld", "-flavor", "gnu", "-shared", "-o", hsaco_path, isabin_path},
|
||||
{"/opt/rocm/llvm/bin/ld.lld", "-flavor", "gnu",
|
||||
"-shared", "-o", hsaco_path, isabin_path},
|
||||
llvm::None, {}, 0, 0, &error_message);
|
||||
if (lld_result)
|
||||
{
|
||||
if (lld_result) {
|
||||
std::cout << "ld.lld execute fail: " << std::endl;
|
||||
std::cout << error_message << std::endl;
|
||||
std::cout << lld_result << std::endl;
|
||||
@@ -330,33 +356,29 @@ std::string llir_to_amdgpu(llvm::Module* module, const std::string& _proc) {
|
||||
return hsaco_path;
|
||||
}
|
||||
|
||||
|
||||
hipModule_t amdgpu_to_hipmodule(const std::string& path) {
|
||||
hipModule_t amdgpu_to_hipmodule(const std::string &path) {
|
||||
// Read HSACO.
|
||||
std::ifstream hsaco_file(path, std::ios::binary | std::ios::ate);
|
||||
std::ifstream::pos_type hsaco_file_size = hsaco_file.tellg();
|
||||
|
||||
std::vector<unsigned char> hsaco(hsaco_file_size);
|
||||
hsaco_file.seekg(0, std::ios::beg);
|
||||
hsaco_file.read(reinterpret_cast<char*>(&hsaco[0]), hsaco_file_size);
|
||||
hsaco_file.read(reinterpret_cast<char *>(&hsaco[0]), hsaco_file_size);
|
||||
hsaco_file.close();
|
||||
hipJitOption opt[] = {hipJitOptionErrorLogBufferSizeBytes, hipJitOptionErrorLogBuffer,
|
||||
hipJitOptionInfoLogBufferSizeBytes, hipJitOptionInfoLogBuffer,
|
||||
hipJitOptionLogVerbose};
|
||||
hipJitOption opt[] = {hipJitOptionErrorLogBufferSizeBytes,
|
||||
hipJitOptionErrorLogBuffer,
|
||||
hipJitOptionInfoLogBufferSizeBytes,
|
||||
hipJitOptionInfoLogBuffer, hipJitOptionLogVerbose};
|
||||
const unsigned int errbufsize = 8192;
|
||||
const unsigned int logbufsize = 8192;
|
||||
char _err[errbufsize];
|
||||
char _log[logbufsize];
|
||||
void* optval[] = {(void*)(uintptr_t)errbufsize,
|
||||
(void*)_err, (void*)(uintptr_t)logbufsize,
|
||||
(void*)_log, (void*)1};
|
||||
void *optval[] = {(void *)(uintptr_t)errbufsize, (void *)_err,
|
||||
(void *)(uintptr_t)logbufsize, (void *)_log, (void *)1};
|
||||
hipModule_t ret;
|
||||
dispatch::hipModuleLoadDataEx(&ret, hsaco.data(), 5, opt, optval);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace driver
|
||||
} // namespace triton
|
||||
|
Reference in New Issue
Block a user