More cleaning
@@ -64,7 +64,7 @@ private:
     return (*fptr)(args...);
   }
 
-  static void cublasCreate(cublasHandle_t* h);
+  static cublasStatus_t cublasCreate_v2(cublasHandle_t* h);
 
 public:
   static bool clinit();
@@ -146,10 +146,10 @@ public:
   static nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char **headers, const char **includeNames);
   static nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char *log);
 
-  static void cublasGetStream(cudaStream_t *streamId);
-  static void cublasSetStream(cudaStream_t streamId);
-  static void cublasSgemm (cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, float* alpha, const float *A, int lda, const float *B, int ldb, float* beta, float *C, int ldc);
-  static void cublasDgemm (cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, double* alpha, const double *A, int lda, const double *B, int ldb, double* beta, double *C, int ldc);
+  static cublasStatus_t cublasGetStream(cudaStream_t *streamId);
+  static cublasStatus_t cublasSetStream(cudaStream_t streamId);
+  static cublasStatus_t cublasSgemm (cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, float* alpha, const float *A, int lda, const float *B, int ldb, float* beta, float *C, int ldc);
+  static cublasStatus_t cublasDgemm (cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, double* alpha, const double *A, int lda, const double *B, int ldb, double* beta, double *C, int ldc);
 
 private:
   static void* opencl_;
@@ -230,7 +230,7 @@ private:
   static void* nvrtcCreateProgram_;
   static void* nvrtcGetProgramLog_;
 
-  static void* cublasCreate_;
+  static void* cublasCreate_v2_;
   static void* cublasGetStream_;
   static void* cublasSetStream_;
   static void* cublasSgemm_;

@@ -129,7 +129,7 @@ bool dispatch::cublasinit()
   if(cublas_==nullptr){
     cublas_ = dlopen("libcublas.so", RTLD_LAZY);
     if(cublas_!=nullptr)
-      cublasCreate(&cublas_handle_);
+      dispatch::cublasCreate_v2(&cublas_handle_);
   }
   return cublas_ != nullptr;
 }
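
For orientation, here is a minimal sketch, written with Python's ctypes purely for illustration, of what this lazy binding amounts to at runtime: load libcublas.so on first use, resolve the cublasCreate_v2 symbol by name, and propagate the cublasStatus_t it returns (0 is CUBLAS_STATUS_SUCCESS). This is not part of the commit, and it assumes a CUDA installation with libcublas.so on the loader path.

    import ctypes

    # Load the library lazily, analogous to dlopen("libcublas.so", RTLD_LAZY) in dispatch.
    cublas = ctypes.CDLL("libcublas.so")

    # Resolve and type the v2 entry point; the cached void* in dispatch plays this role.
    cublasCreate_v2 = cublas.cublasCreate_v2
    cublasCreate_v2.restype = ctypes.c_int                        # cublasStatus_t
    cublasCreate_v2.argtypes = [ctypes.POINTER(ctypes.c_void_p)]  # cublasHandle_t*

    handle = ctypes.c_void_p()
    status = cublasCreate_v2(ctypes.byref(handle))
    print("cublasCreate_v2 returned status", status)              # 0 == CUBLAS_STATUS_SUCCESS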
@@ -212,19 +212,19 @@ NVRTC_DEFINE2(nvrtcResult, nvrtcGetPTXSize, nvrtcProgram, size_t *)
 NVRTC_DEFINE6(nvrtcResult, nvrtcCreateProgram, nvrtcProgram *, const char *, const char *, int, const char **, const char **)
 NVRTC_DEFINE2(nvrtcResult, nvrtcGetProgramLog, nvrtcProgram, char *)
 
-CUBLAS_DEFINE1(void, cublasCreate, cublasHandle_t*)
+CUBLAS_DEFINE1(cublasStatus_t, cublasCreate_v2, cublasHandle_t*)
 
-void dispatch::cublasGetStream(cudaStream_t *a)
-{ f_impl<dispatch::cublasinit>(cublas_, cublasGetStream_v2, cublasGetStream_, "cublasGetStream_v2", cublas_handle_, a); }
+cublasStatus_t dispatch::cublasGetStream(cudaStream_t *a)
+{ return f_impl<dispatch::cublasinit>(cublas_, cublasGetStream_v2, cublasGetStream_, "cublasGetStream_v2", cublas_handle_, a); }
 
-void dispatch::cublasSetStream(cudaStream_t a)
-{ f_impl<dispatch::cublasinit>(cublas_, cublasSetStream_v2, cublasSetStream_, "cublasSetStream_v2", cublas_handle_, a); }
+cublasStatus_t dispatch::cublasSetStream(cudaStream_t a)
+{ return f_impl<dispatch::cublasinit>(cublas_, cublasSetStream_v2, cublasSetStream_, "cublasSetStream_v2", cublas_handle_, a); }
 
-void dispatch::cublasSgemm(cublasOperation_t at, cublasOperation_t bt, int m, int n, int k, float* alpha, const float *A, int lda, const float *B, int ldb, float* beta, float *C, int ldc)
-{ f_impl<dispatch::cublasinit>(cublas_, cublasSgemm_v2, cublasSgemm_, "cublasSgemm_v2", cublas_handle_, at, bt, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);}
+cublasStatus_t dispatch::cublasSgemm(cublasOperation_t at, cublasOperation_t bt, int m, int n, int k, float* alpha, const float *A, int lda, const float *B, int ldb, float* beta, float *C, int ldc)
+{ return f_impl<dispatch::cublasinit>(cublas_, cublasSgemm_v2, cublasSgemm_, "cublasSgemm_v2", cublas_handle_, at, bt, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);}
 
-void dispatch::cublasDgemm(cublasOperation_t at, cublasOperation_t bt, int m, int n, int k, double* alpha, const double *A, int lda, const double *B, int ldb, double* beta, double *C, int ldc)
-{ f_impl<dispatch::cublasinit>(cublas_, cublasDgemm_v2, cublasDgemm_, "cublasDgemm_v2", cublas_handle_, at, bt, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);}
+cublasStatus_t dispatch::cublasDgemm(cublasOperation_t at, cublasOperation_t bt, int m, int n, int k, double* alpha, const double *A, int lda, const double *B, int ldb, double* beta, double *C, int ldc)
+{ return f_impl<dispatch::cublasinit>(cublas_, cublasDgemm_v2, cublasDgemm_, "cublasDgemm_v2", cublas_handle_, at, bt, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);}
 
 void dispatch::release()
 {
@@ -324,7 +324,7 @@ void* dispatch::nvrtcGetPTXSize_;
 void* dispatch::nvrtcCreateProgram_;
 void* dispatch::nvrtcGetProgramLog_;
 
-void* dispatch::cublasCreate_;
+void* dispatch::cublasCreate_v2_;
 void* dispatch::cublasGetStream_;
 void* dispatch::cublasSetStream_;
 void* dispatch::cublasSgemm_;

@@ -83,7 +83,7 @@ class GeneticOptimizer:
         def evaluate(genome):
             idx = tuple(genome)
             if idx not in cache:
-                time = tools.benchmark(template, template(*decode(genome)), tree)
+                time = tools.benchmark(template(*decode(genome)), tree)
                 if time == float('inf'):
                     return time,
                 cache[idx] = time
@@ -173,7 +173,7 @@ def is_local_optimum(parameters, template, sizes, context):
     sweep_over = [0,1,2,3,4]
 
     #Evaluate the provided parameters guess
-    reference = tools.benchmark(template, template(*parameters), tree)
+    reference = tools.benchmark(template(*parameters), tree)
     if isinf(reference):
         return False
 
@@ -187,7 +187,7 @@ def is_local_optimum(parameters, template, sizes, context):
     for x in product(*domain):
         if x==parameters:
             pass
-        time = tools.benchmark(template, template(*x), tree)
+        time = tools.benchmark(template(*x), tree)
         if time/reference < .98:
             return False
     return True
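
To make the sweep above easier to follow, here is a self-contained toy version of the local-optimum test; the cost function and the domain below are made up for illustration and are not the project's API. The idea is the same: time the provided guess, time every configuration in the swept neighbourhood, and reject the guess if any neighbour runs more than about 2% faster.

    from itertools import product

    def cost(p):
        # Synthetic stand-in for tools.benchmark: lower is better.
        return sum((v - 3) ** 2 for v in p) + 1.0

    def is_local_optimum_toy(parameters, domain):
        reference = cost(parameters)
        for x in product(*domain):
            if x == tuple(parameters):
                continue                      # skip the guess itself
            if cost(x) / reference < .98:
                return False                  # a neighbour is >2% faster
        return True

    print(is_local_optimum_toy((3, 3), [range(1, 6), range(1, 6)]))  # True for this toy cost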

@@ -40,7 +40,7 @@ def linspace(a, b, n=100):
 def expspace(a,b,N,r=128):
     return [int(ceil(exp(x)/r)*r) for x in linspace(log(a), log(b), N)]
 
-def benchmark(operation, template, tree):
+def benchmark(template, tree):
     queue = tree.context.queues[0]
     queue.profiles[template, sc.float32] = sc.profile(template, sc.float32, queue)
     times = []

@@ -145,10 +145,10 @@ class Tuner:
         best = None
         if idx > 0:
             dim = min(10, idx+1)
-            model = RandomForestRegressor(dim, dim).fit(X, Y)
-            predictions = model.predict(x)[0]
+            clf = RandomForestRegressor(dim, dim).fit(X, Y)
+            predictions = clf.predict(x)[0]
             for idx in (-predictions).argsort():
-                ts = tools.benchmark(operation, operation(*profiles[idx]), tree)
+                ts = tools.benchmark(operation(*profiles[idx]), tree)
                 if np.isfinite(ts):
                     break
            if np.isfinite(ts):
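
The hunk above is the surrogate-model step of the tuner. A small, self-contained sketch of the same idea on synthetic data (the sizes, profile count and hyper-parameters below are illustrative, not the project's): fit a random forest mapping already-tuned problem sizes to the measured performance of each known profile, predict per-profile performance for a new size, and benchmark candidates in order of predicted performance until one succeeds.

    import numpy as np
    from sklearn.ensemble import RandomForestRegressor

    rng = np.random.default_rng(0)
    X = rng.integers(128, 4096, size=(20, 3)).astype(float)   # already-tuned sizes, e.g. (M, N, K)
    Y = rng.random((20, 5))                                    # performance of 5 known profiles per size

    clf = RandomForestRegressor(n_estimators=10, max_depth=10).fit(X, Y)

    x = np.array([[1024.0, 1024.0, 1024.0]])                   # new, untuned size
    predictions = clf.predict(x)[0]                            # predicted performance per profile

    for idx in (-predictions).argsort():                       # best predicted profile first
        print("candidate profile", idx, "predicted performance", predictions[idx])
        break  # the tuner stops at the first profile whose benchmark is finite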
@@ -162,11 +162,11 @@ class Tuner:
                 profiles.append(best)
                 for xx,yy in zip(X, Y):
                     tree, _operands = tools.tree_of(operation, xx, context)
-                    time = tools.benchmark(operation, best, _tree)
+                    time = tools.benchmark(operation(*best), _tree)
                     yy.append(performance(xx, time))
         #Update dataset
         X.append(x)
-        y = [performance(x,tools.benchmark(operation, prf, tree)) for prf in profiles]
+        y = [performance(x,tools.benchmark(operation(*prf), tree)) for prf in profiles]
         Y.append(y)
         #Save data
         for (fname, data) in zip(['X.csv', 'Y.csv', 'profiles.csv'], [X, Y, profiles]):
@@ -179,10 +179,13 @@ class Tuner:
         self.progress_bar.set_finished()
 
         #Adding external profiles
-        #~ for prf in tools.external_profiles(operation):
-            #~ x = [1024, 1024, 1024]
-            #~ tree, operands = tools.tree_of(operation, x, context)
-            #~ print performance(x,tools.benchmark(operation, prf, tree))
+        for prof in tools.external_profiles(operation):
+            for x, y in zip(X, Y):
+                tree, operands = tools.tree_of(operation, x, context)
+                perf = performance(x,tools.benchmark(prof, tree))
+                if perf > 0:
+                    profiles.append(prof.__class__.__name__)
+                    y.append(perf)
 
         #Pruning of useless profiles
         if len(Y[0]) > 1: