More cleaning
This commit is contained in:
@@ -64,7 +64,7 @@ private:
|
||||
return (*fptr)(args...);
|
||||
}
|
||||
|
||||
static void cublasCreate(cublasHandle_t* h);
|
||||
static cublasStatus_t cublasCreate_v2(cublasHandle_t* h);
|
||||
|
||||
public:
|
||||
static bool clinit();
|
||||
@@ -146,10 +146,10 @@ public:
|
||||
static nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char **headers, const char **includeNames);
|
||||
static nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char *log);
|
||||
|
||||
static void cublasGetStream(cudaStream_t *streamId);
|
||||
static void cublasSetStream(cudaStream_t streamId);
|
||||
static void cublasSgemm (cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, float* alpha, const float *A, int lda, const float *B, int ldb, float* beta, float *C, int ldc);
|
||||
static void cublasDgemm (cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, double* alpha, const double *A, int lda, const double *B, int ldb, double* beta, double *C, int ldc);
|
||||
static cublasStatus_t cublasGetStream(cudaStream_t *streamId);
|
||||
static cublasStatus_t cublasSetStream(cudaStream_t streamId);
|
||||
static cublasStatus_t cublasSgemm (cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, float* alpha, const float *A, int lda, const float *B, int ldb, float* beta, float *C, int ldc);
|
||||
static cublasStatus_t cublasDgemm (cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, double* alpha, const double *A, int lda, const double *B, int ldb, double* beta, double *C, int ldc);
|
||||
|
||||
private:
|
||||
static void* opencl_;
|
||||
@@ -230,7 +230,7 @@ private:
|
||||
static void* nvrtcCreateProgram_;
|
||||
static void* nvrtcGetProgramLog_;
|
||||
|
||||
static void* cublasCreate_;
|
||||
static void* cublasCreate_v2_;
|
||||
static void* cublasGetStream_;
|
||||
static void* cublasSetStream_;
|
||||
static void* cublasSgemm_;
|
||||
|
@@ -129,7 +129,7 @@ bool dispatch::cublasinit()
|
||||
if(cublas_==nullptr){
|
||||
cublas_ = dlopen("libcublas.so", RTLD_LAZY);
|
||||
if(cublas_!=nullptr)
|
||||
cublasCreate(&cublas_handle_);
|
||||
dispatch::cublasCreate_v2(&cublas_handle_);
|
||||
}
|
||||
return cublas_ != nullptr;
|
||||
}
|
||||
@@ -212,19 +212,19 @@ NVRTC_DEFINE2(nvrtcResult, nvrtcGetPTXSize, nvrtcProgram, size_t *)
|
||||
NVRTC_DEFINE6(nvrtcResult, nvrtcCreateProgram, nvrtcProgram *, const char *, const char *, int, const char **, const char **)
|
||||
NVRTC_DEFINE2(nvrtcResult, nvrtcGetProgramLog, nvrtcProgram, char *)
|
||||
|
||||
CUBLAS_DEFINE1(void, cublasCreate, cublasHandle_t*)
|
||||
CUBLAS_DEFINE1(cublasStatus_t, cublasCreate_v2, cublasHandle_t*)
|
||||
|
||||
void dispatch::cublasGetStream(cudaStream_t *a)
|
||||
{ f_impl<dispatch::cublasinit>(cublas_, cublasGetStream_v2, cublasGetStream_, "cublasGetStream_v2", cublas_handle_, a); }
|
||||
cublasStatus_t dispatch::cublasGetStream(cudaStream_t *a)
|
||||
{ return f_impl<dispatch::cublasinit>(cublas_, cublasGetStream_v2, cublasGetStream_, "cublasGetStream_v2", cublas_handle_, a); }
|
||||
|
||||
void dispatch::cublasSetStream(cudaStream_t a)
|
||||
{ f_impl<dispatch::cublasinit>(cublas_, cublasSetStream_v2, cublasSetStream_, "cublasSetStream_v2", cublas_handle_, a); }
|
||||
cublasStatus_t dispatch::cublasSetStream(cudaStream_t a)
|
||||
{ return f_impl<dispatch::cublasinit>(cublas_, cublasSetStream_v2, cublasSetStream_, "cublasSetStream_v2", cublas_handle_, a); }
|
||||
|
||||
void dispatch::cublasSgemm(cublasOperation_t at, cublasOperation_t bt, int m, int n, int k, float* alpha, const float *A, int lda, const float *B, int ldb, float* beta, float *C, int ldc)
|
||||
{ f_impl<dispatch::cublasinit>(cublas_, cublasSgemm_v2, cublasSgemm_, "cublasSgemm_v2", cublas_handle_, at, bt, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);}
|
||||
cublasStatus_t dispatch::cublasSgemm(cublasOperation_t at, cublasOperation_t bt, int m, int n, int k, float* alpha, const float *A, int lda, const float *B, int ldb, float* beta, float *C, int ldc)
|
||||
{ return f_impl<dispatch::cublasinit>(cublas_, cublasSgemm_v2, cublasSgemm_, "cublasSgemm_v2", cublas_handle_, at, bt, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);}
|
||||
|
||||
void dispatch::cublasDgemm(cublasOperation_t at, cublasOperation_t bt, int m, int n, int k, double* alpha, const double *A, int lda, const double *B, int ldb, double* beta, double *C, int ldc)
|
||||
{ f_impl<dispatch::cublasinit>(cublas_, cublasDgemm_v2, cublasDgemm_, "cublasDgemm_v2", cublas_handle_, at, bt, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);}
|
||||
cublasStatus_t dispatch::cublasDgemm(cublasOperation_t at, cublasOperation_t bt, int m, int n, int k, double* alpha, const double *A, int lda, const double *B, int ldb, double* beta, double *C, int ldc)
|
||||
{ return f_impl<dispatch::cublasinit>(cublas_, cublasDgemm_v2, cublasDgemm_, "cublasDgemm_v2", cublas_handle_, at, bt, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);}
|
||||
|
||||
void dispatch::release()
|
||||
{
|
||||
@@ -324,7 +324,7 @@ void* dispatch::nvrtcGetPTXSize_;
|
||||
void* dispatch::nvrtcCreateProgram_;
|
||||
void* dispatch::nvrtcGetProgramLog_;
|
||||
|
||||
void* dispatch::cublasCreate_;
|
||||
void* dispatch::cublasCreate_v2_;
|
||||
void* dispatch::cublasGetStream_;
|
||||
void* dispatch::cublasSetStream_;
|
||||
void* dispatch::cublasSgemm_;
|
||||
|
@@ -83,7 +83,7 @@ class GeneticOptimizer:
|
||||
def evaluate(genome):
|
||||
idx = tuple(genome)
|
||||
if idx not in cache:
|
||||
time = tools.benchmark(template, template(*decode(genome)), tree)
|
||||
time = tools.benchmark(template(*decode(genome)), tree)
|
||||
if time == float('inf'):
|
||||
return time,
|
||||
cache[idx] = time
|
||||
@@ -173,7 +173,7 @@ def is_local_optimum(parameters, template, sizes, context):
|
||||
sweep_over = [0,1,2,3,4]
|
||||
|
||||
#Evaluate the provided parameters guess
|
||||
reference = tools.benchmark(template, template(*parameters), tree)
|
||||
reference = tools.benchmark(template(*parameters), tree)
|
||||
if isinf(reference):
|
||||
return False
|
||||
|
||||
@@ -187,7 +187,7 @@ def is_local_optimum(parameters, template, sizes, context):
|
||||
for x in product(*domain):
|
||||
if x==parameters:
|
||||
pass
|
||||
time = tools.benchmark(template, template(*x), tree)
|
||||
time = tools.benchmark(template(*x), tree)
|
||||
if time/reference < .98:
|
||||
return False
|
||||
return True
|
||||
|
@@ -40,7 +40,7 @@ def linspace(a, b, n=100):
|
||||
def expspace(a,b,N,r=128):
|
||||
return [int(ceil(exp(x)/r)*r) for x in linspace(log(a), log(b), N)]
|
||||
|
||||
def benchmark(operation, template, tree):
|
||||
def benchmark(template, tree):
|
||||
queue = tree.context.queues[0]
|
||||
queue.profiles[template, sc.float32] = sc.profile(template, sc.float32, queue)
|
||||
times = []
|
||||
|
@@ -145,10 +145,10 @@ class Tuner:
|
||||
best = None
|
||||
if idx > 0:
|
||||
dim = min(10, idx+1)
|
||||
model = RandomForestRegressor(dim, dim).fit(X, Y)
|
||||
predictions = model.predict(x)[0]
|
||||
clf = RandomForestRegressor(dim, dim).fit(X, Y)
|
||||
predictions = clf.predict(x)[0]
|
||||
for idx in (-predictions).argsort():
|
||||
ts = tools.benchmark(operation, operation(*profiles[idx]), tree)
|
||||
ts = tools.benchmark(operation(*profiles[idx]), tree)
|
||||
if np.isfinite(ts):
|
||||
break
|
||||
if np.isfinite(ts):
|
||||
@@ -162,11 +162,11 @@ class Tuner:
|
||||
profiles.append(best)
|
||||
for xx,yy in zip(X, Y):
|
||||
tree, _operands = tools.tree_of(operation, xx, context)
|
||||
time = tools.benchmark(operation, best, _tree)
|
||||
time = tools.benchmark(operation(*best), _tree)
|
||||
yy.append(performance(xx, time))
|
||||
#Update dataset
|
||||
X.append(x)
|
||||
y = [performance(x,tools.benchmark(operation, prf, tree)) for prf in profiles]
|
||||
y = [performance(x,tools.benchmark(operation(*prf), tree)) for prf in profiles]
|
||||
Y.append(y)
|
||||
#Save data
|
||||
for (fname, data) in zip(['X.csv', 'Y.csv', 'profiles.csv'], [X, Y, profiles]):
|
||||
@@ -179,10 +179,13 @@ class Tuner:
|
||||
self.progress_bar.set_finished()
|
||||
|
||||
#Adding external profiles
|
||||
#~ for prf in tools.external_profiles(operation):
|
||||
#~ x = [1024, 1024, 1024]
|
||||
#~ tree, operands = tools.tree_of(operation, x, context)
|
||||
#~ print performance(x,tools.benchmark(operation, prf, tree))
|
||||
for prof in tools.external_profiles(operation):
|
||||
for x, y in zip(X, Y):
|
||||
tree, operands = tools.tree_of(operation, x, context)
|
||||
perf = performance(x,tools.benchmark(prof, tree))
|
||||
if perf > 0:
|
||||
profiles.append(prof.__class__.__name__)
|
||||
y.append(perf)
|
||||
|
||||
#Pruning of useless profiles
|
||||
if len(Y[0]) > 1:
|
||||
|
Reference in New Issue
Block a user