More cleaning

This commit is contained in:
Philippe Tillet
2016-10-04 02:06:11 -04:00
parent ffb9548b6a
commit a4ed0dfbec
5 changed files with 33 additions and 30 deletions

View File

@@ -64,7 +64,7 @@ private:
return (*fptr)(args...); return (*fptr)(args...);
} }
static void cublasCreate(cublasHandle_t* h); static cublasStatus_t cublasCreate_v2(cublasHandle_t* h);
public: public:
static bool clinit(); static bool clinit();
@@ -146,10 +146,10 @@ public:
static nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char **headers, const char **includeNames); static nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char **headers, const char **includeNames);
static nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char *log); static nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char *log);
static void cublasGetStream(cudaStream_t *streamId); static cublasStatus_t cublasGetStream(cudaStream_t *streamId);
static void cublasSetStream(cudaStream_t streamId); static cublasStatus_t cublasSetStream(cudaStream_t streamId);
static void cublasSgemm (cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, float* alpha, const float *A, int lda, const float *B, int ldb, float* beta, float *C, int ldc); static cublasStatus_t cublasSgemm (cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, float* alpha, const float *A, int lda, const float *B, int ldb, float* beta, float *C, int ldc);
static void cublasDgemm (cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, double* alpha, const double *A, int lda, const double *B, int ldb, double* beta, double *C, int ldc); static cublasStatus_t cublasDgemm (cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, double* alpha, const double *A, int lda, const double *B, int ldb, double* beta, double *C, int ldc);
private: private:
static void* opencl_; static void* opencl_;
@@ -230,7 +230,7 @@ private:
static void* nvrtcCreateProgram_; static void* nvrtcCreateProgram_;
static void* nvrtcGetProgramLog_; static void* nvrtcGetProgramLog_;
static void* cublasCreate_; static void* cublasCreate_v2_;
static void* cublasGetStream_; static void* cublasGetStream_;
static void* cublasSetStream_; static void* cublasSetStream_;
static void* cublasSgemm_; static void* cublasSgemm_;

View File

@@ -129,7 +129,7 @@ bool dispatch::cublasinit()
if(cublas_==nullptr){ if(cublas_==nullptr){
cublas_ = dlopen("libcublas.so", RTLD_LAZY); cublas_ = dlopen("libcublas.so", RTLD_LAZY);
if(cublas_!=nullptr) if(cublas_!=nullptr)
cublasCreate(&cublas_handle_); dispatch::cublasCreate_v2(&cublas_handle_);
} }
return cublas_ != nullptr; return cublas_ != nullptr;
} }
@@ -212,19 +212,19 @@ NVRTC_DEFINE2(nvrtcResult, nvrtcGetPTXSize, nvrtcProgram, size_t *)
NVRTC_DEFINE6(nvrtcResult, nvrtcCreateProgram, nvrtcProgram *, const char *, const char *, int, const char **, const char **) NVRTC_DEFINE6(nvrtcResult, nvrtcCreateProgram, nvrtcProgram *, const char *, const char *, int, const char **, const char **)
NVRTC_DEFINE2(nvrtcResult, nvrtcGetProgramLog, nvrtcProgram, char *) NVRTC_DEFINE2(nvrtcResult, nvrtcGetProgramLog, nvrtcProgram, char *)
CUBLAS_DEFINE1(void, cublasCreate, cublasHandle_t*) CUBLAS_DEFINE1(cublasStatus_t, cublasCreate_v2, cublasHandle_t*)
void dispatch::cublasGetStream(cudaStream_t *a) cublasStatus_t dispatch::cublasGetStream(cudaStream_t *a)
{ f_impl<dispatch::cublasinit>(cublas_, cublasGetStream_v2, cublasGetStream_, "cublasGetStream_v2", cublas_handle_, a); } { return f_impl<dispatch::cublasinit>(cublas_, cublasGetStream_v2, cublasGetStream_, "cublasGetStream_v2", cublas_handle_, a); }
void dispatch::cublasSetStream(cudaStream_t a) cublasStatus_t dispatch::cublasSetStream(cudaStream_t a)
{ f_impl<dispatch::cublasinit>(cublas_, cublasSetStream_v2, cublasSetStream_, "cublasSetStream_v2", cublas_handle_, a); } { return f_impl<dispatch::cublasinit>(cublas_, cublasSetStream_v2, cublasSetStream_, "cublasSetStream_v2", cublas_handle_, a); }
void dispatch::cublasSgemm(cublasOperation_t at, cublasOperation_t bt, int m, int n, int k, float* alpha, const float *A, int lda, const float *B, int ldb, float* beta, float *C, int ldc) cublasStatus_t dispatch::cublasSgemm(cublasOperation_t at, cublasOperation_t bt, int m, int n, int k, float* alpha, const float *A, int lda, const float *B, int ldb, float* beta, float *C, int ldc)
{ f_impl<dispatch::cublasinit>(cublas_, cublasSgemm_v2, cublasSgemm_, "cublasSgemm_v2", cublas_handle_, at, bt, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);} { return f_impl<dispatch::cublasinit>(cublas_, cublasSgemm_v2, cublasSgemm_, "cublasSgemm_v2", cublas_handle_, at, bt, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);}
void dispatch::cublasDgemm(cublasOperation_t at, cublasOperation_t bt, int m, int n, int k, double* alpha, const double *A, int lda, const double *B, int ldb, double* beta, double *C, int ldc) cublasStatus_t dispatch::cublasDgemm(cublasOperation_t at, cublasOperation_t bt, int m, int n, int k, double* alpha, const double *A, int lda, const double *B, int ldb, double* beta, double *C, int ldc)
{ f_impl<dispatch::cublasinit>(cublas_, cublasDgemm_v2, cublasDgemm_, "cublasDgemm_v2", cublas_handle_, at, bt, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);} { return f_impl<dispatch::cublasinit>(cublas_, cublasDgemm_v2, cublasDgemm_, "cublasDgemm_v2", cublas_handle_, at, bt, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);}
void dispatch::release() void dispatch::release()
{ {
@@ -324,7 +324,7 @@ void* dispatch::nvrtcGetPTXSize_;
void* dispatch::nvrtcCreateProgram_; void* dispatch::nvrtcCreateProgram_;
void* dispatch::nvrtcGetProgramLog_; void* dispatch::nvrtcGetProgramLog_;
void* dispatch::cublasCreate_; void* dispatch::cublasCreate_v2_;
void* dispatch::cublasGetStream_; void* dispatch::cublasGetStream_;
void* dispatch::cublasSetStream_; void* dispatch::cublasSetStream_;
void* dispatch::cublasSgemm_; void* dispatch::cublasSgemm_;

View File

@@ -83,7 +83,7 @@ class GeneticOptimizer:
def evaluate(genome): def evaluate(genome):
idx = tuple(genome) idx = tuple(genome)
if idx not in cache: if idx not in cache:
time = tools.benchmark(template, template(*decode(genome)), tree) time = tools.benchmark(template(*decode(genome)), tree)
if time == float('inf'): if time == float('inf'):
return time, return time,
cache[idx] = time cache[idx] = time
@@ -173,7 +173,7 @@ def is_local_optimum(parameters, template, sizes, context):
sweep_over = [0,1,2,3,4] sweep_over = [0,1,2,3,4]
#Evaluate the provided parameters guess #Evaluate the provided parameters guess
reference = tools.benchmark(template, template(*parameters), tree) reference = tools.benchmark(template(*parameters), tree)
if isinf(reference): if isinf(reference):
return False return False
@@ -187,7 +187,7 @@ def is_local_optimum(parameters, template, sizes, context):
for x in product(*domain): for x in product(*domain):
if x==parameters: if x==parameters:
pass pass
time = tools.benchmark(template, template(*x), tree) time = tools.benchmark(template(*x), tree)
if time/reference < .98: if time/reference < .98:
return False return False
return True return True

View File

@@ -40,7 +40,7 @@ def linspace(a, b, n=100):
def expspace(a,b,N,r=128): def expspace(a,b,N,r=128):
return [int(ceil(exp(x)/r)*r) for x in linspace(log(a), log(b), N)] return [int(ceil(exp(x)/r)*r) for x in linspace(log(a), log(b), N)]
def benchmark(operation, template, tree): def benchmark(template, tree):
queue = tree.context.queues[0] queue = tree.context.queues[0]
queue.profiles[template, sc.float32] = sc.profile(template, sc.float32, queue) queue.profiles[template, sc.float32] = sc.profile(template, sc.float32, queue)
times = [] times = []

View File

@@ -145,10 +145,10 @@ class Tuner:
best = None best = None
if idx > 0: if idx > 0:
dim = min(10, idx+1) dim = min(10, idx+1)
model = RandomForestRegressor(dim, dim).fit(X, Y) clf = RandomForestRegressor(dim, dim).fit(X, Y)
predictions = model.predict(x)[0] predictions = clf.predict(x)[0]
for idx in (-predictions).argsort(): for idx in (-predictions).argsort():
ts = tools.benchmark(operation, operation(*profiles[idx]), tree) ts = tools.benchmark(operation(*profiles[idx]), tree)
if np.isfinite(ts): if np.isfinite(ts):
break break
if np.isfinite(ts): if np.isfinite(ts):
@@ -162,11 +162,11 @@ class Tuner:
profiles.append(best) profiles.append(best)
for xx,yy in zip(X, Y): for xx,yy in zip(X, Y):
tree, _operands = tools.tree_of(operation, xx, context) tree, _operands = tools.tree_of(operation, xx, context)
time = tools.benchmark(operation, best, _tree) time = tools.benchmark(operation(*best), _tree)
yy.append(performance(xx, time)) yy.append(performance(xx, time))
#Update dataset #Update dataset
X.append(x) X.append(x)
y = [performance(x,tools.benchmark(operation, prf, tree)) for prf in profiles] y = [performance(x,tools.benchmark(operation(*prf), tree)) for prf in profiles]
Y.append(y) Y.append(y)
#Save data #Save data
for (fname, data) in zip(['X.csv', 'Y.csv', 'profiles.csv'], [X, Y, profiles]): for (fname, data) in zip(['X.csv', 'Y.csv', 'profiles.csv'], [X, Y, profiles]):
@@ -179,10 +179,13 @@ class Tuner:
self.progress_bar.set_finished() self.progress_bar.set_finished()
#Adding external profiles #Adding external profiles
#~ for prf in tools.external_profiles(operation): for prof in tools.external_profiles(operation):
#~ x = [1024, 1024, 1024] for x, y in zip(X, Y):
#~ tree, operands = tools.tree_of(operation, x, context) tree, operands = tools.tree_of(operation, x, context)
#~ print performance(x,tools.benchmark(operation, prf, tree)) perf = performance(x,tools.benchmark(prof, tree))
if perf > 0:
profiles.append(prof.__class__.__name__)
y.append(perf)
#Pruning of useless profiles #Pruning of useless profiles
if len(Y[0]) > 1: if len(Y[0]) > 1: