More cleaning

This commit is contained in:
Philippe Tillet
2016-10-04 02:06:11 -04:00
parent ffb9548b6a
commit a4ed0dfbec
5 changed files with 33 additions and 30 deletions

View File

@@ -64,7 +64,7 @@ private:
return (*fptr)(args...);
}
static void cublasCreate(cublasHandle_t* h);
static cublasStatus_t cublasCreate_v2(cublasHandle_t* h);
public:
static bool clinit();
@@ -146,10 +146,10 @@ public:
static nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char **headers, const char **includeNames);
static nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char *log);
static void cublasGetStream(cudaStream_t *streamId);
static void cublasSetStream(cudaStream_t streamId);
static void cublasSgemm (cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, float* alpha, const float *A, int lda, const float *B, int ldb, float* beta, float *C, int ldc);
static void cublasDgemm (cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, double* alpha, const double *A, int lda, const double *B, int ldb, double* beta, double *C, int ldc);
static cublasStatus_t cublasGetStream(cudaStream_t *streamId);
static cublasStatus_t cublasSetStream(cudaStream_t streamId);
static cublasStatus_t cublasSgemm (cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, float* alpha, const float *A, int lda, const float *B, int ldb, float* beta, float *C, int ldc);
static cublasStatus_t cublasDgemm (cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, double* alpha, const double *A, int lda, const double *B, int ldb, double* beta, double *C, int ldc);
private:
static void* opencl_;
@@ -230,7 +230,7 @@ private:
static void* nvrtcCreateProgram_;
static void* nvrtcGetProgramLog_;
static void* cublasCreate_;
static void* cublasCreate_v2_;
static void* cublasGetStream_;
static void* cublasSetStream_;
static void* cublasSgemm_;

View File

@@ -129,7 +129,7 @@ bool dispatch::cublasinit()
if(cublas_==nullptr){
cublas_ = dlopen("libcublas.so", RTLD_LAZY);
if(cublas_!=nullptr)
cublasCreate(&cublas_handle_);
dispatch::cublasCreate_v2(&cublas_handle_);
}
return cublas_ != nullptr;
}
@@ -212,19 +212,19 @@ NVRTC_DEFINE2(nvrtcResult, nvrtcGetPTXSize, nvrtcProgram, size_t *)
NVRTC_DEFINE6(nvrtcResult, nvrtcCreateProgram, nvrtcProgram *, const char *, const char *, int, const char **, const char **)
NVRTC_DEFINE2(nvrtcResult, nvrtcGetProgramLog, nvrtcProgram, char *)
CUBLAS_DEFINE1(void, cublasCreate, cublasHandle_t*)
CUBLAS_DEFINE1(cublasStatus_t, cublasCreate_v2, cublasHandle_t*)
void dispatch::cublasGetStream(cudaStream_t *a)
{ f_impl<dispatch::cublasinit>(cublas_, cublasGetStream_v2, cublasGetStream_, "cublasGetStream_v2", cublas_handle_, a); }
cublasStatus_t dispatch::cublasGetStream(cudaStream_t *a)
{ return f_impl<dispatch::cublasinit>(cublas_, cublasGetStream_v2, cublasGetStream_, "cublasGetStream_v2", cublas_handle_, a); }
void dispatch::cublasSetStream(cudaStream_t a)
{ f_impl<dispatch::cublasinit>(cublas_, cublasSetStream_v2, cublasSetStream_, "cublasSetStream_v2", cublas_handle_, a); }
cublasStatus_t dispatch::cublasSetStream(cudaStream_t a)
{ return f_impl<dispatch::cublasinit>(cublas_, cublasSetStream_v2, cublasSetStream_, "cublasSetStream_v2", cublas_handle_, a); }
void dispatch::cublasSgemm(cublasOperation_t at, cublasOperation_t bt, int m, int n, int k, float* alpha, const float *A, int lda, const float *B, int ldb, float* beta, float *C, int ldc)
{ f_impl<dispatch::cublasinit>(cublas_, cublasSgemm_v2, cublasSgemm_, "cublasSgemm_v2", cublas_handle_, at, bt, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);}
cublasStatus_t dispatch::cublasSgemm(cublasOperation_t at, cublasOperation_t bt, int m, int n, int k, float* alpha, const float *A, int lda, const float *B, int ldb, float* beta, float *C, int ldc)
{ return f_impl<dispatch::cublasinit>(cublas_, cublasSgemm_v2, cublasSgemm_, "cublasSgemm_v2", cublas_handle_, at, bt, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);}
void dispatch::cublasDgemm(cublasOperation_t at, cublasOperation_t bt, int m, int n, int k, double* alpha, const double *A, int lda, const double *B, int ldb, double* beta, double *C, int ldc)
{ f_impl<dispatch::cublasinit>(cublas_, cublasDgemm_v2, cublasDgemm_, "cublasDgemm_v2", cublas_handle_, at, bt, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);}
cublasStatus_t dispatch::cublasDgemm(cublasOperation_t at, cublasOperation_t bt, int m, int n, int k, double* alpha, const double *A, int lda, const double *B, int ldb, double* beta, double *C, int ldc)
{ return f_impl<dispatch::cublasinit>(cublas_, cublasDgemm_v2, cublasDgemm_, "cublasDgemm_v2", cublas_handle_, at, bt, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);}
void dispatch::release()
{
@@ -324,7 +324,7 @@ void* dispatch::nvrtcGetPTXSize_;
void* dispatch::nvrtcCreateProgram_;
void* dispatch::nvrtcGetProgramLog_;
void* dispatch::cublasCreate_;
void* dispatch::cublasCreate_v2_;
void* dispatch::cublasGetStream_;
void* dispatch::cublasSetStream_;
void* dispatch::cublasSgemm_;

View File

@@ -83,7 +83,7 @@ class GeneticOptimizer:
def evaluate(genome):
idx = tuple(genome)
if idx not in cache:
time = tools.benchmark(template, template(*decode(genome)), tree)
time = tools.benchmark(template(*decode(genome)), tree)
if time == float('inf'):
return time,
cache[idx] = time
@@ -173,7 +173,7 @@ def is_local_optimum(parameters, template, sizes, context):
sweep_over = [0,1,2,3,4]
#Evaluate the provided parameters guess
reference = tools.benchmark(template, template(*parameters), tree)
reference = tools.benchmark(template(*parameters), tree)
if isinf(reference):
return False
@@ -187,7 +187,7 @@ def is_local_optimum(parameters, template, sizes, context):
for x in product(*domain):
if x==parameters:
pass
time = tools.benchmark(template, template(*x), tree)
time = tools.benchmark(template(*x), tree)
if time/reference < .98:
return False
return True

View File

@@ -40,7 +40,7 @@ def linspace(a, b, n=100):
def expspace(a,b,N,r=128):
return [int(ceil(exp(x)/r)*r) for x in linspace(log(a), log(b), N)]
def benchmark(operation, template, tree):
def benchmark(template, tree):
queue = tree.context.queues[0]
queue.profiles[template, sc.float32] = sc.profile(template, sc.float32, queue)
times = []

View File

@@ -145,10 +145,10 @@ class Tuner:
best = None
if idx > 0:
dim = min(10, idx+1)
model = RandomForestRegressor(dim, dim).fit(X, Y)
predictions = model.predict(x)[0]
clf = RandomForestRegressor(dim, dim).fit(X, Y)
predictions = clf.predict(x)[0]
for idx in (-predictions).argsort():
ts = tools.benchmark(operation, operation(*profiles[idx]), tree)
ts = tools.benchmark(operation(*profiles[idx]), tree)
if np.isfinite(ts):
break
if np.isfinite(ts):
@@ -162,11 +162,11 @@ class Tuner:
profiles.append(best)
for xx,yy in zip(X, Y):
tree, _operands = tools.tree_of(operation, xx, context)
time = tools.benchmark(operation, best, _tree)
time = tools.benchmark(operation(*best), _tree)
yy.append(performance(xx, time))
#Update dataset
X.append(x)
y = [performance(x,tools.benchmark(operation, prf, tree)) for prf in profiles]
y = [performance(x,tools.benchmark(operation(*prf), tree)) for prf in profiles]
Y.append(y)
#Save data
for (fname, data) in zip(['X.csv', 'Y.csv', 'profiles.csv'], [X, Y, profiles]):
@@ -179,10 +179,13 @@ class Tuner:
self.progress_bar.set_finished()
#Adding external profiles
#~ for prf in tools.external_profiles(operation):
#~ x = [1024, 1024, 1024]
#~ tree, operands = tools.tree_of(operation, x, context)
#~ print performance(x,tools.benchmark(operation, prf, tree))
for prof in tools.external_profiles(operation):
for x, y in zip(X, Y):
tree, operands = tools.tree_of(operation, x, context)
perf = performance(x,tools.benchmark(prof, tree))
if perf > 0:
profiles.append(prof.__class__.__name__)
y.append(perf)
#Pruning of useless profiles
if len(Y[0]) > 1: