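Bugfix: fixed bug in dynamic kernel selection (templates whose temporary workspace exceeds MAX_TEMPORARY_WORKSPACE are now skipped during tuning)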

This commit is contained in:
Philippe Tillet
2015-08-26 19:11:09 -04:00
parent ffb3c01b77
commit f06a3bdf53
3 changed files with 6 additions and 6 deletions

View File

@@ -324,7 +324,7 @@ void bench(sc::numeric_type dtype, std::string operation)
#ifdef HAS_A_BLAS #ifdef HAS_A_BLAS
int_t lda = A.ld(), ldb = B.ld(), ldc = C.ld(); int_t lda = A.ld(), ldb = B.ld(), ldc = C.ld();
#endif #endif
BENCHMARK_ISAAC(C = sc::control(AT?(BT?dot(A.T(),B.T()):dot(A.T(),B)):(BT?dot(A,B.T()):dot(A,B)), sc::execution_options_type(0, &events), sc::dispatcher_options_type(false)), (double)2*M*N*K/t); BENCHMARK_ISAAC(C = sc::control(AT?(BT?dot(A.T(),B.T()):dot(A.T(),B)):(BT?dot(A,B.T()):dot(A,B)), sc::execution_options_type(0, &events), sc::dispatcher_options_type(true)), (double)2*M*N*K/t);
/* clblas */ /* clblas */
#ifdef BENCH_CLBLAS #ifdef BENCH_CLBLAS
if(C.context().backend()==sc::driver::OPENCL) if(C.context().backend()==sc::driver::OPENCL)

View File

@@ -48,10 +48,8 @@ inline std::string vstore(unsigned int simd_width, std::string const & dtype, st
{ {
switch(backend) switch(backend)
{ {
#ifdef ISAAC_WITH_CUDA
case driver::CUDA: case driver::CUDA:
return "reinterpret_cast<" + vdtype + "*>(" + ptr + ")[" + offset + "] = " + value; return "reinterpret_cast<" + vdtype + "*>(" + ptr + ")[" + offset + "] = " + value;
#endif
case driver::OPENCL: case driver::OPENCL:
return append_width("vstore", simd_width) + "(" + value + ", " + offset + ", " + ptr + ")"; return append_width("vstore", simd_width) + "(" + value + ", " + offset + ", " + ptr + ")";
default: default:
@@ -69,10 +67,8 @@ inline std::string vload(unsigned int simd_width, std::string const & dtype, std
{ {
switch(backend) switch(backend)
{ {
#ifdef ISAAC_WITH_CUDA
case driver::CUDA: case driver::CUDA:
return "reinterpret_cast<" + vdtype + "*>(" + ptr + ")[" + offset + "]"; return "reinterpret_cast<" + vdtype + "*>(" + ptr + ")[" + offset + "]";
#endif
case driver::OPENCL: case driver::OPENCL:
return append_width("vload", simd_width) + "(" + offset + ", " + ptr + ")"; return append_width("vload", simd_width) + "(" + offset + ", " + ptr + ")";
default: default:

View File

@@ -87,6 +87,7 @@ void profiles::value_type::execute(controller<expressions_tuple> const & expr)
{ {
driver::Program const & program = init(expr); driver::Program const & program = init(expr);
std::vector<int_t> x = templates_[0]->input_sizes(expr.x()); std::vector<int_t> x = templates_[0]->input_sizes(expr.x());
static const int MAX_TEMPORARY_WORKSPACE = 1e6;
//Specific tuning if requested //Specific tuning if requested
if(expr.dispatcher_options().tune && hardcoded_.find(x)==hardcoded_.end()) if(expr.dispatcher_options().tune && hardcoded_.find(x)==hardcoded_.end())
@@ -94,6 +95,10 @@ void profiles::value_type::execute(controller<expressions_tuple> const & expr)
std::vector<double> timings(templates_.size()); std::vector<double> timings(templates_.size());
for(unsigned int i = 0 ; i < templates_.size() ; ++i) for(unsigned int i = 0 ; i < templates_.size() ; ++i)
{ {
if(templates_[i]->temporary_workspace(expr.x()) > MAX_TEMPORARY_WORKSPACE){
timings[i] = INFINITY;
continue;
}
std::list<driver::Event> events; std::list<driver::Event> events;
try{ try{
templates_[i]->enqueue(queue_, program, tools::to_string(i), *fallback_, control(expr.x(), execution_options_type(0, &events))); templates_[i]->enqueue(queue_, program, tools::to_string(i), *fallback_, control(expr.x(), execution_options_type(0, &events)));
@@ -109,7 +114,6 @@ void profiles::value_type::execute(controller<expressions_tuple> const & expr)
} }
//Prediction //Prediction
static const int MAX_TEMPORARY_WORKSPACE = 1e6;
int label = 0; int label = 0;
if(expr.dispatcher_options().label>=0) if(expr.dispatcher_options().label>=0)