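Bugfix: fixed bug in dynamic kernel selection (templates whose temporary workspace exceeds MAX_TEMPORARY_WORKSPACE are now skipped during tuning)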

This commit is contained in:
Philippe Tillet
2015-08-26 19:11:09 -04:00
parent ffb3c01b77
commit f06a3bdf53
3 changed files with 6 additions and 6 deletions

View File

@@ -324,7 +324,7 @@ void bench(sc::numeric_type dtype, std::string operation)
#ifdef HAS_A_BLAS
int_t lda = A.ld(), ldb = B.ld(), ldc = C.ld();
#endif
BENCHMARK_ISAAC(C = sc::control(AT?(BT?dot(A.T(),B.T()):dot(A.T(),B)):(BT?dot(A,B.T()):dot(A,B)), sc::execution_options_type(0, &events), sc::dispatcher_options_type(false)), (double)2*M*N*K/t);
BENCHMARK_ISAAC(C = sc::control(AT?(BT?dot(A.T(),B.T()):dot(A.T(),B)):(BT?dot(A,B.T()):dot(A,B)), sc::execution_options_type(0, &events), sc::dispatcher_options_type(true)), (double)2*M*N*K/t);
/* clblas */
#ifdef BENCH_CLBLAS
if(C.context().backend()==sc::driver::OPENCL)

View File

@@ -48,10 +48,8 @@ inline std::string vstore(unsigned int simd_width, std::string const & dtype, st
{
switch(backend)
{
#ifdef ISAAC_WITH_CUDA
case driver::CUDA:
return "reinterpret_cast<" + vdtype + "*>(" + ptr + ")[" + offset + "] = " + value;
#endif
case driver::OPENCL:
return append_width("vstore", simd_width) + "(" + value + ", " + offset + ", " + ptr + ")";
default:
@@ -69,10 +67,8 @@ inline std::string vload(unsigned int simd_width, std::string const & dtype, std
{
switch(backend)
{
#ifdef ISAAC_WITH_CUDA
case driver::CUDA:
return "reinterpret_cast<" + vdtype + "*>(" + ptr + ")[" + offset + "]";
#endif
case driver::OPENCL:
return append_width("vload", simd_width) + "(" + offset + ", " + ptr + ")";
default:

View File

@@ -87,6 +87,7 @@ void profiles::value_type::execute(controller<expressions_tuple> const & expr)
{
driver::Program const & program = init(expr);
std::vector<int_t> x = templates_[0]->input_sizes(expr.x());
static const int MAX_TEMPORARY_WORKSPACE = 1e6;
//Specific tuning if requested
if(expr.dispatcher_options().tune && hardcoded_.find(x)==hardcoded_.end())
@@ -94,6 +95,10 @@ void profiles::value_type::execute(controller<expressions_tuple> const & expr)
std::vector<double> timings(templates_.size());
for(unsigned int i = 0 ; i < templates_.size() ; ++i)
{
if(templates_[i]->temporary_workspace(expr.x()) > MAX_TEMPORARY_WORKSPACE){
timings[i] = INFINITY;
continue;
}
std::list<driver::Event> events;
try{
templates_[i]->enqueue(queue_, program, tools::to_string(i), *fallback_, control(expr.x(), execution_options_type(0, &events)));
@@ -109,7 +114,6 @@ void profiles::value_type::execute(controller<expressions_tuple> const & expr)
}
//Prediction
static const int MAX_TEMPORARY_WORKSPACE = 1e6;
int label = 0;
if(expr.dispatcher_options().label>=0)