Bugfix: fixed dynamic kernel selection
This commit is contained in:
@@ -324,7 +324,7 @@ void bench(sc::numeric_type dtype, std::string operation)
|
||||
#ifdef HAS_A_BLAS
|
||||
int_t lda = A.ld(), ldb = B.ld(), ldc = C.ld();
|
||||
#endif
|
||||
BENCHMARK_ISAAC(C = sc::control(AT?(BT?dot(A.T(),B.T()):dot(A.T(),B)):(BT?dot(A,B.T()):dot(A,B)), sc::execution_options_type(0, &events), sc::dispatcher_options_type(false)), (double)2*M*N*K/t);
|
||||
BENCHMARK_ISAAC(C = sc::control(AT?(BT?dot(A.T(),B.T()):dot(A.T(),B)):(BT?dot(A,B.T()):dot(A,B)), sc::execution_options_type(0, &events), sc::dispatcher_options_type(true)), (double)2*M*N*K/t);
|
||||
/* clblas */
|
||||
#ifdef BENCH_CLBLAS
|
||||
if(C.context().backend()==sc::driver::OPENCL)
|
||||
|
@@ -48,10 +48,8 @@ inline std::string vstore(unsigned int simd_width, std::string const & dtype, st
|
||||
{
|
||||
switch(backend)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case driver::CUDA:
|
||||
return "reinterpret_cast<" + vdtype + "*>(" + ptr + ")[" + offset + "] = " + value;
|
||||
#endif
|
||||
case driver::OPENCL:
|
||||
return append_width("vstore", simd_width) + "(" + value + ", " + offset + ", " + ptr + ")";
|
||||
default:
|
||||
@@ -69,10 +67,8 @@ inline std::string vload(unsigned int simd_width, std::string const & dtype, std
|
||||
{
|
||||
switch(backend)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case driver::CUDA:
|
||||
return "reinterpret_cast<" + vdtype + "*>(" + ptr + ")[" + offset + "]";
|
||||
#endif
|
||||
case driver::OPENCL:
|
||||
return append_width("vload", simd_width) + "(" + offset + ", " + ptr + ")";
|
||||
default:
|
||||
|
@@ -87,6 +87,7 @@ void profiles::value_type::execute(controller<expressions_tuple> const & expr)
|
||||
{
|
||||
driver::Program const & program = init(expr);
|
||||
std::vector<int_t> x = templates_[0]->input_sizes(expr.x());
|
||||
static const int MAX_TEMPORARY_WORKSPACE = 1e6;
|
||||
|
||||
//Specific tuning if requested
|
||||
if(expr.dispatcher_options().tune && hardcoded_.find(x)==hardcoded_.end())
|
||||
@@ -94,6 +95,10 @@ void profiles::value_type::execute(controller<expressions_tuple> const & expr)
|
||||
std::vector<double> timings(templates_.size());
|
||||
for(unsigned int i = 0 ; i < templates_.size() ; ++i)
|
||||
{
|
||||
if(templates_[i]->temporary_workspace(expr.x()) > MAX_TEMPORARY_WORKSPACE){
|
||||
timings[i] = INFINITY;
|
||||
continue;
|
||||
}
|
||||
std::list<driver::Event> events;
|
||||
try{
|
||||
templates_[i]->enqueue(queue_, program, tools::to_string(i), *fallback_, control(expr.x(), execution_options_type(0, &events)));
|
||||
@@ -109,7 +114,6 @@ void profiles::value_type::execute(controller<expressions_tuple> const & expr)
|
||||
}
|
||||
|
||||
//Prediction
|
||||
static const int MAX_TEMPORARY_WORKSPACE = 1e6;
|
||||
|
||||
int label = 0;
|
||||
if(expr.dispatcher_options().label>=0)
|
||||
|
Reference in New Issue
Block a user