Bugfix: dynamic kernel selection now skips templates whose temporary workspace exceeds MAX_TEMPORARY_WORKSPACE during tuning
This commit is contained in:
@@ -324,7 +324,7 @@ void bench(sc::numeric_type dtype, std::string operation)
|
|||||||
#ifdef HAS_A_BLAS
|
#ifdef HAS_A_BLAS
|
||||||
int_t lda = A.ld(), ldb = B.ld(), ldc = C.ld();
|
int_t lda = A.ld(), ldb = B.ld(), ldc = C.ld();
|
||||||
#endif
|
#endif
|
||||||
BENCHMARK_ISAAC(C = sc::control(AT?(BT?dot(A.T(),B.T()):dot(A.T(),B)):(BT?dot(A,B.T()):dot(A,B)), sc::execution_options_type(0, &events), sc::dispatcher_options_type(false)), (double)2*M*N*K/t);
|
BENCHMARK_ISAAC(C = sc::control(AT?(BT?dot(A.T(),B.T()):dot(A.T(),B)):(BT?dot(A,B.T()):dot(A,B)), sc::execution_options_type(0, &events), sc::dispatcher_options_type(true)), (double)2*M*N*K/t);
|
||||||
/* clblas */
|
/* clblas */
|
||||||
#ifdef BENCH_CLBLAS
|
#ifdef BENCH_CLBLAS
|
||||||
if(C.context().backend()==sc::driver::OPENCL)
|
if(C.context().backend()==sc::driver::OPENCL)
|
||||||
|
@@ -48,10 +48,8 @@ inline std::string vstore(unsigned int simd_width, std::string const & dtype, st
|
|||||||
{
|
{
|
||||||
switch(backend)
|
switch(backend)
|
||||||
{
|
{
|
||||||
#ifdef ISAAC_WITH_CUDA
|
|
||||||
case driver::CUDA:
|
case driver::CUDA:
|
||||||
return "reinterpret_cast<" + vdtype + "*>(" + ptr + ")[" + offset + "] = " + value;
|
return "reinterpret_cast<" + vdtype + "*>(" + ptr + ")[" + offset + "] = " + value;
|
||||||
#endif
|
|
||||||
case driver::OPENCL:
|
case driver::OPENCL:
|
||||||
return append_width("vstore", simd_width) + "(" + value + ", " + offset + ", " + ptr + ")";
|
return append_width("vstore", simd_width) + "(" + value + ", " + offset + ", " + ptr + ")";
|
||||||
default:
|
default:
|
||||||
@@ -69,10 +67,8 @@ inline std::string vload(unsigned int simd_width, std::string const & dtype, std
|
|||||||
{
|
{
|
||||||
switch(backend)
|
switch(backend)
|
||||||
{
|
{
|
||||||
#ifdef ISAAC_WITH_CUDA
|
|
||||||
case driver::CUDA:
|
case driver::CUDA:
|
||||||
return "reinterpret_cast<" + vdtype + "*>(" + ptr + ")[" + offset + "]";
|
return "reinterpret_cast<" + vdtype + "*>(" + ptr + ")[" + offset + "]";
|
||||||
#endif
|
|
||||||
case driver::OPENCL:
|
case driver::OPENCL:
|
||||||
return append_width("vload", simd_width) + "(" + offset + ", " + ptr + ")";
|
return append_width("vload", simd_width) + "(" + offset + ", " + ptr + ")";
|
||||||
default:
|
default:
|
||||||
|
@@ -87,6 +87,7 @@ void profiles::value_type::execute(controller<expressions_tuple> const & expr)
|
|||||||
{
|
{
|
||||||
driver::Program const & program = init(expr);
|
driver::Program const & program = init(expr);
|
||||||
std::vector<int_t> x = templates_[0]->input_sizes(expr.x());
|
std::vector<int_t> x = templates_[0]->input_sizes(expr.x());
|
||||||
|
static const int MAX_TEMPORARY_WORKSPACE = 1e6;
|
||||||
|
|
||||||
//Specific tuning if requested
|
//Specific tuning if requested
|
||||||
if(expr.dispatcher_options().tune && hardcoded_.find(x)==hardcoded_.end())
|
if(expr.dispatcher_options().tune && hardcoded_.find(x)==hardcoded_.end())
|
||||||
@@ -94,6 +95,10 @@ void profiles::value_type::execute(controller<expressions_tuple> const & expr)
|
|||||||
std::vector<double> timings(templates_.size());
|
std::vector<double> timings(templates_.size());
|
||||||
for(unsigned int i = 0 ; i < templates_.size() ; ++i)
|
for(unsigned int i = 0 ; i < templates_.size() ; ++i)
|
||||||
{
|
{
|
||||||
|
if(templates_[i]->temporary_workspace(expr.x()) > MAX_TEMPORARY_WORKSPACE){
|
||||||
|
timings[i] = INFINITY;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
std::list<driver::Event> events;
|
std::list<driver::Event> events;
|
||||||
try{
|
try{
|
||||||
templates_[i]->enqueue(queue_, program, tools::to_string(i), *fallback_, control(expr.x(), execution_options_type(0, &events)));
|
templates_[i]->enqueue(queue_, program, tools::to_string(i), *fallback_, control(expr.x(), execution_options_type(0, &events)));
|
||||||
@@ -109,7 +114,6 @@ void profiles::value_type::execute(controller<expressions_tuple> const & expr)
|
|||||||
}
|
}
|
||||||
|
|
||||||
//Prediction
|
//Prediction
|
||||||
static const int MAX_TEMPORARY_WORKSPACE = 1e6;
|
|
||||||
|
|
||||||
int label = 0;
|
int label = 0;
|
||||||
if(expr.dispatcher_options().label>=0)
|
if(expr.dispatcher_options().label>=0)
|
||||||
|
Reference in New Issue
Block a user