Some renaming; lower overhead in benchmark
This commit is contained in:
@@ -28,9 +28,9 @@ std::string model::define_extension(std::string const & extensions, std::string
|
||||
return std::string("");
|
||||
}
|
||||
|
||||
void model::fill_program_name(char* program_name, array_expressions_container const & array_expressions, binding_policy_t binding_policy)
|
||||
void model::fill_program_name(char* program_name, expressions_tuple const & expressions, binding_policy_t binding_policy)
|
||||
{
|
||||
if (array_expressions.order()==array_expressions_container::INDEPENDENT)
|
||||
if (expressions.order()==expressions_tuple::INDEPENDENT)
|
||||
*program_name++='i';
|
||||
else
|
||||
*program_name++='s';
|
||||
@@ -39,31 +39,39 @@ void model::fill_program_name(char* program_name, array_expressions_container co
|
||||
binder = new bind_to_handle();
|
||||
else
|
||||
binder = new bind_all_unique();
|
||||
for (array_expressions_container::data_type::const_iterator it = array_expressions.data().begin(); it != array_expressions.data().end(); ++it)
|
||||
for (expressions_tuple::data_type::const_iterator it = expressions.data().begin(); it != expressions.data().end(); ++it)
|
||||
traverse(**it, (*it)->root(), array_expression_representation_functor(*binder, program_name),true);
|
||||
*program_name='\0';
|
||||
delete binder;
|
||||
}
|
||||
|
||||
std::vector<cl_ext::lazy_compiler>& model::init(array_expressions_container const & array_expressions, cl::Context const & context, cl::Device const & device, bool force_recompilation)
|
||||
std::vector<cl_ext::lazy_compiler>& model::init(expressions_tuple const & expressions, runtime_options const & opt)
|
||||
{
|
||||
char program_name[256];
|
||||
fill_program_name(program_name, array_expressions, BIND_TO_HANDLE);
|
||||
std::string pname(program_name);
|
||||
cl::Context const & context = expressions.context();
|
||||
std::string pname;
|
||||
if(opt.program_name.empty())
|
||||
{
|
||||
char program_name[256];
|
||||
fill_program_name(program_name, expressions, BIND_TO_HANDLE);
|
||||
pname = std::string(program_name);
|
||||
}
|
||||
else
|
||||
pname = opt.program_name;
|
||||
std::vector<cl_ext::lazy_compiler> & to_init = lazy_programs_[context()][pname];
|
||||
if(to_init.empty())
|
||||
{
|
||||
cl::Device device = queue_.getInfo<CL_QUEUE_DEVICE>();
|
||||
std::string extensions = device.getInfo<CL_DEVICE_EXTENSIONS>();
|
||||
|
||||
to_init.push_back(cl_ext::lazy_compiler(context, pname, force_recompilation));
|
||||
to_init.push_back(cl_ext::lazy_compiler(context, pname, opt.recompile));
|
||||
to_init.back().add(define_extension(extensions, "cl_khr_fp64"));
|
||||
|
||||
to_init.push_back(cl_ext::lazy_compiler(context, pname + "_fb", force_recompilation));
|
||||
to_init.push_back(cl_ext::lazy_compiler(context, pname + "_fb", opt.recompile));
|
||||
to_init.back().add(define_extension(extensions, "cl_khr_fp64"));
|
||||
|
||||
for(size_t i = 0 ; i < templates_.size() ; ++i)
|
||||
{
|
||||
std::vector<std::string> cur = templates_[i]->generate(i, array_expressions, device);
|
||||
std::vector<std::string> cur = templates_[i]->generate(i, expressions, device);
|
||||
for(size_t j = 0 ; j < cur.size() ; ++j){
|
||||
to_init[j].add(cur[j]);
|
||||
}
|
||||
@@ -82,42 +90,37 @@ model::model(std::vector< tools::shared_ptr<base> > const & templates, cl::Comma
|
||||
model::model(base const & tp, cl::CommandQueue & queue) : templates_(1,tp.clone()), queue_(queue)
|
||||
{}
|
||||
|
||||
void model::execute(array_expressions_container const & array_expressions, bool bypass_predictor, bool force_recompilation)
|
||||
void model::execute(expressions_tuple const & expressions, runtime_options const & opt)
|
||||
{
|
||||
bypass_predictor = bypass_predictor || predictor_.get()==NULL;
|
||||
cl::Context const & context = array_expressions.context();
|
||||
assert(context() == queue_.getInfo<CL_QUEUE_CONTEXT>()());
|
||||
cl::Device const & device = queue_.getInfo<CL_QUEUE_DEVICE>();
|
||||
|
||||
std::vector<cl_ext::lazy_compiler> & compilers = init(array_expressions, context, device, force_recompilation);
|
||||
std::vector<cl_ext::lazy_compiler> & compilers = init(expressions, opt);
|
||||
|
||||
//Prediction
|
||||
std::vector<int_t> x = templates_[0]->input_sizes(array_expressions);
|
||||
int label;
|
||||
//The user tuned the model specifically for this input size
|
||||
if(hardcoded_.find(x)!=hardcoded_.end())
|
||||
label = hardcoded_.at(x);
|
||||
//The user bypasses the random forest
|
||||
else if(bypass_predictor)
|
||||
label = 0;
|
||||
//Default
|
||||
int label = 0;
|
||||
if(opt.label>=0)
|
||||
{
|
||||
label = opt.label;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<float> predictions = predictor_->predict(x);
|
||||
label = std::distance(predictions.begin(),std::min_element(predictions.begin(), predictions.end()));
|
||||
std::vector<int_t> x = templates_[0]->input_sizes(expressions);
|
||||
//The user tuned the model specifically for this input size
|
||||
if(hardcoded_.find(x)!=hardcoded_.end())
|
||||
label = hardcoded_.at(x);
|
||||
//The user bypasses the random forest
|
||||
else if(predictor_.get())
|
||||
{
|
||||
std::vector<float> predictions = predictor_->predict(x);
|
||||
label = std::distance(predictions.begin(),std::min_element(predictions.begin(), predictions.end()));
|
||||
}
|
||||
}
|
||||
|
||||
//Execution
|
||||
templates_[label]->enqueue(queue_, compilers, label, array_expressions);
|
||||
templates_[label]->enqueue(queue_, compilers, label, expressions);
|
||||
}
|
||||
|
||||
void model::tune(array_expressions_container const & array_expressions)
|
||||
void model::tune(expressions_tuple const & expressions)
|
||||
{
|
||||
cl::Context const & context = array_expressions.context();
|
||||
assert(context() == queue_.getInfo<CL_QUEUE_CONTEXT>()());
|
||||
cl::Device device = queue_.getInfo<CL_QUEUE_DEVICE>();
|
||||
|
||||
std::vector<cl_ext::lazy_compiler> & compilers = init(array_expressions, context, device, false);
|
||||
std::vector<cl_ext::lazy_compiler> & compilers = init(expressions);
|
||||
|
||||
//Collect the timings
|
||||
std::vector<float> timings(templates_.size());
|
||||
@@ -125,13 +128,13 @@ void model::tune(array_expressions_container const & array_expressions)
|
||||
for(size_t i = 0 ; i < templates_.size() ; ++i)
|
||||
{
|
||||
timer.start();
|
||||
templates_[i]->enqueue(queue_, compilers, i, array_expressions);
|
||||
templates_[i]->enqueue(queue_, compilers, i, expressions);
|
||||
queue_.finish();
|
||||
timings[i] = timer.get();
|
||||
}
|
||||
|
||||
//Fill the override
|
||||
std::vector<int_t> x = templates_[0]->input_sizes(array_expressions);
|
||||
std::vector<int_t> x = templates_[0]->input_sizes(expressions);
|
||||
hardcoded_[x] = std::distance(timings.begin(),std::min_element(timings.begin(), timings.end()));
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user