Inference: now explicit tuning among top-5 kernel predictions
@@ -1,16 +0,0 @@
-
-set terminal pdf
-set output 'bench.pdf'
-
-set xlabel 'N'
-set ylabel 'Bandwidth (GB/s)'
-set key top left
-stats "out.dat" nooutput
-
-set logscale x
-do for [i=1:STATS_blocks]{
-plot "out.dat" index (i-1) using 1:2 with lines title 'ViennaCL', \
-     "out.dat" index (i-1) using 1:3 with lines title 'Model', \
-     "out.dat" index (i-1) using 1:4 with lines title 'Optimal', \
-     "out.dat" index (i-1) using 1:5 with lines title 'CuBLAS'
-}
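The deleted script produced one plot per data block of out.dat, reading N from column 1 and bandwidths for ViennaCL, the model's pick, the best measured kernel, and cuBLAS from columns 2-5; gnuplot's `index` and `STATS_blocks` iterate over blocks separated by two blank lines. A minimal sketch of a writer producing that layout (the struct and file handling here are illustrative, not part of the repository):

// Illustrative only: emit "out.dat" in the layout the removed gnuplot script
// consumed -- one block per benchmarked configuration, blocks separated by two
// blank lines, columns: N, ViennaCL, Model, Optimal, CuBLAS (GB/s).
#include <fstream>
#include <vector>

struct sample { long N; double viennacl, model, optimal, cublas; };

int main()
{
  std::vector<std::vector<sample>> blocks; // one inner vector per benchmark sweep
  // ... fill `blocks` from the benchmark harness ...
  std::ofstream out("out.dat");
  for(std::vector<sample> const & block : blocks)
  {
    for(sample const & s : block)
      out << s.N << ' ' << s.viennacl << ' ' << s.model << ' ' << s.optimal << ' ' << s.cublas << '\n';
    out << "\n\n"; // two blank records terminate a gnuplot data block
  }
  return 0;
}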
@@ -60,7 +60,7 @@ public:
 private:
   templates_container templates_;
   std::shared_ptr<predictors::random_forest> predictor_;
-  std::map<std::vector<int_t>, int> hardcoded_;
+  std::map<std::vector<int_t>, int> labels_;
   driver::CommandQueue queue_;
   driver::ProgramCache & cache_;
 };

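The only change to the class itself is the rename of hardcoded_ to labels_: a map from an expression's input-size vector to the index of the template to run, now used as a plain lookup-or-insert cache rather than a user-supplied override. That usage pattern, stripped of the surrounding machinery (class and method names here are illustrative):

// Illustrative stand-in for the labels_ member: caches, per input-size key,
// the index of the kernel template found fastest for those sizes.
#include <cstdint>
#include <map>
#include <vector>

using int_t = std::int64_t;

class kernel_label_cache
{
public:
  // Returns the cached template index for these sizes, or -1 when nothing is cached yet.
  int find(std::vector<int_t> const & sizes) const
  {
    auto it = labels_.find(sizes);
    return it == labels_.end() ? -1 : it->second;
  }

  // Records the template index selected after benchmarking the top predictions.
  void insert(std::vector<int_t> const & sizes, int label)
  {
    labels_.insert({sizes, label});
  }

private:
  std::map<std::vector<int_t>, int> labels_;
};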
@@ -84,54 +84,44 @@ profiles::value_type::value_type(numeric_type dtype, std::shared_ptr<templates::
 
 void profiles::value_type::execute(runtime::execution_handler const & expr)
 {
+  static const int MAX_TEMPORARY_WORKSPACE = 1e6;
   driver::Program const & program = init(expr);
   std::vector<int_t> x = templates_[0]->input_sizes(expr.x());
-  static const int MAX_TEMPORARY_WORKSPACE = 1e6;
 
-  //Specific tuning if requested
-  if(expr.dispatcher_options().tune && hardcoded_.find(x)==hardcoded_.end())
-  {
-    std::vector<double> timings(templates_.size());
-    for(unsigned int i = 0 ; i < templates_.size() ; ++i)
-    {
-      if(templates_[i]->temporary_workspace(expr.x()) > MAX_TEMPORARY_WORKSPACE){
-        timings[i] = INFINITY;
-        continue;
-      }
-      std::list<driver::Event> events;
-      try{
-        templates_[i]->enqueue(queue_, program, tools::to_string(i), runtime::execution_handler(expr.x(), runtime::execution_options_type(0, &events)));
-        queue_.synchronize();
-        timings[i] = 1e-9*std::accumulate(events.begin(), events.end(), 0, &time_event);
-      }catch(...){
-        timings[i] = INFINITY;
-      }
-    }
-    //Fill the override
-    std::vector<int_t> x = templates_[0]->input_sizes(expr.x());
-    hardcoded_[x] = std::distance(timings.begin(),std::min_element(timings.begin(), timings.end()));
-  }
-
-  //Prediction
-  int label = 0;
-  if(expr.dispatcher_options().label>=0)
-    label = expr.dispatcher_options().label;
-  else if(hardcoded_.find(x)!=hardcoded_.end())
-    label = hardcoded_.at(x);
-  else if(predictor_.get())
-  {
-    std::vector<float> predictions = predictor_->predict(x);
-    do{
-      label = std::distance(predictions.begin(),std::max_element(predictions.begin(), predictions.end()));
-      predictions[label] = 0;
-    }while(templates_[label]->temporary_workspace(expr.x()) > MAX_TEMPORARY_WORKSPACE);
-  }
-
-  //Execution
-  if(templates_[label]->temporary_workspace(expr.x()) > MAX_TEMPORARY_WORKSPACE)
-    throw operation_not_supported_exception("Running this operation would require an overly large temporary.");
-
-  return templates_[label]->enqueue(queue_, program, tools::to_string(label), expr);
+  //Cached
+  auto it = labels_.find(x);
+  if(it!=labels_.end()){
+    templates_[it->second]->enqueue(queue_, program, tools::to_string(it->second), expr);
+    return;
+  }
+
+  //Not cached
+  std::vector<double> times;
+  std::vector<float> perf = predictor_->predict(x);
+  std::vector<size_t> idx(perf.size());
+  std::iota(idx.begin(), idx.end(), 0);
+  std::sort(idx.begin(), idx.end(), [&perf](size_t i1, size_t i2) {return perf[i1] > perf[i2];});
+  bool valid_found = false;
+  for(size_t k = 0 ; k < std::min<size_t>(idx.size(), 5) || !valid_found ; k++){
+    size_t i = idx[k];
+    if(templates_[i]->temporary_workspace(expr.x()) > MAX_TEMPORARY_WORKSPACE){
+      times.push_back(INFINITY);
+      continue;
+    }
+    std::list<driver::Event> events;
+    try{
+      templates_[i]->enqueue(queue_, program, tools::to_string(i), runtime::execution_handler(expr.x(), runtime::execution_options_type(0, &events)));
+      queue_.synchronize();
+      times.push_back(1e-9*std::accumulate(events.begin(), events.end(), 0, &time_event));
+      valid_found = true;
+    }catch(...){
+      times.push_back(INFINITY);
+    }
+  }
+
+  //Fill the override
+  size_t label = idx[std::distance(times.begin(),std::min_element(times.begin(), times.end()))];
+  labels_.insert({x, label});
+  templates_[label]->enqueue(queue_, program, tools::to_string(label), expr);
 }
 
 profiles::value_type::templates_container const & profiles::value_type::templates() const
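The rewritten execute() first checks labels_; on a miss it asks the random forest for a score per template, ranks the template indices by descending score, times the top five candidates that fit the temporary-workspace budget (continuing further down the ranking until at least one ran), caches the fastest, and enqueues it. The selection step in isolation, with the benchmarking abstracted behind a callback (function and parameter names here are illustrative, not the library's API):

// Illustrative sketch of the new selection logic, not the ISAAC API.
// benchmark(i) returns the measured execution time of template i, or INFINITY
// if that template cannot run for these sizes. Assumes predicted_perf is non-empty.
#include <algorithm>
#include <cmath>
#include <functional>
#include <numeric>
#include <vector>

size_t select_fastest_of_top_k(std::vector<float> const & predicted_perf,
                               std::function<double(size_t)> const & benchmark,
                               size_t top_k = 5)
{
  // Rank template indices by predicted performance, best first.
  std::vector<size_t> idx(predicted_perf.size());
  std::iota(idx.begin(), idx.end(), 0);
  std::sort(idx.begin(), idx.end(),
            [&](size_t a, size_t b){ return predicted_perf[a] > predicted_perf[b]; });

  // Time the top-k candidates; keep going past k until at least one of them ran.
  std::vector<double> times;
  bool valid_found = false;
  for(size_t k = 0; k < idx.size() && (k < std::min(idx.size(), top_k) || !valid_found); ++k)
  {
    double t = benchmark(idx[k]);
    times.push_back(t);
    if(std::isfinite(t))
      valid_found = true;
  }

  // The fastest measured candidate, mapped back to its template index.
  size_t best = std::distance(times.begin(), std::min_element(times.begin(), times.end()));
  return idx[best];
}

Unlike the loop in the diff, the sketch also bounds k by idx.size(), so the fallback scan cannot run past the end of the ranking when every candidate fails.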