Inference: now benchmarking real time instead of kernel time

This commit is contained in:
Philippe Tillet
2016-10-06 17:49:35 -04:00
parent 33f309d496
commit be4d70d6c1
3 changed files with 43 additions and 20 deletions

View File

@@ -90,20 +90,13 @@ class Timer
public:
explicit Timer(bool run = false)
{
if (run)
start();
}
{ if (run) start(); }
void start()
{
_start = high_resolution_clock::now();
}
{ _start = high_resolution_clock::now(); }
nanoseconds get() const
{
return std::chrono::duration_cast<nanoseconds>(high_resolution_clock::now() - _start);
}
{ return std::chrono::duration_cast<nanoseconds>(high_resolution_clock::now() - _start); }
private:
high_resolution_clock::time_point _start;

View File

@@ -0,0 +1,33 @@
#ifndef ISAAC_TOOLS_CPP_TIMER_HPP
#define ISAAC_TOOLS_CPP_TIMER_HPP
#include <chrono>
namespace isaac
{
namespace tools
{
/**
 * Minimal stopwatch for measuring elapsed wall time between two points.
 *
 * Call start() (or construct with run = true) to record a reference point,
 * then get() to read how much time has passed since that point.
 */
class Timer
{
  // steady_clock is guaranteed monotonic. high_resolution_clock may be an
  // alias of system_clock, which can jump when the wall clock is adjusted
  // and would then yield skewed (even negative) intervals.
  typedef std::chrono::steady_clock clock_type;
  typedef std::chrono::nanoseconds nanoseconds;

public:
  /**
   * @param run when true, the reference point is recorded immediately;
   *            otherwise it stays default-constructed (the clock's epoch)
   *            until start() is called.
   */
  explicit Timer(bool run = false)
  {
    if (run)
      start();
  }

  /** Records "now" as the reference point, discarding any previous one. */
  void start()
  { _start = clock_type::now(); }

  /**
   * @return time elapsed since the last start(), in nanoseconds.
   *         The timer keeps running; get() may be called repeatedly.
   */
  nanoseconds get() const
  { return std::chrono::duration_cast<nanoseconds>(clock_type::now() - _start); }

private:
  clock_type::time_point _start;
};
}
}
#endif

View File

@@ -38,17 +38,12 @@
#include "isaac/jit/syntax/engine/process.h"
#include "isaac/tools/sys/getenv.hpp"
#include "isaac/tools/cpp/string.hpp"
#include "isaac/tools/cpp/timer.hpp"
namespace isaac
{
namespace runtime
{
// Reduction step for summing event durations: returns the running total
// `sum` increased by the value reported by `e.elapsed_time()`.
// NOTE(review): the unit of elapsed_time() is not visible here — confirm
// against driver::Event before mixing with other time sources.
static long time_event(long sum, driver::Event const & e)
{
  sum += e.elapsed_time();
  return sum;
}
driver::Program const & profiles::value_type::init(runtime::execution_handler const & expression)
{
driver::Context & context = (driver::Context&)expression.x().context();
@@ -96,23 +91,24 @@ void profiles::value_type::execute(runtime::execution_handler const & expr)
}
//Not cached
tools::Timer tmr;
std::vector<double> times;
std::vector<float> perf = predictor_->predict(x);
std::vector<size_t> idx(perf.size());
std::iota(idx.begin(), idx.end(), 0);
std::sort(idx.begin(), idx.end(), [&perf](size_t i1, size_t i2) {return perf[i1] > perf[i2];});
bool valid_found = false;
for(size_t k = 0 ; k < std::min<size_t>(idx.size(), idx.size()) || !valid_found ; k++){
for(size_t k = 0 ; k < std::min<size_t>(5, idx.size()) || !valid_found ; k++){
size_t i = idx[k];
if(templates_[i]->temporary_workspace(expr.x()) > MAX_TEMPORARY_WORKSPACE){
times.push_back(INFINITY);
continue;
}
std::list<driver::Event> events;
try{
templates_[i]->enqueue(queue_, program, tools::to_string(i), runtime::execution_handler(expr.x(), runtime::execution_options_type(0, &events)));
tmr.start();
templates_[i]->enqueue(queue_, program, tools::to_string(i), runtime::execution_handler(expr.x()));
queue_.synchronize();
times.push_back(1e-9*std::accumulate(events.begin(), events.end(), 0, &time_event));
times.push_back(1e-9*tmr.get().count());
valid_found = true;
}catch(...){
times.push_back(INFINITY);
@@ -121,6 +117,7 @@ void profiles::value_type::execute(runtime::execution_handler const & expr)
//Fill the override
size_t label = idx[std::distance(times.begin(),std::min_element(times.begin(), times.end()))];
labels_.insert({x, label});
// std::cout << label << std::endl;
templates_[label]->enqueue(queue_, program, tools::to_string(label), expr);
}