#include #include #include #include "triton/driver/device.h" #include class timer{ typedef std::chrono::high_resolution_clock high_resolution_clock; typedef std::chrono::nanoseconds nanoseconds; public: explicit timer(bool run = false) { if (run) start(); } void start() { _start = high_resolution_clock::now(); } nanoseconds get() const { return std::chrono::duration_cast(high_resolution_clock::now() - _start); } private: high_resolution_clock::time_point _start; }; template T min(std::vector x) { return *std::min_element(x.begin(), x.end()); } template double bench(OP const & op, SYNC const & sync, triton::driver::device const & device) { timer tmr; std::vector times; double total_time = 0; op(); sync(); while(total_time*1e-9 < 1e-3){ float norm = 1; tmr.start(); op(); sync(); times.push_back(norm*tmr.get().count()); total_time+=times.back(); } return min(times); } // helper function to print a tuple of any size template struct TuplePrinter { static void print(const Tuple& t) { TuplePrinter::print(t); std::cout << ", " << std::get(t); } }; template struct TuplePrinter { static void print(const Tuple& t) { std::cout << std::get<0>(t); } }; template void print(const std::tuple& t) { std::cout << "("; TuplePrinter::print(t); std::cout << ")\n"; }