Better benchmarking
@@ -12,6 +12,12 @@
namespace ad = atidlas;
typedef atidlas::atidlas_int_t int_t;

template<class T>
float bandwidth(std::size_t N, float t)
{
  return N * sizeof(T) * 1e-9 / t;
}

template<class T>
void bench(std::vector<int_t> BLAS1_N, std::map<std::string, ad::tools::shared_ptr<ad::model> > & models)
{
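The new bandwidth<T> helper turns an element count and a runtime into effective throughput: N * sizeof(T) bytes, scaled to GB (1e-9) and divided by the time in seconds. A minimal, self-contained sanity check of the formula (illustrative only, not part of the commit; assumes T = float, i.e. 4-byte elements):

// Illustrative sketch only (not part of the commit): checks the GB/s formula.
#include <cstddef>
#include <iostream>

template<class T>
float bandwidth(std::size_t N, float t)
{ return N * sizeof(T) * 1e-9 / t; }

int main()
{
  // 3e7 floats moved in 1 ms: 3e7 * 4 bytes * 1e-9 / 1e-3 s = 120 GB/s
  std::cout << bandwidth<float>(30000000, 1e-3f) << " GB/s" << std::endl;
  return 0;
}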
@@ -34,30 +40,39 @@ void bench(std::vector<int_t> BLAS1_N, std::map<std::string, ad::tools::shared_p
  viennacl::backend::finish();\
  float resname = ad::tools::median(times);

  //BLAS1
  {
    for(std::vector<int_t>::const_iterator it = BLAS1_N.begin() ; it != BLAS1_N.end() ; ++it)
    {
      viennacl::vector<T> x(*it), y(*it), z(*it);
      viennacl::scheduler::statement statement(z, viennacl::op_assign(), x + y);
      BENCHMARK(models["vector-axpy-float32"]->execute(statement), time_model);
      BENCHMARK(models["vector-axpy-float32"]->execute(statement, true), time_unique_kernel);
      models["vector-axpy-float32"]->tune(statement);
      BENCHMARK(models["vector-axpy-float32"]->execute(statement), time_opt);
      std::cout << "#N PerfNaive PerfModel PerfOpt" << std::endl;

      std::cout << *it << " " << 3*(*it)*sizeof(T)*1e-9/time_unique_kernel << " " << 3*(*it)*sizeof(T)*1e-9/time_model << " " << 3*(*it)*sizeof(T)*1e-9/time_opt << std::endl;

    }
  }
#define BENCH(declarations, statement_op, sizes, measure, N, key) \
  std::cout << "#" << key << std::endl;\
  for(std::vector<int_t>::const_iterator it = sizes.begin() ; it != sizes.end() ; ++it)\
  {\
    declarations;\
    viennacl::scheduler::statement statement(statement_op);\
    BENCHMARK(models["vector-axpy-float32"]->execute(statement), time_model);\
    BENCHMARK(models["vector-axpy-float32"]->execute(statement, true), time_unique_kernel);\
    models["vector-axpy-float32"]->tune(statement);\
    BENCHMARK(models["vector-axpy-float32"]->execute(statement), time_opt);\
    std::cout << *it << " " << measure<T>(N,time_unique_kernel) << " " << measure<T>(N,time_model) << " " << measure<T>(N,time_opt) << std::endl;\
  }\

#define DECLARE(type, ...) type __VA_ARGS__
#define ARGS(...) __VA_ARGS__

  BENCH(DECLARE(viennacl::vector<T>, x(*it), y(*it), z(*it)), ARGS(z, viennacl::op_assign(), x + y), BLAS1_N, bandwidth, 3*(*it), "vector-axpy-float32");
  std::cout << std::endl;
  std::cout << std::endl;
  BENCH(DECLARE(viennacl::vector<T>, x(*it), y(*it), z(*it)), ARGS(z, viennacl::op_assign(), x + y), BLAS1_N, bandwidth, 3*(*it), "reduction-float32");
  std::cout << std::endl;
  std::cout << std::endl;
  BENCH(DECLARE(viennacl::vector<T>, x(*it), y(*it), z(*it)), ARGS(z, viennacl::op_assign(), x + y), BLAS1_N, bandwidth, 3*(*it), "row-wise-reduction-float32");
  std::cout << std::endl;
  std::cout << std::endl;
}

std::vector<int_t> create_log_range(int_t min, int_t max, int_t N)
{
  std::vector<int_t> res(N);
  for(int_t i = 0 ; i < N ; ++i)
    //res[i] = std::exp(std::log(min) + float(std::log(max) - std::log(min)*i)/N);
    res[i] = std::exp(std::log(min) + (float)(std::log(max) - std::log(min))*i/N);
  return res;
}
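For reference, a hedged sketch (not part of the commit) of what the first BENCH invocation above expands to once the preprocessor substitutes DECLARE and ARGS; the BENCHMARK macro, of which this hunk only shows the tail (the backend::finish() call and the median over the recorded times), is left unexpanded:

// Roughly what
//   BENCH(DECLARE(viennacl::vector<T>, x(*it), y(*it), z(*it)),
//         ARGS(z, viennacl::op_assign(), x + y),
//         BLAS1_N, bandwidth, 3*(*it), "vector-axpy-float32");
// expands to (reformatted across lines for readability):
std::cout << "#" << "vector-axpy-float32" << std::endl;
for(std::vector<int_t>::const_iterator it = BLAS1_N.begin() ; it != BLAS1_N.end() ; ++it)
{
  viennacl::vector<T> x(*it), y(*it), z(*it);
  viennacl::scheduler::statement statement(z, viennacl::op_assign(), x + y);
  BENCHMARK(models["vector-axpy-float32"]->execute(statement), time_model);
  BENCHMARK(models["vector-axpy-float32"]->execute(statement, true), time_unique_kernel);
  models["vector-axpy-float32"]->tune(statement);
  BENCHMARK(models["vector-axpy-float32"]->execute(statement), time_opt);
  std::cout << *it << " " << bandwidth<T>(3*(*it), time_unique_kernel)
            << " " << bandwidth<T>(3*(*it), time_model)
            << " " << bandwidth<T>(3*(*it), time_opt) << std::endl;
}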
@@ -67,12 +82,13 @@ int main(int argc, char* argv[])
  if(argc != 2)
  {
    std::cerr << "Usage : PROG model_file" << std::endl;
    exit(EXIT_FAILURE);
  }
  std::map<std::string, ad::tools::shared_ptr<ad::model> > models = ad::import(argv[1]);

  std::vector<int_t> BLAS1_N = create_log_range(1e3, 2e7, 50);

  std::cout << "Benchmark : BLAS" << std::endl;
  std::cout << "----------------" << std::endl;
  std::cout << "#Benchmark : BLAS" << std::endl;
  std::cout << "#----------------" << std::endl;
  bench<float>(BLAS1_N, models);
}
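As an illustration (not part of the commit) of the sizes main() now benchmarks: create_log_range(1e3, 2e7, 50) spaces its points evenly on a logarithmic scale, and because the exponent runs over i/N for i < N, the last point stays somewhat below max. A small stand-alone sketch of that spacing (using double instead of int_t for clarity):

// Illustrative only (not part of the commit): the spacing used by create_log_range(1e3, 2e7, 50).
#include <cmath>
#include <iostream>

int main()
{
  const double min = 1e3, max = 2e7;
  const int N = 50;
  const int idx[] = {0, 25, 49};  // first, middle and last index
  for(int k = 0 ; k < 3 ; ++k)
    std::cout << std::exp(std::log(min) + (std::log(max) - std::log(min)) * idx[k] / N) << std::endl;
  // prints approximately 1000, 141421 and 1.64e+07: log-spaced, never reaching max
  return 0;
}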