[TESTS] Simplified testing of half-precision transposes

2020-04-09 01:10:11 -04:00
parent 4ae0e28b32
commit ec2cb2155e
5 changed files with 61 additions and 21 deletions
--- a/tests/bench/copy.cc
+++ b/tests/bench/copy.cc
@@ -12,16 +12,16 @@ int main() {
  typedef std::tuple<std::vector<int>, std::vector<int>, std::vector<int>> config_t;
  std::vector<config_t> configs = {
    {{4096*4096}, {0}, {0}},
-    {{4096, 4096}, {0, 1}, {1, 0}},
+    {{4096, 4096}, {0, 1}, {0, 1}},
    {{4096, 4096}, {0, 1}, {1, 0}},
    {{4096, 4096}, {1, 0}, {0, 1}},
-    {{4096, 4096}, {0, 1}, {0, 1}},
+    {{4096, 4096}, {1, 0}, {1, 0}},
    {{256, 256, 256}, {0, 1, 2}, {0, 1, 2}},
    {{256, 256, 256}, {0, 1, 2}, {0, 2, 1}},
    {{256, 256, 256}, {1, 0, 2}, {1, 2, 0}},
-    {{256, 256, 256}, {1, 2, 0}, {1, 0, 2}},
-    {{256, 256, 256}, {2, 0, 1}, {0, 1, 2}},
-    {{256, 256, 256}, {2, 1, 0}, {0, 2, 1}}
+    {{256, 256, 256}, {1, 2, 0}, {1, 0, 2}}
+//    {{256, 256, 256}, {2, 0, 1}, {0, 1, 2}},
+//    {{256, 256, 256}, {2, 1, 0}, {0, 2, 1}}
  };
  // does the work
  std::vector<int32_t> shape;
@@ -29,7 +29,7 @@ int main() {
  for(const auto& c: configs){
    std::tie(shape, ord_x, ord_y) = c;
    std::cout << "// " << c << std::flush;
-    for(auto perf: bench_copy_nd(stream, shape, ord_x, ord_y))
+    for(auto perf: bench_copy_nd(stream, HALF, shape, ord_x, ord_y))
      std::cout << ", " << perf << std::flush;
    std::cout << std::endl;
  }
--- a/tests/common/copy.h
+++ b/tests/common/copy.h
@@ -2,6 +2,7 @@
 #include "triton/driver/stream.h"
 #include "triton/runtime/function.h"
 #include "triton/tools/bench.hpp"
+#include "triton/external/half.hpp"
 #include "util.h"

 int32_t off(const std::vector<int32_t>& idx, const std::vector<int32_t>& strides) {
@@ -16,6 +17,13 @@ enum run_mode_t {
  TEST
 };

+enum dtype_t {  // element-type selector taken by bench_copy_nd / test_copy_nd
+  FLOAT,  // dispatched to triton_copy_nd<float>
+  HALF,  // dispatched to triton_copy_nd<half_float::half>
+  DOUBLE  // NOTE(review): no case for this value in the visible dispatch switches; it falls to `default: break`
+};
+
+
 template<class T>
 void cc_copy_nd(const std::vector<T>& x, std::vector<T>& y,
                const std::vector<int32_t>& shape,
@@ -46,13 +54,28 @@ void cc_copy_nd(const std::vector<T>& x, std::vector<T>& y,
      y[off({i, j, k}, y_strides)] = x[off({i, j, k}, x_strides)];
 }

+template<class T>  // trait mapping an element type T to a type-name string exposed as `value`
+struct to_string;  // primary template is only declared here; the specializations below supply `value`
+
+template<> struct to_string<half_float::half>{  // half_float::half -> "half"
+  static constexpr const char* value = "half";
+};
+
+template<> struct to_string<float>{  // float -> "float"
+  static constexpr const char* value = "float";
+};
+
+template<> struct to_string<double>{  // double -> "double"
+  static constexpr const char* value = "double";
+};
+
+template<typename T>
 void triton_copy_nd(drv::stream* stream, const std::vector<int32_t>& shape,
                    const std::vector<int32_t>& x_order, const std::vector<int32_t>& y_order,
                    std::vector<std::vector<std::string>> TS,
                    run_mode_t mode, std::vector<double>& bench, bool &test) {
-  typedef float NumericT;
-  std::string ty = "float";
-  size_t dtsize = sizeof(NumericT);
+  std::string ty = to_string<T>::value;
+  size_t dtsize = sizeof(T);
  drv::context* context = stream->context();

  // rank
@@ -107,11 +130,11 @@ void triton_copy_nd(drv::stream* stream, const std::vector<int32_t>& shape,

  // test triton
  if(mode == TEST){
-    std::vector<NumericT> hx(size);
-    std::vector<NumericT> hy(size);
-    std::vector<NumericT> ry(size);
+    std::vector<T> hx(size);
+    std::vector<T> hy(size);
+    std::vector<T> ry(size);
    for(size_t i = 0; i < hx.size(); i++)
-      hx[i] = static_cast<NumericT>((float)rand()/RAND_MAX);
+      hx[i] = static_cast<T>((float)rand()/RAND_MAX);
    stream->write(&*dx, true, 0, hx);
    function(args, grid, stream);
    stream->synchronize();
@@ -121,15 +144,23 @@ void triton_copy_nd(drv::stream* stream, const std::vector<int32_t>& shape,
  }
 }

-std::vector<double> bench_copy_nd(drv::stream* stream, const std::vector<int32_t>& shape,
+std::vector<double> bench_copy_nd(drv::stream* stream, dtype_t dtype, const std::vector<int32_t>& shape,  // dtype selects which triton_copy_nd<T> instantiation runs
                                   const std::vector<int32_t>& x_order, const std::vector<int32_t>& y_order) {
  std::vector<double> bench;  // filled through triton_copy_nd's `bench` out-parameter and returned to the caller
  bool test;  // needed only to satisfy triton_copy_nd's signature; never read in this function
-  triton_copy_nd(stream, shape, x_order, y_order, {}, BENCH, bench, test);
+  switch(dtype){  // map the runtime dtype tag onto a compile-time element type
+    case HALF:
+      triton_copy_nd<half_float::half>(stream, shape, x_order, y_order, {}, BENCH, bench, test);  // `{}` passes an empty TS list
+      break;
+    case FLOAT:
+      triton_copy_nd<float>(stream, shape, x_order, y_order, {}, BENCH, bench, test);
+      break;
+    default: break;  // NOTE(review): DOUBLE (or any other value) runs nothing, so `bench` is returned empty without any diagnostic
+  }
  return bench;
 }

-bool test_copy_nd(drv::stream* stream, const std::vector<int32_t>& shape,
+bool test_copy_nd(drv::stream* stream, dtype_t dtype, const std::vector<int32_t>& shape,
                  const std::vector<int32_t>& TS,
                  const std::vector<int32_t>& x_order, const std::vector<int32_t>& y_order) {
  std::vector<double> bench;
@@ -137,6 +168,14 @@ bool test_copy_nd(drv::stream* stream, const std::vector<int32_t>& shape,
  std::vector<std::vector<std::string>> TSS;
  for(int32_t d: TS)
    TSS.push_back({std::to_string(d)});
-  triton_copy_nd(stream, shape, x_order, y_order, TSS, TEST, bench, test);
+  switch(dtype){
+    case HALF:
+      triton_copy_nd<half_float::half>(stream, shape, x_order, y_order, TSS, TEST, bench, test);
+      break;
+    case FLOAT:
+      triton_copy_nd<float>(stream, shape, x_order, y_order, TSS, TEST, bench, test);
+      break;
+    default: break;
+  }
  return test;
 }
--- a/tests/common/src/copy.h
+++ b/tests/common/src/copy.h
@@ -26,9 +26,10 @@ void copy2d(TYPE * X __noalias __readonly __aligned(16),
  int pid1 = get_program_id(1);
  int rs0[TS0] = pid0 * TS0 + 0 ... TS0;
  int rs1[TS1] = pid1 * TS1 + 0 ... TS1;
+  bool in_bounds[TS0, TS1] = rs0[:, newaxis] < S0 && rs1[newaxis, :] < S1;
  TYPE* px[TS0, TS1] = X + rs0[:, newaxis] * STRIDE_XS0 + rs1[newaxis, :] * STRIDE_XS1;
  TYPE* py[TS0, TS1] = Y + rs0[:, newaxis] * STRIDE_YS0 + rs1[newaxis, :] * STRIDE_YS1;
-  *py = *px;
+  *?(in_bounds)py = *?(in_bounds)px;
 }
 )";

--- a/tests/common/util.h
+++ b/tests/common/util.h
@@ -46,8 +46,8 @@ inline std::vector<std::vector<std::string>> tile_nd(size_t rank) {
  if(rank == 1)
    return {{"128", "256", "512", "1024"}};
  if(rank == 2)
-    return {{"64"},
-            {"64"}};
+    return {{"16", "32", "64"},
+            {"16", "32", "64"}};
  if(rank == 3)
    return {{"4", "16", "32"},
            {"4", "16", "32"},
--- a/tests/unit/copy.cc
+++ b/tests/unit/copy.cc
@@ -50,7 +50,7 @@ int main() {
  bool result = true;
  for(const auto& c: configs){
    std::tie(shape, tile, ord_x, ord_y) = c;
-    bool pass = test_copy_nd(stream, shape, tile, ord_x, ord_y);
+    bool pass = test_copy_nd(stream, FLOAT, shape, tile, ord_x, ord_y);
    result = result && pass;
    std::cout << "// " << c << ", " << pass << std::endl;
  }