From ec2cb2155eca6b5f964b055f2c530769f0577551 Mon Sep 17 00:00:00 2001 From: Philippe Tillet Date: Thu, 9 Apr 2020 01:10:11 -0400 Subject: [PATCH] [TESTS] Simplified testing of half-precision transposes --- tests/bench/copy.cc | 12 ++++---- tests/common/copy.h | 61 +++++++++++++++++++++++++++++++++-------- tests/common/src/copy.h | 3 +- tests/common/util.h | 4 +-- tests/unit/copy.cc | 2 +- 5 files changed, 61 insertions(+), 21 deletions(-) diff --git a/tests/bench/copy.cc b/tests/bench/copy.cc index f1252797e..c8c56210a 100644 --- a/tests/bench/copy.cc +++ b/tests/bench/copy.cc @@ -12,16 +12,16 @@ int main() { typedef std::tuple, std::vector, std::vector> config_t; std::vector configs = { {{4096*4096}, {0}, {0}}, - {{4096, 4096}, {0, 1}, {1, 0}}, + {{4096, 4096}, {0, 1}, {0, 1}}, {{4096, 4096}, {0, 1}, {1, 0}}, {{4096, 4096}, {1, 0}, {0, 1}}, - {{4096, 4096}, {0, 1}, {0, 1}}, + {{4096, 4096}, {1, 0}, {1, 0}}, {{256, 256, 256}, {0, 1, 2}, {0, 1, 2}}, {{256, 256, 256}, {0, 1, 2}, {0, 2, 1}}, {{256, 256, 256}, {1, 0, 2}, {1, 2, 0}}, - {{256, 256, 256}, {1, 2, 0}, {1, 0, 2}}, - {{256, 256, 256}, {2, 0, 1}, {0, 1, 2}}, - {{256, 256, 256}, {2, 1, 0}, {0, 2, 1}} + {{256, 256, 256}, {1, 2, 0}, {1, 0, 2}} +// {{256, 256, 256}, {2, 0, 1}, {0, 1, 2}}, +// {{256, 256, 256}, {2, 1, 0}, {0, 2, 1}} }; // does the work std::vector shape; @@ -29,7 +29,7 @@ int main() { for(const auto& c: configs){ std::tie(shape, ord_x, ord_y) = c; std::cout << "// " << c << std::flush; - for(auto perf: bench_copy_nd(stream, shape, ord_x, ord_y)) + for(auto perf: bench_copy_nd(stream, HALF, shape, ord_x, ord_y)) std::cout << ", " << perf << std::flush; std::cout << std::endl; } diff --git a/tests/common/copy.h b/tests/common/copy.h index 811e5d7a4..0398be7c9 100644 --- a/tests/common/copy.h +++ b/tests/common/copy.h @@ -2,6 +2,7 @@ #include "triton/driver/stream.h" #include "triton/runtime/function.h" #include "triton/tools/bench.hpp" +#include "triton/external/half.hpp" #include "util.h" int32_t off(const std::vector& idx, const std::vector& strides) { @@ -16,6 +17,13 @@ enum run_mode_t { TEST }; +enum dtype_t { + FLOAT, + HALF, + DOUBLE +}; + + template void cc_copy_nd(const std::vector& x, std::vector& y, const std::vector& shape, @@ -46,13 +54,28 @@ void cc_copy_nd(const std::vector& x, std::vector& y, y[off({i, j, k}, y_strides)] = x[off({i, j, k}, x_strides)]; } +template +struct to_string; + +template<> struct to_string{ + static constexpr const char* value = "half"; +}; + +template<> struct to_string{ + static constexpr const char* value = "float"; +}; + +template<> struct to_string{ + static constexpr const char* value = "double"; +}; + +template void triton_copy_nd(drv::stream* stream, const std::vector& shape, const std::vector& x_order, const std::vector& y_order, std::vector> TS, run_mode_t mode, std::vector& bench, bool &test) { - typedef float NumericT; - std::string ty = "float"; - size_t dtsize = sizeof(NumericT); + std::string ty = to_string::value; + size_t dtsize = sizeof(T); drv::context* context = stream->context(); // rank @@ -107,11 +130,11 @@ void triton_copy_nd(drv::stream* stream, const std::vector& shape, // test triton if(mode == TEST){ - std::vector hx(size); - std::vector hy(size); - std::vector ry(size); + std::vector hx(size); + std::vector hy(size); + std::vector ry(size); for(size_t i = 0; i < hx.size(); i++) - hx[i] = static_cast((float)rand()/RAND_MAX); + hx[i] = static_cast((float)rand()/RAND_MAX); stream->write(&*dx, true, 0, hx); function(args, grid, stream); stream->synchronize(); @@ -121,15 +144,23 @@ void triton_copy_nd(drv::stream* stream, const std::vector& shape, } } -std::vector bench_copy_nd(drv::stream* stream, const std::vector& shape, +std::vector bench_copy_nd(drv::stream* stream, dtype_t dtype, const std::vector& shape, const std::vector& x_order, const std::vector& y_order) { std::vector bench; bool test; - triton_copy_nd(stream, shape, x_order, y_order, {}, BENCH, bench, test); + switch(dtype){ + case HALF: + triton_copy_nd(stream, shape, x_order, y_order, {}, BENCH, bench, test); + break; + case FLOAT: + triton_copy_nd(stream, shape, x_order, y_order, {}, BENCH, bench, test); + break; + default: break; + } return bench; } -bool test_copy_nd(drv::stream* stream, const std::vector& shape, +bool test_copy_nd(drv::stream* stream, dtype_t dtype, const std::vector& shape, const std::vector& TS, const std::vector& x_order, const std::vector& y_order) { std::vector bench; @@ -137,6 +168,14 @@ bool test_copy_nd(drv::stream* stream, const std::vector& shape, std::vector> TSS; for(int32_t d: TS) TSS.push_back({std::to_string(d)}); - triton_copy_nd(stream, shape, x_order, y_order, TSS, TEST, bench, test); + switch(dtype){ + case HALF: + triton_copy_nd(stream, shape, x_order, y_order, TSS, TEST, bench, test); + break; + case FLOAT: + triton_copy_nd(stream, shape, x_order, y_order, TSS, TEST, bench, test); + break; + default: break; + } return test; } diff --git a/tests/common/src/copy.h b/tests/common/src/copy.h index f45f7a5cd..cd35cbf4e 100644 --- a/tests/common/src/copy.h +++ b/tests/common/src/copy.h @@ -26,9 +26,10 @@ void copy2d(TYPE * X __noalias __readonly __aligned(16), int pid1 = get_program_id(1); int rs0[TS0] = pid0 * TS0 + 0 ... TS0; int rs1[TS1] = pid1 * TS1 + 0 ... TS1; + bool in_bounds[TS0, TS1] = rs0[:, newaxis] < S0 && rs1[newaxis, :] < S1; TYPE* px[TS0, TS1] = X + rs0[:, newaxis] * STRIDE_XS0 + rs1[newaxis, :] * STRIDE_XS1; TYPE* py[TS0, TS1] = Y + rs0[:, newaxis] * STRIDE_YS0 + rs1[newaxis, :] * STRIDE_YS1; - *py = *px; + *?(in_bounds)py = *?(in_bounds)px; } )"; diff --git a/tests/common/util.h b/tests/common/util.h index 89489f889..56f98acde 100644 --- a/tests/common/util.h +++ b/tests/common/util.h @@ -46,8 +46,8 @@ inline std::vector> tile_nd(size_t rank) { if(rank == 1) return {{"128", "256", "512", "1024"}}; if(rank == 2) - return {{"64"}, - {"64"}}; + return {{"16", "32", "64"}, + {"16", "32", "64"}}; if(rank == 3) return {{"4", "16", "32"}, {"4", "16", "32"}, diff --git a/tests/unit/copy.cc b/tests/unit/copy.cc index ec586066c..0598ff21f 100644 --- a/tests/unit/copy.cc +++ b/tests/unit/copy.cc @@ -50,7 +50,7 @@ int main() { bool result = true; for(const auto& c: configs){ std::tie(shape, tile, ord_x, ord_y) = c; - bool pass = test_copy_nd(stream, shape, tile, ord_x, ord_y); + bool pass = test_copy_nd(stream, FLOAT, shape, tile, ord_x, ord_y); result = result && pass; std::cout << "// " << c << ", " << pass << std::endl; }