From 2781cdcf93acad6a582606e323bcf27a6d6eebc7 Mon Sep 17 00:00:00 2001 From: Philippe Tillet Date: Tue, 10 Sep 2019 15:54:16 -0400 Subject: [PATCH] [lang] added templates for reductions --- include/triton/lang/token.h | 3 ++ include/triton/runtime/function.h | 2 +- lib/lang/parser.cc | 25 ++++++++++--- tests/common/src/reduce.h | 27 ++++++++++++++ tests/common/util.h | 7 ++++ tests/unit/CMakeLists.txt | 2 +- tests/unit/dot.cc | 11 ++---- tests/unit/reduce.cc | 62 +++++++++++++++++++++++++++++++ 8 files changed, 124 insertions(+), 15 deletions(-) create mode 100644 tests/common/src/reduce.h create mode 100644 tests/unit/reduce.cc diff --git a/include/triton/lang/token.h b/include/triton/lang/token.h index 1690ba246..5724c50e3 100644 --- a/include/triton/lang/token.h +++ b/include/triton/lang/token.h @@ -180,6 +180,9 @@ public: PLUS, MINUS, CAST, + REDUCE_ADD, + REDUCE_MAX, + REDUCE_MIN, // For preprocessor PP_IF, diff --git a/include/triton/runtime/function.h b/include/triton/runtime/function.h index 96ec35ef7..42ecd69f9 100644 --- a/include/triton/runtime/function.h +++ b/include/triton/runtime/function.h @@ -70,7 +70,7 @@ public: struct options_space_t { typedef std::pair> define_t; std::vector defines; - std::vector num_warps; + std::vector num_warps; }; struct options_t { diff --git a/lib/lang/parser.cc b/lib/lang/parser.cc index fed1422fc..6c669208f 100644 --- a/lib/lang/parser.cc +++ b/lib/lang/parser.cc @@ -453,14 +453,27 @@ Expr* Parser::ParseSubScripting(Expr* lhs) { TileType::ShapeInt shape; size_t i = 0; const Token* tok; + std::vector> redList; do { tok = ts_.Next(); - if(tok->tag_ == ':') - shape.push_back(lhsShape[i++]); - else if(tok->tag_ == Token::NEWAXIS) - shape.push_back(1); - else - Error(tok, "only ':' and newaxis are supported in subscripts"); + switch(tok->tag_) { + case ':': + shape.push_back(lhsShape[i++]); + break; + + case Token::NEWAXIS: + shape.push_back(1); + break; + +// case Token::ADD: +// case Token::SUB: +// redList.push_back({i, tok->tag_}); +// break; + + default: + Error(tok, "Unexpected subscript symbol encountered at dimension %d", i); + break; + } }while(ts_.Try(',')); ts_.Expect(']'); if(lhsShape.size() > i) diff --git a/tests/common/src/reduce.h b/tests/common/src/reduce.h new file mode 100644 index 000000000..a9788f340 --- /dev/null +++ b/tests/common/src/reduce.h @@ -0,0 +1,27 @@ +namespace src { + + const char *reduce1d = +R"( +void reduce1d(TYPE * X __noalias __readonly __aligned(16), + TYPE * Y __noalias __readonly __aligned(16), + int N) { +} +)"; + + + const char *reduce2d = +R"( +void reduce2d(TYPE * X __noalias __readonly __aligned(16), + TYPE * Y __noalias __writeonly __aligned(16), + int M, int N, int ldx) { + int ridm = get_program_id(0); + int ridn = get_program_id(1); + int rm[TM] = ridm * TM + 0 ... TM; + int rn[TN] = ridn * TN + 0 ... TN; + TYPE* px[TM, TN] = X + rm[:, newaxis] + rn[newaxis, :] * ldx; + TYPE* py[TM, TN] = Y + rm[:, newaxis]; + *py = (*px)[:, +]; +} +)"; + +} diff --git a/tests/common/util.h b/tests/common/util.h index d8ffef090..e5cfef7b8 100644 --- a/tests/common/util.h +++ b/tests/common/util.h @@ -31,6 +31,13 @@ enum order_t { COLMAJOR }; +template +void init_rand(std::vector& x) { + for(size_t i = 0; i < x.size(); i++) + x[i] = static_cast((double)rand()/RAND_MAX); +} + + namespace aux{ template struct seq{}; diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 78fbc79d1..3efbdd71f 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -1,4 +1,4 @@ -foreach(PROG dot) +foreach(PROG dot reduce) set(TARGET unit_${PROG}) add_executable(${TARGET} ${PROG}.cc) set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME ${TARGET}) diff --git a/tests/unit/dot.cc b/tests/unit/dot.cc index 69b8cf2d7..b08eb13ba 100644 --- a/tests/unit/dot.cc +++ b/tests/unit/dot.cc @@ -50,7 +50,7 @@ void cpu_ref(bool AT_, bool BT_, size_t M, size_t N, size_t K, } -bool do_test(drv::stream* stream, bool AT, bool BT, int32_t M, int32_t N, int32_t K, int32_t TM, int32_t TN, int32_t TK, size_t nwarp){ +bool do_test(drv::stream* stream, bool AT, bool BT, int32_t M, int32_t N, int32_t K, int32_t TM, int32_t TN, int32_t TK, int nwarp){ typedef float NumericT; std::string ty = "float"; size_t dt_nbytes = sizeof(NumericT); @@ -62,12 +62,9 @@ bool do_test(drv::stream* stream, bool AT, bool BT, int32_t M, int32_t N, int32_ int32_t ldb = BT ? N : K; int32_t ldc = M; srand(0); - for(size_t i = 0; i < ha.size(); i++) - ha[i] = static_cast((float)rand()/RAND_MAX); - for(size_t i = 0; i < hb.size(); i++) - hb[i] = static_cast((float)rand()/RAND_MAX); - for(size_t i = 0; i < hc.size(); i++) - hc[i] = static_cast((double)0); + init_rand(ha); + init_rand(hb); + init_rand(hc); auto dc = std::shared_ptr(drv::buffer::create(context, hc.size()*dt_nbytes)); auto da = std::shared_ptr(drv::buffer::create(context, ha.size()*dt_nbytes)); auto db = std::shared_ptr(drv::buffer::create(context, hb.size()*dt_nbytes)); diff --git a/tests/unit/reduce.cc b/tests/unit/reduce.cc new file mode 100644 index 000000000..59b574c4d --- /dev/null +++ b/tests/unit/reduce.cc @@ -0,0 +1,62 @@ +#include +#include +#include +#include +#include "triton/driver/backend.h" +#include "triton/driver/stream.h" +#include "triton/tools/bench.hpp" +#include "triton/external/half.hpp" +#include "triton/runtime/function.h" +#include "src/reduce.h" +#include "cuda/cublas.h" +#include "util.h" + +namespace drv = triton::driver; +namespace rt = triton::runtime; + + +bool do_test(drv::stream* stream, int M, int N, std::string op, int nwarp){ + typedef float NumericT; + std::string ty = "float"; + size_t dt_nbytes = sizeof(NumericT); + drv::context* context = stream->context(); + std::vector hy(M); + std::vector hx(M*N); + srand(0); + init_rand(hy); + init_rand(hx); + auto dy = std::shared_ptr(drv::buffer::create(context, hy.size()*dt_nbytes)); + auto dx = std::shared_ptr(drv::buffer::create(context, hx.size()*dt_nbytes)); + stream->write(&*dy, true, 0, hy); + stream->write(&*dx, true, 0, hx); + rt::function::options_space_t opt; + opt.defines.push_back({"TYPE", {ty}}); + opt.defines.push_back({"TM", {std::to_string(M)}}); + opt.defines.push_back({"TN", {std::to_string(N)}}); + opt.num_warps = {nwarp}; + rt::function function(src::reduce2d, opt); + function({&*dy, &*dx, M, N, M}, grid2d(M, N), stream); + stream->synchronize(); +} + +int main() { + // initialize default compute device + auto context = triton::driver::backend::contexts::get_default(); + triton::driver::stream* stream = triton::driver::stream::create(context); + // shapes to benchmark + typedef std::tuple config_t; + std::vector configs = { + config_t{32, 32, "+"} + }; + // does the work + int M, N; + std::string op; + for(const auto& c: configs){ + std::tie(M, N, op) = c; + std::cout << "Testing " << c << " ... " << std::flush; + if(do_test(stream, M, N, op, 1)) + std::cout << " Pass! " << std::endl; + else + std::cout << " Fail! " << std::endl; + } +}