[lang] added templates for reductions
This commit is contained in:
@@ -180,6 +180,9 @@ public:
|
||||
PLUS,
|
||||
MINUS,
|
||||
CAST,
|
||||
REDUCE_ADD,
|
||||
REDUCE_MAX,
|
||||
REDUCE_MIN,
|
||||
|
||||
// For preprocessor
|
||||
PP_IF,
|
||||
|
@@ -70,7 +70,7 @@ public:
|
||||
struct options_space_t {
|
||||
typedef std::pair<std::string, std::vector<std::string>> define_t;
|
||||
std::vector<define_t> defines;
|
||||
std::vector<size_t> num_warps;
|
||||
std::vector<int> num_warps;
|
||||
};
|
||||
|
||||
struct options_t {
|
||||
|
@@ -453,14 +453,27 @@ Expr* Parser::ParseSubScripting(Expr* lhs) {
|
||||
TileType::ShapeInt shape;
|
||||
size_t i = 0;
|
||||
const Token* tok;
|
||||
std::vector<std::pair<int, int>> redList;
|
||||
do {
|
||||
tok = ts_.Next();
|
||||
if(tok->tag_ == ':')
|
||||
switch(tok->tag_) {
|
||||
case ':':
|
||||
shape.push_back(lhsShape[i++]);
|
||||
else if(tok->tag_ == Token::NEWAXIS)
|
||||
break;
|
||||
|
||||
case Token::NEWAXIS:
|
||||
shape.push_back(1);
|
||||
else
|
||||
Error(tok, "only ':' and newaxis are supported in subscripts");
|
||||
break;
|
||||
|
||||
// case Token::ADD:
|
||||
// case Token::SUB:
|
||||
// redList.push_back({i, tok->tag_});
|
||||
// break;
|
||||
|
||||
default:
|
||||
Error(tok, "Unexpected subscript symbol encountered at dimension %d", i);
|
||||
break;
|
||||
}
|
||||
}while(ts_.Try(','));
|
||||
ts_.Expect(']');
|
||||
if(lhsShape.size() > i)
|
||||
|
27
tests/common/src/reduce.h
Normal file
27
tests/common/src/reduce.h
Normal file
@@ -0,0 +1,27 @@
|
||||
// Kernel source strings used by the reduction unit tests.
//
// Each variable holds the text of one kernel. The text is not self-contained:
// TYPE, TM and TN are never defined inside the strings and have to be supplied
// by whoever compiles them (e.g. as preprocessor-style defines).
//
// The pointers are declared `const char* const` so that they have internal
// linkage: a plain `const char*` at namespace scope is a mutable object with
// external linkage, and including this header from two translation units
// would then produce duplicate-definition link errors.
namespace src {

// 1-D reduction kernel. The body is currently empty (placeholder).
// NOTE(review): Y is tagged __readonly here but __writeonly in reduce2d;
// presumably Y is meant to be the output in both - confirm before filling in
// the body.
const char* const reduce1d =
R"(
void reduce1d(TYPE * X __noalias __readonly __aligned(16),
TYPE * Y __noalias __readonly __aligned(16),
int N) {
}
)";

// 2-D reduction kernel: loads a [TM, TN] tile addressed through X, reduces it
// with `+` along the second subscript position (`[:, +]`) and stores the
// result through pointers derived from Y.
const char* const reduce2d =
R"(
void reduce2d(TYPE * X __noalias __readonly __aligned(16),
TYPE * Y __noalias __writeonly __aligned(16),
int M, int N, int ldx) {
int ridm = get_program_id(0);
int ridn = get_program_id(1);
int rm[TM] = ridm * TM + 0 ... TM;
int rn[TN] = ridn * TN + 0 ... TN;
TYPE* px[TM, TN] = X + rm[:, newaxis] + rn[newaxis, :] * ldx;
TYPE* py[TM, TN] = Y + rm[:, newaxis];
*py = (*px)[:, +];
}
)";

}
|
@@ -31,6 +31,13 @@ enum order_t {
|
||||
COLMAJOR
|
||||
};
|
||||
|
||||
// Overwrites every element of `x` with a pseudo-random value.
//
// Each element is computed as rand() / RAND_MAX in double precision and then
// converted to T, so floating-point element types receive values in [0, 1].
// For integral T the conversion truncates, which yields 0 unless rand()
// happens to return exactly RAND_MAX.
//
// The values come from the global rand() sequence: call srand() first when
// reproducible contents are required. The size of `x` is not changed, and an
// empty vector is left untouched.
template<class T>
void init_rand(std::vector<T>& x) {
  // auto&& (rather than T&) keeps proxy-reference containers such as
  // std::vector<bool> working, exactly like the indexed form would.
  for(auto&& value : x)
    value = static_cast<T>(static_cast<double>(rand()) / RAND_MAX);
}
|
||||
|
||||
|
||||
|
||||
namespace aux{
|
||||
template<std::size_t...> struct seq{};
|
||||
|
@@ -1,4 +1,4 @@
|
||||
foreach(PROG dot)
|
||||
foreach(PROG dot reduce)
|
||||
set(TARGET unit_${PROG})
|
||||
add_executable(${TARGET} ${PROG}.cc)
|
||||
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME ${TARGET})
|
||||
|
@@ -50,7 +50,7 @@ void cpu_ref(bool AT_, bool BT_, size_t M, size_t N, size_t K,
|
||||
}
|
||||
|
||||
|
||||
bool do_test(drv::stream* stream, bool AT, bool BT, int32_t M, int32_t N, int32_t K, int32_t TM, int32_t TN, int32_t TK, size_t nwarp){
|
||||
bool do_test(drv::stream* stream, bool AT, bool BT, int32_t M, int32_t N, int32_t K, int32_t TM, int32_t TN, int32_t TK, int nwarp){
|
||||
typedef float NumericT;
|
||||
std::string ty = "float";
|
||||
size_t dt_nbytes = sizeof(NumericT);
|
||||
@@ -62,12 +62,9 @@ bool do_test(drv::stream* stream, bool AT, bool BT, int32_t M, int32_t N, int32_
|
||||
int32_t ldb = BT ? N : K;
|
||||
int32_t ldc = M;
|
||||
srand(0);
|
||||
for(size_t i = 0; i < ha.size(); i++)
|
||||
ha[i] = static_cast<NumericT>((float)rand()/RAND_MAX);
|
||||
for(size_t i = 0; i < hb.size(); i++)
|
||||
hb[i] = static_cast<NumericT>((float)rand()/RAND_MAX);
|
||||
for(size_t i = 0; i < hc.size(); i++)
|
||||
hc[i] = static_cast<NumericT>((double)0);
|
||||
init_rand(ha);
|
||||
init_rand(hb);
|
||||
init_rand(hc);
|
||||
auto dc = std::shared_ptr<drv::buffer>(drv::buffer::create(context, hc.size()*dt_nbytes));
|
||||
auto da = std::shared_ptr<drv::buffer>(drv::buffer::create(context, ha.size()*dt_nbytes));
|
||||
auto db = std::shared_ptr<drv::buffer>(drv::buffer::create(context, hb.size()*dt_nbytes));
|
||||
|
62
tests/unit/reduce.cc
Normal file
62
tests/unit/reduce.cc
Normal file
@@ -0,0 +1,62 @@
|
||||
#include <iomanip>
|
||||
#include <cstring>
|
||||
#include <sstream>
|
||||
#include <cstdio>
|
||||
#include "triton/driver/backend.h"
|
||||
#include "triton/driver/stream.h"
|
||||
#include "triton/tools/bench.hpp"
|
||||
#include "triton/external/half.hpp"
|
||||
#include "triton/runtime/function.h"
|
||||
#include "src/reduce.h"
|
||||
#include "cuda/cublas.h"
|
||||
#include "util.h"
|
||||
|
||||
namespace drv = triton::driver;
|
||||
namespace rt = triton::runtime;
|
||||
|
||||
|
||||
// Runs the `src::reduce2d` kernel once on an M x N float input.
//
// stream : stream used for the host->device uploads, the kernel launch and the
//          final synchronization; device buffers are created in its context.
// M, N   : extents of the input; they are also passed to the kernel as the
//          TM / TN defines and as its M / N arguments (ldx is set to M).
// op     : reduction operator requested by the caller. It is not used yet:
//          the kernel source hard-codes `+`.
// nwarp  : the single entry placed in the `num_warps` option space.
//
// Returns true once the launch has been synchronized.
// NOTE(review): the device result is never read back or compared against a
// host reference, so `true` only means "the kernel was launched and
// synchronized" - a real check still has to be added.
bool do_test(drv::stream* stream, int M, int N, std::string op, int nwarp){
  typedef float NumericT;
  const std::string ty = "float";
  const size_t dt_nbytes = sizeof(NumericT);
  drv::context* context = stream->context();
  (void)op; // accepted for interface stability; see the note above

  // host-side data: hx is the M*N input, hy holds one value per row
  std::vector<NumericT> hy(M);
  std::vector<NumericT> hx(M*N);
  srand(0); // fixed seed so every run sees the same data
  init_rand(hy);
  init_rand(hx);

  // device-side mirrors of the host vectors
  auto dy = std::shared_ptr<drv::buffer>(drv::buffer::create(context, hy.size()*dt_nbytes));
  auto dx = std::shared_ptr<drv::buffer>(drv::buffer::create(context, hx.size()*dt_nbytes));
  stream->write(&*dy, true, 0, hy);
  stream->write(&*dx, true, 0, hx);

  // compilation options: element type, tile sizes and warp count
  rt::function::options_space_t opt;
  opt.defines.push_back({"TYPE", {ty}});
  opt.defines.push_back({"TM", {std::to_string(M)}});
  opt.defines.push_back({"TN", {std::to_string(N)}});
  opt.num_warps = {nwarp};

  // The kernel signature is reduce2d(X, Y, M, N, ldx) with X the read-only
  // M*N input and Y the write-only output, so dx must come first. Passing dy
  // as X would make the kernel read M*N elements out of an M-element buffer.
  rt::function function(src::reduce2d, opt);
  function({&*dx, &*dy, M, N, M}, grid2d(M, N), stream);
  stream->synchronize();

  // Explicit result: falling off the end of a non-void function is undefined
  // behaviour, and the caller branches on this value.
  return true;
}
|
||||
|
||||
int main() {
|
||||
// initialize default compute device
|
||||
auto context = triton::driver::backend::contexts::get_default();
|
||||
triton::driver::stream* stream = triton::driver::stream::create(context);
|
||||
// shapes to benchmark
|
||||
typedef std::tuple<int, int, std::string> config_t;
|
||||
std::vector<config_t> configs = {
|
||||
config_t{32, 32, "+"}
|
||||
};
|
||||
// does the work
|
||||
int M, N;
|
||||
std::string op;
|
||||
for(const auto& c: configs){
|
||||
std::tie(M, N, op) = c;
|
||||
std::cout << "Testing " << c << " ... " << std::flush;
|
||||
if(do_test(stream, M, N, op, 1))
|
||||
std::cout << " Pass! " << std::endl;
|
||||
else
|
||||
std::cout << " Fail! " << std::endl;
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user