#include #include #include #include #include #include "triton/driver/backend.h" #include "triton/driver/stream.h" #include "triton/tools/bench.hpp" #include "triton/external/half.hpp" #include "triton/runtime/function.h" #include "cuda/cublas.h" #include "reduce.h" #include "util.h" int main() { // initialize default compute device auto context = triton::driver::backend::contexts::get_default(); triton::driver::stream* stream = triton::driver::stream::create(context); // shapes to benchmark typedef std::tuple, int, reduce_op_t> config_t; std::vector configs = { config_t{{8, 8, 4}, 2, ADD}, config_t{{32}, 0, ADD}, config_t{{32, 32}, 0, MAX}, config_t{{32, 32}, 1, ADD}, config_t{{32, 64}, 0, ADD}, config_t{{64, 32}, 1, ADD} }; // does the work int axis; std::vector shape; reduce_op_t op; for(const auto& c: configs){ std::tie(shape, axis, op) = c; std::cout << "Testing " << c << " ... " << std::flush; if(do_test(stream, shape, axis, op, 1)) std::cout << " Pass! " << std::endl; else std::cout << " Fail! " << std::endl; } }