Files
triton/tests/unit/reduce.cc
Philippe Tillet af080740f2 [GENERAL] Merged v1.0alpha into master. Added features are:
- A100 support via mma.16816
- Thread swizzling for conflict-free shared memory accesses without
padding
- Complete overhaul of the LLVM code generation in
codegen/selection/generator.cc to remove overengineering
- Added debugging capabilities in the Python binding
- Compilation error for kernels that spill
2021-01-11 19:23:24 -05:00

46 lines
1.3 KiB
C++

#include <iomanip>
#include <cstring>
#include <sstream>
#include <cstdio>
#include <functional>
#include "triton/driver/backend.h"
#include "triton/driver/stream.h"
#include "triton/tools/bench.hpp"
#include "triton/external/half.hpp"
#include "triton/runtime/function.h"
#include "cuda/cublas.h"
#include "reduce.h"
#include "util.h"
int main() {
// initialize default compute device
auto context = triton::driver::backend::contexts::get_default();
triton::driver::stream* stream = triton::driver::stream::create(context->backend());
// shapes to benchmark
typedef std::tuple<std::vector<int>, int, reduce_op_t> config_t;
std::vector<config_t> configs = {
config_t{{64}, 0, ADD},
config_t{{128}, 0, MIN},
config_t{{32, 32}, 0, MAX},
config_t{{32, 32}, 1, ADD},
config_t{{32, 64}, 0, ADD},
config_t{{64, 32}, 1, ADD},
config_t{{8, 8, 4}, 2, ADD},
config_t{{8, 8, 4}, 0, ADD},
config_t{{8, 8, 4}, 1, ADD}
};
// does the work
int axis;
std::vector<int> shape;
reduce_op_t op;
for(const auto& c: configs){
std::tie(shape, axis, op) = c;
std::cout << "Testing " << c << " ... " << std::flush;
if(do_test(context, stream, shape, axis, op, 1))
std::cout << " Pass! " << std::endl;
else
std::cout << " Fail! " << std::endl;
}
}