#include "triton/driver/backend.h" #include "triton/driver/stream.h" #include "dot.h" int main() { // initialize default compute device auto context = triton::driver::backend::contexts::get_default(); triton::driver::stream* stream = triton::driver::stream::create(context); // shapes to benchmark typedef std::tuple, bool, bool, int, int, int> config_t; std::vector configs; for(auto ord: std::vector>{{1, 0}}) for(auto x: std::vector>{{false, false}}){ std::vector tmp = { // config_t{ord, x[0], x[1], 512, 512, 512}, config_t{ord, x[0], x[1], 1024, 1024, 1024}, // config_t{ord, x[0], x[1], 127008, 768, 576}, // config_t{ord, x[0], x[1], 8192, 8192, 8192} // config_t{ord, x[0], x[1], 16, 2048, 2048}, // config_t{ord, x[0], x[1], 32, 2048, 2048}, // config_t{ord, x[0], x[1], 64, 2048, 2048}, // config_t{ord, x[0], x[1], 128, 2048, 2048}, // config_t{ord, x[0], x[1], 7000, 2048, 2048}, // config_t{ord, x[0], x[1], 16, 4096, 4096}, // config_t{ord, x[0], x[1], 32, 4096, 4096}, // config_t{ord, x[0], x[1], 64, 4096, 4096}, // config_t{ord, x[0], x[1], 128, 4096, 4096}, // config_t{ord, x[0], x[1], 7000, 4096, 4096} }; configs.insert(configs.end(), tmp.begin(), tmp.end()); } // does the work std::vector ord; bool AT, BT; int32_t M, N, K; for(const auto& c: configs){ std::tie(ord, AT, BT, M, N, K) = c; std::cout << "// " << c ; for(auto perf: bench_dot(stream, FLOAT, AT, BT, M, N, K, ord, ord)) std::cout << ", " << perf << std::flush; std::cout << std::endl; } }