#include <iostream>
#include <tuple>
#include <vector>

#include "triton/driver/backend.h"
#include "triton/driver/stream.h"
#include "conv.h"

/// Benchmark driver: runs bench_conv once per configured convolution shape on
/// the default compute device and prints one line of results per shape.
///
/// Output format, one line per configuration:
///   "// " <config> { ", " <perf> }* newline
/// where each <perf> is one element of the range returned by bench_conv.
int main() {
  // initialize default compute device
  auto context = triton::driver::backend::contexts::get_default();
  triton::driver::stream* stream = triton::driver::stream::create(context->backend());

  // shapes to benchmark
  // Field order matches the std::tie unpacking below:
  //   Z, H, W, CO, CI, R, S, pad_h, pad_w, stride_h, stride_w
  // NOTE(review): presumably batch, height, width, output channels, input
  // channels and filter extents -- confirm against bench_conv's declaration.
  using config_t = std::tuple<int, int, int, int, int, int, int, int, int, int, int>;
  std::vector<config_t> configs = {
//    {1, 56, 56, 64, 64, 3, 3, 1, 1, 1, 1},
//    {1, 56, 56, 128, 128, 3, 3, 1, 1, 1, 1},
//    {1, 56, 56, 256, 256, 3, 3, 1, 1, 1, 1},
//    {1, 56, 56, 384, 384, 3, 3, 1, 1, 1, 1},
//    {1, 56, 56, 512, 512, 3, 3, 1, 1, 1, 1},
//    {1, 56, 56, 768, 768, 3, 3, 1, 1, 1, 1},
//    {1, 56, 56, 1024, 1024, 3, 3, 1, 1, 1, 1},

//    {1, 8, 8, 256, 256, 3, 3, 1, 1, 1, 1},
//    {1, 16, 16, 256, 256, 3, 3, 1, 1, 1, 1},
//    {1, 32, 32, 256, 256, 3, 3, 1, 1, 1, 1},
//    {1, 64, 64, 256, 256, 3, 3, 1, 1, 1, 1},
    {1, 64, 64, 4096, 4096, 1, 1, 0, 0, 1, 1},
//    {1, 256, 256, 256, 256, 3, 3, 1, 1, 1, 1}
  };

  int Z, H, W, CO, CI, R, S, pad_h, pad_w, stride_h, stride_w;
  for(const auto& c: configs){
    // unpack the configuration into the individual bench_conv arguments
    std::tie(Z, H, W, CO, CI, R, S, pad_h, pad_w, stride_h, stride_w) = c;
    // NOTE(review): streaming `c` needs an operator<< overload for config_t that
    // is not defined in this file -- presumably provided through conv.h; verify.
    std::cout << "// " << c ;
    // flush after every value so partial results are visible while the
    // remaining measurements are still running
    for(auto perf: bench_conv(context, stream, HALF, Z, H, W, CO, CI, R, S, pad_h, pad_w, stride_h, stride_w))
      std::cout << ", " << perf << std::flush;
    std::cout << std::endl;
  }
}