triton/examples/cpp/conv.cpp

#include <algorithm>
#include <array>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
#include "common.hpp"
#include "triton/runtime/jit.h"
#include "triton/driver/backend.h"
#include "triton/driver/stream.h"
#include "triton/dnn/conv.h"

// Example driver for triton::dnn::conv (type FPROP): builds the kernel source
// from the configuration, compiles it through triton::jit with the default
// parameters, launches and times it on the default context, then compares the
// device result against configuration.cpu_ref().
// Prints "Pass!" and returns 0 on success; prints the first offending element
// and returns EXIT_FAILURE otherwise.
int main() {
  // initialize default compute device
  auto context = triton::driver::backend::contexts::get_default();
  triton::jit jit(context);
  triton::dnn::conv::type ty = triton::dnn::conv::FPROP;
  // convolution configuration
  int32_t B = 4, NF = 32;
  int32_t D = 1, H = 56, W = 56;
  int32_t NC = 32, T = 1, R = 3, S = 3;
  int32_t pad_d = 0, pad_h = 1, pad_w = 1;
  // NOTE(review): the three literal 1s are presumably the strides along
  // d/h/w -- confirm against the triton::dnn::conv constructor.
  triton::dnn::conv configuration(B, NC, D, H, W, T, R, S, NF, 1, 1, 1, pad_d, pad_h, pad_w, ty);
  // host buffers; std::vector value-initializes its elements, so the output
  // (hc) and the reference (rc) both start out as all zeros
  std::vector<float> hc(configuration.c_size());
  std::vector<float> rc(configuration.c_size());
  std::vector<float> ha(configuration.a_size());
  std::vector<float> hb(configuration.b_size());
  // fixed seed so that every run uses the same inputs
  srand(0);
  for(size_t i = 0; i < ha.size(); i++)
    ha[i] = (float)rand()/RAND_MAX;
  for(size_t i = 0; i < hb.size(); i++)
    hb[i] = (float)rand()/RAND_MAX;
  // device buffers, sized in bytes
  // NOTE(review): the buffers and the stream are never released in this
  // example; who owns the pointers returned by create() is not visible from
  // this file -- confirm before adding explicit deletes.
  triton::driver::buffer* dc = triton::driver::buffer::create(context, hc.size()*sizeof(float));
  triton::driver::buffer* da = triton::driver::buffer::create(context, ha.size()*sizeof(float));
  triton::driver::buffer* db = triton::driver::buffer::create(context, hb.size()*sizeof(float));
  triton::driver::stream* stream = triton::driver::stream::create(context);
  stream->write(da, true, 0, ha);
  stream->write(db, true, 0, hb);
  stream->write(dc, true, 0, hc);
  stream->synchronize();
  // benchmark a given convolution kernel
  // (signature kept compatible with the commented-out jit.autotune call below)
  auto benchmark = [&](triton::driver::kernel* kernel,
                       triton::jit::launch_information info) {
    unsigned TM = info.global_range_size[0];
    unsigned TN = info.global_range_size[1];
    unsigned nthreads = info.num_threads;
    std::array<size_t, 3> grid = configuration.get_grid(TM, TN);
    configuration.init(stream, (triton::driver::cu_module*)kernel->module());
    stream->synchronize();
    configuration.set_arg(kernel, da, db, dc);
    // one untimed launch before handing the kernel to bench()
    stream->enqueue(kernel, grid, {nthreads, 1, 1});
    stream->synchronize();
    double ts = bench([&](){stream->enqueue(kernel, grid, {nthreads, 1, 1});},
                      [&](){ stream->synchronize(); }, *context->device());
    // NOTE(review): the 1e-3 factor yields TFLOPS only if bench() reports
    // nanoseconds -- confirm in common.hpp.
    return configuration.get_nflops() / ts * 1e-3;
  };
  std::string src = configuration.src();
//  jit.autotune("conv", src.c_str(), benchmark);
  jit.add_module("conv", src.c_str(), configuration.default_params());
  triton::driver::kernel* kernel = jit.get_function("conv");
  triton::jit::launch_information info = jit.get_launch_info("conv");
  std::cout << "Performance: " << benchmark(kernel, info) << " TFLOPS " << std::endl;
  // fetch the device result and compute the host reference
  stream->read(dc, true, 0, hc);
  configuration.cpu_ref(rc.data(), ha.data(), hb.data());
  // Element-wise relative-error check. The error is scaled by the larger
  // *magnitude* of the two values and compared without a division, so that
  // negative values and exact zeros are handled correctly. The comparison is
  // written as !(err <= bound) so that a NaN on either side counts as a failure.
  const float rel_tol = 1e-4f;
  for(size_t i = 0; i < hc.size(); i++){
    const float err = std::abs(hc[i] - rc[i]);
    const float scale = std::max(std::abs(hc[i]), std::abs(rc[i]));
    if(std::isnan(hc[i]) || !(err <= rel_tol * scale)){
      std::cout << i << " " << hc[i] << " " << rc[i] << std::endl;
      return EXIT_FAILURE;
    }
  }
  std::cout << "Pass!" << std::endl;
  return 0;
}
[general] major overhaul of triton-c/triton-ir/triton-jit: - Added alloc const - Added atomics - Pruning tuning space - Added example for dot/conv/shift - Bugfixes 2019-04-25 16:17:36 -04:00			`#include <cstring>`
			`#include <cstdio>`
			`#include "common.hpp"`
[triton/python/conv]: Added cache for compiled kernels 2019-05-18 11:51:49 -04:00			`#include "triton/runtime/jit.h"`
[general] major overhaul of triton-c/triton-ir/triton-jit: - Added alloc const - Added atomics - Pruning tuning space - Added example for dot/conv/shift - Bugfixes 2019-04-25 16:17:36 -04:00			`#include "triton/driver/backend.h"`
			`#include "triton/driver/stream.h"`
[general] creation of dnn module for gemm/conv triton routines 2019-05-06 17:47:06 -04:00			`#include "triton/dnn/conv.h"`
[general] major overhaul of triton-c/triton-ir/triton-jit: - Added alloc const - Added atomics - Pruning tuning space - Added example for dot/conv/shift - Bugfixes 2019-04-25 16:17:36 -04:00
			`int main() {`
			`// initialize default compute device`
			`auto context = triton::driver::backend::contexts::get_default();`
			`triton::jit jit(context);`
[triton/python/conv]: Added cache for compiled kernels 2019-05-18 11:51:49 -04:00			`triton::dnn::conv::type ty = triton::dnn::conv::FPROP;`
[general] major overhaul of triton-c/triton-ir/triton-jit: - Added alloc const - Added atomics - Pruning tuning space - Added example for dot/conv/shift - Bugfixes 2019-04-25 16:17:36 -04:00			`// initialization`
[triton/python/conv]: Added cache for compiled kernels 2019-05-18 11:51:49 -04:00			`int32_t B = 4, NF = 32;`
[dnn/conv] fixed formatting of generated Triton-C code 2019-05-16 15:48:02 -04:00			`int32_t D = 1, H = 56, W = 56;`
[triton/python/conv]: Added cache for compiled kernels 2019-05-18 11:51:49 -04:00			`int32_t NC = 32, T = 1, R = 3, S = 3;`
[general] major overhaul of triton-c/triton-ir/triton-jit: - Added alloc const - Added atomics - Pruning tuning space - Added example for dot/conv/shift - Bugfixes 2019-04-25 16:17:36 -04:00			`int32_t pad_d = 0, pad_h = 1, pad_w = 1;`
[dnn/conv] added triton-c code for wgrad 2019-05-11 18:09:23 -04:00			`triton::dnn::conv configuration(B, NC, D, H, W, T, R, S, NF, 1, 1, 1, pad_d, pad_h, pad_w, ty);`
more cleaning of conv 2019-05-06 19:30:22 -04:00			`// convolution configuration`
[examples/conv] now deferring shape computations to conv configuration 2019-05-08 13:58:25 -04:00			`std::vector<float> hc(configuration.c_size());`
			`std::vector<float> rc(configuration.c_size());`
			`std::vector<float> ha(configuration.a_size());`
			`std::vector<float> hb(configuration.b_size());`
[general] major overhaul of triton-c/triton-ir/triton-jit: - Added alloc const - Added atomics - Pruning tuning space - Added example for dot/conv/shift - Bugfixes 2019-04-25 16:17:36 -04:00			`srand(0);`
			`for(size_t i = 0; i < ha.size(); i++)`
[general] creation of dnn module for gemm/conv triton routines 2019-05-06 17:47:06 -04:00			`ha[i] = (float)rand()/RAND_MAX;`
[general] major overhaul of triton-c/triton-ir/triton-jit: - Added alloc const - Added atomics - Pruning tuning space - Added example for dot/conv/shift - Bugfixes 2019-04-25 16:17:36 -04:00			`for(size_t i = 0; i < hb.size(); i++)`
[general] creation of dnn module for gemm/conv triton routines 2019-05-06 17:47:06 -04:00			`hb[i] = (float)rand()/RAND_MAX;`
[general] major overhaul of triton-c/triton-ir/triton-jit: - Added alloc const - Added atomics - Pruning tuning space - Added example for dot/conv/shift - Bugfixes 2019-04-25 16:17:36 -04:00			`for(size_t i = 0; i < hc.size(); i++)`
[examples/conv] now deferring shape computations to conv configuration 2019-05-08 13:58:25 -04:00			`hc[i] = 0;`
[dnn/conv] some minor fixes 2019-05-08 10:09:30 -04:00			`rc = hc;`
[general] major overhaul of triton-c/triton-ir/triton-jit: - Added alloc const - Added atomics - Pruning tuning space - Added example for dot/conv/shift - Bugfixes 2019-04-25 16:17:36 -04:00			`triton::driver::buffer* dc = triton::driver::buffer::create(context, hc.size()*4);`
			`triton::driver::buffer* da = triton::driver::buffer::create(context, ha.size()*4);`
			`triton::driver::buffer* db = triton::driver::buffer::create(context, hb.size()*4);`
			`triton::driver::stream* stream = triton::driver::stream::create(context);`
			`stream->write(da, true, 0, ha);`
			`stream->write(db, true, 0, hb);`
			`stream->write(dc, true, 0, hc);`
			`stream->synchronize();`
			`// benchmark a given convolution kernel`
			`auto benchmark = [&](triton::driver::kernel* kernel,`
			`triton::jit::launch_information info) {`
			`unsigned TM = info.global_range_size[0];`
			`unsigned TN = info.global_range_size[1];`
[dnn/conv] some minor fixes 2019-05-08 10:09:30 -04:00			`unsigned nthreads = info.num_threads;`
			`std::array<size_t, 3> grid = configuration.get_grid(TM, TN);`
[triton/python/conv]: Added cache for compiled kernels 2019-05-18 11:51:49 -04:00			`configuration.init(stream, (triton::driver::cu_module*)kernel->module());`
[general] major overhaul of triton-c/triton-ir/triton-jit: - Added alloc const - Added atomics - Pruning tuning space - Added example for dot/conv/shift - Bugfixes 2019-04-25 16:17:36 -04:00			`stream->synchronize();`
[dnn/conv] some minor fixes 2019-05-08 10:09:30 -04:00			`configuration.set_arg(kernel, da, db, dc);`
[general] major overhaul of triton-c/triton-ir/triton-jit: - Added alloc const - Added atomics - Pruning tuning space - Added example for dot/conv/shift - Bugfixes 2019-04-25 16:17:36 -04:00			`stream->enqueue(kernel, grid, {nthreads, 1, 1});`
			`stream->synchronize();`
			`double ts = bench([&](){stream->enqueue(kernel, grid, {nthreads, 1, 1});},`
			`[&](){ stream->synchronize(); }, *context->device());`
[dnn/conv] some minor fixes 2019-05-08 10:09:30 -04:00			`return configuration.get_nflops() / ts * 1e-3;`
[general] major overhaul of triton-c/triton-ir/triton-jit: - Added alloc const - Added atomics - Pruning tuning space - Added example for dot/conv/shift - Bugfixes 2019-04-25 16:17:36 -04:00			`};`
[dnn/conv] some minor fixes 2019-05-08 10:09:30 -04:00			`std::string src = configuration.src();`
[triton/python/conv]: Added cache for compiled kernels 2019-05-18 11:51:49 -04:00			`// jit.autotune("conv", src.c_str(), benchmark);`
[dnn/conv] some minor fixes 2019-05-08 10:09:30 -04:00			`jit.add_module("conv", src.c_str(), configuration.default_params());`
[general] major overhaul of triton-c/triton-ir/triton-jit: - Added alloc const - Added atomics - Pruning tuning space - Added example for dot/conv/shift - Bugfixes 2019-04-25 16:17:36 -04:00			`triton::driver::kernel* kernel = jit.get_function("conv");`
			`triton::jit::launch_information info = jit.get_launch_info("conv");`
			`std::cout << "Performance: " << benchmark(kernel, info) << " TFLOPS " << std::endl;`
			`stream->read(dc, true, 0, hc);`
[dnn/conv] some minor fixes 2019-05-08 10:09:30 -04:00			`configuration.cpu_ref(rc.data(), ha.data(), hb.data());`
[dnn/conv] added triton-c code for wgrad 2019-05-11 18:09:23 -04:00			`for(size_t i = 0; i < hc.size(); i++){`
[dnn/conv]: now using look-up table for wgrad computation as well 2019-05-15 14:57:31 -04:00			`if(std::isnan(hc[i]) \|\| std::abs(hc[i] - rc[i])/std::max(hc[i], rc[i]) > 1e-4){`
[general] major overhaul of triton-c/triton-ir/triton-jit: - Added alloc const - Added atomics - Pruning tuning space - Added example for dot/conv/shift - Bugfixes 2019-04-25 16:17:36 -04:00			`std::cout << i << " " << hc[i] << " " << rc[i] << std::endl;`
			`exit(EXIT_FAILURE);`
[dnn] added Triton-C derivative computations in conv 2019-05-13 00:38:26 -04:00			`}`
[dnn/conv] added triton-c code for wgrad 2019-05-11 18:09:23 -04:00			`}`
[general] major overhaul of triton-c/triton-ir/triton-jit: - Added alloc const - Added atomics - Pruning tuning space - Added example for dot/conv/shift - Bugfixes 2019-04-25 16:17:36 -04:00			`std::cout << "Pass!" << std::endl;`
			`}`