Improvements to Auto-Tuning and standard benchmarks (#57)

[PYTHON] Bug fixes in the auto-tuning module and improvements to its existing API
This commit is contained in:
Philippe Tillet
2021-02-03 13:37:21 -08:00
committed by Philippe Tillet
parent ad005d49ac
commit 6fb4800f57
12 changed files with 215 additions and 149 deletions

View File

@@ -224,7 +224,7 @@ void kernel::operator()(void *args, size_t args_size, driver::stream *stream, co
for(size_t i = 0; i < 3; i++)
grid[i] = (i < _grid.size()) ? _grid[i] : 1;
// enqueue
stream->enqueue(&*ker_, grid, {opt.num_warps * 32, 1, 1}, args, args_size);
stream->enqueue(&*ker_, grid, {(size_t)opt.num_warps * 32, 1, 1}, args, args_size);
}
std::string kernel::get_asm(asm_mode_t mode) {
@@ -282,35 +282,35 @@ void function::do_loop_nest(std::vector<size_t> const & ranges,
return;
values[i--] = 0;
}
i = D - 1;
i = D - 1; options_t opt;
}
}
void function::init_kernels(const std::string& src, const options_space_t& opts, driver::device *device) {
// all ranges
std::vector<size_t> ranges;
ranges.push_back(opts.num_warps.size());
for(const auto& x: opts.defines)
ranges.push_back(x.second.size());
// functor for source with given option
void function::init_kernels(const std::string& src, const options_t& opt,
const autotune_vals_t& confs, driver::device *device) {
// list of all possible configs
// just augment `opt` with each define of `confs`
// and override warp count
size_t num_opts = std::max(confs.size(), (size_t)1);
std::vector<options_t> opts(num_opts, opt);
for(size_t i = 0; i < confs.size(); i++){
opts[i].defines.insert(confs[i].first.begin(), confs[i].first.end());
opts[i].num_warps = confs[i].second;
}
// compile all possible configs
// compilation errors (e.g., too much shared mem)
// will populate `err`
std::vector<std::pair<options_t, std::string>> err;
auto do_make = [&](std::vector<size_t> params) {
// compilation options
unsigned i = 0;
options_t opt;
opt.num_warps = opts.num_warps[params[i++]];
for(auto D: opts.defines)
opt.defines[D.first] = D.second[params[i++]];
// compile
for(const options_t& opt: opts) {
try{
kernels_.push_back({opt, std::make_shared<kernel>(src, opt, device)});
}catch(const exception::base& e){
err.push_back({opt, e.what()});
}
};
// multi-threaded compilation
do_loop_nest(ranges, do_make);
}
// throw an exception if `err` is not empty
if(kernels_.empty()){
std::ostringstream dbg;
dbg << "Auto-Tuner could not find any valid configuration:" << std::endl;
@@ -357,9 +357,11 @@ kernel* function::autotune(void* args, size_t args_size, const grid_fn_ty& grid_
return it->second;
}
function::function(const std::string& src, const options_space_t& opt,
driver::device *device, const std::vector<std::string>& autotune_key) {
init_kernels(src, opt, device);
function::function(const std::string& src, const options_t &opt, driver::device *device,
const autotune_vals_t& autotune_vals, const std::vector<std::string>& autotune_key) {
// pre-compile all kernels
init_kernels(src, opt, autotune_vals, device);
// find indices of autotune keys
auto arg_names = kernels_.at(0).second->get_arg_names();
for(const std::string& name: autotune_key){
auto it = std::find(arg_names.begin(), arg_names.end(), name);