[auto-tuning] now not compiling kernels that use too much shared memory

2019-09-05 21:03:09 -04:00
parent 1f8fd525b5
commit b79bcbaee8
1 changed files with 6 additions and 2 deletions
--- a/lib/runtime/function.cc
+++ b/lib/runtime/function.cc
@@ -169,6 +169,9 @@ function::caller function::autotune(driver::stream* stream, const grid_fn_ty& gr
    }catch(const std::runtime_error& e) {
      return;
    }
+    // kernel uses too much resources
+    if(!bin)
+      return;
    // benchmark
    ir::function *tmp = ir->get_function_list()[0];
    caller call(tmp, std::move(bin), opt);
@@ -201,7 +204,6 @@ std::unique_ptr<driver::module> function::make_bin(ir::module &module, driver::c
  codegen::transform::peephole peephole;
  codegen::transform::reassociate reassociate(&alignment_info, &grids);
  codegen::selection selection(&shmem_allocation, &grids, &shmem_info, &alignment_info, target.get());
-
  // run passes
  peephole.run(module);
  dce.run(module);
@@ -210,11 +212,12 @@ std::unique_ptr<driver::module> function::make_bin(ir::module &module, driver::c
  reassociate.run(module);
  dce.run(module);
  peephole.run(module);
-
  if(target->is_gpu()){
    shmem_info.run(module);
    shmem_liveness.run(module);
    shmem_allocation.run();
+    if(shmem_allocation.get_allocated_size() > context->device()->max_shared_memory())
+      return std::unique_ptr<driver::module>();
    shmem_barriers.run(module);
  }
  dce.run(module);
@@ -226,6 +229,7 @@ std::unique_ptr<driver::module> function::make_bin(ir::module &module, driver::c
  selection.run(module, *llvm);
  // return binary
  std::unique_ptr<driver::module> res(driver::module::create(context, std::move(llvm)));
+  // done
  return res;
 }