From 32562677e983bcf7208e32c5cd56ce994a9291c4 Mon Sep 17 00:00:00 2001
From: Philippe Tillet
Date: Tue, 12 Feb 2019 19:36:16 -0500
Subject: [PATCH] [code generation] added barriers placement

---
 examples/matrix.cpp           |  8 ++--
 include/codegen/allocation.h  |  1 +
 include/codegen/barriers.h    | 50 ++++++++++++++++++++
 include/codegen/shared_copy.h | 11 ++++-
 include/ir/builder.h          |  2 +
 include/ir/instructions.h     |  9 ++++
 lib/codegen/barriers.cpp      | 89 +++++++++++++++++++++++++++++++++++
 lib/codegen/selection.cpp     |  5 ++
 lib/codegen/shared_copy.cpp   |  9 ++--
 lib/ir/builder.cpp            |  4 ++
 lib/ir/instructions.cpp       |  8 ++++
 11 files changed, 188 insertions(+), 8 deletions(-)
 create mode 100644 include/codegen/barriers.h
 create mode 100644 lib/codegen/barriers.cpp

diff --git a/examples/matrix.cpp b/examples/matrix.cpp
index 31e4a173e..938a4eddb 100644
--- a/examples/matrix.cpp
+++ b/examples/matrix.cpp
@@ -12,6 +12,7 @@
 #include "codegen/liveness.h"
 #include "codegen/vectorize.h"
 #include "codegen/buffer_info.h"
+#include "codegen/barriers.h"
 #include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/LLVMContext.h"
@@ -167,6 +168,7 @@ int main() {
   tdl::codegen::tune tune;
   tdl::codegen::liveness liveness(&buffer_info);
   tdl::codegen::allocation allocation(&liveness, &buffer_info);
+  tdl::codegen::barriers barriers(&allocation, &buffer_info); // reads the allocation and buffer_info results
   tdl::codegen::vectorize vectorize(&tune);
   tdl::codegen::selection selection(&allocation, &tune, &buffer_info);
 
@@ -202,17 +204,18 @@ int main() {
   buffer_info.run(module);
   liveness.run(module);
   allocation.run();
+  barriers.run(module); // placed after allocation.run() and before vectorize/selection
   vectorize.run(module);
   selection.run(module, llvm_module);
 
   // llvm source
   llvm::legacy::PassManager manager;
-  manager.add(llvm::createPrintModulePass(llvm::outs()));
+// manager.add(llvm::createPrintModulePass(llvm::outs()));
   manager.add(llvm::createVerifierPass(true));
   manager.run(llvm_module);
 
   std::string src = generate_machine_code(llvm_module, "nvptx64-nvidia-cuda", compute_data_layout(true, true));
-  std::cout << src << std::endl;
+// std::cout << src << std::endl;
 
   // compile machine code
   CUdevice cu_device;
@@ -222,7 +225,6 @@ int main() {
   CUstream cu_stream;
   int major, minor;
   compile_machine_code(cu_device, cu_context, cu_module, cu_kernel, cu_stream, major, minor, src, "test");
-  std::cout << src << std::endl;
 
   // execute machine code
   // Allocate buffers
diff --git a/include/codegen/allocation.h b/include/codegen/allocation.h
index 5bd5e85a2..ad58ccea7 100644
--- a/include/codegen/allocation.h
+++ b/include/codegen/allocation.h
@@ -3,6 +3,7 @@
 
 #include
 #include
+#include // NOTE(review): header name lost in extraction; restore it from the original commit
 
 namespace tdl{
 
diff --git a/include/codegen/barriers.h b/include/codegen/barriers.h
new file mode 100644
index 000000000..9b476ae75
--- /dev/null
+++ b/include/codegen/barriers.h
@@ -0,0 +1,50 @@
+#ifndef TDL_INCLUDE_CODEGEN_BARRIERS_H
+#define TDL_INCLUDE_CODEGEN_BARRIERS_H
+
+#include <set>     // std::set    -- NOTE(review): the three header names were lost in extraction;
+#include <utility> // std::pair   -- these are the headers the declarations below need,
+#include <vector>  // std::vector -- confirm against the original commit
+
+namespace tdl {
+
+namespace ir {
+  class module;
+  class basic_block;
+  class instruction;
+  class value;
+  class builder;
+}
+
+namespace codegen{
+
+class allocation;
+class buffer_info_pass;
+
+class barriers { // pass that inserts barrier instructions where an instruction touches a byte range written since the last barrier
+private:
+  typedef std::pair<unsigned, unsigned> interval_t; // byte range (offset, offset + num_bytes) of one shared value
+  typedef std::vector<interval_t> interval_vec_t;
+
+private:
+  void insert_barrier(ir::instruction *instr, ir::builder &builder); // creates the barrier(s) for one recorded instruction
+  bool intersect(const interval_vec_t &X, interval_t x); // does x overlap an interval of X?
+  bool intersect(const interval_vec_t &X, const interval_vec_t &Y); // does some interval of Y overlap X?
+  void add_reference(ir::value *v, interval_vec_t &res); // appends the range of v to res when v is shared
+  void get_read_intervals(ir::instruction *i, interval_vec_t &res); // ranges of the operands of i
+  void get_written_intervals(ir::instruction *i, interval_vec_t &res); // range of the result of i
+  void add(ir::basic_block *block, interval_vec_t &not_synced, std::set<ir::instruction*> &insert_pts); // scans one block, updating pending writes and insertion points
+
+public:
+  barriers(allocation *alloc, buffer_info_pass *buffer_info): alloc_(alloc), buffer_info_(buffer_info) {} // stores both pointers
+  void run(ir::module &mod); // processes every function of mod
+
+private:
+  allocation *alloc_; // queried for get_offset / get_num_bytes
+  buffer_info_pass *buffer_info_; // queried for is_shared
+};
+
+
+}
+}
+
+#endif
diff --git a/include/codegen/shared_copy.h b/include/codegen/shared_copy.h
index 8512ec358..927915805 100644
--- a/include/codegen/shared_copy.h
+++ b/include/codegen/shared_copy.h
@@ -1,19 +1,28 @@
 #ifndef TDL_INCLUDE_CODEGEN_SHARED_COPY_H
 #define TDL_INCLUDE_CODEGEN_SHARED_COPY_H
 
+#include <utility> // std::pair   -- NOTE(review): both header names were lost in extraction; these are
+#include <vector>  // std::vector -- the ones the typedefs below need, confirm against the original commit
+
 namespace tdl {
 
 namespace ir {
   class module;
   class value;
   class builder;
+  class basic_block;
 }
 
 namespace codegen{
 
 class place_shared_copy {
 private:
-  void add(ir::value *x, ir::builder &builder);
+  typedef std::pair<unsigned, unsigned> interval_t; // NOTE(review): template arguments lost in extraction; assumed by analogy with barriers::interval_t, which lib/codegen/barriers.cpp builds from two unsigned offsets -- confirm
+  typedef std::vector<interval_t> interval_vec_t;
+
+private:
+  bool intersect(const interval_vec_t &I, interval_t i); // NOTE(review): declared here, but no definition is visible in this patch
+  void add_copies(ir::value *x, ir::builder &builder); // new name of the former add()
 
 public:
   void run(ir::module &mod);
diff --git a/include/ir/builder.h b/include/ir/builder.h
index f0b0d1aa3..d7e49cf14 100644
--- a/include/ir/builder.h
+++ b/include/ir/builder.h
@@ -119,6 +119,8 @@ public:
   // Intrinsics
   value *create_copy_to_shared(value *arg, const std::string &name = "");
   value *create_vectorize(value *arg, const std::string &name = "");
+  value *create_barrier(const std::string &name = ""); // inserts a new barrier_inst through insert()
+
 private:
   context &ctx_;
   basic_block *block_;
diff --git a/include/ir/instructions.h b/include/ir/instructions.h
index 0745f62c0..08f472786 100644
--- a/include/ir/instructions.h
+++ b/include/ir/instructions.h
@@ -397,6 +397,15 @@ public:
                    instruction *next = nullptr);
 };
 
+class barrier_inst: public instruction{ // void-typed instruction; instances are obtained through create()
+private:
+  barrier_inst(context &ctx, const std::string &name, instruction *next);
+
+public:
+  static barrier_inst* create(context &ctx, const std::string &name = "",
+                              instruction *next = nullptr); // returns a heap-allocated instruction (plain new)
+};
+
 class vectorize_inst: public unary_inst{
   using unary_inst::unary_inst;
 
diff --git a/lib/codegen/barriers.cpp b/lib/codegen/barriers.cpp
new file mode 100644
index 000000000..f21c1e1d6
--- /dev/null
+++ b/lib/codegen/barriers.cpp
@@ -0,0 +1,89 @@
+#include <algorithm> // std::any_of, std::copy -- NOTE(review): header name lost in extraction; confirm against the original commit
+#include "codegen/barriers.h"
+#include "codegen/allocation.h"
+#include "codegen/buffer_info.h"
+#include "ir/module.h"
+#include "ir/function.h"
+#include "ir/basic_block.h"
+#include "ir/instructions.h"
+
+namespace tdl {
+
+namespace codegen{
+
+bool barriers::intersect(const interval_vec_t &X, interval_t x) { // true iff x overlaps at least one interval of X
+  return std::any_of(X.begin(), X.end(), [&](const interval_t &y){
+    // half-open ranges [first, second) overlap iff each starts before the other ends; testing only
+    // whether an endpoint of x lies inside y misses x covering y and flags ranges that merely touch
+    return x.first < y.second && y.first < x.second;
+  });
+}
+
+bool barriers::intersect(const interval_vec_t &X, const interval_vec_t &Y) { // true iff some interval of Y overlaps some interval of X
+  return std::any_of(Y.begin(), Y.end(), [&](const interval_t &y){
+    return intersect(X, y);
+  });
+}
+
+void barriers::add_reference(ir::value *v, interval_vec_t &res){ // appends the byte range of v to res when buffer_info_ reports v as shared
+  if(buffer_info_->is_shared(v)){
+    unsigned offset = alloc_->get_offset(v);
+    unsigned num_bytes = alloc_->get_num_bytes(v);
+    res.push_back(interval_t(offset, offset + num_bytes)); // second = offset + num_bytes, i.e. the end is exclusive
+  }
+}
+
+void barriers::get_read_intervals(ir::instruction *i, interval_vec_t &res){ // collects the ranges of the operands of i
+  for(ir::value *op: i->ops())
+    add_reference(op, res);
+}
+
+void barriers::get_written_intervals(ir::instruction *i, interval_vec_t &res){ // collects the range of the value produced by i
+  if(!dynamic_cast<ir::phi_node*>(i)) // NOTE(review): cast target lost in extraction; ir::phi_node* assumed -- confirm against the original commit
+    add_reference(i, res);
+}
+
+void barriers::insert_barrier(ir::instruction *instr, ir::builder &builder) { // creates the barrier(s) for one recorded instruction
+  if(auto *phi = dynamic_cast<ir::phi_node*>(instr)) { // NOTE(review): cast target lost in extraction; ir::phi_node* assumed from the get_incoming_* calls
+    for(unsigned n = 0; n < phi->get_num_incoming(); n++){ // one barrier per incoming block
+      ir::basic_block *block = phi->get_incoming_block(n);
+      builder.set_insert_point(block->get_inst_list().back()); // insertion point: last instruction of the incoming block
+      builder.create_barrier();
+    }
+  }
+  else{
+    builder.set_insert_point(instr); // insertion point: the instruction itself
+    builder.create_barrier();
+  }
+}
+
+void barriers::add(ir::basic_block *block, interval_vec_t &not_synced, std::set<ir::instruction*> &insert_pts) { // scans block in instruction order
+  for(ir::instruction *i: block->get_inst_list()){
+    interval_vec_t read, written;
+    get_read_intervals(i, read);
+    get_written_intervals(i, written);
+    if(intersect(not_synced, read)
+       || intersect(not_synced, written)) { // i touches a range written since the last recorded barrier
+      not_synced.clear(); // a barrier is recorded at i, so nothing written earlier stays pending
+      insert_pts.insert(i);
+    }
+    std::copy(written.begin(), written.end(), std::back_inserter(not_synced)); // what i writes becomes pending
+  }
+}
+
+void barriers::run(ir::module &mod) { // entry point of the pass: handles every function of mod
+  ir::builder &builder = mod.get_builder();
+  for(ir::function *fn: mod.get_function_list()){
+    // find barrier location
+    interval_vec_t not_synced; // shared by all blocks of fn: pending writes carry over in fn->blocks() order
+    std::set<ir::instruction*> insert_pts;
+    for(ir::basic_block *block: fn->blocks())
+      add(block, not_synced, insert_pts);
+    // insert barrier
+    for(ir::instruction *i: insert_pts)
+      insert_barrier(i, builder);
+  }
+}
+
+}
+}
diff --git a/lib/codegen/selection.cpp b/lib/codegen/selection.cpp
index b9857eb12..9ef405e06 100644
--- a/lib/codegen/selection.cpp
+++ b/lib/codegen/selection.cpp
@@ -211,6 +211,11 @@ Instruction *selection::llvm_inst(ir::instruction *inst, std::functionget_dest());
     return builder.Insert(BranchInst::Create(dest));
   }
+  if(dynamic_cast<ir::barrier_inst*>(inst)){ // NOTE(review): cast target lost in extraction; ir::barrier_inst* assumed because this branch emits a barrier call
+    Module *module = builder.GetInsertBlock()->getModule();
+    Function *barrier = Intrinsic::getDeclaration(module, Intrinsic::nvvm_barrier0);
+    return builder.CreateCall(barrier, {}); // call to the nvvm_barrier0 intrinsic with no arguments
+  }
   if(auto* ii = dynamic_cast(inst)){
     Type *ty = type(ii->get_type()->get_scalar_ty());
     unsigned num_ops = ii->get_num_operands();
diff --git a/lib/codegen/shared_copy.cpp b/lib/codegen/shared_copy.cpp
index 08bac4f9a..07d6a5c29 100644
--- a/lib/codegen/shared_copy.cpp
+++ b/lib/codegen/shared_copy.cpp
@@ -1,3 +1,4 @@
+#include // NOTE(review): header name lost in extraction; the visible hunks do not show which header this was
 #include "codegen/shared_copy.h"
 #include "ir/module.h"
 #include "ir/function.h"
@@ -8,10 +9,10 @@ namespace tdl {
 
 namespace codegen{
 
-void place_shared_copy::add(ir::value *x, ir::builder &builder) {
+void place_shared_copy::add_copies(ir::value *x, ir::builder &builder) { // renamed from add(); body unchanged apart from the recursive call
   if(auto *phi = dynamic_cast(x)) {
     for(auto *op: phi->ops())
-      add(op, builder);
+      add_copies(op, builder);
   }
   else {
     if(auto *i = dynamic_cast(x)){
@@ -31,8 +32,8 @@ void place_shared_copy::run(ir::module &mod) {
   for(ir::basic_block *block: fn->blocks())
   for(ir::instruction *i: block->get_inst_list())
     if(dynamic_cast(i)){
-      add(i->get_operand(0), builder);
-      add(i->get_operand(1), builder);
+      add_copies(i->get_operand(0), builder);
+      add_copies(i->get_operand(1), builder);
     }
 }
 
diff --git a/lib/ir/builder.cpp b/lib/ir/builder.cpp
index 7422a47fc..cb5edd2b6 100644
--- a/lib/ir/builder.cpp
+++ b/lib/ir/builder.cpp
@@ -281,5 +281,9 @@ value *builder::create_vectorize(value *arg, const std::string &name) {
   return insert(vectorize_inst::create(arg, name));
 }
 
+value *builder::create_barrier(const std::string &name) { // builds a barrier_inst in ctx_ and hands it to insert()
+  return insert(barrier_inst::create(ctx_, name));
+}
+
 }
 }
diff --git a/lib/ir/instructions.cpp b/lib/ir/instructions.cpp
index 522f4c029..f335bbeea 100644
--- a/lib/ir/instructions.cpp
+++ b/lib/ir/instructions.cpp
@@ -403,5 +403,13 @@ vectorize_inst* vectorize_inst::create(value *arg, const std::string &name, inst
   return new vectorize_inst(arg->get_type(), arg, name, next);
 }
 
+barrier_inst::barrier_inst(context &ctx, const std::string &name,
+                           instruction *next)
+  : instruction(type::get_void_ty(ctx), 0, name, next){ } // void type; the 0 is presumably the operand count -- confirm against ir::instruction
+
+barrier_inst* barrier_inst::create(context &ctx, const std::string &name, instruction *next) { // allocates with plain new and returns the raw pointer
+  return new barrier_inst(ctx, name, next);
+}
+
 }
 }