[code generation] added barrier placement

This commit is contained in:
Philippe Tillet
2019-02-12 19:36:16 -05:00
parent 41aad4800c
commit 32562677e9
11 changed files with 188 additions and 8 deletions

View File

@@ -12,6 +12,7 @@
#include "codegen/liveness.h"
#include "codegen/vectorize.h"
#include "codegen/buffer_info.h"
#include "codegen/barriers.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/LLVMContext.h"
@@ -167,6 +168,7 @@ int main() {
tdl::codegen::tune tune;
tdl::codegen::liveness liveness(&buffer_info);
tdl::codegen::allocation allocation(&liveness, &buffer_info);
tdl::codegen::barriers barriers(&allocation, &buffer_info);
tdl::codegen::vectorize vectorize(&tune);
tdl::codegen::selection selection(&allocation, &tune, &buffer_info);
@@ -202,17 +204,18 @@ int main() {
buffer_info.run(module);
liveness.run(module);
allocation.run();
barriers.run(module);
vectorize.run(module);
selection.run(module, llvm_module);
// llvm source
llvm::legacy::PassManager manager;
manager.add(llvm::createPrintModulePass(llvm::outs()));
// manager.add(llvm::createPrintModulePass(llvm::outs()));
manager.add(llvm::createVerifierPass(true));
manager.run(llvm_module);
std::string src = generate_machine_code(llvm_module, "nvptx64-nvidia-cuda", compute_data_layout(true, true));
std::cout << src << std::endl;
// std::cout << src << std::endl;
// compile machine code
CUdevice cu_device;
@@ -222,7 +225,6 @@ int main() {
CUstream cu_stream;
int major, minor;
compile_machine_code(cu_device, cu_context, cu_module, cu_kernel, cu_stream, major, minor, src, "test");
std::cout << src << std::endl;
// execute machine code
// Allocate buffers

View File

@@ -3,6 +3,7 @@
#include <map>
#include <set>
#include <iostream>
namespace tdl{

View File

@@ -0,0 +1,50 @@
#ifndef TDL_INCLUDE_CODEGEN_BARRIERS_H
#define TDL_INCLUDE_CODEGEN_BARRIERS_H
#include <tuple>
#include <vector>
#include <set>
namespace tdl {
namespace ir {
class module;
class basic_block;
class instruction;
class value;
class builder;
}
namespace codegen{
class allocation;
class buffer_info_pass;
// Code-generation pass that decides where barrier instructions are inserted.
// It works on byte ranges (pairs of unsigned offsets) obtained from the
// `allocation` object for values that `buffer_info_pass` reports as shared.
class barriers {
  // One byte range, stored as (begin offset, end offset).
  using interval_t = std::pair<unsigned, unsigned>;
  // A collection of byte ranges.
  using interval_vec_t = std::vector<interval_t>;

public:
  // Both pointers are stored as-is; this class does not take ownership.
  barriers(allocation *alloc, buffer_info_pass *buffer_info)
    : alloc_(alloc), buffer_info_(buffer_info) {}

  // Entry point: processes every function of `mod`.
  void run(ir::module &mod);

private:
  // Emits a barrier through `builder` at the location associated with `instr`.
  void insert_barrier(ir::instruction *instr, ir::builder &builder);
  // True when `x` overlaps at least one range of `X`.
  bool intersect(const interval_vec_t &X, interval_t x);
  // True when any range of `Y` overlaps at least one range of `X`.
  bool intersect(const interval_vec_t &X, const interval_vec_t &Y);
  // Appends the range occupied by `v` to `res` when `v` is a shared value.
  void add_reference(ir::value *v, interval_vec_t &res);
  // Collects the ranges referenced by the operands of `i`.
  void get_read_intervals(ir::instruction *i, interval_vec_t &res);
  // Collects the range produced by `i` itself.
  void get_written_intervals(ir::instruction *i, interval_vec_t &res);
  // Scans `block`, updating the not-yet-synchronized ranges and recording
  // the instructions that need a barrier in `insert_pts`.
  void add(ir::basic_block *block, interval_vec_t &not_synced,
           std::set<ir::instruction *> &insert_pts);

private:
  allocation *alloc_;
  buffer_info_pass *buffer_info_;
};
}
}
#endif

View File

@@ -1,19 +1,28 @@
#ifndef TDL_INCLUDE_CODEGEN_SHARED_COPY_H
#define TDL_INCLUDE_CODEGEN_SHARED_COPY_H
#include <tuple>
#include <vector>
namespace tdl {
namespace ir {
class module;
class value;
class builder;
class basic_block;
}
namespace codegen{
class place_shared_copy {
private:
void add(ir::value *x, ir::builder &builder);
typedef std::pair<unsigned, unsigned> interval_t;
typedef std::vector<interval_t> interval_vec_t;
private:
bool intersect(const interval_vec_t &I, interval_t i);
void add_copies(ir::value *x, ir::builder &builder);
public:
void run(ir::module &mod);

View File

@@ -119,6 +119,8 @@ public:
// Intrinsics
value *create_copy_to_shared(value *arg, const std::string &name = "");
value *create_vectorize(value *arg, const std::string &name = "");
value *create_barrier(const std::string &name = "");
private:
context &ctx_;
basic_block *block_;

View File

@@ -397,6 +397,15 @@ public:
instruction *next = nullptr);
};
// IR instruction that stands for a barrier. It takes no value operands in its
// interface; instances are obtained through the static create() factory.
class barrier_inst: public instruction{
  // Private (class default access): construction goes through create().
  barrier_inst(context &ctx,
               const std::string &name,
               instruction *next);

public:
  // Builds a new barrier instruction; `name` and `next` are forwarded to the
  // constructor unchanged.
  static barrier_inst* create(context &ctx,
                              const std::string &name = "",
                              instruction *next = nullptr);
};
class vectorize_inst: public unary_inst{
using unary_inst::unary_inst;

89
lib/codegen/barriers.cpp Normal file
View File

@@ -0,0 +1,89 @@
#include <algorithm>
#include "codegen/barriers.h"
#include "codegen/allocation.h"
#include "codegen/buffer_info.h"
#include "ir/module.h"
#include "ir/function.h"
#include "ir/basic_block.h"
#include "ir/instructions.h"
namespace tdl {
namespace codegen{
// Returns true when the byte range `x` overlaps at least one range in `X`.
//
// Ranges are half-open, [first, second): they are built elsewhere in this
// file as (offset, offset + num_bytes). Two half-open ranges overlap exactly
// when each one begins before the other ends.
//
// The previous test only checked whether an endpoint of `x` fell inside `y`.
// That missed the case where `x` strictly encloses `y` (e.g. x = [0, 16),
// y = [4, 8)), so a required barrier could be omitted. It also reported
// ranges that merely touch (x.second == y.first) as overlapping.
bool barriers::intersect(const interval_vec_t &X, interval_t x) {
  return std::any_of(X.begin(), X.end(), [&](const interval_t &y){
    return x.first < y.second && y.first < x.second;
  });
}
bool barriers::intersect(const interval_vec_t &X, const interval_vec_t &Y) {
return std::any_of(Y.begin(), Y.end(), [&](const interval_t &y){
return intersect(X, y);
});
}
// Appends the byte range occupied by `v` to `res`, but only when the buffer
// info pass reports `v` as shared; any other value leaves `res` untouched.
void barriers::add_reference(ir::value *v, interval_vec_t &res){
  if(!buffer_info_->is_shared(v))
    return;
  // Range is (offset, offset + size), both taken from the allocation.
  const unsigned start = alloc_->get_offset(v);
  const unsigned size = alloc_->get_num_bytes(v);
  res.emplace_back(start, start + size);
}
// Collects into `res` the ranges of every operand of `i` that qualifies in
// add_reference(); operands are visited in the order ops() yields them.
void barriers::get_read_intervals(ir::instruction *i, interval_vec_t &res){
  auto &&operands = i->ops();
  std::for_each(operands.begin(), operands.end(), [&](ir::value *operand){
    add_reference(operand, res);
  });
}
// Collects into `res` the range produced by `i` itself. Phi nodes are
// skipped: they never contribute a written range here.
void barriers::get_written_intervals(ir::instruction *i, interval_vec_t &res){
  const bool is_phi = dynamic_cast<ir::phi_node*>(i) != nullptr;
  if(is_phi)
    return;
  add_reference(i, res);
}
// Emits the barrier(s) required for `instr`.
//  - Ordinary instruction: the insert point is set to `instr` and a single
//    barrier is created there.
//  - Phi node: one barrier is created per incoming block, with the insert
//    point set to that block's last instruction.
// NOTE(review): presumably set_insert_point() makes the barrier land *before*
// the given instruction (i.e. before the terminator for phi predecessors) --
// confirm against ir::builder.
void barriers::insert_barrier(ir::instruction *instr, ir::builder &builder) {
  auto *phi = dynamic_cast<ir::phi_node*>(instr);
  if(!phi){
    builder.set_insert_point(instr);
    builder.create_barrier();
    return;
  }
  for(unsigned idx = 0; idx < phi->get_num_incoming(); ++idx){
    ir::basic_block *pred = phi->get_incoming_block(idx);
    // back() assumes the incoming block holds at least one instruction.
    builder.set_insert_point(pred->get_inst_list().back());
    builder.create_barrier();
  }
}
// Walks the instructions of `block` in order and decides which ones need a
// barrier.
//   not_synced : ranges written since the last recorded barrier; updated in
//                place so the state carries over to the next call.
//   insert_pts : receives every instruction that must be preceded by a barrier.
// An instruction needs a barrier when anything it reads or writes overlaps a
// range in `not_synced`. Recording a barrier empties `not_synced`; the
// instruction's own writes are then added back as pending.
void barriers::add(ir::basic_block *block, interval_vec_t &not_synced, std::set<ir::instruction*> &insert_pts) {
  for(ir::instruction *inst: block->get_inst_list()){
    interval_vec_t reads;
    interval_vec_t writes;
    get_read_intervals(inst, reads);
    get_written_intervals(inst, writes);
    const bool hazard = intersect(not_synced, reads)
                     || intersect(not_synced, writes);
    if(hazard){
      insert_pts.insert(inst);
      not_synced.clear();
    }
    // Whatever this instruction wrote is unsynchronized from now on.
    not_synced.insert(not_synced.end(), writes.begin(), writes.end());
  }
}
// Runs the pass over every function of `mod` in two phases: first scan all
// blocks to collect the instructions that need a barrier, then insert the
// barriers. Insertion is deferred so the instruction lists are not modified
// while they are being scanned.
void barriers::run(ir::module &mod) {
  ir::builder &builder = mod.get_builder();
  for(ir::function *fn: mod.get_function_list()){
    // Phase 1: locate barrier sites. The pending-write state is shared by
    // all blocks of the function, visited in the order blocks() yields them.
    interval_vec_t pending;
    std::set<ir::instruction*> barrier_sites;
    for(ir::basic_block *bb: fn->blocks())
      add(bb, pending, barrier_sites);
    // Phase 2: emit the barriers. The set is ordered by pointer value, so
    // sites are visited in that order rather than program order.
    std::for_each(barrier_sites.begin(), barrier_sites.end(), [&](ir::instruction *site){
      insert_barrier(site, builder);
    });
  }
}
}
}

View File

@@ -211,6 +211,11 @@ Instruction *selection::llvm_inst(ir::instruction *inst, std::function<Value*(ir
BasicBlock *dest = block(ii->get_dest());
return builder.Insert(BranchInst::Create(dest));
}
if(dynamic_cast<ir::barrier_inst*>(inst)){
Module *module = builder.GetInsertBlock()->getModule();
Function *barrier = Intrinsic::getDeclaration(module, Intrinsic::nvvm_barrier0);
return builder.CreateCall(barrier, {});
}
if(auto* ii = dynamic_cast<ir::phi_node*>(inst)){
Type *ty = type(ii->get_type()->get_scalar_ty());
unsigned num_ops = ii->get_num_operands();

View File

@@ -1,3 +1,4 @@
#include <algorithm>
#include "codegen/shared_copy.h"
#include "ir/module.h"
#include "ir/function.h"
@@ -8,10 +9,10 @@ namespace tdl {
namespace codegen{
void place_shared_copy::add(ir::value *x, ir::builder &builder) {
void place_shared_copy::add_copies(ir::value *x, ir::builder &builder) {
if(auto *phi = dynamic_cast<ir::phi_node*>(x)) {
for(auto *op: phi->ops())
add(op, builder);
add_copies(op, builder);
}
else {
if(auto *i = dynamic_cast<ir::instruction*>(x)){
@@ -31,8 +32,8 @@ void place_shared_copy::run(ir::module &mod) {
for(ir::basic_block *block: fn->blocks())
for(ir::instruction *i: block->get_inst_list())
if(dynamic_cast<ir::matmul_inst*>(i)){
add(i->get_operand(0), builder);
add(i->get_operand(1), builder);
add_copies(i->get_operand(0), builder);
add_copies(i->get_operand(1), builder);
}
}

View File

@@ -281,5 +281,9 @@ value *builder::create_vectorize(value *arg, const std::string &name) {
return insert(vectorize_inst::create(arg, name));
}
// Creates a barrier instruction named `name` in this builder's context and
// hands it to insert(); the result of insert() is returned to the caller.
value *builder::create_barrier(const std::string &name) {
  barrier_inst *barrier = barrier_inst::create(ctx_, name);
  return insert(barrier);
}
}
}

View File

@@ -403,5 +403,13 @@ vectorize_inst* vectorize_inst::create(value *arg, const std::string &name, inst
return new vectorize_inst(arg->get_type(), arg, name, next);
}
// A barrier yields no value, so the instruction is given the void type of
// `ctx`. NOTE(review): the literal 0 is presumably the operand count --
// confirm against the `instruction` constructor.
barrier_inst::barrier_inst(context &ctx,
                           const std::string &name,
                           instruction *next)
  : instruction(type::get_void_ty(ctx), 0, name, next)
{ }
// Factory for barrier instructions: heap-allocates a new barrier_inst from
// the given arguments and returns the raw pointer to the caller.
barrier_inst* barrier_inst::create(context &ctx, const std::string &name, instruction *next) {
  barrier_inst *result = new barrier_inst(ctx, name, next);
  return result;
}
}
}