[code generation] added barriers placement
This commit is contained in:
@@ -12,6 +12,7 @@
|
||||
#include "codegen/liveness.h"
|
||||
#include "codegen/vectorize.h"
|
||||
#include "codegen/buffer_info.h"
|
||||
#include "codegen/barriers.h"
|
||||
#include "llvm/IR/IRPrintingPasses.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IR/LLVMContext.h"
|
||||
@@ -167,6 +168,7 @@ int main() {
|
||||
tdl::codegen::tune tune;
|
||||
tdl::codegen::liveness liveness(&buffer_info);
|
||||
tdl::codegen::allocation allocation(&liveness, &buffer_info);
|
||||
tdl::codegen::barriers barriers(&allocation, &buffer_info);
|
||||
tdl::codegen::vectorize vectorize(&tune);
|
||||
tdl::codegen::selection selection(&allocation, &tune, &buffer_info);
|
||||
|
||||
@@ -202,17 +204,18 @@ int main() {
|
||||
buffer_info.run(module);
|
||||
liveness.run(module);
|
||||
allocation.run();
|
||||
barriers.run(module);
|
||||
vectorize.run(module);
|
||||
selection.run(module, llvm_module);
|
||||
|
||||
// llvm source
|
||||
llvm::legacy::PassManager manager;
|
||||
manager.add(llvm::createPrintModulePass(llvm::outs()));
|
||||
// manager.add(llvm::createPrintModulePass(llvm::outs()));
|
||||
manager.add(llvm::createVerifierPass(true));
|
||||
manager.run(llvm_module);
|
||||
|
||||
std::string src = generate_machine_code(llvm_module, "nvptx64-nvidia-cuda", compute_data_layout(true, true));
|
||||
std::cout << src << std::endl;
|
||||
// std::cout << src << std::endl;
|
||||
|
||||
// compile machine code
|
||||
CUdevice cu_device;
|
||||
@@ -222,7 +225,6 @@ int main() {
|
||||
CUstream cu_stream;
|
||||
int major, minor;
|
||||
compile_machine_code(cu_device, cu_context, cu_module, cu_kernel, cu_stream, major, minor, src, "test");
|
||||
std::cout << src << std::endl;
|
||||
|
||||
// execute machine code
|
||||
// Allocate buffers
|
||||
|
@@ -3,6 +3,7 @@
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <iostream>
|
||||
|
||||
namespace tdl{
|
||||
|
||||
|
50
include/codegen/barriers.h
Normal file
50
include/codegen/barriers.h
Normal file
@@ -0,0 +1,50 @@
|
||||
#ifndef TDL_INCLUDE_CODEGEN_BARRIERS_H
|
||||
#define TDL_INCLUDE_CODEGEN_BARRIERS_H
|
||||
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
|
||||
namespace tdl {
|
||||
|
||||
namespace ir {
|
||||
class module;
|
||||
class basic_block;
|
||||
class instruction;
|
||||
class value;
|
||||
class builder;
|
||||
}
|
||||
|
||||
namespace codegen{
|
||||
|
||||
class allocation;
|
||||
class buffer_info_pass;
|
||||
|
||||
class barriers {
|
||||
private:
|
||||
typedef std::pair<unsigned, unsigned> interval_t;
|
||||
typedef std::vector<interval_t> interval_vec_t;
|
||||
|
||||
private:
|
||||
void insert_barrier(ir::instruction *instr, ir::builder &builder);
|
||||
bool intersect(const interval_vec_t &X, interval_t x);
|
||||
bool intersect(const interval_vec_t &X, const interval_vec_t &Y);
|
||||
void add_reference(ir::value *v, interval_vec_t &res);
|
||||
void get_read_intervals(ir::instruction *i, interval_vec_t &res);
|
||||
void get_written_intervals(ir::instruction *i, interval_vec_t &res);
|
||||
void add(ir::basic_block *block, interval_vec_t ¬_synced, std::set<ir::instruction *> &insert_pts);
|
||||
|
||||
public:
|
||||
barriers(allocation *alloc, buffer_info_pass *buffer_info): alloc_(alloc), buffer_info_(buffer_info) {}
|
||||
void run(ir::module &mod);
|
||||
|
||||
private:
|
||||
allocation *alloc_;
|
||||
buffer_info_pass *buffer_info_;
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
@@ -1,19 +1,28 @@
|
||||
#ifndef TDL_INCLUDE_CODEGEN_SHARED_COPY_H
|
||||
#define TDL_INCLUDE_CODEGEN_SHARED_COPY_H
|
||||
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
namespace tdl {
|
||||
|
||||
namespace ir {
|
||||
class module;
|
||||
class value;
|
||||
class builder;
|
||||
class basic_block;
|
||||
}
|
||||
|
||||
namespace codegen{
|
||||
|
||||
class place_shared_copy {
|
||||
private:
|
||||
void add(ir::value *x, ir::builder &builder);
|
||||
typedef std::pair<unsigned, unsigned> interval_t;
|
||||
typedef std::vector<interval_t> interval_vec_t;
|
||||
|
||||
private:
|
||||
bool intersect(const interval_vec_t &I, interval_t i);
|
||||
void add_copies(ir::value *x, ir::builder &builder);
|
||||
|
||||
public:
|
||||
void run(ir::module &mod);
|
||||
|
@@ -119,6 +119,8 @@ public:
|
||||
// Intrinsics
|
||||
value *create_copy_to_shared(value *arg, const std::string &name = "");
|
||||
value *create_vectorize(value *arg, const std::string &name = "");
|
||||
value *create_barrier(const std::string &name = "");
|
||||
|
||||
private:
|
||||
context &ctx_;
|
||||
basic_block *block_;
|
||||
|
@@ -397,6 +397,15 @@ public:
|
||||
instruction *next = nullptr);
|
||||
};
|
||||
|
||||
class barrier_inst: public instruction{
|
||||
private:
|
||||
barrier_inst(context &ctx, const std::string &name, instruction *next);
|
||||
|
||||
public:
|
||||
static barrier_inst* create(context &ctx, const std::string &name = "",
|
||||
instruction *next = nullptr);
|
||||
};
|
||||
|
||||
class vectorize_inst: public unary_inst{
|
||||
using unary_inst::unary_inst;
|
||||
|
||||
|
89
lib/codegen/barriers.cpp
Normal file
89
lib/codegen/barriers.cpp
Normal file
@@ -0,0 +1,89 @@
|
||||
#include <algorithm>
|
||||
#include "codegen/barriers.h"
|
||||
#include "codegen/allocation.h"
|
||||
#include "codegen/buffer_info.h"
|
||||
#include "ir/module.h"
|
||||
#include "ir/function.h"
|
||||
#include "ir/basic_block.h"
|
||||
#include "ir/instructions.h"
|
||||
|
||||
namespace tdl {
|
||||
|
||||
namespace codegen{
|
||||
|
||||
bool barriers::intersect(const interval_vec_t &X, interval_t x) {
|
||||
return std::any_of(X.begin(), X.end(), [&](const interval_t &y){
|
||||
bool left_intersect = y.first <= x.first && x.first < y.second;
|
||||
bool right_intersect = y.first <= x.second && x.second < y.second;
|
||||
return left_intersect || right_intersect;
|
||||
});
|
||||
}
|
||||
|
||||
bool barriers::intersect(const interval_vec_t &X, const interval_vec_t &Y) {
|
||||
return std::any_of(Y.begin(), Y.end(), [&](const interval_t &y){
|
||||
return intersect(X, y);
|
||||
});
|
||||
}
|
||||
|
||||
// Appends the byte interval [offset, offset + num_bytes) of `v` to `res`.
// Values that the buffer-info pass does not report as shared are ignored.
void barriers::add_reference(ir::value *v, interval_vec_t &res){
  if(!buffer_info_->is_shared(v))
    return;
  unsigned start = alloc_->get_offset(v);
  unsigned size = alloc_->get_num_bytes(v);
  res.emplace_back(start, start + size);
}
|
||||
|
||||
// Gathers into `res` the intervals of every operand of `i`
// (add_reference keeps only the shared ones).
void barriers::get_read_intervals(ir::instruction *i, interval_vec_t &res){
  for(ir::value *operand: i->ops()) {
    add_reference(operand, res);
  }
}
|
||||
|
||||
// Gathers into `res` the interval of the value produced by `i`.
// Phi nodes are skipped: they contribute no written interval here.
void barriers::get_written_intervals(ir::instruction *i, interval_vec_t &res){
  const bool is_phi = dynamic_cast<ir::phi_node*>(i) != nullptr;
  if(is_phi)
    return;
  add_reference(i, res);
}
|
||||
|
||||
// Creates the barrier(s) required for `instr`.
//  - ordinary instruction: the builder is positioned at `instr` and one
//    barrier is created;
//  - phi node: one barrier is created per incoming block, with the builder
//    positioned at the last instruction of that block.
// NOTE(review): assumes every incoming block has at least one instruction
// (back() is called on its instruction list) — confirm.
void barriers::insert_barrier(ir::instruction *instr, ir::builder &builder) {
  auto *phi = dynamic_cast<ir::phi_node*>(instr);
  if(phi == nullptr) {
    builder.set_insert_point(instr);
    builder.create_barrier();
    return;
  }
  for(unsigned idx = 0; idx < phi->get_num_incoming(); ++idx) {
    ir::basic_block *incoming = phi->get_incoming_block(idx);
    builder.set_insert_point(incoming->get_inst_list().back());
    builder.create_barrier();
  }
}
|
||||
|
||||
void barriers::add(ir::basic_block *block, interval_vec_t ¬_synced, std::set<ir::instruction*> &insert_pts) {
|
||||
for(ir::instruction *i: block->get_inst_list()){
|
||||
interval_vec_t read, written;
|
||||
get_read_intervals(i, read);
|
||||
get_written_intervals(i, written);
|
||||
if(intersect(not_synced, read)
|
||||
|| intersect(not_synced, written)) {
|
||||
not_synced.clear();
|
||||
insert_pts.insert(i);
|
||||
}
|
||||
std::copy(written.begin(), written.end(), std::back_inserter(not_synced));
|
||||
}
|
||||
}
|
||||
|
||||
// Entry point of the pass. For each function of `mod`: first collect every
// instruction that needs a barrier, then emit the barriers. The two phases
// are kept separate so that the instruction lists are not modified while
// they are being scanned.
void barriers::run(ir::module &mod) {
  ir::builder &builder = mod.get_builder();
  for(ir::function *fn: mod.get_function_list()){
    // phase 1: locate insertion points; pending writes carry over from block to block
    interval_vec_t pending_writes;
    std::set<ir::instruction*> barrier_sites;
    for(ir::basic_block *bb: fn->blocks()) {
      add(bb, pending_writes, barrier_sites);
    }
    // phase 2: materialize the barriers
    std::for_each(barrier_sites.begin(), barrier_sites.end(),
                  [&](ir::instruction *site){ insert_barrier(site, builder); });
  }
}
|
||||
|
||||
}
|
||||
}
|
@@ -211,6 +211,11 @@ Instruction *selection::llvm_inst(ir::instruction *inst, std::function<Value*(ir
|
||||
BasicBlock *dest = block(ii->get_dest());
|
||||
return builder.Insert(BranchInst::Create(dest));
|
||||
}
|
||||
if(dynamic_cast<ir::barrier_inst*>(inst)){
|
||||
Module *module = builder.GetInsertBlock()->getModule();
|
||||
Function *barrier = Intrinsic::getDeclaration(module, Intrinsic::nvvm_barrier0);
|
||||
return builder.CreateCall(barrier, {});
|
||||
}
|
||||
if(auto* ii = dynamic_cast<ir::phi_node*>(inst)){
|
||||
Type *ty = type(ii->get_type()->get_scalar_ty());
|
||||
unsigned num_ops = ii->get_num_operands();
|
||||
|
@@ -1,3 +1,4 @@
|
||||
#include <algorithm>
|
||||
#include "codegen/shared_copy.h"
|
||||
#include "ir/module.h"
|
||||
#include "ir/function.h"
|
||||
@@ -8,10 +9,10 @@ namespace tdl {
|
||||
|
||||
namespace codegen{
|
||||
|
||||
void place_shared_copy::add(ir::value *x, ir::builder &builder) {
|
||||
void place_shared_copy::add_copies(ir::value *x, ir::builder &builder) {
|
||||
if(auto *phi = dynamic_cast<ir::phi_node*>(x)) {
|
||||
for(auto *op: phi->ops())
|
||||
add(op, builder);
|
||||
add_copies(op, builder);
|
||||
}
|
||||
else {
|
||||
if(auto *i = dynamic_cast<ir::instruction*>(x)){
|
||||
@@ -31,8 +32,8 @@ void place_shared_copy::run(ir::module &mod) {
|
||||
for(ir::basic_block *block: fn->blocks())
|
||||
for(ir::instruction *i: block->get_inst_list())
|
||||
if(dynamic_cast<ir::matmul_inst*>(i)){
|
||||
add(i->get_operand(0), builder);
|
||||
add(i->get_operand(1), builder);
|
||||
add_copies(i->get_operand(0), builder);
|
||||
add_copies(i->get_operand(1), builder);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -281,5 +281,9 @@ value *builder::create_vectorize(value *arg, const std::string &name) {
|
||||
return insert(vectorize_inst::create(arg, name));
|
||||
}
|
||||
|
||||
// Builds a barrier instruction in this builder's context and hands it to
// insert(); returns whatever insert() yields.
value *builder::create_barrier(const std::string &name) {
  auto *barrier = barrier_inst::create(ctx_, name);
  return insert(barrier);
}
|
||||
|
||||
}
|
||||
}
|
||||
|
@@ -403,5 +403,13 @@ vectorize_inst* vectorize_inst::create(value *arg, const std::string &name, inst
|
||||
return new vectorize_inst(arg->get_type(), arg, name, next);
|
||||
}
|
||||
|
||||
// A barrier produces no value: it is constructed with the void type of `ctx`.
// NOTE(review): the literal 0 is presumably the operand count — confirm
// against the `instruction` constructor.
barrier_inst::barrier_inst(context &ctx, const std::string &name, instruction *next)
  : instruction(type::get_void_ty(ctx), 0, name, next)
{
}
|
||||
|
||||
// Factory for barrier instructions; forwards its arguments to the private
// constructor and returns the heap-allocated instruction.
barrier_inst* barrier_inst::create(context &ctx, const std::string &name, instruction *next) {
  barrier_inst *result = new barrier_inst(ctx, name, next);
  return result;
}
|
||||
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user