[code generation] now vectorizing shared memory stores

This commit is contained in:
Philippe Tillet
2019-02-10 21:59:41 -05:00
parent 8ab5ca3de3
commit b2e487491f
8 changed files with 64 additions and 85 deletions

View File

@@ -9,6 +9,7 @@
#include "codegen/shared_copy.h"
#include "codegen/allocation.h"
#include "codegen/liveness.h"
#include "codegen/vectorize.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/LLVMContext.h"
@@ -160,6 +161,7 @@ int main() {
tdl::codegen::tune tune;
tdl::codegen::liveness liveness;
tdl::codegen::allocation allocation(&liveness);
tdl::codegen::vectorize vectorize(&tune);
tdl::codegen::selection selection(&allocation, &tune);
// tuning parameters
@@ -194,6 +196,7 @@ int main() {
shared.run(module);
liveness.run(module);
allocation.run();
vectorize.run(module);
selection.run(module, llvm_module);
// llvm source

View File

@@ -68,42 +68,25 @@ class distributed_tile: public tile{
private:
void init_indices();
public:
distributed_tile(llvm::Type *ty, const shapes_t& shapes, const axes_t &axes);
virtual void for_each(std::function<void(indices_t)> fn) = 0;
protected:
axes_t axes_;
indices_map_t indices_;
values_t values_;
};
// Concrete distributed tile: construction is inherited from distributed_tile,
// and values are read, written and visited through indices_t keys.
class serialized_distributed_tile: public distributed_tile {
public:
  // reuse the base-class constructors as-is
  using distributed_tile::distributed_tile;

  // per-index accessors
  llvm::Value* get_value(indices_t idx);
  void set_value(indices_t, llvm::Value *);

  // visitor taking a callback over indices_t
  void for_each(std::function<void(indices_t)> fn);
};
// NOTE(review): this span looks like removed and added diff lines shown together
// (two differently named constructors, set_value declared twice, builder_
// declared twice). Confirm which declarations belong to the final class.
class vectorized_distributed_tile: public distributed_tile {
private:
// maps an element type and a vector width to the LLVM type stored in the tile
llvm::Type *make_vector_ty(llvm::Type *ty, size_t vector_size);
public:
vectorized_distributed_tile(llvm::Type *ty, const shapes_t& shapes, const axes_t &axes, llvm::IRBuilder<> &builder);
void set_value(indices_t, llvm::Value *);
// constructor variant taking an explicit `vectorize` flag
distributed_tile(llvm::Type *ty, const shapes_t& shapes, const axes_t &axes, llvm::IRBuilder<> &builder, bool vectorize);
// per-index accessors
void set_value(indices_t idx, llvm::Value *v);
llvm::Value* get_value(indices_t idx);
// position that the index map assigns to `idx`
unsigned get_linear_index(indices_t idx);
// visitor taking a callback over indices_t
void for_each(std::function<void(indices_t)> fn);
// axis descriptor for dimension `dim`, looked up with axes_.at(dim)
const distributed_axis &axis(unsigned dim) { return axes_.at(dim); }
private:
// IR builder held by reference; the tile does not own it
llvm::IRBuilder<> &builder_;
axes_t axes_;
indices_map_t indices_;
values_t values_;
size_t vector_size_;
llvm::IRBuilder<> &builder_;
};
class selection{
typedef std::map<ir::value *, llvm::Value *> vmap_t;
typedef std::map<ir::value *, tile *> tmap_t;

View File

@@ -32,6 +32,7 @@ public:
std::vector<unsigned *> get_params(ir::module& mod);
std::map<std::string, unsigned *> get_params(ir::instruction* i);
// Fetches the entry stored in params_ for `value` under `key`, using operator[]
// at both levels.
unsigned *get_param(ir::value *value, const std::string &key) {
  auto &per_value = params_[value];
  return per_value[key];
}
// Assigns the params_ entry of `src` to the params_ entry of `dst`.
void copy(ir::value *dst, ir::value *src) {
  const auto &source = params_[src];
  params_[dst] = source;
}
bool check_constraints(ir::module &fn, std::map<ir::value *, std::vector<std::string>> &errors);
void run(ir::module &mod);

View File

@@ -118,6 +118,7 @@ public:
value *create_matmul(value *A, value *B, value *C, const std::string &name = "");
// Intrinsics
value *create_copy_to_shared(value *arg, const std::string &name = "");
value *create_vectorize(value *arg, const std::string &name = "");
private:
context &ctx_;
basic_block *block_;

View File

@@ -397,6 +397,12 @@ public:
instruction *next = nullptr);
};
// Unary instruction type created through the static `create` factory.
class vectorize_inst: public unary_inst{
private:
  // constructors come straight from unary_inst
  using unary_inst::unary_inst;

public:
  // factory taking the operand, an optional name and an optional `next` instruction
  static vectorize_inst* create(value *arg,
                                const std::string &name = "",
                                instruction *next = nullptr);
};
}
}

View File

@@ -34,79 +34,36 @@ void distributed_tile::init_indices() {
}
}
// Forwards type and shapes to tile, keeps a copy of the axes, runs
// init_indices(), then appends one undef value of type ty_ per index entry.
distributed_tile::distributed_tile(Type *ty, const shapes_t &shapes, const axes_t &axes)
    : tile(ty, shapes), axes_(axes) {
  init_indices();
  for(size_t remaining = indices_.size(); remaining > 0; --remaining)
    values_.push_back(UndefValue::get(ty_));
}
/* Serialized distributed tile */
// Writes `v` into the values_ slot that indices_ associates with `idx`.
void serialized_distributed_tile::set_value(indices_t idx, Value *v) {
  auto &slot = values_[indices_[idx]];
  slot = v;
}
void serialized_distributed_tile::get_value(indices_t idx) {
return values_[indices_[idx]];
}
// Calls `fn` once per entry of indices_, passing the entry's key.
void serialized_distributed_tile::for_each(std::function<void (indices_t)> fn) {
  for(auto entry = indices_.begin(); entry != indices_.end(); ++entry)
    fn(entry->first);
}
/* Vectorized distributed tile */
llvm::Type *vectorized_distributed_tile::make_vector_ty(llvm::Type *ty, size_t vector_size) {
// Picks the storage type for a tile element: a vector of `vector_size` lanes of
// `ty`, except that a width of exactly 1 keeps the plain `ty`.
llvm::Type *distributed_tile::make_vector_ty(llvm::Type *ty, size_t vector_size) {
  const bool needs_vector = (vector_size != 1);
  if(needs_vector)
    return VectorType::get(ty, vector_size);
  return ty;
}
vectorized_distributed_tile::vectorized_distributed_tile(Type *ty, const shapes_t &shapes, const axes_t &axes, llvm::IRBuilder<> &builder)
: distributed_tile(make_vector_ty(ty, axes[0].contiguous), shapes), axes_(axes), builder_(builder) {
vector_size_ = 1;
if(ty_->isVectorTy())
vector_size_ = ty_->getVectorNumElements();
// Builds a distributed tile.
//   ty        : element type; widened to a vector of axes[0].contiguous lanes
//               when `vectorize` is set (see make_vector_ty)
//   shapes    : forwarded to the tile base class
//   axes      : copied into axes_
//   builder   : IR builder kept by reference in builder_
//   vectorize : whether values are stored as vectors
// After the members are set, init_indices() is run and one undef value of type
// ty_ is appended to values_ per index entry.
distributed_tile::distributed_tile(Type *ty, const shapes_t &shapes, const axes_t &axes, llvm::IRBuilder<> &builder, bool vectorize)
    : tile(make_vector_ty(ty, vectorize?axes[0].contiguous:1), shapes), axes_(axes), builder_(builder) {
  // make_vector_ty returns the scalar type unchanged when the requested width is
  // 1, so `vectorize` alone does not guarantee that ty_ is a vector. Only ask for
  // the lane count when ty_ really is one; otherwise the width is 1.
  vector_size_ = (vectorize && ty_->isVectorTy()) ? ty_->getVectorNumElements() : 1;
  init_indices();
  for(size_t i = 0; i < indices_.size(); i++)
    values_.push_back(UndefValue::get(ty_));
}
// Stores `v` for tile index `idx`.
// NOTE(review): this body looks like removed and added diff lines shown
// together. As written, the vector-aware logic runs first and the final
// statement then stores `v` unconditionally. Confirm which statements are live.
void distributed_tile::set_value(indices_t idx, Value *v) {
// position of idx in the index map
unsigned value_idx = indices_[idx];
// slot at value_idx rounded down to a multiple of vector_size_
Value *&result = values_[value_idx/vector_size_*vector_size_];
// same type as the slot: replace the slot wholesale
if(v->getType() == result->getType()) {
assert(value_idx % vector_size_ == 0);
result = v;
}
// insert scalar in vector
else {
// prints both scalar type IDs to stdout on every call taking this branch
std::cout << v->getType()->getScalarType()->getTypeID() << " " << result->getType()->getScalarType()->getTypeID() << std::endl;
assert(vector_size_==1 || result->getType()->isVectorTy());
assert(v->getType()->getScalarType() == result->getType()->getScalarType());
// lane index is the offset of value_idx within its vector_size_ group
result = builder_.CreateInsertElement(result, v, value_idx % vector_size_);
}
// direct store at the exact (unrounded) position
values_[indices_[idx]] = v;
}
// Reads the value associated with tile index `idx`.
// NOTE(review): this body looks like removed and added diff lines shown
// together. As written, both branches of the if/else return, so the two
// trailing return statements are never reached. `vectorize_` is not among the
// members visible in this view. Confirm which statements are live.
Value* distributed_tile::get_value(indices_t idx) {
// position of idx in the index map
unsigned value_idx = indices_[idx];
// slot at value_idx rounded down to a multiple of vector_size_
Value *&result = values_[value_idx/vector_size_*vector_size_];
// whole-slot read
if(vectorize_ || vector_size_ == 1) {
assert(value_idx % vector_size_ == 0);
return result;
}
// extract scalar from vector
else {
assert(result->getType()->isVectorTy());
// lane index is the offset of value_idx within its vector_size_ group
return builder_.CreateExtractElement(result, value_idx % vector_size_);
}
return result;
return values_[indices_[idx]];
}
// Reports the position that indices_ associates with `idx`.
unsigned distributed_tile::get_linear_index(indices_t idx) {
  const unsigned linear = indices_[idx];
  return linear;
}
// Invokes `fn` on index keys whose position is a multiple of vector_size_.
// NOTE(review): this body looks like removed and added diff lines shown
// together. As written it has two nested range-for loops over indices_, each
// with its own filter. `vectorize_` is not among the members visible in this
// view. Confirm which loop header is live.
void distributed_tile::for_each(std::function<void (indices_t)> fn) {
for(auto &idx: indices_) {
if(!vectorize_ || (idx.second % vector_size_ == 0))
for(auto &idx: indices_)
// idx.second is the position stored in the index map; idx.first is the key
if(idx.second % vector_size_ == 0)
fn(idx.first);
}
}
/* Shared Tile */
@@ -444,7 +401,7 @@ void selection::create_tile(ir::value *v, IRBuilder<> &builder,
axes[d].values = {builder.getInt32(0)};
}
}
bool vectorize = dynamic_cast<ir::load_inst*>(v);
bool vectorize = dynamic_cast<ir::vectorize_inst*>(v);
distributed_tile *T = new distributed_tile(ty, shapes, axes, builder, vectorize);
tmap_.insert({v, T});
// constant range
@@ -548,6 +505,26 @@ void selection::lower_tile_instruction(ir::instruction *ins, llvm::IRBuilder<> &
result->set_value(out_idx, in_tile->get_value(in_idx));
});
}
// vectorize
else if(dynamic_cast<ir::vectorize_inst*>(ins)) {
distributed_tile* in = (distributed_tile*)tmap_.at(ins->get_operand(0));
unsigned vector_size = result->axis(0).contiguous;
std::map<unsigned, Value*> packets;
in->for_each([&](indices_t idx){
unsigned linear = in->get_linear_index(idx);
unsigned id = linear / vector_size;
if(linear % vector_size == 0)
packets[id] = result->get_value(idx);
packets[id] = builder.CreateInsertElement(packets[id], in->get_value(idx), linear % vector_size);
std::cout << linear << std::endl;
});
result->for_each([&](indices_t idx){
unsigned linear = in->get_linear_index(idx);
unsigned id = linear / vector_size;
if(linear % vector_size == 0)
result->set_value(idx, packets[id]);
});
}
// copy to shared
else if(dynamic_cast<ir::copy_to_shared_inst*>(ins)) {
distributed_tile* in = (distributed_tile*)tmap_.at(ins->get_operand(0));

View File

@@ -277,5 +277,9 @@ value *builder::create_copy_to_shared(value *arg, const std::string &name) {
return insert(copy_to_shared_inst::create(arg, name));
}
// Creates a vectorize_inst for `arg` (optionally named) and passes it to insert().
value *builder::create_vectorize(value *arg, const std::string &name) {
  vectorize_inst *vec = vectorize_inst::create(arg, name);
  return insert(vec);
}
}
}

View File

@@ -399,5 +399,9 @@ copy_to_shared_inst* copy_to_shared_inst::create(value *arg, const std::string &
return new copy_to_shared_inst(arg->get_type(), arg, name, next);
}
// Factory: heap-allocates a vectorize_inst from the operand's type, the operand,
// its name and `next`; the returned pointer comes from `new`.
vectorize_inst* vectorize_inst::create(value *arg, const std::string &name, instruction *next) {
  auto result_ty = arg->get_type();
  return new vectorize_inst(result_ty, arg, name, next);
}
}
}