Add a vectorize IR instruction and merge the serialized/vectorized
distributed tiles into a single distributed_tile class.

Review notes on this revision of the patch:
  * distributed_tile's constructor only queries getVectorNumElements() when
    the tile type really is a vector, as the removed vectorized constructor
    did (make_vector_ty returns the scalar type when vector_size == 1).
  * The leftover std::cout debug print in the vectorize lowering is dropped,
    so the last lib/codegen/selection.cpp hunk adds 19 lines instead of 20.
  * NOTE(review): several lines (std::function fn, std::map packets,
    dynamic_cast(v), ...) appear to have lost their template arguments before
    this patch text was captured; they are reproduced as found. Indentation
    and blank context lines were rebuilt from the hunk counts. Confirm both
    against the repository before applying.

diff --git a/examples/matrix.cpp b/examples/matrix.cpp
index 9f9e2a724..7af5c0fb9 100644
--- a/examples/matrix.cpp
+++ b/examples/matrix.cpp
@@ -9,6 +9,7 @@
 #include "codegen/shared_copy.h"
 #include "codegen/allocation.h"
 #include "codegen/liveness.h"
+#include "codegen/vectorize.h"
 #include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/LLVMContext.h"
@@ -160,6 +161,7 @@ int main() {
   tdl::codegen::tune tune;
   tdl::codegen::liveness liveness;
   tdl::codegen::allocation allocation(&liveness);
+  tdl::codegen::vectorize vectorize(&tune);
   tdl::codegen::selection selection(&allocation, &tune);
 
   // tuning parameters
@@ -194,6 +196,7 @@ int main() {
   shared.run(module);
   liveness.run(module);
   allocation.run();
+  vectorize.run(module);
   selection.run(module, llvm_module);
 
   // llvm source
diff --git a/include/codegen/selection.h b/include/codegen/selection.h
index 729c36adb..18b77f42c 100644
--- a/include/codegen/selection.h
+++ b/include/codegen/selection.h
@@ -68,42 +68,25 @@ class distributed_tile: public tile{
 
 private:
   void init_indices();
-
-public:
-  distributed_tile(llvm::Type *ty, const shapes_t& shapes, const axes_t &axes);
-  virtual void for_each(std::function fn) = 0;
-
-protected:
-  axes_t axes_;
-  indices_map_t indices_;
-  values_t values_;
-};
-
-class serialized_distributed_tile: public distributed_tile {
-public:
-  using distributed_tile::distributed_tile;
-
-public:
-  void set_value(indices_t, llvm::Value *);
-  llvm::Value* get_value(indices_t idx);
-  void for_each(std::function fn);
-};
-
-class vectorized_distributed_tile: public distributed_tile {
-private:
   llvm::Type *make_vector_ty(llvm::Type *ty, size_t vector_size);
 
 public:
-  vectorized_distributed_tile(llvm::Type *ty, const shapes_t& shapes, const axes_t &axes, llvm::IRBuilder<> &builder);
-  void set_value(indices_t, llvm::Value *);
+  distributed_tile(llvm::Type *ty, const shapes_t& shapes, const axes_t &axes, llvm::IRBuilder<> &builder, bool vectorize);
+  void set_value(indices_t idx, llvm::Value *v);
   llvm::Value* get_value(indices_t idx);
+  unsigned get_linear_index(indices_t idx);
   void for_each(std::function fn);
+  const distributed_axis &axis(unsigned dim) { return axes_.at(dim); }
 
 private:
-  llvm::IRBuilder<> &builder_;
+  axes_t axes_;
+  indices_map_t indices_;
+  values_t values_;
   size_t vector_size_;
+  llvm::IRBuilder<> &builder_;
 };
 
+
 class selection{
   typedef std::map vmap_t;
   typedef std::map tmap_t;
diff --git a/include/codegen/tune.h b/include/codegen/tune.h
index d1fc67549..dfa1fcc97 100644
--- a/include/codegen/tune.h
+++ b/include/codegen/tune.h
@@ -32,6 +32,7 @@ public:
   std::vector get_params(ir::module& mod);
   std::map get_params(ir::instruction* i);
   unsigned *get_param(ir::value *value, const std::string &key) { return params_[value][key]; }
+  void copy(ir::value *dst, ir::value *src) { params_[dst] = params_[src]; }
   bool check_constraints(ir::module &fn, std::map> &errors);
   void run(ir::module &mod);
 
diff --git a/include/ir/builder.h b/include/ir/builder.h
index 438390940..f0b0d1aa3 100644
--- a/include/ir/builder.h
+++ b/include/ir/builder.h
@@ -118,6 +118,7 @@ public:
   value *create_matmul(value *A, value *B, value *C, const std::string &name = "");
   // Intrinsics
   value *create_copy_to_shared(value *arg, const std::string &name = "");
+  value *create_vectorize(value *arg, const std::string &name = "");
 private:
   context &ctx_;
   basic_block *block_;
diff --git a/include/ir/instructions.h b/include/ir/instructions.h
index cc694fd7b..0745f62c0 100644
--- a/include/ir/instructions.h
+++ b/include/ir/instructions.h
@@ -397,6 +397,12 @@ public:
                                      instruction *next = nullptr);
 };
 
+class vectorize_inst: public unary_inst{
+  using unary_inst::unary_inst;
+
+public:
+  static vectorize_inst* create(value *arg, const std::string &name = "", instruction *next = nullptr);
+};
 
 }
 }
diff --git a/lib/codegen/selection.cpp b/lib/codegen/selection.cpp
index 2510f89e2..d6a4a0bc5 100644
--- a/lib/codegen/selection.cpp
+++ b/lib/codegen/selection.cpp
@@ -34,79 +34,36 @@ void distributed_tile::init_indices() {
   }
 }
 
-
-distributed_tile::distributed_tile(Type *ty, const shapes_t &shapes, const axes_t &axes)
-  : tile(ty, shapes), axes_(axes) {
-  init_indices();
-  for(size_t i = 0; i < indices_.size(); i++)
-    values_.push_back(UndefValue::get(ty_));
-}
-
-/* Serialized distributed tile */
-void serialized_distributed_tile::set_value(indices_t idx, Value *v) {
-  values_[indices_[idx]] = v;
-}
-
-void serialized_distributed_tile::get_value(indices_t idx) {
-  return values_[indices_[idx]];
-}
-
-void serialized_distributed_tile::for_each(std::function fn) {
-  for(auto &idx: indices_)
-    fn(idx.first);
-}
-
-/* Vectorized distributed tile */
-llvm::Type *vectorized_distributed_tile::make_vector_ty(llvm::Type *ty, size_t vector_size) {
+llvm::Type *distributed_tile::make_vector_ty(llvm::Type *ty, size_t vector_size) {
   if(vector_size == 1)
     return ty;
   return VectorType::get(ty, vector_size);
 }
 
-vectorized_distributed_tile::vectorized_distributed_tile(Type *ty, const shapes_t &shapes, const axes_t &axes, llvm::IRBuilder<> &builder)
-  : distributed_tile(make_vector_ty(ty, axes[0].contiguous), shapes), axes_(axes), builder_(builder) {
-  vector_size_ = 1;
-  if(ty_->isVectorTy())
-    vector_size_ = ty_->getVectorNumElements();
+distributed_tile::distributed_tile(Type *ty, const shapes_t &shapes, const axes_t &axes, llvm::IRBuilder<> &builder, bool vectorize)
+  : tile(make_vector_ty(ty, vectorize?axes[0].contiguous:1), shapes), axes_(axes), builder_(builder) {
+  vector_size_ = (vectorize && ty_->isVectorTy())?ty_->getVectorNumElements():1;
+  init_indices();
+  for(size_t i = 0; i < indices_.size(); i++)
+    values_.push_back(UndefValue::get(ty_));
 }
 
-
 void distributed_tile::set_value(indices_t idx, Value *v) {
-  unsigned value_idx = indices_[idx];
-  Value *&result = values_[value_idx/vector_size_*vector_size_];
-  if(v->getType() == result->getType()) {
-    assert(value_idx % vector_size_ == 0);
-    result = v;
-  }
-  // insert scalar in vector
-  else {
-    std::cout << v->getType()->getScalarType()->getTypeID() << " " << result->getType()->getScalarType()->getTypeID() << std::endl;
-    assert(vector_size_==1 || result->getType()->isVectorTy());
-    assert(v->getType()->getScalarType() == result->getType()->getScalarType());
-    result = builder_.CreateInsertElement(result, v, value_idx % vector_size_);
-  }
+  values_[indices_[idx]] = v;
 }
 
 Value* distributed_tile::get_value(indices_t idx) {
-  unsigned value_idx = indices_[idx];
-  Value *&result = values_[value_idx/vector_size_*vector_size_];
-  if(vectorize_ || vector_size_ == 1) {
-    assert(value_idx % vector_size_ == 0);
-    return result;
-  }
-  // extract scalar from vector
-  else {
-    assert(result->getType()->isVectorTy());
-    return builder_.CreateExtractElement(result, value_idx % vector_size_);
-  }
-  return result;
+  return values_[indices_[idx]];
+}
+
+unsigned distributed_tile::get_linear_index(indices_t idx) {
+  return indices_[idx];
 }
 
 void distributed_tile::for_each(std::function fn) {
-  for(auto &idx: indices_) {
-    if(!vectorize_ || (idx.second % vector_size_ == 0))
+  for(auto &idx: indices_)
+    if(idx.second % vector_size_ == 0)
       fn(idx.first);
-  }
 }
 
 /* Shared Tile */
@@ -444,7 +401,7 @@ void selection::create_tile(ir::value *v, IRBuilder<> &builder,
       axes[d].values = {builder.getInt32(0)};
     }
   }
-  bool vectorize = dynamic_cast(v);
+  bool vectorize = dynamic_cast(v);
   distributed_tile *T = new distributed_tile(ty, shapes, axes, builder, vectorize);
   tmap_.insert({v, T});
   // constant range
@@ -548,6 +505,25 @@ void selection::lower_tile_instruction(ir::instruction *ins, llvm::IRBuilder<> &
       result->set_value(out_idx, in_tile->get_value(in_idx));
     });
   }
+  // vectorize
+  else if(dynamic_cast(ins)) {
+    distributed_tile* in = (distributed_tile*)tmap_.at(ins->get_operand(0));
+    unsigned vector_size = result->axis(0).contiguous;
+    std::map packets;
+    in->for_each([&](indices_t idx){
+      unsigned linear = in->get_linear_index(idx);
+      unsigned id = linear / vector_size;
+      if(linear % vector_size == 0)
+        packets[id] = result->get_value(idx);
+      packets[id] = builder.CreateInsertElement(packets[id], in->get_value(idx), linear % vector_size);
+    });
+    result->for_each([&](indices_t idx){
+      unsigned linear = in->get_linear_index(idx);
+      unsigned id = linear / vector_size;
+      if(linear % vector_size == 0)
+        result->set_value(idx, packets[id]);
+    });
+  }
   // copy to shared
   else if(dynamic_cast(ins)) {
     distributed_tile* in = (distributed_tile*)tmap_.at(ins->get_operand(0));
diff --git a/lib/ir/builder.cpp b/lib/ir/builder.cpp
index 848f668b7..7422a47fc 100644
--- a/lib/ir/builder.cpp
+++ b/lib/ir/builder.cpp
@@ -277,5 +277,9 @@ value *builder::create_copy_to_shared(value *arg, const std::string &name) {
   return insert(copy_to_shared_inst::create(arg, name));
 }
 
+value *builder::create_vectorize(value *arg, const std::string &name) {
+  return insert(vectorize_inst::create(arg, name));
+}
+
 }
 }
diff --git a/lib/ir/instructions.cpp b/lib/ir/instructions.cpp
index 3cbabd45e..522f4c029 100644
--- a/lib/ir/instructions.cpp
+++ b/lib/ir/instructions.cpp
@@ -399,5 +399,9 @@ copy_to_shared_inst* copy_to_shared_inst::create(value *arg, const std::string &
   return new copy_to_shared_inst(arg->get_type(), arg, name, next);
 }
 
+vectorize_inst* vectorize_inst::create(value *arg, const std::string &name, instruction *next) {
+  return new vectorize_inst(arg->get_type(), arg, name, next);
+}
+
 }
 }