diff --git a/examples/matrix.cpp b/examples/matrix.cpp index bc37b81ae..bc7239038 100644 --- a/examples/matrix.cpp +++ b/examples/matrix.cpp @@ -32,10 +32,7 @@ void test(fp32 *a, fp32 *b, fp32 *c, int32 M, int32 N, int32 K){\ int32 k;\ fp32* pa[32, 8] = a + rx[:, newaxis] + rka[newaxis, :]*M;\ fp32* pb[32, 8] = b + ry[:, newaxis] + rkb[newaxis, :]*K;\ - fp32* pc[32, 32];\ - for(k = 0; k < K; k = k + 8){\ - }\ - pc = c + rx[:, newaxis] + ry[newaxis, :];\ + fp32* pc[32, 32] = c + rx[:, newaxis] + ry[newaxis, :]*M;\ *pc = C;\ }\ "; @@ -59,15 +56,13 @@ int main() { tune.run(module); std::vector params = { // asm - 2, 16, 1, + 2, 8, 1, // bsn - 2, 16, 1, + 4, 4, 1, // pa - 1, 2, 4, + 2, 4, 1, // pb - 1, 2, 4, - // c - 2, 16, 1, 1, 2, 4 + 1, 8, 1, }; std::map> errors; unsigned i = 0; @@ -75,11 +70,11 @@ int main() { for(unsigned *x: tune.get_params(module)) *x = params[i++]; tune.check_constraints(module, errors); -// std::cout << "errors: " << errors.size() << std::endl; -// for(auto &x: errors){ -// for(auto &e: x.second) -// std::cout << e << std::endl; -// } + std::cout << "errors: " << errors.size() << std::endl; + for(auto &x: errors){ + for(auto &e: x.second) + std::cout << e << std::endl; + } shared.run(module); liveness.run(module); allocation.run(); diff --git a/include/ast/ast.h b/include/ast/ast.h index fed9d6556..7a2a62563 100644 --- a/include/ast/ast.h +++ b/include/ast/ast.h @@ -114,9 +114,15 @@ public: const slice_enum_t type_; }; +class named_expression; + class expression: public node{ public: virtual ir::value* codegen(ir::module *) const = 0; + named_expression *lvalue() const { return lvalue_; } + +protected: + named_expression *lvalue_; }; class postfix_expression: public expression{ @@ -163,10 +169,9 @@ private: const list* slices_; }; - class named_expression: public expression { public: - named_expression(node *id): id_((const identifier*)id) { } + named_expression(node *id): id_((const identifier*)id) { lvalue_ = this; } const identifier *id() const { return id_; } ir::value* codegen(ir::module * mod) const; @@ -227,8 +232,11 @@ private: public: unary_operator(UNARY_OP_T op, node *arg) - : op_(op), - arg_((expression*)arg) { } + : op_(op), + arg_((expression*)arg) { + if(op == DEREF) + this->lvalue_ = arg_->lvalue(); + } UNARY_OP_T get_op() const { return op_; } ir::value* codegen(ir::module *mod) const; diff --git a/include/codegen/selection.h b/include/codegen/selection.h index 92c9f79b5..6fa05782e 100644 --- a/include/codegen/selection.h +++ b/include/codegen/selection.h @@ -32,9 +32,10 @@ protected: typedef std::vector shapes_t; public: - tile(const shapes_t &shapes): shapes_(shapes){ } + tile(llvm::Type *ty, const shapes_t &shapes): shapes_(shapes){ } private: + llvm::Type *ty_; shapes_t shapes_; }; @@ -46,13 +47,20 @@ public: class distributed_tile: public tile{ typedef std::vector axes_t; + typedef std::vector indices_t; + typedef std::map indices_map_t; + typedef std::vector values_t; + +private: + void init_indices(); public: - distributed_tile(const shapes_t& shapes, const axes_t &axes) - : tile(shapes), axes_(axes) {} + distributed_tile(llvm::Type *ty, const shapes_t& shapes, const axes_t &axes); private: axes_t axes_; + indices_map_t indices_; + values_t values_; }; diff --git a/include/ir/instructions.h b/include/ir/instructions.h index 09c129160..44bafb151 100644 --- a/include/ir/instructions.h +++ b/include/ir/instructions.h @@ -26,6 +26,8 @@ public: const basic_block *get_parent() const { return parent_; } basic_block *get_parent() { return parent_; } void erase_from_parent(); + // helpers + bool has_tile_result_or_op(); private: basic_block *parent_; diff --git a/lib/ast/lowering.cpp b/lib/ast/lowering.cpp index 490dcdba1..9d91e3588 100644 --- a/lib/ast/lowering.cpp +++ b/lib/ast/lowering.cpp @@ -523,7 +523,8 @@ ir::value *assignment_expression::codegen(ir::module *mod) const{ mod->set_value(x->id()->name(), rvalue); else if(auto* x = dynamic_cast(lvalue_)){ assert(x->get_op()==DEREF); - ir::value *ptr = x->codegen(mod); + assert(x->lvalue()); + ir::value *ptr = x->lvalue()->codegen(mod); mod->get_builder().create_store(ptr, rvalue); } return rvalue; diff --git a/lib/codegen/selection.cpp b/lib/codegen/selection.cpp index 4092be811..497f1f302 100644 --- a/lib/codegen/selection.cpp +++ b/lib/codegen/selection.cpp @@ -13,6 +13,33 @@ namespace codegen{ using namespace llvm; +/* Distributed Tile */ +void distributed_tile::init_indices() { + std::vector id(axes_.size(), 0); + size_t k = 0; + while(true) { + indices_t current; + for(size_t d = 0; d < id.size(); d++) + current.push_back(axes_[d].values[id[d]]); + indices_[current] = indices_.size(); + id[0]++; + while(id[k] == axes_[k].values.size()){ + if(k == id.size() - 1) + return; + id[k++] = 0; + id[k]++; + } + k = 0; + } +} + +distributed_tile::distributed_tile(Type *ty, const shapes_t &shapes, const axes_t &axes) + : tile(ty, shapes), axes_(axes) { + init_indices(); + for(size_t i = 0; i < indices_.size(); i++) + values_.push_back(UndefValue::get(ty_)); +} + /* convert ir::type to Type */ Type *selection::llvm_type(ir::type *ty, LLVMContext &ctx) { @@ -186,7 +213,7 @@ void selection::init_axes(ir::instruction *instr, IRBuilder<> &builder, Value *u unsigned offset = n / contiguous[k] * per_block + n % contiguous[k]; idx_list[n] = builder.CreateAdd(thread_id, builder.getInt32(offset)); } - axes[k] = {idx_list}; + axes[k] = distributed_axis{idx_list}; } // Store axes axes_[instr] = axes; @@ -230,6 +257,7 @@ void selection::create_grids(std::vector &grids, void selection::init_grids(ir::function *fn, IRBuilder<> &builder){ // fetch linear ID Module *mod = builder.GetInsertBlock()->getParent()->getParent(); + LLVMContext &ctx = builder.getContext(); Function *get_thread_id = Intrinsic::getDeclaration(mod, Intrinsic::nvvm_read_ptx_sreg_tid_x); Value *warp_size = builder.getInt32(32); Value *u_thread_id = builder.CreateCall(get_thread_id, {}); @@ -248,9 +276,10 @@ void selection::init_grids(ir::function *fn, IRBuilder<> &builder){ continue; bool is_shared = dynamic_cast(i); const auto& shapes = i->get_type()->get_tile_shapes(); + Type* ty = llvm_type(i->get_type(), ctx); // create shared tile if(is_shared){ - tmap_.insert({i, new shared_tile(shapes)}); + tmap_.insert({i, new shared_tile(ty, shapes)}); } // create distributed tile else { @@ -264,20 +293,18 @@ void selection::init_grids(ir::function *fn, IRBuilder<> &builder){ else axes[d].values = {builder.getInt32(0)}; } - tmap_.insert({i, new distributed_tile(shapes, axes)}); + tmap_.insert({i, new distributed_tile(ty, shapes, axes)}); } } } void selection::lower_tile_instruction(ir::instruction *src, llvm::IRBuilder<> &builder) { - + std::cout << typeid(*src).name() << std::endl; } void selection::lower_instruction(ir::instruction *src, IRBuilder<> &builder) { LLVMContext &ctx = builder.getContext(); - std::cout << typeid(*src).name() << " " << src->get_type()->get_type_id() << std::endl; - if(src->get_type()->is_tile_ty()) { - std::cout << "tile instruction" << std::endl; + if(src->has_tile_result_or_op()) { lower_tile_instruction(src, builder); } else { diff --git a/lib/codegen/tune.cpp b/lib/codegen/tune.cpp index 7d0a673a2..63e4f582f 100644 --- a/lib/codegen/tune.cpp +++ b/lib/codegen/tune.cpp @@ -29,7 +29,13 @@ void tune::init_c_phi(ir::instruction *v) { } void tune::init_c_graph(ir::instruction *v) { - const auto& shapes = v->get_type()->get_tile_shapes(); + // Reference shape + std::vector shapes; + if(auto *store = dynamic_cast(v)) + shapes = store->get_pointer_operand()->get_type()->get_tile_shapes(); + else + shapes = v->get_type()->get_tile_shapes(); + // Reshape if(dynamic_cast(v)){ ir::value *op = v->get_operand(0); unsigned current = 0; @@ -40,9 +46,11 @@ void tune::init_c_graph(ir::instruction *v) { add_constraint({v, i}, {op, current++}); } } + // Splat else if(dynamic_cast(v)){ } + // Broadcast else if(dynamic_cast(v)){ ir::value *op = v->get_operand(0); ir::type *op_ty = op->get_type(); @@ -51,13 +59,14 @@ void tune::init_c_graph(ir::instruction *v) { if(op_shapes[i] == shapes[i] && v != op) add_constraint({v, i}, {op, i}); } - } + // Matrix multiplication else if(dynamic_cast(v)){ ir::value *D = v->get_operand(2); add_constraint({v, 0}, {D, 0}); add_constraint({v, 1}, {D, 1}); } + // Element-wise else if(dynamic_cast(v)){ for(unsigned i = 0; i < shapes.size(); i ++) for(ir::value* op: v->ops()){ @@ -102,18 +111,19 @@ std::map tune::get_params(ir::instruction* i) { return params_.at(i); } + void tune::run(ir::module &mod) { for(ir::function *fn: mod.get_function_list()){ // Build constraints graph for(ir::basic_block *block: fn->blocks()) for(ir::instruction *i : block->get_inst_list()) - if(i->get_type()->is_tile_ty()){ + if(i->has_tile_result_or_op()){ init_c_graph(i); } // Build phi constraints for(ir::basic_block *block: fn->blocks()) for(ir::instruction *i : block->get_inst_list()) - if(i->get_type()->is_tile_ty()) + if(i->has_tile_result_or_op()) init_c_phi(i); // Layout parameters while(!nodes_.empty()){ diff --git a/lib/ir/instructions.cpp b/lib/ir/instructions.cpp index 679ee5bb2..3cbabd45e 100644 --- a/lib/ir/instructions.cpp +++ b/lib/ir/instructions.cpp @@ -25,6 +25,12 @@ void instruction::erase_from_parent() { parent_->erase(this); } +bool instruction::has_tile_result_or_op() { + bool result = get_type()->is_tile_ty(); + for(ir::value *v: ops()) + result |= v->get_type()->is_tile_ty(); + return result; +} //===----------------------------------------------------------------------===// // phi_node classes