From b5c8c25d4372c16fe116d2df2e055c51c27e1fd9 Mon Sep 17 00:00:00 2001 From: Philippe Tillet Date: Thu, 10 Jan 2019 16:50:47 -0500 Subject: [PATCH] more debugging --- examples/matrix.cpp | 12 ++++++++--- include/ir/instructions.h | 4 ++++ lib/ast/lowering.cpp | 9 +++++--- lib/codegen/tune.cpp | 45 ++++++++++++++++++++++++++++----------- lib/ir/instructions.cpp | 11 +++++++++- lib/ir/module.cpp | 5 +++-- 6 files changed, 65 insertions(+), 21 deletions(-) diff --git a/examples/matrix.cpp b/examples/matrix.cpp index 12f98e0ec..e2ea19527 100644 --- a/examples/matrix.cpp +++ b/examples/matrix.cpp @@ -24,9 +24,15 @@ void test(fp32 *A, fp32 *B, fp32 *C, int32 M, int32 N, int32 K){\ int32 rx[16] = get_global_range[16](0);\ int32 ry[16] = get_global_range[16](1);\ int32 rk[8] = 0 ... 8;\ - fp32 acc[16, 16] = 0;\ - fp32 *pa[16, 8] = A + rx[:,newaxis] + rk[newaxis,:]*M;\ - fp32 *pb[16, 8] = B + ry[:,newaxis] + rk[newaxis,:]*K;\ + fp32 c[16, 16] = 0;\ + int32 k;\ + fp32* pa[16, 8] = A + rx[:, newaxis] + rk[newaxis, :]*M;\ + fp32* pb[16, 8] = B + ry[:, newaxis] + rk[newaxis, :]*K;\ + for(k = 0; k < K; k = k + 8){\ + fp32 a[16, 8] = *pa;\ + fp32 b[16, 8] = *pb;\ + pa = pa + 8;\ + }\ }\ "; diff --git a/include/ir/instructions.h b/include/ir/instructions.h index 2b412769f..e700bea04 100644 --- a/include/ir/instructions.h +++ b/include/ir/instructions.h @@ -279,8 +279,12 @@ private: //===----------------------------------------------------------------------===// class load_inst: public unary_inst{ +private: load_inst(value *ptr, const std::string &name, instruction *next); +private: + static type *get_pointee_type(type *ty); + public: // accessors value *get_pointer_operand() { return get_operand(0); } diff --git a/lib/ast/lowering.cpp b/lib/ast/lowering.cpp index 5d660bfad..14a5249cc 100644 --- a/lib/ast/lowering.cpp +++ b/lib/ast/lowering.cpp @@ -123,13 +123,16 @@ void node::implicit_broadcast(ir::module *mod, ir::value *&lhs, ir::value *&rhs) for(size_t i = 0; i < off; i++) shortest.insert(shortest.begin(), 1); ir::value *&target = (lhs_dim < rhs_dim)?lhs:rhs; - target = builder.create_reshape(target, shortest); + if(off > 0) + target = builder.create_reshape(target, shortest); // Broadcast std::vector shapes(ndim); for(size_t i = 0; i < ndim; i++) shapes[i] = std::max(shortest[i], longest[i]); - lhs = builder.create_broadcast(lhs, shapes); - rhs = builder.create_broadcast(rhs, shapes); + if(shapes != lhs_shapes) + lhs = builder.create_broadcast(lhs, shapes); + if(shapes != rhs_shapes) + rhs = builder.create_broadcast(rhs, shapes); } /* Translation unit */ diff --git a/lib/codegen/tune.cpp b/lib/codegen/tune.cpp index fe935a059..fcc5930a5 100644 --- a/lib/codegen/tune.cpp +++ b/lib/codegen/tune.cpp @@ -19,33 +19,46 @@ void tune::add_constraint(node_t x, node_t y) { void tune::init_c_phi(ir::instruction *v) { // Phi Nodes: all the incoming value share the result layout if(auto *phi = dynamic_cast(v)) - for(ir::value *inc: phi->ops()) + for(ir::value *op: phi->ops()) for(unsigned k = 0; k < phi->get_type()->get_tile_shapes().size(); k++) - if(dependencies_.find({inc, k}) != dependencies_.end() - || dependencies_.find({phi, k}) != dependencies_.end()) - add_constraint({phi, k}, {inc, k}); + if(dependencies_.find({op, k}) != dependencies_.end() + || dependencies_.find({phi, k}) != dependencies_.end()){ + add_constraint({phi, k}, {op, k}); + } } void tune::init_c_graph(ir::instruction *v) { - unsigned num_dim = v->get_type()->get_tile_shapes().size(); + const auto& shapes = v->get_type()->get_tile_shapes(); if(dynamic_cast(v)){ - + ir::value *op = v->get_operand(0); + unsigned current = 0; + for(unsigned i = 0; i < shapes.size(); i ++) + if(shapes[i] > 1) + add_constraint({v, i}, {op, current++}); } else if(dynamic_cast(v)){ } else if(dynamic_cast(v)){ + ir::value *op = v->get_operand(0); + ir::type *op_ty = op->get_type(); + const auto& op_shapes = op_ty->get_tile_shapes(); + for(unsigned i = 0; i < shapes.size(); i ++){ + if(op_shapes[i] == shapes[i] && v != op) + add_constraint({v, i}, {op, i}); + } } - else if(auto *ii = dynamic_cast(v)){ - ir::value *D = ii->get_operand(2); + else if(dynamic_cast(v)){ + ir::value *D = v->get_operand(2); add_constraint({v, 0}, {D, 0}); add_constraint({v, 1}, {D, 1}); } - else if(dynamic_cast(v)) - for(unsigned i = 0; i < num_dim; i ++) + else if(dynamic_cast(v)){ + for(unsigned i = 0; i < shapes.size(); i ++) for(ir::value* op: v->ops()) add_constraint({v, i}, {op, i}); + } } void tune::connected_components(node_t x, const std::vector vals, std::set &nodes, graph_t &graph) { @@ -57,6 +70,11 @@ void tune::connected_components(node_t x, const std::vector vals, st params_[instr].insert({"p1" + suffix, vals[1]}); params_[instr].insert({"p2" + suffix, vals[2]}); } + if(auto *cst = dynamic_cast(x.first)){ + *vals[0] = cst->get_value(); + *vals[1] = cst->get_value(); + *vals[2] = cst->get_value(); + } for(const node_t &y: graph[x]) connected_components(y, vals, nodes, graph); } @@ -69,8 +87,10 @@ void tune::get_params(ir::module &mod, std::vector &result) { for(ir::basic_block *block: fn->blocks()) for(ir::instruction *i : block->get_inst_list()) for(auto &x: params_[i]) - if(seen.insert(x.second).second) + if(seen.insert(x.second).second && *x.second == 0){ + std::cout << typeid(*i).name() << " " << i << std::endl; result.push_back(x.second); + } } void tune::run(ir::module &mod) { @@ -78,8 +98,9 @@ void tune::run(ir::module &mod) { // Build constraints graph for(ir::basic_block *block: fn->blocks()) for(ir::instruction *i : block->get_inst_list()) - if(i->get_type()->is_tile_ty()) + if(i->get_type()->is_tile_ty()){ init_c_graph(i); + } // Build phi constraints for(ir::basic_block *block: fn->blocks()) for(ir::instruction *i : block->get_inst_list()) diff --git a/lib/ir/instructions.cpp b/lib/ir/instructions.cpp index 2424ed074..eb2483132 100644 --- a/lib/ir/instructions.cpp +++ b/lib/ir/instructions.cpp @@ -294,8 +294,17 @@ getelementptr_inst *getelementptr_inst::create(value *ptr, const std::vectorget_scalar_ty(); + type *pointee_ty = scalar_ty->get_pointer_element_ty(); + if(ty->is_tile_ty()) + return tile_type::get_same_shapes(pointee_ty, ty); + return pointee_ty; +} + load_inst::load_inst(value *ptr, const std::string &name, instruction *next) - : unary_inst(ptr->get_type()->get_pointer_element_ty(), ptr, name, next) { } + : unary_inst(get_pointee_type(ptr->get_type()), ptr, name, next) { +} load_inst* load_inst::create(value *ptr, const std::string &name, instruction *next) { return new load_inst(ptr, name, next); diff --git a/lib/ir/module.cpp b/lib/ir/module.cpp index ce5d478ff..532f1ec2f 100644 --- a/lib/ir/module.cpp +++ b/lib/ir/module.cpp @@ -31,10 +31,11 @@ void module::set_value(const std::string& name, ir::value *value){ ir::phi_node* module::make_phi(ir::type *ty, unsigned num_values, ir::basic_block *block){ basic_block::iterator insert = block->get_first_non_phi(); - if(*insert) + if(insert != block->end()){ builder_.set_insert_point(insert); + } ir::phi_node *res = builder_.create_phi(ty, num_values); - if(*insert) + if(insert != block->end()) builder_.set_insert_point(block); return res; }