[CODEGEN][ANALYSIS] cleaning: moving towards better polymorphism for tile layouts
2020-01-20 11:55:34 -05:00
parent fbf2a3f56f
commit 382ca2c745
11 changed files with 130 additions and 109 deletions
--- a/lib/codegen/selection/generator.cc
+++ b/lib/codegen/selection/generator.cc
@@ -655,13 +655,14 @@ void generator::visit_hmma_dot(ir::dot_inst* dot, shared_tile *TA, shared_tile *
                                             "{$8, $9}, "
                                             "{$10, $11}, "
                                             "{$0, $1, $2, $3, $4, $5, $6, $7};", "=f,=f,=f,=f,=f,=f,=f,=f,r,r,r,r,0,1,2,3,4,5,6,7", false);
+  analysis::layout_hmma_884_t* layout = layouts_->get(dot)->to_hmma884();

-  unsigned fpw_0 = layouts_->get(dot)->fpw.at(0);
-  unsigned fpw_1 = layouts_->get(dot)->fpw.at(1);
+  unsigned fpw_0 = layout->fpw.at(0);
+  unsigned fpw_1 = layout->fpw.at(1);
  unsigned wts_0 = fpw_0 * 8;
  unsigned wts_1 = fpw_1 * 8;
-  unsigned wpt_0 = layouts_->get(dot)->wpt.at(0);
-  unsigned wpt_1 = layouts_->get(dot)->wpt.at(1);
+  unsigned wpt_0 = layout->wpt.at(0);
+  unsigned wpt_1 = layout->wpt.at(1);
  unsigned stride_rep_i = wpt_0 * wts_0;
  unsigned stride_rep_j = wpt_1 * wts_1;
  unsigned num_rep_i = shapes[0] / stride_rep_i;
@@ -925,8 +926,8 @@ void generator::visit_recoalesce_inst(ir::recoalesce_inst* rc) {
  // pointer to temporary shared memory
  Type *ty = llvm_type(rc->get_type()->get_scalar_ty(), *ctx_);
  // layouts
-  const analysis::layout_t* in_layout = layouts_->get(op);
-  const analysis::layout_t* out_layout = layouts_->get(rc);
+  analysis::layout_hmma_884_t* in_layout = layouts_->get(op)->to_hmma884();
+  analysis::layout_scanline_t* out_layout = layouts_->get(rc)->to_scanline();
  // machine tiles
  distributed_tile *in_dt = (distributed_tile*)(tmap_.at(op));
  distributed_tile *out_dt = (distributed_tile*)(tmap_.at(rc));
@@ -1026,14 +1027,14 @@ void generator::visit_recoalesce_inst(ir::recoalesce_inst* rc) {

 void generator::visit_copy_to_shared_inst(ir::copy_to_shared_inst* cts) {
  unsigned vector_size = 1;
-  auto x_order = layouts_->get(cts)->order;
  ir::value *arg = cts->get_operand(0);
-  auto arg_order = layouts_->get(arg)->order;
+  analysis::layout_shared_t* out_layout = layouts_->get(cts)->to_shared();
+  analysis::layout_scanline_t* in_layout = layouts_->get(arg)->to_scanline();
+  auto out_order = out_layout->order;
+  auto in_order = in_layout->order;
  // tiles
-  if(x_order == arg_order){
-    size_t ld = arg_order[0];
-    vector_size = layouts_->get(arg)->nts.at(ld);
-  }
+  if(out_order == in_order)
+    vector_size = in_layout->nts.at(in_order[0]);

  std::map<unsigned, Value*> packets;
  for_each(arg, [&](indices_t idx){
--- a/lib/codegen/selection/machine_layout.cc
+++ b/lib/codegen/selection/machine_layout.cc
@@ -72,7 +72,7 @@ inline int32_t ceil(int32_t num, int32_t div){


 machine_layout_shared_t::machine_layout_shared_t(Module *mod, Builder *builder, target *tgt, analysis::allocation* alloc,
-                                                 Value *&sh_mem_ptr, analysis::layout_t *layout,
+                                                 Value *&sh_mem_ptr, analysis::layout_shared_t *layout,
                                                 std::map<ir::value *, Value *>& vmap,
                                                 std::map<ir::value *, tile *>& tmap)
  : mod_(mod), builder_(builder), tgt_(tgt), alloc_(alloc), sh_mem_ptr_(sh_mem_ptr), layout_(layout), vmap_(vmap), tmap_(tmap) {
@@ -132,7 +132,10 @@ machine_layout_distributed_t::machine_layout_distributed_t(Module *mod, Builder
 tile *machine_layout_distributed_t::create(ir::value *v) {
  Type *ty = llvm_type(v->get_type()->get_scalar_ty(), builder_->getContext());
  const auto &shapes = v->get_type()->get_tile_shapes();
-  std::vector<distributed_axis> axes(shapes.size());
+  size_t rank = shapes.size();
+  std::vector<distributed_axis> axes(rank);
+  std::vector<int> order(rank);
+  // compute axes
  for(size_t d = 0; d < shapes.size(); d++){
    if(shapes[d] > 1){
      unsigned x = a_axes_->get(v, d);
@@ -143,7 +146,22 @@ tile *machine_layout_distributed_t::create(ir::value *v) {
      axes[d].values = {builder_->getInt32(0)};
    }
  }
-  return new distributed_tile(ty, shapes, layout_->order, axes, *builder_);
+  // compute order
+  std::iota(order.begin(), order.end(), 0);
+  auto cmp = [&](int x, int y) {
+    unsigned axx = a_axes_->get(v, x);
+    unsigned axy = a_axes_->get(v, y);
+    auto itx = std::find(layout_->axes.begin(), layout_->axes.end(), axx);
+    auto ity = std::find(layout_->axes.begin(), layout_->axes.end(), axy);
+    size_t posx = std::distance(layout_->axes.begin(), itx);
+    size_t posy = std::distance(layout_->axes.begin(), ity);
+    if(posx < rank && posy < rank)
+      return layout_->order[posx] < layout_->order[posy];
+    return false;
+  };
+  std::sort(order.begin(), order.end(), cmp);
+
+  return new distributed_tile(ty, shapes, order, axes, *builder_);
 }

 machine_layout_hmma_884_t::machine_layout_hmma_884_t(Module *mod, Builder *builder,
--- a/lib/codegen/selection/machine_value.cc
+++ b/lib/codegen/selection/machine_value.cc
@@ -11,13 +11,6 @@ using namespace llvm;
 /* Distributed Tile */
 void distributed_tile::init_indices() {
  std::vector<size_t> id(axes_.size(), 0);
-  // create iteration order
-  std::vector<size_t> order(id.size());
-  std::iota(order.begin(), order.end(), 0);
-  auto cmp = [&](int x, int y) {
-    return order_[x] < order_[y];
-  };
-  std::sort(order.begin(), order.end(), cmp);
  // build
  size_t k = 0;
  while(true) {
@@ -28,12 +21,12 @@ void distributed_tile::init_indices() {
    indices_[current] = sz;
    values_[current] = nullptr;
    ordered_indices_.push_back(current);
-    id[order[0]]++;
-    while(id[order[k]] == axes_[order[k]].values.size()){
+    id[order_[0]]++;
+    while(id[order_[k]] == axes_[order_[k]].values.size()){
      if(k == id.size() - 1)
        return;
-      id[order[k++]] = 0;
-      id[order[k]]++;
+      id[order_[k++]] = 0;
+      id[order_[k]]++;
    }
    k = 0;
  }