[CODEGEN][ANALYSIS] cleaning: moving towards better polymorphism for tile layouts
This commit is contained in:
@@ -655,13 +655,14 @@ void generator::visit_hmma_dot(ir::dot_inst* dot, shared_tile *TA, shared_tile *
|
||||
"{$8, $9}, "
|
||||
"{$10, $11}, "
|
||||
"{$0, $1, $2, $3, $4, $5, $6, $7};", "=f,=f,=f,=f,=f,=f,=f,=f,r,r,r,r,0,1,2,3,4,5,6,7", false);
|
||||
analysis::layout_hmma_884_t* layout = layouts_->get(dot)->to_hmma884();
|
||||
|
||||
unsigned fpw_0 = layouts_->get(dot)->fpw.at(0);
|
||||
unsigned fpw_1 = layouts_->get(dot)->fpw.at(1);
|
||||
unsigned fpw_0 = layout->fpw.at(0);
|
||||
unsigned fpw_1 = layout->fpw.at(1);
|
||||
unsigned wts_0 = fpw_0 * 8;
|
||||
unsigned wts_1 = fpw_1 * 8;
|
||||
unsigned wpt_0 = layouts_->get(dot)->wpt.at(0);
|
||||
unsigned wpt_1 = layouts_->get(dot)->wpt.at(1);
|
||||
unsigned wpt_0 = layout->wpt.at(0);
|
||||
unsigned wpt_1 = layout->wpt.at(1);
|
||||
unsigned stride_rep_i = wpt_0 * wts_0;
|
||||
unsigned stride_rep_j = wpt_1 * wts_1;
|
||||
unsigned num_rep_i = shapes[0] / stride_rep_i;
|
||||
@@ -925,8 +926,8 @@ void generator::visit_recoalesce_inst(ir::recoalesce_inst* rc) {
|
||||
// pointer to temporary shared memory
|
||||
Type *ty = llvm_type(rc->get_type()->get_scalar_ty(), *ctx_);
|
||||
// layouts
|
||||
const analysis::layout_t* in_layout = layouts_->get(op);
|
||||
const analysis::layout_t* out_layout = layouts_->get(rc);
|
||||
analysis::layout_hmma_884_t* in_layout = layouts_->get(op)->to_hmma884();
|
||||
analysis::layout_scanline_t* out_layout = layouts_->get(rc)->to_scanline();
|
||||
// machine tiles
|
||||
distributed_tile *in_dt = (distributed_tile*)(tmap_.at(op));
|
||||
distributed_tile *out_dt = (distributed_tile*)(tmap_.at(rc));
|
||||
@@ -1026,14 +1027,14 @@ void generator::visit_recoalesce_inst(ir::recoalesce_inst* rc) {
|
||||
|
||||
void generator::visit_copy_to_shared_inst(ir::copy_to_shared_inst* cts) {
|
||||
unsigned vector_size = 1;
|
||||
auto x_order = layouts_->get(cts)->order;
|
||||
ir::value *arg = cts->get_operand(0);
|
||||
auto arg_order = layouts_->get(arg)->order;
|
||||
analysis::layout_shared_t* out_layout = layouts_->get(cts)->to_shared();
|
||||
analysis::layout_scanline_t* in_layout = layouts_->get(arg)->to_scanline();
|
||||
auto out_order = out_layout->order;
|
||||
auto in_order = in_layout->order;
|
||||
// tiles
|
||||
if(x_order == arg_order){
|
||||
size_t ld = arg_order[0];
|
||||
vector_size = layouts_->get(arg)->nts.at(ld);
|
||||
}
|
||||
if(out_order == in_order)
|
||||
vector_size = in_layout->nts.at(in_order[0]);
|
||||
|
||||
std::map<unsigned, Value*> packets;
|
||||
for_each(arg, [&](indices_t idx){
|
||||
|
@@ -72,7 +72,7 @@ inline int32_t ceil(int32_t num, int32_t div){
|
||||
|
||||
|
||||
machine_layout_shared_t::machine_layout_shared_t(Module *mod, Builder *builder, target *tgt, analysis::allocation* alloc,
|
||||
Value *&sh_mem_ptr, analysis::layout_t *layout,
|
||||
Value *&sh_mem_ptr, analysis::layout_shared_t *layout,
|
||||
std::map<ir::value *, Value *>& vmap,
|
||||
std::map<ir::value *, tile *>& tmap)
|
||||
: mod_(mod), builder_(builder), tgt_(tgt), alloc_(alloc), sh_mem_ptr_(sh_mem_ptr), layout_(layout), vmap_(vmap), tmap_(tmap) {
|
||||
@@ -132,7 +132,10 @@ machine_layout_distributed_t::machine_layout_distributed_t(Module *mod, Builder
|
||||
tile *machine_layout_distributed_t::create(ir::value *v) {
|
||||
Type *ty = llvm_type(v->get_type()->get_scalar_ty(), builder_->getContext());
|
||||
const auto &shapes = v->get_type()->get_tile_shapes();
|
||||
std::vector<distributed_axis> axes(shapes.size());
|
||||
size_t rank = shapes.size();
|
||||
std::vector<distributed_axis> axes(rank);
|
||||
std::vector<int> order(rank);
|
||||
// compute axes
|
||||
for(size_t d = 0; d < shapes.size(); d++){
|
||||
if(shapes[d] > 1){
|
||||
unsigned x = a_axes_->get(v, d);
|
||||
@@ -143,7 +146,22 @@ tile *machine_layout_distributed_t::create(ir::value *v) {
|
||||
axes[d].values = {builder_->getInt32(0)};
|
||||
}
|
||||
}
|
||||
return new distributed_tile(ty, shapes, layout_->order, axes, *builder_);
|
||||
// compute order
|
||||
std::iota(order.begin(), order.end(), 0);
|
||||
auto cmp = [&](int x, int y) {
|
||||
unsigned axx = a_axes_->get(v, x);
|
||||
unsigned axy = a_axes_->get(v, y);
|
||||
auto itx = std::find(layout_->axes.begin(), layout_->axes.end(), axx);
|
||||
auto ity = std::find(layout_->axes.begin(), layout_->axes.end(), axy);
|
||||
size_t posx = std::distance(layout_->axes.begin(), itx);
|
||||
size_t posy = std::distance(layout_->axes.begin(), ity);
|
||||
if(posx < rank && posy < rank)
|
||||
return layout_->order[posx] < layout_->order[posy];
|
||||
return false;
|
||||
};
|
||||
std::sort(order.begin(), order.end(), cmp);
|
||||
|
||||
return new distributed_tile(ty, shapes, order, axes, *builder_);
|
||||
}
|
||||
|
||||
machine_layout_hmma_884_t::machine_layout_hmma_884_t(Module *mod, Builder *builder,
|
||||
|
@@ -11,13 +11,6 @@ using namespace llvm;
|
||||
/* Distributed Tile */
|
||||
void distributed_tile::init_indices() {
|
||||
std::vector<size_t> id(axes_.size(), 0);
|
||||
// create iteration order
|
||||
std::vector<size_t> order(id.size());
|
||||
std::iota(order.begin(), order.end(), 0);
|
||||
auto cmp = [&](int x, int y) {
|
||||
return order_[x] < order_[y];
|
||||
};
|
||||
std::sort(order.begin(), order.end(), cmp);
|
||||
// build
|
||||
size_t k = 0;
|
||||
while(true) {
|
||||
@@ -28,12 +21,12 @@ void distributed_tile::init_indices() {
|
||||
indices_[current] = sz;
|
||||
values_[current] = nullptr;
|
||||
ordered_indices_.push_back(current);
|
||||
id[order[0]]++;
|
||||
while(id[order[k]] == axes_[order[k]].values.size()){
|
||||
id[order_[0]]++;
|
||||
while(id[order_[k]] == axes_[order_[k]].values.size()){
|
||||
if(k == id.size() - 1)
|
||||
return;
|
||||
id[order[k++]] = 0;
|
||||
id[order[k]]++;
|
||||
id[order_[k++]] = 0;
|
||||
id[order_[k]]++;
|
||||
}
|
||||
k = 0;
|
||||
}
|
||||
|
Reference in New Issue
Block a user