[PYTHON] Allow triton.code_gen.Binary to print Triton-IR asm. (#89)

This commit is contained in:
daadaada
2021-04-24 02:43:38 +08:00
committed by Philippe Tillet
parent 1112e2526e
commit f6688372db
3 changed files with 14 additions and 7 deletions

View File

@@ -817,9 +817,9 @@ void generator::visit_mma884(ir::dot_inst* C, ir::value *A, ir::value *B, ir::va
// update accumulators
unsigned num_m = layout_c->rep(0) * shape_c[0] / layout_c->spt(0);
unsigned num_n = layout_c->rep(1) * shape_c[1] / layout_c->spt(1);
-for(unsigned K = 0; K < NK; K += 4)
 for(unsigned m = 0; m < num_m/2; m++)
-for(unsigned n = 0; n < num_n/2; n++)
+for(unsigned K = 0; K < NK; K += 4){
+for(unsigned n = 0; n < num_n/2; n++) {
if(has.find({m, K}) == has.end()){
Value* ptra = ptr_a[(is_a_row ? K/4 : m) % num_ptr_a];
int step_am = is_a_row ? m : m / (num_ptr_a)*(num_ptr_a);