diff --git a/lib/codegen/analysis/liveness.cc b/lib/codegen/analysis/liveness.cc index 707cbaa23..2d4162ff4 100644 --- a/lib/codegen/analysis/liveness.cc +++ b/lib/codegen/analysis/liveness.cc @@ -37,16 +37,16 @@ void liveness::run(ir::module &mod) { if(layout->type != SHARED) continue; // users - std::set users; + std::set users; for(ir::value *v: layout->values){ - users.insert(v); for(ir::user *u: v->get_users()) users.insert(u); } // compute intervals unsigned start = INT32_MAX; unsigned end = 0; - for(ir::value *u: users){ + for(ir::user *u: users) + if(indices.find(u) != indices.end()){ start = std::min(start, indices.at(u)); end = std::max(end, indices.at(u)); } diff --git a/lib/runtime/function.cc b/lib/runtime/function.cc index 444a113db..02cceea63 100644 --- a/lib/runtime/function.cc +++ b/lib/runtime/function.cc @@ -231,11 +231,10 @@ std::unique_ptr function::make_bin(ir::module &module, driver::c align.run(module); dce.run(module); reassociate.run(module); - dce.run(module); cts.run(module); + dce.run(module); align.run(module); axes.run(module); -// ir::print(module, std::cout); layouts.run(module); liveness.run(module); allocation.run(module); diff --git a/tests/bench/copy2d.cc b/tests/bench/copy2d.cc index 22006aae7..f1252797e 100644 --- a/tests/bench/copy2d.cc +++ b/tests/bench/copy2d.cc @@ -11,17 +11,17 @@ int main() { // shapes to benchmark typedef std::tuple, std::vector, std::vector> config_t; std::vector configs = { -// {{4096*4096}, {0}, {0}}, + {{4096*4096}, {0}, {0}}, {{4096, 4096}, {0, 1}, {1, 0}}, -// {{4096, 4096}, {0, 1}, {1, 0}}, -// {{4096, 4096}, {1, 0}, {0, 1}}, -// {{4096, 4096}, {0, 1}, {0, 1}}, -// {{256, 256, 256}, {0, 1, 2}, {0, 1, 2}}, -// {{256, 256, 256}, {0, 1, 2}, {0, 2, 1}}, -// {{256, 256, 256}, {1, 0, 2}, {1, 2, 0}}, -// {{256, 256, 256}, {1, 2, 0}, {1, 0, 2}}, -// {{256, 256, 256}, {2, 0, 1}, {0, 1, 2}}, -// {{256, 256, 256}, {2, 1, 0}, {0, 2, 1}} + {{4096, 4096}, {0, 1}, {1, 0}}, + {{4096, 4096}, {1, 0}, {0, 1}}, + {{4096, 4096}, {0, 1}, {0, 1}}, + {{256, 256, 256}, {0, 1, 2}, {0, 1, 2}}, + {{256, 256, 256}, {0, 1, 2}, {0, 2, 1}}, + {{256, 256, 256}, {1, 0, 2}, {1, 2, 0}}, + {{256, 256, 256}, {1, 2, 0}, {1, 0, 2}}, + {{256, 256, 256}, {2, 0, 1}, {0, 1, 2}}, + {{256, 256, 256}, {2, 1, 0}, {0, 2, 1}} }; // does the work std::vector shape; diff --git a/tests/bench/dot.cc b/tests/bench/dot.cc index 927f0044b..9857e9865 100644 --- a/tests/bench/dot.cc +++ b/tests/bench/dot.cc @@ -13,7 +13,7 @@ int main() { for(auto x: std::vector>{{false, false}, {false, true}, {true, false}, {true, true}}){ std::vector tmp = { - config_t{ord, x[0], x[1], 2048, 2048, 2048}, + config_t{ord, x[0], x[1], 4096, 4096, 4096}, // config_t{ord, x[0], x[1], 16, 2048, 2048}, // config_t{ord, x[0], x[1], 32, 2048, 2048}, // config_t{ord, x[0], x[1], 64, 2048, 2048}, diff --git a/tests/unit/dot.cc b/tests/unit/dot.cc index 38851b54d..02777fa4b 100644 --- a/tests/unit/dot.cc +++ b/tests/unit/dot.cc @@ -12,8 +12,8 @@ int main() { std::vector configs; for(int TM: std::vector{32, 64}) for(int TN: std::vector{32, 64}) - for(int TK: std::vector{8}) - for(int nwarps: std::vector{8}) + for(int TK: std::vector{16}) + for(int nwarps: std::vector{4}) for(bool AT: std::array{false, true}) for(bool BT: std::array{false, true}){ configs.push_back(config_t{FLOAT, AT, BT, 128, 128, 128, TM, TN, TK, nwarps});