[BACKEND] Various bug fixes; making reductions faster (#533)

This commit is contained in:
Philippe Tillet
2022-05-31 17:14:44 -07:00
committed by GitHub
parent 37037bb3be
commit 3e7500dfe6
12 changed files with 174 additions and 66 deletions

View File

@@ -224,7 +224,7 @@ struct scanline_layout: public distributed_layout {
// Returns entry k of nts_, accessed through at().
// NOTE(review): nts_'s declaration is not visible in this hunk; if it is a
// std::vector like mts_, at() throws std::out_of_range for an invalid k — confirm.
// NOTE(review): the next line defines contig_per_thread with an identical body;
// this looks like the pre-rename side of the diff — confirm before relying on it.
int nts(size_t k) { return nts_.at(k); }
// Returns entry k of nts_, accessed through at().
// NOTE(review): the name suggests "contiguous elements per thread along
// dimension k", but the meaning of nts_ is not established in this hunk — confirm.
// NOTE(review): nts_'s declaration is not visible here; if it is a std::vector
// like mts_, at() throws std::out_of_range for an invalid k — confirm.
int contig_per_thread(size_t k) { return nts_.at(k); }
// Returns nts(k) * shape_[k] / shape_per_cta(k). The multiplication is evaluated
// before the division (left-to-right), and the result is returned as int.
// shape_ is indexed with operator[] here rather than at().
// NOTE(review): shape_ and shape_per_cta are declared outside this hunk; a zero
// result from shape_per_cta(k) would be a division by zero — presumably never
// the case; verify against its definition.
// NOTE(review): the following line declares per_thread with the same signature;
// the two cannot coexist in one struct, so this looks like the pre-change side
// of the diff — confirm.
int per_thread(size_t k) { return nts(k) * shape_[k] / shape_per_cta(k);}
// Returns contig_per_thread(k) * shape_[k] / shape_per_cta(k). The multiplication
// is evaluated before the division (left-to-right), and the result is returned
// as int. shape_ is indexed with operator[] here rather than at().
// NOTE(review): shape_ and shape_per_cta are declared outside this hunk; a zero
// result from shape_per_cta(k) would be a division by zero — presumably never
// the case; verify against its definition.
// NOTE(review): the preceding line declares per_thread with the same signature
// but calling nts(k); this looks like the post-change side of the diff — confirm.
int per_thread(size_t k) { return contig_per_thread(k) * shape_[k] / shape_per_cta(k);}
public:
// Micro tile size: the size of a tile held by a thread block.
std::vector<int> mts_;