[BUILD] Fix Warnings and Enable Warnings as Errors (#794)

Ian Bearman
2022-10-28 12:36:09 -07:00
committed by GitHub
parent ac0f6793cc
commit f2106d0aa2
20 changed files with 205 additions and 213 deletions


@@ -17,11 +17,6 @@ using namespace mlir;
 namespace {
 class LoopPipeliner {
-  /// comments on numStages:
-  /// [0, numStages-1) are in the prologue
-  /// numStages-1 is appended after the loop body
-  int numStages;
   /// cache forOp we are working on
   scf::ForOp forOp;
@@ -43,6 +38,11 @@ class LoopPipeliner {
   ///
   Value loopIterIdx;
+  /// comments on numStages:
+  /// [0, numStages-1) are in the prologue
+  /// numStages-1 is appended after the loop body
+  int numStages;
   /// value (in loop) => value at stage N
   DenseMap<Value, SmallVector<Value>> valueMapping;
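
Note: the two hunks above only move the numStages member and its comment below the other fields; nothing else changes. Below is a minimal, self-contained sketch of the kind of diagnostic this reordering most likely addresses, assuming it is -Wreorder / -Wreorder-ctor: members are initialized in declaration order, so a constructor init list written in a different order draws a warning. The names are hypothetical stand-ins, not the real LoopPipeliner constructor.

    // Hypothetical stand-in, not the actual Triton code.
    struct PipelinerSketch {
      int numStages;    // declared before loopIterIdx...
      int loopIterIdx;
      // ...but listed after it here, so -Wreorder warns that the list order
      // does not match the (authoritative) declaration order.
      PipelinerSketch(int stages) : loopIterIdx(0), numStages(stages) {}
    };

    struct PipelinerSketchFixed {
      int loopIterIdx;  // declaring numStages last matches the init list,
      int numStages;    // mirroring the member reordering in the hunks above.
      PipelinerSketchFixed(int stages) : loopIterIdx(0), numStages(stages) {}
    };

    int main() { return PipelinerSketchFixed(3).numStages - 3; }  // exits 0
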
@@ -58,9 +58,6 @@ class LoopPipeliner {
   Value lookupOrDefault(Value origin, int stage);
-  /// return true if this op uses any of `loads`
-  bool isDirectUserOfAsyncLoad(Operation &op);
   /// returns a empty buffer of size <numStages, ...>
   triton::gpu::AllocTensorOp allocateEmptyBuffer(Operation *op,
                                                  OpBuilder &builder);
@@ -84,7 +81,7 @@ public:
   /// create the new ForOp (add new args & insert prefetched ops)
   scf::ForOp createNewForOp();
-  friend class PipelinePass;
+  friend struct PipelinePass;
 };
 // helpers
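
Note: the friend declaration change is a one-word warning fix. PipelinePass is presumably defined with the struct keyword, and naming it with class in the friend declaration triggers a mismatched-tags diagnostic (clang's -Wmismatched-tags, MSVC C4099), which a warnings-as-errors build promotes to an error. A minimal sketch, with a hypothetical Pass type standing in for PipelinePass:

    struct Pass {};          // hypothetical stand-in, defined with 'struct'

    class Pipeliner {
      // friend class Pass;  // would warn: 'Pass' previously declared as a struct
      friend struct Pass;    // class-key matches the definition, no warning
    };

    int main() { Pipeliner p; (void)p; return 0; }
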
@@ -123,19 +120,6 @@ void LoopPipeliner::collectDeps(Value v, int stages, DenseSet<Value> &deps) {
   }
 }
-bool LoopPipeliner::isDirectUserOfAsyncLoad(Operation &op) {
-  for (Value loadOp : loads) {
-    assert(loadOp.hasOneUse() &&
-           "load should only have one use (ConvertLayout)");
-    Value loadUseResult = loadOp.getUsers().begin()->getResult(0);
-    for (Value opOperand : op.getOperands()) {
-      if (opOperand == loadUseResult)
-        return true;
-    }
-  }
-  return false;
-}
 triton::gpu::AllocTensorOp
 LoopPipeliner::allocateEmptyBuffer(Operation *op, OpBuilder &builder) {
   // allocate a buffer for each pipelined tensor
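
Note: the hunk above deletes LoopPipeliner::isDirectUserOfAsyncLoad outright; together with the declaration removed earlier, this reads as dead code that a warnings-as-errors build no longer tolerates. A minimal sketch of the assumed diagnostic family, -Wunused-function for never-called internal-linkage helpers (the real case is a member function of a class in an anonymous namespace, so the exact warning may differ):

    namespace {
    bool neverCalled(int x) { return x > 0; }  // warning: unused function 'neverCalled'
    bool actuallyUsed(int x) { return x < 0; }
    } // namespace

    int main() { return actuallyUsed(-1) ? 0 : 1; }
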
@@ -356,8 +340,8 @@ void LoopPipeliner::emitPrologue() {
   } // for (int stage = 0; stage < numStages - 1; ++stage)
   // async.wait & extract_slice
-  Operation *asyncWait = builder.create<triton::gpu::AsyncWaitOp>(
-      loads[0].getLoc(), loads.size() * (numStages - 2));
+  builder.create<triton::gpu::AsyncWaitOp>(loads[0].getLoc(),
+                                           loads.size() * (numStages - 2));
   loopIterIdx = builder.create<arith::ConstantIntOp>(iv.getLoc(), 0, 32);
   for (Value loadOp : loads) {
     Value extractSlice = builder.create<triton::gpu::ExtractSliceOp>(
@@ -380,8 +364,7 @@ void LoopPipeliner::emitEpilogue() {
   OpBuilder builder(forOp);
   OpBuilder::InsertionGuard g(builder);
   builder.setInsertionPointAfter(forOp);
-  Operation *asyncWait =
-      builder.create<triton::gpu::AsyncWaitOp>(forOp.getLoc(), 0);
+  builder.create<triton::gpu::AsyncWaitOp>(forOp.getLoc(), 0);
 }
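
Note: the two AsyncWaitOp hunks above (and the scf::YieldOp hunk that follows) remove the same thing: a named binding of a builder.create<...>() result that is never read again. The op is still created as a side effect of the call, so dropping the variable silences -Wunused-variable without changing behavior. A minimal, self-contained sketch of the pattern, with a hypothetical createOp standing in for builder.create<...>():

    #include <vector>

    static int createOp(std::vector<int> &ops) {  // stand-in for builder.create<...>()
      ops.push_back(1);                           // the side effect we actually want
      return static_cast<int>(ops.size());
    }

    static void before(std::vector<int> &ops) {
      int asyncWait = createOp(ops);  // warning: unused variable 'asyncWait'
    }

    static void after(std::vector<int> &ops) {
      createOp(ops);                  // same side effect, no unused binding
    }

    int main() {
      std::vector<int> ops;
      before(ops);
      after(ops);
      return static_cast<int>(ops.size()) - 2;  // exits 0
    }
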
scf::ForOp LoopPipeliner::createNewForOp() {
@@ -575,8 +558,8 @@ scf::ForOp LoopPipeliner::createNewForOp() {
   yieldValues.push_back(loopIterIdx);
   builder.setInsertionPointToEnd(newForOp.getBody());
-  auto test = builder.create<scf::YieldOp>(
-      forOp.getBody()->getTerminator()->getLoc(), yieldValues);
+  builder.create<scf::YieldOp>(forOp.getBody()->getTerminator()->getLoc(),
+                               yieldValues);
   return newForOp;
 }