[BUILD] Fix Warnings and Enable Warnings as Errors (#794)
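The change that actually promotes warnings to errors lives in the build configuration and is not shown in this excerpt. For illustration only (not flags verified from #794), the usual mechanism on GCC/Clang is -Werror, under which every warning fixed below becomes a hard build failure:

    // demo.cpp -- hypothetical file, not from this PR; compile with e.g.:
    //   clang++ -Wall -Werror -c demo.cpp
    int main() {
      int unused = 0;  // -Wunused-variable, promoted to an error by -Werror
      return 0;
    }

MSVC's equivalent is /WX.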
@@ -17,11 +17,6 @@ using namespace mlir;
 namespace {
 class LoopPipeliner {
-  /// comments on numStages:
-  /// [0, numStages-1) are in the prologue
-  /// numStages-1 is appended after the loop body
-  int numStages;
-
   /// cache forOp we are working on
   scf::ForOp forOp;
 
@@ -43,6 +38,11 @@ class LoopPipeliner {
   ///
   Value loopIterIdx;
 
+  /// comments on numStages:
+  /// [0, numStages-1) are in the prologue
+  /// numStages-1 is appended after the loop body
+  int numStages;
+
   /// value (in loop) => value at stage N
   DenseMap<Value, SmallVector<Value>> valueMapping;
 
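Moving the numStages declaration (deleted in the first hunk, re-added here after loopIterIdx) is the classic fix for a member-initialization-order warning; that rationale is an inference, since the constructor itself is not shown in this excerpt. A minimal illustration with hypothetical names:

    struct Example {
      int first;
      int second;
      // Members are initialized in declaration order, not in list order;
      // writing the list out of order triggers GCC's -Wreorder /
      // Clang's -Wreorder-ctor, fatal once warnings are errors.
      Example() : second(2), first(1) {}
    };

Reordering either the declarations or the initializer list so the two orders agree silences the warning without changing behavior.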
@@ -58,9 +58,6 @@ class LoopPipeliner {
 
   Value lookupOrDefault(Value origin, int stage);
 
-  /// return true if this op uses any of `loads`
-  bool isDirectUserOfAsyncLoad(Operation &op);
-
   /// returns a empty buffer of size <numStages, ...>
   triton::gpu::AllocTensorOp allocateEmptyBuffer(Operation *op,
                                                  OpBuilder &builder);
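The declaration removed above is one half of the fix; the matching definition of isDirectUserOfAsyncLoad is deleted a few hunks below.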
@@ -84,7 +81,7 @@ public:
   /// create the new ForOp (add new args & insert prefetched ops)
   scf::ForOp createNewForOp();
 
-  friend class PipelinePass;
+  friend struct PipelinePass;
 };
 
 // helpers
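PipelinePass is evidently declared with the struct keyword elsewhere, so referring to it as friend class trips Clang's -Wmismatched-tags (MSVC warns similarly as C4099). A minimal sketch, with the forward declaration assumed rather than shown:

    struct PipelinePass;             // assumed: declared as a struct elsewhere

    class LoopPipelinerSketch {
      // friend class PipelinePass;  // -Wmismatched-tags: 'class' vs 'struct'
      friend struct PipelinePass;    // tag now matches the declaration
    };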
@@ -123,19 +120,6 @@ void LoopPipeliner::collectDeps(Value v, int stages, DenseSet<Value> &deps) {
   }
 }
 
-bool LoopPipeliner::isDirectUserOfAsyncLoad(Operation &op) {
-  for (Value loadOp : loads) {
-    assert(loadOp.hasOneUse() &&
-           "load should only have one use (ConvertLayout)");
-    Value loadUseResult = loadOp.getUsers().begin()->getResult(0);
-    for (Value opOperand : op.getOperands()) {
-      if (opOperand == loadUseResult)
-        return true;
-    }
-  }
-  return false;
-}
-
 triton::gpu::AllocTensorOp
 LoopPipeliner::allocateEmptyBuffer(Operation *op, OpBuilder &builder) {
   // allocate a buffer for each pipelined tensor
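This deletes the now-unreferenced definition that pairs with the declaration removed earlier. Because LoopPipeliner sits in an anonymous namespace, the method has internal linkage, so a defined-but-never-called method can be flagged by Clang (under -Wunused-function or -Wunused-member-function, depending on version and flags). A minimal reproduction with hypothetical names:

    namespace {
    class Widget {
      bool helper();  // internal linkage via the anonymous namespace
    };
    // Defined but never called in this translation unit; Clang can flag
    // this as unused, which -Werror turns into a build failure.
    bool Widget::helper() { return false; }
    } // namespace

    int main() { return 0; }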
@@ -356,8 +340,8 @@ void LoopPipeliner::emitPrologue() {
   } // for (int stage = 0; stage < numStages - 1; ++stage)
 
   // async.wait & extract_slice
-  Operation *asyncWait = builder.create<triton::gpu::AsyncWaitOp>(
-      loads[0].getLoc(), loads.size() * (numStages - 2));
+  builder.create<triton::gpu::AsyncWaitOp>(loads[0].getLoc(),
+                                           loads.size() * (numStages - 2));
   loopIterIdx = builder.create<arith::ConstantIntOp>(iv.getLoc(), 0, 32);
   for (Value loadOp : loads) {
     Value extractSlice = builder.create<triton::gpu::ExtractSliceOp>(
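The asyncWait binding was never read, so only the binding is dropped; the AsyncWaitOp is still created for its side effect. Stripped of the MLIR machinery, the pattern behind -Wunused-variable looks like this (illustrative names):

    int createOp() { return 42; }     // stand-in for builder.create<...>()

    int main() {
      // int asyncWait = createOp();  // -Wunused-variable under -Werror
      createOp();                     // keep the call, drop the unused binding
      return 0;
    }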
@@ -380,8 +364,7 @@ void LoopPipeliner::emitEpilogue() {
   OpBuilder builder(forOp);
   OpBuilder::InsertionGuard g(builder);
   builder.setInsertionPointAfter(forOp);
-  Operation *asyncWait =
-      builder.create<triton::gpu::AsyncWaitOp>(forOp.getLoc(), 0);
+  builder.create<triton::gpu::AsyncWaitOp>(forOp.getLoc(), 0);
 }
 
 scf::ForOp LoopPipeliner::createNewForOp() {
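emitEpilogue gets the same treatment: the unused asyncWait binding goes away while the AsyncWaitOp creation stays.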
@@ -575,8 +558,8 @@ scf::ForOp LoopPipeliner::createNewForOp() {
   yieldValues.push_back(loopIterIdx);
 
   builder.setInsertionPointToEnd(newForOp.getBody());
-  auto test = builder.create<scf::YieldOp>(
-      forOp.getBody()->getTerminator()->getLoc(), yieldValues);
+  builder.create<scf::YieldOp>(forOp.getBody()->getTerminator()->getLoc(),
+                               yieldValues);
   return newForOp;
 }
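Same pattern once more in createNewForOp: the scf::YieldOp is still created, but the stray auto test binding (apparently leftover debugging) is removed.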