[BUILD] Fix Warnings and Enable Warnings as Errors (#794)
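The change that actually promotes warnings to errors lives in the build configuration and is not shown in this excerpt. For illustration only (not flags verified from #794), the usual mechanism on GCC/Clang is -Werror, under which every warning fixed below becomes a hard build failure:

    // demo.cpp -- hypothetical file, not from this PR; compile with e.g.:
    //   clang++ -Wall -Werror -c demo.cpp
    int main() {
      int unused = 0;  // -Wunused-variable, promoted to an error by -Werror
      return 0;
    }

MSVC's equivalent is /WX.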
@@ -17,11 +17,6 @@ using namespace mlir;
 namespace {
 class LoopPipeliner {
-  /// comments on numStages:
-  /// [0, numStages-1) are in the prologue
-  /// numStages-1 is appended after the loop body
-  int numStages;
-
   /// cache forOp we are working on
   scf::ForOp forOp;
 
@@ -43,6 +38,11 @@ class LoopPipeliner {
   ///
   Value loopIterIdx;
 
+  /// comments on numStages:
+  /// [0, numStages-1) are in the prologue
+  /// numStages-1 is appended after the loop body
+  int numStages;
+
   /// value (in loop) => value at stage N
   DenseMap<Value, SmallVector<Value>> valueMapping;
 
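Moving the numStages declaration (deleted in the first hunk, re-added here after loopIterIdx) is the classic fix for a member-initialization-order warning; that rationale is an inference, since the constructor itself is not shown in this excerpt. A minimal illustration with hypothetical names:

    struct Example {
      int first;
      int second;
      // Members are initialized in declaration order, not in list order;
      // writing the list out of order triggers GCC's -Wreorder /
      // Clang's -Wreorder-ctor, fatal once warnings are errors.
      Example() : second(2), first(1) {}
    };

Reordering either the declarations or the initializer list so the two orders agree silences the warning without changing behavior.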
@@ -58,9 +58,6 @@ class LoopPipeliner {
 
   Value lookupOrDefault(Value origin, int stage);
 
-  /// return true if this op uses any of `loads`
-  bool isDirectUserOfAsyncLoad(Operation &op);
-
   /// returns a empty buffer of size <numStages, ...>
   triton::gpu::AllocTensorOp allocateEmptyBuffer(Operation *op,
                                                  OpBuilder &builder);
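The declaration removed above is one half of the fix; the matching definition of isDirectUserOfAsyncLoad is deleted a few hunks below.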
@@ -84,7 +81,7 @@ public:
   /// create the new ForOp (add new args & insert prefetched ops)
   scf::ForOp createNewForOp();
 
-  friend class PipelinePass;
+  friend struct PipelinePass;
 };
 
 // helpers
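PipelinePass is evidently declared with the struct keyword elsewhere, so referring to it as friend class trips Clang's -Wmismatched-tags (MSVC warns similarly as C4099). A minimal sketch, with the forward declaration assumed rather than shown:

    struct PipelinePass;             // assumed: declared as a struct elsewhere

    class LoopPipelinerSketch {
      // friend class PipelinePass;  // -Wmismatched-tags: 'class' vs 'struct'
      friend struct PipelinePass;    // tag now matches the declaration
    };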
@@ -123,19 +120,6 @@ void LoopPipeliner::collectDeps(Value v, int stages, DenseSet<Value> &deps) {
   }
 }
 
-bool LoopPipeliner::isDirectUserOfAsyncLoad(Operation &op) {
-  for (Value loadOp : loads) {
-    assert(loadOp.hasOneUse() &&
-           "load should only have one use (ConvertLayout)");
-    Value loadUseResult = loadOp.getUsers().begin()->getResult(0);
-    for (Value opOperand : op.getOperands()) {
-      if (opOperand == loadUseResult)
-        return true;
-    }
-  }
-  return false;
-}
-
 triton::gpu::AllocTensorOp
 LoopPipeliner::allocateEmptyBuffer(Operation *op, OpBuilder &builder) {
   // allocate a buffer for each pipelined tensor
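This deletes the now-unreferenced definition that pairs with the declaration removed earlier. Because LoopPipeliner sits in an anonymous namespace, the method has internal linkage, so a defined-but-never-called method can be flagged by Clang (under -Wunused-function or -Wunused-member-function, depending on version and flags). A minimal reproduction with hypothetical names:

    namespace {
    class Widget {
      bool helper();  // internal linkage via the anonymous namespace
    };
    // Defined but never called in this translation unit; Clang can flag
    // this as unused, which -Werror turns into a build failure.
    bool Widget::helper() { return false; }
    } // namespace

    int main() { return 0; }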
@@ -356,8 +340,8 @@ void LoopPipeliner::emitPrologue() {
   } // for (int stage = 0; stage < numStages - 1; ++stage)
 
   // async.wait & extract_slice
-  Operation *asyncWait = builder.create<triton::gpu::AsyncWaitOp>(
-      loads[0].getLoc(), loads.size() * (numStages - 2));
+  builder.create<triton::gpu::AsyncWaitOp>(loads[0].getLoc(),
+                                           loads.size() * (numStages - 2));
   loopIterIdx = builder.create<arith::ConstantIntOp>(iv.getLoc(), 0, 32);
   for (Value loadOp : loads) {
     Value extractSlice = builder.create<triton::gpu::ExtractSliceOp>(
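The asyncWait binding was never read, so only the binding is dropped; the AsyncWaitOp is still created for its side effect. Stripped of the MLIR machinery, the pattern behind -Wunused-variable looks like this (illustrative names):

    int createOp() { return 42; }     // stand-in for builder.create<...>()

    int main() {
      // int asyncWait = createOp();  // -Wunused-variable under -Werror
      createOp();                     // keep the call, drop the unused binding
      return 0;
    }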
@@ -380,8 +364,7 @@ void LoopPipeliner::emitEpilogue() {
   OpBuilder builder(forOp);
   OpBuilder::InsertionGuard g(builder);
   builder.setInsertionPointAfter(forOp);
-  Operation *asyncWait =
-      builder.create<triton::gpu::AsyncWaitOp>(forOp.getLoc(), 0);
+  builder.create<triton::gpu::AsyncWaitOp>(forOp.getLoc(), 0);
 }
 
 scf::ForOp LoopPipeliner::createNewForOp() {
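emitEpilogue gets the same treatment: the unused asyncWait binding goes away while the AsyncWaitOp creation stays.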
@@ -575,8 +558,8 @@ scf::ForOp LoopPipeliner::createNewForOp() {
   yieldValues.push_back(loopIterIdx);
 
   builder.setInsertionPointToEnd(newForOp.getBody());
-  auto test = builder.create<scf::YieldOp>(
-      forOp.getBody()->getTerminator()->getLoc(), yieldValues);
+  builder.create<scf::YieldOp>(forOp.getBody()->getTerminator()->getLoc(),
+                               yieldValues);
   return newForOp;
 }
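Same pattern once more in createNewForOp: the scf::YieldOp is still created, but the stray auto test binding (apparently leftover debugging) is removed.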