[Analysis/Allocation] Allocation passes now assume that slices always alias (#108)

The code in this branch assumes that the `src` operand of
`insert_slice_async` always aliases the result. This does not hold in the
general case; it is just a workaround to make the pipeline pass work.
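
As a concrete illustration of the assumption, here is a minimal, self-contained C++ sketch (not the Triton analysis itself) of the alias bookkeeping the diff below hard-codes: an `extract_slice` result reuses the alias set of its source operand, and an `insert_slice_async` result reuses the alias set of the buffer operand it writes into (operand 1 in the `%src, %dst, %index` form shown in the diff). The value names and the map-based alias sets are illustrative only.

```cpp
// Minimal sketch of the hard-coded aliasing rule; not the Triton implementation.
// All value names and the map-based lattice are illustrative.
#include <cassert>
#include <map>
#include <set>
#include <string>

using AliasSet = std::set<std::string>; // underlying buffers a value may point to

int main() {
  std::map<std::string, AliasSet> aliasInfo;

  // %buf = alloc_tensor : a fresh shared-memory buffer, aliasing only itself.
  aliasInfo["%buf"] = {"%buf"};

  // %slice = extract_slice %buf, %index : assumed to alias its source buffer.
  aliasInfo["%slice"] = aliasInfo["%buf"];

  // %new = insert_slice_async %src, %buf, %index : assumed to alias the buffer
  // it inserts into (operand 1 in the diff below).
  aliasInfo["%new"] = aliasInfo["%buf"];

  // Under this workaround, slices share storage with %buf, so the allocator
  // must not give them separate buffers.
  assert(aliasInfo["%slice"] == aliasInfo["%buf"]);
  assert(aliasInfo["%new"] == aliasInfo["%buf"]);
  return 0;
}
```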

I'm also working on the complete analysis in another
[branch](https://github.com/openai/triton-mlir/tree/keren/analyze-slice).
Author: Keren Zhou
Committed: 2022-09-09 12:03:41 -07:00 (committed by GitHub)
Parent: 9bd5a3dcd2
Commit: 16aed94ff5
14 changed files with 299 additions and 195 deletions

@@ -22,25 +22,24 @@ ChangeResult SharedMemoryAliasAnalysis::visitOperation(
AliasInfo aliasInfo;
bool pessimistic = true;
if (maybeSharedAllocationOp(op)) {
// These ops will allocate a new shared memory buffer.
// These ops may allocate a new shared memory buffer.
auto result = op->getResult(0);
if (isSharedEncoding(result)) {
aliasInfo.insert(result);
// FIXME(Keren): extract and insert are always alias for now
if (auto extractSliceOp = dyn_cast<triton::gpu::ExtractSliceOp>(op)) {
// extract_slice %src, %index
aliasInfo = AliasInfo(operands[0]->getValue());
} else if (auto insertSliceOp =
dyn_cast<triton::gpu::InsertSliceAsyncOp>(op)) {
// insert_slice_async %src, %dst, %index
aliasInfo = AliasInfo(operands[1]->getValue());
} else {
aliasInfo.insert(result);
}
pessimistic = false;
} else {
llvm::errs() << "op: " << op->getName() << "\n";
}
}
// XXX(Keren): triton ops don't support aliasing yet.
// else if (auto viewOp = dyn_cast<triton::ViewOp>(op) ||
// dyn_cast<triton::ExpandDimsOp>(op)) {
// // These ops will create a new view of the same shared memory buffer.
// auto result = op->getResult(0);
// if (isSharedEncoding(result)) {
// aliasInfo = AliasInfo(operands[0]->getValue());
// pessimistic = false;
// }
//}
if (pessimistic) {
return markAllPessimisticFixpoint(op->getResults());
}

@@ -39,11 +39,13 @@ private:
/// Initializes explicitly defined shared memory values for a given operation.
void getExplicitValueSize(Operation *op) {
/// Values returned from scf.yield will not be allocated even though they
/// have the shared encoding.
/// For example: %a = scf.if -> yield
/// %a must be allocated elsewhere by other operations.
if (!maybeSharedAllocationOp(op)) {
// Values returned from scf.yield will not be allocated even though they
// have the shared encoding.
// For example: %a = scf.if -> yield
// %a must be allocated elsewhere by other operations.
// FIXME(Keren): extract and insert are always alias for now
if (!maybeSharedAllocationOp(op) || isa<triton::gpu::ExtractSliceOp>(op) ||
isa<triton::gpu::InsertSliceAsyncOp>(op)) {
return;
}
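
Because slice results are now treated as aliases of an existing buffer, the allocation pass must not count them as new shared-memory allocations; the early return added above does exactly that. The following is a simplified, self-contained sketch of that accounting over a toy op model; `OpKind`, `sizeInBytes`, and `totalExplicitSharedMemory` are hypothetical names, not the real pass's API.

```cpp
// Simplified sketch of skipping aliased slice ops when summing explicit
// shared-memory sizes. The op model and names are hypothetical.
#include <cstdint>
#include <iostream>
#include <vector>

enum class OpKind { AllocTensor, ExtractSlice, InsertSliceAsync, Other };

struct Op {
  OpKind kind;
  uint64_t sizeInBytes; // size the result would need if it were allocated
};

uint64_t totalExplicitSharedMemory(const std::vector<Op> &ops) {
  uint64_t total = 0;
  for (const Op &op : ops) {
    // Mirrors the new early return: slice ops alias an existing buffer,
    // so they must not contribute a fresh allocation.
    if (op.kind == OpKind::ExtractSlice || op.kind == OpKind::InsertSliceAsync)
      continue;
    if (op.kind == OpKind::AllocTensor)
      total += op.sizeInBytes;
  }
  return total;
}

int main() {
  // One 4 KiB buffer plus slices into it: only the buffer itself is counted.
  std::vector<Op> ops = {{OpKind::AllocTensor, 4096},
                         {OpKind::ExtractSlice, 1024},
                         {OpKind::InsertSliceAsync, 1024}};
  std::cout << totalExplicitSharedMemory(ops) << "\n"; // prints 4096
  return 0;
}
```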

@@ -45,19 +45,25 @@ void MembarAnalysis::transfer(Operation *op, RegionInfo *regionInfo,
if (op->getNumResults() < 1)
return;
if (dyn_cast<scf::ForOp>(op) || dyn_cast<scf::IfOp>(op) ||
dyn_cast<scf::YieldOp>(op)) {
// Do not insert barriers before control flow operations.
if (isa<scf::ForOp>(op) || isa<scf::IfOp>(op) || isa<scf::YieldOp>(op) ||
isa<triton::gpu::ExtractSliceOp>(op) ||
isa<triton::gpu::InsertSliceAsyncOp>(op) ||
isa<triton::gpu::AllocTensorOp>(op)) {
// Do not insert barriers before control flow operations and
// alloc/extract/insert
// alloc is an allocation op without memory write.
// In contrast, arith.constant is an allocation op with memory write.
// FIXME(Keren): extract and insert are always alias for now
return;
}
if (dyn_cast<gpu::BarrierOp>(op)) {
if (isa<gpu::BarrierOp>(op)) {
// If the current op is a barrier, we sync previous reads and writes
regionInfo->sync();
return;
}
if (dyn_cast<triton::gpu::AsyncWaitOp>(op)) {
if (isa<triton::gpu::AsyncWaitOp>(op)) {
// If the current op is an async wait, we insert a barrier op and sync
// previous reads and writes.
OpBuilder::InsertionGuard g(*builder);
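
The hunk above skips control-flow ops, allocations, and the slice ops, syncs pending accesses at an explicit barrier, and inserts a barrier before an async wait. Below is a heavily simplified, self-contained sketch of that transfer logic over a toy op and buffer model; the real MembarAnalysis walks MLIR operations and shared-memory buffers, and the hazard checks, `OpKind`, and the string-based `RegionInfo` here are illustrative assumptions only.

```cpp
// Heavily simplified sketch of membar-style transfer rules; not the real pass.
#include <iostream>
#include <set>
#include <string>
#include <vector>

enum class OpKind { ControlFlow, Alloc, ExtractSlice, InsertSliceAsync,
                    Barrier, AsyncWait, Read, Write };

struct Op { OpKind kind; std::string buffer; };

struct RegionInfo {
  std::set<std::string> pendingReads, pendingWrites;
  void sync() { pendingReads.clear(); pendingWrites.clear(); }
};

void transfer(const Op &op, RegionInfo &info, std::vector<std::string> &inserted) {
  switch (op.kind) {
  case OpKind::ControlFlow: case OpKind::Alloc:
  case OpKind::ExtractSlice: case OpKind::InsertSliceAsync:
    return; // no barrier needed before these ops
  case OpKind::Barrier:
    info.sync(); // an explicit barrier syncs previous reads and writes
    return;
  case OpKind::AsyncWait:
    inserted.push_back("barrier"); // insert a barrier, then sync
    info.sync();
    return;
  case OpKind::Read:
    if (info.pendingWrites.count(op.buffer)) { // read-after-write hazard
      inserted.push_back("barrier");
      info.sync();
    }
    info.pendingReads.insert(op.buffer);
    return;
  case OpKind::Write:
    if (info.pendingReads.count(op.buffer) ||
        info.pendingWrites.count(op.buffer)) { // write-after-read/write hazard
      inserted.push_back("barrier");
      info.sync();
    }
    info.pendingWrites.insert(op.buffer);
    return;
  }
}

int main() {
  RegionInfo info;
  std::vector<std::string> inserted;
  std::vector<Op> ops = {{OpKind::Write, "%buf"},
                         {OpKind::ExtractSlice, "%buf"}, // skipped by the pass
                         {OpKind::Read, "%buf"}};        // needs a barrier first
  for (const Op &op : ops) transfer(op, info, inserted);
  std::cout << "barriers inserted: " << inserted.size() << "\n"; // prints 1
  return 0;
}
```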