[Triton-MLIR][Backend] Remove unnecessary barriers (#901)

Cross-operation barriers are taken care of by the Membar pass.

Explicit barriers are required only when synchronization is
necessary within a single operation.
This commit is contained in:
Keren Zhou
2022-11-22 10:03:29 -08:00
committed by GitHub
parent 136668bac3
commit 2afebcd79b
3 changed files with 4 additions and 11 deletions

View File

@@ -86,9 +86,10 @@ void MembarAnalysis::transfer(Operation *op, RegionInfo *regionInfo,
curRegionInfo.syncWriteBuffers.insert(bufferId);
}
}
// Scratch buffer is considered as a shared memory read
// Scratch buffer is considered as both shared memory write & read
auto bufferId = allocation->getBufferId(op);
if (bufferId != Allocation::InvalidBufferId) {
curRegionInfo.syncWriteBuffers.insert(bufferId);
curRegionInfo.syncReadBuffers.insert(bufferId);
}

View File

@@ -3077,7 +3077,8 @@ LogicalResult ConvertLayoutOpConversion::lowerDistributedToDistributed(
for (unsigned repId = 0; repId < accumNumReplicates; ++repId) {
auto multiDimRepId =
getMultiDimIndex<unsigned>(repId, numReplicates, outOrd);
barrier();
if (repId != 0)
barrier();
if (srcLayout.isa<BlockedEncodingAttr>() ||
srcLayout.isa<SliceEncodingAttr>() ||
srcLayout.isa<MmaEncodingAttr>()) {
@@ -3169,11 +3170,6 @@ LogicalResult ConvertLayoutOpConversion::lowerBlockedToShared(
auto retVal = getStructFromSharedMemoryObject(loc, smemObj, rewriter);
auto numWordsEachRep = product<unsigned>(wordsInEachRep);
SmallVector<Value> wordVecs(numWordsEachRep);
// TODO: We should get fewer barriers if this is handled by the membar pass
// instead of the backend, since the latter can only handle it in
// the most conservative way. However, just keep it for now and revisit
// in the future if necessary.
barrier();
for (unsigned i = 0; i < numElems; ++i) {
if (i % srcAccumSizeInThreads == 0) {
// start of a replication