[TritonIR] simplify Load/StoreOps when mask is true/false (#79)
* [TritonIR] fix Load/Store/CopyAsyncOp's parsers * [TritonIR] simplify Load/StoreOps when mask is true/false * [TEST] adds tests to check load/store simplification
This commit is contained in:
@@ -36,6 +36,68 @@ static Type getPointerTypeFromTensor(Type type) {
|
||||
return Type();
|
||||
}
|
||||
|
||||
// Parser & printer for assembly forms
|
||||
ParseResult parseLoadOp(OpAsmParser &parser, OperationState &result) {
|
||||
SmallVector<OpAsmParser::OperandType, 4> allOperands;
|
||||
Type resultTypes[1];
|
||||
SMLoc allOperandLoc = parser.getCurrentLocation();
|
||||
if (parser.parseOperandList(allOperands) ||
|
||||
parser.parseOptionalAttrDict(result.attributes) || parser.parseColon() ||
|
||||
parser.parseCustomTypeWithFallback(resultTypes[0]))
|
||||
return failure();
|
||||
|
||||
result.addTypes(resultTypes);
|
||||
|
||||
SmallVector<Type> operandTypes;
|
||||
operandTypes.push_back(getPointerTypeFromTensor(resultTypes[0])); // ptr
|
||||
if (allOperands.size() >= 2)
|
||||
operandTypes.push_back(getI1SameShape(resultTypes[0])); // mask
|
||||
if (allOperands.size() >= 3)
|
||||
operandTypes.push_back(resultTypes[0]); // other
|
||||
|
||||
if (parser.resolveOperands(allOperands, operandTypes, allOperandLoc,
|
||||
result.operands))
|
||||
return failure();
|
||||
return success();
|
||||
}
|
||||
|
||||
void printLoadOp(OpAsmPrinter &printer, LoadOp loadOp) {
|
||||
printer << " ";
|
||||
printer << loadOp.getOperation()->getOperands();
|
||||
printer.printOptionalAttrDict(loadOp->getAttrs(), /*elidedAttrs=*/{});
|
||||
printer << " : ";
|
||||
printer.printStrippedAttrOrType(loadOp.result().getType());
|
||||
}
|
||||
|
||||
ParseResult parseStoreOp(OpAsmParser &parser, OperationState &result) {
|
||||
SmallVector<OpAsmParser::OperandType, 4> allOperands;
|
||||
Type valueType;
|
||||
SMLoc allOperandLoc = parser.getCurrentLocation();
|
||||
if (parser.parseOperandList(allOperands) ||
|
||||
parser.parseOptionalAttrDict(result.attributes) || parser.parseColon() ||
|
||||
parser.parseCustomTypeWithFallback(valueType))
|
||||
return failure();
|
||||
|
||||
SmallVector<Type> operandTypes;
|
||||
operandTypes.push_back(getPointerTypeFromTensor(valueType)); // ptr
|
||||
operandTypes.push_back(valueType); // value
|
||||
if (allOperands.size() >= 3)
|
||||
operandTypes.push_back(getI1SameShape(valueType)); // mask
|
||||
|
||||
if (parser.resolveOperands(allOperands, operandTypes, allOperandLoc,
|
||||
result.operands))
|
||||
return failure();
|
||||
return success();
|
||||
}
|
||||
|
||||
void printStoreOp(OpAsmPrinter &printer, StoreOp storeOp) {
|
||||
printer << " ";
|
||||
printer << storeOp.getOperation()->getOperands();
|
||||
printer.printOptionalAttrDict(storeOp->getAttrs(), /*elidedAttrs=*/{});
|
||||
printer << " : ";
|
||||
printer.printStrippedAttrOrType(storeOp.value().getType());
|
||||
}
|
||||
|
||||
} // namespace triton
|
||||
} // namespace mlir
|
||||
|
||||
|
@@ -92,6 +92,91 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
// load(ptr, splat(1), ...) -> load(ptr, ...)
|
||||
// load(ptr, splat(0), other, ...) -> other
|
||||
struct CanonicalizeMaskedLoadPattern
|
||||
: public mlir::OpRewritePattern<triton::LoadOp> {
|
||||
CanonicalizeMaskedLoadPattern(mlir::MLIRContext *context)
|
||||
: OpRewritePattern<triton::LoadOp>(context, 1) {}
|
||||
|
||||
mlir::LogicalResult
|
||||
matchAndRewrite(triton::LoadOp loadOp,
|
||||
mlir::PatternRewriter &rewriter) const override {
|
||||
auto mask = loadOp.mask();
|
||||
if (!mask)
|
||||
return mlir::failure();
|
||||
|
||||
auto constantMask = llvm::dyn_cast<arith::ConstantOp>(mask.getDefiningOp());
|
||||
if (!constantMask)
|
||||
return mlir::failure();
|
||||
|
||||
auto splatMask = constantMask.getValue().dyn_cast<SplatElementsAttr>();
|
||||
if (!splatMask)
|
||||
return mlir::failure();
|
||||
|
||||
if (splatMask.getSplatValue<IntegerAttr>().getValue() == true) {
|
||||
// mask = splat(1)
|
||||
rewriter.replaceOpWithNewOp<triton::LoadOp>(
|
||||
loadOp, loadOp.getType(), loadOp.ptr(), Value(), Value(),
|
||||
loadOp.cache(), loadOp.evict(), loadOp.isVolatile());
|
||||
} else {
|
||||
// mask = splat(0)
|
||||
|
||||
// If there's no "other", the value is "undef". Perhaps we want to
|
||||
// optimize it in the future.x
|
||||
auto otherVal = loadOp.other();
|
||||
if (!otherVal)
|
||||
return mlir::failure();
|
||||
rewriter.replaceOp(loadOp, otherVal);
|
||||
}
|
||||
return mlir::success();
|
||||
}
|
||||
};
|
||||
|
||||
void triton::LoadOp::getCanonicalizationPatterns(RewritePatternSet &results,
|
||||
MLIRContext *context) {
|
||||
results.add<CanonicalizeMaskedLoadPattern>(context);
|
||||
}
|
||||
|
||||
// store(ptr, value, splat(1), ...) -> store(ptr, value, ...)
|
||||
// store(ptr, value, splat(0), ...) -> [none]
|
||||
struct CanonicalizeMaskedStorePattern
|
||||
: public mlir::OpRewritePattern<triton::StoreOp> {
|
||||
CanonicalizeMaskedStorePattern(mlir::MLIRContext *context)
|
||||
: OpRewritePattern<triton::StoreOp>(context, 1) {}
|
||||
|
||||
mlir::LogicalResult
|
||||
matchAndRewrite(triton::StoreOp storeOp,
|
||||
mlir::PatternRewriter &rewriter) const override {
|
||||
auto mask = storeOp.mask();
|
||||
if (!mask)
|
||||
return mlir::failure();
|
||||
|
||||
auto constantMask = llvm::dyn_cast<arith::ConstantOp>(mask.getDefiningOp());
|
||||
if (!constantMask)
|
||||
return mlir::failure();
|
||||
|
||||
auto splatMask = constantMask.getValue().dyn_cast<SplatElementsAttr>();
|
||||
if (!splatMask)
|
||||
return mlir::failure();
|
||||
|
||||
if (splatMask.getSplatValue<IntegerAttr>().getValue() == true) {
|
||||
// mask = splat(1)
|
||||
rewriter.replaceOpWithNewOp<triton::StoreOp>(storeOp, storeOp.ptr(),
|
||||
storeOp.value());
|
||||
} else {
|
||||
// mask = splat(0)
|
||||
rewriter.eraseOp(storeOp);
|
||||
}
|
||||
return mlir::success();
|
||||
}
|
||||
};
|
||||
|
||||
void triton::StoreOp::getCanonicalizationPatterns(RewritePatternSet &results,
|
||||
MLIRContext *context) {
|
||||
results.add<CanonicalizeMaskedStorePattern>(context);
|
||||
}
|
||||
|
||||
#define GEN_PASS_CLASSES
|
||||
#include "triton/Dialect/Triton/Transforms/Passes.h.inc"
|
||||
|
||||
|
@@ -10,6 +10,37 @@
|
||||
using namespace mlir;
|
||||
using namespace mlir::triton::gpu;
|
||||
|
||||
// Utility
|
||||
namespace mlir {
|
||||
namespace triton {
|
||||
|
||||
// Type inference
|
||||
static Type getI1SameShape(Type type) {
|
||||
auto i1Type = IntegerType::get(type.getContext(), 1);
|
||||
if (auto tensorType = type.dyn_cast<RankedTensorType>())
|
||||
return RankedTensorType::get(tensorType.getShape(), i1Type,
|
||||
tensorType.getEncoding());
|
||||
return Type();
|
||||
}
|
||||
|
||||
static Type getPointeeType(Type type) {
|
||||
if (auto tensorType = type.dyn_cast<RankedTensorType>()) {
|
||||
// Tensor of pointers
|
||||
auto shape = tensorType.getShape();
|
||||
auto ptrType = tensorType.getElementType().dyn_cast<PointerType>();
|
||||
Type pointeeType = ptrType.getPointeeType();
|
||||
return RankedTensorType::get(shape, pointeeType, tensorType.getEncoding());
|
||||
} else if (auto ptrType = type.dyn_cast<PointerType>()) {
|
||||
// scalar pointer
|
||||
Type pointeeType = ptrType.getPointeeType();
|
||||
return pointeeType;
|
||||
}
|
||||
return Type();
|
||||
}
|
||||
|
||||
} // namespace triton
|
||||
} // namespace mlir
|
||||
|
||||
static LogicalResult parseIntAttrValue(AsmParser &parser, const Attribute &attr,
|
||||
unsigned &value, StringRef desc) {
|
||||
auto intAttr = attr.dyn_cast<IntegerAttr>();
|
||||
@@ -260,6 +291,44 @@ void SharedEncodingAttr::print(AsmPrinter &printer) const {
|
||||
<< "}>";
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// CopyAsyncOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
ParseResult parseCopyAsyncOp(OpAsmParser &parser, OperationState &result) {
|
||||
SmallVector<OpAsmParser::OperandType, 4> allOperands;
|
||||
Type resultTypes[1], ptrType;
|
||||
SMLoc allOperandLoc = parser.getCurrentLocation();
|
||||
if (parser.parseOperandList(allOperands) ||
|
||||
parser.parseOptionalAttrDict(result.attributes) || parser.parseColon() ||
|
||||
parser.parseCustomTypeWithFallback(ptrType) || parser.parseArrow() ||
|
||||
parser.parseCustomTypeWithFallback(resultTypes[0]))
|
||||
return failure();
|
||||
result.addTypes(resultTypes);
|
||||
|
||||
SmallVector<Type> operandTypes;
|
||||
operandTypes.push_back(ptrType); // ptr
|
||||
if (allOperands.size() >= 2)
|
||||
operandTypes.push_back(triton::getI1SameShape(ptrType)); // mask
|
||||
if (allOperands.size() >= 3)
|
||||
operandTypes.push_back(triton::getPointeeType(ptrType)); // other
|
||||
|
||||
if (parser.resolveOperands(allOperands, operandTypes, allOperandLoc,
|
||||
result.operands))
|
||||
return failure();
|
||||
return success();
|
||||
}
|
||||
|
||||
void printCopyAsyncOp(OpAsmPrinter &printer, CopyAsyncOp copyAsyncOp) {
|
||||
printer << " ";
|
||||
printer << copyAsyncOp.getOperation()->getOperands();
|
||||
printer.printOptionalAttrDict(copyAsyncOp->getAttrs(), /*elidedAttrs=*/{});
|
||||
printer << " : ";
|
||||
printer.printStrippedAttrOrType(copyAsyncOp.ptr().getType());
|
||||
printer << " -> ";
|
||||
printer.printStrippedAttrOrType(copyAsyncOp.result().getType());
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ASM Interface (i.e.: alias)
|
||||
//===----------------------------------------------------------------------===//
|
||||
@@ -282,8 +351,7 @@ public:
|
||||
os << "slice";
|
||||
return AliasResult::FinalAlias;
|
||||
} */
|
||||
OpAsmDialectInterface::getAlias(attr, os);
|
||||
return AliasResult::FinalAlias;
|
||||
return OpAsmDialectInterface::getAlias(attr, os);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -299,36 +367,6 @@ void TritonGPUDialect::initialize() {
|
||||
addInterfaces<TritonGPUOpAsmInterface>();
|
||||
}
|
||||
|
||||
namespace mlir {
|
||||
namespace triton {
|
||||
|
||||
// Type inference
|
||||
static Type getI1SameShape(Type type) {
|
||||
auto i1Type = IntegerType::get(type.getContext(), 1);
|
||||
if (auto tensorType = type.dyn_cast<RankedTensorType>())
|
||||
return RankedTensorType::get(tensorType.getShape(), i1Type,
|
||||
tensorType.getEncoding());
|
||||
return Type();
|
||||
}
|
||||
|
||||
static Type getPointeeType(Type type) {
|
||||
if (auto tensorType = type.dyn_cast<RankedTensorType>()) {
|
||||
// Tensor of pointers
|
||||
auto shape = tensorType.getShape();
|
||||
auto ptrType = tensorType.getElementType().dyn_cast<PointerType>();
|
||||
Type pointeeType = ptrType.getPointeeType();
|
||||
return RankedTensorType::get(shape, pointeeType, tensorType.getEncoding());
|
||||
} else if (auto ptrType = type.dyn_cast<PointerType>()) {
|
||||
// scalar pointer
|
||||
Type pointeeType = ptrType.getPointeeType();
|
||||
return pointeeType;
|
||||
}
|
||||
return Type();
|
||||
}
|
||||
|
||||
} // namespace triton
|
||||
} // namespace mlir
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Verification
|
||||
//===----------------------------------------------------------------------===//
|
||||
@@ -352,4 +390,4 @@ LogicalResult TritonGPUDialect::verifyOperationAttribute(Operation *op,
|
||||
NamedAttribute attr) {
|
||||
// TODO: fill this.
|
||||
return success();
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user