Keren/tensor slice insert alloc (#94)
This branch defines three new triton_gpu operations to partially solve #87. Below is an overview:

```
%tensor = triton_gpu.alloc_tensor : tensor<2x16x16xf16, #A>
%b = triton_gpu.insert_slice_async %a_ptr, %tensor, %offset {axis = 0 : i32, cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<16x16x!tt.ptr<f16>, #AL> -> tensor<2x16x16xf16, #A>
%c = triton_gpu.extract_slice %b, %offset {axis = 0 : i32} : tensor<2x16x16xf16, #A> -> tensor<16x16xf16, #A>
```

We plan to fully replace `copy_async` with `insert_slice_async`. **This hasn't been done yet.**
This commit is contained in:
@@ -7,7 +7,6 @@ namespace mlir {
|
||||
|
||||
void MembarAnalysis::run() {
|
||||
auto *operation = allocation->getOperation();
|
||||
operation->getContext()->getOrLoadDialect<mlir::gpu::GPUDialect>();
|
||||
RegionInfo regionInfo;
|
||||
OpBuilder builder(operation);
|
||||
dfsOperation(operation, &regionInfo, &builder);
|
||||
|
Reference in New Issue
Block a user