[BACKEND] Added support for scalars in LoadOp / StoreOp / ElementwiseOp (#814)

Also fixed various errors that showed up in `test_core.py`, and added more TODOs for open (hopefully relatively minor) issues.
This commit is contained in:
Philippe Tillet
2022-10-28 01:17:55 -07:00
committed by GitHub
parent 3685194456
commit ac0f6793cc
6 changed files with 269 additions and 419 deletions

View File

@@ -153,7 +153,7 @@ module attributes {"triton_gpu.num-warps" = 2 : i32} {
%13 = tt.addptr %12, %4 : tensor<256x!tt.ptr<f32>, #blocked0>
// Store 4 elements to global with a single vectorized store instruction
// CHECK: @$5 st.global.b32.v4 [ ${{.*}} + 0 ], { ${{.*}}, ${{.*}}, ${{.*}}, ${{.*}} };
// CHECK: @$5 st.global.v4.b32 [ ${{.*}} + 0 ], { ${{.*}}, ${{.*}}, ${{.*}}, ${{.*}} };
tt.store %13, %11 : tensor<256xf32, #blocked0>
return
}
@@ -222,8 +222,8 @@ module attributes {"triton_gpu.num-warps" = 1 : i32} {
%13 = tt.addptr %12, %4 : tensor<256x!tt.ptr<f32>, #blocked0>
// Store 8 elements to global with two vectorized store instructions
// CHECK: @$5 st.global.b32.v4 [ ${{.*}} + 0 ], { ${{.*}}, ${{.*}}, ${{.*}}, ${{.*}} };
// CHECK: @$5 st.global.b32.v4 [ ${{.*}} + 0 ], { ${{.*}}, ${{.*}}, ${{.*}}, ${{.*}} };
// CHECK: @$5 st.global.v4.b32 [ ${{.*}} + 0 ], { ${{.*}}, ${{.*}}, ${{.*}}, ${{.*}} };
// CHECK: @$5 st.global.v4.b32 [ ${{.*}} + 0 ], { ${{.*}}, ${{.*}}, ${{.*}}, ${{.*}} };
tt.store %13, %11 : tensor<256xf32, #blocked0>
return
}