[BACKEND][CODEGEN] Faster reduction for scanline layout (#516)

This commit is contained in:
Philippe Tillet
2022-05-14 15:26:13 -07:00
committed by GitHub
parent d1a22a94e6
commit d35617bea1
3 changed files with 65 additions and 47 deletions

View File

@@ -676,9 +676,16 @@ def test_reduce1d(dtype_str, shape, device='cuda'):
np.testing.assert_allclose(z_ref, to_numpy(z_tri), rtol=0.01)
@pytest.mark.parametrize("dtype_str, shape, axis", [
(dtype, (1, 1024), 1) for dtype in ['float32', 'uint32']
])
# (dtype_str, shape, axis) cases on a single-row (1, 1024) input: one entry
# per dtype / axis combination. The axis list currently holds only axis 1.
reduce_configs1 = [
    (dtype_name, (1, 1024), reduce_axis)
    for dtype_name in ['float32', 'uint32']
    for reduce_axis in [1]
]
# (dtype_str, shape, axis) cases: float32 with axis 1 over a range of
# 2D shapes, from (2, 32) up to (128, 256) and (32, 1024).
reduce_configs2 = [
    ('float32', dims, 1)
    for dims in [
        (2, 32),
        (4, 128),
        (32, 64),
        (64, 128),
        (128, 256),
        (32, 1024),
    ]
]
@pytest.mark.parametrize("dtype_str, shape, axis", reduce_configs1 + reduce_configs2)
def test_reduce2d(dtype_str, shape, axis, device='cuda'):
# triton kernel
@triton.jit