From d3c925db8a81ca74f14680876b9311e7d079c5a1 Mon Sep 17 00:00:00 2001 From: Natalia Gimelshein Date: Tue, 4 Oct 2022 12:44:03 -0700 Subject: [PATCH] [FRONTEND] properly broadcast scalar where condition (#736) --- python/test/unit/language/test_core.py | 12 ++++++++++++ python/triton/language/semantic.py | 2 ++ 2 files changed, 14 insertions(+) diff --git a/python/test/unit/language/test_core.py b/python/test/unit/language/test_core.py index 9c1695746..9cdd7885c 100644 --- a/python/test/unit/language/test_core.py +++ b/python/test/unit/language/test_core.py @@ -421,6 +421,15 @@ def test_where_broadcast(): res = tl.where(mask, vals, 0.) tl.store(out_ptr + yoffsets + BLOCK_SIZE * xoffsets, res) + @triton.jit + def where_scalar_condition(a_ptr, out_ptr, BLOCK_SIZE: tl.constexpr): + xoffsets = tl.reshape(tl.arange(0, BLOCK_SIZE), [BLOCK_SIZE, 1]) + yoffsets = tl.reshape(tl.arange(0, BLOCK_SIZE), [1, BLOCK_SIZE]) + mask = 0 + vals = tl.load(a_ptr + yoffsets + BLOCK_SIZE * xoffsets) + res = tl.where(mask, vals, 0.) + tl.store(out_ptr + yoffsets + BLOCK_SIZE * xoffsets, res) + SIZE = 32 dtype = 'float32' rs = RandomState(17) @@ -432,6 +441,9 @@ def test_where_broadcast(): z_tri = to_triton(np.empty((SIZE, SIZE), dtype=z.dtype), device='cuda', dst_type=dtype) where_kernel[(1,)](cond_tri, x_tri, z_tri, SIZE) assert (z == to_numpy(z_tri)).all() + where_scalar_condition[(1,)](x_tri, z_tri, SIZE) + z = np.where(0, x, 0) + assert (z == to_numpy(z_tri)).all() # --------------- # test unary ops diff --git a/python/triton/language/semantic.py b/python/triton/language/semantic.py index ba0e49e64..caf7e1bc1 100644 --- a/python/triton/language/semantic.py +++ b/python/triton/language/semantic.py @@ -978,6 +978,8 @@ def where(condition: tl.tensor, condition, x = broadcast_impl_value(condition, x, builder) x, y = binary_op_type_checking_impl(x, y, builder, True, True) + if not condition.type.is_block(): + condition, _ = broadcast_impl_value(condition, x, builder) ret_ty = x.type return tl.tensor(builder.create_select(condition.handle, x.handle, y.handle), ret_ty)