Improve ROCm support. (#780)
- updates to support ROCm 5.2 - workarounds in tests where NV tools were used unconditionally - implemented `get_num_blocks()` and `add_memfence()` for AMD GPU - backported from history some atomics - added bf16 support - minor warnings cleanup - added dockerfile to run on a ROCm enabled machine Co-authored-by: B1tway <andrew.shukshov@gmail.com> Co-authored-by: Andrey Shukshov <36711069+B1tway@users.noreply.github.com>
This commit is contained in:
@@ -369,9 +369,6 @@ def test_atomic_rmw(op, dtype_x, mode, device='cuda'):
|
||||
('float32', 'int32', True)
|
||||
])
|
||||
def test_cast(dtype_x, dtype_z, bitcast, device='cuda'):
|
||||
if torch.version.hip is not None:
|
||||
assert 'bfloat' not in dtype_x
|
||||
assert 'bfloat' not in dtype_z
|
||||
|
||||
SIZE = 1024
|
||||
x = triton.testing.random((SIZE, ), dtype=cvt[dtype_x], device=device)
|
||||
|
Reference in New Issue
Block a user