diff --git a/python/test/unit/language/test_core.py b/python/test/unit/language/test_core.py
index 8a695246c..caed06351 100644
--- a/python/test/unit/language/test_core.py
+++ b/python/test/unit/language/test_core.py
@@ -1067,15 +1067,18 @@ def test_permute(dtype_str, shape, perm, device='cuda'):
                          [(epilogue, allow_tf32, dtype)
                           for epilogue in ['none', 'trans', 'add-matrix', 'add-rows', 'add-cols', 'softmax', 'chain-dot']
                           for allow_tf32 in [True, False]
-                          for dtype in ['float16']
+                          for dtype in ['float32', 'float16']
                           if not (allow_tf32 and (dtype in ['float16']))])
 def test_dot(epilogue, allow_tf32, dtype, device='cuda'):
-    cc = _triton.runtime.cc(_triton.runtime.backend.CUDA, torch.cuda.current_device())
-    if cc < 80:
-        if dtype == 'int8':
-            pytest.skip("Only test int8 on devices with sm >= 80")
-        elif dtype == 'float32' and allow_tf32:
-            pytest.skip("Only test tf32 on devices with sm >= 80")
+    if torch.version.hip is not None:  # non-None on HIP (ROCm) builds of PyTorch
+        pass  # HIP path: none of the compute-capability skips below are applied
+    else:
+        cc = _triton.runtime.cc(_triton.runtime.backend.CUDA, torch.cuda.current_device())
+        if cc < 80:  # per the skip messages, int8 and tf32 variants need sm >= 80
+            if dtype == 'int8':  # NOTE(review): 'int8' is not in the dtype list visible in this hunk
+                pytest.skip("Only test int8 on devices with sm >= 80")
+            elif dtype == 'float32' and allow_tf32:
+                pytest.skip("Only test tf32 on devices with sm >= 80")
 
     M, N, K = 128, 128, 64
     num_warps = 8
@@ -1170,15 +1173,18 @@ def test_dot(epilogue, allow_tf32, dtype, device='cuda'):
     # print(z_ref[:,0], z_tri[:,0])
     np.testing.assert_allclose(z_ref, to_numpy(z_tri), rtol=0.01)
     # make sure ld/st are vectorized
-    ptx = pgm.asm['ptx']
-    assert 'ld.global.v4' in ptx
-    assert 'st.global.v4' in ptx
-    if allow_tf32:
-        assert 'mma.sync.aligned.m16n8k8.row.col.f32.tf32.tf32.f32' in ptx
-    elif dtype == 'float32':
-        assert 'mma.sync.aligned.m16n8k8.row.col.f32.tf32.tf32.f32' not in ptx
-    elif dtype == 'int8':
-        assert 'mma.sync.aligned.m16n8k32.row.col.satfinite.s32.s8.s8.s32' in ptx
+    if torch.version.hip is not None:  # non-None on HIP (ROCm) builds of PyTorch
+        pass  # HIP path: the PTX assertions in the else branch are not run
+    else:
+        ptx = pgm.asm['ptx']
+        assert 'ld.global.v4' in ptx
+        assert 'st.global.v4' in ptx
+        if allow_tf32:  # tf32 allowed: the tf32 mma instruction must appear
+            assert 'mma.sync.aligned.m16n8k8.row.col.f32.tf32.tf32.f32' in ptx
+        elif dtype == 'float32':  # float32 without tf32: the tf32 mma instruction must be absent
+            assert 'mma.sync.aligned.m16n8k8.row.col.f32.tf32.tf32.f32' not in ptx
+        elif dtype == 'int8':  # NOTE(review): 'int8' is not in the dtype list visible in this hunk
+            assert 'mma.sync.aligned.m16n8k32.row.col.satfinite.s32.s8.s8.s32' in ptx
 
 
 def test_dot_without_load():
diff --git a/scripts/amd/backtrace.sh b/scripts/amd/backtrace.sh
index df8595daf..41d66f985 100644
--- a/scripts/amd/backtrace.sh
+++ b/scripts/amd/backtrace.sh
@@ -7,7 +7,7 @@ sudo apt install gdb -y
 
 gdb -ex "set pagination off" \
     -ex "file python" \
-    -ex 'run -m pytest --capture=tee-sys --verbose "python/test/unit/language/test_core.py::test_bin_op[int32-uint32-+]"' \
+    -ex 'run -m pytest --capture=tee-sys --verbose "python/test/unit/language/test_core.py::test_dot"' \
     -ex "backtrace" \
     -ex "set confirm off" \
     -ex "q" \
diff --git a/scripts/amd/test.sh b/scripts/amd/test.sh
index 0d0d660ee..3439a70c1 100755
--- a/scripts/amd/test.sh
+++ b/scripts/amd/test.sh
@@ -26,7 +26,7 @@ rm -rf /tmp/triton
 # python python/test/test_empty.py
 # -ex 'ignore 1 472' \
 
-pytest -rfs --verbose python/test/unit/language/test_core.py 2>&1 | tee /dockerx/triton/test_core.log
+# pytest -rfs --verbose python/test/unit/language/test_core.py 2>&1 | tee /dockerx/triton/test_core.log
 # pytest --verbose python/test/unit/language/test_core.py::test_empty_kernel[float32] 2>&1 | tee /dockerx/triton/test_empty_kernel.log
 # pytest --verbose python/test/unit/language/test_core.py::test_bin_op[int32-uint32-+] 2>&1 | tee /dockerx/triton/test_bin_op.log
 # pytest --verbose python/test/unit/language/test_core.py::test_atomic_rmw 2>&1 | tee /dockerx/triton/test_atomic_rmw.log
@@ -55,7 +55,7 @@ pytest -rfs --verbose python/test/unit/language/test_core.py 2>&1 | tee /dockerx
 # pytest --capture=tee-sys --verbose python/test/unit/language/test_core.py::test_num_programs[float32]
 # pytest --verbose python/test/unit/language/test_core.py::test_unary_op
 # pytest --verbose python/test/unit/language/test_core.py::test_bin_op
-# pytest --verbose "python/test/unit/language/test_core.py::test_dot"
+pytest --verbose "python/test/unit/language/test_core.py::test_dot"
 # pytest --verbose python/test/unit/language/test_core.py::test_cast
 # pytest --verbose python/test/unit/language/test_core.py::test_reduce1d
 # pytest --verbose python/test/unit/language/test_core.py::test_reduce2d