diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 74ae590c4..544bce95b 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -24,6 +24,11 @@ jobs: cd python pip3 install -e . + - name: Unit tests + run: | + cd python/test/unit + pytest -vs . + - name: Regression tests run: | cd python/test/regression @@ -33,7 +38,3 @@ jobs: sudo nvidia-smi -i 0 -rgc sudo nvidia-smi -i 0 -rmc - - name: Unit tests - run: | - cd python/test/unit - pytest -vs . \ No newline at end of file diff --git a/python/test/regression/test_performance.py b/python/test/regression/test_performance.py index a2c9f08ca..e205828d6 100644 --- a/python/test/regression/test_performance.py +++ b/python/test/regression/test_performance.py @@ -52,7 +52,7 @@ def test_matmul(M, N, K): cur_sm_clock = nvsmi(['clocks.current.sm'])[0] ref_sm_clock = 1350 max_gpu_perf = 1e-6*80*8*128*cur_sm_clock - assert abs(cur_sm_clock - ref_sm_clock) < 5, f'GPU SMs must run at {ref_sm_clock} MHz' + assert abs(cur_sm_clock - ref_sm_clock) < 10, f'GPU SMs must run at {ref_sm_clock} MHz' a = torch.randn((M, K), dtype=torch.float16, device='cuda') b = torch.randn((K, N), dtype=torch.float16, device='cuda') fn = lambda: triton.ops.matmul(a, b) @@ -95,7 +95,7 @@ def test_elementwise(N): cur_mem_clock = nvsmi(['clocks.current.memory'])[0] ref_mem_clock = 877 max_gpu_perf = 512*2*ref_mem_clock*1e-3 - assert abs(cur_mem_clock - ref_mem_clock) < 5, f'GPU memmory must run at {ref_mem_clock} MHz' + assert abs(cur_mem_clock - ref_mem_clock) < 10, f'GPU memmory must run at {ref_mem_clock} MHz' z = torch.empty((N, ), dtype=torch.float16, device='cuda') x = torch.randn_like(z) y = torch.randn_like(z) diff --git a/python/triton/code_gen.py b/python/triton/code_gen.py index 10561898f..2eec6248d 100644 --- a/python/triton/code_gen.py +++ b/python/triton/code_gen.py @@ -709,7 +709,7 @@ class JITFunction: return # create cache directory if not os.path.exists(cache_dir): - os.makedirs(cache_dir) + os.makedirs(cache_dir, exist_ok=True) # create md5 hash of src md5 = hashlib.md5() md5.update(self.src.encode('utf-8'))