From 2b0f877fad1db77ed1e5d0a20f236efa7cc63b14 Mon Sep 17 00:00:00 2001
From: fdrocha <99990201+fdrocha@users.noreply.github.com>
Date: Mon, 3 Oct 2022 19:36:24 +0100
Subject: [PATCH] [RUNTIME] Support environments with multiple cudalibs (#733)

---
 python/test/unit/language/test_core.py |  2 +-
 python/triton/compiler.py              | 14 ++++++++------
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/python/test/unit/language/test_core.py b/python/test/unit/language/test_core.py
index a4e74c270..9c1695746 100644
--- a/python/test/unit/language/test_core.py
+++ b/python/test/unit/language/test_core.py
@@ -666,7 +666,7 @@ def test_atomic_cas():
     Lock = torch.zeros((1,), device='cuda', dtype=torch.int32)
     change_value[(1,)](Lock)
 
-    assert(Lock[0] == 1)
+    assert (Lock[0] == 1)
 
     # 2. only one block enters the critical section
     @triton.jit
diff --git a/python/triton/compiler.py b/python/triton/compiler.py
index 43e69035f..d14c76981 100644
--- a/python/triton/compiler.py
+++ b/python/triton/compiler.py
@@ -1102,9 +1102,9 @@ class CacheManager:
 
 
 @functools.lru_cache()
-def libcuda_dir():
-    loc = subprocess.check_output(["whereis", "libcuda.so"]).decode().strip().split()[-1]
-    return os.path.dirname(loc)
+def libcuda_dirs():
+    locs = subprocess.check_output(["whereis", "libcuda.so"]).decode().strip().split()[1:]
+    return [os.path.dirname(loc) for loc in locs]
 
 
 @contextlib.contextmanager
@@ -1118,7 +1118,7 @@ def quiet():
 
 
 def _build(name, src, srcdir):
-    cuda_lib_dir = libcuda_dir()
+    cuda_lib_dirs = libcuda_dirs()
     cu_include_dir = "/usr/local/cuda/include"
     suffix = sysconfig.get_config_var('EXT_SUFFIX')
     so = os.path.join(srcdir, '{name}{suffix}'.format(name=name, suffix=suffix))
@@ -1130,12 +1130,14 @@ def _build(name, src, srcdir):
         gcc = shutil.which("gcc")
         cc = gcc if gcc is not None else clang
     py_include_dir = get_paths()["include"]
-    ret = subprocess.check_call([cc, src, "-O3", f"-I{cu_include_dir}", f"-I{py_include_dir}", f"-I{srcdir}", "-shared", "-fPIC", f"-L{cuda_lib_dir}", "-lcuda", "-o", so])
+    cc_cmd = [cc, src, "-O3", f"-I{cu_include_dir}", f"-I{py_include_dir}", f"-I{srcdir}", "-shared", "-fPIC", "-lcuda", "-o", so]
+    cc_cmd += [f"-L{dir}" for dir in cuda_lib_dirs]
+    ret = subprocess.check_call(cc_cmd)
     if ret == 0:
         return so
     # fallback on setuptools
     extra_compile_args = []
-    library_dirs = [cuda_lib_dir]
+    library_dirs = cuda_lib_dirs
     include_dirs = [srcdir, cu_include_dir]
     libraries = ['cuda']
     # extra arguments