[GENERAL] Some minor improvements here and there to build systems and docs (#148)

2021-07-28 01:51:17 -07:00
parent 57c1fd3366
commit acd5e44611
7 changed files with 60 additions and 49 deletions
--- a/python/setup.py
+++ b/python/setup.py
@@ -23,12 +23,13 @@ def get_llvm():
    paths = [distutils.spawn.find_executable(cfg) for cfg in supported]
    paths = [p for p in paths if p is not None]
    if paths:
-      return paths[0]
+      return '', ''
    # download if nothing is installed
    name = 'clang+llvm-11.0.1-x86_64-linux-gnu-ubuntu-16.04'
    dir = '/tmp'
-    llvm_config = '{dir}/{name}/bin/llvm-config'.format(dir=dir, name=name)
-    if not os.path.exists(llvm_config):
+    llvm_include_dir = '{dir}/{name}/include'.format(dir=dir, name=name)
+    llvm_library_dir = '{dir}/{name}/lib'.format(dir=dir, name=name)
+    if not os.path.exists(llvm_library_dir):
        try:
            shutil.rmtree(os.path.join(dir, name))
        except:
@@ -38,7 +39,7 @@ def get_llvm():
        ftpstream = urllib.request.urlopen(url)
        file = tarfile.open(fileobj=ftpstream, mode="r|xz")
        file.extractall(path=dir)
-    return llvm_config
+    return llvm_include_dir, llvm_library_dir


 class CMakeExtension(Extension):
@@ -76,7 +77,7 @@ class CMakeBuild(build_ext):
            self.build_extension(ext)

    def build_extension(self, ext):
-        llvm_config = get_llvm()
+        llvm_include_dir, llvm_library_dir = get_llvm()
        # self.debug = True
        extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.path)))
        # create build directories
@@ -88,12 +89,12 @@ class CMakeBuild(build_ext):
            os.makedirs(llvm_build_dir)
        # python directories
        python_include_dirs = [distutils.sysconfig.get_python_inc()] + ['/usr/local/cuda/include']
-        python_lib_dirs = distutils.sysconfig.get_config_var("LIBDIR")
        cmake_args = [
            "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + extdir,
            "-DBUILD_TUTORIALS=OFF",
            "-DBUILD_PYTHON_MODULE=ON",
-            "-DLLVM_CONFIG=" + llvm_config,
+            "-DLLVM_INCLUDE_DIRS=" + llvm_include_dir,
+            "-DLLVM_LIBRARY_DIR=" + llvm_library_dir,
            #'-DPYTHON_EXECUTABLE=' + sys.executable,
            #'-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON',
            "-DTRITON_LLVM_BUILD_DIR=" + llvm_build_dir,
@@ -126,7 +127,7 @@ setup(
    description="A language and compiler for custom Deep Learning operations",
    long_description="",
    packages=["triton", "triton/_C", "triton/tools", "triton/ops", "triton/ops/blocksparse"],
-    install_requires=["numpy", "torch"],
+    install_requires=["torch"],
    package_data={"triton/ops": ["*.c"], "triton/ops/blocksparse": ["*.c"]},
    include_package_data=True,
    ext_modules=[CMakeExtension("triton", "triton/_C/")],
--- a/python/tutorials/01-vector-add.py
+++ b/python/tutorials/01-vector-add.py
@@ -41,8 +41,8 @@ def _add(


 # %%
-# Let's also declare a helper function that to (1) allocate the output vector
-# and (2) enqueueing the above kernel.
+# Let's also declare a helper function to (1) allocate the `z` tensor
+# and (2) enqueue the above kernel with appropriate grid/block sizes.


 def add(x, y):
@@ -80,7 +80,7 @@ print(f'The maximum difference between torch and triton is ' f'{torch.max(torch.
 # %%
 # Benchmark
 # -----------
-# We can now benchmark our custom op for vectors of increasing sizes to get a sense of how it does relative to PyTorch.
+# We can now benchmark our custom op on vectors of increasing sizes to get a sense of how it does relative to PyTorch.
 # To make things easier, Triton has a set of built-in utilities that allow us to concisely plot the performance of our custom ops
 # for different problem sizes.

@@ -111,6 +111,6 @@ def benchmark(size, provider):


 # %%
-# We can now run the decorated function above. Pass `show_plots=True` to see the plots and/or
+# We can now run the decorated function above. Pass `print_data=True` to see the performance numbers, `show_plots=True` to plot them, and/or
 # `save_path='/path/to/results/'` to save them to disk along with raw CSV data.
 benchmark.run(print_data=True, show_plots=True)