[GENERAL] Some minor improvements here and there to build systems and docs (#148)

2021-07-28 01:51:17 -07:00
parent 57c1fd3366
commit acd5e44611
7 changed files with 60 additions and 49 deletions
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -1,4 +1,4 @@
-name: Build Website
+name: Documentation
 on:
  workflow_dispatch:
  schedule:    
@@ -6,7 +6,7 @@ on:
 jobs:
-  Build-Website:
+  Build-Documentation:
    runs-on: self-hosted
--- a/.github/workflows/build-wheels.yml
+++ b/.github/workflows/build-wheels.yml
@@ -1,4 +1,4 @@
-name: Build Wheels
+name: Wheels
 on:
  workflow_dispatch:
  schedule:    
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -25,21 +25,28 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__STDC_FORMAT_MACROS  -std=gnu++17")
 # if(APPLE)
 #     set(CMAKE_OSX_SYSROOT "/")
 #     set(CMAKE_OSX_DEPLOYMENT_TARGET "")
 # endif()
 ##########
 # LLVM
 ##########
-find_package(LLVM 11 REQUIRED COMPONENTS "nvptx")
+if("${LLVM_LIBRARY_DIR}" STREQUAL "")
-message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
+    find_package(LLVM 11 REQUIRED COMPONENTS "nvptx")
-include_directories("${LLVM_INCLUDE_DIRS}")
+    message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
-if(APPLE)
+    if(APPLE)
-  set(CMAKE_OSX_DEPLOYMENT_TARGET "10.14")
+      set(CMAKE_OSX_DEPLOYMENT_TARGET "10.14")
    endif()
 # sometimes we don't want to use llvm-config, since it may have been downloaded for some specific linux distros
 else()
    set(LLVM_LDFLAGS "-L${LLVM_LIBRARY_DIR}")
    set(LLVM_LIBRARIES libLLVMNVPTXCodeGen.a libLLVMSelectionDAG.a libLLVMipo.a libLLVMInstrumentation.a
                       libLLVMVectorize.a libLLVMLinker.a libLLVMIRReader.a libLLVMAsmParser.a libLLVMFrontendOpenMP.a
                       libLLVMAsmPrinter.a libLLVMDebugInfoDWARF.a libLLVMCodeGen.a libLLVMTarget.a libLLVMScalarOpts.a
                       libLLVMInstCombine.a libLLVMAggressiveInstCombine.a libLLVMTransformUtils.a libLLVMBitWriter.a
                       libLLVMAnalysis.a libLLVMProfileData.a libLLVMObject.a libLLVMTextAPI.a libLLVMMCParser.a
                       libLLVMBitReader.a libLLVMCore.a libLLVMRemarks.a libLLVMBitstreamReader.a libLLVMNVPTXDesc.a
                       libLLVMMC.a libLLVMDebugInfoCodeView.a libLLVMDebugInfoMSF.a libLLVMBinaryFormat.a libLLVMNVPTXInfo.a
                       libLLVMSupport.a libLLVMDemangle.a)
 endif()
 include_directories("${LLVM_INCLUDE_DIRS}")
 # Python module
 if(BUILD_PYTHON_MODULE)
--- a/README.md
+++ b/README.md
@@ -1,12 +1,20 @@
-<img src="https://cdn.openai.com/triton/assets/triton-logo.png" alt="Triton logo" width="80" height="91">
+<div align="center">
  <img src="https://cdn.openai.com/triton/assets/triton-logo.png" alt="Triton logo" width="88" height="100">
 </div>
 [![Wheels](https://github.com/openai/triton/actions/workflows/wheels.yml/badge.svg?branch=master)](https://github.com/openai/triton/actions/workflows/wheels.yml)
 **`Documentation`** |
 ------------------- |
 [![Documentation](https://github.com/openai/triton/actions/workflows/documentation.yml/badge.svg)](https://triton-lang.org/)
 # Triton
 This is the development repository of Triton, a language and compiler for writing highly efficient custom Deep-Learning primitives. The aim of Triton is to provide an open-source environment to write fast code at higher productivity than CUDA, but also with higher flexibility than other existing DSLs.
-[![Build Status](https://dev.azure.com/triton-lang/Triton/_apis/build/status/ptillet.triton?branchName=master)](https://dev.azure.com/triton-lang/Triton/_build/latest?definitionId=10&branchName=master)
+The foundations of this project are described in the following MAPL2019 publication: [Triton: An Intermediate Language and Compiler for Tiled Neural Network Computations](http://www.eecs.harvard.edu/~htk/publication/2019-mapl-tillet-kung-cox.pdf). Please consider citing this work if you use Triton!
 The foundations of this project are described in the following MAPL2019 publication: [Triton: An Intermediate Language and Compiler for Tiled Neural Network Computations](http://www.eecs.harvard.edu/~htk/publication/2019-mapl-tillet-kung-cox.pdf). Please consider citing us if you use our work!
 The [official documentation](https://triton-lang.org) contains installation instructions and tutorials.
@@ -18,3 +26,7 @@ Supported Platforms:
 Supported Hardware:
  * NVIDIA GPUs (Compute Capability 7.0+)
  * Under development: AMD GPUs, CPUs
 # Disclaimer
 Triton is a fairly recent project, and it is under active development. We expect it to be pretty useful in a wide variety of cases, but don't be surprised if it's a bit rough around the edges :)
--- a/docs/getting-started/installation.rst
+++ b/docs/getting-started/installation.rst
@@ -6,7 +6,13 @@ Installation
 Binary Distributions
 ---------------------
-You can install the latest nightly release of Triton from pip:
+You can install the latest stable release of Triton from pip:
      pip install triton
 Binary wheels are available for CPython 3.6-3.9 and PyPy 3.6-3.7.
 And the latest nightly release:
 .. code-block:: bash
@@ -27,9 +33,10 @@ You can install the Python package from source by running the following commands
      git clone https://github.com/ptillet/triton.git;
      cd triton/python;
      pip install cmake; # build time dependency
      pip install -e .
-This may take a while (10-20 minutes) as it will download and compile LLVM from source.
+Note that, if llvm-11 is not present on your system, the setup.py script will download the LLVM static libraries from the web and link against them.
 You can then test your installation by running the unit tests:
@@ -43,19 +50,3 @@ and the benchmarks
      cd bench/
      python -m run --with-plots --result-dir /tmp/triton-bench
 +++++++++++++++
 C++ Package
 +++++++++++++++
 Those not interested in Python integration may want to use the internals of Triton (i.e., runtime, parser, codegen, driver, intermediate representation) directly. This can be done by running the following commands:
 .. code-block:: bash
      git clone https://github.com/ptillet/triton.git;
      mkdir build;
      cd build;
      cmake ../;
      make -j8;
 Note that while direct usage of the C++ API is not officially supported, a usage tutorial can be found `here <https://github.com/ptillet/triton/blob/master/tutorials/01-matmul.cc>`_.
--- a/python/setup.py
+++ b/python/setup.py
@@ -23,12 +23,13 @@ def get_llvm():
    paths = [distutils.spawn.find_executable(cfg) for cfg in supported]
    paths = [p for p in paths if p is not None]
    if paths:
-      return paths[0]
+      return '', ''
    # download if nothing is installed
    name = 'clang+llvm-11.0.1-x86_64-linux-gnu-ubuntu-16.04'
    dir = '/tmp'
-    llvm_config = '{dir}/{name}/bin/llvm-config'.format(dir=dir, name=name)
+    llvm_include_dir = '{dir}/{name}/include'.format(dir=dir, name=name)
-    if not os.path.exists(llvm_config):
+    llvm_library_dir = '{dir}/{name}/lib'.format(dir=dir, name=name)
    if not os.path.exists(llvm_library_dir):
        try:
            shutil.rmtree(os.path.join(dir, name))
        except:
@@ -38,7 +39,7 @@ def get_llvm():
        ftpstream = urllib.request.urlopen(url)
        file = tarfile.open(fileobj=ftpstream, mode="r|xz")
        file.extractall(path=dir)
-    return llvm_config
+    return llvm_include_dir, llvm_library_dir
 class CMakeExtension(Extension):
@@ -76,7 +77,7 @@ class CMakeBuild(build_ext):
            self.build_extension(ext)
    def build_extension(self, ext):
-        llvm_config = get_llvm()
+        llvm_include_dir, llvm_library_dir = get_llvm()
        # self.debug = True
        extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.path)))
        # create build directories
@@ -88,12 +89,12 @@ class CMakeBuild(build_ext):
            os.makedirs(llvm_build_dir)
        # python directories
        python_include_dirs = [distutils.sysconfig.get_python_inc()] + ['/usr/local/cuda/include']
        python_lib_dirs = distutils.sysconfig.get_config_var("LIBDIR")
        cmake_args = [
            "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + extdir,
            "-DBUILD_TUTORIALS=OFF",
            "-DBUILD_PYTHON_MODULE=ON",
-            "-DLLVM_CONFIG=" + llvm_config,
+            "-DLLVM_INCLUDE_DIRS=" + llvm_include_dir,
            "-DLLVM_LIBRARY_DIR=" + llvm_library_dir,
            #'-DPYTHON_EXECUTABLE=' + sys.executable,
            #'-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON',
            "-DTRITON_LLVM_BUILD_DIR=" + llvm_build_dir,
@@ -126,7 +127,7 @@ setup(
    description="A language and compiler for custom Deep Learning operations",
    long_description="",
    packages=["triton", "triton/_C", "triton/tools", "triton/ops", "triton/ops/blocksparse"],
-    install_requires=["numpy", "torch"],
+    install_requires=["torch"],
    package_data={"triton/ops": ["*.c"], "triton/ops/blocksparse": ["*.c"]},
    include_package_data=True,
    ext_modules=[CMakeExtension("triton", "triton/_C/")],
--- a/python/tutorials/01-vector-add.py
+++ b/python/tutorials/01-vector-add.py
@@ -41,8 +41,8 @@ def _add(
 # %%
-# Let's also declare a helper function that to (1) allocate the output vector
+# Let's also declare a helper function to (1) allocate the `z` tensor
-# and (2) enqueueing the above kernel.
+# and (2) enqueue the above kernel with appropriate grid/block sizes.
 def add(x, y):
@@ -80,7 +80,7 @@ print(f'The maximum difference between torch and triton is ' f'{torch.max(torch.
 # %%
 # Benchmark
 # -----------
-# We can now benchmark our custom op for vectors of increasing sizes to get a sense of how it does relative to PyTorch.
+# We can now benchmark our custom op on vectors of increasing sizes to get a sense of how it does relative to PyTorch.
 # To make things easier, Triton has a set of built-in utilities that allow us to concisely plot the performance of our custom ops
 # for different problem sizes.
@@ -111,6 +111,6 @@ def benchmark(size, provider):
 # %%
-# We can now run the decorated function above. Pass `show_plots=True` to see the plots and/or
+# We can now run the decorated function above. Pass `print_data=True` to see the performance numbers, `show_plots=True` to plot them, and/or
 # `save_path='/path/to/results/'` to save them to disk along with raw CSV data
 benchmark.run(print_data=True, show_plots=True)