[GENERAL] Some minor improvements here and there to build systems and docs (#148)
This commit is contained in:
@@ -1,4 +1,4 @@
|
|||||||
name: Build Website
|
name: Documentation
|
||||||
on:
|
on:
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
schedule:
|
schedule:
|
||||||
@@ -6,7 +6,7 @@ on:
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
|
|
||||||
Build-Website:
|
Build-Documentation:
|
||||||
|
|
||||||
runs-on: self-hosted
|
runs-on: self-hosted
|
||||||
|
|
@@ -1,4 +1,4 @@
|
|||||||
name: Build Wheels
|
name: Wheels
|
||||||
on:
|
on:
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
schedule:
|
schedule:
|
@@ -25,21 +25,28 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
|
|||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__STDC_FORMAT_MACROS -std=gnu++17")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__STDC_FORMAT_MACROS -std=gnu++17")
|
||||||
|
|
||||||
|
|
||||||
# if(APPLE)
|
|
||||||
# set(CMAKE_OSX_SYSROOT "/")
|
|
||||||
# set(CMAKE_OSX_DEPLOYMENT_TARGET "")
|
|
||||||
# endif()
|
|
||||||
|
|
||||||
|
|
||||||
##########
|
##########
|
||||||
# LLVM
|
# LLVM
|
||||||
##########
|
##########
|
||||||
find_package(LLVM 11 REQUIRED COMPONENTS "nvptx")
|
if("${LLVM_LIBRARY_DIR}" STREQUAL "")
|
||||||
message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
|
find_package(LLVM 11 REQUIRED COMPONENTS "nvptx")
|
||||||
include_directories("${LLVM_INCLUDE_DIRS}")
|
message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
|
||||||
if(APPLE)
|
if(APPLE)
|
||||||
set(CMAKE_OSX_DEPLOYMENT_TARGET "10.14")
|
set(CMAKE_OSX_DEPLOYMENT_TARGET "10.14")
|
||||||
|
endif()
|
||||||
|
# Sometimes we don't want to use llvm-config, since the LLVM binaries may have been downloaded for a specific Linux distribution
|
||||||
|
else()
|
||||||
|
set(LLVM_LDFLAGS "-L${LLVM_LIBRARY_DIR}")
|
||||||
|
set(LLVM_LIBRARIES libLLVMNVPTXCodeGen.a libLLVMSelectionDAG.a libLLVMipo.a libLLVMInstrumentation.a
|
||||||
|
libLLVMVectorize.a libLLVMLinker.a libLLVMIRReader.a libLLVMAsmParser.a libLLVMFrontendOpenMP.a
|
||||||
|
libLLVMAsmPrinter.a libLLVMDebugInfoDWARF.a libLLVMCodeGen.a libLLVMTarget.a libLLVMScalarOpts.a
|
||||||
|
libLLVMInstCombine.a libLLVMAggressiveInstCombine.a libLLVMTransformUtils.a libLLVMBitWriter.a
|
||||||
|
libLLVMAnalysis.a libLLVMProfileData.a libLLVMObject.a libLLVMTextAPI.a libLLVMMCParser.a
|
||||||
|
libLLVMBitReader.a libLLVMCore.a libLLVMRemarks.a libLLVMBitstreamReader.a libLLVMNVPTXDesc.a
|
||||||
|
libLLVMMC.a libLLVMDebugInfoCodeView.a libLLVMDebugInfoMSF.a libLLVMBinaryFormat.a libLLVMNVPTXInfo.a
|
||||||
|
libLLVMSupport.a libLLVMDemangle.a)
|
||||||
endif()
|
endif()
|
||||||
|
include_directories("${LLVM_INCLUDE_DIRS}")
|
||||||
|
|
||||||
# Python module
|
# Python module
|
||||||
if(BUILD_PYTHON_MODULE)
|
if(BUILD_PYTHON_MODULE)
|
||||||
|
20
README.md
20
README.md
@@ -1,12 +1,20 @@
|
|||||||
<img src="https://cdn.openai.com/triton/assets/triton-logo.png" alt="Triton logo" width="80" height="91">
|
<div align="center">
|
||||||
|
<img src="https://cdn.openai.com/triton/assets/triton-logo.png" alt="Triton logo" width="88" height="100">
|
||||||
|
</div>
|
||||||
|
|
||||||
|
[](https://github.com/openai/triton/actions/workflows/wheels.yml)
|
||||||
|
|
||||||
|
|
||||||
|
**`Documentation`** |
|
||||||
|
------------------- |
|
||||||
|
[](https://triton-lang.org/)
|
||||||
|
|
||||||
|
|
||||||
# Triton
|
# Triton
|
||||||
|
|
||||||
This is the development repository of Triton, a language and compiler for writing highly efficient custom Deep-Learning primitives. The aim of Triton is to provide an open-source environment to write fast code at higher productivity than CUDA, but also with higher flexibility than other existing DSLs.
|
This is the development repository of Triton, a language and compiler for writing highly efficient custom Deep-Learning primitives. The aim of Triton is to provide an open-source environment to write fast code at higher productivity than CUDA, but also with higher flexibility than other existing DSLs.
|
||||||
|
|
||||||
[](https://dev.azure.com/triton-lang/Triton/_build/latest?definitionId=10&branchName=master)
|
The foundations of this project are described in the following MAPL2019 publication: [Triton: An Intermediate Language and Compiler for Tiled Neural Network Computations](http://www.eecs.harvard.edu/~htk/publication/2019-mapl-tillet-kung-cox.pdf). Please consider citing this work if you use Triton!
|
||||||
|
|
||||||
The foundations of this project are described in the following MAPL2019 publication: [Triton: An Intermediate Language and Compiler for Tiled Neural Network Computations](http://www.eecs.harvard.edu/~htk/publication/2019-mapl-tillet-kung-cox.pdf). Please consider citing us if you use our work!
|
|
||||||
|
|
||||||
The [official documentation](https://triton-lang.org) contains installation instructions and tutorials.
|
The [official documentation](https://triton-lang.org) contains installation instructions and tutorials.
|
||||||
|
|
||||||
@@ -18,3 +26,7 @@ Supported Platforms:
|
|||||||
Supported Hardware:
|
Supported Hardware:
|
||||||
* NVIDIA GPUs (Compute Capability 7.0+)
|
* NVIDIA GPUs (Compute Capability 7.0+)
|
||||||
* Under development: AMD GPUs, CPUs
|
* Under development: AMD GPUs, CPUs
|
||||||
|
|
||||||
|
# Disclaimer
|
||||||
|
|
||||||
|
Triton is a fairly recent project, and it is under active development. We expect it to be pretty useful in a wide variety of cases, but don't be surprised if it's a bit rough around the edges :)
|
@@ -6,7 +6,13 @@ Installation
|
|||||||
Binary Distributions
|
Binary Distributions
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
You can install the latest nightly release of Triton from pip:
|
You can install the latest stable release of Triton from pip:
|
||||||
|
|
||||||
|
pip install triton
|
||||||
|
|
||||||
|
Binary wheels are available for CPython 3.6-3.9 and PyPy 3.6-3.7.
|
||||||
|
|
||||||
|
And the latest nightly release:
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
@@ -27,9 +33,10 @@ You can install the Python package from source by running the following commands
|
|||||||
|
|
||||||
git clone https://github.com/ptillet/triton.git;
|
git clone https://github.com/ptillet/triton.git;
|
||||||
cd triton/python;
|
cd triton/python;
|
||||||
|
pip install cmake; # build time dependency
|
||||||
pip install -e .
|
pip install -e .
|
||||||
|
|
||||||
This may take a while (10-20 minutes) as it will download and compile LLVM from source.
|
Note that if llvm-11 is not present on your system, the setup.py script will download the LLVM static libraries from the web and link against them.
|
||||||
|
|
||||||
You can then test your installation by running the unit tests:
|
You can then test your installation by running the unit tests:
|
||||||
|
|
||||||
@@ -43,19 +50,3 @@ and the benchmarks
|
|||||||
|
|
||||||
cd bench/
|
cd bench/
|
||||||
python -m run --with-plots --result-dir /tmp/triton-bench
|
python -m run --with-plots --result-dir /tmp/triton-bench
|
||||||
|
|
||||||
+++++++++++++++
|
|
||||||
C++ Package
|
|
||||||
+++++++++++++++
|
|
||||||
|
|
||||||
Those not interested in Python integration may want to use the internals of Triton (i.e, runtime, parser, codegen, driver, intermediate representation) directly. This can be done by running the following commands:
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
git clone https://github.com/ptillet/triton.git;
|
|
||||||
mkdir build;
|
|
||||||
cd build;
|
|
||||||
cmake ../;
|
|
||||||
make -j8;
|
|
||||||
|
|
||||||
Note that while direct usage of the C++ API is not officially supported, a usage tutorial can be found `here <https://github.com/ptillet/triton/blob/master/tutorials/01-matmul.cc>`_
|
|
||||||
|
@@ -23,12 +23,13 @@ def get_llvm():
|
|||||||
paths = [distutils.spawn.find_executable(cfg) for cfg in supported]
|
paths = [distutils.spawn.find_executable(cfg) for cfg in supported]
|
||||||
paths = [p for p in paths if p is not None]
|
paths = [p for p in paths if p is not None]
|
||||||
if paths:
|
if paths:
|
||||||
return paths[0]
|
return '', ''
|
||||||
# download if nothing is installed
|
# download if nothing is installed
|
||||||
name = 'clang+llvm-11.0.1-x86_64-linux-gnu-ubuntu-16.04'
|
name = 'clang+llvm-11.0.1-x86_64-linux-gnu-ubuntu-16.04'
|
||||||
dir = '/tmp'
|
dir = '/tmp'
|
||||||
llvm_config = '{dir}/{name}/bin/llvm-config'.format(dir=dir, name=name)
|
llvm_include_dir = '{dir}/{name}/include'.format(dir=dir, name=name)
|
||||||
if not os.path.exists(llvm_config):
|
llvm_library_dir = '{dir}/{name}/lib'.format(dir=dir, name=name)
|
||||||
|
if not os.path.exists(llvm_library_dir):
|
||||||
try:
|
try:
|
||||||
shutil.rmtree(os.path.join(dir, name))
|
shutil.rmtree(os.path.join(dir, name))
|
||||||
except:
|
except:
|
||||||
@@ -38,7 +39,7 @@ def get_llvm():
|
|||||||
ftpstream = urllib.request.urlopen(url)
|
ftpstream = urllib.request.urlopen(url)
|
||||||
file = tarfile.open(fileobj=ftpstream, mode="r|xz")
|
file = tarfile.open(fileobj=ftpstream, mode="r|xz")
|
||||||
file.extractall(path=dir)
|
file.extractall(path=dir)
|
||||||
return llvm_config
|
return llvm_include_dir, llvm_library_dir
|
||||||
|
|
||||||
|
|
||||||
class CMakeExtension(Extension):
|
class CMakeExtension(Extension):
|
||||||
@@ -76,7 +77,7 @@ class CMakeBuild(build_ext):
|
|||||||
self.build_extension(ext)
|
self.build_extension(ext)
|
||||||
|
|
||||||
def build_extension(self, ext):
|
def build_extension(self, ext):
|
||||||
llvm_config = get_llvm()
|
llvm_include_dir, llvm_library_dir = get_llvm()
|
||||||
# self.debug = True
|
# self.debug = True
|
||||||
extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.path)))
|
extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.path)))
|
||||||
# create build directories
|
# create build directories
|
||||||
@@ -88,12 +89,12 @@ class CMakeBuild(build_ext):
|
|||||||
os.makedirs(llvm_build_dir)
|
os.makedirs(llvm_build_dir)
|
||||||
# python directories
|
# python directories
|
||||||
python_include_dirs = [distutils.sysconfig.get_python_inc()] + ['/usr/local/cuda/include']
|
python_include_dirs = [distutils.sysconfig.get_python_inc()] + ['/usr/local/cuda/include']
|
||||||
python_lib_dirs = distutils.sysconfig.get_config_var("LIBDIR")
|
|
||||||
cmake_args = [
|
cmake_args = [
|
||||||
"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + extdir,
|
"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + extdir,
|
||||||
"-DBUILD_TUTORIALS=OFF",
|
"-DBUILD_TUTORIALS=OFF",
|
||||||
"-DBUILD_PYTHON_MODULE=ON",
|
"-DBUILD_PYTHON_MODULE=ON",
|
||||||
"-DLLVM_CONFIG=" + llvm_config,
|
"-DLLVM_INCLUDE_DIRS=" + llvm_include_dir,
|
||||||
|
"-DLLVM_LIBRARY_DIR=" + llvm_library_dir,
|
||||||
#'-DPYTHON_EXECUTABLE=' + sys.executable,
|
#'-DPYTHON_EXECUTABLE=' + sys.executable,
|
||||||
#'-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON',
|
#'-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON',
|
||||||
"-DTRITON_LLVM_BUILD_DIR=" + llvm_build_dir,
|
"-DTRITON_LLVM_BUILD_DIR=" + llvm_build_dir,
|
||||||
@@ -126,7 +127,7 @@ setup(
|
|||||||
description="A language and compiler for custom Deep Learning operations",
|
description="A language and compiler for custom Deep Learning operations",
|
||||||
long_description="",
|
long_description="",
|
||||||
packages=["triton", "triton/_C", "triton/tools", "triton/ops", "triton/ops/blocksparse"],
|
packages=["triton", "triton/_C", "triton/tools", "triton/ops", "triton/ops/blocksparse"],
|
||||||
install_requires=["numpy", "torch"],
|
install_requires=["torch"],
|
||||||
package_data={"triton/ops": ["*.c"], "triton/ops/blocksparse": ["*.c"]},
|
package_data={"triton/ops": ["*.c"], "triton/ops/blocksparse": ["*.c"]},
|
||||||
include_package_data=True,
|
include_package_data=True,
|
||||||
ext_modules=[CMakeExtension("triton", "triton/_C/")],
|
ext_modules=[CMakeExtension("triton", "triton/_C/")],
|
||||||
|
@@ -41,8 +41,8 @@ def _add(
|
|||||||
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
# Let's also declare a helper function that to (1) allocate the output vector
|
# Let's also declare a helper function to (1) allocate the `z` tensor
|
||||||
# and (2) enqueueing the above kernel.
|
# and (2) enqueue the above kernel with appropriate grid/block sizes.
|
||||||
|
|
||||||
|
|
||||||
def add(x, y):
|
def add(x, y):
|
||||||
@@ -80,7 +80,7 @@ print(f'The maximum difference between torch and triton is ' f'{torch.max(torch.
|
|||||||
# %%
|
# %%
|
||||||
# Benchmark
|
# Benchmark
|
||||||
# -----------
|
# -----------
|
||||||
# We can now benchmark our custom op for vectors of increasing sizes to get a sense of how it does relative to PyTorch.
|
# We can now benchmark our custom op on vectors of increasing sizes to get a sense of how it does relative to PyTorch.
|
||||||
# To make things easier, Triton has a set of built-in utilities that allow us to concisely plot the performance of your custom ops
|
# To make things easier, Triton has a set of built-in utilities that allow us to concisely plot the performance of our custom ops
|
||||||
# for different problem sizes.
|
# for different problem sizes.
|
||||||
|
|
||||||
@@ -111,6 +111,6 @@ def benchmark(size, provider):
|
|||||||
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
# We can now run the decorated function above. Pass `show_plots=True` to see the plots and/or
|
# We can now run the decorated function above. Pass `print_data=True` to see the performance numbers, `show_plots=True` to plot them, and/or
|
||||||
# `save_path='/path/to/results/' to save them to disk along with raw CSV data
|
# `save_path='/path/to/results/'` to save them to disk along with raw CSV data
|
||||||
benchmark.run(print_data=True, show_plots=True)
|
benchmark.run(print_data=True, show_plots=True)
|
Reference in New Issue
Block a user