[GENERAL] Some minor improvements here and there to build systems and docs (#148)

Author: Philippe Tillet
Date: 2021-07-28 01:51:17 -07:00 (committed via GitHub)
Parent: 57c1fd3366
Commit: acd5e44611
7 changed files with 60 additions and 49 deletions

python/setup.py

@@ -23,12 +23,13 @@ def get_llvm():
     paths = [distutils.spawn.find_executable(cfg) for cfg in supported]
     paths = [p for p in paths if p is not None]
     if paths:
-        return paths[0]
+        return '', ''
     # download if nothing is installed
     name = 'clang+llvm-11.0.1-x86_64-linux-gnu-ubuntu-16.04'
     dir = '/tmp'
-    llvm_config = '{dir}/{name}/bin/llvm-config'.format(dir=dir, name=name)
-    if not os.path.exists(llvm_config):
+    llvm_include_dir = '{dir}/{name}/include'.format(dir=dir, name=name)
+    llvm_library_dir = '{dir}/{name}/lib'.format(dir=dir, name=name)
+    if not os.path.exists(llvm_library_dir):
         try:
             shutil.rmtree(os.path.join(dir, name))
         except:
@@ -38,7 +39,7 @@ def get_llvm():
         ftpstream = urllib.request.urlopen(url)
         file = tarfile.open(fileobj=ftpstream, mode="r|xz")
         file.extractall(path=dir)
-    return llvm_config
+    return llvm_include_dir, llvm_library_dir

 class CMakeExtension(Extension):
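
For reference, a minimal sketch of the updated get_llvm() with the two hunks above stitched together. The `supported` list, the download URL, and the except-clause body are assumptions inferred from the surrounding context, not part of this diff:

    import distutils.spawn
    import os
    import shutil
    import tarfile
    import urllib.request

    def get_llvm():
        # prefer an llvm-config already on the PATH (exact names assumed)
        supported = ['llvm-config-11', 'llvm-config-11.0', 'llvm-config']
        paths = [distutils.spawn.find_executable(cfg) for cfg in supported]
        paths = [p for p in paths if p is not None]
        if paths:
            # empty strings tell the build to use the system LLVM as-is
            return '', ''
        # download a pre-built LLVM 11.0.1 into /tmp if nothing is installed
        name = 'clang+llvm-11.0.1-x86_64-linux-gnu-ubuntu-16.04'
        dir = '/tmp'
        llvm_include_dir = '{dir}/{name}/include'.format(dir=dir, name=name)
        llvm_library_dir = '{dir}/{name}/lib'.format(dir=dir, name=name)
        if not os.path.exists(llvm_library_dir):
            try:
                shutil.rmtree(os.path.join(dir, name))  # drop any partial extraction
            except OSError:
                pass
            # URL assumed from the archive name above
            url = 'https://github.com/llvm/llvm-project/releases/download/llvmorg-11.0.1/{name}.tar.xz'.format(name=name)
            ftpstream = urllib.request.urlopen(url)
            file = tarfile.open(fileobj=ftpstream, mode='r|xz')
            file.extractall(path=dir)
        return llvm_include_dir, llvm_library_dir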
@@ -76,7 +77,7 @@ class CMakeBuild(build_ext):
             self.build_extension(ext)

     def build_extension(self, ext):
-        llvm_config = get_llvm()
+        llvm_include_dir, llvm_library_dir = get_llvm()
         # self.debug = True
         extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.path)))
         # create build directories
@@ -88,12 +89,12 @@ class CMakeBuild(build_ext):
             os.makedirs(llvm_build_dir)
         # python directories
         python_include_dirs = [distutils.sysconfig.get_python_inc()] + ['/usr/local/cuda/include']
         python_lib_dirs = distutils.sysconfig.get_config_var("LIBDIR")
         cmake_args = [
             "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + extdir,
             "-DBUILD_TUTORIALS=OFF",
             "-DBUILD_PYTHON_MODULE=ON",
-            "-DLLVM_CONFIG=" + llvm_config,
+            "-DLLVM_INCLUDE_DIRS=" + llvm_include_dir,
+            "-DLLVM_LIBRARY_DIR=" + llvm_library_dir,
             #'-DPYTHON_EXECUTABLE=' + sys.executable,
             #'-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON',
             "-DTRITON_LLVM_BUILD_DIR=" + llvm_build_dir,
@@ -126,7 +127,7 @@ setup(
     description="A language and compiler for custom Deep Learning operations",
     long_description="",
     packages=["triton", "triton/_C", "triton/tools", "triton/ops", "triton/ops/blocksparse"],
-    install_requires=["numpy", "torch"],
+    install_requires=["torch"],
     package_data={"triton/ops": ["*.c"], "triton/ops/blocksparse": ["*.c"]},
     include_package_data=True,
     ext_modules=[CMakeExtension("triton", "triton/_C/")],

python/tutorials/01-vector-add.py

@@ -41,8 +41,8 @@ def _add(

 # %%
-# Let's also declare a helper function that to (1) allocate the output vector
-# and (2) enqueueing the above kernel.
+# Let's also declare a helper function to (1) allocate the `z` tensor
+# and (2) enqueue the above kernel with appropriate grid/block sizes.

 def add(x, y):
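
The body of add() falls outside this hunk; a minimal sketch of what such a helper looks like in this tutorial's style, assuming torch, triton, and the _add kernel are defined earlier, with BLOCK=1024 as an illustrative choice:

    def add(x, y):
        # (1) allocate the `z` tensor
        z = torch.empty_like(x)
        N = z.shape[0]
        # (2) enqueue the kernel on a 1D grid with enough blocks to cover N elements
        grid = lambda meta: (triton.cdiv(N, meta['BLOCK']),)
        _add[grid](x, y, z, N, BLOCK=1024)
        return z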
@@ -80,7 +80,7 @@ print(f'The maximum difference between torch and triton is ' f'{torch.max(torch.
 # %%
 # Benchmark
 # -----------
-# We can now benchmark our custom op for vectors of increasing sizes to get a sense of how it does relative to PyTorch.
+# We can now benchmark our custom op on vectors of increasing sizes to get a sense of how it does relative to PyTorch.
 # To make things easier, Triton has a set of built-in utilities that allow us to concisely plot the performance of our custom ops
 # for different problem sizes.
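
The benchmark function referenced in the next hunk is built with Triton's plotting utilities. A hedged sketch of that pattern, where the swept sizes, line names, and GB/s conversion are illustrative assumptions rather than this file's exact values:

    @triton.testing.perf_report(
        triton.testing.Benchmark(
            x_names=['size'],                      # argument swept along the x-axis
            x_vals=[2**i for i in range(12, 28)],  # illustrative problem sizes
            x_log=True,
            line_arg='provider',                   # one curve per provider
            line_vals=['torch', 'triton'],
            line_names=['Torch', 'Triton'],
            ylabel='GB/s',
            plot_name='vector-add-performance',
            args={},
        )
    )
    def benchmark(size, provider):
        x = torch.rand(size, device='cuda', dtype=torch.float32)
        y = torch.rand(size, device='cuda', dtype=torch.float32)
        if provider == 'torch':
            ms, min_ms, max_ms = triton.testing.do_bench(lambda: x + y)
        if provider == 'triton':
            ms, min_ms, max_ms = triton.testing.do_bench(lambda: add(x, y))
        # 3 float32 tensors are moved per element: 12 bytes total
        gbps = lambda ms: 12 * size / ms * 1e-6
        return gbps(ms), gbps(max_ms), gbps(min_ms)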
@@ -111,6 +111,6 @@ def benchmark(size, provider):

 # %%
-# We can now run the decorated function above. Pass `show_plots=True` to see the plots and/or
+# We can now run the decorated function above. Pass `print_data=True` to see the performance numbers, `show_plots=True` to plot them, and/or
 # `save_path='/path/to/results/'` to save them to disk along with raw CSV data.
 benchmark.run(print_data=True, show_plots=True)
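
For example, to also write the plots and the raw CSV to disk (the path is illustrative):

    benchmark.run(print_data=True, show_plots=True, save_path='./results/')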