[GENERAL] Some minor improvements here and there to build systems and docs (#148)
This commit is contained in:
@@ -23,12 +23,13 @@ def get_llvm():
|
||||
paths = [distutils.spawn.find_executable(cfg) for cfg in supported]
|
||||
paths = [p for p in paths if p is not None]
|
||||
if paths:
|
||||
return paths[0]
|
||||
return '', ''
|
||||
# download if nothing is installed
|
||||
name = 'clang+llvm-11.0.1-x86_64-linux-gnu-ubuntu-16.04'
|
||||
dir = '/tmp'
|
||||
llvm_config = '{dir}/{name}/bin/llvm-config'.format(dir=dir, name=name)
|
||||
if not os.path.exists(llvm_config):
|
||||
llvm_include_dir = '{dir}/{name}/include'.format(dir=dir, name=name)
|
||||
llvm_library_dir = '{dir}/{name}/lib'.format(dir=dir, name=name)
|
||||
if not os.path.exists(llvm_library_dir):
|
||||
try:
|
||||
shutil.rmtree(os.path.join(dir, name))
|
||||
except:
|
||||
@@ -38,7 +39,7 @@ def get_llvm():
|
||||
ftpstream = urllib.request.urlopen(url)
|
||||
file = tarfile.open(fileobj=ftpstream, mode="r|xz")
|
||||
file.extractall(path=dir)
|
||||
return llvm_config
|
||||
return llvm_include_dir, llvm_library_dir
|
||||
|
||||
|
||||
class CMakeExtension(Extension):
|
||||
@@ -76,7 +77,7 @@ class CMakeBuild(build_ext):
|
||||
self.build_extension(ext)
|
||||
|
||||
def build_extension(self, ext):
|
||||
llvm_config = get_llvm()
|
||||
llvm_include_dir, llvm_library_dir = get_llvm()
|
||||
# self.debug = True
|
||||
extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.path)))
|
||||
# create build directories
|
||||
@@ -88,12 +89,12 @@ class CMakeBuild(build_ext):
|
||||
os.makedirs(llvm_build_dir)
|
||||
# python directories
|
||||
python_include_dirs = [distutils.sysconfig.get_python_inc()] + ['/usr/local/cuda/include']
|
||||
python_lib_dirs = distutils.sysconfig.get_config_var("LIBDIR")
|
||||
cmake_args = [
|
||||
"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + extdir,
|
||||
"-DBUILD_TUTORIALS=OFF",
|
||||
"-DBUILD_PYTHON_MODULE=ON",
|
||||
"-DLLVM_CONFIG=" + llvm_config,
|
||||
"-DLLVM_INCLUDE_DIRS=" + llvm_include_dir,
|
||||
"-DLLVM_LIBRARY_DIR=" + llvm_library_dir,
|
||||
#'-DPYTHON_EXECUTABLE=' + sys.executable,
|
||||
#'-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON',
|
||||
"-DTRITON_LLVM_BUILD_DIR=" + llvm_build_dir,
|
||||
@@ -126,7 +127,7 @@ setup(
|
||||
description="A language and compiler for custom Deep Learning operations",
|
||||
long_description="",
|
||||
packages=["triton", "triton/_C", "triton/tools", "triton/ops", "triton/ops/blocksparse"],
|
||||
install_requires=["numpy", "torch"],
|
||||
install_requires=["torch"],
|
||||
package_data={"triton/ops": ["*.c"], "triton/ops/blocksparse": ["*.c"]},
|
||||
include_package_data=True,
|
||||
ext_modules=[CMakeExtension("triton", "triton/_C/")],
|
||||
|
@@ -41,8 +41,8 @@ def _add(
|
||||
|
||||
|
||||
# %%
|
||||
# Let's also declare a helper function that to (1) allocate the output vector
|
||||
# and (2) enqueueing the above kernel.
|
||||
# Let's also declare a helper function to (1) allocate the `z` tensor
|
||||
# and (2) enqueue the above kernel with appropriate grid/block sizes.
|
||||
|
||||
|
||||
def add(x, y):
|
||||
@@ -80,7 +80,7 @@ print(f'The maximum difference between torch and triton is ' f'{torch.max(torch.
|
||||
# %%
|
||||
# Benchmark
|
||||
# -----------
|
||||
# We can now benchmark our custom op for vectors of increasing sizes to get a sense of how it does relative to PyTorch.
|
||||
# We can now benchmark our custom op on vectors of increasing sizes to get a sense of how it does relative to PyTorch.
|
||||
# To make things easier, Triton has a set of built-in utilities that allow us to concisely plot the performance of our custom ops
|
||||
# for different problem sizes.
|
||||
|
||||
@@ -111,6 +111,6 @@ def benchmark(size, provider):
|
||||
|
||||
|
||||
# %%
|
||||
# We can now run the decorated function above. Pass `show_plots=True` to see the plots and/or
|
||||
# We can now run the decorated function above. Pass `print_data=True` to see the performance numbers, `show_plots=True` to plot them, and/or
|
||||
# `save_path='/path/to/results/'` to save them to disk along with raw CSV data
|
||||
benchmark.run(print_data=True, show_plots=True)
|
Reference in New Issue
Block a user