diff --git a/python/examples/dot.py b/python/examples/dot.py index 29d6f9470..6c79e846c 100644 --- a/python/examples/dot.py +++ b/python/examples/dot.py @@ -6,6 +6,9 @@ import setuptools.command.build_ext import setuptools import numpy as np import os +import tempfile +import shutil +import hashlib src = """ const tunable int TM = {128}; @@ -46,55 +49,94 @@ void matmul(restrict read_only align(16) half *A, } """ + extra_ops = tf.load_op_library('/home/philippe/development/triton/python/build/lib.linux-x86_64-3.6/libextra_tf_ops.so') -with open('test.cpp', 'w+') as test: - src = libtriton.make_tensorflow_src(src, [2], '(M + #TM - 1)/#TM, (N + #TN - 1)/#TN, 1') - test.writelines(src) +def make_bindings(src, outputs, grids): + return libtriton.make_tensorflow_src(src, outputs, grids) -triton_include_dirs = ['/home/philippe/development/triton/include'] -tensorflow_include_dirs = [tf.sysconfig.get_include()] -cuda_include_dirs = ['/usr/local/cuda-10.1/targets/x86_64-linux/include/'] +def make_cache_path(src): + md5 = hashlib.sha1(src.encode()) + hexhash = md5.hexdigest() + home = os.path.expanduser('~') + cacheroot = os.path.join(home, '.triton', 'cache') + cachepath = os.path.join(cacheroot, str(hexhash)) + if not os.path.exists(cachepath): + os.makedirs(cachepath) + print(cachepath) + return cachepath -triton_library_dirs = [os.path.realpath(os.path.join(libtriton.__file__, os.path.pardir))] -tensorflow_library_dirs = [tf.sysconfig.get_lib()] +def write_bindings(src, root): + cpp = os.path.join(root, 'tensorflow.cpp') + so = os.path.join(root, 'tensorflow.so') + recompile = False + # recompile if .so does not exist + if not os.path.exists(cpp) or not os.path.exists(so): + recompile = True + # recompile if cpp was modified after .so + elif max(cpp, so, key=os.path.getctime) == cpp: + recompile = True + # write cpp file + if recompile: + with open(cpp, 'w+') as handle: + handle.writelines(src) + # return path of cpp file + return cpp + +def build(src, path): + # include directories + triton_include_dirs = ['/home/philippe/development/triton/include'] + tensorflow_include_dirs = [tf.sysconfig.get_include()] + cuda_include_dirs = ['/usr/local/cuda-10.1/targets/x86_64-linux/include/'] + include_dirs = triton_include_dirs + tensorflow_include_dirs + cuda_include_dirs + # library directories + triton_library_dirs = [os.path.realpath(os.path.join(libtriton.__file__, os.path.pardir))] + tensorflow_library_dirs = [tf.sysconfig.get_lib()] + library_dirs = triton_library_dirs + tensorflow_library_dirs + # libraries + libraries = ['tensorflow_framework', 'triton'] + # extra arguments + extra_compile_args = [] + extra_link_args = [] + # create extension module + ext = setuptools.Extension( + name = 'test', + language = 'c++', + sources = [src], + include_dirs = include_dirs, + extra_compile_args = extra_compile_args, + extra_link_args = extra_link_args, + library_dirs = library_dirs, + libraries = libraries + ) + # build extension module + args = ['build_ext'] + tmp = tempfile.mkdtemp() + args.append('--build-temp=' + tmp) + args.append('--build-lib=' + path) + args.append('-q') + args = dict( + name = 'test', + ext_modules = [ext], + script_args = args, + ) + setuptools.setup(**args) + shutil.rmtree(tmp) -include_dirs = triton_include_dirs + tensorflow_include_dirs + cuda_include_dirs -extra_compile_args = [] -extra_link_args = [] -library_dirs = triton_library_dirs + tensorflow_library_dirs -libraries = ['tensorflow_framework', 'triton'] +def make_tensorflow_op(src, outputs, grids): + bindings = make_bindings(src, outputs, grids) + cache_path = make_cache_path(bindings) + cpp = write_bindings(bindings, cache_path) + build(cpp, cache_path) + result = tf.load_op_library(os.path.join(cache_path, 'test.cpython-36m-x86_64-linux-gnu.so')) + return result -ext = setuptools.Extension( - name = 'test', - language = 'c++', - sources = ['/home/philippe/development/triton/python/examples/test.cpp'], - include_dirs = include_dirs, - extra_compile_args = extra_compile_args, - extra_link_args = extra_link_args, - library_dirs = library_dirs, - libraries = libraries -) -build_path = '.' -args = ['build_ext'] -#args.append('--build-temp=' + build_path) -#args.append('--build-lib=' + build_path) -args.append('-q') -args = dict( - name = 'test', - ext_modules = [ext], - script_args = args, - cmdclass = { - 'build_ext': setuptools.command.build_ext.build_ext - } - -) - -setuptools.setup(**args) library_dir = os.path.dirname(os.path.realpath(__file__)) -module = tf.load_op_library(os.path.join(library_dir, 'build/lib.linux-x86_64-3.6/test.cpython-36m-x86_64-linux-gnu.so')) +module = make_tensorflow_op(src, ['C'], ['(M + #TM - 1)/#TM', '(N + #TN - 1)/#TN']) +print(module.matmul) + class dot: diff --git a/python/src/tensorflow.cpp b/python/src/tensorflow.cpp index c1c224916..40810fc75 100644 --- a/python/src/tensorflow.cpp +++ b/python/src/tensorflow.cpp @@ -75,8 +75,8 @@ inline std::unique_ptr make_ir(ir::context& ctx, triton::lang::trans } std::string make_tensorflow_src(const std::string src, - const std::vector& outputs, - const std::string& macro) { + const std::vector& outputs, + const std::vector& macros) { triton::lang::translation_unit *ast = make_ast(src.c_str()); triton::ir::context context; std::unique_ptr ir = make_ir(context, ast); @@ -108,7 +108,12 @@ std::string make_tensorflow_src(const std::string src, std::transform(fn_ty->params_begin(), fn_ty->params_end(), std::back_inserter(tf_scalar_tys), to_tf_scalar_ty); std::vector tf_cref_tys; std::transform(fn_ty->params_begin(), fn_ty->params_end(), std::back_inserter(tf_cref_tys), ref_to_tf_ty); - + // output indices + std::vector out_idx; + for(const std::string &name : outputs){ + auto it = std::find(arg_names.begin(), arg_names.end(), name); + out_idx.push_back(std::distance(arg_names.begin(), it)); + } std::ostringstream oss; std::string result = R"( @@ -161,7 +166,7 @@ result += R"( // extract outputs)"; for(unsigned i = 0; i < n_outputs; i++) result += R"( - context->set_output()" + str_i[i] + ", " + arg_names[outputs[i]] + ");"; + context->set_output()" + str_i[i] + ", " + outputs[i] + ");"; result += R"( @@ -172,12 +177,21 @@ result += R"( std::regex regex("#([a-zA-Z]([a-zA-Z]|[0-9])*)"); -std::string grid_str = std::regex_replace(macro, regex, "x.at(\"$1\")"); +std::vector grids; +for(size_t i = macros.size(); i < 3; i++) + grids.push_back("1"); +std::string grid = "rt::grid_t{"; +for(size_t i = 0; i < grids.size(); i++){ + if(i > 0) + grid += ", "; + grid += std::regex_replace(grids[i], regex, "x.at(\"$1\")"); +} +grid += "}"; result += R"( // create launch grid; - auto grid = [&](const rt::params_t& x) { return rt::grid_t{)" + grid_str + R"(}; };)"; + auto grid = [&](const rt::params_t& x) { return )" + grid + R"(; };)"; result += R"( @@ -213,14 +227,12 @@ result += ", " + classname + R"(); REGISTER_OP(")" + name + "\")\n"; for(size_t i = 0; i < tf_scalar_tys.size(); i++){ - bool is_output = std::find(outputs.begin(), outputs.end(), i) != outputs.end(); - std::string mode = is_output ? "Input" : "Input" ; std::string arg_name = arg_names[i]; std::transform(arg_name.begin(), arg_name.end(), arg_name.begin(), [](char c) { return std::tolower(c);}); result += " .Input(\"" + arg_name + ": " + tf_scalar_tys[i] + "\")\n"; } for(size_t i = 0; i < outputs.size(); i++){ - result += " .Output(\"out: " + tf_scalar_tys[outputs[i]] + "\")\n"; + result += " .Output(\"out" + std::to_string(i) + ": " + tf_scalar_tys[out_idx[i]] + "\")\n"; } result += ";\n";