[GENERAL] Merged einsum feature branch. Various features, performance
improvements, and bugfixes:

* Added preliminary support for extended Einstein summation in PyTriton
* Significant performance improvement on FP32 kernels containing matrix multiplication
* Added re-coalescing pass for FP16 kernels containing matrix multiplication
* Various bugfixes
2020-01-16 12:09:50 -05:00
parent 50a52df489
commit f278d9741a
49 changed files with 1923 additions and 994 deletions
--- a/python/setup.py
+++ b/python/setup.py
@@ -77,7 +77,7 @@ class CMakeBuild(build_ext):
            pass

        cfg = 'Debug' if self.debug else 'Release'
-        #cfg = 'Release'
+        cfg = 'Release'
        build_args = ['--config', cfg]

        if platform.system() == "Windows":