Merge `triton-mlir` branch - Complete rewrite of the backend from scratch (#1004)
This PR merges the `triton-mlir` branch, in which we have been quietly rewriting the Triton backend from scratch to increase maintainability, stability and, ultimately, performance. Changes to the runtime are minimal, and this new version aims to remain backward-compatible with the previous commit. The legacy backend is now officially deprecated, but can still be accessed via the `legacy-backend` tag.

Co-authored-by: Keren Zhou <kerenzhou@openai.com>
Co-authored-by: Yan Chunwei <yanchunwei@outlook.com>
Co-authored-by: goostavz <109190422+goostavz@users.noreply.github.com>
Co-authored-by: Shintaro Iwasaki <siwasaki@fb.com>
Co-authored-by: Yan Da <dyanab@connect.ust.hk>
Co-authored-by: Jun Yang <yangjunpro@gmail.com>
Co-authored-by: Ian Bearman <ianb@microsoft.com>
Co-authored-by: Jason Ansel <jansel@jansel.net>
Co-authored-by: Qingyi Liu <qingyil@nvidia.com>
Co-authored-by: ben-zhang-609 <110140741+ben-zhang-609@users.noreply.github.com>
Co-authored-by: Chenggang Zhao <lyricz@yeah.net>
Co-authored-by: ben-zhang-609 <benzh609@gmail.com>
Co-authored-by: dongdongl <dongdongl@nvidia.com>
This commit is contained in:
238
CMakeLists.txt
238
CMakeLists.txt
@@ -3,6 +3,8 @@ include(ExternalProject)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
set(CMAKE_INCLUDE_CURRENT_DIR ON)
|
||||
|
||||
project(triton)
|
||||
include(CTest)
|
||||
if(NOT WIN32)
|
||||
@@ -10,8 +12,16 @@ if(NOT WIN32)
|
||||
endif()
|
||||
|
||||
# Options
|
||||
option(BUILD_TUTORIALS "Build C++ Triton tutorials" ON)
|
||||
option(BUILD_PYTHON_MODULE "Build Python Triton bindings" OFF)
|
||||
option(TRITON_BUILD_TUTORIALS "Build C++ Triton tutorials" ON)
|
||||
option(TRITON_BUILD_PYTHON_MODULE "Build Python Triton bindings" OFF)
|
||||
|
||||
# Ensure Python3 vars are set correctly
|
||||
# used conditionally in this file and by lit tests
|
||||
find_package(Python3 REQUIRED COMPONENTS Development Interpreter)
|
||||
|
||||
# Customized release build type with assertions: TritonRelBuildWithAsserts
|
||||
set(CMAKE_C_FLAGS_TRITONRELBUILDWITHASSERTS "-O2 -g")
|
||||
set(CMAKE_CXX_FLAGS_TRITONRELBUILDWITHASSERTS "-O2 -g")
|
||||
|
||||
# Default build type
|
||||
if(NOT CMAKE_BUILD_TYPE)
|
||||
@@ -35,13 +45,18 @@ if(WIN32)
|
||||
add_subdirectory(deps/dlfcn-win32/src ${CMAKE_BINARY_DIR}/dlfcn-win32)
|
||||
endif()
|
||||
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__STDC_FORMAT_MACROS -std=gnu++17")
|
||||
# Append the C++ flags Triton requires to the C++ flags that are already set.
# Seeding from CMAKE_CXX_FLAGS (not CMAKE_C_FLAGS) keeps user-supplied C++ flags
# and avoids passing C-only flags to the C++ compiler.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__STDC_FORMAT_MACROS -fPIC -std=gnu++17 -fvisibility=hidden -fvisibility-inlines-hidden")
|
||||
if(APPLE)
|
||||
set(CMAKE_OSX_DEPLOYMENT_TARGET 11.6)
|
||||
endif()
|
||||
|
||||
|
||||
|
||||
##########
|
||||
# LLVM
|
||||
##########
|
||||
if("${LLVM_LIBRARY_DIR}" STREQUAL "")
|
||||
if (NOT MLIR_DIR)
|
||||
if(NOT LLVM_LIBRARY_DIR)
|
||||
if(WIN32)
|
||||
find_package(LLVM 13 REQUIRED COMPONENTS nvptx amdgpu)
|
||||
|
||||
@@ -60,95 +75,148 @@ if("${LLVM_LIBRARY_DIR}" STREQUAL "")
|
||||
if(APPLE)
|
||||
set(CMAKE_OSX_DEPLOYMENT_TARGET "10.14")
|
||||
endif()
|
||||
# sometimes we don't want to use llvm-config, since it may have been downloaded for some specific linux distros
|
||||
else()
|
||||
# sometimes we don't want to use llvm-config, since it may have been downloaded for some specific linux distros
|
||||
else()
|
||||
set(LLVM_LDFLAGS "-L${LLVM_LIBRARY_DIR}")
|
||||
set(LLVM_LIBRARIES
|
||||
libLLVMNVPTXCodeGen.a
|
||||
libLLVMNVPTXDesc.a
|
||||
libLLVMNVPTXInfo.a
|
||||
libLLVMAMDGPUDisassembler.a
|
||||
libLLVMMCDisassembler.a
|
||||
libLLVMAMDGPUCodeGen.a
|
||||
libLLVMMIRParser.a
|
||||
libLLVMGlobalISel.a
|
||||
libLLVMSelectionDAG.a
|
||||
libLLVMipo.a
|
||||
libLLVMInstrumentation.a
|
||||
libLLVMVectorize.a
|
||||
libLLVMLinker.a
|
||||
libLLVMIRReader.a
|
||||
libLLVMAsmParser.a
|
||||
libLLVMFrontendOpenMP.a
|
||||
libLLVMAsmPrinter.a
|
||||
libLLVMDebugInfoDWARF.a
|
||||
libLLVMCodeGen.a
|
||||
libLLVMTarget.a
|
||||
libLLVMScalarOpts.a
|
||||
libLLVMInstCombine.a
|
||||
libLLVMAggressiveInstCombine.a
|
||||
libLLVMTransformUtils.a
|
||||
libLLVMBitWriter.a
|
||||
libLLVMAnalysis.a
|
||||
libLLVMProfileData.a
|
||||
libLLVMObject.a
|
||||
libLLVMTextAPI.a
|
||||
libLLVMBitReader.a
|
||||
libLLVMAMDGPUAsmParser.a
|
||||
libLLVMMCParser.a
|
||||
libLLVMAMDGPUDesc.a
|
||||
libLLVMAMDGPUUtils.a
|
||||
libLLVMMC.a
|
||||
libLLVMDebugInfoCodeView.a
|
||||
libLLVMDebugInfoMSF.a
|
||||
libLLVMCore.a
|
||||
libLLVMRemarks.a
|
||||
libLLVMBitstreamReader.a
|
||||
libLLVMBinaryFormat.a
|
||||
libLLVMAMDGPUInfo.a
|
||||
libLLVMSupport.a
|
||||
libLLVMDemangle.a
|
||||
libLLVMPasses.a
|
||||
libLLVMAnalysis.a
|
||||
libLLVMTransformUtils.a
|
||||
libLLVMScalarOpts.a
|
||||
libLLVMTransformUtils.a
|
||||
libLLVMipo.a
|
||||
libLLVMObjCARCOpts.a
|
||||
libLLVMCoroutines.a
|
||||
libLLVMAnalysis.a
|
||||
)
|
||||
libLLVMNVPTXCodeGen.a
|
||||
libLLVMNVPTXDesc.a
|
||||
libLLVMNVPTXInfo.a
|
||||
libLLVMAMDGPUDisassembler.a
|
||||
libLLVMMCDisassembler.a
|
||||
libLLVMAMDGPUCodeGen.a
|
||||
libLLVMMIRParser.a
|
||||
libLLVMGlobalISel.a
|
||||
libLLVMSelectionDAG.a
|
||||
libLLVMipo.a
|
||||
libLLVMInstrumentation.a
|
||||
libLLVMVectorize.a
|
||||
libLLVMLinker.a
|
||||
libLLVMIRReader.a
|
||||
libLLVMAsmParser.a
|
||||
libLLVMFrontendOpenMP.a
|
||||
libLLVMAsmPrinter.a
|
||||
libLLVMDebugInfoDWARF.a
|
||||
libLLVMCodeGen.a
|
||||
libLLVMTarget.a
|
||||
libLLVMScalarOpts.a
|
||||
libLLVMInstCombine.a
|
||||
libLLVMAggressiveInstCombine.a
|
||||
libLLVMTransformUtils.a
|
||||
libLLVMBitWriter.a
|
||||
libLLVMAnalysis.a
|
||||
libLLVMProfileData.a
|
||||
libLLVMObject.a
|
||||
libLLVMTextAPI.a
|
||||
libLLVMBitReader.a
|
||||
libLLVMAMDGPUAsmParser.a
|
||||
libLLVMMCParser.a
|
||||
libLLVMAMDGPUDesc.a
|
||||
libLLVMAMDGPUUtils.a
|
||||
libLLVMMC.a
|
||||
libLLVMDebugInfoCodeView.a
|
||||
libLLVMDebugInfoMSF.a
|
||||
libLLVMCore.a
|
||||
libLLVMRemarks.a
|
||||
libLLVMBitstreamReader.a
|
||||
libLLVMBinaryFormat.a
|
||||
libLLVMAMDGPUInfo.a
|
||||
libLLVMSupport.a
|
||||
libLLVMDemangle.a
|
||||
libLLVMPasses.a
|
||||
libLLVMAnalysis.a
|
||||
libLLVMTransformUtils.a
|
||||
libLLVMScalarOpts.a
|
||||
libLLVMTransformUtils.a
|
||||
libLLVMipo.a
|
||||
libLLVMObjCARCOpts.a
|
||||
libLLVMCoroutines.a
|
||||
libLLVMAnalysis.a
|
||||
)
|
||||
endif()
|
||||
set (MLIR_DIR ${LLVM_LIBRARY_DIR}/cmake/mlir)
|
||||
endif()
|
||||
include_directories("${LLVM_INCLUDE_DIRS}")
|
||||
|
||||
# Python module
|
||||
if(BUILD_PYTHON_MODULE)
|
||||
if(TRITON_BUILD_PYTHON_MODULE)
|
||||
message(STATUS "Adding Python module")
|
||||
# Build CUTLASS python wrapper if requested
|
||||
set(PYTHON_SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/python/src)
|
||||
set(CUTLASS_INCLUDE_DIR "$ENV{CUTLASS_INCLUDE_DIR}")
|
||||
set(CUTLASS_LIBRARY_DIR "$ENV{CUTLASS_LIBRARY_DIR}")
|
||||
if(NOT("${CUTLASS_INCLUDE_DIR}" STREQUAL "") AND NOT("${CUTLASS_LIBRARY_DIR}" STREQUAL ""))
|
||||
set(CUTLASS_SRC ${PYTHON_SRC_PATH}/cutlass.cc)
|
||||
add_definitions(-DWITH_CUTLASS_BINDINGS)
|
||||
set(CUTLASS_LIBRARIES "cutlass.a")
|
||||
set(PYTHON_SRC ${PYTHON_SRC_PATH}/main.cc ${PYTHON_SRC_PATH}/triton.cc)
|
||||
include_directories("." ${PYTHON_SRC_PATH})
|
||||
if (PYTHON_INCLUDE_DIRS)
|
||||
include_directories(${PYTHON_INCLUDE_DIRS})
|
||||
else()
|
||||
include_directories(${Python3_INCLUDE_DIRS})
|
||||
link_directories(${Python3_LIBRARY_DIRS})
|
||||
link_libraries(${Python3_LIBRARIES})
|
||||
add_link_options(${Python3_LINK_OPTIONS})
|
||||
endif()
|
||||
include_directories("." ${PYTHON_SRC_PATH} ${PYTHON_INCLUDE_DIRS} ${CUTLASS_INCLUDE_DIR})
|
||||
link_directories(${PYTHON_LINK_DIRS} ${CUTLASS_LIBRARY_DIR})
|
||||
set(PYTHON_SRC ${PYTHON_SRC_PATH}/main.cc ${PYTHON_SRC_PATH}/triton.cc ${PYTHON_SRC_PATH}/superblock.cc ${CUTLASS_SRC})
|
||||
endif()
|
||||
|
||||
|
||||
# Triton
|
||||
file(GLOB_RECURSE LIBTRITON_SRC lib/*.cc)
|
||||
if (WIN32 AND BUILD_PYTHON_MODULE)
|
||||
find_package(Python3 REQUIRED COMPONENTS Development)
|
||||
Python3_add_library(triton SHARED ${LIBTRITON_SRC} ${PYTHON_SRC})
|
||||
set_target_properties(triton PROPERTIES SUFFIX ".pyd")
|
||||
set_target_properties(triton PROPERTIES PREFIX "lib")
|
||||
else()
|
||||
add_library(triton SHARED ${LIBTRITON_SRC} ${PYTHON_SRC})
|
||||
endif()
|
||||
# # Triton
|
||||
# file(GLOB_RECURSE LIBTRITON_SRC lib/*.cc)
|
||||
# if (WIN32 AND TRITON_BUILD_PYTHON_MODULE)
|
||||
# Python3_add_library(triton SHARED ${LIBTRITON_SRC} ${PYTHON_SRC})
|
||||
# set_target_properties(triton PROPERTIES SUFFIX ".pyd")
|
||||
# set_target_properties(triton PROPERTIES PREFIX "lib")
|
||||
# else()
|
||||
# add_library(triton SHARED ${LIBTRITON_SRC} ${PYTHON_SRC})
|
||||
# endif()
|
||||
|
||||
|
||||
# MLIR
|
||||
find_package(MLIR REQUIRED CONFIG PATHS ${MLIR_DIR})
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH "${MLIR_CMAKE_DIR}")
|
||||
list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}")
|
||||
|
||||
include(TableGen) # required by AddMLIR
|
||||
include(AddLLVM)
|
||||
include(AddMLIR)
|
||||
|
||||
# Treat warnings as errors, but disable warnings that show up in external code (gtest;pybind11)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wno-covered-switch-default")
|
||||
|
||||
include_directories(${MLIR_INCLUDE_DIRS})
|
||||
include_directories(${LLVM_INCLUDE_DIRS})
|
||||
include_directories(${PROJECT_SOURCE_DIR}/include)
|
||||
include_directories(${PROJECT_BINARY_DIR}/include) # Tablegen'd files
|
||||
# link_directories(${LLVM_LIBRARY_DIR})
|
||||
|
||||
add_subdirectory(include)
|
||||
add_subdirectory(lib)
|
||||
add_subdirectory(bin)
|
||||
|
||||
add_library(triton SHARED ${PYTHON_SRC})
|
||||
|
||||
# find_package(PythonLibs REQUIRED)
|
||||
|
||||
set(TRITON_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
|
||||
set(TRITON_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}")
|
||||
|
||||
get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
|
||||
get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS)
|
||||
|
||||
target_link_libraries(triton
|
||||
TritonAnalysis
|
||||
TritonTransforms
|
||||
TritonGPUTransforms
|
||||
TritonLLVMIR
|
||||
TritonPTX
|
||||
${dialect_libs}
|
||||
${conversion_libs}
|
||||
# optimizations
|
||||
MLIRPass
|
||||
MLIRTransforms
|
||||
MLIRLLVMIR
|
||||
MLIRSupport
|
||||
MLIRTargetLLVMIRExport
|
||||
MLIRExecutionEngine
|
||||
MLIRMathToLLVM
|
||||
MLIRNVVMToLLVMIRTranslation
|
||||
MLIRIR
|
||||
)
|
||||
|
||||
target_link_options(triton PRIVATE ${LLVM_LDFLAGS})
|
||||
|
||||
@@ -159,7 +227,7 @@ else()
|
||||
endif()
|
||||
|
||||
|
||||
if(BUILD_PYTHON_MODULE AND NOT WIN32)
|
||||
if(TRITON_BUILD_PYTHON_MODULE AND NOT WIN32)
|
||||
set(CMAKE_SHARED_LIBRARY_SUFFIX ".so")
|
||||
# Check if the platform is MacOS
|
||||
if(APPLE)
|
||||
@@ -167,3 +235,7 @@ if(BUILD_PYTHON_MODULE AND NOT WIN32)
|
||||
endif()
|
||||
target_link_libraries(triton ${CUTLASS_LIBRARIES} ${PYTHON_LDFLAGS})
|
||||
endif()
|
||||
|
||||
add_subdirectory(test)
|
||||
|
||||
add_subdirectory(unittest)
|
||||
|
Reference in New Issue
Block a user