2021-03-22 20:03:37 -04:00
|
|
|
cmake_minimum_required(VERSION 3.6)
|
|
|
|
include(ExternalProject)
|
|
|
|
|
2021-12-07 14:10:58 -08:00
|
|
|
set(CMAKE_CXX_STANDARD 17)
|
|
|
|
|
2022-10-09 11:09:49 -07:00
|
|
|
set(CMAKE_INCLUDE_CURRENT_DIR ON)
|
|
|
|
|
2021-07-27 12:38:38 -07:00
|
|
|
project(triton)
|
|
|
|
include(CTest)
|
2021-12-07 14:10:58 -08:00
|
|
|
if(NOT WIN32)
|
|
|
|
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
|
|
|
|
endif()
|
2021-07-27 12:38:38 -07:00
|
|
|
|
|
|
|
# Options
|
2022-10-09 10:55:17 -07:00
|
|
|
option(TRITON_BUILD_TUTORIALS "Build C++ Triton tutorials" ON)
|
|
|
|
option(TRITON_BUILD_PYTHON_MODULE "Build Python Triton bindings" OFF)
|
2021-07-27 12:38:38 -07:00
|
|
|
|
2022-11-08 09:44:19 -08:00
|
|
|
# Ensure Python3 vars are set correctly
|
|
|
|
# used conditionally in this file and by lit tests
|
|
|
|
find_package(Python3 REQUIRED COMPONENTS Development Interpreter)
|
|
|
|
|
2021-07-27 12:38:38 -07:00
|
|
|
# Default build type
|
|
|
|
if(NOT CMAKE_BUILD_TYPE)
|
|
|
|
message(STATUS "Default build type: Release")
|
|
|
|
set(CMAKE_BUILD_TYPE "Release")
|
|
|
|
endif()
|
|
|
|
|
2021-12-07 14:10:58 -08:00
|
|
|
if(NOT WIN32)
|
|
|
|
find_library(TERMINFO_LIBRARY tinfo)
|
|
|
|
endif()
|
2021-07-31 17:09:49 -07:00
|
|
|
|
2021-07-27 12:38:38 -07:00
|
|
|
# Compiler flags
|
|
|
|
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
|
2021-12-07 14:10:58 -08:00
|
|
|
|
2022-10-23 18:52:48 -07:00
|
|
|
# Third-party
|
|
|
|
include_directories(${PYBIND11_INCLUDE_DIR})
|
|
|
|
|
2021-12-07 14:10:58 -08:00
|
|
|
if(WIN32)
|
|
|
|
SET(BUILD_SHARED_LIBS OFF)
|
|
|
|
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/deps/dlfcn-win32/src)
|
|
|
|
add_subdirectory(deps/dlfcn-win32/src ${CMAKE_BINARY_DIR}/dlfcn-win32)
|
|
|
|
endif()
|
|
|
|
|
2022-07-27 01:32:10 -07:00
|
|
|
set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -D__STDC_FORMAT_MACROS -fPIC -std=gnu++17 -fvisibility=hidden -fvisibility-inlines-hidden")
|
|
|
|
if(APPLE)
|
|
|
|
set(CMAKE_OSX_DEPLOYMENT_TARGET 11.6)
|
|
|
|
endif()
|
|
|
|
|
2021-07-27 12:38:38 -07:00
|
|
|
|
2021-03-22 20:03:37 -04:00
|
|
|
|
|
|
|
##########
|
|
|
|
# LLVM
|
|
|
|
##########
|
2022-10-09 13:11:20 -07:00
|
|
|
if (NOT MLIR_DIR)
|
|
|
|
if(NOT LLVM_LIBRARY_DIR)
|
2021-12-07 14:10:58 -08:00
|
|
|
if(WIN32)
|
|
|
|
find_package(LLVM 13 REQUIRED COMPONENTS nvptx amdgpu)
|
|
|
|
|
|
|
|
include_directories(${LLVM_INCLUDE_DIRS})
|
|
|
|
separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS})
|
|
|
|
add_definitions(${LLVM_DEFINITIONS_LIST})
|
|
|
|
|
|
|
|
llvm_map_components_to_libnames(LLVM_LIBRARIES support core
|
|
|
|
NVPTXInfo nvptxcodegen
|
|
|
|
AMDGPUInfo AMDGPUcodegen
|
|
|
|
)
|
|
|
|
else()
|
|
|
|
find_package(LLVM 11 REQUIRED COMPONENTS "nvptx;amdgpu")
|
|
|
|
endif()
|
2021-07-28 01:51:17 -07:00
|
|
|
message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
|
|
|
|
if(APPLE)
|
|
|
|
set(CMAKE_OSX_DEPLOYMENT_TARGET "10.14")
|
|
|
|
endif()
|
2022-10-09 13:11:20 -07:00
|
|
|
# sometimes we don't want to use llvm-config, since it may have been downloaded for some specific linux distros
|
|
|
|
else()
|
2021-07-28 01:51:17 -07:00
|
|
|
set(LLVM_LDFLAGS "-L${LLVM_LIBRARY_DIR}")
|
2022-08-18 01:42:48 +08:00
|
|
|
set(LLVM_LIBRARIES
|
2022-10-09 13:11:20 -07:00
|
|
|
libLLVMNVPTXCodeGen.a
|
|
|
|
libLLVMNVPTXDesc.a
|
|
|
|
libLLVMNVPTXInfo.a
|
|
|
|
libLLVMAMDGPUDisassembler.a
|
|
|
|
libLLVMMCDisassembler.a
|
|
|
|
libLLVMAMDGPUCodeGen.a
|
|
|
|
libLLVMMIRParser.a
|
|
|
|
libLLVMGlobalISel.a
|
|
|
|
libLLVMSelectionDAG.a
|
|
|
|
libLLVMipo.a
|
|
|
|
libLLVMInstrumentation.a
|
|
|
|
libLLVMVectorize.a
|
|
|
|
libLLVMLinker.a
|
|
|
|
libLLVMIRReader.a
|
|
|
|
libLLVMAsmParser.a
|
|
|
|
libLLVMFrontendOpenMP.a
|
|
|
|
libLLVMAsmPrinter.a
|
|
|
|
libLLVMDebugInfoDWARF.a
|
|
|
|
libLLVMCodeGen.a
|
|
|
|
libLLVMTarget.a
|
|
|
|
libLLVMScalarOpts.a
|
|
|
|
libLLVMInstCombine.a
|
|
|
|
libLLVMAggressiveInstCombine.a
|
|
|
|
libLLVMTransformUtils.a
|
|
|
|
libLLVMBitWriter.a
|
|
|
|
libLLVMAnalysis.a
|
|
|
|
libLLVMProfileData.a
|
|
|
|
libLLVMObject.a
|
|
|
|
libLLVMTextAPI.a
|
|
|
|
libLLVMBitReader.a
|
|
|
|
libLLVMAMDGPUAsmParser.a
|
|
|
|
libLLVMMCParser.a
|
|
|
|
libLLVMAMDGPUDesc.a
|
|
|
|
libLLVMAMDGPUUtils.a
|
|
|
|
libLLVMMC.a
|
|
|
|
libLLVMDebugInfoCodeView.a
|
|
|
|
libLLVMDebugInfoMSF.a
|
|
|
|
libLLVMCore.a
|
|
|
|
libLLVMRemarks.a
|
|
|
|
libLLVMBitstreamReader.a
|
|
|
|
libLLVMBinaryFormat.a
|
|
|
|
libLLVMAMDGPUInfo.a
|
|
|
|
libLLVMSupport.a
|
|
|
|
libLLVMDemangle.a
|
|
|
|
libLLVMPasses.a
|
|
|
|
libLLVMAnalysis.a
|
|
|
|
libLLVMTransformUtils.a
|
|
|
|
libLLVMScalarOpts.a
|
|
|
|
libLLVMTransformUtils.a
|
|
|
|
libLLVMipo.a
|
|
|
|
libLLVMObjCARCOpts.a
|
|
|
|
libLLVMCoroutines.a
|
|
|
|
libLLVMAnalysis.a
|
|
|
|
)
|
|
|
|
endif()
|
|
|
|
set (MLIR_DIR ${LLVM_LIBRARY_DIR}/cmake/mlir)
|
2021-07-22 11:41:51 -07:00
|
|
|
endif()
|
2021-07-27 12:38:38 -07:00
|
|
|
|
|
|
|
# Python module
|
2022-10-09 10:55:17 -07:00
|
|
|
if(TRITON_BUILD_PYTHON_MODULE)
|
2021-07-27 12:38:38 -07:00
|
|
|
message(STATUS "Adding Python module")
|
2022-10-28 12:36:09 -07:00
|
|
|
set(PYTHON_SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/python/src)
|
2022-11-08 09:44:19 -08:00
|
|
|
set(PYTHON_SRC ${PYTHON_SRC_PATH}/main.cc ${PYTHON_SRC_PATH}/triton.cc)
|
2022-10-28 12:36:09 -07:00
|
|
|
include_directories("." ${PYTHON_SRC_PATH})
|
2022-10-09 12:30:44 -07:00
|
|
|
if (PYTHON_INCLUDE_DIRS)
|
2022-10-28 12:36:09 -07:00
|
|
|
include_directories(${PYTHON_INCLUDE_DIRS})
|
2022-10-09 12:30:44 -07:00
|
|
|
else()
|
2022-10-28 12:36:09 -07:00
|
|
|
include_directories(${Python3_INCLUDE_DIRS})
|
2022-10-09 12:30:44 -07:00
|
|
|
link_directories(${Python3_LIBRARY_DIRS})
|
|
|
|
link_libraries(${Python3_LIBRARIES})
|
|
|
|
add_link_options(${Python3_LINK_OPTIONS})
|
|
|
|
endif()
|
2021-07-27 12:38:38 -07:00
|
|
|
endif()
|
|
|
|
|
|
|
|
|
2022-03-20 16:41:48 +08:00
|
|
|
# # Triton
|
|
|
|
# file(GLOB_RECURSE LIBTRITON_SRC lib/*.cc)
|
2022-10-09 10:55:17 -07:00
|
|
|
# if (WIN32 AND TRITON_BUILD_PYTHON_MODULE)
|
2022-03-20 16:41:48 +08:00
|
|
|
# Python3_add_library(triton SHARED ${LIBTRITON_SRC} ${PYTHON_SRC})
|
|
|
|
# set_target_properties(triton PROPERTIES SUFFIX ".pyd")
|
|
|
|
# set_target_properties(triton PROPERTIES PREFIX "lib")
|
|
|
|
# else()
|
|
|
|
# add_library(triton SHARED ${LIBTRITON_SRC} ${PYTHON_SRC})
|
|
|
|
# endif()
|
|
|
|
|
2021-12-07 14:10:58 -08:00
|
|
|
|
2022-03-17 20:40:55 +08:00
|
|
|
# MLIR
|
2022-10-09 13:11:20 -07:00
|
|
|
find_package(MLIR REQUIRED CONFIG PATHS ${MLIR_DIR})
|
2022-03-20 16:41:48 +08:00
|
|
|
|
2022-10-09 13:11:20 -07:00
|
|
|
list(APPEND CMAKE_MODULE_PATH "${MLIR_CMAKE_DIR}")
|
|
|
|
list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}")
|
2022-03-20 16:41:48 +08:00
|
|
|
|
2022-03-17 20:40:55 +08:00
|
|
|
include(TableGen) # required by AddMLIR
|
|
|
|
include(AddLLVM)
|
|
|
|
include(AddMLIR)
|
2022-10-28 12:36:09 -07:00
|
|
|
|
|
|
|
# Disable warnings that show up in external code (gtest;pybind11)
|
2022-10-28 17:02:49 -07:00
|
|
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wno-covered-switch-default")
|
2022-03-17 20:40:55 +08:00
|
|
|
|
|
|
|
include_directories(${MLIR_INCLUDE_DIRS})
|
2022-10-09 13:11:20 -07:00
|
|
|
include_directories(${LLVM_INCLUDE_DIRS})
|
2022-03-17 20:40:55 +08:00
|
|
|
include_directories(${PROJECT_SOURCE_DIR}/include)
|
|
|
|
include_directories(${PROJECT_BINARY_DIR}/include) # Tablegen'd files
|
2022-03-20 16:41:48 +08:00
|
|
|
# link_directories(${LLVM_LIBRARY_DIR})
|
2022-03-17 20:40:55 +08:00
|
|
|
|
2022-03-20 16:41:48 +08:00
|
|
|
add_subdirectory(include)
|
2022-03-17 20:40:55 +08:00
|
|
|
add_subdirectory(lib)
|
2022-06-04 22:10:00 +08:00
|
|
|
add_subdirectory(bin)
|
2022-03-20 16:41:48 +08:00
|
|
|
|
|
|
|
add_library(triton SHARED ${PYTHON_SRC})
|
|
|
|
|
2022-04-25 11:38:16 -07:00
|
|
|
# find_package(PythonLibs REQUIRED)
|
2022-03-22 21:53:22 +08:00
|
|
|
|
2022-06-12 15:14:45 +08:00
|
|
|
set(TRITON_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
|
|
|
|
set(TRITON_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}")
|
|
|
|
|
2022-08-07 13:09:12 -07:00
|
|
|
get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
|
|
|
|
get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS)
|
|
|
|
|
2022-03-17 20:40:55 +08:00
|
|
|
target_link_libraries(triton
|
2022-07-19 13:38:48 -07:00
|
|
|
TritonAnalysis
|
2022-04-27 19:28:21 +08:00
|
|
|
TritonTransforms
|
2022-05-11 16:13:53 +08:00
|
|
|
TritonGPUTransforms
|
2022-08-18 01:42:48 +08:00
|
|
|
TritonLLVMIR
|
|
|
|
TritonPTX
|
2022-08-07 13:09:12 -07:00
|
|
|
${dialect_libs}
|
|
|
|
${conversion_libs}
|
2022-04-15 14:41:57 +08:00
|
|
|
# optimizations
|
|
|
|
MLIRPass
|
|
|
|
MLIRTransforms
|
2022-08-18 01:42:48 +08:00
|
|
|
MLIRLLVMIR
|
|
|
|
MLIRSupport
|
|
|
|
MLIRTargetLLVMIRExport
|
|
|
|
MLIRExecutionEngine
|
[Triton] Support math and libdevice ops (#91)
This PR adds basic math ops by using `MathDialect` and `libdevice` ops by using `extern_elementwise`. This is needed to compile some tutorial code (e.g., `softmax`). This PR implements only interface till PTX (so from frontend to TritonGPU-MLIR)
- Currently till TritonGPU. It cannot be lowered to PTX now.
- No special optimizations (e.g., constant folding etc) are applied.
- 14.x does not define folders for many operators for math ops, but 15.x seems to increase its coverage: https://github.com/llvm/llvm-project/blob/llvmorg-15.0.0-rc3/mlir/include/mlir/Dialect/Math/IR/MathOps.td
- No constant folding etc for `libdevice` ops.
```py
import triton
import triton.language as tl
import sys
@triton.jit
def add_kernel(
x_ptr,
y_ptr,
BLOCK_SIZE: tl.constexpr,
):
offsets = tl.arange(0, BLOCK_SIZE)
x = tl.load(x_ptr + offsets)
x = tl.sin(x)
output = tl.libdevice.sin(x)
output = tl.libdevice.fdiv_rn(output, output)
output = tl.libdevice.fmaf_rd(output, output, output)
tl.store(y_ptr + offsets, output)
if __name__ == "__main__" and len(sys.argv) >= 2:
signature = "*fp32,*fp32"
constants = {'BLOCK_SIZE': 1024}
output = triton.compile(add_kernel, signature, device=0, constants=constants, output="ttgir")
print(output)
```
->
```llvm
#blocked = #triton_gpu.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}>
module attributes {"triton_gpu.num-warps" = 4 : i32} {
func @add_kernel__Pfp32_Pfp32__2c1024(%arg0: !tt.ptr<f32>, %arg1: !tt.ptr<f32>) {
%0 = tt.make_range {end = 1024 : i32, start = 0 : i32} : tensor<1024xi32, #blocked>
%1 = tt.splat %arg0 : (!tt.ptr<f32>) -> tensor<1024x!tt.ptr<f32>, #blocked>
%2 = tt.getelementptr %1, %0 : tensor<1024x!tt.ptr<f32>, #blocked>
%3 = tt.load %2 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<1024xf32, #blocked>
%4 = math.sin %3 : tensor<1024xf32, #blocked>
%5 = tt.ext_elemwise %4 {libname = "libdevice", libpath = "/home/siwasaki/triton/python/triton/language/libdevice.10.bc", symbol = "__nv_sinf"} : tensor<1024xf32, #blocked> -> tensor<1024xf32, #blocked>
%6 = tt.ext_elemwise %5, %5 {libname = "libdevice", libpath = "/home/siwasaki/triton/python/triton/language/libdevice.10.bc", symbol = "__nv_fdiv_rn"} : tensor<1024xf32, #blocked>, tensor<1024xf32, #blocked> -> tensor<1024xf32, #blocked>
%7 = tt.ext_elemwise %6, %6, %6 {libname = "libdevice", libpath = "/home/siwasaki/triton/python/triton/language/libdevice.10.bc", symbol = "__nv_fmaf_rd"} : tensor<1024xf32, #blocked>, tensor<1024xf32, #blocked>, tensor<1024xf32, #blocked> -> tensor<1024xf32, #blocked>
%8 = tt.splat %arg1 : (!tt.ptr<f32>) -> tensor<1024x!tt.ptr<f32>, #blocked>
%9 = tt.getelementptr %8, %0 : tensor<1024x!tt.ptr<f32>, #blocked>
tt.store %9, %7 : tensor<1024xf32, #blocked>
return
}
}
```
2022-09-01 16:34:27 -07:00
|
|
|
MLIRMathToLLVM
|
2022-08-18 20:46:45 +08:00
|
|
|
MLIRNVVMToLLVMIRTranslation
|
2022-08-31 18:55:32 -07:00
|
|
|
MLIRIR
|
2022-03-17 20:40:55 +08:00
|
|
|
)
|
|
|
|
|
2021-07-22 11:41:51 -07:00
|
|
|
target_link_options(triton PRIVATE ${LLVM_LDFLAGS})
|
2021-07-22 18:03:51 -07:00
|
|
|
|
2021-12-07 14:10:58 -08:00
|
|
|
if(WIN32)
|
|
|
|
target_link_libraries(triton PRIVATE ${LLVM_LIBRARIES} dl) # dl is from dlfcn-win32
|
|
|
|
else()
|
2022-02-24 14:56:24 -08:00
|
|
|
target_link_libraries(triton ${LLVM_LIBRARIES} z)
|
2021-12-07 14:10:58 -08:00
|
|
|
endif()
|
2021-07-27 12:38:38 -07:00
|
|
|
|
2021-12-07 14:10:58 -08:00
|
|
|
|
2022-10-09 10:55:17 -07:00
|
|
|
if(TRITON_BUILD_PYTHON_MODULE AND NOT WIN32)
|
2021-07-22 11:41:51 -07:00
|
|
|
set(CMAKE_SHARED_LIBRARY_SUFFIX ".so")
|
|
|
|
# Check if the platform is MacOS
|
|
|
|
if(APPLE)
|
|
|
|
set(PYTHON_LDFLAGS "-undefined dynamic_lookup -flto")
|
|
|
|
endif()
|
|
|
|
target_link_libraries(triton ${CUTLASS_LIBRARIES} ${PYTHON_LDFLAGS})
|
2021-09-09 00:04:28 -07:00
|
|
|
endif()
|
2022-06-10 21:37:56 +08:00
|
|
|
|
|
|
|
add_subdirectory(test)
|
2022-09-04 12:50:27 +08:00
|
|
|
|
|
|
|
add_subdirectory(unittest)
|