# triton/bin/CMakeLists.txt

# Process the CMakeLists.txt in the FileCheck/ subdirectory.
# NOTE(review): presumably that is where the FileCheck tool target is defined;
# confirm against FileCheck/CMakeLists.txt.
add_subdirectory(FileCheck)
# Inline alternative that would define the target from this file, left disabled:
# add_llvm_executable(FileCheck FileCheck/FileCheck.cpp)
# target_link_libraries(FileCheck PRIVATE LLVMFileCheck LLVMSupport)

# Read the MLIR_DIALECT_LIBS and MLIR_CONVERSION_LIBS global properties into
# variables. Both variables are expanded into the link lines of triton-opt and
# triton-translate further down in this file.
# NOTE(review): the properties are presumably filled in by MLIR's library
# registration helpers as dialect/conversion libraries are declared; confirm.
get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS)

# ---------------------------------------------------------------------------
# triton-opt: executable built from triton-opt.cpp.
# ---------------------------------------------------------------------------
# NOTE(review): PARTIAL_SOURCES_INTENDED presumably tells LLVM's source-list
# check that the other .cpp files in this directory (e.g. triton-translate.cpp)
# are deliberately not part of this target; confirm against AddLLVM.cmake.
add_llvm_executable(triton-opt
  triton-opt.cpp
  PARTIAL_SOURCES_INTENDED
)

# NOTE(review): presumably applies LLVM's common compile flags to the target;
# confirm against AddLLVM.cmake before removing or moving this call.
llvm_update_compile_flags(triton-opt)

# Order of the entries below is kept as-is; it can matter for static linking.
target_link_libraries(triton-opt
  PRIVATE
    # Triton libraries
    TritonAnalysis
    TritonTransforms
    TritonGPUTransforms
    # Lists read from the MLIR global properties at the top of this file
    ${dialect_libs}
    ${conversion_libs}
    # Test-only library
    TritonTestAnalysis
    # Core MLIR libraries
    MLIROptLib
    MLIRPass
    MLIRTransforms
)

# MLIR-provided helper run on the finished target.
# NOTE(review): by its name it validates the link libraries; confirm.
mlir_check_all_link_libraries(triton-opt)


# ---------------------------------------------------------------------------
# triton-translate: executable built from triton-translate.cpp.
# ---------------------------------------------------------------------------
# NOTE(review): PARTIAL_SOURCES_INTENDED presumably tells LLVM's source-list
# check that the other .cpp files in this directory (e.g. triton-opt.cpp) are
# deliberately not part of this target; confirm against AddLLVM.cmake.
add_llvm_executable(triton-translate
  triton-translate.cpp
  PARTIAL_SOURCES_INTENDED
)

# NOTE(review): presumably applies LLVM's common compile flags to the target;
# confirm against AddLLVM.cmake before removing or moving this call.
llvm_update_compile_flags(triton-translate)

# Order of the entries below is kept as-is; it can matter for static linking.
target_link_libraries(triton-translate
  PRIVATE
    # Triton libraries
    TritonAnalysis
    TritonTransforms
    TritonGPUTransforms
    TritonLLVMIR
    TritonDriver
    # Lists read from the MLIR global properties at the top of this file
    ${dialect_libs}
    ${conversion_libs}
    # Test-only library
    TritonTestAnalysis

    # LLVM components
    LLVMCore
    LLVMSupport
    LLVMOption
    LLVMCodeGen
    LLVMAsmParser

    # Core MLIR libraries
    MLIROptLib
    MLIRIR
    MLIRPass
    MLIRSupport
    MLIRTransforms
    MLIRExecutionEngine
    MLIRMathToLLVM
    MLIRTransformUtils
    MLIRLLVMToLLVMIRTranslation
    MLIRNVVMToLLVMIRTranslation
)

# MLIR-provided helper run on the finished target.
# NOTE(review): by its name it validates the link libraries; confirm.
mlir_check_all_link_libraries(triton-translate)
[PACKAGING] Added FileCheck 2022-07-07 16:53:19 -07:00			`add_subdirectory(FileCheck)`
			`# add_llvm_executable(FileCheck FileCheck/FileCheck.cpp)`
			`# target_link_libraries(FileCheck PRIVATE LLVMFileCheck LLVMSupport)`

Add triton's opt 2022-06-04 22:10:00 +08:00			`get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)`
			`get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS)`

[BACKEND] add triton-translate to translate mlir to llvmir or PTX code (#37) 2022-08-08 13:34:36 +08:00			`add_llvm_executable(triton-opt triton-opt.cpp PARTIAL_SOURCES_INTENDED)`
Add triton's opt 2022-06-04 22:10:00 +08:00
			`# TODO: what's this?`
[BACKEND] add triton-translate to translate mlir to llvmir or PTX code (#37) 2022-08-08 13:34:36 +08:00			`llvm_update_compile_flags(triton-opt)`
Add triton's opt 2022-06-04 22:10:00 +08:00			`target_link_libraries(triton-opt PRIVATE`
[Analysis] Added Axis Info Analysis (#8) 2022-07-19 13:38:48 -07:00			`TritonAnalysis`
update mma encoding & triton-opt 2022-06-06 21:03:58 +08:00			`TritonTransforms`
			`TritonGPUTransforms`
Add triton's opt 2022-06-04 22:10:00 +08:00			`${dialect_libs}`
			`${conversion_libs}`
[Analysis] Added Axis Info Analysis (#8) 2022-07-19 13:38:48 -07:00			`# tests`
			`TritonTestAnalysis`
			`# MLIR core`
Add triton's opt 2022-06-04 22:10:00 +08:00			`MLIROptLib`
update mma encoding & triton-opt 2022-06-06 21:03:58 +08:00			`MLIRPass`
			`MLIRTransforms`
Add triton's opt 2022-06-04 22:10:00 +08:00			`)`

[BACKEND] add triton-translate to translate mlir to llvmir or PTX code (#37) 2022-08-08 13:34:36 +08:00			`mlir_check_all_link_libraries(triton-opt)`


			`add_llvm_executable(triton-translate triton-translate.cpp PARTIAL_SOURCES_INTENDED)`
			`llvm_update_compile_flags(triton-translate)`
			`target_link_libraries(triton-translate PRIVATE`
			`TritonAnalysis`
			`TritonTransforms`
			`TritonGPUTransforms`
			`TritonLLVMIR`
			`TritonDriver`
			`${dialect_libs}`
			`${conversion_libs}`
			`# tests`
			`TritonTestAnalysis`

			`LLVMCore`
			`LLVMSupport`
			`LLVMOption`
			`LLVMCodeGen`
			`LLVMAsmParser`

			`# MLIR core`
			`MLIROptLib`
			`MLIRIR`
			`MLIRPass`
			`MLIRSupport`
			`MLIRTransforms`
			`MLIRExecutionEngine`
[Triton] Support math and libdevice ops (#91) This PR adds basic math ops by using `MathDialect` and `libdevice` ops by using `extern_elementwise`. This is needed to compile some tutorial code (e.g., `softmax`). This PR implements only interface till PTX (so from frontend to TritonGPU-MLIR) - Currently till TritonGPU. It cannot be lowered to PTX now. - No special optimizations (e.g., constant folding etc) are applied. - 14.x does not define folders for many operators for math ops, but 15.x seems to increase its coverage: https://github.com/llvm/llvm-project/blob/llvmorg-15.0.0-rc3/mlir/include/mlir/Dialect/Math/IR/MathOps.td - No constant folding etc for `libdevice` ops. ```py import triton import triton.language as tl import sys @triton.jit def add_kernel( x_ptr, y_ptr, BLOCK_SIZE: tl.constexpr, ): offsets = tl.arange(0, BLOCK_SIZE) x = tl.load(x_ptr + offsets) x = tl.sin(x) output = tl.libdevice.sin(x) output = tl.libdevice.fdiv_rn(output, output) output = tl.libdevice.fmaf_rd(output, output, output) tl.store(y_ptr + offsets, output) if __name__ == "__main__" and len(sys.argv) >= 2: signature = "fp32,fp32" constants = {'BLOCK_SIZE': 1024} output = triton.compile(add_kernel, signature, device=0, constants=constants, output="ttgir") print(output) ``` -> ```llvm #blocked = #triton_gpu.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}> module attributes {"triton_gpu.num-warps" = 4 : i32} { func @add_kernel__Pfp32_Pfp32__2c1024(%arg0: !tt.ptr<f32>, %arg1: !tt.ptr<f32>) { %0 = tt.make_range {end = 1024 : i32, start = 0 : i32} : tensor<1024xi32, #blocked> %1 = tt.splat %arg0 : (!tt.ptr<f32>) -> tensor<1024x!tt.ptr<f32>, #blocked> %2 = tt.getelementptr %1, %0 : tensor<1024x!tt.ptr<f32>, #blocked> %3 = tt.load %2 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<1024xf32, #blocked> %4 = math.sin %3 : tensor<1024xf32, #blocked> %5 = tt.ext_elemwise %4 {libname = "libdevice", libpath = 
"/home/siwasaki/triton/python/triton/language/libdevice.10.bc", symbol = "__nv_sinf"} : tensor<1024xf32, #blocked> -> tensor<1024xf32, #blocked> %6 = tt.ext_elemwise %5, %5 {libname = "libdevice", libpath = "/home/siwasaki/triton/python/triton/language/libdevice.10.bc", symbol = "__nv_fdiv_rn"} : tensor<1024xf32, #blocked>, tensor<1024xf32, #blocked> -> tensor<1024xf32, #blocked> %7 = tt.ext_elemwise %6, %6, %6 {libname = "libdevice", libpath = "/home/siwasaki/triton/python/triton/language/libdevice.10.bc", symbol = "__nv_fmaf_rd"} : tensor<1024xf32, #blocked>, tensor<1024xf32, #blocked>, tensor<1024xf32, #blocked> -> tensor<1024xf32, #blocked> %8 = tt.splat %arg1 : (!tt.ptr<f32>) -> tensor<1024x!tt.ptr<f32>, #blocked> %9 = tt.getelementptr %8, %0 : tensor<1024x!tt.ptr<f32>, #blocked> tt.store %9, %7 : tensor<1024xf32, #blocked> return } } ``` 2022-09-01 16:34:27 -07:00			`MLIRMathToLLVM`
[BACKEND] add triton-translate to translate mlir to llvmir or PTX code (#37) 2022-08-08 13:34:36 +08:00			`MLIRTransformUtils`
			`MLIRLLVMToLLVMIRTranslation`
[BACKEND] Add backend support of arith::AddIOp, arith::AddFOp, GetProgramIdOp & GEPOp and bugfix for SplatOp, StoreOp, FuncOp (#60) Add backend support of arith::AddIOp, arith::AddFOp, GetProgramIdOp, GEPOp and bugfix for SplatOp, StoreOp, FuncOp Co-authored-by: gzhu <gzhu@nvidia.com> 2022-08-18 20:46:45 +08:00			`MLIRNVVMToLLVMIRTranslation`
[BACKEND] add triton-translate to translate mlir to llvmir or PTX code (#37) 2022-08-08 13:34:36 +08:00			`)`
			`mlir_check_all_link_libraries(triton-translate)`