This PR does the following:
- CUDA utilities (e.g., cuGetInfo) are no longer compiled as part of libtriton.so.
- driver/llvm.cc is refactored and split between PTX codegen and Python.
- By extension, include/external is deprecated, so Triton no longer has to carry a copy of some CUDA/HIP headers.
- `triton-translate` becomes a `triton.tools.aot` Python utility that reuses functions from the triton.compile sub-module.
15 lines
348 B
MLIR
15 lines
348 B
MLIR
// Lit test for the `triton.tools.aot` Python tool: this file is passed to the
// tool with a PTX target, and the output is piped into FileCheck.
// The checks below expect the NVPTX back-end banner followed by the
// `.version 6.3`, `.target sm_80` and `.address_size 64` header directives.
// RUN: python3 -m triton.tools.aot %s --target=ptx --sm=80 --ptx-version=63 | FileCheck %s

// CHECK-LABEL: // Generated by LLVM NVPTX Back-End
// CHECK: .version 6.3
// CHECK: .target sm_80
// CHECK: .address_size 64

module attributes {"triton_gpu.num-warps" = 4 : i32} {
  // Kernel with an empty body: it takes an index and an f16 pointer argument,
  // uses neither, and returns immediately.
  func @test_empty_kernel(%lb : index, %A : !tt.ptr<f16>) {
    return
  }
}