[GENERAL] Removed deprecated driver files and added basic compatibility with rocm (#268)
- Removed driver module -- accelerator runtime is handled by pytorch - Added basic support for ROCM based on @micmelesse 's PR -- now can execute empty kernel on AMD devices without any compile-time changes - Now only using PREFER_SHARED for kernels when the size of shared memory is greater than 49k. Otherwise there can be poor L1 performance for broadcast tensors
This commit is contained in:
@@ -31,7 +31,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__STDC_FORMAT_MACROS -std=gnu++17")
|
||||
# LLVM
|
||||
##########
|
||||
if("${LLVM_LIBRARY_DIR}" STREQUAL "")
|
||||
find_package(LLVM 11 REQUIRED COMPONENTS "nvptx")
|
||||
find_package(LLVM 11 REQUIRED COMPONENTS "nvptx;amdgpu")
|
||||
message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
|
||||
if(APPLE)
|
||||
set(CMAKE_OSX_DEPLOYMENT_TARGET "10.14")
|
||||
@@ -39,14 +39,52 @@ if("${LLVM_LIBRARY_DIR}" STREQUAL "")
|
||||
# sometimes we don't want to use llvm-config, since it may have been downloaded for some specific linux distros
|
||||
else()
|
||||
set(LLVM_LDFLAGS "-L${LLVM_LIBRARY_DIR}")
|
||||
set(LLVM_LIBRARIES libLLVMNVPTXCodeGen.a libLLVMSelectionDAG.a libLLVMipo.a libLLVMInstrumentation.a
|
||||
libLLVMVectorize.a libLLVMLinker.a libLLVMIRReader.a libLLVMAsmParser.a libLLVMFrontendOpenMP.a
|
||||
libLLVMAsmPrinter.a libLLVMDebugInfoDWARF.a libLLVMCodeGen.a libLLVMTarget.a libLLVMScalarOpts.a
|
||||
libLLVMInstCombine.a libLLVMAggressiveInstCombine.a libLLVMTransformUtils.a libLLVMBitWriter.a
|
||||
libLLVMAnalysis.a libLLVMProfileData.a libLLVMObject.a libLLVMTextAPI.a libLLVMMCParser.a
|
||||
libLLVMBitReader.a libLLVMCore.a libLLVMRemarks.a libLLVMBitstreamReader.a libLLVMNVPTXDesc.a
|
||||
libLLVMMC.a libLLVMDebugInfoCodeView.a libLLVMDebugInfoMSF.a libLLVMBinaryFormat.a libLLVMNVPTXInfo.a
|
||||
libLLVMSupport.a libLLVMDemangle.a)
|
||||
set(LLVM_LIBRARIES
|
||||
libLLVMNVPTXCodeGen.a
|
||||
libLLVMNVPTXDesc.a
|
||||
libLLVMNVPTXInfo.a
|
||||
libLLVMAMDGPUDisassembler.a
|
||||
libLLVMMCDisassembler.a
|
||||
libLLVMAMDGPUCodeGen.a
|
||||
libLLVMMIRParser.a
|
||||
libLLVMGlobalISel.a
|
||||
libLLVMSelectionDAG.a
|
||||
libLLVMipo.a
|
||||
libLLVMInstrumentation.a
|
||||
libLLVMVectorize.a
|
||||
libLLVMLinker.a
|
||||
libLLVMIRReader.a
|
||||
libLLVMAsmParser.a
|
||||
libLLVMFrontendOpenMP.a
|
||||
libLLVMAsmPrinter.a
|
||||
libLLVMDebugInfoDWARF.a
|
||||
libLLVMCodeGen.a
|
||||
libLLVMTarget.a
|
||||
libLLVMScalarOpts.a
|
||||
libLLVMInstCombine.a
|
||||
libLLVMAggressiveInstCombine.a
|
||||
libLLVMTransformUtils.a
|
||||
libLLVMBitWriter.a
|
||||
libLLVMAnalysis.a
|
||||
libLLVMProfileData.a
|
||||
libLLVMObject.a
|
||||
libLLVMTextAPI.a
|
||||
libLLVMBitReader.a
|
||||
libLLVMAMDGPUAsmParser.a
|
||||
libLLVMMCParser.a
|
||||
libLLVMAMDGPUDesc.a
|
||||
libLLVMAMDGPUUtils.a
|
||||
libLLVMMC.a
|
||||
libLLVMDebugInfoCodeView.a
|
||||
libLLVMDebugInfoMSF.a
|
||||
libLLVMCore.a
|
||||
libLLVMRemarks.a
|
||||
libLLVMBitstreamReader.a
|
||||
libLLVMBinaryFormat.a
|
||||
libLLVMAMDGPUInfo.a
|
||||
libLLVMSupport.a
|
||||
libLLVMDemangle.a
|
||||
)
|
||||
endif()
|
||||
include_directories("${LLVM_INCLUDE_DIRS}")
|
||||
|
||||
@@ -82,4 +120,4 @@ if(BUILD_PYTHON_MODULE)
|
||||
set(PYTHON_LDFLAGS "-undefined dynamic_lookup -flto")
|
||||
endif()
|
||||
target_link_libraries(triton ${CUTLASS_LIBRARIES} ${PYTHON_LDFLAGS})
|
||||
endif()
|
||||
endif()
|
||||
|
Reference in New Issue
Block a user