Improve ROCm support. (#780)
- updates to support ROCm 5.2 - workarounds in tests where NV tools were used unconditionally - implemented `get_num_blocks()` and `add_memfence()` for AMD GPU - backported from history some atomics - added bf16 support - minor warnings cleanup - added dockerfile to run on a ROCm enabled machine Co-authored-by: B1tway <andrew.shukshov@gmail.com> Co-authored-by: Andrey Shukshov <36711069+B1tway@users.noreply.github.com>
This commit is contained in:
@@ -41,6 +41,11 @@ endif()
|
||||
|
||||
if (TRITON_USE_ROCM)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__STDC_FORMAT_MACROS -std=gnu++17 -Wno-unused-result -Wno-attributes")
|
||||
set(MI_GPU_ARCH $ENV{MI_GPU_ARCH})
|
||||
if (NOT MI_GPU_ARCH)
|
||||
set(MI_GPU_ARCH "gfx90a")
|
||||
endif()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMI_GPU_ARCH=${MI_GPU_ARCH}")
|
||||
else()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__STDC_FORMAT_MACROS -std=gnu++17")
|
||||
endif()
|
||||
|
Reference in New Issue
Block a user