Improve ROCm support. (#780)
- updates to support ROCm 5.2
- workarounds in tests where NV tools were used unconditionally
- implemented `get_num_blocks()` and `add_memfence()` for AMD GPU
- backported some atomics from history
- added bf16 support
- minor warnings cleanup
- added dockerfile to run on a ROCm enabled machine

Co-authored-by: B1tway <andrew.shukshov@gmail.com>
Co-authored-by: Andrey Shukshov <36711069+B1tway@users.noreply.github.com>
This commit is contained in:
@@ -268,7 +268,7 @@ std::string llir_to_amdgpu(llvm::Module* module, const std::string& _proc) {
|
||||
std::string triple = "amdgcn-amd-amdhsa";
|
||||
std::string layout = "";
|
||||
std::string features="+sramecc,-xnack";
|
||||
std::string proc = "gfx908";
|
||||
std::string proc = STRINGIFY(MI_GPU_ARCH);
|
||||
// name kernel
|
||||
auto in_time_t = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
|
||||
std::stringstream cur_time;
|
||||
|
Reference in New Issue
Block a user