Improve ROCm support. (#780)
- updates to support ROCm 5.2
- workarounds in tests where NV tools were used unconditionally
- implemented `get_num_blocks()` and `add_memfence()` for AMD GPU
- backported some atomics from history
- added bf16 support
- minor warnings cleanup
- added dockerfile to run on a ROCm enabled machine

Co-authored-by: B1tway <andrew.shukshov@gmail.com>
Co-authored-by: Andrey Shukshov <36711069+B1tway@users.noreply.github.com>
This commit is contained in:
@@ -212,6 +212,7 @@ bool dispatch::hipinit(){
|
||||
return res;
|
||||
}
|
||||
|
||||
#define HIP_DEFINE0(ret, fname) DEFINE0(hipinit, hip_, ret, fname)
|
||||
#define HIP_DEFINE1(ret, fname, t1) DEFINE1(hipinit, hip_, ret, fname, t1)
|
||||
#define HIP_DEFINE2(ret, fname, t1, t2) DEFINE2(hipinit, hip_, ret, fname, t1, t2)
|
||||
#define HIP_DEFINE3(ret, fname, t1, t2, t3) DEFINE3(hipinit, hip_, ret, fname, t1, t2, t3)
|
||||
@@ -268,7 +269,8 @@ HIP_DEFINE2(hipError_t, hipEventCreate, hipEvent_t *, unsigned int)
|
||||
HIP_DEFINE3(hipError_t, hipEventElapsedTime, float *, hipEvent_t, hipEvent_t)
|
||||
HIP_DEFINE2(hipError_t, hipEventRecord, hipEvent_t, hipStream_t)
|
||||
HIP_DEFINE1(hipError_t, hipEventDestroy, hipEvent_t)
|
||||
|
||||
// error handling
|
||||
HIP_DEFINE0(hipError_t, hipGetLastError)
|
||||
|
||||
/* ------------------- *
|
||||
* COMMON
|
||||
|
@@ -268,7 +268,7 @@ std::string llir_to_amdgpu(llvm::Module* module, const std::string& _proc) {
|
||||
std::string triple = "amdgcn-amd-amdhsa";
|
||||
std::string layout = "";
|
||||
std::string features="+sramecc,-xnack";
|
||||
std::string proc = "gfx908";
|
||||
std::string proc = STRINGIFY(MI_GPU_ARCH);
|
||||
// name kernel
|
||||
auto in_time_t = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
|
||||
std::stringstream cur_time;
|
||||
|
Reference in New Issue
Block a user