[RUNTIME] Major code cleanup (#711)
This PR does the following:
- CUDA utilities (e.g., cuGetInfo) are no longer compiled as part of libtriton.so.
- driver/llvm.cc is refactored and split between PTX codegen and Python.
- By extension, include/external is deprecated, so Triton no longer has to live with a copy of some CUDA/HIP headers.
- `triton-translate` becomes a `triton.tools.aot` Python utility that reuses functions from the triton.compile sub-module.
This commit is contained in:
@@ -1,34 +1,17 @@
|
||||
#ifndef TRITON_TARGET_PTXTRANSLATION_H
|
||||
#define TRITON_TARGET_PTXTRANSLATION_H
|
||||
|
||||
#include "triton/driver/dispatch.h"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
namespace mlir {
|
||||
|
||||
class ModuleOp;
|
||||
|
||||
} // namespace mlir
|
||||
namespace llvm {
|
||||
class Module;
|
||||
} // namespace llvm
|
||||
|
||||
namespace triton {
|
||||
|
||||
/// Queries one integer attribute of a CUDA device through the driver
/// dispatch layer.
///
/// @tparam attr  Attribute to query, fixed at compile time.
/// @param device Device handle forwarded to cuDeviceGetAttribute.
/// @return The value written by the driver call, or 0 if the call left the
///         output untouched.
template <CUdevice_attribute attr> int cuGetInfo(CUdevice device) {
  // The status returned by the dispatch call is not inspected here, so give
  // the output a defined value instead of returning an indeterminate int.
  int res = 0;
  driver::dispatch::cuDeviceGetAttribute(&res, attr, device);
  return res;
}
|
||||
|
||||
void getCuCCAndVersionFromDevice(uint64_t device, int *cc, int *version,
|
||||
std::string *ptxasPath);
|
||||
|
||||
// Translate TritonGPU IR to PTX code.
|
||||
std::tuple<std::string, // ptx code
|
||||
size_t, // PTX cc
|
||||
int, // PTX version
|
||||
std::string // ptxas path
|
||||
>
|
||||
translateTritonGPUToPTX(mlir::ModuleOp module, uint64_t device);
|
||||
std::string translateLLVMIRToPTX(llvm::Module &module, int cc, int version);
|
||||
|
||||
} // namespace triton
|
||||
|
||||
|
@@ -1,376 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef _TRITON_DRIVER_DISPATCH_H_
|
||||
#define _TRITON_DRIVER_DISPATCH_H_
|
||||
|
||||
#include <dlfcn.h>
|
||||
#include <string>
#include <type_traits>
|
||||
|
||||
// CUDA Backend
|
||||
#include "triton/external/CUDA/cuda.h"
|
||||
#include "triton/external/CUDA/nvml.h"
|
||||
|
||||
//// HIP backend
|
||||
//#define __HIP_PLATFORM_AMD__
|
||||
#include "triton/external/hip.h"
|
||||
|
||||
// Exceptions
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace llvm {
|
||||
class PassRegistry;
|
||||
class Module;
|
||||
} // namespace llvm
|
||||
|
||||
namespace triton {
|
||||
namespace driver {
|
||||
|
||||
class cu_context;
|
||||
|
||||
// Catch-all overload: a value of any type without a dedicated check()
// overload is accepted and ignored.
template <typename StatusT> void check(StatusT /*unused*/) {}
|
||||
void check(CUresult err);
|
||||
void check(hipError_t err);
|
||||
|
||||
class dispatch {
|
||||
protected:
|
||||
// Maps a function-pointer type `Ret (*)(Params...)` to its return type.
// Only the function-pointer specialization is defined; instantiating the
// trait with any other type leaves it incomplete.
template <typename FnPtr> struct return_type;

template <typename Ret, typename... Params>
struct return_type<Ret (*)(Params...)> {
  using type = Ret;
};
|
||||
|
||||
typedef bool (*f_init_t)();
|
||||
|
||||
template <f_init_t initializer, typename FunPtrT, typename... Args>
|
||||
static typename return_type<FunPtrT>::type
|
||||
f_impl(void *&lib_h, FunPtrT, void *&cache, const char *name, Args... args) {
|
||||
initializer();
|
||||
if (cache == nullptr) {
|
||||
cache = dlsym(lib_h, name);
|
||||
if (cache == 0) {
|
||||
#ifdef __EXCEPTIONS
|
||||
throw std::runtime_error("dlsym unable to load function");
|
||||
#else
|
||||
std::cerr << "Triton: dlsym unable to load function `" << name << "`"
|
||||
<< std::endl;
|
||||
std::abort();
|
||||
#endif
|
||||
}
|
||||
}
|
||||
FunPtrT fptr;
|
||||
*reinterpret_cast<void **>(&fptr) = cache;
|
||||
typename return_type<FunPtrT>::type res = (*fptr)(args...);
|
||||
check(res);
|
||||
return res;
|
||||
}
|
||||
|
||||
public:
|
||||
static void release();
|
||||
// Nvidia
|
||||
static bool nvmlinit();
|
||||
static bool cuinit();
|
||||
// AMD
|
||||
static bool hipinit();
|
||||
|
||||
/* ------------------- *
|
||||
* CUDA
|
||||
* ------------------- */
|
||||
// context management
|
||||
static CUresult cuInit(unsigned int Flags);
|
||||
static CUresult cuCtxDestroy_v2(CUcontext ctx);
|
||||
static CUresult cuCtxCreate_v2(CUcontext *pctx, unsigned int flags,
|
||||
CUdevice dev);
|
||||
static CUresult cuCtxPushCurrent_v2(CUcontext ctx);
|
||||
static CUresult cuCtxPopCurrent_v2(CUcontext *pctx);
|
||||
static CUresult cuCtxGetDevice(CUdevice *result);
|
||||
static CUresult cuCtxEnablePeerAccess(CUcontext peerContext,
|
||||
unsigned int flags);
|
||||
static CUresult cuDriverGetVersion(int *driverVersion);
|
||||
// device management
|
||||
static CUresult cuDeviceGet(CUdevice *device, int ordinal);
|
||||
static CUresult cuDeviceGetName(char *name, int len, CUdevice dev);
|
||||
static CUresult cuDeviceGetPCIBusId(char *id, int len, CUdevice dev);
|
||||
static CUresult cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib,
|
||||
CUdevice dev);
|
||||
static CUresult cuDeviceGetCount(int *count);
|
||||
// link management
|
||||
static CUresult cuLinkAddData_v2(CUlinkState state, CUjitInputType type,
|
||||
void *data, size_t size, const char *name,
|
||||
unsigned int numOptions,
|
||||
CUjit_option *options, void **optionValues);
|
||||
static CUresult cuLinkCreate_v2(unsigned int numOptions,
|
||||
CUjit_option *options, void **optionValues,
|
||||
CUlinkState *stateOut);
|
||||
static CUresult cuLinkComplete(CUlinkState state, void **cubinOut,
|
||||
size_t *sizeOut);
|
||||
static CUresult cuLinkDestroy(CUlinkState state);
|
||||
// module management
|
||||
static CUresult cuModuleGetGlobal_v2(CUdeviceptr *dptr, size_t *bytes,
|
||||
CUmodule hmod, const char *name);
|
||||
static CUresult cuModuleLoad(CUmodule *module, const char *fname);
|
||||
static CUresult cuModuleLoadData(CUmodule *module, const void *image);
|
||||
static CUresult cuModuleUnload(CUmodule hmod);
|
||||
static CUresult cuModuleLoadDataEx(CUmodule *module, const void *image,
|
||||
unsigned int numOptions,
|
||||
CUjit_option *options,
|
||||
void **optionValues);
|
||||
static CUresult cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod,
|
||||
const char *name);
|
||||
// stream management
|
||||
static CUresult cuStreamCreate(CUstream *phStream, unsigned int Flags);
|
||||
static CUresult cuStreamSynchronize(CUstream hStream);
|
||||
static CUresult cuStreamGetCtx(CUstream hStream, CUcontext *pctx);
|
||||
static CUresult cuStreamDestroy_v2(CUstream hStream);
|
||||
static CUresult cuLaunchKernel(CUfunction f, unsigned int gridDimX,
|
||||
unsigned int gridDimY, unsigned int gridDimZ,
|
||||
unsigned int blockDimX, unsigned int blockDimY,
|
||||
unsigned int blockDimZ,
|
||||
unsigned int sharedMemBytes, CUstream hStream,
|
||||
void **kernelParams, void **extra);
|
||||
// function management
|
||||
static CUresult cuFuncGetAttribute(int *pi, CUfunction_attribute attrib,
|
||||
CUfunction hfunc);
|
||||
static CUresult cuFuncSetAttribute(CUfunction hfunc,
|
||||
CUfunction_attribute attrib, int value);
|
||||
static CUresult cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
|
||||
// memory management
|
||||
static CUresult cuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize);
|
||||
static CUresult cuPointerGetAttribute(void *data,
|
||||
CUpointer_attribute attribute,
|
||||
CUdeviceptr ptr);
|
||||
static CUresult cuMemsetD8Async(CUdeviceptr dst, unsigned char x, size_t N,
|
||||
CUstream stream);
|
||||
static CUresult cuMemcpyDtoH_v2(void *dstHost, CUdeviceptr srcDevice,
|
||||
size_t ByteCount);
|
||||
static CUresult cuMemFree_v2(CUdeviceptr dptr);
|
||||
static CUresult cuMemcpyDtoHAsync_v2(void *dstHost, CUdeviceptr srcDevice,
|
||||
size_t ByteCount, CUstream hStream);
|
||||
static CUresult cuMemcpyHtoDAsync_v2(CUdeviceptr dstDevice,
|
||||
const void *srcHost, size_t ByteCount,
|
||||
CUstream hStream);
|
||||
static CUresult cuMemcpyHtoD_v2(CUdeviceptr dstDevice, const void *srcHost,
|
||||
size_t ByteCount);
|
||||
// event management
|
||||
static CUresult cuEventCreate(CUevent *phEvent, unsigned int Flags);
|
||||
static CUresult cuEventElapsedTime(float *pMilliseconds, CUevent hStart,
|
||||
CUevent hEnd);
|
||||
static CUresult cuEventRecord(CUevent hEvent, CUstream hStream);
|
||||
static CUresult cuEventDestroy_v2(CUevent hEvent);
|
||||
|
||||
/* ------------------- *
|
||||
* NVML
|
||||
* ------------------- */
|
||||
static nvmlReturn_t nvmlDeviceGetHandleByPciBusId_v2(const char *pciBusId,
|
||||
nvmlDevice_t *device);
|
||||
static nvmlReturn_t nvmlDeviceGetClockInfo(nvmlDevice_t device,
|
||||
nvmlClockType_t type,
|
||||
unsigned int *clock);
|
||||
static nvmlReturn_t nvmlDeviceGetMaxClockInfo(nvmlDevice_t device,
|
||||
nvmlClockType_t type,
|
||||
unsigned int *clock);
|
||||
static nvmlReturn_t nvmlDeviceSetApplicationsClocks(nvmlDevice_t device,
|
||||
unsigned int mem_clock,
|
||||
unsigned int sm_clock);
|
||||
|
||||
/* ------------------- *
|
||||
* HIP
|
||||
* ------------------- */
|
||||
// context management
|
||||
static hipError_t hipInit(unsigned int Flags);
|
||||
static hipError_t hipCtxDestroy(hipCtx_t ctx);
|
||||
static hipError_t hipCtxCreate(hipCtx_t *pctx, unsigned int flags,
|
||||
hipDevice_t dev);
|
||||
static hipError_t hipCtxPushCurrent(hipCtx_t ctx);
|
||||
static hipError_t hipCtxPopCurrent(hipCtx_t *pctx);
|
||||
static hipError_t hipCtxGetDevice(hipDevice_t *result);
|
||||
static hipError_t hipCtxEnablePeerAccess(hipCtx_t peerContext,
|
||||
unsigned int flags);
|
||||
static hipError_t hipDriverGetVersion(int *driverVersion);
|
||||
// device management
|
||||
static hipError_t hipGetDevice(hipDevice_t *device, int ordinal);
|
||||
static hipError_t hipDeviceGetName(char *name, int len, hipDevice_t dev);
|
||||
static hipError_t hipDeviceGetPCIBusId(char *id, int len, hipDevice_t dev);
|
||||
static hipError_t hipDeviceGetAttribute(int *pi, hipDeviceAttribute_t attrib,
|
||||
hipDevice_t dev);
|
||||
static hipError_t hipGetDeviceCount(int *count);
|
||||
// module management
|
||||
static hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes,
|
||||
hipModule_t hmod, const char *name);
|
||||
static hipError_t hipModuleLoad(hipModule_t *module, const char *fname);
|
||||
static hipError_t hipModuleLoadData(hipModule_t *module, const void *image);
|
||||
static hipError_t hipModuleUnload(hipModule_t hmod);
|
||||
static hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image,
|
||||
unsigned int numOptions,
|
||||
hipJitOption *options,
|
||||
void **optionValues);
|
||||
static hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod,
|
||||
const char *name);
|
||||
// stream management
|
||||
static hipError_t hipStreamCreate(hipStream_t *phStream, unsigned int Flags);
|
||||
static hipError_t hipStreamSynchronize(hipStream_t hStream);
|
||||
static hipError_t hipStreamDestroy(hipStream_t hStream);
|
||||
static hipError_t
|
||||
hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX,
|
||||
unsigned int gridDimY, unsigned int gridDimZ,
|
||||
unsigned int blockDimX, unsigned int blockDimY,
|
||||
unsigned int blockDimZ, unsigned int sharedMemBytes,
|
||||
hipStream_t hStream, void **kernelParams, void **extra);
|
||||
// function management
|
||||
static hipError_t hipFuncGetAttributes(hipFuncAttributes *attrib,
|
||||
void *hfunc);
|
||||
static hipError_t hipFuncSetAttribute(hipFunction_t hfunc,
|
||||
hipFuncAttribute attrib, int value);
|
||||
static hipError_t hipFuncSetCacheConfig(hipFunction_t hfunc,
|
||||
hipFuncCache_t config);
|
||||
// memory management
|
||||
static hipError_t hipMalloc(hipDeviceptr_t *dptr, size_t bytesize);
|
||||
static hipError_t hipPointerGetAttribute(void *data,
|
||||
CUpointer_attribute attribute,
|
||||
hipDeviceptr_t ptr);
|
||||
static hipError_t hipMemsetD8Async(hipDeviceptr_t dst, unsigned char x,
|
||||
size_t N, hipStream_t stream);
|
||||
static hipError_t hipMemcpyDtoH(void *dstHost, hipDeviceptr_t srcDevice,
|
||||
size_t ByteCount);
|
||||
static hipError_t hipFree(hipDeviceptr_t dptr);
|
||||
static hipError_t hipMemcpyDtoHAsync(void *dstHost, hipDeviceptr_t srcDevice,
|
||||
size_t ByteCount, hipStream_t hStream);
|
||||
static hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dstDevice,
|
||||
const void *srcHost, size_t ByteCount,
|
||||
hipStream_t hStream);
|
||||
static hipError_t hipMemcpyHtoD(hipDeviceptr_t dstDevice, const void *srcHost,
|
||||
size_t ByteCount);
|
||||
// event management
|
||||
static hipError_t hipEventCreate(hipEvent_t *phEvent, unsigned int Flags);
|
||||
static hipError_t hipEventElapsedTime(float *pMilliseconds, hipEvent_t hStart,
|
||||
hipEvent_t hEnd);
|
||||
static hipError_t hipEventRecord(hipEvent_t hEvent, hipStream_t hStream);
|
||||
static hipError_t hipEventDestroy(hipEvent_t hEvent);
|
||||
|
||||
private:
|
||||
// Libraries
|
||||
static void *cuda_;
|
||||
static void *nvml_;
|
||||
static void *hip_;
|
||||
|
||||
/* ------------------- *
|
||||
* CUDA
|
||||
* ------------------- */
|
||||
// context management
|
||||
static void *cuCtxGetCurrent_;
|
||||
static void *cuCtxSetCurrent_;
|
||||
static void *cuCtxDestroy_v2_;
|
||||
static void *cuCtxCreate_v2_;
|
||||
static void *cuCtxGetDevice_;
|
||||
static void *cuCtxPushCurrent_v2_;
|
||||
static void *cuCtxPopCurrent_v2_;
|
||||
static void *cuCtxEnablePeerAccess_;
|
||||
static void *cuDriverGetVersion_;
|
||||
static void *cuInit_;
|
||||
// device management
|
||||
static void *cuDeviceGet_;
|
||||
static void *cuDeviceGetName_;
|
||||
static void *cuDeviceGetPCIBusId_;
|
||||
static void *cuDeviceGetAttribute_;
|
||||
static void *cuDeviceGetCount_;
|
||||
// link management
|
||||
static void *cuLinkAddData_v2_;
|
||||
static void *cuLinkCreate_v2_;
|
||||
static void *cuLinkDestroy_;
|
||||
static void *cuLinkComplete_;
|
||||
// module management
|
||||
static void *cuModuleGetGlobal_v2_;
|
||||
static void *cuModuleLoad_;
|
||||
static void *cuModuleUnload_;
|
||||
static void *cuModuleLoadDataEx_;
|
||||
static void *cuModuleLoadData_;
|
||||
static void *cuModuleGetFunction_;
|
||||
// stream management
|
||||
static void *cuStreamCreate_;
|
||||
static void *cuStreamSynchronize_;
|
||||
static void *cuStreamDestroy_v2_;
|
||||
static void *cuStreamGetCtx_;
|
||||
static void *cuLaunchKernel_;
|
||||
// function management
|
||||
static void *cuFuncGetAttribute_;
|
||||
static void *cuFuncSetAttribute_;
|
||||
static void *cuFuncSetCacheConfig_;
|
||||
// memory management
|
||||
static void *cuMemcpyDtoH_v2_;
|
||||
static void *cuMemFree_v2_;
|
||||
static void *cuMemcpyDtoHAsync_v2_;
|
||||
static void *cuMemcpyHtoDAsync_v2_;
|
||||
static void *cuMemcpyHtoD_v2_;
|
||||
static void *cuMemAlloc_v2_;
|
||||
static void *cuMemsetD8Async_;
|
||||
static void *cuPointerGetAttribute_;
|
||||
// event management
|
||||
static void *cuEventCreate_;
|
||||
static void *cuEventElapsedTime_;
|
||||
static void *cuEventRecord_;
|
||||
static void *cuEventDestroy_v2_;
|
||||
|
||||
/* ------------------- *
|
||||
* NVML
|
||||
* ------------------- */
|
||||
static void *nvmlInit_v2_;
|
||||
static void *nvmlDeviceGetHandleByPciBusId_v2_;
|
||||
static void *nvmlDeviceGetClockInfo_;
|
||||
static void *nvmlDeviceGetMaxClockInfo_;
|
||||
static void *nvmlDeviceSetApplicationsClocks_;
|
||||
|
||||
/* ------------------- *
|
||||
* HIP
|
||||
* ------------------- */
|
||||
// context management
|
||||
static void *hipInit_;
|
||||
static void *hipCtxDestroy_;
|
||||
static void *hipCtxCreate_;
|
||||
static void *hipCtxPushCurrent_;
|
||||
static void *hipCtxPopCurrent_;
|
||||
static void *hipCtxGetDevice_;
|
||||
static void *hipCtxEnablePeerAccess_;
|
||||
static void *hipDriverGetVersion_;
|
||||
// device management
|
||||
static void *hipGetDevice_;
|
||||
static void *hipDeviceGetName_;
|
||||
static void *hipDeviceGetPCIBusId_;
|
||||
static void *hipDeviceGetAttribute_;
|
||||
static void *hipGetDeviceCount_;
|
||||
// module management
|
||||
static void *hipModuleGetGlobal_;
|
||||
static void *hipModuleLoad_;
|
||||
static void *hipModuleLoadData_;
|
||||
static void *hipModuleUnload_;
|
||||
static void *hipModuleLoadDataEx_;
|
||||
static void *hipModuleGetFunction_;
|
||||
// stream management
|
||||
static void *hipStreamCreate_;
|
||||
static void *hipStreamSynchronize_;
|
||||
static void *hipStreamDestroy_;
|
||||
static void *hipModuleLaunchKernel_;
|
||||
;
|
||||
// function management
|
||||
static void *hipFuncGetAttributes_;
|
||||
static void *hipFuncSetAttribute_;
|
||||
static void *hipFuncSetCacheConfig_;
|
||||
// memory management
|
||||
static void *hipMalloc_;
|
||||
static void *hipPointerGetAttribute_;
|
||||
static void *hipMemsetD8Async_;
|
||||
static void *hipMemcpyDtoH_;
|
||||
static void *hipFree_;
|
||||
static void *hipMemcpyDtoHAsync_;
|
||||
static void *hipMemcpyHtoDAsync_;
|
||||
static void *hipMemcpyHtoD_;
|
||||
// event management
|
||||
static void *hipEventCreate_;
|
||||
static void *hipEventElapsedTime_;
|
||||
static void *hipEventRecord_;
|
||||
static void *hipEventDestroy_;
|
||||
};
|
||||
|
||||
} // namespace driver
|
||||
} // namespace triton
|
||||
|
||||
#endif
|
@@ -1,254 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef _TRITON_DRIVER_ERROR_H_
|
||||
#define _TRITON_DRIVER_ERROR_H_
|
||||
|
||||
#include "triton/driver/dispatch.h"
|
||||
#include <exception>
|
||||
|
||||
namespace triton {
|
||||
|
||||
namespace driver {
|
||||
|
||||
namespace exception {
|
||||
|
||||
namespace nvrtc {

// Declares an exception class `name` derived from std::exception whose
// what() returns the literal "NVRTC: Error- " followed by `msg`.
// `msg` must be a string literal: it is joined to the prefix by adjacent
// string-literal concatenation.
#define TRITON_CREATE_NVRTC_EXCEPTION(name, msg)                               \
  class name : public std::exception {                                         \
  public:                                                                      \
    const char *what() const noexcept override {                               \
      return "NVRTC: Error- " msg;                                             \
    }                                                                          \
  }

TRITON_CREATE_NVRTC_EXCEPTION(out_of_memory, "out of memory");
TRITON_CREATE_NVRTC_EXCEPTION(program_creation_failure,
                              "program creation failure");
TRITON_CREATE_NVRTC_EXCEPTION(invalid_input, "invalid input");
TRITON_CREATE_NVRTC_EXCEPTION(invalid_program, "invalid program");
TRITON_CREATE_NVRTC_EXCEPTION(invalid_option, "invalid option");
TRITON_CREATE_NVRTC_EXCEPTION(compilation, "compilation");
TRITON_CREATE_NVRTC_EXCEPTION(builtin_operation_failure,
                              "builtin operation failure");
TRITON_CREATE_NVRTC_EXCEPTION(unknown_error, "unknown error");

// Keep the helper macro local to this header.
#undef TRITON_CREATE_NVRTC_EXCEPTION
} // namespace nvrtc
|
||||
|
||||
namespace cuda {
// Common base so callers can catch any CUDA exception declared below.
class base : public std::exception {};

// Declares an exception class `name` derived from cuda::base whose what()
// returns the literal "CUDA: Error- " followed by `msg`.
// `msg` must be a string literal: it is joined to the prefix by adjacent
// string-literal concatenation.
#define TRITON_CREATE_CUDA_EXCEPTION(name, msg)                                \
  class name : public base {                                                   \
  public:                                                                      \
    const char *what() const noexcept override {                               \
      return "CUDA: Error- " msg;                                              \
    }                                                                          \
  }

TRITON_CREATE_CUDA_EXCEPTION(invalid_value, "invalid value");
TRITON_CREATE_CUDA_EXCEPTION(out_of_memory, "out of memory");
TRITON_CREATE_CUDA_EXCEPTION(not_initialized, "not initialized");
TRITON_CREATE_CUDA_EXCEPTION(deinitialized, "deinitialized");
TRITON_CREATE_CUDA_EXCEPTION(profiler_disabled, "profiler disabled");
TRITON_CREATE_CUDA_EXCEPTION(profiler_not_initialized,
                             "profiler not initialized");
TRITON_CREATE_CUDA_EXCEPTION(profiler_already_started,
                             "profiler already started");
TRITON_CREATE_CUDA_EXCEPTION(profiler_already_stopped,
                             "profiler already stopped");
TRITON_CREATE_CUDA_EXCEPTION(no_device, "no device");
TRITON_CREATE_CUDA_EXCEPTION(invalid_device, "invalid device");
TRITON_CREATE_CUDA_EXCEPTION(invalid_image, "invalid image");
TRITON_CREATE_CUDA_EXCEPTION(invalid_context, "invalid context");
TRITON_CREATE_CUDA_EXCEPTION(context_already_current,
                             "context already current");
TRITON_CREATE_CUDA_EXCEPTION(map_failed, "map failed");
TRITON_CREATE_CUDA_EXCEPTION(unmap_failed, "unmap failed");
TRITON_CREATE_CUDA_EXCEPTION(array_is_mapped, "array is mapped");
TRITON_CREATE_CUDA_EXCEPTION(already_mapped, "already mapped");
TRITON_CREATE_CUDA_EXCEPTION(no_binary_for_gpu, "no binary for gpu");
TRITON_CREATE_CUDA_EXCEPTION(already_acquired, "already acquired");
TRITON_CREATE_CUDA_EXCEPTION(not_mapped, "not mapped");
TRITON_CREATE_CUDA_EXCEPTION(not_mapped_as_array, "not mapped as array");
TRITON_CREATE_CUDA_EXCEPTION(not_mapped_as_pointer, "not mapped as pointer");
TRITON_CREATE_CUDA_EXCEPTION(ecc_uncorrectable, "ecc uncorrectable");
TRITON_CREATE_CUDA_EXCEPTION(unsupported_limit, "unsupported limit");
TRITON_CREATE_CUDA_EXCEPTION(context_already_in_use, "context already in use");
TRITON_CREATE_CUDA_EXCEPTION(peer_access_unsupported,
                             "peer access unsupported");
TRITON_CREATE_CUDA_EXCEPTION(invalid_ptx, "invalid ptx");
TRITON_CREATE_CUDA_EXCEPTION(invalid_graphics_context,
                             "invalid graphics context");
TRITON_CREATE_CUDA_EXCEPTION(invalid_source, "invalid source");
TRITON_CREATE_CUDA_EXCEPTION(file_not_found, "file not found");
TRITON_CREATE_CUDA_EXCEPTION(shared_object_symbol_not_found,
                             "shared object symbol not found");
TRITON_CREATE_CUDA_EXCEPTION(shared_object_init_failed,
                             "shared object init failed");
TRITON_CREATE_CUDA_EXCEPTION(operating_system, "operating system");
TRITON_CREATE_CUDA_EXCEPTION(invalid_handle, "invalid handle");
TRITON_CREATE_CUDA_EXCEPTION(not_found, "not found");
TRITON_CREATE_CUDA_EXCEPTION(not_ready, "not ready");
TRITON_CREATE_CUDA_EXCEPTION(illegal_address, "illegal address");
TRITON_CREATE_CUDA_EXCEPTION(launch_out_of_resources,
                             "launch out of resources");
TRITON_CREATE_CUDA_EXCEPTION(launch_timeout, "launch timeout");
TRITON_CREATE_CUDA_EXCEPTION(launch_incompatible_texturing,
                             "launch incompatible texturing");
TRITON_CREATE_CUDA_EXCEPTION(peer_access_already_enabled,
                             "peer access already enabled");
TRITON_CREATE_CUDA_EXCEPTION(peer_access_not_enabled,
                             "peer access not enabled");
TRITON_CREATE_CUDA_EXCEPTION(primary_context_active, "primary context active");
TRITON_CREATE_CUDA_EXCEPTION(context_is_destroyed, "context is destroyed");
TRITON_CREATE_CUDA_EXCEPTION(assert_error, "assert");
TRITON_CREATE_CUDA_EXCEPTION(too_many_peers, "too many peers");
TRITON_CREATE_CUDA_EXCEPTION(host_memory_already_registered,
                             "host memory already registered");
// Message previously read "hot memory not registered".
TRITON_CREATE_CUDA_EXCEPTION(host_memory_not_registered,
                             "host memory not registered");
TRITON_CREATE_CUDA_EXCEPTION(hardware_stack_error, "hardware stack error");
TRITON_CREATE_CUDA_EXCEPTION(illegal_instruction, "illegal instruction");
TRITON_CREATE_CUDA_EXCEPTION(misaligned_address, "misaligned address");
TRITON_CREATE_CUDA_EXCEPTION(invalid_address_space, "invalid address space");
TRITON_CREATE_CUDA_EXCEPTION(invalid_pc, "invalid pc");
TRITON_CREATE_CUDA_EXCEPTION(launch_failed, "launch failed");
TRITON_CREATE_CUDA_EXCEPTION(not_permitted, "not permitted");
TRITON_CREATE_CUDA_EXCEPTION(not_supported, "not supported");
TRITON_CREATE_CUDA_EXCEPTION(unknown, "unknown");

// Keep the helper macro local to this header.
#undef TRITON_CREATE_CUDA_EXCEPTION
} // namespace cuda
|
||||
|
||||
namespace cublas {
// Common base so callers can catch any cuBLAS exception declared below.
class base : public std::exception {};

// Declares an exception class `name` derived from cublas::base whose what()
// returns the literal "CUBLAS: Error- " followed by `msg`.
// `msg` must be a string literal: it is joined to the prefix by adjacent
// string-literal concatenation.
#define TRITON_CREATE_CUBLAS_EXCEPTION(name, msg)                              \
  class name : public base {                                                   \
  public:                                                                      \
    const char *what() const noexcept override {                               \
      return "CUBLAS: Error- " msg;                                            \
    }                                                                          \
  }

TRITON_CREATE_CUBLAS_EXCEPTION(not_initialized, "not initialized");
TRITON_CREATE_CUBLAS_EXCEPTION(alloc_failed, "alloc failed");
TRITON_CREATE_CUBLAS_EXCEPTION(invalid_value, "invalid value");
TRITON_CREATE_CUBLAS_EXCEPTION(arch_mismatch, "arch mismatch");
TRITON_CREATE_CUBLAS_EXCEPTION(mapping_error, "mapping error");
TRITON_CREATE_CUBLAS_EXCEPTION(execution_failed, "execution failed");
TRITON_CREATE_CUBLAS_EXCEPTION(internal_error, "internal error");
TRITON_CREATE_CUBLAS_EXCEPTION(not_supported, "not supported");
TRITON_CREATE_CUBLAS_EXCEPTION(license_error, "license error");
TRITON_CREATE_CUBLAS_EXCEPTION(unknown, "unknown");

// Keep the helper macro local to this header.
#undef TRITON_CREATE_CUBLAS_EXCEPTION
} // namespace cublas
|
||||
|
||||
namespace cudnn {
// Declares an exception class `name` derived from std::exception whose
// what() returns the literal "CUDNN: Error- " followed by `msg`.
// `msg` must be a string literal: it is joined to the prefix by adjacent
// string-literal concatenation.
#define TRITON_CREATE_CUDNN_EXCEPTION(name, msg)                               \
  class name : public std::exception {                                         \
  public:                                                                      \
    const char *what() const noexcept override {                               \
      return "CUDNN: Error- " msg;                                             \
    }                                                                          \
  }

TRITON_CREATE_CUDNN_EXCEPTION(not_initialized, "not initialized");
TRITON_CREATE_CUDNN_EXCEPTION(alloc_failed, "allocation failed");
TRITON_CREATE_CUDNN_EXCEPTION(bad_param, "bad param");
TRITON_CREATE_CUDNN_EXCEPTION(internal_error, "internal error");
TRITON_CREATE_CUDNN_EXCEPTION(invalid_value, "invalid value");
TRITON_CREATE_CUDNN_EXCEPTION(arch_mismatch, "arch mismatch");
TRITON_CREATE_CUDNN_EXCEPTION(mapping_error, "mapping error");
TRITON_CREATE_CUDNN_EXCEPTION(execution_failed, "execution failed");
TRITON_CREATE_CUDNN_EXCEPTION(not_supported, "not supported");
TRITON_CREATE_CUDNN_EXCEPTION(license_error, "license error");
TRITON_CREATE_CUDNN_EXCEPTION(runtime_prerequisite_missing,
                              "prerequisite missing");
TRITON_CREATE_CUDNN_EXCEPTION(runtime_in_progress, "runtime in progress");
TRITON_CREATE_CUDNN_EXCEPTION(runtime_fp_overflow, "runtime fp overflow");

// Keep the helper macro local to this header.
#undef TRITON_CREATE_CUDNN_EXCEPTION
} // namespace cudnn
|
||||
|
||||
namespace hip {
// Common base so callers can catch any HIP exception declared below.
class base : public std::exception {};

// Declares an exception class `name` derived from hip::base whose what()
// returns the literal "HIP: Error- " followed by `msg`.
// `msg` must be a string literal: it is joined to the prefix by adjacent
// string-literal concatenation.
#define TRITON_CREATE_HIP_EXCEPTION(name, msg)                                 \
  class name : public base {                                                   \
  public:                                                                      \
    const char *what() const noexcept override {                               \
      return "HIP: Error- " msg;                                               \
    }                                                                          \
  }

TRITON_CREATE_HIP_EXCEPTION(invalid_value, "invalid value");
TRITON_CREATE_HIP_EXCEPTION(out_of_memory, "out of memory");
TRITON_CREATE_HIP_EXCEPTION(not_initialized, "not initialized");
TRITON_CREATE_HIP_EXCEPTION(deinitialized, "deinitialized");
TRITON_CREATE_HIP_EXCEPTION(profiler_disabled, "profiler disabled");
TRITON_CREATE_HIP_EXCEPTION(profiler_not_initialized,
                            "profiler not initialized");
TRITON_CREATE_HIP_EXCEPTION(profiler_already_started,
                            "profiler already started");
TRITON_CREATE_HIP_EXCEPTION(profiler_already_stopped,
                            "profiler already stopped");
TRITON_CREATE_HIP_EXCEPTION(no_device, "no device");
TRITON_CREATE_HIP_EXCEPTION(invalid_device, "invalid device");
TRITON_CREATE_HIP_EXCEPTION(invalid_image, "invalid image");
TRITON_CREATE_HIP_EXCEPTION(invalid_context, "invalid context");
TRITON_CREATE_HIP_EXCEPTION(context_already_current, "context already current");
TRITON_CREATE_HIP_EXCEPTION(map_failed, "map failed");
TRITON_CREATE_HIP_EXCEPTION(unmap_failed, "unmap failed");
TRITON_CREATE_HIP_EXCEPTION(array_is_mapped, "array is mapped");
TRITON_CREATE_HIP_EXCEPTION(already_mapped, "already mapped");
TRITON_CREATE_HIP_EXCEPTION(no_binary_for_gpu, "no binary for gpu");
TRITON_CREATE_HIP_EXCEPTION(already_acquired, "already acquired");
TRITON_CREATE_HIP_EXCEPTION(not_mapped, "not mapped");
TRITON_CREATE_HIP_EXCEPTION(not_mapped_as_array, "not mapped as array");
TRITON_CREATE_HIP_EXCEPTION(not_mapped_as_pointer, "not mapped as pointer");
TRITON_CREATE_HIP_EXCEPTION(ecc_uncorrectable, "ecc uncorrectable");
TRITON_CREATE_HIP_EXCEPTION(unsupported_limit, "unsupported limit");
TRITON_CREATE_HIP_EXCEPTION(context_already_in_use, "context already in use");
TRITON_CREATE_HIP_EXCEPTION(peer_access_unsupported, "peer access unsupported");
TRITON_CREATE_HIP_EXCEPTION(invalid_ptx, "invalid ptx");
TRITON_CREATE_HIP_EXCEPTION(invalid_graphics_context,
                            "invalid graphics context");
TRITON_CREATE_HIP_EXCEPTION(invalid_source, "invalid source");
TRITON_CREATE_HIP_EXCEPTION(file_not_found, "file not found");
TRITON_CREATE_HIP_EXCEPTION(shared_object_symbol_not_found,
                            "shared object symbol not found");
TRITON_CREATE_HIP_EXCEPTION(shared_object_init_failed,
                            "shared object init failed");
TRITON_CREATE_HIP_EXCEPTION(operating_system, "operating system");
TRITON_CREATE_HIP_EXCEPTION(invalid_handle, "invalid handle");
TRITON_CREATE_HIP_EXCEPTION(not_found, "not found");
TRITON_CREATE_HIP_EXCEPTION(not_ready, "not ready");
TRITON_CREATE_HIP_EXCEPTION(illegal_address, "illegal address");
TRITON_CREATE_HIP_EXCEPTION(launch_out_of_resources, "launch out of resources");
TRITON_CREATE_HIP_EXCEPTION(launch_timeout, "launch timeout");
TRITON_CREATE_HIP_EXCEPTION(launch_incompatible_texturing,
                            "launch incompatible texturing");
TRITON_CREATE_HIP_EXCEPTION(peer_access_already_enabled,
                            "peer access already enabled");
TRITON_CREATE_HIP_EXCEPTION(peer_access_not_enabled, "peer access not enabled");
TRITON_CREATE_HIP_EXCEPTION(primary_context_active, "primary context active");
TRITON_CREATE_HIP_EXCEPTION(context_is_destroyed, "context is destroyed");
TRITON_CREATE_HIP_EXCEPTION(assert_error, "assert");
TRITON_CREATE_HIP_EXCEPTION(too_many_peers, "too many peers");
TRITON_CREATE_HIP_EXCEPTION(host_memory_already_registered,
                            "host memory already registered");
// Message previously read "hot memory not registered".
TRITON_CREATE_HIP_EXCEPTION(host_memory_not_registered,
                            "host memory not registered");
TRITON_CREATE_HIP_EXCEPTION(hardware_stack_error, "hardware stack error");
TRITON_CREATE_HIP_EXCEPTION(illegal_instruction, "illegal instruction");
TRITON_CREATE_HIP_EXCEPTION(misaligned_address, "misaligned address");
TRITON_CREATE_HIP_EXCEPTION(invalid_address_space, "invalid address space");
TRITON_CREATE_HIP_EXCEPTION(invalid_pc, "invalid pc");
TRITON_CREATE_HIP_EXCEPTION(launch_failed, "launch failed");
TRITON_CREATE_HIP_EXCEPTION(not_permitted, "not permitted");
TRITON_CREATE_HIP_EXCEPTION(not_supported, "not supported");
TRITON_CREATE_HIP_EXCEPTION(invalid_symbol, "invalid symbol");
TRITON_CREATE_HIP_EXCEPTION(unknown, "unknown");

// Undefine the macro defined above; the original undefined the CUDA macro
// here, which left the HIP one visible to every includer.
#undef TRITON_CREATE_HIP_EXCEPTION
} // namespace hip
|
||||
|
||||
} // namespace exception
|
||||
} // namespace driver
|
||||
} // namespace triton
|
||||
|
||||
#endif
|
@@ -1,22 +0,0 @@
|
||||
#include "triton/external/CUDA/cuda.h"
|
||||
#include "triton/external/hip.h"
|
||||
#include <string>
|
||||
|
||||
namespace llvm {
|
||||
class Module;
|
||||
}
|
||||
|
||||
namespace triton {
|
||||
namespace driver {
|
||||
|
||||
void init_llvm();
|
||||
std::string path_to_ptxas(int &version);
|
||||
std::string llir_to_ptx(llvm::Module *module, int cc, int version);
|
||||
std::string ptx_to_cubin(const std::string &ptx, const std::string &ptxas_path,
|
||||
int cc);
|
||||
CUmodule ptx_to_cumodule(const std::string &ptx, int cc);
|
||||
std::string llir_to_amdgpu(llvm::Module *module, const std::string &proc);
|
||||
hipModule_t amdgpu_to_hipmodule(const std::string &path);
|
||||
|
||||
} // namespace driver
|
||||
} // namespace triton
|
18994
include/triton/external/CUDA/cuda.h
vendored
18994
include/triton/external/CUDA/cuda.h
vendored
File diff suppressed because it is too large
Load Diff
6281
include/triton/external/CUDA/nvml.h
vendored
6281
include/triton/external/CUDA/nvml.h
vendored
File diff suppressed because it is too large
Load Diff
3067
include/triton/external/half.hpp
vendored
3067
include/triton/external/half.hpp
vendored
File diff suppressed because it is too large
Load Diff
293
include/triton/external/hip.h
vendored
293
include/triton/external/hip.h
vendored
@@ -1,293 +0,0 @@
|
||||
#ifndef __external_hip_h__
|
||||
#define __external_hip_h__
|
||||
|
||||
/*
|
||||
* @brief hipError_t
|
||||
* @enum
|
||||
* @ingroup Enumerations
|
||||
*/
|
||||
// Developer note - when updating these, update the hipErrorName and hipErrorString functions in
|
||||
// NVCC and HCC paths Also update the hipCUDAErrorTohipError function in NVCC path.
|
||||
|
||||
// Ignoring error-code return values from hip APIs is discouraged. On C++17,
|
||||
// we can make that yield a warning
|
||||
|
||||
/*
|
||||
* @brief hipError_t
|
||||
* @enum
|
||||
* @ingroup Enumerations
|
||||
*/
|
||||
// Developer note - when updating these, update the hipErrorName and hipErrorString functions in
|
||||
// NVCC and HCC paths Also update the hipCUDAErrorTohipError function in NVCC path.
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
/// Error codes returned by the HIP API.
///
/// Several values have a deprecated alias that shares the same numeric code;
/// those aliases are marked "Deprecated" below.
typedef enum hipError_t {
  /// Successful completion.
  hipSuccess = 0,
  /// One or more of the parameters passed to the API call is NULL
  /// or not in an acceptable range.
  hipErrorInvalidValue = 1,
  hipErrorOutOfMemory = 2,
  /// Deprecated. Memory allocation error.
  hipErrorMemoryAllocation = 2,
  hipErrorNotInitialized = 3,
  /// Deprecated.
  hipErrorInitializationError = 3,
  hipErrorDeinitialized = 4,
  hipErrorProfilerDisabled = 5,
  hipErrorProfilerNotInitialized = 6,
  hipErrorProfilerAlreadyStarted = 7,
  hipErrorProfilerAlreadyStopped = 8,
  hipErrorInvalidConfiguration = 9,
  hipErrorInvalidPitchValue = 12,
  hipErrorInvalidSymbol = 13,
  /// Invalid Device Pointer
  hipErrorInvalidDevicePointer = 17,
  /// Invalid memory copy direction
  hipErrorInvalidMemcpyDirection = 21,
  hipErrorInsufficientDriver = 35,
  hipErrorMissingConfiguration = 52,
  hipErrorPriorLaunchFailure = 53,
  hipErrorInvalidDeviceFunction = 98,
  /// Call to hipGetDeviceCount returned 0 devices
  hipErrorNoDevice = 100,
  /// DeviceID must be in range 0...#compute-devices.
  hipErrorInvalidDevice = 101,
  hipErrorInvalidImage = 200,
  /// Produced when input context is invalid.
  hipErrorInvalidContext = 201,
  hipErrorContextAlreadyCurrent = 202,
  hipErrorMapFailed = 205,
  /// Deprecated. Produced when the IPC memory attach failed from ROCr.
  hipErrorMapBufferObjectFailed = 205,
  hipErrorUnmapFailed = 206,
  hipErrorArrayIsMapped = 207,
  hipErrorAlreadyMapped = 208,
  hipErrorNoBinaryForGpu = 209,
  hipErrorAlreadyAcquired = 210,
  hipErrorNotMapped = 211,
  hipErrorNotMappedAsArray = 212,
  hipErrorNotMappedAsPointer = 213,
  hipErrorECCNotCorrectable = 214,
  hipErrorUnsupportedLimit = 215,
  hipErrorContextAlreadyInUse = 216,
  hipErrorPeerAccessUnsupported = 217,
  /// In CUDA DRV, it is CUDA_ERROR_INVALID_PTX
  hipErrorInvalidKernelFile = 218,
  hipErrorInvalidGraphicsContext = 219,
  hipErrorInvalidSource = 300,
  hipErrorFileNotFound = 301,
  hipErrorSharedObjectSymbolNotFound = 302,
  hipErrorSharedObjectInitFailed = 303,
  hipErrorOperatingSystem = 304,
  hipErrorInvalidHandle = 400,
  /// Deprecated. Resource handle (hipEvent_t or hipStream_t) invalid.
  hipErrorInvalidResourceHandle = 400,
  hipErrorNotFound = 500,
  /// Indicates that asynchronous operations enqueued earlier are not ready.
  /// This is not actually an error, but is used to distinguish from
  /// hipSuccess (which indicates completion). APIs that return this error
  /// include hipEventQuery and hipStreamQuery.
  hipErrorNotReady = 600,
  hipErrorIllegalAddress = 700,
  /// Out of resources error.
  hipErrorLaunchOutOfResources = 701,
  hipErrorLaunchTimeOut = 702,
  /// Peer access was already enabled from the current device.
  hipErrorPeerAccessAlreadyEnabled = 704,
  /// Peer access was never enabled from the current device.
  hipErrorPeerAccessNotEnabled = 705,
  hipErrorSetOnActiveProcess = 708,
  /// Produced when the kernel calls assert.
  hipErrorAssert = 710,
  /// Produced when trying to lock a page-locked memory.
  hipErrorHostMemoryAlreadyRegistered = 712,
  /// Produced when trying to unlock a non-page-locked memory.
  hipErrorHostMemoryNotRegistered = 713,
  /// An exception occurred on the device while executing a kernel.
  hipErrorLaunchFailure = 719,
  /// The number of blocks launched per grid for a kernel that was launched
  /// via cooperative launch APIs exceeds the maximum number of allowed
  /// blocks for the current device.
  hipErrorCooperativeLaunchTooLarge = 720,
  /// Produced when the hip API is not supported/implemented
  hipErrorNotSupported = 801,
  /// Unknown error.
  hipErrorUnknown = 999,

  // HSA Runtime Error Codes start here.

  /// HSA runtime memory call returned error. Typically not seen in
  /// production systems.
  hipErrorRuntimeMemory = 1052,
  /// HSA runtime call other than memory returned error. Typically not seen
  /// in production systems.
  hipErrorRuntimeOther = 1053,
  /// Marker that more error codes are needed.
  hipErrorTbd
} hipError_t;
|
||||
|
||||
|
||||
// Handle type: a pointer to struct ihipCtx_t, which is only declared (never
// defined) in this header, so the pointee is opaque to code that includes it.
typedef struct ihipCtx_t* hipCtx_t;
|
||||
|
||||
// Note many APIs also use integer deviceIds as an alternative to the device pointer:
|
||||
typedef int hipDevice_t;
|
||||
|
||||
/// Peer-to-peer attribute selectors. Values are consecutive, starting at 0.
/// (Meanings are not documented in this header; presumably they mirror the
/// CUDA P2P attributes of the same names — confirm.)
typedef enum hipDeviceP2PAttr {
  hipDevP2PAttrPerformanceRank = 0,
  hipDevP2PAttrAccessSupported,
  hipDevP2PAttrNativeAtomicSupported,
  hipDevP2PAttrHipArrayAccessSupported
} hipDeviceP2PAttr;
|
||||
|
||||
// Handle type: pointer to struct ihipStream_t, which is declared but not
// defined in this header (opaque to includers).
typedef struct ihipStream_t* hipStream_t;
|
||||
|
||||
// Flag constant with value 0.
// NOTE(review): presumably passed to the IPC memory-handle open call — confirm.
#define hipIpcMemLazyEnablePeerAccess 0

// Size, in bytes, of the opaque payload carried by both IPC handle structs.
#define HIP_IPC_HANDLE_SIZE 64

// IPC memory handle: an opaque, fixed-size byte blob.
typedef struct hipIpcMemHandle_st {
  char reserved[HIP_IPC_HANDLE_SIZE];
} hipIpcMemHandle_t;

// IPC event handle: an opaque, fixed-size byte blob.
typedef struct hipIpcEventHandle_st {
  char reserved[HIP_IPC_HANDLE_SIZE];
} hipIpcEventHandle_t;
|
||||
|
||||
// Handle type: pointer to struct ihipModule_t (declared, not defined here).
typedef struct ihipModule_t* hipModule_t;
|
||||
|
||||
// Handle type: pointer to struct ihipModuleSymbol_t (declared, not defined here).
typedef struct ihipModuleSymbol_t* hipFunction_t;
|
||||
|
||||
/// Plain aggregate describing attributes of a device function.
///
/// Field order is part of the binary layout and must not change. The header
/// does not document individual fields; the names presumably mirror CUDA's
/// cudaFuncAttributes — confirm before relying on units or semantics.
typedef struct hipFuncAttributes {
  int binaryVersion;
  int cacheModeCA;
  size_t constSizeBytes;
  size_t localSizeBytes;
  int maxDynamicSharedSizeBytes;
  int maxThreadsPerBlock;
  int numRegs;
  int preferredShmemCarveout;
  int ptxVersion;
  size_t sharedSizeBytes;
} hipFuncAttributes;
|
||||
|
||||
// Handle type: pointer to struct ihipEvent_t (declared, not defined here).
typedef struct ihipEvent_t* hipEvent_t;
|
||||
|
||||
/*
|
||||
* @brief hipDeviceAttribute_t
|
||||
* @enum
|
||||
* @ingroup Enumerations
|
||||
*/
|
||||
/// Device attribute selectors.
///
/// No enumerator has an explicit value: they are numbered consecutively from
/// 0 in declaration order, so the order below must not change.
typedef enum hipDeviceAttribute_t {
  /// Maximum number of threads per block.
  hipDeviceAttributeMaxThreadsPerBlock,
  /// Maximum x-dimension of a block.
  hipDeviceAttributeMaxBlockDimX,
  /// Maximum y-dimension of a block.
  hipDeviceAttributeMaxBlockDimY,
  /// Maximum z-dimension of a block.
  hipDeviceAttributeMaxBlockDimZ,
  /// Maximum x-dimension of a grid.
  hipDeviceAttributeMaxGridDimX,
  /// Maximum y-dimension of a grid.
  hipDeviceAttributeMaxGridDimY,
  /// Maximum z-dimension of a grid.
  hipDeviceAttributeMaxGridDimZ,
  /// Maximum shared memory available per block in bytes.
  hipDeviceAttributeMaxSharedMemoryPerBlock,
  /// Constant memory size in bytes.
  hipDeviceAttributeTotalConstantMemory,
  /// Warp size in threads.
  hipDeviceAttributeWarpSize,
  /// Maximum number of 32-bit registers available to a thread block. This
  /// number is shared by all thread blocks simultaneously resident on a
  /// multiprocessor.
  hipDeviceAttributeMaxRegistersPerBlock,
  /// Peak clock frequency in kilohertz.
  hipDeviceAttributeClockRate,
  /// Peak memory clock frequency in kilohertz.
  hipDeviceAttributeMemoryClockRate,
  /// Global memory bus width in bits.
  hipDeviceAttributeMemoryBusWidth,
  /// Number of multiprocessors on the device.
  hipDeviceAttributeMultiprocessorCount,
  /// Compute mode that device is currently in.
  hipDeviceAttributeComputeMode,
  /// Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
  hipDeviceAttributeL2CacheSize,
  /// Maximum resident threads per multiprocessor.
  hipDeviceAttributeMaxThreadsPerMultiProcessor,
  /// Major compute capability version number.
  hipDeviceAttributeComputeCapabilityMajor,
  /// Minor compute capability version number.
  hipDeviceAttributeComputeCapabilityMinor,
  /// Device can possibly execute multiple kernels concurrently.
  hipDeviceAttributeConcurrentKernels,
  /// PCI Bus ID.
  hipDeviceAttributePciBusId,
  /// PCI Device ID.
  hipDeviceAttributePciDeviceId,
  /// Maximum Shared Memory Per Multiprocessor.
  hipDeviceAttributeMaxSharedMemoryPerMultiprocessor,
  /// Multiple GPU devices.
  hipDeviceAttributeIsMultiGpuBoard,
  /// iGPU
  hipDeviceAttributeIntegrated,
  /// Support cooperative launch
  hipDeviceAttributeCooperativeLaunch,
  /// Support cooperative launch on multiple devices
  hipDeviceAttributeCooperativeMultiDeviceLaunch,
  /// Maximum number of elements in 1D images
  hipDeviceAttributeMaxTexture1DWidth,
  /// Maximum dimension width of 2D images in image elements
  hipDeviceAttributeMaxTexture2DWidth,
  /// Maximum dimension height of 2D images in image elements
  hipDeviceAttributeMaxTexture2DHeight,
  /// Maximum dimension width of 3D images in image elements
  hipDeviceAttributeMaxTexture3DWidth,
  /// Maximum dimensions height of 3D images in image elements
  hipDeviceAttributeMaxTexture3DHeight,
  /// Maximum dimensions depth of 3D images in image elements
  hipDeviceAttributeMaxTexture3DDepth,

  /// Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register
  hipDeviceAttributeHdpMemFlushCntl,
  /// Address of the HDP_REG_COHERENCY_FLUSH_CNTL register
  hipDeviceAttributeHdpRegFlushCntl,

  /// Maximum pitch in bytes allowed by memory copies
  hipDeviceAttributeMaxPitch,
  /// Alignment requirement for textures
  hipDeviceAttributeTextureAlignment,
  /// Pitch alignment requirement for 2D texture references bound to pitched
  /// memory
  hipDeviceAttributeTexturePitchAlignment,
  /// Run time limit for kernels executed on the device
  hipDeviceAttributeKernelExecTimeout,
  /// Device can map host memory into device address space
  hipDeviceAttributeCanMapHostMemory,
  /// Device has ECC support enabled
  hipDeviceAttributeEccEnabled,

  /// Supports cooperative launch on multiple devices with unmatched functions
  hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc,
  /// Supports cooperative launch on multiple devices with unmatched grid
  /// dimensions
  hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim,
  /// Supports cooperative launch on multiple devices with unmatched block
  /// dimensions
  hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim,
  /// Supports cooperative launch on multiple devices with unmatched shared
  /// memories
  hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem,
  /// Revision of the GPU in this device
  hipDeviceAttributeAsicRevision,
  /// Device supports allocating managed memory on this system
  hipDeviceAttributeManagedMemory,
  /// Host can directly access managed memory on the device without migration
  hipDeviceAttributeDirectManagedMemAccessFromHost,
  /// Device can coherently access managed memory concurrently with the CPU
  hipDeviceAttributeConcurrentManagedAccess,
  /// Device supports coherently accessing pageable memory without calling
  /// hipHostRegister on it
  hipDeviceAttributePageableMemoryAccess,
  /// Device accesses pageable memory via the host's page tables
  hipDeviceAttributePageableMemoryAccessUsesHostPageTables,
  /// '1' if Device supports hipStreamWaitValue32() and
  /// hipStreamWaitValue64() , '0' otherwise.
  hipDeviceAttributeCanUseStreamWaitValue

} hipDeviceAttribute_t;
|
||||
|
||||
// Untyped pointer alias (presumably used for device memory addresses — confirm).
typedef void* hipDeviceptr_t;
|
||||
|
||||
/*
|
||||
* @brief hipJitOption
|
||||
* @enum
|
||||
* @ingroup Enumerations
|
||||
*/
|
||||
/// JIT option selectors, numbered consecutively from 0 in declaration order.
/// hipJitOptionNumOptions is last, so its value equals the number of
/// options declared before it.
typedef enum hipJitOption {
  hipJitOptionMaxRegisters = 0,
  hipJitOptionThreadsPerBlock,
  hipJitOptionWallTime,
  hipJitOptionInfoLogBuffer,
  hipJitOptionInfoLogBufferSizeBytes,
  hipJitOptionErrorLogBuffer,
  hipJitOptionErrorLogBufferSizeBytes,
  hipJitOptionOptimizationLevel,
  hipJitOptionTargetFromContext,
  hipJitOptionTarget,
  hipJitOptionFallbackStrategy,
  hipJitOptionGenerateDebugInfo,
  hipJitOptionLogVerbose,
  hipJitOptionGenerateLineInfo,
  hipJitOptionCacheMode,
  hipJitOptionSm3xOpt,
  hipJitOptionFastCompile,
  hipJitOptionNumOptions
} hipJitOption;
|
||||
|
||||
/**
|
||||
* @warning On AMD devices and some Nvidia devices, these hints and controls are ignored.
|
||||
*/
|
||||
/// Function attribute selectors. The two named attributes carry explicit
/// values (8 and 9); hipFuncAttributeMax follows implicitly.
typedef enum hipFuncAttribute {
  hipFuncAttributeMaxDynamicSharedMemorySize = 8,
  hipFuncAttributePreferredSharedMemoryCarveout = 9,
  hipFuncAttributeMax
} hipFuncAttribute;
|
||||
|
||||
/**
|
||||
* @warning On AMD devices and some Nvidia devices, these hints and controls are ignored.
|
||||
*/
|
||||
/// Cache configuration preference, numbered consecutively from 0.
typedef enum hipFuncCache_t {
  /// no preference for shared memory or L1 (default)
  hipFuncCachePreferNone,
  /// prefer larger shared memory and smaller L1 cache
  hipFuncCachePreferShared,
  /// prefer larger L1 cache and smaller shared memory
  hipFuncCachePreferL1,
  /// prefer equal size L1 cache and shared memory
  hipFuncCachePreferEqual,
} hipFuncCache_t;
|
||||
|
||||
|
||||
// Sentinel pointer values 0x01, 0x02 and 0x03 cast to void*.
// NOTE(review): presumably used as markers in the "extra" argument array of
// a kernel launch call — confirm against the launch API.
#define HIP_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01)
|
||||
#define HIP_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02)
|
||||
#define HIP_LAUNCH_PARAM_END ((void*)0x03)
|
||||
|
||||
#endif
|
@@ -1,57 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef _TRITON_TOOLS_BENCH_H_
|
||||
#define _TRITON_TOOLS_BENCH_H_
|
||||
|
||||
#include "triton/driver/device.h"
|
||||
#include "triton/driver/stream.h"
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <functional>
|
||||
|
||||
namespace triton {
|
||||
namespace tools {
|
||||
|
||||
/// Minimal stopwatch built on std::chrono::high_resolution_clock.
///
/// start() records the current time; get() returns the time elapsed since the
/// most recent start(). If start() was never called, the reference point is a
/// default-constructed time_point.
class timer {
  using clock_t_ = std::chrono::high_resolution_clock;
  using ns_t_ = std::chrono::nanoseconds;

public:
  /// @param run when true, the stopwatch is started immediately.
  explicit timer(bool run = false) {
    if (!run)
      return;
    start();
  }

  /// Records "now" as the reference point for subsequent get() calls.
  void start() { _start = clock_t_::now(); }

  /// @return nanoseconds elapsed since the recorded reference point.
  ns_t_ get() const {
    const auto elapsed = clock_t_::now() - _start;
    return std::chrono::duration_cast<ns_t_>(elapsed);
  }

private:
  clock_t_::time_point _start;
};
|
||||
|
||||
/// Measures the mean wall-clock time of one `op` call, in nanoseconds.
///
/// `op` is first invoked `warmup` times without timing, then `stream` is
/// synchronized. After that `op` is invoked `repeat` times, `stream` is
/// synchronized again, and the total elapsed time is divided by `repeat`.
///
/// @param op      callable to benchmark.
/// @param stream  stream to synchronize before and after the timed section;
///                must not be null (it is dereferenced unconditionally).
/// @param warmup  number of untimed warm-up invocations.
/// @param repeat  number of timed invocations; the result is a floating-point
///                division by this value, so 0 yields a non-finite result.
/// @return average nanoseconds per invocation.
inline double bench(std::function<void()> const &op, driver::stream *stream,
                    size_t warmup = 10, size_t repeat = 200) {
  // Warm-up: excluded from the measurement.
  for (size_t i = 0; i < warmup; i++)
    op();
  stream->synchronize();

  timer tmr;
  tmr.start();
  for (size_t i = 0; i < repeat; i++)
    op();
  // Wait for everything submitted by `op` before reading the clock.
  stream->synchronize();

  // Divide in double precision: a float cannot represent nanosecond totals
  // above 2^24 exactly, which skewed the reported average.
  return static_cast<double>(tmr.get().count()) / static_cast<double>(repeat);
}
|
||||
|
||||
} // namespace tools
|
||||
} // namespace triton
|
||||
|
||||
#endif
|
@@ -1,68 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef _TRITON_TOOLS_THREAD_GRAPH_H_
|
||||
#define _TRITON_TOOLS_THREAD_GRAPH_H_
|
||||
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
namespace triton {
|
||||
namespace tools {
|
||||
|
||||
/// Undirected graph over `node_t` with connected-component labelling.
///
/// Nodes exist only as endpoints of edges added through add_edge(), so every
/// node in `nodes_` has an adjacency entry in `edges_`.
template <class node_t> class graph {
  typedef std::map<node_t, std::set<node_t>> edges_t;

public:
  /// component id -> nodes belonging to that component
  typedef std::map<size_t, std::vector<node_t>> cmap_t;
  /// node -> id of the component containing it
  typedef std::map<node_t, size_t> nmap_t;

private:
  /// Depth-first visit of `x`: labels it with `id` and recurses into its
  /// neighbours. `nodes` holds the not-yet-visited nodes.
  void connected_components_impl(node_t x, std::set<node_t> &nodes,
                                 nmap_t *nmap, cmap_t *cmap, int id) const {
    // erase() reports how many elements were removed; 0 means `x` was
    // already visited. Returning here, before recording anything, keeps each
    // node from being appended to its component more than once.
    if (nodes.erase(x) == 0)
      return;
    if (nmap)
      (*nmap)[x] = id;
    if (cmap)
      (*cmap)[id].push_back(x);
    for (const node_t &y : edges_.at(x))
      connected_components_impl(y, nodes, nmap, cmap, id);
  }

public:
  /// Computes the connected components of the graph.
  ///
  /// @param cmap if non-null, cleared and filled with component id -> nodes
  ///             (each node listed exactly once, in visit order).
  /// @param nmap if non-null, cleared and filled with node -> component id.
  /// Component ids are assigned consecutively from 0.
  void connected_components(cmap_t *cmap, nmap_t *nmap) const {
    if (cmap)
      cmap->clear();
    if (nmap)
      nmap->clear();
    std::set<node_t> nodes = nodes_; // working copy: unvisited nodes
    unsigned id = 0;
    while (!nodes.empty()) {
      connected_components_impl(*nodes.begin(), nodes, nmap, cmap, id++);
    }
  }

  /// Adds the undirected edge x <-> y, registering both endpoints as nodes.
  void add_edge(node_t x, node_t y) {
    nodes_.insert(x);
    nodes_.insert(y);
    edges_[x].insert(y);
    edges_[y].insert(x);
  }

  /// Removes all nodes and edges.
  void clear() {
    nodes_.clear();
    edges_.clear();
  }

private:
  std::set<node_t> nodes_;
  edges_t edges_;
};
|
||||
|
||||
} // namespace tools
|
||||
} // namespace triton
|
||||
|
||||
#endif
|
@@ -1,172 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2011, Micael Hildenborg
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of Micael Hildenborg nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY Micael Hildenborg ''AS IS'' AND ANY
|
||||
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL Micael Hildenborg BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
Contributors:
|
||||
Gustav
|
||||
Several members in the gamedev.se forum.
|
||||
Gregory Petrosyan
|
||||
*/
|
||||
|
||||
#ifndef _TRITON_TOOLS_SHA1_HPP_
|
||||
#define _TRITON_TOOLS_SHA1_HPP_
|
||||
|
||||
// SHA-1 digest helpers.
//
// The code assumes `unsigned int` is exactly 32 bits wide: the algorithm
// relies on wrap-around arithmetic at that width.
namespace sha1 {
namespace // local
{
// Rotate a 32-bit value left by `steps` bits.
// `steps` is reduced modulo 32 so a rotation by 0 (or 32) never turns into a
// shift by the full width, which would be undefined behaviour.
inline unsigned int rol(const unsigned int value, const unsigned int steps) {
  const unsigned int s = steps & 31u;
  if (s == 0)
    return value;
  return (value << s) | (value >> (32u - s));
}

// Sets the first 16 words of the buffer to zero.
// Used for clearing the message part of the W buffer.
inline void clearWBuffert(unsigned int *buffert) {
  for (int pos = 0; pos < 16; ++pos) {
    buffert[pos] = 0;
  }
}

// Runs the 80-round SHA-1 compression function.
//   result: the five-word running state; updated in place.
//   w:      80-word schedule buffer; words 0..15 hold the message block on
//           entry, words 16..79 are overwritten with the expanded schedule.
inline void innerHash(unsigned int *result, unsigned int *w) {
  unsigned int a = result[0];
  unsigned int b = result[1];
  unsigned int c = result[2];
  unsigned int d = result[3];
  unsigned int e = result[4];

  for (int round = 0; round < 80; ++round) {
    // Message schedule expansion for every word past the input block.
    if (round >= 16) {
      w[round] =
          rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1);
    }

    // Round function and constant change every 20 rounds.
    unsigned int func;
    unsigned int val;
    if (round < 20) {
      func = (b & c) | (~b & d);
      val = 0x5a827999;
    } else if (round < 40) {
      func = b ^ c ^ d;
      val = 0x6ed9eba1;
    } else if (round < 60) {
      func = (b & c) | (b & d) | (c & d);
      val = 0x8f1bbcdc;
    } else {
      func = b ^ c ^ d;
      val = 0xca62c1d6;
    }

    const unsigned int t = rol(a, 5) + func + e + val + w[round];
    e = d;
    d = c;
    c = rol(b, 30);
    b = a;
    a = t;
  }

  result[0] += a;
  result[1] += b;
  result[2] += c;
  result[3] += d;
  result[4] += e;
}
} // namespace

// Computes the SHA-1 digest of a byte buffer.
//   src:        pointer to the input bytes (not read when the length is 0).
//   bytelength: number of input bytes; negative values are treated as 0.
//   hash:       output buffer receiving the 20 digest bytes.
inline void calc(const void *src, const int bytelength, unsigned char *hash) {
  // Init the result array.
  unsigned int result[5] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476,
                            0xc3d2e1f0};

  // Cast the void src pointer to be the byte array we can work with.
  const unsigned char *sarray = static_cast<const unsigned char *>(src);

  // Guard against negative lengths instead of running into undefined shifts.
  const int length = bytelength < 0 ? 0 : bytelength;

  // The reusable round buffer
  unsigned int w[80];

  // Loop through all complete 64byte blocks.
  const int endOfFullBlocks = length - 64;
  int currentBlock = 0;

  while (currentBlock <= endOfFullBlocks) {
    const int endCurrentBlock = currentBlock + 64;

    // Init the round buffer with the 64 byte block data, assembling each
    // word big-endian from individual bytes so host endianness is irrelevant.
    for (int roundPos = 0; currentBlock < endCurrentBlock; currentBlock += 4) {
      w[roundPos++] = (unsigned int)sarray[currentBlock + 3] |
                      (((unsigned int)sarray[currentBlock + 2]) << 8) |
                      (((unsigned int)sarray[currentBlock + 1]) << 16) |
                      (((unsigned int)sarray[currentBlock]) << 24);
    }
    innerHash(result, w);
  }

  // Handle the last and not full 64 byte block if existing.
  const int remaining = length - currentBlock; // 0..63
  clearWBuffert(w);
  int lastBlockBytes = 0;
  for (; lastBlockBytes < remaining; ++lastBlockBytes) {
    w[lastBlockBytes >> 2] |=
        (unsigned int)sarray[lastBlockBytes + currentBlock]
        << ((3 - (lastBlockBytes & 3)) << 3);
  }
  // Append the mandatory 0x80 terminator byte (unsigned literal: shifting a
  // signed 0x80 by 24 would move a bit into the sign position).
  w[lastBlockBytes >> 2] |= 0x80u << ((3 - (lastBlockBytes & 3)) << 3);
  // Fewer than 8 bytes left for the length field: flush and start a new block.
  if (remaining >= 56) {
    innerHash(result, w);
    clearWBuffert(w);
  }

  // Message length in bits as a 64-bit big-endian value in words 14 and 15.
  // Computed in 64 bits so inputs of 256 MiB and more neither overflow a
  // signed int nor lose the upper length word.
  const unsigned long long bitLength =
      static_cast<unsigned long long>(length) * 8ULL;
  w[14] = static_cast<unsigned int>(bitLength >> 32);
  w[15] = static_cast<unsigned int>(bitLength & 0xffffffffULL);
  innerHash(result, w);

  // Store hash in result pointer, and make sure we get it in the correct
  // order on both endian models.
  for (int hashByte = 0; hashByte < 20; ++hashByte) {
    hash[hashByte] = static_cast<unsigned char>(
        (result[hashByte >> 2] >> ((3 - (hashByte & 3)) << 3)) & 0xff);
  }
}

// Converts a 20-byte digest into a 40-character lowercase hex string.
//   hash:      the 20 digest bytes.
//   hexstring: output buffer of at least 41 chars; NUL-terminated on return.
inline void toHexString(const unsigned char *hash, char *hexstring) {
  const char hexDigits[] = {"0123456789abcdef"};

  for (int hashByte = 0; hashByte < 20; ++hashByte) {
    hexstring[hashByte << 1] = hexDigits[(hash[hashByte] >> 4) & 0xf];
    hexstring[(hashByte << 1) + 1] = hexDigits[hash[hashByte] & 0xf];
  }
  hexstring[40] = 0;
}
} // namespace sha1
|
||||
|
||||
#endif
|
@@ -1,42 +0,0 @@
|
||||
#ifndef TRITON_TOOLS_SYS_EXEC_HPP
|
||||
#define TRITON_TOOLS_SYS_EXEC_HPP
|
||||
|
||||
#include <cstdio>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
namespace triton {
|
||||
namespace tools {
|
||||
|
||||
#ifdef _WIN32
|
||||
#define popen _popen
|
||||
#define pclose _pclose
|
||||
#endif
|
||||
|
||||
#ifndef WEXITSTATUS
|
||||
#define WEXITSTATUS(stat_val) ((unsigned)(stat_val)&255)
|
||||
#endif
|
||||
|
||||
/// Runs `cmd` through popen() and captures what it writes to stdout.
///
/// Declared `inline` because this definition lives in a header: without it,
/// including the header from more than one translation unit produces
/// multiple-definition link errors.
///
/// @param cmd     command line handed to popen() in read mode.
/// @param result  cleared once the pipe is open, then filled with the
///                command's output. Left untouched if popen() fails.
/// @return WEXITSTATUS applied to the pclose() status. Also returns 0 when
///         the pipe cannot be opened or when reading throws.
///         NOTE(review): that makes "could not run" indistinguishable from
///         success — callers should confirm this is intended.
///         NOTE(review): WEXITSTATUS falls back to a low-byte mask when no
///         earlier header defines it; verify that <sys/wait.h> is in effect
///         on POSIX builds.
inline int exec(const std::string &cmd, std::string &result) {
  FILE *pipe = popen(cmd.c_str(), "r");
  if (!pipe)
    return 0;

  result.clear();
  char buffer[128];
  try {
    // fgets NUL-terminates each chunk, so appending the raw buffer is safe.
    while (fgets(buffer, sizeof buffer, pipe) != nullptr)
      result += buffer;
  } catch (...) {
    // Appending can only fail on allocation; close the pipe before bailing.
    pclose(pipe);
    return 0;
  }

  const int status = pclose(pipe);
  return WEXITSTATUS(status);
}
|
||||
|
||||
} // namespace tools
|
||||
} // namespace triton
|
||||
|
||||
#endif
|
@@ -1,70 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2015, PHILIPPE TILLET. All rights reserved.
|
||||
*
|
||||
* This file is part of ISAAC.
|
||||
*
|
||||
* ISAAC is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef TDL_TOOLS_SYS_MKDIR_HPP
|
||||
#define TDL_TOOLS_SYS_MKDIR_HPP
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <errno.h>
|
||||
#include <string>
|
||||
#include <sys/stat.h>
|
||||
#if defined(_WIN32)
|
||||
#include <direct.h>
|
||||
#endif
|
||||
|
||||
namespace triton {

namespace tools {

/// Creates a single directory.
/// On Windows this forwards to _mkdir; elsewhere to ::mkdir with mode 0777.
/// @return whatever the underlying call returns.
inline int mkdir(std::string const &path) {
#if defined(_WIN32)
  return _mkdir(path.c_str());
#else
  return ::mkdir(path.c_str(), 0777);
#endif
}

/// Creates every directory prefix of `path` that ends at a '/' separator.
/// Text after the last '/' is not created, and empty components (a leading
/// '/' or a doubled "//") are skipped.
/// @return 0 if the last mkdir attempt succeeded (or none was made) or errno
///         is EEXIST; -1 otherwise.
inline int mkpath(std::string const &path) {
  int last_status = 0;
  size_t component_begin = 0;
  for (size_t slash = path.find('/', component_begin);
       slash != std::string::npos;
       slash = path.find('/', component_begin)) {
    if (slash != component_begin)
      last_status = mkdir(path.substr(0, slash));
    component_begin = slash + 1;
  }
  if (last_status == 0)
    return 0;
  return errno == EEXIST ? 0 : -1;
}

/// Returns the modification time reported by stat() for `path`, converted to
/// int, or 0 when stat() fails.
inline int mtime(std::string const &path) {
  struct stat info;
  if (stat(path.c_str(), &info) == 0)
    return info.st_mtime;
  return 0;
}

} // namespace tools

} // namespace triton
|
||||
|
||||
#endif
|
@@ -1,81 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef _TRITON_TOOLS_THREAD_POOL_H_
|
||||
#define _TRITON_TOOLS_THREAD_POOL_H_
|
||||
|
||||
#include <condition_variable>
|
||||
#include <functional>
|
||||
#include <future>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <queue>
|
||||
#include <stdexcept>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
/// Fixed-size pool of worker threads consuming a FIFO queue of tasks.
///
/// Tasks are submitted with enqueue(), which returns a std::future for the
/// result. The destructor sets the stop flag, wakes every worker and joins
/// them; workers exit only once the queue is empty, so tasks already queued
/// still run.
class ThreadPool {
public:
  /// Launches `threads` workers, each running the shared worker loop.
  ThreadPool(size_t threads) : stop(false) {
    for (size_t remaining = threads; remaining > 0; --remaining)
      workers.emplace_back(&ThreadPool::worker_loop, this);
  }

  /// Queues the call f(args...) and returns a future for its result.
  /// @throws std::runtime_error if the pool has already been stopped.
  template <class F, class... Args>
  auto enqueue(F &&f, Args &&...args)
      -> std::future<typename std::result_of<F(Args...)>::type> {
    typedef typename std::result_of<F(Args...)>::type result_t;
    typedef std::packaged_task<result_t()> job_t;

    // Held through a shared_ptr so the copyable std::function stored in the
    // queue can own the move-only packaged_task.
    std::shared_ptr<job_t> job = std::make_shared<job_t>(
        std::bind(std::forward<F>(f), std::forward<Args>(args)...));
    std::future<result_t> pending = job->get_future();

    {
      std::unique_lock<std::mutex> guard(queue_mutex);

      // don't allow enqueueing after stopping the pool
      if (stop)
        throw std::runtime_error("enqueue on stopped ThreadPool");

      tasks.emplace([job]() { (*job)(); });
    }
    condition.notify_one();
    return pending;
  }

  /// Signals shutdown and blocks until every worker thread has exited.
  ~ThreadPool() {
    {
      std::unique_lock<std::mutex> guard(queue_mutex);
      stop = true;
    }
    condition.notify_all();
    for (size_t i = 0; i < workers.size(); ++i)
      workers[i].join();
  }

private:
  /// Body of each worker thread: pop one task at a time and run it outside
  /// the lock, until the pool is stopped and the queue has drained.
  void worker_loop() {
    while (true) {
      std::function<void()> job;
      {
        std::unique_lock<std::mutex> guard(queue_mutex);
        while (!stop && tasks.empty())
          condition.wait(guard);
        // The wait only ends with an empty queue when `stop` is set.
        if (tasks.empty())
          return;
        job = std::move(tasks.front());
        tasks.pop();
      }
      job();
    }
  }

  // need to keep track of threads so we can join them
  std::vector<std::thread> workers;
  // the task queue
  std::queue<std::function<void()>> tasks;

  // synchronization
  std::mutex queue_mutex;
  std::condition_variable condition;
  bool stop;
};
|
||||
|
||||
#endif
|
Reference in New Issue
Block a user