[CI] run clang-format (#24)
This commit is contained in:
@@ -10,7 +10,6 @@
|
||||
|
||||
namespace mlir {
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AxisInfo
|
||||
//===----------------------------------------------------------------------===//
|
||||
@@ -25,26 +24,25 @@ public:
|
||||
|
||||
public:
|
||||
// Default constructor
|
||||
AxisInfo(): AxisInfo({}, {}, {}) { }
|
||||
AxisInfo() : AxisInfo({}, {}, {}) {}
|
||||
// Construct contiguity info with known contiguity
|
||||
AxisInfo(ContiguityT knownContiguity, DivisibilityT knownDivisibility,
|
||||
ConstancyT knownConstancy)
|
||||
: contiguity(knownContiguity), divisibility(knownDivisibility),
|
||||
constancy(knownConstancy), rank(contiguity.size()) {
|
||||
assert(knownDivisibility.size() == rank);
|
||||
assert(knownConstancy.size() == rank);
|
||||
}
|
||||
|
||||
|
||||
: contiguity(knownContiguity), divisibility(knownDivisibility),
|
||||
constancy(knownConstancy), rank(contiguity.size()) {
|
||||
assert(knownDivisibility.size() == rank);
|
||||
assert(knownConstancy.size() == rank);
|
||||
}
|
||||
|
||||
// Accessors
|
||||
int getContiguity(size_t d) const { return contiguity[d]; }
|
||||
const ContiguityT& getContiguity() const { return contiguity; }
|
||||
int getContiguity(size_t d) const { return contiguity[d]; }
|
||||
const ContiguityT &getContiguity() const { return contiguity; }
|
||||
|
||||
int getDivisibility(size_t d) const { return divisibility[d]; }
|
||||
const DivisibilityT& getDivisibility() const { return divisibility; }
|
||||
const DivisibilityT &getDivisibility() const { return divisibility; }
|
||||
|
||||
int getConstancy(size_t d) const { return constancy[d]; }
|
||||
const ConstancyT& getConstancy() const { return constancy; }
|
||||
int getConstancy(size_t d) const { return constancy[d]; }
|
||||
const ConstancyT &getConstancy() const { return constancy; }
|
||||
|
||||
int getRank() const { return rank; }
|
||||
|
||||
@@ -56,13 +54,13 @@ public:
|
||||
}
|
||||
|
||||
/// The pessimistic value state of the contiguity is unknown.
|
||||
static AxisInfo getPessimisticValueState(MLIRContext *context)
|
||||
{ return AxisInfo(); }
|
||||
static AxisInfo getPessimisticValueState(MLIRContext *context) {
|
||||
return AxisInfo();
|
||||
}
|
||||
static AxisInfo getPessimisticValueState(Value value);
|
||||
|
||||
// The gcd of both arguments for each dimension
|
||||
static AxisInfo join(const AxisInfo &lhs,
|
||||
const AxisInfo &rhs);
|
||||
static AxisInfo join(const AxisInfo &lhs, const AxisInfo &rhs);
|
||||
|
||||
private:
|
||||
/// The _contiguity_ information maps the `d`-th
|
||||
@@ -81,7 +79,7 @@ private:
|
||||
/// [19, 23, 27, 31]
|
||||
/// Would have contiguity [2, 1].
|
||||
ContiguityT contiguity;
|
||||
|
||||
|
||||
/// The _divisibility_ information maps the `d`-th
|
||||
/// dimension to the largest power-of-two that
|
||||
/// divides the first element of all the values along it
|
||||
@@ -107,39 +105,36 @@ private:
|
||||
/// [16, 16, 16, 16, 20, 20, 20, 20]
|
||||
/// would have constancy [1, 4]
|
||||
ConstancyT constancy;
|
||||
|
||||
|
||||
// number of dimensions of the lattice
|
||||
int rank;
|
||||
};
|
||||
|
||||
|
||||
class AxisInfoAnalysis
|
||||
: public ForwardDataFlowAnalysis<AxisInfo> {
|
||||
class AxisInfoAnalysis : public ForwardDataFlowAnalysis<AxisInfo> {
|
||||
|
||||
private:
|
||||
static const int maxPow2Divisor = 65536;
|
||||
|
||||
int highestPowOf2Divisor(int n){
|
||||
if(n==0)
|
||||
|
||||
int highestPowOf2Divisor(int n) {
|
||||
if (n == 0)
|
||||
return maxPow2Divisor;
|
||||
return (n & (~(n - 1)));
|
||||
}
|
||||
|
||||
AxisInfo visitBinaryOp(Operation* op, AxisInfo lhsInfo, AxisInfo rhsInfo,
|
||||
const std::function<int(AxisInfo,AxisInfo,int)>& getContiguity,
|
||||
const std::function<int(AxisInfo,AxisInfo,int)>& getDivisibility,
|
||||
const std::function<int(AxisInfo,AxisInfo,int)>& getConstancy);
|
||||
AxisInfo visitBinaryOp(
|
||||
Operation *op, AxisInfo lhsInfo, AxisInfo rhsInfo,
|
||||
const std::function<int(AxisInfo, AxisInfo, int)> &getContiguity,
|
||||
const std::function<int(AxisInfo, AxisInfo, int)> &getDivisibility,
|
||||
const std::function<int(AxisInfo, AxisInfo, int)> &getConstancy);
|
||||
|
||||
public:
|
||||
using ForwardDataFlowAnalysis<AxisInfo>::ForwardDataFlowAnalysis;
|
||||
|
||||
ChangeResult visitOperation(Operation *op,
|
||||
ArrayRef<LatticeElement<AxisInfo> *> operands) override;
|
||||
|
||||
ChangeResult
|
||||
visitOperation(Operation *op,
|
||||
ArrayRef<LatticeElement<AxisInfo> *> operands) override;
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
||||
} // namespace mlir
|
||||
|
||||
#endif
|
@@ -3,17 +3,13 @@
|
||||
|
||||
#include "triton/Conversion/TritonToTritonGPU/TritonToTritonGPU.h"
|
||||
|
||||
namespace mlir
|
||||
{
|
||||
namespace triton
|
||||
{
|
||||
namespace mlir {
|
||||
namespace triton {
|
||||
|
||||
#define GEN_PASS_REGISTRATION
|
||||
#include "triton/Conversion/Passes.h.inc"
|
||||
|
||||
|
||||
} // namespace triton
|
||||
} // namespace mlir
|
||||
|
||||
|
||||
#endif
|
@@ -3,18 +3,17 @@
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace mlir{
|
||||
namespace mlir {
|
||||
|
||||
class ModuleOp;
|
||||
template <typename T> class OperationPass;
|
||||
|
||||
namespace triton{
|
||||
namespace triton {
|
||||
|
||||
std::unique_ptr<OperationPass<ModuleOp>>
|
||||
std::unique_ptr<OperationPass<ModuleOp>>
|
||||
createConvertTritonToTritonGPUPass(int numWarps = 4);
|
||||
|
||||
}
|
||||
} // namespace mlir
|
||||
|
||||
|
||||
#endif
|
@@ -1,17 +1,16 @@
|
||||
#ifndef TRITON_DIALECT_TRITON_IR_DIALECT_H_
|
||||
#define TRITON_DIALECT_TRITON_IR_DIALECT_H_
|
||||
|
||||
|
||||
#include "mlir/Dialect/SCF/SCF.h"
|
||||
#include "mlir/Dialect/StandardOps/IR/Ops.h"
|
||||
#include "mlir/IR/BuiltinOps.h"
|
||||
#include "mlir/IR/Dialect.h"
|
||||
#include "mlir/Interfaces/ControlFlowInterfaces.h"
|
||||
#include "mlir/Dialect/StandardOps/IR/Ops.h"
|
||||
#include "mlir/Dialect/SCF/SCF.h"
|
||||
|
||||
#include "triton/Dialect/Triton/IR/Traits.h"
|
||||
#include "triton/Dialect/Triton/IR/Types.h"
|
||||
#include "triton/Dialect/Triton/IR/Dialect.h.inc"
|
||||
#include "triton/Dialect/Triton/IR/OpsEnums.h.inc"
|
||||
#include "triton/Dialect/Triton/IR/Traits.h"
|
||||
#include "triton/Dialect/Triton/IR/Types.h"
|
||||
|
||||
#define GET_OP_CLASSES
|
||||
#include "triton/Dialect/Triton/IR/Ops.h.inc"
|
||||
|
@@ -19,7 +19,7 @@ public:
|
||||
static LogicalResult verifyTrait(Operation *op) {
|
||||
// The rationale for this number is to prevent users from creating programs
|
||||
// that would have catastrophic register pressure and cause the compiler to
|
||||
// hang.
|
||||
// hang.
|
||||
// Since H100 has 256KB registers, we should allow users to create tensors
|
||||
// of size up to 256K elements. It will spill for datatypes wider than 1B,
|
||||
// but we probably should limit number of elements (rather than bytes) to
|
||||
@@ -31,8 +31,8 @@ public:
|
||||
for (int64_t s : tensorType.getShape())
|
||||
numElements *= s;
|
||||
if (numElements > maxElement)
|
||||
return op->emitError("Maximum allowed number of elements is ") << maxElement << ", but "
|
||||
<< *op << " has more than that";
|
||||
return op->emitError("Maximum allowed number of elements is ")
|
||||
<< maxElement << ", but " << *op << " has more than that";
|
||||
if ((numElements & (numElements - 1)) != 0)
|
||||
return op->emitError("Number of elements must be power-of-two, but ")
|
||||
<< *op << " doesn't follow the rule";
|
||||
@@ -45,8 +45,8 @@ public:
|
||||
for (int64_t s : tensorType.getShape())
|
||||
numElements *= s;
|
||||
if (numElements > maxElement)
|
||||
return op->emitError("Maximum allowed number of elements is ") << maxElement << ", but "
|
||||
<< *op << " has more than that";
|
||||
return op->emitError("Maximum allowed number of elements is ")
|
||||
<< maxElement << ", but " << *op << " has more than that";
|
||||
if ((numElements & (numElements - 1)) != 0)
|
||||
return op->emitError("Number of elements must be power-of-two, but ")
|
||||
<< *op << " doesn't follow the rule";
|
||||
@@ -57,7 +57,7 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
} // namespace OpTrait
|
||||
} // namespace mlir
|
||||
|
||||
#endif
|
||||
|
@@ -13,6 +13,6 @@ std::unique_ptr<Pass> createCombineOpsPass();
|
||||
#define GEN_PASS_REGISTRATION
|
||||
#include "triton/Dialect/Triton/Transforms/Passes.h.inc"
|
||||
|
||||
}
|
||||
} // namespace mlir
|
||||
|
||||
#endif
|
||||
|
@@ -15,5 +15,4 @@
|
||||
#define GET_OP_CLASSES
|
||||
#include "triton/Dialect/TritonGPU/IR/Ops.h.inc"
|
||||
|
||||
|
||||
#endif // TRITON_DIALECT_TRITONGPU_IR_DIALECT_H_
|
||||
|
@@ -14,6 +14,7 @@ namespace mlir {
|
||||
class TritonGPUTypeConverter : public TypeConverter {
|
||||
public:
|
||||
TritonGPUTypeConverter(MLIRContext *context, int numThreads);
|
||||
|
||||
private:
|
||||
MLIRContext *context;
|
||||
int numThreads;
|
||||
@@ -21,8 +22,10 @@ private:
|
||||
|
||||
class TritonGPUConversionTarget : public ConversionTarget {
|
||||
TritonGPUTypeConverter &typeConverter;
|
||||
|
||||
public:
|
||||
explicit TritonGPUConversionTarget(MLIRContext &ctx, TritonGPUTypeConverter &typeConverter);
|
||||
explicit TritonGPUConversionTarget(MLIRContext &ctx,
|
||||
TritonGPUTypeConverter &typeConverter);
|
||||
|
||||
/// update layouts & insert ConvertLayoutOp if necessary
|
||||
LogicalResult refineLayouts(ModuleOp mod, int numThreads);
|
||||
|
387
include/triton/driver/dispatch.h
Executable file → Normal file
387
include/triton/driver/dispatch.h
Executable file → Normal file
@@ -3,10 +3,10 @@
|
||||
#ifndef _TRITON_DRIVER_DISPATCH_H_
|
||||
#define _TRITON_DRIVER_DISPATCH_H_
|
||||
|
||||
#include <type_traits>
|
||||
#include <dlfcn.h>
|
||||
#include <type_traits>
|
||||
|
||||
//CUDA Backend
|
||||
// CUDA Backend
|
||||
#include "triton/external/CUDA/cuda.h"
|
||||
#include "triton/external/CUDA/nvml.h"
|
||||
|
||||
@@ -14,47 +14,43 @@
|
||||
//#define __HIP_PLATFORM_AMD__
|
||||
#include "triton/external/hip.h"
|
||||
|
||||
//Exceptions
|
||||
// Exceptions
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace llvm {
|
||||
class PassRegistry;
|
||||
class Module;
|
||||
}
|
||||
} // namespace llvm
|
||||
|
||||
namespace triton
|
||||
{
|
||||
namespace driver
|
||||
{
|
||||
namespace triton {
|
||||
namespace driver {
|
||||
|
||||
class cu_context;
|
||||
|
||||
template<class T> void check(T){}
|
||||
template <class T> void check(T) {}
|
||||
void check(CUresult err);
|
||||
void check(hipError_t err);
|
||||
|
||||
class dispatch
|
||||
{
|
||||
class dispatch {
|
||||
protected:
|
||||
template <class F>
|
||||
struct return_type;
|
||||
template <class F> struct return_type;
|
||||
|
||||
template <class R, class... A>
|
||||
struct return_type<R (*)(A...)>
|
||||
{ typedef R type; };
|
||||
template <class R, class... A> struct return_type<R (*)(A...)> {
|
||||
typedef R type;
|
||||
};
|
||||
|
||||
typedef bool (*f_init_t)();
|
||||
|
||||
template<f_init_t initializer, typename FunPtrT, typename... Args>
|
||||
static typename return_type<FunPtrT>::type f_impl(void*& lib_h, FunPtrT, void*& cache, const char * name, Args... args)
|
||||
{
|
||||
template <f_init_t initializer, typename FunPtrT, typename... Args>
|
||||
static typename return_type<FunPtrT>::type
|
||||
f_impl(void *&lib_h, FunPtrT, void *&cache, const char *name, Args... args) {
|
||||
initializer();
|
||||
if(cache == nullptr){
|
||||
if (cache == nullptr) {
|
||||
cache = dlsym(lib_h, name);
|
||||
if(cache == 0)
|
||||
throw std::runtime_error("dlsym unable to load function");
|
||||
}
|
||||
if (cache == 0)
|
||||
throw std::runtime_error("dlsym unable to load function");
|
||||
}
|
||||
FunPtrT fptr;
|
||||
*reinterpret_cast<void **>(&fptr) = cache;
|
||||
typename return_type<FunPtrT>::type res = (*fptr)(args...);
|
||||
@@ -76,63 +72,99 @@ public:
|
||||
// context management
|
||||
static CUresult cuInit(unsigned int Flags);
|
||||
static CUresult cuCtxDestroy_v2(CUcontext ctx);
|
||||
static CUresult cuCtxCreate_v2(CUcontext *pctx, unsigned int flags, CUdevice dev);
|
||||
static CUresult cuCtxCreate_v2(CUcontext *pctx, unsigned int flags,
|
||||
CUdevice dev);
|
||||
static CUresult cuCtxPushCurrent_v2(CUcontext ctx);
|
||||
static CUresult cuCtxPopCurrent_v2(CUcontext *pctx);
|
||||
static CUresult cuCtxGetDevice(CUdevice* result);
|
||||
static CUresult cuCtxEnablePeerAccess(CUcontext peerContext, unsigned int flags);
|
||||
static CUresult cuCtxGetDevice(CUdevice *result);
|
||||
static CUresult cuCtxEnablePeerAccess(CUcontext peerContext,
|
||||
unsigned int flags);
|
||||
static CUresult cuDriverGetVersion(int *driverVersion);
|
||||
// device management
|
||||
static CUresult cuDeviceGet(CUdevice *device, int ordinal);
|
||||
static CUresult cuDeviceGetName(char *name, int len, CUdevice dev);
|
||||
static CUresult cuDeviceGetPCIBusId(char *id, int len, CUdevice dev);
|
||||
static CUresult cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
|
||||
static CUresult cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib,
|
||||
CUdevice dev);
|
||||
static CUresult cuDeviceGetCount(int *count);
|
||||
// link management
|
||||
static CUresult cuLinkAddData_v2(CUlinkState state, CUjitInputType type, void* data, size_t size, const char* name, unsigned int numOptions, CUjit_option* options, void** optionValues);
|
||||
static CUresult cuLinkCreate_v2(unsigned int numOptions, CUjit_option* options, void** optionValues, CUlinkState* stateOut);
|
||||
static CUresult cuLinkComplete(CUlinkState state, void** cubinOut, size_t* sizeOut);
|
||||
static CUresult cuLinkAddData_v2(CUlinkState state, CUjitInputType type,
|
||||
void *data, size_t size, const char *name,
|
||||
unsigned int numOptions,
|
||||
CUjit_option *options, void **optionValues);
|
||||
static CUresult cuLinkCreate_v2(unsigned int numOptions,
|
||||
CUjit_option *options, void **optionValues,
|
||||
CUlinkState *stateOut);
|
||||
static CUresult cuLinkComplete(CUlinkState state, void **cubinOut,
|
||||
size_t *sizeOut);
|
||||
static CUresult cuLinkDestroy(CUlinkState state);
|
||||
// module management
|
||||
static CUresult cuModuleGetGlobal_v2(CUdeviceptr *dptr, size_t* bytes, CUmodule hmod, const char *name);
|
||||
static CUresult cuModuleGetGlobal_v2(CUdeviceptr *dptr, size_t *bytes,
|
||||
CUmodule hmod, const char *name);
|
||||
static CUresult cuModuleLoad(CUmodule *module, const char *fname);
|
||||
static CUresult cuModuleLoadData(CUmodule* module, const void* image);
|
||||
static CUresult cuModuleLoadData(CUmodule *module, const void *image);
|
||||
static CUresult cuModuleUnload(CUmodule hmod);
|
||||
static CUresult cuModuleLoadDataEx(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
|
||||
static CUresult cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, const char *name);
|
||||
static CUresult cuModuleLoadDataEx(CUmodule *module, const void *image,
|
||||
unsigned int numOptions,
|
||||
CUjit_option *options,
|
||||
void **optionValues);
|
||||
static CUresult cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod,
|
||||
const char *name);
|
||||
// stream management
|
||||
static CUresult cuStreamCreate(CUstream *phStream, unsigned int Flags);
|
||||
static CUresult cuStreamSynchronize(CUstream hStream);
|
||||
static CUresult cuStreamGetCtx(CUstream hStream, CUcontext* pctx);
|
||||
static CUresult cuStreamGetCtx(CUstream hStream, CUcontext *pctx);
|
||||
static CUresult cuStreamDestroy_v2(CUstream hStream);
|
||||
static CUresult cuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra);
|
||||
static CUresult cuLaunchKernel(CUfunction f, unsigned int gridDimX,
|
||||
unsigned int gridDimY, unsigned int gridDimZ,
|
||||
unsigned int blockDimX, unsigned int blockDimY,
|
||||
unsigned int blockDimZ,
|
||||
unsigned int sharedMemBytes, CUstream hStream,
|
||||
void **kernelParams, void **extra);
|
||||
// function management
|
||||
static CUresult cuFuncGetAttribute(int* pi, CUfunction_attribute attrib, CUfunction hfunc);
|
||||
static CUresult cuFuncSetAttribute(CUfunction hfunc, CUfunction_attribute attrib, int value);
|
||||
static CUresult cuFuncGetAttribute(int *pi, CUfunction_attribute attrib,
|
||||
CUfunction hfunc);
|
||||
static CUresult cuFuncSetAttribute(CUfunction hfunc,
|
||||
CUfunction_attribute attrib, int value);
|
||||
static CUresult cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
|
||||
// memory management
|
||||
static CUresult cuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize);
|
||||
static CUresult cuPointerGetAttribute(void * data, CUpointer_attribute attribute, CUdeviceptr ptr);
|
||||
static CUresult cuMemsetD8Async(CUdeviceptr dst, unsigned char x, size_t N, CUstream stream);
|
||||
static CUresult cuMemcpyDtoH_v2(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount);
|
||||
static CUresult cuPointerGetAttribute(void *data,
|
||||
CUpointer_attribute attribute,
|
||||
CUdeviceptr ptr);
|
||||
static CUresult cuMemsetD8Async(CUdeviceptr dst, unsigned char x, size_t N,
|
||||
CUstream stream);
|
||||
static CUresult cuMemcpyDtoH_v2(void *dstHost, CUdeviceptr srcDevice,
|
||||
size_t ByteCount);
|
||||
static CUresult cuMemFree_v2(CUdeviceptr dptr);
|
||||
static CUresult cuMemcpyDtoHAsync_v2(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
|
||||
static CUresult cuMemcpyHtoDAsync_v2(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream);
|
||||
static CUresult cuMemcpyHtoD_v2(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
|
||||
static CUresult cuMemcpyDtoHAsync_v2(void *dstHost, CUdeviceptr srcDevice,
|
||||
size_t ByteCount, CUstream hStream);
|
||||
static CUresult cuMemcpyHtoDAsync_v2(CUdeviceptr dstDevice,
|
||||
const void *srcHost, size_t ByteCount,
|
||||
CUstream hStream);
|
||||
static CUresult cuMemcpyHtoD_v2(CUdeviceptr dstDevice, const void *srcHost,
|
||||
size_t ByteCount);
|
||||
// event management
|
||||
static CUresult cuEventCreate(CUevent *phEvent, unsigned int Flags);
|
||||
static CUresult cuEventElapsedTime(float *pMilliseconds, CUevent hStart, CUevent hEnd);
|
||||
static CUresult cuEventElapsedTime(float *pMilliseconds, CUevent hStart,
|
||||
CUevent hEnd);
|
||||
static CUresult cuEventRecord(CUevent hEvent, CUstream hStream);
|
||||
static CUresult cuEventDestroy_v2(CUevent hEvent);
|
||||
|
||||
|
||||
/* ------------------- *
|
||||
* NVML
|
||||
* ------------------- */
|
||||
static nvmlReturn_t nvmlDeviceGetHandleByPciBusId_v2( const char* pciBusId, nvmlDevice_t* device);
|
||||
static nvmlReturn_t nvmlDeviceGetClockInfo(nvmlDevice_t device, nvmlClockType_t type, unsigned int *clock);
|
||||
static nvmlReturn_t nvmlDeviceGetMaxClockInfo(nvmlDevice_t device, nvmlClockType_t type, unsigned int *clock);
|
||||
static nvmlReturn_t nvmlDeviceSetApplicationsClocks(nvmlDevice_t device, unsigned int mem_clock, unsigned int sm_clock);
|
||||
static nvmlReturn_t nvmlDeviceGetHandleByPciBusId_v2(const char *pciBusId,
|
||||
nvmlDevice_t *device);
|
||||
static nvmlReturn_t nvmlDeviceGetClockInfo(nvmlDevice_t device,
|
||||
nvmlClockType_t type,
|
||||
unsigned int *clock);
|
||||
static nvmlReturn_t nvmlDeviceGetMaxClockInfo(nvmlDevice_t device,
|
||||
nvmlClockType_t type,
|
||||
unsigned int *clock);
|
||||
static nvmlReturn_t nvmlDeviceSetApplicationsClocks(nvmlDevice_t device,
|
||||
unsigned int mem_clock,
|
||||
unsigned int sm_clock);
|
||||
|
||||
/* ------------------- *
|
||||
* HIP
|
||||
@@ -140,177 +172,198 @@ public:
|
||||
// context management
|
||||
static hipError_t hipInit(unsigned int Flags);
|
||||
static hipError_t hipCtxDestroy(hipCtx_t ctx);
|
||||
static hipError_t hipCtxCreate(hipCtx_t *pctx, unsigned int flags, hipDevice_t dev);
|
||||
static hipError_t hipCtxCreate(hipCtx_t *pctx, unsigned int flags,
|
||||
hipDevice_t dev);
|
||||
static hipError_t hipCtxPushCurrent(hipCtx_t ctx);
|
||||
static hipError_t hipCtxPopCurrent(hipCtx_t *pctx);
|
||||
static hipError_t hipCtxGetDevice(hipDevice_t* result);
|
||||
static hipError_t hipCtxEnablePeerAccess(hipCtx_t peerContext, unsigned int flags);
|
||||
static hipError_t hipCtxGetDevice(hipDevice_t *result);
|
||||
static hipError_t hipCtxEnablePeerAccess(hipCtx_t peerContext,
|
||||
unsigned int flags);
|
||||
static hipError_t hipDriverGetVersion(int *driverVersion);
|
||||
// device management
|
||||
static hipError_t hipGetDevice(hipDevice_t *device, int ordinal);
|
||||
static hipError_t hipDeviceGetName(char *name, int len, hipDevice_t dev);
|
||||
static hipError_t hipDeviceGetPCIBusId(char *id, int len, hipDevice_t dev);
|
||||
static hipError_t hipDeviceGetAttribute(int *pi, hipDeviceAttribute_t attrib, hipDevice_t dev);
|
||||
static hipError_t hipDeviceGetAttribute(int *pi, hipDeviceAttribute_t attrib,
|
||||
hipDevice_t dev);
|
||||
static hipError_t hipGetDeviceCount(int *count);
|
||||
// module management
|
||||
static hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t* bytes, hipModule_t hmod, const char *name);
|
||||
static hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes,
|
||||
hipModule_t hmod, const char *name);
|
||||
static hipError_t hipModuleLoad(hipModule_t *module, const char *fname);
|
||||
static hipError_t hipModuleLoadData(hipModule_t* module, const void* image);
|
||||
static hipError_t hipModuleLoadData(hipModule_t *module, const void *image);
|
||||
static hipError_t hipModuleUnload(hipModule_t hmod);
|
||||
static hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues);
|
||||
static hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const char *name);
|
||||
static hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image,
|
||||
unsigned int numOptions,
|
||||
hipJitOption *options,
|
||||
void **optionValues);
|
||||
static hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod,
|
||||
const char *name);
|
||||
// stream management
|
||||
static hipError_t hipStreamCreate(hipStream_t *phStream, unsigned int Flags);
|
||||
static hipError_t hipStreamSynchronize(hipStream_t hStream);
|
||||
static hipError_t hipStreamDestroy(hipStream_t hStream);
|
||||
static hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, hipStream_t hStream, void **kernelParams, void **extra);
|
||||
static hipError_t
|
||||
hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX,
|
||||
unsigned int gridDimY, unsigned int gridDimZ,
|
||||
unsigned int blockDimX, unsigned int blockDimY,
|
||||
unsigned int blockDimZ, unsigned int sharedMemBytes,
|
||||
hipStream_t hStream, void **kernelParams, void **extra);
|
||||
// function management
|
||||
static hipError_t hipFuncGetAttributes(hipFuncAttributes* attrib, void* hfunc);
|
||||
static hipError_t hipFuncSetAttribute(hipFunction_t hfunc, hipFuncAttribute attrib, int value);
|
||||
static hipError_t hipFuncSetCacheConfig(hipFunction_t hfunc, hipFuncCache_t config);
|
||||
static hipError_t hipFuncGetAttributes(hipFuncAttributes *attrib,
|
||||
void *hfunc);
|
||||
static hipError_t hipFuncSetAttribute(hipFunction_t hfunc,
|
||||
hipFuncAttribute attrib, int value);
|
||||
static hipError_t hipFuncSetCacheConfig(hipFunction_t hfunc,
|
||||
hipFuncCache_t config);
|
||||
// memory management
|
||||
static hipError_t hipMalloc(hipDeviceptr_t *dptr, size_t bytesize);
|
||||
static hipError_t hipPointerGetAttribute(void * data, CUpointer_attribute attribute, hipDeviceptr_t ptr);
|
||||
static hipError_t hipMemsetD8Async(hipDeviceptr_t dst, unsigned char x, size_t N, hipStream_t stream);
|
||||
static hipError_t hipMemcpyDtoH(void *dstHost, hipDeviceptr_t srcDevice, size_t ByteCount);
|
||||
static hipError_t hipPointerGetAttribute(void *data,
|
||||
CUpointer_attribute attribute,
|
||||
hipDeviceptr_t ptr);
|
||||
static hipError_t hipMemsetD8Async(hipDeviceptr_t dst, unsigned char x,
|
||||
size_t N, hipStream_t stream);
|
||||
static hipError_t hipMemcpyDtoH(void *dstHost, hipDeviceptr_t srcDevice,
|
||||
size_t ByteCount);
|
||||
static hipError_t hipFree(hipDeviceptr_t dptr);
|
||||
static hipError_t hipMemcpyDtoHAsync(void *dstHost, hipDeviceptr_t srcDevice, size_t ByteCount, hipStream_t hStream);
|
||||
static hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dstDevice, const void *srcHost, size_t ByteCount, hipStream_t hStream);
|
||||
static hipError_t hipMemcpyHtoD(hipDeviceptr_t dstDevice, const void *srcHost, size_t ByteCount);
|
||||
static hipError_t hipMemcpyDtoHAsync(void *dstHost, hipDeviceptr_t srcDevice,
|
||||
size_t ByteCount, hipStream_t hStream);
|
||||
static hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dstDevice,
|
||||
const void *srcHost, size_t ByteCount,
|
||||
hipStream_t hStream);
|
||||
static hipError_t hipMemcpyHtoD(hipDeviceptr_t dstDevice, const void *srcHost,
|
||||
size_t ByteCount);
|
||||
// event management
|
||||
static hipError_t hipEventCreate(hipEvent_t *phEvent, unsigned int Flags);
|
||||
static hipError_t hipEventElapsedTime(float *pMilliseconds, hipEvent_t hStart, hipEvent_t hEnd);
|
||||
static hipError_t hipEventElapsedTime(float *pMilliseconds, hipEvent_t hStart,
|
||||
hipEvent_t hEnd);
|
||||
static hipError_t hipEventRecord(hipEvent_t hEvent, hipStream_t hStream);
|
||||
static hipError_t hipEventDestroy(hipEvent_t hEvent);
|
||||
|
||||
|
||||
|
||||
private:
|
||||
|
||||
// Libraries
|
||||
static void* cuda_;
|
||||
static void* nvml_;
|
||||
static void* hip_;
|
||||
|
||||
static void *cuda_;
|
||||
static void *nvml_;
|
||||
static void *hip_;
|
||||
|
||||
/* ------------------- *
|
||||
* CUDA
|
||||
* ------------------- */
|
||||
// context management
|
||||
static void* cuCtxGetCurrent_;
|
||||
static void* cuCtxSetCurrent_;
|
||||
static void* cuCtxDestroy_v2_;
|
||||
static void* cuCtxCreate_v2_;
|
||||
static void* cuCtxGetDevice_;
|
||||
static void* cuCtxPushCurrent_v2_;
|
||||
static void* cuCtxPopCurrent_v2_;
|
||||
static void* cuCtxEnablePeerAccess_;
|
||||
static void* cuDriverGetVersion_;
|
||||
static void* cuInit_;
|
||||
static void *cuCtxGetCurrent_;
|
||||
static void *cuCtxSetCurrent_;
|
||||
static void *cuCtxDestroy_v2_;
|
||||
static void *cuCtxCreate_v2_;
|
||||
static void *cuCtxGetDevice_;
|
||||
static void *cuCtxPushCurrent_v2_;
|
||||
static void *cuCtxPopCurrent_v2_;
|
||||
static void *cuCtxEnablePeerAccess_;
|
||||
static void *cuDriverGetVersion_;
|
||||
static void *cuInit_;
|
||||
// device management
|
||||
static void* cuDeviceGet_;
|
||||
static void* cuDeviceGetName_;
|
||||
static void* cuDeviceGetPCIBusId_;
|
||||
static void* cuDeviceGetAttribute_;
|
||||
static void* cuDeviceGetCount_;
|
||||
static void *cuDeviceGet_;
|
||||
static void *cuDeviceGetName_;
|
||||
static void *cuDeviceGetPCIBusId_;
|
||||
static void *cuDeviceGetAttribute_;
|
||||
static void *cuDeviceGetCount_;
|
||||
// link management
|
||||
static void* cuLinkAddData_v2_;
|
||||
static void* cuLinkCreate_v2_;
|
||||
static void* cuLinkDestroy_;
|
||||
static void* cuLinkComplete_;
|
||||
static void *cuLinkAddData_v2_;
|
||||
static void *cuLinkCreate_v2_;
|
||||
static void *cuLinkDestroy_;
|
||||
static void *cuLinkComplete_;
|
||||
// module management
|
||||
static void* cuModuleGetGlobal_v2_;
|
||||
static void* cuModuleLoad_;
|
||||
static void* cuModuleUnload_;
|
||||
static void* cuModuleLoadDataEx_;
|
||||
static void* cuModuleLoadData_;
|
||||
static void* cuModuleGetFunction_;
|
||||
static void *cuModuleGetGlobal_v2_;
|
||||
static void *cuModuleLoad_;
|
||||
static void *cuModuleUnload_;
|
||||
static void *cuModuleLoadDataEx_;
|
||||
static void *cuModuleLoadData_;
|
||||
static void *cuModuleGetFunction_;
|
||||
// stream management
|
||||
static void* cuStreamCreate_;
|
||||
static void* cuStreamSynchronize_;
|
||||
static void* cuStreamDestroy_v2_;
|
||||
static void* cuStreamGetCtx_;
|
||||
static void* cuLaunchKernel_;
|
||||
static void *cuStreamCreate_;
|
||||
static void *cuStreamSynchronize_;
|
||||
static void *cuStreamDestroy_v2_;
|
||||
static void *cuStreamGetCtx_;
|
||||
static void *cuLaunchKernel_;
|
||||
// function management
|
||||
static void* cuFuncGetAttribute_;
|
||||
static void* cuFuncSetAttribute_;
|
||||
static void* cuFuncSetCacheConfig_;
|
||||
static void *cuFuncGetAttribute_;
|
||||
static void *cuFuncSetAttribute_;
|
||||
static void *cuFuncSetCacheConfig_;
|
||||
// memory management
|
||||
static void* cuMemcpyDtoH_v2_;
|
||||
static void* cuMemFree_v2_;
|
||||
static void* cuMemcpyDtoHAsync_v2_;
|
||||
static void* cuMemcpyHtoDAsync_v2_;
|
||||
static void* cuMemcpyHtoD_v2_;
|
||||
static void* cuMemAlloc_v2_;
|
||||
static void* cuMemsetD8Async_;
|
||||
static void* cuPointerGetAttribute_;
|
||||
static void *cuMemcpyDtoH_v2_;
|
||||
static void *cuMemFree_v2_;
|
||||
static void *cuMemcpyDtoHAsync_v2_;
|
||||
static void *cuMemcpyHtoDAsync_v2_;
|
||||
static void *cuMemcpyHtoD_v2_;
|
||||
static void *cuMemAlloc_v2_;
|
||||
static void *cuMemsetD8Async_;
|
||||
static void *cuPointerGetAttribute_;
|
||||
// event management
|
||||
static void* cuEventCreate_;
|
||||
static void* cuEventElapsedTime_;
|
||||
static void* cuEventRecord_;
|
||||
static void* cuEventDestroy_v2_;
|
||||
static void *cuEventCreate_;
|
||||
static void *cuEventElapsedTime_;
|
||||
static void *cuEventRecord_;
|
||||
static void *cuEventDestroy_v2_;
|
||||
|
||||
/* ------------------- *
|
||||
* NVML
|
||||
* ------------------- */
|
||||
static void* nvmlInit_v2_;
|
||||
static void* nvmlDeviceGetHandleByPciBusId_v2_;
|
||||
static void* nvmlDeviceGetClockInfo_;
|
||||
static void* nvmlDeviceGetMaxClockInfo_;
|
||||
static void* nvmlDeviceSetApplicationsClocks_;
|
||||
static void *nvmlInit_v2_;
|
||||
static void *nvmlDeviceGetHandleByPciBusId_v2_;
|
||||
static void *nvmlDeviceGetClockInfo_;
|
||||
static void *nvmlDeviceGetMaxClockInfo_;
|
||||
static void *nvmlDeviceSetApplicationsClocks_;
|
||||
|
||||
/* ------------------- *
|
||||
* HIP
|
||||
* ------------------- */
|
||||
// context management
|
||||
static void* hipInit_;
|
||||
static void* hipCtxDestroy_;
|
||||
static void* hipCtxCreate_;
|
||||
static void* hipCtxPushCurrent_;
|
||||
static void* hipCtxPopCurrent_;
|
||||
static void* hipCtxGetDevice_;
|
||||
static void* hipCtxEnablePeerAccess_;
|
||||
static void* hipDriverGetVersion_;
|
||||
static void *hipInit_;
|
||||
static void *hipCtxDestroy_;
|
||||
static void *hipCtxCreate_;
|
||||
static void *hipCtxPushCurrent_;
|
||||
static void *hipCtxPopCurrent_;
|
||||
static void *hipCtxGetDevice_;
|
||||
static void *hipCtxEnablePeerAccess_;
|
||||
static void *hipDriverGetVersion_;
|
||||
// device management
|
||||
static void* hipGetDevice_;
|
||||
static void* hipDeviceGetName_;
|
||||
static void* hipDeviceGetPCIBusId_;
|
||||
static void* hipDeviceGetAttribute_;
|
||||
static void* hipGetDeviceCount_;
|
||||
static void *hipGetDevice_;
|
||||
static void *hipDeviceGetName_;
|
||||
static void *hipDeviceGetPCIBusId_;
|
||||
static void *hipDeviceGetAttribute_;
|
||||
static void *hipGetDeviceCount_;
|
||||
// module management
|
||||
static void* hipModuleGetGlobal_;
|
||||
static void* hipModuleLoad_;
|
||||
static void* hipModuleLoadData_;
|
||||
static void* hipModuleUnload_;
|
||||
static void* hipModuleLoadDataEx_;
|
||||
static void* hipModuleGetFunction_;
|
||||
static void *hipModuleGetGlobal_;
|
||||
static void *hipModuleLoad_;
|
||||
static void *hipModuleLoadData_;
|
||||
static void *hipModuleUnload_;
|
||||
static void *hipModuleLoadDataEx_;
|
||||
static void *hipModuleGetFunction_;
|
||||
// stream management
|
||||
static void* hipStreamCreate_;
|
||||
static void* hipStreamSynchronize_;
|
||||
static void* hipStreamDestroy_;
|
||||
static void* hipModuleLaunchKernel_;;
|
||||
static void *hipStreamCreate_;
|
||||
static void *hipStreamSynchronize_;
|
||||
static void *hipStreamDestroy_;
|
||||
static void *hipModuleLaunchKernel_;
|
||||
;
|
||||
// function management
|
||||
static void* hipFuncGetAttributes_;
|
||||
static void* hipFuncSetAttribute_;
|
||||
static void* hipFuncSetCacheConfig_;
|
||||
static void *hipFuncGetAttributes_;
|
||||
static void *hipFuncSetAttribute_;
|
||||
static void *hipFuncSetCacheConfig_;
|
||||
// memory management
|
||||
static void* hipMalloc_;
|
||||
static void* hipPointerGetAttribute_;
|
||||
static void* hipMemsetD8Async_;
|
||||
static void* hipMemcpyDtoH_;
|
||||
static void* hipFree_;
|
||||
static void* hipMemcpyDtoHAsync_;
|
||||
static void* hipMemcpyHtoDAsync_;
|
||||
static void* hipMemcpyHtoD_;
|
||||
static void *hipMalloc_;
|
||||
static void *hipPointerGetAttribute_;
|
||||
static void *hipMemsetD8Async_;
|
||||
static void *hipMemcpyDtoH_;
|
||||
static void *hipFree_;
|
||||
static void *hipMemcpyDtoHAsync_;
|
||||
static void *hipMemcpyHtoDAsync_;
|
||||
static void *hipMemcpyHtoD_;
|
||||
// event management
|
||||
static void* hipEventCreate_;
|
||||
static void* hipEventElapsedTime_;
|
||||
static void* hipEventRecord_;
|
||||
static void* hipEventDestroy_;
|
||||
static void *hipEventCreate_;
|
||||
static void *hipEventElapsedTime_;
|
||||
static void *hipEventRecord_;
|
||||
static void *hipEventDestroy_;
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace driver
|
||||
} // namespace triton
|
||||
|
||||
#endif
|
||||
|
415
include/triton/driver/error.h
Executable file → Normal file
415
include/triton/driver/error.h
Executable file → Normal file
@@ -3,223 +3,252 @@
|
||||
#ifndef _TRITON_DRIVER_ERROR_H_
|
||||
#define _TRITON_DRIVER_ERROR_H_
|
||||
|
||||
#include <exception>
|
||||
#include "triton/driver/dispatch.h"
|
||||
#include <exception>
|
||||
|
||||
namespace triton {
|
||||
|
||||
namespace triton
|
||||
{
|
||||
namespace driver {
|
||||
|
||||
namespace driver
|
||||
{
|
||||
namespace exception {
|
||||
|
||||
namespace exception
|
||||
{
|
||||
namespace nvrtc {
|
||||
|
||||
namespace nvrtc
|
||||
{
|
||||
#define TRITON_CREATE_NVRTC_EXCEPTION(name, msg) \
|
||||
class name : public std::exception { \
|
||||
public: \
|
||||
const char *what() const throw() override { return "NVRTC: Error- " msg; } \
|
||||
}
|
||||
|
||||
#define TRITON_CREATE_NVRTC_EXCEPTION(name, msg) \
|
||||
class name: public std::exception { public: const char * what() const throw() override { return "NVRTC: Error- " msg; } }
|
||||
|
||||
TRITON_CREATE_NVRTC_EXCEPTION(out_of_memory ,"out of memory");
|
||||
TRITON_CREATE_NVRTC_EXCEPTION(program_creation_failure ,"program creation failure");
|
||||
TRITON_CREATE_NVRTC_EXCEPTION(invalid_input ,"invalid input");
|
||||
TRITON_CREATE_NVRTC_EXCEPTION(invalid_program ,"invalid program");
|
||||
TRITON_CREATE_NVRTC_EXCEPTION(invalid_option ,"invalid option");
|
||||
TRITON_CREATE_NVRTC_EXCEPTION(compilation ,"compilation");
|
||||
TRITON_CREATE_NVRTC_EXCEPTION(builtin_operation_failure ,"builtin operation failure");
|
||||
TRITON_CREATE_NVRTC_EXCEPTION(unknown_error ,"unknown error");
|
||||
TRITON_CREATE_NVRTC_EXCEPTION(out_of_memory, "out of memory");
|
||||
TRITON_CREATE_NVRTC_EXCEPTION(program_creation_failure,
|
||||
"program creation failure");
|
||||
TRITON_CREATE_NVRTC_EXCEPTION(invalid_input, "invalid input");
|
||||
TRITON_CREATE_NVRTC_EXCEPTION(invalid_program, "invalid program");
|
||||
TRITON_CREATE_NVRTC_EXCEPTION(invalid_option, "invalid option");
|
||||
TRITON_CREATE_NVRTC_EXCEPTION(compilation, "compilation");
|
||||
TRITON_CREATE_NVRTC_EXCEPTION(builtin_operation_failure,
|
||||
"builtin operation failure");
|
||||
TRITON_CREATE_NVRTC_EXCEPTION(unknown_error, "unknown error");
|
||||
|
||||
#undef TRITON_CREATE_NVRTC_EXCEPTION
|
||||
} // namespace nvrtc
|
||||
|
||||
namespace cuda {
|
||||
class base : public std::exception {};
|
||||
|
||||
#define TRITON_CREATE_CUDA_EXCEPTION(name, msg) \
|
||||
class name : public base { \
|
||||
public: \
|
||||
const char *what() const throw() override { return "CUDA: Error- " msg; } \
|
||||
}
|
||||
|
||||
|
||||
namespace cuda
|
||||
{
|
||||
class base: public std::exception{};
|
||||
|
||||
#define TRITON_CREATE_CUDA_EXCEPTION(name, msg) \
|
||||
class name: public base { public:const char * what() const throw() override { return "CUDA: Error- " msg; } }
|
||||
|
||||
|
||||
TRITON_CREATE_CUDA_EXCEPTION(invalid_value ,"invalid value");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(out_of_memory ,"out of memory");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(not_initialized ,"not initialized");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(deinitialized ,"deinitialized");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(profiler_disabled ,"profiler disabled");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(profiler_not_initialized ,"profiler not initialized");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(profiler_already_started ,"profiler already started");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(profiler_already_stopped ,"profiler already stopped");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(no_device ,"no device");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(invalid_device ,"invalid device");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(invalid_image ,"invalid image");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(invalid_context ,"invalid context");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(context_already_current ,"context already current");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(map_failed ,"map failed");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(unmap_failed ,"unmap failed");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(array_is_mapped ,"array is mapped");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(already_mapped ,"already mapped");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(no_binary_for_gpu ,"no binary for gpu");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(already_acquired ,"already acquired");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(not_mapped ,"not mapped");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(not_mapped_as_array ,"not mapped as array");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(not_mapped_as_pointer ,"not mapped as pointer");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(ecc_uncorrectable ,"ecc uncorrectable");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(unsupported_limit ,"unsupported limit");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(context_already_in_use ,"context already in use");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(peer_access_unsupported ,"peer access unsupported");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(invalid_ptx ,"invalid ptx");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(invalid_graphics_context ,"invalid graphics context");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(invalid_source ,"invalid source");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(file_not_found ,"file not found");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(shared_object_symbol_not_found ,"shared object symbol not found");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(shared_object_init_failed ,"shared object init failed");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(operating_system ,"operating system");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(invalid_handle ,"invalid handle");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(not_found ,"not found");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(not_ready ,"not ready");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(illegal_address ,"illegal address");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(launch_out_of_resources ,"launch out of resources");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(launch_timeout ,"launch timeout");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(launch_incompatible_texturing ,"launch incompatible texturing");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(peer_access_already_enabled ,"peer access already enabled");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(peer_access_not_enabled ,"peer access not enabled");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(primary_context_active ,"primary context active");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(context_is_destroyed ,"context is destroyed");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(assert_error ,"assert");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(too_many_peers ,"too many peers");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(host_memory_already_registered ,"host memory already registered");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(host_memory_not_registered ,"hot memory not registered");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(hardware_stack_error ,"hardware stack error");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(illegal_instruction ,"illegal instruction");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(misaligned_address ,"misaligned address");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(invalid_address_space ,"invalid address space");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(invalid_pc ,"invalid pc");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(launch_failed ,"launch failed");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(not_permitted ,"not permitted");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(not_supported ,"not supported");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(unknown ,"unknown");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(invalid_value, "invalid value");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(out_of_memory, "out of memory");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(not_initialized, "not initialized");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(deinitialized, "deinitialized");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(profiler_disabled, "profiler disabled");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(profiler_not_initialized,
|
||||
"profiler not initialized");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(profiler_already_started,
|
||||
"profiler already started");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(profiler_already_stopped,
|
||||
"profiler already stopped");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(no_device, "no device");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(invalid_device, "invalid device");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(invalid_image, "invalid image");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(invalid_context, "invalid context");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(context_already_current,
|
||||
"context already current");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(map_failed, "map failed");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(unmap_failed, "unmap failed");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(array_is_mapped, "array is mapped");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(already_mapped, "already mapped");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(no_binary_for_gpu, "no binary for gpu");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(already_acquired, "already acquired");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(not_mapped, "not mapped");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(not_mapped_as_array, "not mapped as array");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(not_mapped_as_pointer, "not mapped as pointer");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(ecc_uncorrectable, "ecc uncorrectable");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(unsupported_limit, "unsupported limit");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(context_already_in_use, "context already in use");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(peer_access_unsupported,
|
||||
"peer access unsupported");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(invalid_ptx, "invalid ptx");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(invalid_graphics_context,
|
||||
"invalid graphics context");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(invalid_source, "invalid source");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(file_not_found, "file not found");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(shared_object_symbol_not_found,
|
||||
"shared object symbol not found");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(shared_object_init_failed,
|
||||
"shared object init failed");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(operating_system, "operating system");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(invalid_handle, "invalid handle");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(not_found, "not found");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(not_ready, "not ready");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(illegal_address, "illegal address");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(launch_out_of_resources,
|
||||
"launch out of resources");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(launch_timeout, "launch timeout");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(launch_incompatible_texturing,
|
||||
"launch incompatible texturing");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(peer_access_already_enabled,
|
||||
"peer access already enabled");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(peer_access_not_enabled,
|
||||
"peer access not enabled");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(primary_context_active, "primary context active");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(context_is_destroyed, "context is destroyed");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(assert_error, "assert");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(too_many_peers, "too many peers");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(host_memory_already_registered,
|
||||
"host memory already registered");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(host_memory_not_registered,
|
||||
"hot memory not registered");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(hardware_stack_error, "hardware stack error");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(illegal_instruction, "illegal instruction");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(misaligned_address, "misaligned address");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(invalid_address_space, "invalid address space");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(invalid_pc, "invalid pc");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(launch_failed, "launch failed");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(not_permitted, "not permitted");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(not_supported, "not supported");
|
||||
TRITON_CREATE_CUDA_EXCEPTION(unknown, "unknown");
|
||||
|
||||
#undef TRITON_CREATE_CUDA_EXCEPTION
|
||||
} // namespace cuda
|
||||
|
||||
namespace cublas {
|
||||
class base : public std::exception {};
|
||||
|
||||
#define TRITON_CREATE_CUBLAS_EXCEPTION(name, msg) \
|
||||
class name : public base { \
|
||||
public: \
|
||||
const char *what() const throw() override { \
|
||||
return "CUBLAS: Error- " msg; \
|
||||
} \
|
||||
}
|
||||
|
||||
namespace cublas
|
||||
{
|
||||
class base: public std::exception{};
|
||||
|
||||
#define TRITON_CREATE_CUBLAS_EXCEPTION(name, msg) \
|
||||
class name: public base { public: const char * what() const throw() override { return "CUBLAS: Error- " msg; } }
|
||||
|
||||
TRITON_CREATE_CUBLAS_EXCEPTION(not_initialized ,"not initialized");
|
||||
TRITON_CREATE_CUBLAS_EXCEPTION(alloc_failed ,"alloc failed");
|
||||
TRITON_CREATE_CUBLAS_EXCEPTION(invalid_value ,"invalid value");
|
||||
TRITON_CREATE_CUBLAS_EXCEPTION(arch_mismatch ,"arch mismatch");
|
||||
TRITON_CREATE_CUBLAS_EXCEPTION(mapping_error ,"mapping error");
|
||||
TRITON_CREATE_CUBLAS_EXCEPTION(execution_failed ,"execution failed");
|
||||
TRITON_CREATE_CUBLAS_EXCEPTION(internal_error ,"internal error");
|
||||
TRITON_CREATE_CUBLAS_EXCEPTION(not_supported ,"not supported");
|
||||
TRITON_CREATE_CUBLAS_EXCEPTION(license_error ,"license error");
|
||||
TRITON_CREATE_CUBLAS_EXCEPTION(unknown ,"unknown");
|
||||
TRITON_CREATE_CUBLAS_EXCEPTION(not_initialized, "not initialized");
|
||||
TRITON_CREATE_CUBLAS_EXCEPTION(alloc_failed, "alloc failed");
|
||||
TRITON_CREATE_CUBLAS_EXCEPTION(invalid_value, "invalid value");
|
||||
TRITON_CREATE_CUBLAS_EXCEPTION(arch_mismatch, "arch mismatch");
|
||||
TRITON_CREATE_CUBLAS_EXCEPTION(mapping_error, "mapping error");
|
||||
TRITON_CREATE_CUBLAS_EXCEPTION(execution_failed, "execution failed");
|
||||
TRITON_CREATE_CUBLAS_EXCEPTION(internal_error, "internal error");
|
||||
TRITON_CREATE_CUBLAS_EXCEPTION(not_supported, "not supported");
|
||||
TRITON_CREATE_CUBLAS_EXCEPTION(license_error, "license error");
|
||||
TRITON_CREATE_CUBLAS_EXCEPTION(unknown, "unknown");
|
||||
|
||||
#undef TRITON_CREATE_CUBLAS_EXCEPTION
|
||||
} // namespace cublas
|
||||
|
||||
namespace cudnn {
|
||||
#define TRITON_CREATE_CUDNN_EXCEPTION(name, msg) \
|
||||
class name : public std::exception { \
|
||||
public: \
|
||||
const char *what() const throw() override { return "CUDNN: Error- " msg; } \
|
||||
}
|
||||
|
||||
namespace cudnn
|
||||
{
|
||||
#define TRITON_CREATE_CUDNN_EXCEPTION(name, msg) \
|
||||
class name: public std::exception { public: const char * what() const throw() override { return "CUDNN: Error- " msg; } }
|
||||
TRITON_CREATE_CUDNN_EXCEPTION(not_initialized, "not initialized");
|
||||
TRITON_CREATE_CUDNN_EXCEPTION(alloc_failed, "allocation failed");
|
||||
TRITON_CREATE_CUDNN_EXCEPTION(bad_param, "bad param");
|
||||
TRITON_CREATE_CUDNN_EXCEPTION(internal_error, "internal error");
|
||||
TRITON_CREATE_CUDNN_EXCEPTION(invalid_value, "invalid value");
|
||||
TRITON_CREATE_CUDNN_EXCEPTION(arch_mismatch, "arch mismatch");
|
||||
TRITON_CREATE_CUDNN_EXCEPTION(mapping_error, "mapping error");
|
||||
TRITON_CREATE_CUDNN_EXCEPTION(execution_failed, "execution failed");
|
||||
TRITON_CREATE_CUDNN_EXCEPTION(not_supported, "not supported");
|
||||
TRITON_CREATE_CUDNN_EXCEPTION(license_error, "license error");
|
||||
TRITON_CREATE_CUDNN_EXCEPTION(runtime_prerequisite_missing,
|
||||
"prerequisite missing");
|
||||
TRITON_CREATE_CUDNN_EXCEPTION(runtime_in_progress, "runtime in progress");
|
||||
TRITON_CREATE_CUDNN_EXCEPTION(runtime_fp_overflow, "runtime fp overflow");
|
||||
} // namespace cudnn
|
||||
|
||||
TRITON_CREATE_CUDNN_EXCEPTION(not_initialized ,"not initialized");
|
||||
TRITON_CREATE_CUDNN_EXCEPTION(alloc_failed ,"allocation failed");
|
||||
TRITON_CREATE_CUDNN_EXCEPTION(bad_param ,"bad param");
|
||||
TRITON_CREATE_CUDNN_EXCEPTION(internal_error ,"internal error");
|
||||
TRITON_CREATE_CUDNN_EXCEPTION(invalid_value ,"invalid value");
|
||||
TRITON_CREATE_CUDNN_EXCEPTION(arch_mismatch ,"arch mismatch");
|
||||
TRITON_CREATE_CUDNN_EXCEPTION(mapping_error ,"mapping error");
|
||||
TRITON_CREATE_CUDNN_EXCEPTION(execution_failed ,"execution failed");
|
||||
TRITON_CREATE_CUDNN_EXCEPTION(not_supported ,"not supported");
|
||||
TRITON_CREATE_CUDNN_EXCEPTION(license_error ,"license error");
|
||||
TRITON_CREATE_CUDNN_EXCEPTION(runtime_prerequisite_missing ,"prerequisite missing");
|
||||
TRITON_CREATE_CUDNN_EXCEPTION(runtime_in_progress ,"runtime in progress");
|
||||
TRITON_CREATE_CUDNN_EXCEPTION(runtime_fp_overflow ,"runtime fp overflow");
|
||||
namespace hip {
|
||||
class base : public std::exception {};
|
||||
|
||||
#define TRITON_CREATE_HIP_EXCEPTION(name, msg) \
|
||||
class name : public base { \
|
||||
public: \
|
||||
const char *what() const throw() override { return "HIP: Error- " msg; } \
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
namespace hip
|
||||
{
|
||||
class base: public std::exception{};
|
||||
|
||||
#define TRITON_CREATE_HIP_EXCEPTION(name, msg) \
|
||||
class name: public base { public:const char * what() const throw() override { return "HIP: Error- " msg; } }
|
||||
|
||||
|
||||
TRITON_CREATE_HIP_EXCEPTION(invalid_value ,"invalid value");
|
||||
TRITON_CREATE_HIP_EXCEPTION(out_of_memory ,"out of memory");
|
||||
TRITON_CREATE_HIP_EXCEPTION(not_initialized ,"not initialized");
|
||||
TRITON_CREATE_HIP_EXCEPTION(deinitialized ,"deinitialized");
|
||||
TRITON_CREATE_HIP_EXCEPTION(profiler_disabled ,"profiler disabled");
|
||||
TRITON_CREATE_HIP_EXCEPTION(profiler_not_initialized ,"profiler not initialized");
|
||||
TRITON_CREATE_HIP_EXCEPTION(profiler_already_started ,"profiler already started");
|
||||
TRITON_CREATE_HIP_EXCEPTION(profiler_already_stopped ,"profiler already stopped");
|
||||
TRITON_CREATE_HIP_EXCEPTION(no_device ,"no device");
|
||||
TRITON_CREATE_HIP_EXCEPTION(invalid_device ,"invalid device");
|
||||
TRITON_CREATE_HIP_EXCEPTION(invalid_image ,"invalid image");
|
||||
TRITON_CREATE_HIP_EXCEPTION(invalid_context ,"invalid context");
|
||||
TRITON_CREATE_HIP_EXCEPTION(context_already_current ,"context already current");
|
||||
TRITON_CREATE_HIP_EXCEPTION(map_failed ,"map failed");
|
||||
TRITON_CREATE_HIP_EXCEPTION(unmap_failed ,"unmap failed");
|
||||
TRITON_CREATE_HIP_EXCEPTION(array_is_mapped ,"array is mapped");
|
||||
TRITON_CREATE_HIP_EXCEPTION(already_mapped ,"already mapped");
|
||||
TRITON_CREATE_HIP_EXCEPTION(no_binary_for_gpu ,"no binary for gpu");
|
||||
TRITON_CREATE_HIP_EXCEPTION(already_acquired ,"already acquired");
|
||||
TRITON_CREATE_HIP_EXCEPTION(not_mapped ,"not mapped");
|
||||
TRITON_CREATE_HIP_EXCEPTION(not_mapped_as_array ,"not mapped as array");
|
||||
TRITON_CREATE_HIP_EXCEPTION(not_mapped_as_pointer ,"not mapped as pointer");
|
||||
TRITON_CREATE_HIP_EXCEPTION(ecc_uncorrectable ,"ecc uncorrectable");
|
||||
TRITON_CREATE_HIP_EXCEPTION(unsupported_limit ,"unsupported limit");
|
||||
TRITON_CREATE_HIP_EXCEPTION(context_already_in_use ,"context already in use");
|
||||
TRITON_CREATE_HIP_EXCEPTION(peer_access_unsupported ,"peer access unsupported");
|
||||
TRITON_CREATE_HIP_EXCEPTION(invalid_ptx ,"invalid ptx");
|
||||
TRITON_CREATE_HIP_EXCEPTION(invalid_graphics_context ,"invalid graphics context");
|
||||
TRITON_CREATE_HIP_EXCEPTION(invalid_source ,"invalid source");
|
||||
TRITON_CREATE_HIP_EXCEPTION(file_not_found ,"file not found");
|
||||
TRITON_CREATE_HIP_EXCEPTION(shared_object_symbol_not_found ,"shared object symbol not found");
|
||||
TRITON_CREATE_HIP_EXCEPTION(shared_object_init_failed ,"shared object init failed");
|
||||
TRITON_CREATE_HIP_EXCEPTION(operating_system ,"operating system");
|
||||
TRITON_CREATE_HIP_EXCEPTION(invalid_handle ,"invalid handle");
|
||||
TRITON_CREATE_HIP_EXCEPTION(not_found ,"not found");
|
||||
TRITON_CREATE_HIP_EXCEPTION(not_ready ,"not ready");
|
||||
TRITON_CREATE_HIP_EXCEPTION(illegal_address ,"illegal address");
|
||||
TRITON_CREATE_HIP_EXCEPTION(launch_out_of_resources ,"launch out of resources");
|
||||
TRITON_CREATE_HIP_EXCEPTION(launch_timeout ,"launch timeout");
|
||||
TRITON_CREATE_HIP_EXCEPTION(launch_incompatible_texturing ,"launch incompatible texturing");
|
||||
TRITON_CREATE_HIP_EXCEPTION(peer_access_already_enabled ,"peer access already enabled");
|
||||
TRITON_CREATE_HIP_EXCEPTION(peer_access_not_enabled ,"peer access not enabled");
|
||||
TRITON_CREATE_HIP_EXCEPTION(primary_context_active ,"primary context active");
|
||||
TRITON_CREATE_HIP_EXCEPTION(context_is_destroyed ,"context is destroyed");
|
||||
TRITON_CREATE_HIP_EXCEPTION(assert_error ,"assert");
|
||||
TRITON_CREATE_HIP_EXCEPTION(too_many_peers ,"too many peers");
|
||||
TRITON_CREATE_HIP_EXCEPTION(host_memory_already_registered ,"host memory already registered");
|
||||
TRITON_CREATE_HIP_EXCEPTION(host_memory_not_registered ,"hot memory not registered");
|
||||
TRITON_CREATE_HIP_EXCEPTION(hardware_stack_error ,"hardware stack error");
|
||||
TRITON_CREATE_HIP_EXCEPTION(illegal_instruction ,"illegal instruction");
|
||||
TRITON_CREATE_HIP_EXCEPTION(misaligned_address ,"misaligned address");
|
||||
TRITON_CREATE_HIP_EXCEPTION(invalid_address_space ,"invalid address space");
|
||||
TRITON_CREATE_HIP_EXCEPTION(invalid_pc ,"invalid pc");
|
||||
TRITON_CREATE_HIP_EXCEPTION(launch_failed ,"launch failed");
|
||||
TRITON_CREATE_HIP_EXCEPTION(not_permitted ,"not permitted");
|
||||
TRITON_CREATE_HIP_EXCEPTION(not_supported ,"not supported");
|
||||
TRITON_CREATE_HIP_EXCEPTION(invalid_symbol ,"invalid symbol");
|
||||
TRITON_CREATE_HIP_EXCEPTION(unknown ,"unknown");
|
||||
TRITON_CREATE_HIP_EXCEPTION(invalid_value, "invalid value");
|
||||
TRITON_CREATE_HIP_EXCEPTION(out_of_memory, "out of memory");
|
||||
TRITON_CREATE_HIP_EXCEPTION(not_initialized, "not initialized");
|
||||
TRITON_CREATE_HIP_EXCEPTION(deinitialized, "deinitialized");
|
||||
TRITON_CREATE_HIP_EXCEPTION(profiler_disabled, "profiler disabled");
|
||||
TRITON_CREATE_HIP_EXCEPTION(profiler_not_initialized,
|
||||
"profiler not initialized");
|
||||
TRITON_CREATE_HIP_EXCEPTION(profiler_already_started,
|
||||
"profiler already started");
|
||||
TRITON_CREATE_HIP_EXCEPTION(profiler_already_stopped,
|
||||
"profiler already stopped");
|
||||
TRITON_CREATE_HIP_EXCEPTION(no_device, "no device");
|
||||
TRITON_CREATE_HIP_EXCEPTION(invalid_device, "invalid device");
|
||||
TRITON_CREATE_HIP_EXCEPTION(invalid_image, "invalid image");
|
||||
TRITON_CREATE_HIP_EXCEPTION(invalid_context, "invalid context");
|
||||
TRITON_CREATE_HIP_EXCEPTION(context_already_current, "context already current");
|
||||
TRITON_CREATE_HIP_EXCEPTION(map_failed, "map failed");
|
||||
TRITON_CREATE_HIP_EXCEPTION(unmap_failed, "unmap failed");
|
||||
TRITON_CREATE_HIP_EXCEPTION(array_is_mapped, "array is mapped");
|
||||
TRITON_CREATE_HIP_EXCEPTION(already_mapped, "already mapped");
|
||||
TRITON_CREATE_HIP_EXCEPTION(no_binary_for_gpu, "no binary for gpu");
|
||||
TRITON_CREATE_HIP_EXCEPTION(already_acquired, "already acquired");
|
||||
TRITON_CREATE_HIP_EXCEPTION(not_mapped, "not mapped");
|
||||
TRITON_CREATE_HIP_EXCEPTION(not_mapped_as_array, "not mapped as array");
|
||||
TRITON_CREATE_HIP_EXCEPTION(not_mapped_as_pointer, "not mapped as pointer");
|
||||
TRITON_CREATE_HIP_EXCEPTION(ecc_uncorrectable, "ecc uncorrectable");
|
||||
TRITON_CREATE_HIP_EXCEPTION(unsupported_limit, "unsupported limit");
|
||||
TRITON_CREATE_HIP_EXCEPTION(context_already_in_use, "context already in use");
|
||||
TRITON_CREATE_HIP_EXCEPTION(peer_access_unsupported, "peer access unsupported");
|
||||
TRITON_CREATE_HIP_EXCEPTION(invalid_ptx, "invalid ptx");
|
||||
TRITON_CREATE_HIP_EXCEPTION(invalid_graphics_context,
|
||||
"invalid graphics context");
|
||||
TRITON_CREATE_HIP_EXCEPTION(invalid_source, "invalid source");
|
||||
TRITON_CREATE_HIP_EXCEPTION(file_not_found, "file not found");
|
||||
TRITON_CREATE_HIP_EXCEPTION(shared_object_symbol_not_found,
|
||||
"shared object symbol not found");
|
||||
TRITON_CREATE_HIP_EXCEPTION(shared_object_init_failed,
|
||||
"shared object init failed");
|
||||
TRITON_CREATE_HIP_EXCEPTION(operating_system, "operating system");
|
||||
TRITON_CREATE_HIP_EXCEPTION(invalid_handle, "invalid handle");
|
||||
TRITON_CREATE_HIP_EXCEPTION(not_found, "not found");
|
||||
TRITON_CREATE_HIP_EXCEPTION(not_ready, "not ready");
|
||||
TRITON_CREATE_HIP_EXCEPTION(illegal_address, "illegal address");
|
||||
TRITON_CREATE_HIP_EXCEPTION(launch_out_of_resources, "launch out of resources");
|
||||
TRITON_CREATE_HIP_EXCEPTION(launch_timeout, "launch timeout");
|
||||
TRITON_CREATE_HIP_EXCEPTION(launch_incompatible_texturing,
|
||||
"launch incompatible texturing");
|
||||
TRITON_CREATE_HIP_EXCEPTION(peer_access_already_enabled,
|
||||
"peer access already enabled");
|
||||
TRITON_CREATE_HIP_EXCEPTION(peer_access_not_enabled, "peer access not enabled");
|
||||
TRITON_CREATE_HIP_EXCEPTION(primary_context_active, "primary context active");
|
||||
TRITON_CREATE_HIP_EXCEPTION(context_is_destroyed, "context is destroyed");
|
||||
TRITON_CREATE_HIP_EXCEPTION(assert_error, "assert");
|
||||
TRITON_CREATE_HIP_EXCEPTION(too_many_peers, "too many peers");
|
||||
TRITON_CREATE_HIP_EXCEPTION(host_memory_already_registered,
|
||||
"host memory already registered");
|
||||
TRITON_CREATE_HIP_EXCEPTION(host_memory_not_registered,
|
||||
"hot memory not registered");
|
||||
TRITON_CREATE_HIP_EXCEPTION(hardware_stack_error, "hardware stack error");
|
||||
TRITON_CREATE_HIP_EXCEPTION(illegal_instruction, "illegal instruction");
|
||||
TRITON_CREATE_HIP_EXCEPTION(misaligned_address, "misaligned address");
|
||||
TRITON_CREATE_HIP_EXCEPTION(invalid_address_space, "invalid address space");
|
||||
TRITON_CREATE_HIP_EXCEPTION(invalid_pc, "invalid pc");
|
||||
TRITON_CREATE_HIP_EXCEPTION(launch_failed, "launch failed");
|
||||
TRITON_CREATE_HIP_EXCEPTION(not_permitted, "not permitted");
|
||||
TRITON_CREATE_HIP_EXCEPTION(not_supported, "not supported");
|
||||
TRITON_CREATE_HIP_EXCEPTION(invalid_symbol, "invalid symbol");
|
||||
TRITON_CREATE_HIP_EXCEPTION(unknown, "unknown");
|
||||
|
||||
#undef TRITON_CREATE_CUDA_EXCEPTION
|
||||
}
|
||||
} // namespace hip
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace exception
|
||||
} // namespace driver
|
||||
} // namespace triton
|
||||
|
||||
#endif
|
||||
|
@@ -1,20 +1,21 @@
|
||||
#include <string>
|
||||
#include "triton/driver/dispatch.h"
|
||||
#include <string>
|
||||
|
||||
namespace llvm{
|
||||
namespace llvm {
|
||||
class Module;
|
||||
}
|
||||
|
||||
namespace triton{
|
||||
namespace driver{
|
||||
namespace triton {
|
||||
namespace driver {
|
||||
|
||||
void init_llvm();
|
||||
std::string path_to_ptxas(int& version);
|
||||
std::string llir_to_ptx(llvm::Module* module, int cc, int version);
|
||||
std::string ptx_to_cubin(const std::string& ptx, const std::string& ptxas_path, int cc);
|
||||
CUmodule ptx_to_cumodule(const std::string& ptx, int cc);
|
||||
std::string llir_to_amdgpu(llvm::Module* module, const std::string& proc);
|
||||
hipModule_t amdgpu_to_hipmodule(const std::string& path);
|
||||
std::string path_to_ptxas(int &version);
|
||||
std::string llir_to_ptx(llvm::Module *module, int cc, int version);
|
||||
std::string ptx_to_cubin(const std::string &ptx, const std::string &ptxas_path,
|
||||
int cc);
|
||||
CUmodule ptx_to_cumodule(const std::string &ptx, int cc);
|
||||
std::string llir_to_amdgpu(llvm::Module *module, const std::string &proc);
|
||||
hipModule_t amdgpu_to_hipmodule(const std::string &path);
|
||||
|
||||
}
|
||||
}
|
||||
} // namespace driver
|
||||
} // namespace triton
|
||||
|
@@ -3,52 +3,55 @@
|
||||
#ifndef _TRITON_TOOLS_BENCH_H_
|
||||
#define _TRITON_TOOLS_BENCH_H_
|
||||
|
||||
#include <chrono>
|
||||
#include <functional>
|
||||
#include <algorithm>
|
||||
#include "triton/driver/device.h"
|
||||
#include "triton/driver/stream.h"
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <functional>
|
||||
|
||||
namespace triton{
|
||||
namespace tools{
|
||||
namespace triton {
|
||||
namespace tools {
|
||||
|
||||
class timer{
|
||||
typedef std::chrono::high_resolution_clock high_resolution_clock;
|
||||
typedef std::chrono::nanoseconds nanoseconds;
|
||||
class timer {
|
||||
typedef std::chrono::high_resolution_clock high_resolution_clock;
|
||||
typedef std::chrono::nanoseconds nanoseconds;
|
||||
|
||||
public:
|
||||
explicit timer(bool run = false)
|
||||
{ if (run) start(); }
|
||||
explicit timer(bool run = false) {
|
||||
if (run)
|
||||
start();
|
||||
}
|
||||
|
||||
void start()
|
||||
{ _start = high_resolution_clock::now(); }
|
||||
void start() { _start = high_resolution_clock::now(); }
|
||||
|
||||
nanoseconds get() const
|
||||
{ return std::chrono::duration_cast<nanoseconds>(high_resolution_clock::now() - _start); }
|
||||
nanoseconds get() const {
|
||||
return std::chrono::duration_cast<nanoseconds>(
|
||||
high_resolution_clock::now() - _start);
|
||||
}
|
||||
|
||||
private:
|
||||
high_resolution_clock::time_point _start;
|
||||
high_resolution_clock::time_point _start;
|
||||
};
|
||||
|
||||
inline double bench(std::function<void()> const & op, driver::stream * stream, size_t warmup = 10, size_t repeat = 200)
|
||||
{
|
||||
inline double bench(std::function<void()> const &op, driver::stream *stream,
|
||||
size_t warmup = 10, size_t repeat = 200) {
|
||||
timer tmr;
|
||||
std::vector<size_t> times;
|
||||
double total_time = 0;
|
||||
for(size_t i = 0; i < warmup; i++)
|
||||
for (size_t i = 0; i < warmup; i++)
|
||||
op();
|
||||
stream->synchronize();
|
||||
tmr.start();
|
||||
for(size_t i = 0; i < repeat; i++){
|
||||
for (size_t i = 0; i < repeat; i++) {
|
||||
op();
|
||||
}
|
||||
stream->synchronize();
|
||||
return (float)tmr.get().count() / repeat;
|
||||
|
||||
// return *std::min_element(times.begin(), times.end());
|
||||
// return *std::min_element(times.begin(), times.end());
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
} // namespace tools
|
||||
} // namespace triton
|
||||
|
||||
#endif
|
||||
|
@@ -3,16 +3,15 @@
|
||||
#ifndef _TRITON_TOOLS_THREAD_GRAPH_H_
|
||||
#define _TRITON_TOOLS_THREAD_GRAPH_H_
|
||||
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
|
||||
namespace triton {
|
||||
namespace tools{
|
||||
namespace tools {
|
||||
|
||||
template<class node_t>
|
||||
class graph {
|
||||
template <class node_t> class graph {
|
||||
typedef std::map<node_t, std::set<node_t>> edges_t;
|
||||
|
||||
public:
|
||||
@@ -21,27 +20,27 @@ public:
|
||||
|
||||
private:
|
||||
void connected_components_impl(node_t x, std::set<node_t> &nodes,
|
||||
nmap_t* nmap, cmap_t* cmap, int id) const {
|
||||
if(nmap)
|
||||
nmap_t *nmap, cmap_t *cmap, int id) const {
|
||||
if (nmap)
|
||||
(*nmap)[x] = id;
|
||||
if(cmap)
|
||||
if (cmap)
|
||||
(*cmap)[id].push_back(x);
|
||||
if(nodes.find(x) != nodes.end()) {
|
||||
if (nodes.find(x) != nodes.end()) {
|
||||
nodes.erase(x);
|
||||
for(const node_t &y: edges_.at(x))
|
||||
for (const node_t &y : edges_.at(x))
|
||||
connected_components_impl(y, nodes, nmap, cmap, id);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
void connected_components(cmap_t *cmap, nmap_t *nmap) const {
|
||||
if(cmap)
|
||||
if (cmap)
|
||||
cmap->clear();
|
||||
if(nmap)
|
||||
if (nmap)
|
||||
nmap->clear();
|
||||
std::set<node_t> nodes = nodes_;
|
||||
unsigned id = 0;
|
||||
while(!nodes.empty()){
|
||||
while (!nodes.empty()) {
|
||||
connected_components_impl(*nodes.begin(), nodes, nmap, cmap, id++);
|
||||
}
|
||||
}
|
||||
@@ -63,7 +62,7 @@ private:
|
||||
edges_t edges_;
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
} // namespace tools
|
||||
} // namespace triton
|
||||
|
||||
#endif
|
||||
|
@@ -33,154 +33,140 @@
|
||||
#ifndef _TRITON_TOOLS_SHA1_HPP_
|
||||
#define _TRITON_TOOLS_SHA1_HPP_
|
||||
|
||||
namespace sha1
|
||||
namespace sha1 {
|
||||
namespace // local
|
||||
{
|
||||
namespace // local
|
||||
{
|
||||
// Rotate an integer value to left.
|
||||
inline unsigned int rol(const unsigned int value,
|
||||
const unsigned int steps)
|
||||
{
|
||||
return ((value << steps) | (value >> (32 - steps)));
|
||||
}
|
||||
// Rotate an integer value to left.
|
||||
inline unsigned int rol(const unsigned int value, const unsigned int steps) {
|
||||
return ((value << steps) | (value >> (32 - steps)));
|
||||
}
|
||||
|
||||
// Sets the first 16 integers in the buffert to zero.
|
||||
// Used for clearing the W buffert.
|
||||
inline void clearWBuffert(unsigned int* buffert)
|
||||
{
|
||||
for (int pos = 16; --pos >= 0;)
|
||||
{
|
||||
buffert[pos] = 0;
|
||||
}
|
||||
}
|
||||
// Sets the first 16 integers in the buffert to zero.
|
||||
// Used for clearing the W buffert.
|
||||
inline void clearWBuffert(unsigned int *buffert) {
|
||||
for (int pos = 16; --pos >= 0;) {
|
||||
buffert[pos] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
inline void innerHash(unsigned int* result, unsigned int* w)
|
||||
{
|
||||
unsigned int a = result[0];
|
||||
unsigned int b = result[1];
|
||||
unsigned int c = result[2];
|
||||
unsigned int d = result[3];
|
||||
unsigned int e = result[4];
|
||||
inline void innerHash(unsigned int *result, unsigned int *w) {
|
||||
unsigned int a = result[0];
|
||||
unsigned int b = result[1];
|
||||
unsigned int c = result[2];
|
||||
unsigned int d = result[3];
|
||||
unsigned int e = result[4];
|
||||
|
||||
int round = 0;
|
||||
int round = 0;
|
||||
|
||||
#define sha1macro(func,val) \
|
||||
{ \
|
||||
const unsigned int t = rol(a, 5) + (func) + e + val + w[round]; \
|
||||
e = d; \
|
||||
d = c; \
|
||||
c = rol(b, 30); \
|
||||
b = a; \
|
||||
a = t; \
|
||||
}
|
||||
#define sha1macro(func, val) \
|
||||
{ \
|
||||
const unsigned int t = rol(a, 5) + (func) + e + val + w[round]; \
|
||||
e = d; \
|
||||
d = c; \
|
||||
c = rol(b, 30); \
|
||||
b = a; \
|
||||
a = t; \
|
||||
}
|
||||
|
||||
while (round < 16)
|
||||
{
|
||||
sha1macro((b & c) | (~b & d), 0x5a827999)
|
||||
++round;
|
||||
}
|
||||
while (round < 20)
|
||||
{
|
||||
w[round] = rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1);
|
||||
sha1macro((b & c) | (~b & d), 0x5a827999)
|
||||
++round;
|
||||
}
|
||||
while (round < 40)
|
||||
{
|
||||
w[round] = rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1);
|
||||
sha1macro(b ^ c ^ d, 0x6ed9eba1)
|
||||
++round;
|
||||
}
|
||||
while (round < 60)
|
||||
{
|
||||
w[round] = rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1);
|
||||
sha1macro((b & c) | (b & d) | (c & d), 0x8f1bbcdc)
|
||||
++round;
|
||||
}
|
||||
while (round < 80)
|
||||
{
|
||||
w[round] = rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1);
|
||||
sha1macro(b ^ c ^ d, 0xca62c1d6)
|
||||
++round;
|
||||
}
|
||||
while (round < 16) {
|
||||
sha1macro((b & c) | (~b & d), 0x5a827999)++ round;
|
||||
}
|
||||
while (round < 20) {
|
||||
w[round] =
|
||||
rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1);
|
||||
sha1macro((b & c) | (~b & d), 0x5a827999)++ round;
|
||||
}
|
||||
while (round < 40) {
|
||||
w[round] =
|
||||
rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1);
|
||||
sha1macro(b ^ c ^ d, 0x6ed9eba1)++ round;
|
||||
}
|
||||
while (round < 60) {
|
||||
w[round] =
|
||||
rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1);
|
||||
sha1macro((b & c) | (b & d) | (c & d), 0x8f1bbcdc)++ round;
|
||||
}
|
||||
while (round < 80) {
|
||||
w[round] =
|
||||
rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1);
|
||||
sha1macro(b ^ c ^ d, 0xca62c1d6)++ round;
|
||||
}
|
||||
|
||||
#undef sha1macro
|
||||
#undef sha1macro
|
||||
|
||||
result[0] += a;
|
||||
result[1] += b;
|
||||
result[2] += c;
|
||||
result[3] += d;
|
||||
result[4] += e;
|
||||
}
|
||||
} // namespace
|
||||
result[0] += a;
|
||||
result[1] += b;
|
||||
result[2] += c;
|
||||
result[3] += d;
|
||||
result[4] += e;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
inline void calc(const void* src, const int bytelength, unsigned char* hash)
|
||||
{
|
||||
// Init the result array.
|
||||
unsigned int result[5] = { 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0 };
|
||||
inline void calc(const void *src, const int bytelength, unsigned char *hash) {
|
||||
// Init the result array.
|
||||
unsigned int result[5] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476,
|
||||
0xc3d2e1f0};
|
||||
|
||||
// Cast the void src pointer to be the byte array we can work with.
|
||||
const unsigned char* sarray = (const unsigned char*) src;
|
||||
// Cast the void src pointer to be the byte array we can work with.
|
||||
const unsigned char *sarray = (const unsigned char *)src;
|
||||
|
||||
// The reusable round buffer
|
||||
unsigned int w[80];
|
||||
// The reusable round buffer
|
||||
unsigned int w[80];
|
||||
|
||||
// Loop through all complete 64byte blocks.
|
||||
const int endOfFullBlocks = bytelength - 64;
|
||||
int endCurrentBlock;
|
||||
int currentBlock = 0;
|
||||
// Loop through all complete 64byte blocks.
|
||||
const int endOfFullBlocks = bytelength - 64;
|
||||
int endCurrentBlock;
|
||||
int currentBlock = 0;
|
||||
|
||||
while (currentBlock <= endOfFullBlocks)
|
||||
{
|
||||
endCurrentBlock = currentBlock + 64;
|
||||
while (currentBlock <= endOfFullBlocks) {
|
||||
endCurrentBlock = currentBlock + 64;
|
||||
|
||||
// Init the round buffer with the 64 byte block data.
|
||||
for (int roundPos = 0; currentBlock < endCurrentBlock; currentBlock += 4)
|
||||
{
|
||||
// This line will swap endian on big endian and keep endian on little endian.
|
||||
w[roundPos++] = (unsigned int) sarray[currentBlock + 3]
|
||||
| (((unsigned int) sarray[currentBlock + 2]) << 8)
|
||||
| (((unsigned int) sarray[currentBlock + 1]) << 16)
|
||||
| (((unsigned int) sarray[currentBlock]) << 24);
|
||||
}
|
||||
innerHash(result, w);
|
||||
}
|
||||
|
||||
// Handle the last and not full 64 byte block if existing.
|
||||
endCurrentBlock = bytelength - currentBlock;
|
||||
clearWBuffert(w);
|
||||
int lastBlockBytes = 0;
|
||||
for (;lastBlockBytes < endCurrentBlock; ++lastBlockBytes)
|
||||
{
|
||||
w[lastBlockBytes >> 2] |= (unsigned int) sarray[lastBlockBytes + currentBlock] << ((3 - (lastBlockBytes & 3)) << 3);
|
||||
}
|
||||
w[lastBlockBytes >> 2] |= 0x80 << ((3 - (lastBlockBytes & 3)) << 3);
|
||||
if (endCurrentBlock >= 56)
|
||||
{
|
||||
innerHash(result, w);
|
||||
clearWBuffert(w);
|
||||
}
|
||||
w[15] = bytelength << 3;
|
||||
innerHash(result, w);
|
||||
|
||||
// Store hash in result pointer, and make sure we get in in the correct order on both endian models.
|
||||
for (int hashByte = 20; --hashByte >= 0;)
|
||||
{
|
||||
hash[hashByte] = (result[hashByte >> 2] >> (((3 - hashByte) & 0x3) << 3)) & 0xff;
|
||||
}
|
||||
// Init the round buffer with the 64 byte block data.
|
||||
for (int roundPos = 0; currentBlock < endCurrentBlock; currentBlock += 4) {
|
||||
// This line will swap endian on big endian and keep endian on little
|
||||
// endian.
|
||||
w[roundPos++] = (unsigned int)sarray[currentBlock + 3] |
|
||||
(((unsigned int)sarray[currentBlock + 2]) << 8) |
|
||||
(((unsigned int)sarray[currentBlock + 1]) << 16) |
|
||||
(((unsigned int)sarray[currentBlock]) << 24);
|
||||
}
|
||||
innerHash(result, w);
|
||||
}
|
||||
|
||||
inline void toHexString(const unsigned char* hash, char* hexstring)
|
||||
{
|
||||
const char hexDigits[] = { "0123456789abcdef" };
|
||||
// Handle the last and not full 64 byte block if existing.
|
||||
endCurrentBlock = bytelength - currentBlock;
|
||||
clearWBuffert(w);
|
||||
int lastBlockBytes = 0;
|
||||
for (; lastBlockBytes < endCurrentBlock; ++lastBlockBytes) {
|
||||
w[lastBlockBytes >> 2] |=
|
||||
(unsigned int)sarray[lastBlockBytes + currentBlock]
|
||||
<< ((3 - (lastBlockBytes & 3)) << 3);
|
||||
}
|
||||
w[lastBlockBytes >> 2] |= 0x80 << ((3 - (lastBlockBytes & 3)) << 3);
|
||||
if (endCurrentBlock >= 56) {
|
||||
innerHash(result, w);
|
||||
clearWBuffert(w);
|
||||
}
|
||||
w[15] = bytelength << 3;
|
||||
innerHash(result, w);
|
||||
|
||||
for (int hashByte = 20; --hashByte >= 0;)
|
||||
{
|
||||
hexstring[hashByte << 1] = hexDigits[(hash[hashByte] >> 4) & 0xf];
|
||||
hexstring[(hashByte << 1) + 1] = hexDigits[hash[hashByte] & 0xf];
|
||||
}
|
||||
hexstring[40] = 0;
|
||||
}
|
||||
// Store hash in result pointer, and make sure we get in in the correct order
|
||||
// on both endian models.
|
||||
for (int hashByte = 20; --hashByte >= 0;) {
|
||||
hash[hashByte] =
|
||||
(result[hashByte >> 2] >> (((3 - hashByte) & 0x3) << 3)) & 0xff;
|
||||
}
|
||||
}
|
||||
|
||||
inline void toHexString(const unsigned char *hash, char *hexstring) {
|
||||
const char hexDigits[] = {"0123456789abcdef"};
|
||||
|
||||
for (int hashByte = 20; --hashByte >= 0;) {
|
||||
hexstring[hashByte << 1] = hexDigits[(hash[hashByte] >> 4) & 0xf];
|
||||
hexstring[(hashByte << 1) + 1] = hexDigits[hash[hashByte] & 0xf];
|
||||
}
|
||||
hexstring[40] = 0;
|
||||
}
|
||||
} // namespace sha1
|
||||
|
||||
#endif
|
||||
|
@@ -7,11 +7,8 @@
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
namespace triton
|
||||
{
|
||||
namespace tools
|
||||
{
|
||||
|
||||
namespace triton {
|
||||
namespace tools {
|
||||
|
||||
#ifdef _WIN32
|
||||
#define popen _popen
|
||||
@@ -19,12 +16,12 @@ namespace tools
|
||||
#endif
|
||||
|
||||
#ifndef WEXITSTATUS
|
||||
#define WEXITSTATUS(stat_val) ((unsigned)(stat_val) & 255)
|
||||
#define WEXITSTATUS(stat_val) ((unsigned)(stat_val)&255)
|
||||
#endif
|
||||
|
||||
int exec(const std::string& cmd, std::string& result) {
|
||||
int exec(const std::string &cmd, std::string &result) {
|
||||
char buffer[128];
|
||||
FILE* pipe = popen(cmd.c_str(), "r");
|
||||
FILE *pipe = popen(cmd.c_str(), "r");
|
||||
if (!pipe)
|
||||
return 0;
|
||||
result.clear();
|
||||
@@ -37,10 +34,9 @@ int exec(const std::string& cmd, std::string& result) {
|
||||
}
|
||||
int status = pclose(pipe);
|
||||
return WEXITSTATUS(status);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
} // namespace tools
|
||||
} // namespace triton
|
||||
|
||||
#endif
|
||||
|
27
include/triton/tools/sys/getenv.hpp
Executable file → Normal file
27
include/triton/tools/sys/getenv.hpp
Executable file → Normal file
@@ -22,26 +22,23 @@
|
||||
#ifndef TDL_TOOLS_SYS_GETENV_HPP
|
||||
#define TDL_TOOLS_SYS_GETENV_HPP
|
||||
|
||||
#include <string>
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
|
||||
namespace triton
|
||||
{
|
||||
namespace triton {
|
||||
|
||||
namespace tools
|
||||
{
|
||||
|
||||
inline std::string getenv(const char * name)
|
||||
{
|
||||
const char * cstr = std::getenv(name);
|
||||
if(!cstr)
|
||||
return "";
|
||||
std::string result(cstr);
|
||||
return result;
|
||||
}
|
||||
namespace tools {
|
||||
|
||||
inline std::string getenv(const char *name) {
|
||||
const char *cstr = std::getenv(name);
|
||||
if (!cstr)
|
||||
return "";
|
||||
std::string result(cstr);
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
} // namespace tools
|
||||
|
||||
} // namespace triton
|
||||
|
||||
#endif
|
||||
|
74
include/triton/tools/sys/mkdir.hpp
Executable file → Normal file
74
include/triton/tools/sys/mkdir.hpp
Executable file → Normal file
@@ -22,55 +22,49 @@
|
||||
#ifndef TDL_TOOLS_SYS_MKDIR_HPP
|
||||
#define TDL_TOOLS_SYS_MKDIR_HPP
|
||||
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <cstdlib>
|
||||
#include <sys/stat.h>
|
||||
#include <cstring>
|
||||
#include <errno.h>
|
||||
#include <string>
|
||||
#include <sys/stat.h>
|
||||
#if defined(_WIN32)
|
||||
#include <direct.h>
|
||||
#include <direct.h>
|
||||
#endif
|
||||
|
||||
namespace triton
|
||||
{
|
||||
namespace triton {
|
||||
|
||||
namespace tools
|
||||
{
|
||||
|
||||
inline int mkdir(std::string const & path)
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
return _mkdir(path.c_str());
|
||||
#else
|
||||
return ::mkdir(path.c_str(), 0777);
|
||||
#endif
|
||||
}
|
||||
|
||||
inline int mkpath(std::string const & path)
|
||||
{
|
||||
int status = 0;
|
||||
size_t pp = 0;
|
||||
size_t sp;
|
||||
while ((sp = path.find('/', pp)) != std::string::npos)
|
||||
{
|
||||
if (sp != pp){
|
||||
status = mkdir(path.substr(0, sp));
|
||||
}
|
||||
pp = sp + 1;
|
||||
}
|
||||
return (status==0 || errno==EEXIST)?0:-1;
|
||||
}
|
||||
|
||||
inline int mtime(std::string const & path)
|
||||
{
|
||||
struct stat st;
|
||||
if(stat(path.c_str(), &st) != 0)
|
||||
return 0;
|
||||
return st.st_mtime;
|
||||
}
|
||||
namespace tools {
|
||||
|
||||
inline int mkdir(std::string const &path) {
|
||||
#if defined(_WIN32)
|
||||
return _mkdir(path.c_str());
|
||||
#else
|
||||
return ::mkdir(path.c_str(), 0777);
|
||||
#endif
|
||||
}
|
||||
|
||||
inline int mkpath(std::string const &path) {
|
||||
int status = 0;
|
||||
size_t pp = 0;
|
||||
size_t sp;
|
||||
while ((sp = path.find('/', pp)) != std::string::npos) {
|
||||
if (sp != pp) {
|
||||
status = mkdir(path.substr(0, sp));
|
||||
}
|
||||
pp = sp + 1;
|
||||
}
|
||||
return (status == 0 || errno == EEXIST) ? 0 : -1;
|
||||
}
|
||||
|
||||
inline int mtime(std::string const &path) {
|
||||
struct stat st;
|
||||
if (stat(path.c_str(), &st) != 0)
|
||||
return 0;
|
||||
return st.st_mtime;
|
||||
}
|
||||
|
||||
} // namespace tools
|
||||
|
||||
} // namespace triton
|
||||
|
||||
#endif
|
||||
|
@@ -3,88 +3,79 @@
|
||||
#ifndef _TRITON_TOOLS_THREAD_POOL_H_
|
||||
#define _TRITON_TOOLS_THREAD_POOL_H_
|
||||
|
||||
#include <vector>
|
||||
#include <queue>
|
||||
#include <memory>
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
#include <future>
|
||||
#include <functional>
|
||||
#include <future>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <queue>
|
||||
#include <stdexcept>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
class ThreadPool {
|
||||
public:
|
||||
ThreadPool(size_t threads)
|
||||
: stop(false) {
|
||||
for(size_t i = 0;i < threads;++i)
|
||||
workers.emplace_back(
|
||||
[this] {
|
||||
for(;;){
|
||||
std::function<void()> task;
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(this->queue_mutex);
|
||||
this->condition.wait(lock,
|
||||
[this]{ return this->stop || !this->tasks.empty(); });
|
||||
if(this->stop && this->tasks.empty())
|
||||
return;
|
||||
task = std::move(this->tasks.front());
|
||||
this->tasks.pop();
|
||||
}
|
||||
task();
|
||||
}
|
||||
}
|
||||
);
|
||||
}
|
||||
ThreadPool(size_t threads) : stop(false) {
|
||||
for (size_t i = 0; i < threads; ++i)
|
||||
workers.emplace_back([this] {
|
||||
for (;;) {
|
||||
std::function<void()> task;
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(this->queue_mutex);
|
||||
this->condition.wait(
|
||||
lock, [this] { return this->stop || !this->tasks.empty(); });
|
||||
if (this->stop && this->tasks.empty())
|
||||
return;
|
||||
task = std::move(this->tasks.front());
|
||||
this->tasks.pop();
|
||||
}
|
||||
task();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
template <class F, class... Args>
|
||||
auto enqueue(F &&f, Args &&... args)
|
||||
-> std::future<typename std::result_of<F(Args...)>::type> {
|
||||
using return_type = typename std::result_of<F(Args...)>::type;
|
||||
|
||||
template<class F, class... Args>
|
||||
auto enqueue(F&& f, Args&&... args)
|
||||
-> std::future<typename std::result_of<F(Args...)>::type>
|
||||
auto task = std::make_shared<std::packaged_task<return_type()>>(
|
||||
std::bind(std::forward<F>(f), std::forward<Args>(args)...));
|
||||
|
||||
std::future<return_type> res = task->get_future();
|
||||
{
|
||||
using return_type = typename std::result_of<F(Args...)>::type;
|
||||
std::unique_lock<std::mutex> lock(queue_mutex);
|
||||
|
||||
auto task = std::make_shared< std::packaged_task<return_type()> >(
|
||||
std::bind(std::forward<F>(f), std::forward<Args>(args)...)
|
||||
);
|
||||
// don't allow enqueueing after stopping the pool
|
||||
if (stop)
|
||||
throw std::runtime_error("enqueue on stopped ThreadPool");
|
||||
|
||||
std::future<return_type> res = task->get_future();
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(queue_mutex);
|
||||
|
||||
// don't allow enqueueing after stopping the pool
|
||||
if(stop)
|
||||
throw std::runtime_error("enqueue on stopped ThreadPool");
|
||||
|
||||
tasks.emplace([task](){ (*task)(); });
|
||||
}
|
||||
condition.notify_one();
|
||||
return res;
|
||||
tasks.emplace([task]() { (*task)(); });
|
||||
}
|
||||
condition.notify_one();
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
~ThreadPool() {
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(queue_mutex);
|
||||
stop = true;
|
||||
}
|
||||
condition.notify_all();
|
||||
for(std::thread &worker: workers)
|
||||
worker.join();
|
||||
~ThreadPool() {
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(queue_mutex);
|
||||
stop = true;
|
||||
}
|
||||
|
||||
condition.notify_all();
|
||||
for (std::thread &worker : workers)
|
||||
worker.join();
|
||||
}
|
||||
|
||||
private:
|
||||
// need to keep track of threads so we can join them
|
||||
std::vector< std::thread > workers;
|
||||
// the task queue
|
||||
std::queue< std::function<void()> > tasks;
|
||||
// need to keep track of threads so we can join them
|
||||
std::vector<std::thread> workers;
|
||||
// the task queue
|
||||
std::queue<std::function<void()>> tasks;
|
||||
|
||||
// synchronization
|
||||
std::mutex queue_mutex;
|
||||
std::condition_variable condition;
|
||||
bool stop;
|
||||
// synchronization
|
||||
std::mutex queue_mutex;
|
||||
std::condition_variable condition;
|
||||
bool stop;
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user