[CI] run clang-format (#24)

This commit is contained in:
Philippe Tillet
2022-07-26 17:25:03 -07:00
committed by GitHub
parent 25357083e6
commit 6d62d88d4f
62 changed files with 13673 additions and 11367 deletions

View File

@@ -10,7 +10,6 @@
namespace mlir {
//===----------------------------------------------------------------------===//
// AxisInfo
//===----------------------------------------------------------------------===//
@@ -25,26 +24,25 @@ public:
public:
// Default constructor
AxisInfo(): AxisInfo({}, {}, {}) { }
AxisInfo() : AxisInfo({}, {}, {}) {}
// Construct contiguity info with known contiguity
AxisInfo(ContiguityT knownContiguity, DivisibilityT knownDivisibility,
ConstancyT knownConstancy)
: contiguity(knownContiguity), divisibility(knownDivisibility),
constancy(knownConstancy), rank(contiguity.size()) {
assert(knownDivisibility.size() == rank);
assert(knownConstancy.size() == rank);
}
: contiguity(knownContiguity), divisibility(knownDivisibility),
constancy(knownConstancy), rank(contiguity.size()) {
assert(knownDivisibility.size() == rank);
assert(knownConstancy.size() == rank);
}
// Accessors
int getContiguity(size_t d) const { return contiguity[d]; }
const ContiguityT& getContiguity() const { return contiguity; }
int getContiguity(size_t d) const { return contiguity[d]; }
const ContiguityT &getContiguity() const { return contiguity; }
int getDivisibility(size_t d) const { return divisibility[d]; }
const DivisibilityT& getDivisibility() const { return divisibility; }
const DivisibilityT &getDivisibility() const { return divisibility; }
int getConstancy(size_t d) const { return constancy[d]; }
const ConstancyT& getConstancy() const { return constancy; }
int getConstancy(size_t d) const { return constancy[d]; }
const ConstancyT &getConstancy() const { return constancy; }
int getRank() const { return rank; }
@@ -56,13 +54,13 @@ public:
}
/// The pessimistic value state of the contiguity is unknown.
static AxisInfo getPessimisticValueState(MLIRContext *context)
{ return AxisInfo(); }
static AxisInfo getPessimisticValueState(MLIRContext *context) {
return AxisInfo();
}
static AxisInfo getPessimisticValueState(Value value);
// The gcd of both arguments for each dimension
static AxisInfo join(const AxisInfo &lhs,
const AxisInfo &rhs);
static AxisInfo join(const AxisInfo &lhs, const AxisInfo &rhs);
private:
/// The _contiguity_ information maps the `d`-th
@@ -81,7 +79,7 @@ private:
/// [19, 23, 27, 31]
/// Would have contiguity [2, 1].
ContiguityT contiguity;
/// The _divisibility_ information maps the `d`-th
/// dimension to the largest power-of-two that
/// divides the first element of all the values along it
@@ -107,39 +105,36 @@ private:
/// [16, 16, 16, 16, 20, 20, 20, 20]
/// would have constancy [1, 4]
ConstancyT constancy;
// number of dimensions of the lattice
int rank;
};
class AxisInfoAnalysis
: public ForwardDataFlowAnalysis<AxisInfo> {
class AxisInfoAnalysis : public ForwardDataFlowAnalysis<AxisInfo> {
private:
static const int maxPow2Divisor = 65536;
int highestPowOf2Divisor(int n){
if(n==0)
int highestPowOf2Divisor(int n) {
if (n == 0)
return maxPow2Divisor;
return (n & (~(n - 1)));
}
AxisInfo visitBinaryOp(Operation* op, AxisInfo lhsInfo, AxisInfo rhsInfo,
const std::function<int(AxisInfo,AxisInfo,int)>& getContiguity,
const std::function<int(AxisInfo,AxisInfo,int)>& getDivisibility,
const std::function<int(AxisInfo,AxisInfo,int)>& getConstancy);
AxisInfo visitBinaryOp(
Operation *op, AxisInfo lhsInfo, AxisInfo rhsInfo,
const std::function<int(AxisInfo, AxisInfo, int)> &getContiguity,
const std::function<int(AxisInfo, AxisInfo, int)> &getDivisibility,
const std::function<int(AxisInfo, AxisInfo, int)> &getConstancy);
public:
using ForwardDataFlowAnalysis<AxisInfo>::ForwardDataFlowAnalysis;
ChangeResult visitOperation(Operation *op,
ArrayRef<LatticeElement<AxisInfo> *> operands) override;
ChangeResult
visitOperation(Operation *op,
ArrayRef<LatticeElement<AxisInfo> *> operands) override;
};
}
} // namespace mlir
#endif

View File

@@ -3,17 +3,13 @@
#include "triton/Conversion/TritonToTritonGPU/TritonToTritonGPU.h"
namespace mlir
{
namespace triton
{
namespace mlir {
namespace triton {
#define GEN_PASS_REGISTRATION
#include "triton/Conversion/Passes.h.inc"
} // namespace triton
} // namespace mlir
#endif

View File

@@ -3,18 +3,17 @@
#include <memory>
namespace mlir{
namespace mlir {
class ModuleOp;
template <typename T> class OperationPass;
namespace triton{
namespace triton {
std::unique_ptr<OperationPass<ModuleOp>>
std::unique_ptr<OperationPass<ModuleOp>>
createConvertTritonToTritonGPUPass(int numWarps = 4);
}
} // namespace mlir
#endif

View File

@@ -1,17 +1,16 @@
#ifndef TRITON_DIALECT_TRITON_IR_DIALECT_H_
#define TRITON_DIALECT_TRITON_IR_DIALECT_H_
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Dialect.h"
#include "mlir/Interfaces/ControlFlowInterfaces.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "triton/Dialect/Triton/IR/Traits.h"
#include "triton/Dialect/Triton/IR/Types.h"
#include "triton/Dialect/Triton/IR/Dialect.h.inc"
#include "triton/Dialect/Triton/IR/OpsEnums.h.inc"
#include "triton/Dialect/Triton/IR/Traits.h"
#include "triton/Dialect/Triton/IR/Types.h"
#define GET_OP_CLASSES
#include "triton/Dialect/Triton/IR/Ops.h.inc"

View File

@@ -19,7 +19,7 @@ public:
static LogicalResult verifyTrait(Operation *op) {
// The rationale for this number is to prevent users from creating programs
// that would have catastrophic register pressure and cause the compiler to
// hang.
// hang.
// Since H100 has 256KB registers, we should allow users to create tensors
// of size up to 256K elements. It will spill for datatypes wider than 1B,
// but we probably should limit number of elements (rather than bytes) to
@@ -31,8 +31,8 @@ public:
for (int64_t s : tensorType.getShape())
numElements *= s;
if (numElements > maxElement)
return op->emitError("Maximum allowed number of elements is ") << maxElement << ", but "
<< *op << " has more than that";
return op->emitError("Maximum allowed number of elements is ")
<< maxElement << ", but " << *op << " has more than that";
if ((numElements & (numElements - 1)) != 0)
return op->emitError("Number of elements must be power-of-two, but ")
<< *op << " doesn't follow the rule";
@@ -45,8 +45,8 @@ public:
for (int64_t s : tensorType.getShape())
numElements *= s;
if (numElements > maxElement)
return op->emitError("Maximum allowed number of elements is ") << maxElement << ", but "
<< *op << " has more than that";
return op->emitError("Maximum allowed number of elements is ")
<< maxElement << ", but " << *op << " has more than that";
if ((numElements & (numElements - 1)) != 0)
return op->emitError("Number of elements must be power-of-two, but ")
<< *op << " doesn't follow the rule";
@@ -57,7 +57,7 @@ public:
}
};
}
}
} // namespace OpTrait
} // namespace mlir
#endif

View File

@@ -13,6 +13,6 @@ std::unique_ptr<Pass> createCombineOpsPass();
#define GEN_PASS_REGISTRATION
#include "triton/Dialect/Triton/Transforms/Passes.h.inc"
}
} // namespace mlir
#endif

View File

@@ -15,5 +15,4 @@
#define GET_OP_CLASSES
#include "triton/Dialect/TritonGPU/IR/Ops.h.inc"
#endif // TRITON_DIALECT_TRITONGPU_IR_DIALECT_H_

View File

@@ -14,6 +14,7 @@ namespace mlir {
class TritonGPUTypeConverter : public TypeConverter {
public:
TritonGPUTypeConverter(MLIRContext *context, int numThreads);
private:
MLIRContext *context;
int numThreads;
@@ -21,8 +22,10 @@ private:
class TritonGPUConversionTarget : public ConversionTarget {
TritonGPUTypeConverter &typeConverter;
public:
explicit TritonGPUConversionTarget(MLIRContext &ctx, TritonGPUTypeConverter &typeConverter);
explicit TritonGPUConversionTarget(MLIRContext &ctx,
TritonGPUTypeConverter &typeConverter);
/// update layouts & insert ConvertLayoutOp if necessary
LogicalResult refineLayouts(ModuleOp mod, int numThreads);

387
include/triton/driver/dispatch.h Executable file → Normal file
View File

@@ -3,10 +3,10 @@
#ifndef _TRITON_DRIVER_DISPATCH_H_
#define _TRITON_DRIVER_DISPATCH_H_
#include <type_traits>
#include <dlfcn.h>
#include <type_traits>
//CUDA Backend
// CUDA Backend
#include "triton/external/CUDA/cuda.h"
#include "triton/external/CUDA/nvml.h"
@@ -14,47 +14,43 @@
//#define __HIP_PLATFORM_AMD__
#include "triton/external/hip.h"
//Exceptions
// Exceptions
#include <iostream>
#include <stdexcept>
namespace llvm {
class PassRegistry;
class Module;
}
} // namespace llvm
namespace triton
{
namespace driver
{
namespace triton {
namespace driver {
class cu_context;
template<class T> void check(T){}
template <class T> void check(T) {}
void check(CUresult err);
void check(hipError_t err);
class dispatch
{
class dispatch {
protected:
template <class F>
struct return_type;
template <class F> struct return_type;
template <class R, class... A>
struct return_type<R (*)(A...)>
{ typedef R type; };
template <class R, class... A> struct return_type<R (*)(A...)> {
typedef R type;
};
typedef bool (*f_init_t)();
template<f_init_t initializer, typename FunPtrT, typename... Args>
static typename return_type<FunPtrT>::type f_impl(void*& lib_h, FunPtrT, void*& cache, const char * name, Args... args)
{
template <f_init_t initializer, typename FunPtrT, typename... Args>
static typename return_type<FunPtrT>::type
f_impl(void *&lib_h, FunPtrT, void *&cache, const char *name, Args... args) {
initializer();
if(cache == nullptr){
if (cache == nullptr) {
cache = dlsym(lib_h, name);
if(cache == 0)
throw std::runtime_error("dlsym unable to load function");
}
if (cache == 0)
throw std::runtime_error("dlsym unable to load function");
}
FunPtrT fptr;
*reinterpret_cast<void **>(&fptr) = cache;
typename return_type<FunPtrT>::type res = (*fptr)(args...);
@@ -76,63 +72,99 @@ public:
// context management
static CUresult cuInit(unsigned int Flags);
static CUresult cuCtxDestroy_v2(CUcontext ctx);
static CUresult cuCtxCreate_v2(CUcontext *pctx, unsigned int flags, CUdevice dev);
static CUresult cuCtxCreate_v2(CUcontext *pctx, unsigned int flags,
CUdevice dev);
static CUresult cuCtxPushCurrent_v2(CUcontext ctx);
static CUresult cuCtxPopCurrent_v2(CUcontext *pctx);
static CUresult cuCtxGetDevice(CUdevice* result);
static CUresult cuCtxEnablePeerAccess(CUcontext peerContext, unsigned int flags);
static CUresult cuCtxGetDevice(CUdevice *result);
static CUresult cuCtxEnablePeerAccess(CUcontext peerContext,
unsigned int flags);
static CUresult cuDriverGetVersion(int *driverVersion);
// device management
static CUresult cuDeviceGet(CUdevice *device, int ordinal);
static CUresult cuDeviceGetName(char *name, int len, CUdevice dev);
static CUresult cuDeviceGetPCIBusId(char *id, int len, CUdevice dev);
static CUresult cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
static CUresult cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib,
CUdevice dev);
static CUresult cuDeviceGetCount(int *count);
// link management
static CUresult cuLinkAddData_v2(CUlinkState state, CUjitInputType type, void* data, size_t size, const char* name, unsigned int numOptions, CUjit_option* options, void** optionValues);
static CUresult cuLinkCreate_v2(unsigned int numOptions, CUjit_option* options, void** optionValues, CUlinkState* stateOut);
static CUresult cuLinkComplete(CUlinkState state, void** cubinOut, size_t* sizeOut);
static CUresult cuLinkAddData_v2(CUlinkState state, CUjitInputType type,
void *data, size_t size, const char *name,
unsigned int numOptions,
CUjit_option *options, void **optionValues);
static CUresult cuLinkCreate_v2(unsigned int numOptions,
CUjit_option *options, void **optionValues,
CUlinkState *stateOut);
static CUresult cuLinkComplete(CUlinkState state, void **cubinOut,
size_t *sizeOut);
static CUresult cuLinkDestroy(CUlinkState state);
// module management
static CUresult cuModuleGetGlobal_v2(CUdeviceptr *dptr, size_t* bytes, CUmodule hmod, const char *name);
static CUresult cuModuleGetGlobal_v2(CUdeviceptr *dptr, size_t *bytes,
CUmodule hmod, const char *name);
static CUresult cuModuleLoad(CUmodule *module, const char *fname);
static CUresult cuModuleLoadData(CUmodule* module, const void* image);
static CUresult cuModuleLoadData(CUmodule *module, const void *image);
static CUresult cuModuleUnload(CUmodule hmod);
static CUresult cuModuleLoadDataEx(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
static CUresult cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, const char *name);
static CUresult cuModuleLoadDataEx(CUmodule *module, const void *image,
unsigned int numOptions,
CUjit_option *options,
void **optionValues);
static CUresult cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod,
const char *name);
// stream management
static CUresult cuStreamCreate(CUstream *phStream, unsigned int Flags);
static CUresult cuStreamSynchronize(CUstream hStream);
static CUresult cuStreamGetCtx(CUstream hStream, CUcontext* pctx);
static CUresult cuStreamGetCtx(CUstream hStream, CUcontext *pctx);
static CUresult cuStreamDestroy_v2(CUstream hStream);
static CUresult cuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra);
static CUresult cuLaunchKernel(CUfunction f, unsigned int gridDimX,
unsigned int gridDimY, unsigned int gridDimZ,
unsigned int blockDimX, unsigned int blockDimY,
unsigned int blockDimZ,
unsigned int sharedMemBytes, CUstream hStream,
void **kernelParams, void **extra);
// function management
static CUresult cuFuncGetAttribute(int* pi, CUfunction_attribute attrib, CUfunction hfunc);
static CUresult cuFuncSetAttribute(CUfunction hfunc, CUfunction_attribute attrib, int value);
static CUresult cuFuncGetAttribute(int *pi, CUfunction_attribute attrib,
CUfunction hfunc);
static CUresult cuFuncSetAttribute(CUfunction hfunc,
CUfunction_attribute attrib, int value);
static CUresult cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
// memory management
static CUresult cuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize);
static CUresult cuPointerGetAttribute(void * data, CUpointer_attribute attribute, CUdeviceptr ptr);
static CUresult cuMemsetD8Async(CUdeviceptr dst, unsigned char x, size_t N, CUstream stream);
static CUresult cuMemcpyDtoH_v2(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount);
static CUresult cuPointerGetAttribute(void *data,
CUpointer_attribute attribute,
CUdeviceptr ptr);
static CUresult cuMemsetD8Async(CUdeviceptr dst, unsigned char x, size_t N,
CUstream stream);
static CUresult cuMemcpyDtoH_v2(void *dstHost, CUdeviceptr srcDevice,
size_t ByteCount);
static CUresult cuMemFree_v2(CUdeviceptr dptr);
static CUresult cuMemcpyDtoHAsync_v2(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
static CUresult cuMemcpyHtoDAsync_v2(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream);
static CUresult cuMemcpyHtoD_v2(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
static CUresult cuMemcpyDtoHAsync_v2(void *dstHost, CUdeviceptr srcDevice,
size_t ByteCount, CUstream hStream);
static CUresult cuMemcpyHtoDAsync_v2(CUdeviceptr dstDevice,
const void *srcHost, size_t ByteCount,
CUstream hStream);
static CUresult cuMemcpyHtoD_v2(CUdeviceptr dstDevice, const void *srcHost,
size_t ByteCount);
// event management
static CUresult cuEventCreate(CUevent *phEvent, unsigned int Flags);
static CUresult cuEventElapsedTime(float *pMilliseconds, CUevent hStart, CUevent hEnd);
static CUresult cuEventElapsedTime(float *pMilliseconds, CUevent hStart,
CUevent hEnd);
static CUresult cuEventRecord(CUevent hEvent, CUstream hStream);
static CUresult cuEventDestroy_v2(CUevent hEvent);
/* ------------------- *
* NVML
* ------------------- */
static nvmlReturn_t nvmlDeviceGetHandleByPciBusId_v2( const char* pciBusId, nvmlDevice_t* device);
static nvmlReturn_t nvmlDeviceGetClockInfo(nvmlDevice_t device, nvmlClockType_t type, unsigned int *clock);
static nvmlReturn_t nvmlDeviceGetMaxClockInfo(nvmlDevice_t device, nvmlClockType_t type, unsigned int *clock);
static nvmlReturn_t nvmlDeviceSetApplicationsClocks(nvmlDevice_t device, unsigned int mem_clock, unsigned int sm_clock);
static nvmlReturn_t nvmlDeviceGetHandleByPciBusId_v2(const char *pciBusId,
nvmlDevice_t *device);
static nvmlReturn_t nvmlDeviceGetClockInfo(nvmlDevice_t device,
nvmlClockType_t type,
unsigned int *clock);
static nvmlReturn_t nvmlDeviceGetMaxClockInfo(nvmlDevice_t device,
nvmlClockType_t type,
unsigned int *clock);
static nvmlReturn_t nvmlDeviceSetApplicationsClocks(nvmlDevice_t device,
unsigned int mem_clock,
unsigned int sm_clock);
/* ------------------- *
* HIP
@@ -140,177 +172,198 @@ public:
// context management
static hipError_t hipInit(unsigned int Flags);
static hipError_t hipCtxDestroy(hipCtx_t ctx);
static hipError_t hipCtxCreate(hipCtx_t *pctx, unsigned int flags, hipDevice_t dev);
static hipError_t hipCtxCreate(hipCtx_t *pctx, unsigned int flags,
hipDevice_t dev);
static hipError_t hipCtxPushCurrent(hipCtx_t ctx);
static hipError_t hipCtxPopCurrent(hipCtx_t *pctx);
static hipError_t hipCtxGetDevice(hipDevice_t* result);
static hipError_t hipCtxEnablePeerAccess(hipCtx_t peerContext, unsigned int flags);
static hipError_t hipCtxGetDevice(hipDevice_t *result);
static hipError_t hipCtxEnablePeerAccess(hipCtx_t peerContext,
unsigned int flags);
static hipError_t hipDriverGetVersion(int *driverVersion);
// device management
static hipError_t hipGetDevice(hipDevice_t *device, int ordinal);
static hipError_t hipDeviceGetName(char *name, int len, hipDevice_t dev);
static hipError_t hipDeviceGetPCIBusId(char *id, int len, hipDevice_t dev);
static hipError_t hipDeviceGetAttribute(int *pi, hipDeviceAttribute_t attrib, hipDevice_t dev);
static hipError_t hipDeviceGetAttribute(int *pi, hipDeviceAttribute_t attrib,
hipDevice_t dev);
static hipError_t hipGetDeviceCount(int *count);
// module management
static hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t* bytes, hipModule_t hmod, const char *name);
static hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes,
hipModule_t hmod, const char *name);
static hipError_t hipModuleLoad(hipModule_t *module, const char *fname);
static hipError_t hipModuleLoadData(hipModule_t* module, const void* image);
static hipError_t hipModuleLoadData(hipModule_t *module, const void *image);
static hipError_t hipModuleUnload(hipModule_t hmod);
static hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues);
static hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const char *name);
static hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image,
unsigned int numOptions,
hipJitOption *options,
void **optionValues);
static hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod,
const char *name);
// stream management
static hipError_t hipStreamCreate(hipStream_t *phStream, unsigned int Flags);
static hipError_t hipStreamSynchronize(hipStream_t hStream);
static hipError_t hipStreamDestroy(hipStream_t hStream);
static hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, hipStream_t hStream, void **kernelParams, void **extra);
static hipError_t
hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX,
unsigned int gridDimY, unsigned int gridDimZ,
unsigned int blockDimX, unsigned int blockDimY,
unsigned int blockDimZ, unsigned int sharedMemBytes,
hipStream_t hStream, void **kernelParams, void **extra);
// function management
static hipError_t hipFuncGetAttributes(hipFuncAttributes* attrib, void* hfunc);
static hipError_t hipFuncSetAttribute(hipFunction_t hfunc, hipFuncAttribute attrib, int value);
static hipError_t hipFuncSetCacheConfig(hipFunction_t hfunc, hipFuncCache_t config);
static hipError_t hipFuncGetAttributes(hipFuncAttributes *attrib,
void *hfunc);
static hipError_t hipFuncSetAttribute(hipFunction_t hfunc,
hipFuncAttribute attrib, int value);
static hipError_t hipFuncSetCacheConfig(hipFunction_t hfunc,
hipFuncCache_t config);
// memory management
static hipError_t hipMalloc(hipDeviceptr_t *dptr, size_t bytesize);
static hipError_t hipPointerGetAttribute(void * data, CUpointer_attribute attribute, hipDeviceptr_t ptr);
static hipError_t hipMemsetD8Async(hipDeviceptr_t dst, unsigned char x, size_t N, hipStream_t stream);
static hipError_t hipMemcpyDtoH(void *dstHost, hipDeviceptr_t srcDevice, size_t ByteCount);
static hipError_t hipPointerGetAttribute(void *data,
CUpointer_attribute attribute,
hipDeviceptr_t ptr);
static hipError_t hipMemsetD8Async(hipDeviceptr_t dst, unsigned char x,
size_t N, hipStream_t stream);
static hipError_t hipMemcpyDtoH(void *dstHost, hipDeviceptr_t srcDevice,
size_t ByteCount);
static hipError_t hipFree(hipDeviceptr_t dptr);
static hipError_t hipMemcpyDtoHAsync(void *dstHost, hipDeviceptr_t srcDevice, size_t ByteCount, hipStream_t hStream);
static hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dstDevice, const void *srcHost, size_t ByteCount, hipStream_t hStream);
static hipError_t hipMemcpyHtoD(hipDeviceptr_t dstDevice, const void *srcHost, size_t ByteCount);
static hipError_t hipMemcpyDtoHAsync(void *dstHost, hipDeviceptr_t srcDevice,
size_t ByteCount, hipStream_t hStream);
static hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dstDevice,
const void *srcHost, size_t ByteCount,
hipStream_t hStream);
static hipError_t hipMemcpyHtoD(hipDeviceptr_t dstDevice, const void *srcHost,
size_t ByteCount);
// event management
static hipError_t hipEventCreate(hipEvent_t *phEvent, unsigned int Flags);
static hipError_t hipEventElapsedTime(float *pMilliseconds, hipEvent_t hStart, hipEvent_t hEnd);
static hipError_t hipEventElapsedTime(float *pMilliseconds, hipEvent_t hStart,
hipEvent_t hEnd);
static hipError_t hipEventRecord(hipEvent_t hEvent, hipStream_t hStream);
static hipError_t hipEventDestroy(hipEvent_t hEvent);
private:
// Libraries
static void* cuda_;
static void* nvml_;
static void* hip_;
static void *cuda_;
static void *nvml_;
static void *hip_;
/* ------------------- *
* CUDA
* ------------------- */
// context management
static void* cuCtxGetCurrent_;
static void* cuCtxSetCurrent_;
static void* cuCtxDestroy_v2_;
static void* cuCtxCreate_v2_;
static void* cuCtxGetDevice_;
static void* cuCtxPushCurrent_v2_;
static void* cuCtxPopCurrent_v2_;
static void* cuCtxEnablePeerAccess_;
static void* cuDriverGetVersion_;
static void* cuInit_;
static void *cuCtxGetCurrent_;
static void *cuCtxSetCurrent_;
static void *cuCtxDestroy_v2_;
static void *cuCtxCreate_v2_;
static void *cuCtxGetDevice_;
static void *cuCtxPushCurrent_v2_;
static void *cuCtxPopCurrent_v2_;
static void *cuCtxEnablePeerAccess_;
static void *cuDriverGetVersion_;
static void *cuInit_;
// device management
static void* cuDeviceGet_;
static void* cuDeviceGetName_;
static void* cuDeviceGetPCIBusId_;
static void* cuDeviceGetAttribute_;
static void* cuDeviceGetCount_;
static void *cuDeviceGet_;
static void *cuDeviceGetName_;
static void *cuDeviceGetPCIBusId_;
static void *cuDeviceGetAttribute_;
static void *cuDeviceGetCount_;
// link management
static void* cuLinkAddData_v2_;
static void* cuLinkCreate_v2_;
static void* cuLinkDestroy_;
static void* cuLinkComplete_;
static void *cuLinkAddData_v2_;
static void *cuLinkCreate_v2_;
static void *cuLinkDestroy_;
static void *cuLinkComplete_;
// module management
static void* cuModuleGetGlobal_v2_;
static void* cuModuleLoad_;
static void* cuModuleUnload_;
static void* cuModuleLoadDataEx_;
static void* cuModuleLoadData_;
static void* cuModuleGetFunction_;
static void *cuModuleGetGlobal_v2_;
static void *cuModuleLoad_;
static void *cuModuleUnload_;
static void *cuModuleLoadDataEx_;
static void *cuModuleLoadData_;
static void *cuModuleGetFunction_;
// stream management
static void* cuStreamCreate_;
static void* cuStreamSynchronize_;
static void* cuStreamDestroy_v2_;
static void* cuStreamGetCtx_;
static void* cuLaunchKernel_;
static void *cuStreamCreate_;
static void *cuStreamSynchronize_;
static void *cuStreamDestroy_v2_;
static void *cuStreamGetCtx_;
static void *cuLaunchKernel_;
// function management
static void* cuFuncGetAttribute_;
static void* cuFuncSetAttribute_;
static void* cuFuncSetCacheConfig_;
static void *cuFuncGetAttribute_;
static void *cuFuncSetAttribute_;
static void *cuFuncSetCacheConfig_;
// memory management
static void* cuMemcpyDtoH_v2_;
static void* cuMemFree_v2_;
static void* cuMemcpyDtoHAsync_v2_;
static void* cuMemcpyHtoDAsync_v2_;
static void* cuMemcpyHtoD_v2_;
static void* cuMemAlloc_v2_;
static void* cuMemsetD8Async_;
static void* cuPointerGetAttribute_;
static void *cuMemcpyDtoH_v2_;
static void *cuMemFree_v2_;
static void *cuMemcpyDtoHAsync_v2_;
static void *cuMemcpyHtoDAsync_v2_;
static void *cuMemcpyHtoD_v2_;
static void *cuMemAlloc_v2_;
static void *cuMemsetD8Async_;
static void *cuPointerGetAttribute_;
// event management
static void* cuEventCreate_;
static void* cuEventElapsedTime_;
static void* cuEventRecord_;
static void* cuEventDestroy_v2_;
static void *cuEventCreate_;
static void *cuEventElapsedTime_;
static void *cuEventRecord_;
static void *cuEventDestroy_v2_;
/* ------------------- *
* NVML
* ------------------- */
static void* nvmlInit_v2_;
static void* nvmlDeviceGetHandleByPciBusId_v2_;
static void* nvmlDeviceGetClockInfo_;
static void* nvmlDeviceGetMaxClockInfo_;
static void* nvmlDeviceSetApplicationsClocks_;
static void *nvmlInit_v2_;
static void *nvmlDeviceGetHandleByPciBusId_v2_;
static void *nvmlDeviceGetClockInfo_;
static void *nvmlDeviceGetMaxClockInfo_;
static void *nvmlDeviceSetApplicationsClocks_;
/* ------------------- *
* HIP
* ------------------- */
// context management
static void* hipInit_;
static void* hipCtxDestroy_;
static void* hipCtxCreate_;
static void* hipCtxPushCurrent_;
static void* hipCtxPopCurrent_;
static void* hipCtxGetDevice_;
static void* hipCtxEnablePeerAccess_;
static void* hipDriverGetVersion_;
static void *hipInit_;
static void *hipCtxDestroy_;
static void *hipCtxCreate_;
static void *hipCtxPushCurrent_;
static void *hipCtxPopCurrent_;
static void *hipCtxGetDevice_;
static void *hipCtxEnablePeerAccess_;
static void *hipDriverGetVersion_;
// device management
static void* hipGetDevice_;
static void* hipDeviceGetName_;
static void* hipDeviceGetPCIBusId_;
static void* hipDeviceGetAttribute_;
static void* hipGetDeviceCount_;
static void *hipGetDevice_;
static void *hipDeviceGetName_;
static void *hipDeviceGetPCIBusId_;
static void *hipDeviceGetAttribute_;
static void *hipGetDeviceCount_;
// module management
static void* hipModuleGetGlobal_;
static void* hipModuleLoad_;
static void* hipModuleLoadData_;
static void* hipModuleUnload_;
static void* hipModuleLoadDataEx_;
static void* hipModuleGetFunction_;
static void *hipModuleGetGlobal_;
static void *hipModuleLoad_;
static void *hipModuleLoadData_;
static void *hipModuleUnload_;
static void *hipModuleLoadDataEx_;
static void *hipModuleGetFunction_;
// stream management
static void* hipStreamCreate_;
static void* hipStreamSynchronize_;
static void* hipStreamDestroy_;
static void* hipModuleLaunchKernel_;;
static void *hipStreamCreate_;
static void *hipStreamSynchronize_;
static void *hipStreamDestroy_;
static void *hipModuleLaunchKernel_;
;
// function management
static void* hipFuncGetAttributes_;
static void* hipFuncSetAttribute_;
static void* hipFuncSetCacheConfig_;
static void *hipFuncGetAttributes_;
static void *hipFuncSetAttribute_;
static void *hipFuncSetCacheConfig_;
// memory management
static void* hipMalloc_;
static void* hipPointerGetAttribute_;
static void* hipMemsetD8Async_;
static void* hipMemcpyDtoH_;
static void* hipFree_;
static void* hipMemcpyDtoHAsync_;
static void* hipMemcpyHtoDAsync_;
static void* hipMemcpyHtoD_;
static void *hipMalloc_;
static void *hipPointerGetAttribute_;
static void *hipMemsetD8Async_;
static void *hipMemcpyDtoH_;
static void *hipFree_;
static void *hipMemcpyDtoHAsync_;
static void *hipMemcpyHtoDAsync_;
static void *hipMemcpyHtoD_;
// event management
static void* hipEventCreate_;
static void* hipEventElapsedTime_;
static void* hipEventRecord_;
static void* hipEventDestroy_;
static void *hipEventCreate_;
static void *hipEventElapsedTime_;
static void *hipEventRecord_;
static void *hipEventDestroy_;
};
}
}
} // namespace driver
} // namespace triton
#endif

415
include/triton/driver/error.h Executable file → Normal file
View File

@@ -3,223 +3,252 @@
#ifndef _TRITON_DRIVER_ERROR_H_
#define _TRITON_DRIVER_ERROR_H_
#include <exception>
#include "triton/driver/dispatch.h"
#include <exception>
namespace triton {
namespace triton
{
namespace driver {
namespace driver
{
namespace exception {
namespace exception
{
namespace nvrtc {
namespace nvrtc
{
#define TRITON_CREATE_NVRTC_EXCEPTION(name, msg) \
class name : public std::exception { \
public: \
const char *what() const throw() override { return "NVRTC: Error- " msg; } \
}
#define TRITON_CREATE_NVRTC_EXCEPTION(name, msg) \
class name: public std::exception { public: const char * what() const throw() override { return "NVRTC: Error- " msg; } }
TRITON_CREATE_NVRTC_EXCEPTION(out_of_memory ,"out of memory");
TRITON_CREATE_NVRTC_EXCEPTION(program_creation_failure ,"program creation failure");
TRITON_CREATE_NVRTC_EXCEPTION(invalid_input ,"invalid input");
TRITON_CREATE_NVRTC_EXCEPTION(invalid_program ,"invalid program");
TRITON_CREATE_NVRTC_EXCEPTION(invalid_option ,"invalid option");
TRITON_CREATE_NVRTC_EXCEPTION(compilation ,"compilation");
TRITON_CREATE_NVRTC_EXCEPTION(builtin_operation_failure ,"builtin operation failure");
TRITON_CREATE_NVRTC_EXCEPTION(unknown_error ,"unknown error");
TRITON_CREATE_NVRTC_EXCEPTION(out_of_memory, "out of memory");
TRITON_CREATE_NVRTC_EXCEPTION(program_creation_failure,
"program creation failure");
TRITON_CREATE_NVRTC_EXCEPTION(invalid_input, "invalid input");
TRITON_CREATE_NVRTC_EXCEPTION(invalid_program, "invalid program");
TRITON_CREATE_NVRTC_EXCEPTION(invalid_option, "invalid option");
TRITON_CREATE_NVRTC_EXCEPTION(compilation, "compilation");
TRITON_CREATE_NVRTC_EXCEPTION(builtin_operation_failure,
"builtin operation failure");
TRITON_CREATE_NVRTC_EXCEPTION(unknown_error, "unknown error");
#undef TRITON_CREATE_NVRTC_EXCEPTION
} // namespace nvrtc
namespace cuda {
class base : public std::exception {};
#define TRITON_CREATE_CUDA_EXCEPTION(name, msg) \
class name : public base { \
public: \
const char *what() const throw() override { return "CUDA: Error- " msg; } \
}
namespace cuda
{
class base: public std::exception{};
#define TRITON_CREATE_CUDA_EXCEPTION(name, msg) \
class name: public base { public:const char * what() const throw() override { return "CUDA: Error- " msg; } }
TRITON_CREATE_CUDA_EXCEPTION(invalid_value ,"invalid value");
TRITON_CREATE_CUDA_EXCEPTION(out_of_memory ,"out of memory");
TRITON_CREATE_CUDA_EXCEPTION(not_initialized ,"not initialized");
TRITON_CREATE_CUDA_EXCEPTION(deinitialized ,"deinitialized");
TRITON_CREATE_CUDA_EXCEPTION(profiler_disabled ,"profiler disabled");
TRITON_CREATE_CUDA_EXCEPTION(profiler_not_initialized ,"profiler not initialized");
TRITON_CREATE_CUDA_EXCEPTION(profiler_already_started ,"profiler already started");
TRITON_CREATE_CUDA_EXCEPTION(profiler_already_stopped ,"profiler already stopped");
TRITON_CREATE_CUDA_EXCEPTION(no_device ,"no device");
TRITON_CREATE_CUDA_EXCEPTION(invalid_device ,"invalid device");
TRITON_CREATE_CUDA_EXCEPTION(invalid_image ,"invalid image");
TRITON_CREATE_CUDA_EXCEPTION(invalid_context ,"invalid context");
TRITON_CREATE_CUDA_EXCEPTION(context_already_current ,"context already current");
TRITON_CREATE_CUDA_EXCEPTION(map_failed ,"map failed");
TRITON_CREATE_CUDA_EXCEPTION(unmap_failed ,"unmap failed");
TRITON_CREATE_CUDA_EXCEPTION(array_is_mapped ,"array is mapped");
TRITON_CREATE_CUDA_EXCEPTION(already_mapped ,"already mapped");
TRITON_CREATE_CUDA_EXCEPTION(no_binary_for_gpu ,"no binary for gpu");
TRITON_CREATE_CUDA_EXCEPTION(already_acquired ,"already acquired");
TRITON_CREATE_CUDA_EXCEPTION(not_mapped ,"not mapped");
TRITON_CREATE_CUDA_EXCEPTION(not_mapped_as_array ,"not mapped as array");
TRITON_CREATE_CUDA_EXCEPTION(not_mapped_as_pointer ,"not mapped as pointer");
TRITON_CREATE_CUDA_EXCEPTION(ecc_uncorrectable ,"ecc uncorrectable");
TRITON_CREATE_CUDA_EXCEPTION(unsupported_limit ,"unsupported limit");
TRITON_CREATE_CUDA_EXCEPTION(context_already_in_use ,"context already in use");
TRITON_CREATE_CUDA_EXCEPTION(peer_access_unsupported ,"peer access unsupported");
TRITON_CREATE_CUDA_EXCEPTION(invalid_ptx ,"invalid ptx");
TRITON_CREATE_CUDA_EXCEPTION(invalid_graphics_context ,"invalid graphics context");
TRITON_CREATE_CUDA_EXCEPTION(invalid_source ,"invalid source");
TRITON_CREATE_CUDA_EXCEPTION(file_not_found ,"file not found");
TRITON_CREATE_CUDA_EXCEPTION(shared_object_symbol_not_found ,"shared object symbol not found");
TRITON_CREATE_CUDA_EXCEPTION(shared_object_init_failed ,"shared object init failed");
TRITON_CREATE_CUDA_EXCEPTION(operating_system ,"operating system");
TRITON_CREATE_CUDA_EXCEPTION(invalid_handle ,"invalid handle");
TRITON_CREATE_CUDA_EXCEPTION(not_found ,"not found");
TRITON_CREATE_CUDA_EXCEPTION(not_ready ,"not ready");
TRITON_CREATE_CUDA_EXCEPTION(illegal_address ,"illegal address");
TRITON_CREATE_CUDA_EXCEPTION(launch_out_of_resources ,"launch out of resources");
TRITON_CREATE_CUDA_EXCEPTION(launch_timeout ,"launch timeout");
TRITON_CREATE_CUDA_EXCEPTION(launch_incompatible_texturing ,"launch incompatible texturing");
TRITON_CREATE_CUDA_EXCEPTION(peer_access_already_enabled ,"peer access already enabled");
TRITON_CREATE_CUDA_EXCEPTION(peer_access_not_enabled ,"peer access not enabled");
TRITON_CREATE_CUDA_EXCEPTION(primary_context_active ,"primary context active");
TRITON_CREATE_CUDA_EXCEPTION(context_is_destroyed ,"context is destroyed");
TRITON_CREATE_CUDA_EXCEPTION(assert_error ,"assert");
TRITON_CREATE_CUDA_EXCEPTION(too_many_peers ,"too many peers");
TRITON_CREATE_CUDA_EXCEPTION(host_memory_already_registered ,"host memory already registered");
TRITON_CREATE_CUDA_EXCEPTION(host_memory_not_registered ,"hot memory not registered");
TRITON_CREATE_CUDA_EXCEPTION(hardware_stack_error ,"hardware stack error");
TRITON_CREATE_CUDA_EXCEPTION(illegal_instruction ,"illegal instruction");
TRITON_CREATE_CUDA_EXCEPTION(misaligned_address ,"misaligned address");
TRITON_CREATE_CUDA_EXCEPTION(invalid_address_space ,"invalid address space");
TRITON_CREATE_CUDA_EXCEPTION(invalid_pc ,"invalid pc");
TRITON_CREATE_CUDA_EXCEPTION(launch_failed ,"launch failed");
TRITON_CREATE_CUDA_EXCEPTION(not_permitted ,"not permitted");
TRITON_CREATE_CUDA_EXCEPTION(not_supported ,"not supported");
TRITON_CREATE_CUDA_EXCEPTION(unknown ,"unknown");
TRITON_CREATE_CUDA_EXCEPTION(invalid_value, "invalid value");
TRITON_CREATE_CUDA_EXCEPTION(out_of_memory, "out of memory");
TRITON_CREATE_CUDA_EXCEPTION(not_initialized, "not initialized");
TRITON_CREATE_CUDA_EXCEPTION(deinitialized, "deinitialized");
TRITON_CREATE_CUDA_EXCEPTION(profiler_disabled, "profiler disabled");
TRITON_CREATE_CUDA_EXCEPTION(profiler_not_initialized,
"profiler not initialized");
TRITON_CREATE_CUDA_EXCEPTION(profiler_already_started,
"profiler already started");
TRITON_CREATE_CUDA_EXCEPTION(profiler_already_stopped,
"profiler already stopped");
TRITON_CREATE_CUDA_EXCEPTION(no_device, "no device");
TRITON_CREATE_CUDA_EXCEPTION(invalid_device, "invalid device");
TRITON_CREATE_CUDA_EXCEPTION(invalid_image, "invalid image");
TRITON_CREATE_CUDA_EXCEPTION(invalid_context, "invalid context");
TRITON_CREATE_CUDA_EXCEPTION(context_already_current,
"context already current");
TRITON_CREATE_CUDA_EXCEPTION(map_failed, "map failed");
TRITON_CREATE_CUDA_EXCEPTION(unmap_failed, "unmap failed");
TRITON_CREATE_CUDA_EXCEPTION(array_is_mapped, "array is mapped");
TRITON_CREATE_CUDA_EXCEPTION(already_mapped, "already mapped");
TRITON_CREATE_CUDA_EXCEPTION(no_binary_for_gpu, "no binary for gpu");
TRITON_CREATE_CUDA_EXCEPTION(already_acquired, "already acquired");
TRITON_CREATE_CUDA_EXCEPTION(not_mapped, "not mapped");
TRITON_CREATE_CUDA_EXCEPTION(not_mapped_as_array, "not mapped as array");
TRITON_CREATE_CUDA_EXCEPTION(not_mapped_as_pointer, "not mapped as pointer");
TRITON_CREATE_CUDA_EXCEPTION(ecc_uncorrectable, "ecc uncorrectable");
TRITON_CREATE_CUDA_EXCEPTION(unsupported_limit, "unsupported limit");
TRITON_CREATE_CUDA_EXCEPTION(context_already_in_use, "context already in use");
TRITON_CREATE_CUDA_EXCEPTION(peer_access_unsupported,
"peer access unsupported");
TRITON_CREATE_CUDA_EXCEPTION(invalid_ptx, "invalid ptx");
TRITON_CREATE_CUDA_EXCEPTION(invalid_graphics_context,
"invalid graphics context");
TRITON_CREATE_CUDA_EXCEPTION(invalid_source, "invalid source");
TRITON_CREATE_CUDA_EXCEPTION(file_not_found, "file not found");
TRITON_CREATE_CUDA_EXCEPTION(shared_object_symbol_not_found,
"shared object symbol not found");
TRITON_CREATE_CUDA_EXCEPTION(shared_object_init_failed,
"shared object init failed");
TRITON_CREATE_CUDA_EXCEPTION(operating_system, "operating system");
TRITON_CREATE_CUDA_EXCEPTION(invalid_handle, "invalid handle");
TRITON_CREATE_CUDA_EXCEPTION(not_found, "not found");
TRITON_CREATE_CUDA_EXCEPTION(not_ready, "not ready");
TRITON_CREATE_CUDA_EXCEPTION(illegal_address, "illegal address");
TRITON_CREATE_CUDA_EXCEPTION(launch_out_of_resources,
"launch out of resources");
TRITON_CREATE_CUDA_EXCEPTION(launch_timeout, "launch timeout");
TRITON_CREATE_CUDA_EXCEPTION(launch_incompatible_texturing,
"launch incompatible texturing");
TRITON_CREATE_CUDA_EXCEPTION(peer_access_already_enabled,
"peer access already enabled");
TRITON_CREATE_CUDA_EXCEPTION(peer_access_not_enabled,
"peer access not enabled");
TRITON_CREATE_CUDA_EXCEPTION(primary_context_active, "primary context active");
TRITON_CREATE_CUDA_EXCEPTION(context_is_destroyed, "context is destroyed");
TRITON_CREATE_CUDA_EXCEPTION(assert_error, "assert");
TRITON_CREATE_CUDA_EXCEPTION(too_many_peers, "too many peers");
TRITON_CREATE_CUDA_EXCEPTION(host_memory_already_registered,
"host memory already registered");
TRITON_CREATE_CUDA_EXCEPTION(host_memory_not_registered,
"hot memory not registered");
TRITON_CREATE_CUDA_EXCEPTION(hardware_stack_error, "hardware stack error");
TRITON_CREATE_CUDA_EXCEPTION(illegal_instruction, "illegal instruction");
TRITON_CREATE_CUDA_EXCEPTION(misaligned_address, "misaligned address");
TRITON_CREATE_CUDA_EXCEPTION(invalid_address_space, "invalid address space");
TRITON_CREATE_CUDA_EXCEPTION(invalid_pc, "invalid pc");
TRITON_CREATE_CUDA_EXCEPTION(launch_failed, "launch failed");
TRITON_CREATE_CUDA_EXCEPTION(not_permitted, "not permitted");
TRITON_CREATE_CUDA_EXCEPTION(not_supported, "not supported");
TRITON_CREATE_CUDA_EXCEPTION(unknown, "unknown");
#undef TRITON_CREATE_CUDA_EXCEPTION
} // namespace cuda
namespace cublas {
class base : public std::exception {};
#define TRITON_CREATE_CUBLAS_EXCEPTION(name, msg) \
class name : public base { \
public: \
const char *what() const throw() override { \
return "CUBLAS: Error- " msg; \
} \
}
namespace cublas
{
class base: public std::exception{};
#define TRITON_CREATE_CUBLAS_EXCEPTION(name, msg) \
class name: public base { public: const char * what() const throw() override { return "CUBLAS: Error- " msg; } }
TRITON_CREATE_CUBLAS_EXCEPTION(not_initialized ,"not initialized");
TRITON_CREATE_CUBLAS_EXCEPTION(alloc_failed ,"alloc failed");
TRITON_CREATE_CUBLAS_EXCEPTION(invalid_value ,"invalid value");
TRITON_CREATE_CUBLAS_EXCEPTION(arch_mismatch ,"arch mismatch");
TRITON_CREATE_CUBLAS_EXCEPTION(mapping_error ,"mapping error");
TRITON_CREATE_CUBLAS_EXCEPTION(execution_failed ,"execution failed");
TRITON_CREATE_CUBLAS_EXCEPTION(internal_error ,"internal error");
TRITON_CREATE_CUBLAS_EXCEPTION(not_supported ,"not supported");
TRITON_CREATE_CUBLAS_EXCEPTION(license_error ,"license error");
TRITON_CREATE_CUBLAS_EXCEPTION(unknown ,"unknown");
TRITON_CREATE_CUBLAS_EXCEPTION(not_initialized, "not initialized");
TRITON_CREATE_CUBLAS_EXCEPTION(alloc_failed, "alloc failed");
TRITON_CREATE_CUBLAS_EXCEPTION(invalid_value, "invalid value");
TRITON_CREATE_CUBLAS_EXCEPTION(arch_mismatch, "arch mismatch");
TRITON_CREATE_CUBLAS_EXCEPTION(mapping_error, "mapping error");
TRITON_CREATE_CUBLAS_EXCEPTION(execution_failed, "execution failed");
TRITON_CREATE_CUBLAS_EXCEPTION(internal_error, "internal error");
TRITON_CREATE_CUBLAS_EXCEPTION(not_supported, "not supported");
TRITON_CREATE_CUBLAS_EXCEPTION(license_error, "license error");
TRITON_CREATE_CUBLAS_EXCEPTION(unknown, "unknown");
#undef TRITON_CREATE_CUBLAS_EXCEPTION
} // namespace cublas
namespace cudnn {
#define TRITON_CREATE_CUDNN_EXCEPTION(name, msg) \
class name : public std::exception { \
public: \
const char *what() const throw() override { return "CUDNN: Error- " msg; } \
}
namespace cudnn
{
#define TRITON_CREATE_CUDNN_EXCEPTION(name, msg) \
class name: public std::exception { public: const char * what() const throw() override { return "CUDNN: Error- " msg; } }
TRITON_CREATE_CUDNN_EXCEPTION(not_initialized, "not initialized");
TRITON_CREATE_CUDNN_EXCEPTION(alloc_failed, "allocation failed");
TRITON_CREATE_CUDNN_EXCEPTION(bad_param, "bad param");
TRITON_CREATE_CUDNN_EXCEPTION(internal_error, "internal error");
TRITON_CREATE_CUDNN_EXCEPTION(invalid_value, "invalid value");
TRITON_CREATE_CUDNN_EXCEPTION(arch_mismatch, "arch mismatch");
TRITON_CREATE_CUDNN_EXCEPTION(mapping_error, "mapping error");
TRITON_CREATE_CUDNN_EXCEPTION(execution_failed, "execution failed");
TRITON_CREATE_CUDNN_EXCEPTION(not_supported, "not supported");
TRITON_CREATE_CUDNN_EXCEPTION(license_error, "license error");
TRITON_CREATE_CUDNN_EXCEPTION(runtime_prerequisite_missing,
"prerequisite missing");
TRITON_CREATE_CUDNN_EXCEPTION(runtime_in_progress, "runtime in progress");
TRITON_CREATE_CUDNN_EXCEPTION(runtime_fp_overflow, "runtime fp overflow");
} // namespace cudnn
TRITON_CREATE_CUDNN_EXCEPTION(not_initialized ,"not initialized");
TRITON_CREATE_CUDNN_EXCEPTION(alloc_failed ,"allocation failed");
TRITON_CREATE_CUDNN_EXCEPTION(bad_param ,"bad param");
TRITON_CREATE_CUDNN_EXCEPTION(internal_error ,"internal error");
TRITON_CREATE_CUDNN_EXCEPTION(invalid_value ,"invalid value");
TRITON_CREATE_CUDNN_EXCEPTION(arch_mismatch ,"arch mismatch");
TRITON_CREATE_CUDNN_EXCEPTION(mapping_error ,"mapping error");
TRITON_CREATE_CUDNN_EXCEPTION(execution_failed ,"execution failed");
TRITON_CREATE_CUDNN_EXCEPTION(not_supported ,"not supported");
TRITON_CREATE_CUDNN_EXCEPTION(license_error ,"license error");
TRITON_CREATE_CUDNN_EXCEPTION(runtime_prerequisite_missing ,"prerequisite missing");
TRITON_CREATE_CUDNN_EXCEPTION(runtime_in_progress ,"runtime in progress");
TRITON_CREATE_CUDNN_EXCEPTION(runtime_fp_overflow ,"runtime fp overflow");
namespace hip {
class base : public std::exception {};
#define TRITON_CREATE_HIP_EXCEPTION(name, msg) \
class name : public base { \
public: \
const char *what() const throw() override { return "HIP: Error- " msg; } \
}
namespace hip
{
class base: public std::exception{};
#define TRITON_CREATE_HIP_EXCEPTION(name, msg) \
class name: public base { public:const char * what() const throw() override { return "HIP: Error- " msg; } }
TRITON_CREATE_HIP_EXCEPTION(invalid_value ,"invalid value");
TRITON_CREATE_HIP_EXCEPTION(out_of_memory ,"out of memory");
TRITON_CREATE_HIP_EXCEPTION(not_initialized ,"not initialized");
TRITON_CREATE_HIP_EXCEPTION(deinitialized ,"deinitialized");
TRITON_CREATE_HIP_EXCEPTION(profiler_disabled ,"profiler disabled");
TRITON_CREATE_HIP_EXCEPTION(profiler_not_initialized ,"profiler not initialized");
TRITON_CREATE_HIP_EXCEPTION(profiler_already_started ,"profiler already started");
TRITON_CREATE_HIP_EXCEPTION(profiler_already_stopped ,"profiler already stopped");
TRITON_CREATE_HIP_EXCEPTION(no_device ,"no device");
TRITON_CREATE_HIP_EXCEPTION(invalid_device ,"invalid device");
TRITON_CREATE_HIP_EXCEPTION(invalid_image ,"invalid image");
TRITON_CREATE_HIP_EXCEPTION(invalid_context ,"invalid context");
TRITON_CREATE_HIP_EXCEPTION(context_already_current ,"context already current");
TRITON_CREATE_HIP_EXCEPTION(map_failed ,"map failed");
TRITON_CREATE_HIP_EXCEPTION(unmap_failed ,"unmap failed");
TRITON_CREATE_HIP_EXCEPTION(array_is_mapped ,"array is mapped");
TRITON_CREATE_HIP_EXCEPTION(already_mapped ,"already mapped");
TRITON_CREATE_HIP_EXCEPTION(no_binary_for_gpu ,"no binary for gpu");
TRITON_CREATE_HIP_EXCEPTION(already_acquired ,"already acquired");
TRITON_CREATE_HIP_EXCEPTION(not_mapped ,"not mapped");
TRITON_CREATE_HIP_EXCEPTION(not_mapped_as_array ,"not mapped as array");
TRITON_CREATE_HIP_EXCEPTION(not_mapped_as_pointer ,"not mapped as pointer");
TRITON_CREATE_HIP_EXCEPTION(ecc_uncorrectable ,"ecc uncorrectable");
TRITON_CREATE_HIP_EXCEPTION(unsupported_limit ,"unsupported limit");
TRITON_CREATE_HIP_EXCEPTION(context_already_in_use ,"context already in use");
TRITON_CREATE_HIP_EXCEPTION(peer_access_unsupported ,"peer access unsupported");
TRITON_CREATE_HIP_EXCEPTION(invalid_ptx ,"invalid ptx");
TRITON_CREATE_HIP_EXCEPTION(invalid_graphics_context ,"invalid graphics context");
TRITON_CREATE_HIP_EXCEPTION(invalid_source ,"invalid source");
TRITON_CREATE_HIP_EXCEPTION(file_not_found ,"file not found");
TRITON_CREATE_HIP_EXCEPTION(shared_object_symbol_not_found ,"shared object symbol not found");
TRITON_CREATE_HIP_EXCEPTION(shared_object_init_failed ,"shared object init failed");
TRITON_CREATE_HIP_EXCEPTION(operating_system ,"operating system");
TRITON_CREATE_HIP_EXCEPTION(invalid_handle ,"invalid handle");
TRITON_CREATE_HIP_EXCEPTION(not_found ,"not found");
TRITON_CREATE_HIP_EXCEPTION(not_ready ,"not ready");
TRITON_CREATE_HIP_EXCEPTION(illegal_address ,"illegal address");
TRITON_CREATE_HIP_EXCEPTION(launch_out_of_resources ,"launch out of resources");
TRITON_CREATE_HIP_EXCEPTION(launch_timeout ,"launch timeout");
TRITON_CREATE_HIP_EXCEPTION(launch_incompatible_texturing ,"launch incompatible texturing");
TRITON_CREATE_HIP_EXCEPTION(peer_access_already_enabled ,"peer access already enabled");
TRITON_CREATE_HIP_EXCEPTION(peer_access_not_enabled ,"peer access not enabled");
TRITON_CREATE_HIP_EXCEPTION(primary_context_active ,"primary context active");
TRITON_CREATE_HIP_EXCEPTION(context_is_destroyed ,"context is destroyed");
TRITON_CREATE_HIP_EXCEPTION(assert_error ,"assert");
TRITON_CREATE_HIP_EXCEPTION(too_many_peers ,"too many peers");
TRITON_CREATE_HIP_EXCEPTION(host_memory_already_registered ,"host memory already registered");
TRITON_CREATE_HIP_EXCEPTION(host_memory_not_registered ,"hot memory not registered");
TRITON_CREATE_HIP_EXCEPTION(hardware_stack_error ,"hardware stack error");
TRITON_CREATE_HIP_EXCEPTION(illegal_instruction ,"illegal instruction");
TRITON_CREATE_HIP_EXCEPTION(misaligned_address ,"misaligned address");
TRITON_CREATE_HIP_EXCEPTION(invalid_address_space ,"invalid address space");
TRITON_CREATE_HIP_EXCEPTION(invalid_pc ,"invalid pc");
TRITON_CREATE_HIP_EXCEPTION(launch_failed ,"launch failed");
TRITON_CREATE_HIP_EXCEPTION(not_permitted ,"not permitted");
TRITON_CREATE_HIP_EXCEPTION(not_supported ,"not supported");
TRITON_CREATE_HIP_EXCEPTION(invalid_symbol ,"invalid symbol");
TRITON_CREATE_HIP_EXCEPTION(unknown ,"unknown");
TRITON_CREATE_HIP_EXCEPTION(invalid_value, "invalid value");
TRITON_CREATE_HIP_EXCEPTION(out_of_memory, "out of memory");
TRITON_CREATE_HIP_EXCEPTION(not_initialized, "not initialized");
TRITON_CREATE_HIP_EXCEPTION(deinitialized, "deinitialized");
TRITON_CREATE_HIP_EXCEPTION(profiler_disabled, "profiler disabled");
TRITON_CREATE_HIP_EXCEPTION(profiler_not_initialized,
"profiler not initialized");
TRITON_CREATE_HIP_EXCEPTION(profiler_already_started,
"profiler already started");
TRITON_CREATE_HIP_EXCEPTION(profiler_already_stopped,
"profiler already stopped");
TRITON_CREATE_HIP_EXCEPTION(no_device, "no device");
TRITON_CREATE_HIP_EXCEPTION(invalid_device, "invalid device");
TRITON_CREATE_HIP_EXCEPTION(invalid_image, "invalid image");
TRITON_CREATE_HIP_EXCEPTION(invalid_context, "invalid context");
TRITON_CREATE_HIP_EXCEPTION(context_already_current, "context already current");
TRITON_CREATE_HIP_EXCEPTION(map_failed, "map failed");
TRITON_CREATE_HIP_EXCEPTION(unmap_failed, "unmap failed");
TRITON_CREATE_HIP_EXCEPTION(array_is_mapped, "array is mapped");
TRITON_CREATE_HIP_EXCEPTION(already_mapped, "already mapped");
TRITON_CREATE_HIP_EXCEPTION(no_binary_for_gpu, "no binary for gpu");
TRITON_CREATE_HIP_EXCEPTION(already_acquired, "already acquired");
TRITON_CREATE_HIP_EXCEPTION(not_mapped, "not mapped");
TRITON_CREATE_HIP_EXCEPTION(not_mapped_as_array, "not mapped as array");
TRITON_CREATE_HIP_EXCEPTION(not_mapped_as_pointer, "not mapped as pointer");
TRITON_CREATE_HIP_EXCEPTION(ecc_uncorrectable, "ecc uncorrectable");
TRITON_CREATE_HIP_EXCEPTION(unsupported_limit, "unsupported limit");
TRITON_CREATE_HIP_EXCEPTION(context_already_in_use, "context already in use");
TRITON_CREATE_HIP_EXCEPTION(peer_access_unsupported, "peer access unsupported");
TRITON_CREATE_HIP_EXCEPTION(invalid_ptx, "invalid ptx");
TRITON_CREATE_HIP_EXCEPTION(invalid_graphics_context,
"invalid graphics context");
TRITON_CREATE_HIP_EXCEPTION(invalid_source, "invalid source");
TRITON_CREATE_HIP_EXCEPTION(file_not_found, "file not found");
TRITON_CREATE_HIP_EXCEPTION(shared_object_symbol_not_found,
"shared object symbol not found");
TRITON_CREATE_HIP_EXCEPTION(shared_object_init_failed,
"shared object init failed");
TRITON_CREATE_HIP_EXCEPTION(operating_system, "operating system");
TRITON_CREATE_HIP_EXCEPTION(invalid_handle, "invalid handle");
TRITON_CREATE_HIP_EXCEPTION(not_found, "not found");
TRITON_CREATE_HIP_EXCEPTION(not_ready, "not ready");
TRITON_CREATE_HIP_EXCEPTION(illegal_address, "illegal address");
TRITON_CREATE_HIP_EXCEPTION(launch_out_of_resources, "launch out of resources");
TRITON_CREATE_HIP_EXCEPTION(launch_timeout, "launch timeout");
TRITON_CREATE_HIP_EXCEPTION(launch_incompatible_texturing,
"launch incompatible texturing");
TRITON_CREATE_HIP_EXCEPTION(peer_access_already_enabled,
"peer access already enabled");
TRITON_CREATE_HIP_EXCEPTION(peer_access_not_enabled, "peer access not enabled");
TRITON_CREATE_HIP_EXCEPTION(primary_context_active, "primary context active");
TRITON_CREATE_HIP_EXCEPTION(context_is_destroyed, "context is destroyed");
TRITON_CREATE_HIP_EXCEPTION(assert_error, "assert");
TRITON_CREATE_HIP_EXCEPTION(too_many_peers, "too many peers");
TRITON_CREATE_HIP_EXCEPTION(host_memory_already_registered,
"host memory already registered");
TRITON_CREATE_HIP_EXCEPTION(host_memory_not_registered,
"hot memory not registered");
TRITON_CREATE_HIP_EXCEPTION(hardware_stack_error, "hardware stack error");
TRITON_CREATE_HIP_EXCEPTION(illegal_instruction, "illegal instruction");
TRITON_CREATE_HIP_EXCEPTION(misaligned_address, "misaligned address");
TRITON_CREATE_HIP_EXCEPTION(invalid_address_space, "invalid address space");
TRITON_CREATE_HIP_EXCEPTION(invalid_pc, "invalid pc");
TRITON_CREATE_HIP_EXCEPTION(launch_failed, "launch failed");
TRITON_CREATE_HIP_EXCEPTION(not_permitted, "not permitted");
TRITON_CREATE_HIP_EXCEPTION(not_supported, "not supported");
TRITON_CREATE_HIP_EXCEPTION(invalid_symbol, "invalid symbol");
TRITON_CREATE_HIP_EXCEPTION(unknown, "unknown");
#undef TRITON_CREATE_CUDA_EXCEPTION
}
} // namespace hip
}
}
}
} // namespace exception
} // namespace driver
} // namespace triton
#endif

View File

@@ -1,20 +1,21 @@
#include <string>
#include "triton/driver/dispatch.h"
#include <string>
namespace llvm{
namespace llvm {
class Module;
}
namespace triton{
namespace driver{
namespace triton {
namespace driver {
void init_llvm();
std::string path_to_ptxas(int& version);
std::string llir_to_ptx(llvm::Module* module, int cc, int version);
std::string ptx_to_cubin(const std::string& ptx, const std::string& ptxas_path, int cc);
CUmodule ptx_to_cumodule(const std::string& ptx, int cc);
std::string llir_to_amdgpu(llvm::Module* module, const std::string& proc);
hipModule_t amdgpu_to_hipmodule(const std::string& path);
std::string path_to_ptxas(int &version);
std::string llir_to_ptx(llvm::Module *module, int cc, int version);
std::string ptx_to_cubin(const std::string &ptx, const std::string &ptxas_path,
int cc);
CUmodule ptx_to_cumodule(const std::string &ptx, int cc);
std::string llir_to_amdgpu(llvm::Module *module, const std::string &proc);
hipModule_t amdgpu_to_hipmodule(const std::string &path);
}
}
} // namespace driver
} // namespace triton

View File

@@ -3,52 +3,55 @@
#ifndef _TRITON_TOOLS_BENCH_H_
#define _TRITON_TOOLS_BENCH_H_
#include <chrono>
#include <functional>
#include <algorithm>
#include "triton/driver/device.h"
#include "triton/driver/stream.h"
#include <algorithm>
#include <chrono>
#include <functional>
namespace triton{
namespace tools{
namespace triton {
namespace tools {
class timer{
typedef std::chrono::high_resolution_clock high_resolution_clock;
typedef std::chrono::nanoseconds nanoseconds;
class timer {
typedef std::chrono::high_resolution_clock high_resolution_clock;
typedef std::chrono::nanoseconds nanoseconds;
public:
explicit timer(bool run = false)
{ if (run) start(); }
explicit timer(bool run = false) {
if (run)
start();
}
void start()
{ _start = high_resolution_clock::now(); }
void start() { _start = high_resolution_clock::now(); }
nanoseconds get() const
{ return std::chrono::duration_cast<nanoseconds>(high_resolution_clock::now() - _start); }
nanoseconds get() const {
return std::chrono::duration_cast<nanoseconds>(
high_resolution_clock::now() - _start);
}
private:
high_resolution_clock::time_point _start;
high_resolution_clock::time_point _start;
};
inline double bench(std::function<void()> const & op, driver::stream * stream, size_t warmup = 10, size_t repeat = 200)
{
inline double bench(std::function<void()> const &op, driver::stream *stream,
size_t warmup = 10, size_t repeat = 200) {
timer tmr;
std::vector<size_t> times;
double total_time = 0;
for(size_t i = 0; i < warmup; i++)
for (size_t i = 0; i < warmup; i++)
op();
stream->synchronize();
tmr.start();
for(size_t i = 0; i < repeat; i++){
for (size_t i = 0; i < repeat; i++) {
op();
}
stream->synchronize();
return (float)tmr.get().count() / repeat;
// return *std::min_element(times.begin(), times.end());
// return *std::min_element(times.begin(), times.end());
}
}
}
} // namespace tools
} // namespace triton
#endif

View File

@@ -3,16 +3,15 @@
#ifndef _TRITON_TOOLS_THREAD_GRAPH_H_
#define _TRITON_TOOLS_THREAD_GRAPH_H_
#include <iostream>
#include <map>
#include <set>
#include <vector>
#include <iostream>
namespace triton {
namespace tools{
namespace tools {
template<class node_t>
class graph {
template <class node_t> class graph {
typedef std::map<node_t, std::set<node_t>> edges_t;
public:
@@ -21,27 +20,27 @@ public:
private:
void connected_components_impl(node_t x, std::set<node_t> &nodes,
nmap_t* nmap, cmap_t* cmap, int id) const {
if(nmap)
nmap_t *nmap, cmap_t *cmap, int id) const {
if (nmap)
(*nmap)[x] = id;
if(cmap)
if (cmap)
(*cmap)[id].push_back(x);
if(nodes.find(x) != nodes.end()) {
if (nodes.find(x) != nodes.end()) {
nodes.erase(x);
for(const node_t &y: edges_.at(x))
for (const node_t &y : edges_.at(x))
connected_components_impl(y, nodes, nmap, cmap, id);
}
}
public:
void connected_components(cmap_t *cmap, nmap_t *nmap) const {
if(cmap)
if (cmap)
cmap->clear();
if(nmap)
if (nmap)
nmap->clear();
std::set<node_t> nodes = nodes_;
unsigned id = 0;
while(!nodes.empty()){
while (!nodes.empty()) {
connected_components_impl(*nodes.begin(), nodes, nmap, cmap, id++);
}
}
@@ -63,7 +62,7 @@ private:
edges_t edges_;
};
}
}
} // namespace tools
} // namespace triton
#endif

View File

@@ -33,154 +33,140 @@
#ifndef _TRITON_TOOLS_SHA1_HPP_
#define _TRITON_TOOLS_SHA1_HPP_
namespace sha1
namespace sha1 {
namespace // local
{
namespace // local
{
// Rotate an integer value to left.
inline unsigned int rol(const unsigned int value,
const unsigned int steps)
{
return ((value << steps) | (value >> (32 - steps)));
}
// Rotate an integer value to left.
inline unsigned int rol(const unsigned int value, const unsigned int steps) {
return ((value << steps) | (value >> (32 - steps)));
}
// Sets the first 16 integers in the buffert to zero.
// Used for clearing the W buffert.
inline void clearWBuffert(unsigned int* buffert)
{
for (int pos = 16; --pos >= 0;)
{
buffert[pos] = 0;
}
}
// Sets the first 16 integers in the buffert to zero.
// Used for clearing the W buffert.
inline void clearWBuffert(unsigned int *buffert) {
for (int pos = 16; --pos >= 0;) {
buffert[pos] = 0;
}
}
inline void innerHash(unsigned int* result, unsigned int* w)
{
unsigned int a = result[0];
unsigned int b = result[1];
unsigned int c = result[2];
unsigned int d = result[3];
unsigned int e = result[4];
inline void innerHash(unsigned int *result, unsigned int *w) {
unsigned int a = result[0];
unsigned int b = result[1];
unsigned int c = result[2];
unsigned int d = result[3];
unsigned int e = result[4];
int round = 0;
int round = 0;
#define sha1macro(func,val) \
{ \
const unsigned int t = rol(a, 5) + (func) + e + val + w[round]; \
e = d; \
d = c; \
c = rol(b, 30); \
b = a; \
a = t; \
}
#define sha1macro(func, val) \
{ \
const unsigned int t = rol(a, 5) + (func) + e + val + w[round]; \
e = d; \
d = c; \
c = rol(b, 30); \
b = a; \
a = t; \
}
while (round < 16)
{
sha1macro((b & c) | (~b & d), 0x5a827999)
++round;
}
while (round < 20)
{
w[round] = rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1);
sha1macro((b & c) | (~b & d), 0x5a827999)
++round;
}
while (round < 40)
{
w[round] = rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1);
sha1macro(b ^ c ^ d, 0x6ed9eba1)
++round;
}
while (round < 60)
{
w[round] = rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1);
sha1macro((b & c) | (b & d) | (c & d), 0x8f1bbcdc)
++round;
}
while (round < 80)
{
w[round] = rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1);
sha1macro(b ^ c ^ d, 0xca62c1d6)
++round;
}
while (round < 16) {
sha1macro((b & c) | (~b & d), 0x5a827999)++ round;
}
while (round < 20) {
w[round] =
rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1);
sha1macro((b & c) | (~b & d), 0x5a827999)++ round;
}
while (round < 40) {
w[round] =
rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1);
sha1macro(b ^ c ^ d, 0x6ed9eba1)++ round;
}
while (round < 60) {
w[round] =
rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1);
sha1macro((b & c) | (b & d) | (c & d), 0x8f1bbcdc)++ round;
}
while (round < 80) {
w[round] =
rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1);
sha1macro(b ^ c ^ d, 0xca62c1d6)++ round;
}
#undef sha1macro
#undef sha1macro
result[0] += a;
result[1] += b;
result[2] += c;
result[3] += d;
result[4] += e;
}
} // namespace
result[0] += a;
result[1] += b;
result[2] += c;
result[3] += d;
result[4] += e;
}
} // namespace
inline void calc(const void* src, const int bytelength, unsigned char* hash)
{
// Init the result array.
unsigned int result[5] = { 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0 };
inline void calc(const void *src, const int bytelength, unsigned char *hash) {
// Init the result array.
unsigned int result[5] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476,
0xc3d2e1f0};
// Cast the void src pointer to be the byte array we can work with.
const unsigned char* sarray = (const unsigned char*) src;
// Cast the void src pointer to be the byte array we can work with.
const unsigned char *sarray = (const unsigned char *)src;
// The reusable round buffer
unsigned int w[80];
// The reusable round buffer
unsigned int w[80];
// Loop through all complete 64byte blocks.
const int endOfFullBlocks = bytelength - 64;
int endCurrentBlock;
int currentBlock = 0;
// Loop through all complete 64byte blocks.
const int endOfFullBlocks = bytelength - 64;
int endCurrentBlock;
int currentBlock = 0;
while (currentBlock <= endOfFullBlocks)
{
endCurrentBlock = currentBlock + 64;
while (currentBlock <= endOfFullBlocks) {
endCurrentBlock = currentBlock + 64;
// Init the round buffer with the 64 byte block data.
for (int roundPos = 0; currentBlock < endCurrentBlock; currentBlock += 4)
{
// This line will swap endian on big endian and keep endian on little endian.
w[roundPos++] = (unsigned int) sarray[currentBlock + 3]
| (((unsigned int) sarray[currentBlock + 2]) << 8)
| (((unsigned int) sarray[currentBlock + 1]) << 16)
| (((unsigned int) sarray[currentBlock]) << 24);
}
innerHash(result, w);
}
// Handle the last and not full 64 byte block if existing.
endCurrentBlock = bytelength - currentBlock;
clearWBuffert(w);
int lastBlockBytes = 0;
for (;lastBlockBytes < endCurrentBlock; ++lastBlockBytes)
{
w[lastBlockBytes >> 2] |= (unsigned int) sarray[lastBlockBytes + currentBlock] << ((3 - (lastBlockBytes & 3)) << 3);
}
w[lastBlockBytes >> 2] |= 0x80 << ((3 - (lastBlockBytes & 3)) << 3);
if (endCurrentBlock >= 56)
{
innerHash(result, w);
clearWBuffert(w);
}
w[15] = bytelength << 3;
innerHash(result, w);
// Store hash in result pointer, and make sure we get in in the correct order on both endian models.
for (int hashByte = 20; --hashByte >= 0;)
{
hash[hashByte] = (result[hashByte >> 2] >> (((3 - hashByte) & 0x3) << 3)) & 0xff;
}
// Init the round buffer with the 64 byte block data.
for (int roundPos = 0; currentBlock < endCurrentBlock; currentBlock += 4) {
// This line will swap endian on big endian and keep endian on little
// endian.
w[roundPos++] = (unsigned int)sarray[currentBlock + 3] |
(((unsigned int)sarray[currentBlock + 2]) << 8) |
(((unsigned int)sarray[currentBlock + 1]) << 16) |
(((unsigned int)sarray[currentBlock]) << 24);
}
innerHash(result, w);
}
inline void toHexString(const unsigned char* hash, char* hexstring)
{
const char hexDigits[] = { "0123456789abcdef" };
// Handle the last and not full 64 byte block if existing.
endCurrentBlock = bytelength - currentBlock;
clearWBuffert(w);
int lastBlockBytes = 0;
for (; lastBlockBytes < endCurrentBlock; ++lastBlockBytes) {
w[lastBlockBytes >> 2] |=
(unsigned int)sarray[lastBlockBytes + currentBlock]
<< ((3 - (lastBlockBytes & 3)) << 3);
}
w[lastBlockBytes >> 2] |= 0x80 << ((3 - (lastBlockBytes & 3)) << 3);
if (endCurrentBlock >= 56) {
innerHash(result, w);
clearWBuffert(w);
}
w[15] = bytelength << 3;
innerHash(result, w);
for (int hashByte = 20; --hashByte >= 0;)
{
hexstring[hashByte << 1] = hexDigits[(hash[hashByte] >> 4) & 0xf];
hexstring[(hashByte << 1) + 1] = hexDigits[hash[hashByte] & 0xf];
}
hexstring[40] = 0;
}
// Store hash in result pointer, and make sure we get in in the correct order
// on both endian models.
for (int hashByte = 20; --hashByte >= 0;) {
hash[hashByte] =
(result[hashByte >> 2] >> (((3 - hashByte) & 0x3) << 3)) & 0xff;
}
}
inline void toHexString(const unsigned char *hash, char *hexstring) {
const char hexDigits[] = {"0123456789abcdef"};
for (int hashByte = 20; --hashByte >= 0;) {
hexstring[hashByte << 1] = hexDigits[(hash[hashByte] >> 4) & 0xf];
hexstring[(hashByte << 1) + 1] = hexDigits[hash[hashByte] & 0xf];
}
hexstring[40] = 0;
}
} // namespace sha1
#endif

View File

@@ -7,11 +7,8 @@
#include <stdexcept>
#include <string>
namespace triton
{
namespace tools
{
namespace triton {
namespace tools {
#ifdef _WIN32
#define popen _popen
@@ -19,12 +16,12 @@ namespace tools
#endif
#ifndef WEXITSTATUS
#define WEXITSTATUS(stat_val) ((unsigned)(stat_val) & 255)
#define WEXITSTATUS(stat_val) ((unsigned)(stat_val)&255)
#endif
int exec(const std::string& cmd, std::string& result) {
int exec(const std::string &cmd, std::string &result) {
char buffer[128];
FILE* pipe = popen(cmd.c_str(), "r");
FILE *pipe = popen(cmd.c_str(), "r");
if (!pipe)
return 0;
result.clear();
@@ -37,10 +34,9 @@ int exec(const std::string& cmd, std::string& result) {
}
int status = pclose(pipe);
return WEXITSTATUS(status);
}
}
}
} // namespace tools
} // namespace triton
#endif

27
include/triton/tools/sys/getenv.hpp Executable file → Normal file
View File

@@ -22,26 +22,23 @@
#ifndef TDL_TOOLS_SYS_GETENV_HPP
#define TDL_TOOLS_SYS_GETENV_HPP
#include <string>
#include <cstdlib>
#include <string>
namespace triton
{
namespace triton {
namespace tools
{
inline std::string getenv(const char * name)
{
const char * cstr = std::getenv(name);
if(!cstr)
return "";
std::string result(cstr);
return result;
}
namespace tools {
inline std::string getenv(const char *name) {
const char *cstr = std::getenv(name);
if (!cstr)
return "";
std::string result(cstr);
return result;
}
}
} // namespace tools
} // namespace triton
#endif

74
include/triton/tools/sys/mkdir.hpp Executable file → Normal file
View File

@@ -22,55 +22,49 @@
#ifndef TDL_TOOLS_SYS_MKDIR_HPP
#define TDL_TOOLS_SYS_MKDIR_HPP
#include <cstring>
#include <string>
#include <cstdlib>
#include <sys/stat.h>
#include <cstring>
#include <errno.h>
#include <string>
#include <sys/stat.h>
#if defined(_WIN32)
#include <direct.h>
#include <direct.h>
#endif
namespace triton
{
namespace triton {
namespace tools
{
inline int mkdir(std::string const & path)
{
#if defined(_WIN32)
return _mkdir(path.c_str());
#else
return ::mkdir(path.c_str(), 0777);
#endif
}
inline int mkpath(std::string const & path)
{
int status = 0;
size_t pp = 0;
size_t sp;
while ((sp = path.find('/', pp)) != std::string::npos)
{
if (sp != pp){
status = mkdir(path.substr(0, sp));
}
pp = sp + 1;
}
return (status==0 || errno==EEXIST)?0:-1;
}
inline int mtime(std::string const & path)
{
struct stat st;
if(stat(path.c_str(), &st) != 0)
return 0;
return st.st_mtime;
}
namespace tools {
inline int mkdir(std::string const &path) {
#if defined(_WIN32)
return _mkdir(path.c_str());
#else
return ::mkdir(path.c_str(), 0777);
#endif
}
inline int mkpath(std::string const &path) {
int status = 0;
size_t pp = 0;
size_t sp;
while ((sp = path.find('/', pp)) != std::string::npos) {
if (sp != pp) {
status = mkdir(path.substr(0, sp));
}
pp = sp + 1;
}
return (status == 0 || errno == EEXIST) ? 0 : -1;
}
inline int mtime(std::string const &path) {
struct stat st;
if (stat(path.c_str(), &st) != 0)
return 0;
return st.st_mtime;
}
} // namespace tools
} // namespace triton
#endif

View File

@@ -3,88 +3,79 @@
#ifndef _TRITON_TOOLS_THREAD_POOL_H_
#define _TRITON_TOOLS_THREAD_POOL_H_
#include <vector>
#include <queue>
#include <memory>
#include <thread>
#include <mutex>
#include <condition_variable>
#include <future>
#include <functional>
#include <future>
#include <memory>
#include <mutex>
#include <queue>
#include <stdexcept>
#include <thread>
#include <vector>
class ThreadPool {
public:
ThreadPool(size_t threads)
: stop(false) {
for(size_t i = 0;i < threads;++i)
workers.emplace_back(
[this] {
for(;;){
std::function<void()> task;
{
std::unique_lock<std::mutex> lock(this->queue_mutex);
this->condition.wait(lock,
[this]{ return this->stop || !this->tasks.empty(); });
if(this->stop && this->tasks.empty())
return;
task = std::move(this->tasks.front());
this->tasks.pop();
}
task();
}
}
);
}
ThreadPool(size_t threads) : stop(false) {
for (size_t i = 0; i < threads; ++i)
workers.emplace_back([this] {
for (;;) {
std::function<void()> task;
{
std::unique_lock<std::mutex> lock(this->queue_mutex);
this->condition.wait(
lock, [this] { return this->stop || !this->tasks.empty(); });
if (this->stop && this->tasks.empty())
return;
task = std::move(this->tasks.front());
this->tasks.pop();
}
task();
}
});
}
template <class F, class... Args>
auto enqueue(F &&f, Args &&... args)
-> std::future<typename std::result_of<F(Args...)>::type> {
using return_type = typename std::result_of<F(Args...)>::type;
template<class F, class... Args>
auto enqueue(F&& f, Args&&... args)
-> std::future<typename std::result_of<F(Args...)>::type>
auto task = std::make_shared<std::packaged_task<return_type()>>(
std::bind(std::forward<F>(f), std::forward<Args>(args)...));
std::future<return_type> res = task->get_future();
{
using return_type = typename std::result_of<F(Args...)>::type;
std::unique_lock<std::mutex> lock(queue_mutex);
auto task = std::make_shared< std::packaged_task<return_type()> >(
std::bind(std::forward<F>(f), std::forward<Args>(args)...)
);
// don't allow enqueueing after stopping the pool
if (stop)
throw std::runtime_error("enqueue on stopped ThreadPool");
std::future<return_type> res = task->get_future();
{
std::unique_lock<std::mutex> lock(queue_mutex);
// don't allow enqueueing after stopping the pool
if(stop)
throw std::runtime_error("enqueue on stopped ThreadPool");
tasks.emplace([task](){ (*task)(); });
}
condition.notify_one();
return res;
tasks.emplace([task]() { (*task)(); });
}
condition.notify_one();
return res;
}
~ThreadPool() {
{
std::unique_lock<std::mutex> lock(queue_mutex);
stop = true;
}
condition.notify_all();
for(std::thread &worker: workers)
worker.join();
~ThreadPool() {
{
std::unique_lock<std::mutex> lock(queue_mutex);
stop = true;
}
condition.notify_all();
for (std::thread &worker : workers)
worker.join();
}
private:
// need to keep track of threads so we can join them
std::vector< std::thread > workers;
// the task queue
std::queue< std::function<void()> > tasks;
// need to keep track of threads so we can join them
std::vector<std::thread> workers;
// the task queue
std::queue<std::function<void()>> tasks;
// synchronization
std::mutex queue_mutex;
std::condition_variable condition;
bool stop;
// synchronization
std::mutex queue_mutex;
std::condition_variable condition;
bool stop;
};
#endif