[RUNTIME] Major code cleanup (#711)

This PR does the following:
- CUDA utilities (e.g., cuGetInfo) won't be compiled as part of libtriton.so anymore.
- driver/llvm.cc is refactored and split between PTX codegen and Python.
- By extension, this also deprecates include/external, so Triton no longer has to carry its own copy of some CUDA/HIP headers.
- `triton-translate` becomes a `triton.tools.aot` Python utility that re-uses functions from the triton.compile sub-module.
This commit is contained in:
Philippe Tillet
2022-09-26 16:38:06 -07:00
committed by GitHub
parent 8bb09f83ee
commit 1e91ed30d0
28 changed files with 509 additions and 31483 deletions

View File

@@ -1,34 +1,17 @@
#ifndef TRITON_TARGET_PTXTRANSLATION_H
#define TRITON_TARGET_PTXTRANSLATION_H
#include "triton/driver/dispatch.h"
#include <memory>
#include <string>
namespace mlir {
class ModuleOp;
} // namespace mlir
namespace llvm {
class Module;
} // namespace llvm
namespace triton {
// Queries the integer device attribute `attr` of `device` through
// driver::dispatch::cuDeviceGetAttribute and returns the value it wrote.
template <CUdevice_attribute attr> int cuGetInfo(CUdevice device) {
  // Value-initialized so that a defined value is returned even if the driver
  // call leaves the output parameter untouched.
  int res = 0;
  driver::dispatch::cuDeviceGetAttribute(&res, attr, device);
  return res;
}
// Reports information about `device` through the three output pointers.
// NOTE(review): judging by the parameter names these are the compute
// capability, a version number and the path to ptxas; the definition is not
// visible from this header — confirm.
void getCuCCAndVersionFromDevice(uint64_t device, int *cc, int *version,
std::string *ptxasPath);
// Translate TritonGPU IR to PTX code.
// The result bundles the generated code with the target information listed
// in the per-field comments below.
// NOTE(review): std::tuple is used here but <tuple> is not included directly
// by this header; it presumably arrives through a transitive include —
// confirm.
std::tuple<std::string, // ptx code
size_t, // PTX cc
int, // PTX version
std::string // ptxas path
>
translateTritonGPUToPTX(mlir::ModuleOp module, uint64_t device);
// Translates an LLVM IR module to a PTX string.
// NOTE(review): `cc` and `version` presumably mean the same compute
// capability / PTX version as the tuple fields above — confirm.
std::string translateLLVMIRToPTX(llvm::Module &module, int cc, int version);
} // namespace triton

View File

@@ -1,376 +0,0 @@
#pragma once
#ifndef _TRITON_DRIVER_DISPATCH_H_
#define _TRITON_DRIVER_DISPATCH_H_
#include <cstdlib>
#include <dlfcn.h>
#include <string>
#include <type_traits>
// CUDA Backend
#include "triton/external/CUDA/cuda.h"
#include "triton/external/CUDA/nvml.h"
//// HIP backend
//#define __HIP_PLATFORM_AMD__
#include "triton/external/hip.h"
// Exceptions
#include <iostream>
#include <stdexcept>
namespace llvm {
class PassRegistry;
class Module;
} // namespace llvm
namespace triton {
namespace driver {
class cu_context;
// Generic overload: accepts a value of any type and does nothing with it.
template <class T> void check(T) {}
// Overloads for CUDA and HIP status codes; only declared in this header.
// NOTE(review): presumably these report a non-success status as an error —
// the definitions are not visible here, confirm.
void check(CUresult err);
void check(hipError_t err);
class dispatch {
protected:
// Primary template: declared only, no definition is provided here.
template <class Fn> struct return_type;
// Partial specialization for plain function pointers `Ret (*)(Params...)`;
// exposes the pointee's result type as the nested alias `type`.
template <class Ret, class... Params> struct return_type<Ret (*)(Params...)> {
  using type = Ret;
};
// Pointer to a zero-argument initializer returning bool (nvmlinit, cuinit and
// hipinit declared below have this shape).
typedef bool (*f_init_t)();
// Calls a dynamically loaded entry point.
//   initializer - invoked at the start of every call
//   lib_h       - library handle handed to dlsym
//   FunPtrT     - function-pointer type of the entry point; the unnamed
//                 parameter exists only so the type can be deduced
//   cache       - memoized symbol address; resolved via dlsym while null
//   name        - symbol name to resolve
//   args        - forwarded by value to the resolved function
// Returns the callee's result after passing it to check().
// If the symbol cannot be resolved, throws std::runtime_error when exceptions
// are enabled; otherwise prints a message to std::cerr and aborts.
template <f_init_t initializer, typename FunPtrT, typename... Args>
static typename return_type<FunPtrT>::type
f_impl(void *&lib_h, FunPtrT, void *&cache, const char *name, Args... args) {
  // NOTE(review): the initializer's bool result is ignored here — confirm
  // whether a failed initialization should be reported before calling dlsym.
  initializer();
  if (cache == nullptr) {
    cache = dlsym(lib_h, name);
    if (cache == nullptr) {
#ifdef __EXCEPTIONS
      // Name the missing symbol, matching the diagnostic of the
      // no-exceptions path below.
      throw std::runtime_error(
          std::string("dlsym unable to load function `") + name + "`");
#else
      std::cerr << "Triton: dlsym unable to load function `" << name << "`"
                << std::endl;
      std::abort();
#endif
    }
  }
  // Write the object pointer obtained from dlsym into the function pointer
  // through a void** view instead of casting between the two pointer kinds.
  FunPtrT fptr;
  *reinterpret_cast<void **>(&fptr) = cache;
  typename return_type<FunPtrT>::type res = (*fptr)(args...);
  check(res);
  return res;
}
public:
static void release();
// Nvidia
static bool nvmlinit();
static bool cuinit();
// AMD
static bool hipinit();
/* ------------------- *
* CUDA
* ------------------- */
// context management
static CUresult cuInit(unsigned int Flags);
static CUresult cuCtxDestroy_v2(CUcontext ctx);
static CUresult cuCtxCreate_v2(CUcontext *pctx, unsigned int flags,
CUdevice dev);
static CUresult cuCtxPushCurrent_v2(CUcontext ctx);
static CUresult cuCtxPopCurrent_v2(CUcontext *pctx);
static CUresult cuCtxGetDevice(CUdevice *result);
static CUresult cuCtxEnablePeerAccess(CUcontext peerContext,
unsigned int flags);
static CUresult cuDriverGetVersion(int *driverVersion);
// device management
static CUresult cuDeviceGet(CUdevice *device, int ordinal);
static CUresult cuDeviceGetName(char *name, int len, CUdevice dev);
static CUresult cuDeviceGetPCIBusId(char *id, int len, CUdevice dev);
static CUresult cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib,
CUdevice dev);
static CUresult cuDeviceGetCount(int *count);
// link management
static CUresult cuLinkAddData_v2(CUlinkState state, CUjitInputType type,
void *data, size_t size, const char *name,
unsigned int numOptions,
CUjit_option *options, void **optionValues);
static CUresult cuLinkCreate_v2(unsigned int numOptions,
CUjit_option *options, void **optionValues,
CUlinkState *stateOut);
static CUresult cuLinkComplete(CUlinkState state, void **cubinOut,
size_t *sizeOut);
static CUresult cuLinkDestroy(CUlinkState state);
// module management
static CUresult cuModuleGetGlobal_v2(CUdeviceptr *dptr, size_t *bytes,
CUmodule hmod, const char *name);
static CUresult cuModuleLoad(CUmodule *module, const char *fname);
static CUresult cuModuleLoadData(CUmodule *module, const void *image);
static CUresult cuModuleUnload(CUmodule hmod);
static CUresult cuModuleLoadDataEx(CUmodule *module, const void *image,
unsigned int numOptions,
CUjit_option *options,
void **optionValues);
static CUresult cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod,
const char *name);
// stream management
static CUresult cuStreamCreate(CUstream *phStream, unsigned int Flags);
static CUresult cuStreamSynchronize(CUstream hStream);
static CUresult cuStreamGetCtx(CUstream hStream, CUcontext *pctx);
static CUresult cuStreamDestroy_v2(CUstream hStream);
static CUresult cuLaunchKernel(CUfunction f, unsigned int gridDimX,
unsigned int gridDimY, unsigned int gridDimZ,
unsigned int blockDimX, unsigned int blockDimY,
unsigned int blockDimZ,
unsigned int sharedMemBytes, CUstream hStream,
void **kernelParams, void **extra);
// function management
static CUresult cuFuncGetAttribute(int *pi, CUfunction_attribute attrib,
CUfunction hfunc);
static CUresult cuFuncSetAttribute(CUfunction hfunc,
CUfunction_attribute attrib, int value);
static CUresult cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
// memory management
static CUresult cuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize);
static CUresult cuPointerGetAttribute(void *data,
CUpointer_attribute attribute,
CUdeviceptr ptr);
static CUresult cuMemsetD8Async(CUdeviceptr dst, unsigned char x, size_t N,
CUstream stream);
static CUresult cuMemcpyDtoH_v2(void *dstHost, CUdeviceptr srcDevice,
size_t ByteCount);
static CUresult cuMemFree_v2(CUdeviceptr dptr);
static CUresult cuMemcpyDtoHAsync_v2(void *dstHost, CUdeviceptr srcDevice,
size_t ByteCount, CUstream hStream);
static CUresult cuMemcpyHtoDAsync_v2(CUdeviceptr dstDevice,
const void *srcHost, size_t ByteCount,
CUstream hStream);
static CUresult cuMemcpyHtoD_v2(CUdeviceptr dstDevice, const void *srcHost,
size_t ByteCount);
// event management
static CUresult cuEventCreate(CUevent *phEvent, unsigned int Flags);
static CUresult cuEventElapsedTime(float *pMilliseconds, CUevent hStart,
CUevent hEnd);
static CUresult cuEventRecord(CUevent hEvent, CUstream hStream);
static CUresult cuEventDestroy_v2(CUevent hEvent);
/* ------------------- *
* NVML
* ------------------- */
static nvmlReturn_t nvmlDeviceGetHandleByPciBusId_v2(const char *pciBusId,
nvmlDevice_t *device);
static nvmlReturn_t nvmlDeviceGetClockInfo(nvmlDevice_t device,
nvmlClockType_t type,
unsigned int *clock);
static nvmlReturn_t nvmlDeviceGetMaxClockInfo(nvmlDevice_t device,
nvmlClockType_t type,
unsigned int *clock);
static nvmlReturn_t nvmlDeviceSetApplicationsClocks(nvmlDevice_t device,
unsigned int mem_clock,
unsigned int sm_clock);
/* ------------------- *
* HIP
* ------------------- */
// context management
static hipError_t hipInit(unsigned int Flags);
static hipError_t hipCtxDestroy(hipCtx_t ctx);
static hipError_t hipCtxCreate(hipCtx_t *pctx, unsigned int flags,
hipDevice_t dev);
static hipError_t hipCtxPushCurrent(hipCtx_t ctx);
static hipError_t hipCtxPopCurrent(hipCtx_t *pctx);
static hipError_t hipCtxGetDevice(hipDevice_t *result);
static hipError_t hipCtxEnablePeerAccess(hipCtx_t peerContext,
unsigned int flags);
static hipError_t hipDriverGetVersion(int *driverVersion);
// device management
static hipError_t hipGetDevice(hipDevice_t *device, int ordinal);
static hipError_t hipDeviceGetName(char *name, int len, hipDevice_t dev);
static hipError_t hipDeviceGetPCIBusId(char *id, int len, hipDevice_t dev);
static hipError_t hipDeviceGetAttribute(int *pi, hipDeviceAttribute_t attrib,
hipDevice_t dev);
static hipError_t hipGetDeviceCount(int *count);
// module management
static hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes,
hipModule_t hmod, const char *name);
static hipError_t hipModuleLoad(hipModule_t *module, const char *fname);
static hipError_t hipModuleLoadData(hipModule_t *module, const void *image);
static hipError_t hipModuleUnload(hipModule_t hmod);
static hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image,
unsigned int numOptions,
hipJitOption *options,
void **optionValues);
static hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod,
const char *name);
// stream management
static hipError_t hipStreamCreate(hipStream_t *phStream, unsigned int Flags);
static hipError_t hipStreamSynchronize(hipStream_t hStream);
static hipError_t hipStreamDestroy(hipStream_t hStream);
static hipError_t
hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX,
unsigned int gridDimY, unsigned int gridDimZ,
unsigned int blockDimX, unsigned int blockDimY,
unsigned int blockDimZ, unsigned int sharedMemBytes,
hipStream_t hStream, void **kernelParams, void **extra);
// function management
static hipError_t hipFuncGetAttributes(hipFuncAttributes *attrib,
void *hfunc);
static hipError_t hipFuncSetAttribute(hipFunction_t hfunc,
hipFuncAttribute attrib, int value);
static hipError_t hipFuncSetCacheConfig(hipFunction_t hfunc,
hipFuncCache_t config);
// memory management
static hipError_t hipMalloc(hipDeviceptr_t *dptr, size_t bytesize);
static hipError_t hipPointerGetAttribute(void *data,
CUpointer_attribute attribute,
hipDeviceptr_t ptr);
static hipError_t hipMemsetD8Async(hipDeviceptr_t dst, unsigned char x,
size_t N, hipStream_t stream);
static hipError_t hipMemcpyDtoH(void *dstHost, hipDeviceptr_t srcDevice,
size_t ByteCount);
static hipError_t hipFree(hipDeviceptr_t dptr);
static hipError_t hipMemcpyDtoHAsync(void *dstHost, hipDeviceptr_t srcDevice,
size_t ByteCount, hipStream_t hStream);
static hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dstDevice,
const void *srcHost, size_t ByteCount,
hipStream_t hStream);
static hipError_t hipMemcpyHtoD(hipDeviceptr_t dstDevice, const void *srcHost,
size_t ByteCount);
// event management
static hipError_t hipEventCreate(hipEvent_t *phEvent, unsigned int Flags);
static hipError_t hipEventElapsedTime(float *pMilliseconds, hipEvent_t hStart,
hipEvent_t hEnd);
static hipError_t hipEventRecord(hipEvent_t hEvent, hipStream_t hStream);
static hipError_t hipEventDestroy(hipEvent_t hEvent);
private:
// Libraries
static void *cuda_;
static void *nvml_;
static void *hip_;
/* ------------------- *
* CUDA
* ------------------- */
// context management
static void *cuCtxGetCurrent_;
static void *cuCtxSetCurrent_;
static void *cuCtxDestroy_v2_;
static void *cuCtxCreate_v2_;
static void *cuCtxGetDevice_;
static void *cuCtxPushCurrent_v2_;
static void *cuCtxPopCurrent_v2_;
static void *cuCtxEnablePeerAccess_;
static void *cuDriverGetVersion_;
static void *cuInit_;
// device management
static void *cuDeviceGet_;
static void *cuDeviceGetName_;
static void *cuDeviceGetPCIBusId_;
static void *cuDeviceGetAttribute_;
static void *cuDeviceGetCount_;
// link management
static void *cuLinkAddData_v2_;
static void *cuLinkCreate_v2_;
static void *cuLinkDestroy_;
static void *cuLinkComplete_;
// module management
static void *cuModuleGetGlobal_v2_;
static void *cuModuleLoad_;
static void *cuModuleUnload_;
static void *cuModuleLoadDataEx_;
static void *cuModuleLoadData_;
static void *cuModuleGetFunction_;
// stream management
static void *cuStreamCreate_;
static void *cuStreamSynchronize_;
static void *cuStreamDestroy_v2_;
static void *cuStreamGetCtx_;
static void *cuLaunchKernel_;
// function management
static void *cuFuncGetAttribute_;
static void *cuFuncSetAttribute_;
static void *cuFuncSetCacheConfig_;
// memory management
static void *cuMemcpyDtoH_v2_;
static void *cuMemFree_v2_;
static void *cuMemcpyDtoHAsync_v2_;
static void *cuMemcpyHtoDAsync_v2_;
static void *cuMemcpyHtoD_v2_;
static void *cuMemAlloc_v2_;
static void *cuMemsetD8Async_;
static void *cuPointerGetAttribute_;
// event management
static void *cuEventCreate_;
static void *cuEventElapsedTime_;
static void *cuEventRecord_;
static void *cuEventDestroy_v2_;
/* ------------------- *
* NVML
* ------------------- */
static void *nvmlInit_v2_;
static void *nvmlDeviceGetHandleByPciBusId_v2_;
static void *nvmlDeviceGetClockInfo_;
static void *nvmlDeviceGetMaxClockInfo_;
static void *nvmlDeviceSetApplicationsClocks_;
/* ------------------- *
* HIP
* ------------------- */
// context management
static void *hipInit_;
static void *hipCtxDestroy_;
static void *hipCtxCreate_;
static void *hipCtxPushCurrent_;
static void *hipCtxPopCurrent_;
static void *hipCtxGetDevice_;
static void *hipCtxEnablePeerAccess_;
static void *hipDriverGetVersion_;
// device management
static void *hipGetDevice_;
static void *hipDeviceGetName_;
static void *hipDeviceGetPCIBusId_;
static void *hipDeviceGetAttribute_;
static void *hipGetDeviceCount_;
// module management
static void *hipModuleGetGlobal_;
static void *hipModuleLoad_;
static void *hipModuleLoadData_;
static void *hipModuleUnload_;
static void *hipModuleLoadDataEx_;
static void *hipModuleGetFunction_;
// stream management
// One void* slot per HIP stream/launch entry point.
// NOTE(review): presumably the per-symbol caches handed to f_impl — confirm.
static void *hipStreamCreate_;
static void *hipStreamSynchronize_;
static void *hipStreamDestroy_;
static void *hipModuleLaunchKernel_;
// function management
static void *hipFuncGetAttributes_;
static void *hipFuncSetAttribute_;
static void *hipFuncSetCacheConfig_;
// memory management
static void *hipMalloc_;
static void *hipPointerGetAttribute_;
static void *hipMemsetD8Async_;
static void *hipMemcpyDtoH_;
static void *hipFree_;
static void *hipMemcpyDtoHAsync_;
static void *hipMemcpyHtoDAsync_;
static void *hipMemcpyHtoD_;
// event management
static void *hipEventCreate_;
static void *hipEventElapsedTime_;
static void *hipEventRecord_;
static void *hipEventDestroy_;
};
} // namespace driver
} // namespace triton
#endif

View File

@@ -1,254 +0,0 @@
#pragma once
#ifndef _TRITON_DRIVER_ERROR_H_
#define _TRITON_DRIVER_ERROR_H_
#include "triton/driver/dispatch.h"
#include <exception>
namespace triton {
namespace driver {
namespace exception {
namespace nvrtc {
// Defines an exception class `name`, derived directly from std::exception,
// whose what() returns the literal "NVRTC: Error- " followed by `msg`.
// what() is declared noexcept to match std::exception::what(); the older
// throw() spelling is no longer valid from C++20 on.
#define TRITON_CREATE_NVRTC_EXCEPTION(name, msg) \
  class name : public std::exception { \
  public: \
    const char *what() const noexcept override { \
      return "NVRTC: Error- " msg; \
    } \
  }
TRITON_CREATE_NVRTC_EXCEPTION(out_of_memory, "out of memory");
TRITON_CREATE_NVRTC_EXCEPTION(program_creation_failure,
                              "program creation failure");
TRITON_CREATE_NVRTC_EXCEPTION(invalid_input, "invalid input");
TRITON_CREATE_NVRTC_EXCEPTION(invalid_program, "invalid program");
TRITON_CREATE_NVRTC_EXCEPTION(invalid_option, "invalid option");
TRITON_CREATE_NVRTC_EXCEPTION(compilation, "compilation");
TRITON_CREATE_NVRTC_EXCEPTION(builtin_operation_failure,
                              "builtin operation failure");
TRITON_CREATE_NVRTC_EXCEPTION(unknown_error, "unknown error");
// Keep the helper macro private to this header.
#undef TRITON_CREATE_NVRTC_EXCEPTION
} // namespace nvrtc
namespace cuda {
// Common base so that any CUDA driver error can be caught with one handler.
class base : public std::exception {};
// Defines an exception class `name`, derived from cuda::base, whose what()
// returns the literal "CUDA: Error- " followed by `msg`.
// what() is declared noexcept to match std::exception::what(); the older
// throw() spelling is no longer valid from C++20 on.
#define TRITON_CREATE_CUDA_EXCEPTION(name, msg) \
  class name : public base { \
  public: \
    const char *what() const noexcept override { \
      return "CUDA: Error- " msg; \
    } \
  }
TRITON_CREATE_CUDA_EXCEPTION(invalid_value, "invalid value");
TRITON_CREATE_CUDA_EXCEPTION(out_of_memory, "out of memory");
TRITON_CREATE_CUDA_EXCEPTION(not_initialized, "not initialized");
TRITON_CREATE_CUDA_EXCEPTION(deinitialized, "deinitialized");
TRITON_CREATE_CUDA_EXCEPTION(profiler_disabled, "profiler disabled");
TRITON_CREATE_CUDA_EXCEPTION(profiler_not_initialized,
                             "profiler not initialized");
TRITON_CREATE_CUDA_EXCEPTION(profiler_already_started,
                             "profiler already started");
TRITON_CREATE_CUDA_EXCEPTION(profiler_already_stopped,
                             "profiler already stopped");
TRITON_CREATE_CUDA_EXCEPTION(no_device, "no device");
TRITON_CREATE_CUDA_EXCEPTION(invalid_device, "invalid device");
TRITON_CREATE_CUDA_EXCEPTION(invalid_image, "invalid image");
TRITON_CREATE_CUDA_EXCEPTION(invalid_context, "invalid context");
TRITON_CREATE_CUDA_EXCEPTION(context_already_current,
                             "context already current");
TRITON_CREATE_CUDA_EXCEPTION(map_failed, "map failed");
TRITON_CREATE_CUDA_EXCEPTION(unmap_failed, "unmap failed");
TRITON_CREATE_CUDA_EXCEPTION(array_is_mapped, "array is mapped");
TRITON_CREATE_CUDA_EXCEPTION(already_mapped, "already mapped");
TRITON_CREATE_CUDA_EXCEPTION(no_binary_for_gpu, "no binary for gpu");
TRITON_CREATE_CUDA_EXCEPTION(already_acquired, "already acquired");
TRITON_CREATE_CUDA_EXCEPTION(not_mapped, "not mapped");
TRITON_CREATE_CUDA_EXCEPTION(not_mapped_as_array, "not mapped as array");
TRITON_CREATE_CUDA_EXCEPTION(not_mapped_as_pointer, "not mapped as pointer");
TRITON_CREATE_CUDA_EXCEPTION(ecc_uncorrectable, "ecc uncorrectable");
TRITON_CREATE_CUDA_EXCEPTION(unsupported_limit, "unsupported limit");
TRITON_CREATE_CUDA_EXCEPTION(context_already_in_use, "context already in use");
TRITON_CREATE_CUDA_EXCEPTION(peer_access_unsupported,
                             "peer access unsupported");
TRITON_CREATE_CUDA_EXCEPTION(invalid_ptx, "invalid ptx");
TRITON_CREATE_CUDA_EXCEPTION(invalid_graphics_context,
                             "invalid graphics context");
TRITON_CREATE_CUDA_EXCEPTION(invalid_source, "invalid source");
TRITON_CREATE_CUDA_EXCEPTION(file_not_found, "file not found");
TRITON_CREATE_CUDA_EXCEPTION(shared_object_symbol_not_found,
                             "shared object symbol not found");
TRITON_CREATE_CUDA_EXCEPTION(shared_object_init_failed,
                             "shared object init failed");
TRITON_CREATE_CUDA_EXCEPTION(operating_system, "operating system");
TRITON_CREATE_CUDA_EXCEPTION(invalid_handle, "invalid handle");
TRITON_CREATE_CUDA_EXCEPTION(not_found, "not found");
TRITON_CREATE_CUDA_EXCEPTION(not_ready, "not ready");
TRITON_CREATE_CUDA_EXCEPTION(illegal_address, "illegal address");
TRITON_CREATE_CUDA_EXCEPTION(launch_out_of_resources,
                             "launch out of resources");
TRITON_CREATE_CUDA_EXCEPTION(launch_timeout, "launch timeout");
TRITON_CREATE_CUDA_EXCEPTION(launch_incompatible_texturing,
                             "launch incompatible texturing");
TRITON_CREATE_CUDA_EXCEPTION(peer_access_already_enabled,
                             "peer access already enabled");
TRITON_CREATE_CUDA_EXCEPTION(peer_access_not_enabled,
                             "peer access not enabled");
TRITON_CREATE_CUDA_EXCEPTION(primary_context_active, "primary context active");
TRITON_CREATE_CUDA_EXCEPTION(context_is_destroyed, "context is destroyed");
TRITON_CREATE_CUDA_EXCEPTION(assert_error, "assert");
TRITON_CREATE_CUDA_EXCEPTION(too_many_peers, "too many peers");
TRITON_CREATE_CUDA_EXCEPTION(host_memory_already_registered,
                             "host memory already registered");
TRITON_CREATE_CUDA_EXCEPTION(host_memory_not_registered,
                             "host memory not registered");
TRITON_CREATE_CUDA_EXCEPTION(hardware_stack_error, "hardware stack error");
TRITON_CREATE_CUDA_EXCEPTION(illegal_instruction, "illegal instruction");
TRITON_CREATE_CUDA_EXCEPTION(misaligned_address, "misaligned address");
TRITON_CREATE_CUDA_EXCEPTION(invalid_address_space, "invalid address space");
TRITON_CREATE_CUDA_EXCEPTION(invalid_pc, "invalid pc");
TRITON_CREATE_CUDA_EXCEPTION(launch_failed, "launch failed");
TRITON_CREATE_CUDA_EXCEPTION(not_permitted, "not permitted");
TRITON_CREATE_CUDA_EXCEPTION(not_supported, "not supported");
TRITON_CREATE_CUDA_EXCEPTION(unknown, "unknown");
// Keep the helper macro private to this header.
#undef TRITON_CREATE_CUDA_EXCEPTION
} // namespace cuda
namespace cublas {
// Common base so that any cuBLAS error can be caught with one handler.
class base : public std::exception {};
// Defines an exception class `name`, derived from cublas::base, whose what()
// returns the literal "CUBLAS: Error- " followed by `msg`.
// what() is declared noexcept to match std::exception::what(); the older
// throw() spelling is no longer valid from C++20 on.
#define TRITON_CREATE_CUBLAS_EXCEPTION(name, msg) \
  class name : public base { \
  public: \
    const char *what() const noexcept override { \
      return "CUBLAS: Error- " msg; \
    } \
  }
TRITON_CREATE_CUBLAS_EXCEPTION(not_initialized, "not initialized");
TRITON_CREATE_CUBLAS_EXCEPTION(alloc_failed, "alloc failed");
TRITON_CREATE_CUBLAS_EXCEPTION(invalid_value, "invalid value");
TRITON_CREATE_CUBLAS_EXCEPTION(arch_mismatch, "arch mismatch");
TRITON_CREATE_CUBLAS_EXCEPTION(mapping_error, "mapping error");
TRITON_CREATE_CUBLAS_EXCEPTION(execution_failed, "execution failed");
TRITON_CREATE_CUBLAS_EXCEPTION(internal_error, "internal error");
TRITON_CREATE_CUBLAS_EXCEPTION(not_supported, "not supported");
TRITON_CREATE_CUBLAS_EXCEPTION(license_error, "license error");
TRITON_CREATE_CUBLAS_EXCEPTION(unknown, "unknown");
// Keep the helper macro private to this header.
#undef TRITON_CREATE_CUBLAS_EXCEPTION
} // namespace cublas
namespace cudnn {
// Defines an exception class `name`, derived directly from std::exception,
// whose what() returns the literal "CUDNN: Error- " followed by `msg`.
// what() is declared noexcept to match std::exception::what(); the older
// throw() spelling is no longer valid from C++20 on.
#define TRITON_CREATE_CUDNN_EXCEPTION(name, msg) \
  class name : public std::exception { \
  public: \
    const char *what() const noexcept override { \
      return "CUDNN: Error- " msg; \
    } \
  }
TRITON_CREATE_CUDNN_EXCEPTION(not_initialized, "not initialized");
TRITON_CREATE_CUDNN_EXCEPTION(alloc_failed, "allocation failed");
TRITON_CREATE_CUDNN_EXCEPTION(bad_param, "bad param");
TRITON_CREATE_CUDNN_EXCEPTION(internal_error, "internal error");
TRITON_CREATE_CUDNN_EXCEPTION(invalid_value, "invalid value");
TRITON_CREATE_CUDNN_EXCEPTION(arch_mismatch, "arch mismatch");
TRITON_CREATE_CUDNN_EXCEPTION(mapping_error, "mapping error");
TRITON_CREATE_CUDNN_EXCEPTION(execution_failed, "execution failed");
TRITON_CREATE_CUDNN_EXCEPTION(not_supported, "not supported");
TRITON_CREATE_CUDNN_EXCEPTION(license_error, "license error");
TRITON_CREATE_CUDNN_EXCEPTION(runtime_prerequisite_missing,
                              "prerequisite missing");
TRITON_CREATE_CUDNN_EXCEPTION(runtime_in_progress, "runtime in progress");
TRITON_CREATE_CUDNN_EXCEPTION(runtime_fp_overflow, "runtime fp overflow");
// Keep the helper macro private to this header, like the sibling namespaces.
#undef TRITON_CREATE_CUDNN_EXCEPTION
} // namespace cudnn
namespace hip {
// Common base so that any HIP error can be caught with one handler.
class base : public std::exception {};
// Defines an exception class `name`, derived from hip::base, whose what()
// returns the literal "HIP: Error- " followed by `msg`.
// what() is declared noexcept to match std::exception::what(); the older
// throw() spelling is no longer valid from C++20 on.
#define TRITON_CREATE_HIP_EXCEPTION(name, msg) \
  class name : public base { \
  public: \
    const char *what() const noexcept override { \
      return "HIP: Error- " msg; \
    } \
  }
TRITON_CREATE_HIP_EXCEPTION(invalid_value, "invalid value");
TRITON_CREATE_HIP_EXCEPTION(out_of_memory, "out of memory");
TRITON_CREATE_HIP_EXCEPTION(not_initialized, "not initialized");
TRITON_CREATE_HIP_EXCEPTION(deinitialized, "deinitialized");
TRITON_CREATE_HIP_EXCEPTION(profiler_disabled, "profiler disabled");
TRITON_CREATE_HIP_EXCEPTION(profiler_not_initialized,
                            "profiler not initialized");
TRITON_CREATE_HIP_EXCEPTION(profiler_already_started,
                            "profiler already started");
TRITON_CREATE_HIP_EXCEPTION(profiler_already_stopped,
                            "profiler already stopped");
TRITON_CREATE_HIP_EXCEPTION(no_device, "no device");
TRITON_CREATE_HIP_EXCEPTION(invalid_device, "invalid device");
TRITON_CREATE_HIP_EXCEPTION(invalid_image, "invalid image");
TRITON_CREATE_HIP_EXCEPTION(invalid_context, "invalid context");
TRITON_CREATE_HIP_EXCEPTION(context_already_current, "context already current");
TRITON_CREATE_HIP_EXCEPTION(map_failed, "map failed");
TRITON_CREATE_HIP_EXCEPTION(unmap_failed, "unmap failed");
TRITON_CREATE_HIP_EXCEPTION(array_is_mapped, "array is mapped");
TRITON_CREATE_HIP_EXCEPTION(already_mapped, "already mapped");
TRITON_CREATE_HIP_EXCEPTION(no_binary_for_gpu, "no binary for gpu");
TRITON_CREATE_HIP_EXCEPTION(already_acquired, "already acquired");
TRITON_CREATE_HIP_EXCEPTION(not_mapped, "not mapped");
TRITON_CREATE_HIP_EXCEPTION(not_mapped_as_array, "not mapped as array");
TRITON_CREATE_HIP_EXCEPTION(not_mapped_as_pointer, "not mapped as pointer");
TRITON_CREATE_HIP_EXCEPTION(ecc_uncorrectable, "ecc uncorrectable");
TRITON_CREATE_HIP_EXCEPTION(unsupported_limit, "unsupported limit");
TRITON_CREATE_HIP_EXCEPTION(context_already_in_use, "context already in use");
TRITON_CREATE_HIP_EXCEPTION(peer_access_unsupported, "peer access unsupported");
TRITON_CREATE_HIP_EXCEPTION(invalid_ptx, "invalid ptx");
TRITON_CREATE_HIP_EXCEPTION(invalid_graphics_context,
                            "invalid graphics context");
TRITON_CREATE_HIP_EXCEPTION(invalid_source, "invalid source");
TRITON_CREATE_HIP_EXCEPTION(file_not_found, "file not found");
TRITON_CREATE_HIP_EXCEPTION(shared_object_symbol_not_found,
                            "shared object symbol not found");
TRITON_CREATE_HIP_EXCEPTION(shared_object_init_failed,
                            "shared object init failed");
TRITON_CREATE_HIP_EXCEPTION(operating_system, "operating system");
TRITON_CREATE_HIP_EXCEPTION(invalid_handle, "invalid handle");
TRITON_CREATE_HIP_EXCEPTION(not_found, "not found");
TRITON_CREATE_HIP_EXCEPTION(not_ready, "not ready");
TRITON_CREATE_HIP_EXCEPTION(illegal_address, "illegal address");
TRITON_CREATE_HIP_EXCEPTION(launch_out_of_resources, "launch out of resources");
TRITON_CREATE_HIP_EXCEPTION(launch_timeout, "launch timeout");
TRITON_CREATE_HIP_EXCEPTION(launch_incompatible_texturing,
                            "launch incompatible texturing");
TRITON_CREATE_HIP_EXCEPTION(peer_access_already_enabled,
                            "peer access already enabled");
TRITON_CREATE_HIP_EXCEPTION(peer_access_not_enabled, "peer access not enabled");
TRITON_CREATE_HIP_EXCEPTION(primary_context_active, "primary context active");
TRITON_CREATE_HIP_EXCEPTION(context_is_destroyed, "context is destroyed");
TRITON_CREATE_HIP_EXCEPTION(assert_error, "assert");
TRITON_CREATE_HIP_EXCEPTION(too_many_peers, "too many peers");
TRITON_CREATE_HIP_EXCEPTION(host_memory_already_registered,
                            "host memory already registered");
TRITON_CREATE_HIP_EXCEPTION(host_memory_not_registered,
                            "host memory not registered");
TRITON_CREATE_HIP_EXCEPTION(hardware_stack_error, "hardware stack error");
TRITON_CREATE_HIP_EXCEPTION(illegal_instruction, "illegal instruction");
TRITON_CREATE_HIP_EXCEPTION(misaligned_address, "misaligned address");
TRITON_CREATE_HIP_EXCEPTION(invalid_address_space, "invalid address space");
TRITON_CREATE_HIP_EXCEPTION(invalid_pc, "invalid pc");
TRITON_CREATE_HIP_EXCEPTION(launch_failed, "launch failed");
TRITON_CREATE_HIP_EXCEPTION(not_permitted, "not permitted");
TRITON_CREATE_HIP_EXCEPTION(not_supported, "not supported");
TRITON_CREATE_HIP_EXCEPTION(invalid_symbol, "invalid symbol");
TRITON_CREATE_HIP_EXCEPTION(unknown, "unknown");
// Undefine the macro that this namespace actually defined, so it stays
// private to this header.
#undef TRITON_CREATE_HIP_EXCEPTION
} // namespace hip
} // namespace exception
} // namespace driver
} // namespace triton
#endif

View File

@@ -1,22 +0,0 @@
#include "triton/external/CUDA/cuda.h"
#include "triton/external/hip.h"
#include <string>
namespace llvm {
class Module;
}
// Declarations of the driver-side code generation helpers. Only the
// signatures are visible in this header; the notes below are inferred from
// names and parameter lists and should be confirmed against the definitions.
namespace triton {
namespace driver {
// NOTE(review): presumably performs LLVM initialization — confirm.
void init_llvm();
// Returns a path string and writes to `version` through the reference.
// NOTE(review): presumably the located ptxas binary and its version — confirm.
std::string path_to_ptxas(int &version);
// Takes an LLVM module plus `cc` and `version` and returns a string.
// NOTE(review): presumably PTX text for that compute capability / PTX
// version — confirm.
std::string llir_to_ptx(llvm::Module *module, int cc, int version);
// Takes PTX text, a ptxas path and `cc`, and returns a string.
// NOTE(review): presumably the cubin contents produced by ptxas — confirm.
std::string ptx_to_cubin(const std::string &ptx, const std::string &ptxas_path,
int cc);
// Takes PTX text and `cc` and returns a CUmodule handle.
CUmodule ptx_to_cumodule(const std::string &ptx, int cc);
// Takes an LLVM module and a `proc` string and returns a string.
// NOTE(review): presumably AMDGPU output for the named processor — confirm.
std::string llir_to_amdgpu(llvm::Module *module, const std::string &proc);
// Takes a path string and returns a hipModule_t handle.
hipModule_t amdgpu_to_hipmodule(const std::string &path);
} // namespace driver
} // namespace triton

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,293 +0,0 @@
#ifndef __external_hip_h__
#define __external_hip_h__
/*
* @brief hipError_t
* @enum
* @ingroup Enumerations
*/
// Developer note - when updating these, update the hipErrorName and hipErrorString functions in
// NVCC and HCC paths Also update the hipCUDAErrorTohipError function in NVCC path.
// Ignoring error-code return values from hip APIs is discouraged. On C++17,
// we can make that yield a warning
/*
* @brief hipError_t
* @enum
* @ingroup Enumerations
*/
// Developer note - when updating these, update the hipErrorName and hipErrorString functions in
// NVCC and HCC paths Also update the hipCUDAErrorTohipError function in NVCC path.
#include <cstddef>
// HIP status codes. Values are assigned explicitly; an enumerator marked
// "Deprecated" shares its value with the enumerator directly above it.
typedef enum hipError_t {
hipSuccess = 0, ///< Successful completion.
hipErrorInvalidValue = 1, ///< One or more of the parameters passed to the API call is NULL
///< or not in an acceptable range.
hipErrorOutOfMemory = 2,
// Deprecated
hipErrorMemoryAllocation = 2, ///< Memory allocation error.
hipErrorNotInitialized = 3,
// Deprecated
hipErrorInitializationError = 3,
hipErrorDeinitialized = 4,
hipErrorProfilerDisabled = 5,
hipErrorProfilerNotInitialized = 6,
hipErrorProfilerAlreadyStarted = 7,
hipErrorProfilerAlreadyStopped = 8,
hipErrorInvalidConfiguration = 9,
hipErrorInvalidPitchValue = 12,
hipErrorInvalidSymbol = 13,
hipErrorInvalidDevicePointer = 17, ///< Invalid Device Pointer
hipErrorInvalidMemcpyDirection = 21, ///< Invalid memory copy direction
hipErrorInsufficientDriver = 35,
hipErrorMissingConfiguration = 52,
hipErrorPriorLaunchFailure = 53,
hipErrorInvalidDeviceFunction = 98,
hipErrorNoDevice = 100, ///< Call to hipGetDeviceCount returned 0 devices
hipErrorInvalidDevice = 101, ///< DeviceID must be in range 0...#compute-devices.
hipErrorInvalidImage = 200,
hipErrorInvalidContext = 201, ///< Produced when input context is invalid.
hipErrorContextAlreadyCurrent = 202,
hipErrorMapFailed = 205,
// Deprecated
hipErrorMapBufferObjectFailed = 205, ///< Produced when the IPC memory attach failed from ROCr.
hipErrorUnmapFailed = 206,
hipErrorArrayIsMapped = 207,
hipErrorAlreadyMapped = 208,
hipErrorNoBinaryForGpu = 209,
hipErrorAlreadyAcquired = 210,
hipErrorNotMapped = 211,
hipErrorNotMappedAsArray = 212,
hipErrorNotMappedAsPointer = 213,
hipErrorECCNotCorrectable = 214,
hipErrorUnsupportedLimit = 215,
hipErrorContextAlreadyInUse = 216,
hipErrorPeerAccessUnsupported = 217,
hipErrorInvalidKernelFile = 218, ///< In CUDA DRV, it is CUDA_ERROR_INVALID_PTX
hipErrorInvalidGraphicsContext = 219,
hipErrorInvalidSource = 300,
hipErrorFileNotFound = 301,
hipErrorSharedObjectSymbolNotFound = 302,
hipErrorSharedObjectInitFailed = 303,
hipErrorOperatingSystem = 304,
hipErrorInvalidHandle = 400,
// Deprecated
hipErrorInvalidResourceHandle = 400, ///< Resource handle (hipEvent_t or hipStream_t) invalid.
hipErrorNotFound = 500,
hipErrorNotReady = 600, ///< Indicates that asynchronous operations enqueued earlier are not
///< ready. This is not actually an error, but is used to distinguish
///< from hipSuccess (which indicates completion). APIs that return
///< this error include hipEventQuery and hipStreamQuery.
hipErrorIllegalAddress = 700,
hipErrorLaunchOutOfResources = 701, ///< Out of resources error.
hipErrorLaunchTimeOut = 702,
hipErrorPeerAccessAlreadyEnabled =
704, ///< Peer access was already enabled from the current device.
hipErrorPeerAccessNotEnabled =
705, ///< Peer access was never enabled from the current device.
hipErrorSetOnActiveProcess = 708,
hipErrorAssert = 710, ///< Produced when the kernel calls assert.
hipErrorHostMemoryAlreadyRegistered =
712, ///< Produced when trying to lock a page-locked memory.
hipErrorHostMemoryNotRegistered =
713, ///< Produced when trying to unlock a non-page-locked memory.
hipErrorLaunchFailure =
719, ///< An exception occurred on the device while executing a kernel.
hipErrorCooperativeLaunchTooLarge =
720, ///< This error indicates that the number of blocks launched per grid for a kernel
///< that was launched via cooperative launch APIs exceeds the maximum number of
///< allowed blocks for the current device
hipErrorNotSupported = 801, ///< Produced when the hip API is not supported/implemented
hipErrorUnknown = 999, ///< Unknown error.
// HSA Runtime Error Codes start here.
hipErrorRuntimeMemory = 1052, ///< HSA runtime memory call returned error. Typically not seen
///< in production systems.
hipErrorRuntimeOther = 1053, ///< HSA runtime call other than memory returned error. Typically
///< not seen in production systems.
hipErrorTbd ///< Marker that more error codes are needed. No explicit value, so it is
///< one past hipErrorRuntimeOther (1054).
} hipError_t;
typedef struct ihipCtx_t* hipCtx_t;
// Note many APIs also use integer deviceIds as an alternative to the device pointer:
typedef int hipDevice_t;
// Peer-to-peer attribute selectors. Only the first enumerator has an explicit
// value, so the four enumerators take the consecutive values 0 through 3.
typedef enum hipDeviceP2PAttr {
hipDevP2PAttrPerformanceRank = 0,
hipDevP2PAttrAccessSupported,
hipDevP2PAttrNativeAtomicSupported,
hipDevP2PAttrHipArrayAccessSupported
} hipDeviceP2PAttr;
typedef struct ihipStream_t* hipStream_t;
// Defined as 0. NOTE(review): presumably a flag argument for the IPC
// memory-handle API — no user is visible here, confirm.
#define hipIpcMemLazyEnablePeerAccess 0
// Size in bytes of the `reserved` payload in both IPC handle structs below.
#define HIP_IPC_HANDLE_SIZE 64
// IPC memory handle: a single block of HIP_IPC_HANDLE_SIZE reserved bytes.
typedef struct hipIpcMemHandle_st {
char reserved[HIP_IPC_HANDLE_SIZE];
} hipIpcMemHandle_t;
// IPC event handle: same layout as hipIpcMemHandle_t.
typedef struct hipIpcEventHandle_st {
char reserved[HIP_IPC_HANDLE_SIZE];
} hipIpcEventHandle_t;
typedef struct ihipModule_t* hipModule_t;
typedef struct ihipModuleSymbol_t* hipFunction_t;
// Plain aggregate of per-function attributes; dispatch::hipFuncGetAttributes
// is declared as taking a pointer to it.
// NOTE(review): field meanings are inferred from their names only (sizes in
// bytes for the *SizeBytes fields, counts/versions for the ints) — confirm
// against the HIP documentation.
typedef struct hipFuncAttributes {
int binaryVersion;
int cacheModeCA;
size_t constSizeBytes;
size_t localSizeBytes;
int maxDynamicSharedSizeBytes;
int maxThreadsPerBlock;
int numRegs;
int preferredShmemCarveout;
int ptxVersion;
size_t sharedSizeBytes;
} hipFuncAttributes;
// Handle type for a HIP event: a pointer to struct ihipEvent_t.
typedef struct ihipEvent_t* hipEvent_t;
/**
 * @brief hipDeviceAttribute_t
 * @enum
 * @ingroup Enumerations
 *
 * Device attribute identifiers.
 * None of the enumerators has an explicit value, so each value is purely positional
 * (the first entry is 0). Reordering entries or inserting one in the middle changes the
 * numeric value of every entry that follows.
 */
typedef enum hipDeviceAttribute_t {
hipDeviceAttributeMaxThreadsPerBlock, ///< Maximum number of threads per block.
hipDeviceAttributeMaxBlockDimX, ///< Maximum x-dimension of a block.
hipDeviceAttributeMaxBlockDimY, ///< Maximum y-dimension of a block.
hipDeviceAttributeMaxBlockDimZ, ///< Maximum z-dimension of a block.
hipDeviceAttributeMaxGridDimX, ///< Maximum x-dimension of a grid.
hipDeviceAttributeMaxGridDimY, ///< Maximum y-dimension of a grid.
hipDeviceAttributeMaxGridDimZ, ///< Maximum z-dimension of a grid.
hipDeviceAttributeMaxSharedMemoryPerBlock, ///< Maximum shared memory available per block in
///< bytes.
hipDeviceAttributeTotalConstantMemory, ///< Constant memory size in bytes.
hipDeviceAttributeWarpSize, ///< Warp size in threads.
hipDeviceAttributeMaxRegistersPerBlock, ///< Maximum number of 32-bit registers available to a
///< thread block. This number is shared by all thread
///< blocks simultaneously resident on a
///< multiprocessor.
hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz.
hipDeviceAttributeMemoryClockRate, ///< Peak memory clock frequency in kilohertz.
hipDeviceAttributeMemoryBusWidth, ///< Global memory bus width in bits.
hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device.
hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in.
hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2
///< cache.
hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per
///< multiprocessor.
hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number.
hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number.
hipDeviceAttributeConcurrentKernels, ///< Device can possibly execute multiple kernels
///< concurrently.
hipDeviceAttributePciBusId, ///< PCI Bus ID.
hipDeviceAttributePciDeviceId, ///< PCI Device ID.
hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, ///< Maximum Shared Memory Per
///< Multiprocessor.
hipDeviceAttributeIsMultiGpuBoard, ///< Multiple GPU devices.
hipDeviceAttributeIntegrated, ///< iGPU
hipDeviceAttributeCooperativeLaunch, ///< Support cooperative launch
hipDeviceAttributeCooperativeMultiDeviceLaunch, ///< Support cooperative launch on multiple devices
hipDeviceAttributeMaxTexture1DWidth, ///< Maximum number of elements in 1D images
hipDeviceAttributeMaxTexture2DWidth, ///< Maximum dimension width of 2D images in image elements
hipDeviceAttributeMaxTexture2DHeight, ///< Maximum dimension height of 2D images in image elements
hipDeviceAttributeMaxTexture3DWidth, ///< Maximum dimension width of 3D images in image elements
hipDeviceAttributeMaxTexture3DHeight, ///< Maximum dimension height of 3D images in image elements
hipDeviceAttributeMaxTexture3DDepth, ///< Maximum dimension depth of 3D images in image elements
hipDeviceAttributeHdpMemFlushCntl, ///< Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register
hipDeviceAttributeHdpRegFlushCntl, ///< Address of the HDP_REG_COHERENCY_FLUSH_CNTL register
hipDeviceAttributeMaxPitch, ///< Maximum pitch in bytes allowed by memory copies
hipDeviceAttributeTextureAlignment, ///< Alignment requirement for textures
hipDeviceAttributeTexturePitchAlignment, ///< Pitch alignment requirement for 2D texture references bound to pitched memory
hipDeviceAttributeKernelExecTimeout, ///< Run time limit for kernels executed on the device
hipDeviceAttributeCanMapHostMemory, ///< Device can map host memory into device address space
hipDeviceAttributeEccEnabled, ///< Device has ECC support enabled
hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc, ///< Supports cooperative launch on multiple
///< devices with unmatched functions
hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim, ///< Supports cooperative launch on multiple
///< devices with unmatched grid dimensions
hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim, ///< Supports cooperative launch on multiple
///< devices with unmatched block dimensions
hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, ///< Supports cooperative launch on multiple
///< devices with unmatched shared memories
hipDeviceAttributeAsicRevision, ///< Revision of the GPU in this device
hipDeviceAttributeManagedMemory, ///< Device supports allocating managed memory on this system
hipDeviceAttributeDirectManagedMemAccessFromHost, ///< Host can directly access managed memory on
///< the device without migration
hipDeviceAttributeConcurrentManagedAccess, ///< Device can coherently access managed memory
///< concurrently with the CPU
hipDeviceAttributePageableMemoryAccess, ///< Device supports coherently accessing pageable memory
///< without calling hipHostRegister on it
hipDeviceAttributePageableMemoryAccessUsesHostPageTables, ///< Device accesses pageable memory via
///< the host's page tables
hipDeviceAttributeCanUseStreamWaitValue ///< '1' if Device supports hipStreamWaitValue32() and
///< hipStreamWaitValue64() , '0' otherwise.
} hipDeviceAttribute_t;
// Device memory pointer type; declared as a plain untyped pointer.
typedef void* hipDeviceptr_t;
/**
 * @brief hipJitOption
 * @enum
 * @ingroup Enumerations
 *
 * JIT option identifiers. Values are consecutive starting at 0 and are written out
 * explicitly; hipJitOptionNumOptions is the number of real options that precede it.
 */
typedef enum hipJitOption {
    hipJitOptionMaxRegisters = 0,
    hipJitOptionThreadsPerBlock = 1,
    hipJitOptionWallTime = 2,
    hipJitOptionInfoLogBuffer = 3,
    hipJitOptionInfoLogBufferSizeBytes = 4,
    hipJitOptionErrorLogBuffer = 5,
    hipJitOptionErrorLogBufferSizeBytes = 6,
    hipJitOptionOptimizationLevel = 7,
    hipJitOptionTargetFromContext = 8,
    hipJitOptionTarget = 9,
    hipJitOptionFallbackStrategy = 10,
    hipJitOptionGenerateDebugInfo = 11,
    hipJitOptionLogVerbose = 12,
    hipJitOptionGenerateLineInfo = 13,
    hipJitOptionCacheMode = 14,
    hipJitOptionSm3xOpt = 15,
    hipJitOptionFastCompile = 16,
    hipJitOptionNumOptions = 17
} hipJitOption;
/**
 * Function attribute identifiers.
 * The first real attribute starts at 8; hipFuncAttributeMax is one past the last attribute.
 *
 * @warning On AMD devices and some Nvidia devices, these hints and controls are ignored.
 */
typedef enum hipFuncAttribute {
    hipFuncAttributeMaxDynamicSharedMemorySize = 8,
    hipFuncAttributePreferredSharedMemoryCarveout = 9,
    hipFuncAttributeMax = 10
} hipFuncAttribute;
/**
 * Cache configuration preference for a function.
 *
 * @warning On AMD devices and some Nvidia devices, these hints and controls are ignored.
 */
typedef enum hipFuncCache_t {
    hipFuncCachePreferNone = 0,    ///< no preference for shared memory or L1 (default)
    hipFuncCachePreferShared = 1,  ///< prefer larger shared memory and smaller L1 cache
    hipFuncCachePreferL1 = 2,      ///< prefer larger L1 cache and smaller shared memory
    hipFuncCachePreferEqual = 3    ///< prefer equal size L1 cache and shared memory
} hipFuncCache_t;
// Three distinct sentinel pointer values (0x01, 0x02, 0x03); they are markers, not addresses
// that may be dereferenced.
// NOTE(review): presumably these tag the entries of the `extra` argument array passed to the
// module launch API (buffer pointer, buffer size, end of list), mirroring CUDA's
// CU_LAUNCH_PARAM_* constants - confirm against the HIP documentation.
#define HIP_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01)
#define HIP_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02)
#define HIP_LAUNCH_PARAM_END ((void*)0x03)
#endif

View File

@@ -1,57 +0,0 @@
#pragma once
#ifndef _TRITON_TOOLS_BENCH_H_
#define _TRITON_TOOLS_BENCH_H_
#include "triton/driver/device.h"
#include "triton/driver/stream.h"
#include <algorithm>
#include <chrono>
#include <functional>
namespace triton {
namespace tools {
// Minimal stopwatch on top of std::chrono::high_resolution_clock.
// A default-constructed timer holds a default (epoch) start point until start() is called.
class timer {
  using clock_type = std::chrono::high_resolution_clock;

public:
  // When `run` is true the stopwatch starts counting immediately.
  explicit timer(bool run = false) {
    if (run) {
      _start = clock_type::now();
    }
  }

  // (Re)starts the stopwatch from the current instant.
  void start() { _start = clock_type::now(); }

  // Time elapsed since the recorded start point, in nanoseconds.
  std::chrono::nanoseconds get() const {
    const clock_type::duration elapsed = clock_type::now() - _start;
    return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed);
  }

private:
  clock_type::time_point _start;
};
// Measures the average wall-clock time of one call to `op`, in nanoseconds.
//
// op      callable to benchmark; invoked `warmup + repeat` times in total.
// stream  stream whose synchronize() is called after the warm-up calls and again after the
//         timed calls (presumably this blocks until the work enqueued by `op` has completed -
//         confirm against driver::stream). Must not be null.
// warmup  number of untimed calls made before the measurement starts.
// repeat  number of timed calls; must be greater than zero, otherwise the division below
//         yields inf/NaN.
//
// Returns the elapsed nanoseconds divided by `repeat`.
inline double bench(std::function<void()> const &op, driver::stream *stream,
                    size_t warmup = 10, size_t repeat = 200) {
  for (size_t i = 0; i < warmup; i++)
    op();
  stream->synchronize();

  timer tmr;
  tmr.start();
  for (size_t i = 0; i < repeat; i++)
    op();
  stream->synchronize();

  // Divide in double precision: a float has only 24 bits of mantissa, so casting the
  // nanosecond count to float loses digits as soon as a run exceeds ~16 ms.
  return static_cast<double>(tmr.get().count()) / static_cast<double>(repeat);
}
} // namespace tools
} // namespace triton
#endif

View File

@@ -1,68 +0,0 @@
#pragma once
#ifndef _TRITON_TOOLS_THREAD_GRAPH_H_
#define _TRITON_TOOLS_THREAD_GRAPH_H_
#include <iostream>
#include <map>
#include <set>
#include <vector>
namespace triton {
namespace tools {
// Undirected graph over values of type node_t (which must be usable as a std::map/std::set
// key) with a connected-components query.
template <class node_t> class graph {
  typedef std::map<node_t, std::set<node_t>> edges_t;

public:
  // component id -> nodes of that component, in depth-first discovery order
  typedef std::map<size_t, std::vector<node_t>> cmap_t;
  // node -> id of the component it belongs to
  typedef std::map<node_t, size_t> nmap_t;

private:
  // Depth-first visit of the component containing `x`.
  // `nodes` is the set of not-yet-visited nodes; each visited node is removed from it and
  // recorded exactly once in `nmap` / `cmap` (either pointer may be null).
  // `x` is taken by value on purpose: the caller passes an element of `nodes`, which is erased
  // here.
  void connected_components_impl(node_t x, std::set<node_t> &nodes,
                                 nmap_t *nmap, cmap_t *cmap, size_t id) const {
    // erase() reports how many elements were removed; 0 means `x` was already visited.
    // Checking this first prevents a node reached again through a back-edge from being
    // appended to its component a second time.
    if (nodes.erase(x) == 0)
      return;
    if (nmap)
      (*nmap)[x] = id;
    if (cmap)
      (*cmap)[id].push_back(x);
    for (const node_t &y : edges_.at(x))
      connected_components_impl(y, nodes, nmap, cmap, id);
  }

public:
  // Computes the connected components of the graph.
  // cmap  if non-null, cleared and filled with component id -> member nodes (no duplicates).
  // nmap  if non-null, cleared and filled with node -> component id.
  // Component ids are assigned 0, 1, 2, ... in increasing order of each component's smallest
  // node.
  void connected_components(cmap_t *cmap, nmap_t *nmap) const {
    if (cmap)
      cmap->clear();
    if (nmap)
      nmap->clear();
    std::set<node_t> nodes = nodes_;
    size_t id = 0;
    while (!nodes.empty()) {
      connected_components_impl(*nodes.begin(), nodes, nmap, cmap, id++);
    }
  }

  // Adds the undirected edge x -- y, registering both endpoints as nodes.
  void add_edge(node_t x, node_t y) {
    nodes_.insert(x);
    nodes_.insert(y);
    edges_[x].insert(y);
    edges_[y].insert(x);
  }

  // Removes all nodes and edges.
  void clear() {
    nodes_.clear();
    edges_.clear();
  }

private:
  std::set<node_t> nodes_;
  edges_t edges_;
};
} // namespace tools
} // namespace triton
#endif

View File

@@ -1,172 +0,0 @@
/*
Copyright (c) 2011, Micael Hildenborg
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Micael Hildenborg nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY Micael Hildenborg ''AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL Micael Hildenborg BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
Contributors:
Gustav
Several members in the gamedev.se forum.
Gregory Petrosyan
*/
#ifndef _TRITON_TOOLS_SHA1_HPP_
#define _TRITON_TOOLS_SHA1_HPP_
// SHA-1 message digest. The code assumes `unsigned int` is exactly 32 bits wide.
namespace sha1 {
namespace // local
{
// Rotates a 32-bit value left by `steps` bits; `steps` must be in the range 1..31.
inline unsigned int rol(const unsigned int value, const unsigned int steps) {
  return ((value << steps) | (value >> (32 - steps)));
}

// Zeroes the first 16 words of the W buffer (one 64-byte message block).
inline void clearWBuffert(unsigned int *buffert) {
  for (int pos = 0; pos < 16; ++pos) {
    buffert[pos] = 0;
  }
}

// Runs the 80-round SHA-1 compression function on one block.
// result  the five-word running hash state; updated in place.
// w       80-word work buffer; w[0..15] hold the message block on entry, w[16..79] are
//         overwritten with the expanded message schedule.
inline void innerHash(unsigned int *result, unsigned int *w) {
  unsigned int a = result[0];
  unsigned int b = result[1];
  unsigned int c = result[2];
  unsigned int d = result[3];
  unsigned int e = result[4];

  int round = 0;

// One SHA-1 round: mixes w[round] into the state using round function `func` and constant
// `val`. Wrapped in do/while(0) so that it behaves like a single statement.
#define sha1macro(func, val)                                                   \
  do {                                                                         \
    const unsigned int t = rol(a, 5) + (func) + e + (val) + w[round];          \
    e = d;                                                                     \
    d = c;                                                                     \
    c = rol(b, 30);                                                            \
    b = a;                                                                     \
    a = t;                                                                     \
  } while (0)

  // Rounds 0-15 consume the message words directly.
  for (; round < 16; ++round) {
    sha1macro((b & c) | (~b & d), 0x5a827999);
  }
  // From round 16 on, each schedule word is derived from four earlier words.
  for (; round < 20; ++round) {
    w[round] =
        rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1);
    sha1macro((b & c) | (~b & d), 0x5a827999);
  }
  for (; round < 40; ++round) {
    w[round] =
        rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1);
    sha1macro(b ^ c ^ d, 0x6ed9eba1);
  }
  for (; round < 60; ++round) {
    w[round] =
        rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1);
    sha1macro((b & c) | (b & d) | (c & d), 0x8f1bbcdc);
  }
  for (; round < 80; ++round) {
    w[round] =
        rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1);
    sha1macro(b ^ c ^ d, 0xca62c1d6);
  }

#undef sha1macro

  result[0] += a;
  result[1] += b;
  result[2] += c;
  result[3] += d;
  result[4] += e;
}
} // namespace

// Computes the SHA-1 digest of a byte buffer.
// src         pointer to the message bytes.
// bytelength  number of bytes at `src`; must be non-negative.
// hash        output buffer receiving the 20 digest bytes.
inline void calc(const void *src, const int bytelength, unsigned char *hash) {
  // Init the result array.
  unsigned int result[5] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476,
                            0xc3d2e1f0};

  // Cast the void src pointer to be the byte array we can work with.
  const unsigned char *sarray = static_cast<const unsigned char *>(src);

  // The reusable round buffer
  unsigned int w[80];

  // Loop through all complete 64byte blocks.
  const int endOfFullBlocks = bytelength - 64;
  int endCurrentBlock;
  int currentBlock = 0;

  while (currentBlock <= endOfFullBlocks) {
    endCurrentBlock = currentBlock + 64;

    // Init the round buffer with the 64 byte block data.
    for (int roundPos = 0; currentBlock < endCurrentBlock; currentBlock += 4) {
      // Assemble each word from four bytes, most significant byte first, so the result is
      // the same on little- and big-endian hosts.
      w[roundPos++] = (unsigned int)sarray[currentBlock + 3] |
                      (((unsigned int)sarray[currentBlock + 2]) << 8) |
                      (((unsigned int)sarray[currentBlock + 1]) << 16) |
                      (((unsigned int)sarray[currentBlock]) << 24);
    }
    innerHash(result, w);
  }

  // Handle the last and not full 64 byte block if existing.
  endCurrentBlock = bytelength - currentBlock;
  clearWBuffert(w);
  int lastBlockBytes = 0;
  for (; lastBlockBytes < endCurrentBlock; ++lastBlockBytes) {
    w[lastBlockBytes >> 2] |=
        (unsigned int)sarray[lastBlockBytes + currentBlock]
        << ((3 - (lastBlockBytes & 3)) << 3);
  }
  // Append the mandatory 0x80 padding byte right after the message.
  w[lastBlockBytes >> 2] |= 0x80u << ((3 - (lastBlockBytes & 3)) << 3);
  // If fewer than 8 bytes remain for the length field, it goes into an extra block.
  if (endCurrentBlock >= 56) {
    innerHash(result, w);
    clearWBuffert(w);
  }
  // The message length in bits is a 64-bit big-endian value stored in w[14]:w[15].
  // Computing it in unsigned arithmetic avoids signed overflow of `bytelength << 3` for
  // inputs of 256 MiB and more, and keeps the three bits that do not fit into w[15].
  const unsigned int totalBytes = static_cast<unsigned int>(bytelength);
  w[14] = totalBytes >> 29;
  w[15] = totalBytes << 3;
  innerHash(result, w);

  // Store hash in result pointer, and make sure we get it in the correct order
  // on both endian models.
  for (int hashByte = 20; --hashByte >= 0;) {
    hash[hashByte] =
        (result[hashByte >> 2] >> (((3 - hashByte) & 0x3) << 3)) & 0xff;
  }
}

// Formats a 20-byte digest as 40 lowercase hexadecimal characters.
// hash       the 20 digest bytes.
// hexstring  output buffer of at least 41 bytes; receives the digits plus a terminating 0.
inline void toHexString(const unsigned char *hash, char *hexstring) {
  const char hexDigits[] = {"0123456789abcdef"};

  for (int hashByte = 20; --hashByte >= 0;) {
    hexstring[hashByte << 1] = hexDigits[(hash[hashByte] >> 4) & 0xf];
    hexstring[(hashByte << 1) + 1] = hexDigits[hash[hashByte] & 0xf];
  }
  hexstring[40] = 0;
}
} // namespace sha1
#endif

View File

@@ -1,42 +0,0 @@
#ifndef TRITON_TOOLS_SYS_EXEC_HPP
#define TRITON_TOOLS_SYS_EXEC_HPP
#include <cstdio>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
namespace triton {
namespace tools {
#ifdef _WIN32
#define popen _popen
#define pclose _pclose
#endif
// Fallback for platforms whose headers did not provide WEXITSTATUS.
// On Windows the low byte of _pclose()'s result is the exit code; in the traditional POSIX
// wait-status encoding the exit code is in bits 8-15 (the low byte holds the signal number).
#ifndef WEXITSTATUS
#ifdef _WIN32
#define WEXITSTATUS(stat_val) ((unsigned)(stat_val)&255)
#else
#define WEXITSTATUS(stat_val) (((unsigned)(stat_val) >> 8) & 255)
#endif
#endif
// Runs `cmd` through popen() and captures everything it writes to standard output.
//
// cmd     command line handed to the platform's command processor.
// result  cleared and then filled with the command's standard output once the command has
//         been started; left untouched if it could not be started.
//
// Returns the command's exit status (0-255), or -1 if the command could not be started, its
// output could not be collected, or its status could not be retrieved. Never throws.
//
// Declared inline because it is defined in a header; without it, including this header from
// two translation units produces a duplicate-symbol link error.
inline int exec(const std::string &cmd, std::string &result) {
  char buffer[128];
  FILE *pipe = popen(cmd.c_str(), "r");
  if (!pipe)
    return -1; // must not look like a successful run (exit status 0)
  result.clear();
  try {
    while (fgets(buffer, sizeof buffer, pipe) != NULL)
      result += buffer;
  } catch (...) {
    // Appending can throw (e.g. out of memory); close the pipe and report failure instead
    // of letting the exception escape.
    pclose(pipe);
    return -1;
  }
  const int status = pclose(pipe);
  if (status == -1)
    return -1;
  return WEXITSTATUS(status);
}
} // namespace tools
} // namespace triton
#endif

View File

@@ -1,70 +0,0 @@
/*
* Copyright (c) 2015, PHILIPPE TILLET. All rights reserved.
*
* This file is part of ISAAC.
*
* ISAAC is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301 USA
*/
#ifndef TDL_TOOLS_SYS_MKDIR_HPP
#define TDL_TOOLS_SYS_MKDIR_HPP
#include <cstdlib>
#include <cstring>
#include <errno.h>
#include <string>
#include <sys/stat.h>
#if defined(_WIN32)
#include <direct.h>
#endif
namespace triton {
namespace tools {
// Creates the single directory `path` (no parents are created).
// Uses _mkdir on Windows and ::mkdir with mode 0777 elsewhere; returns that call's result
// unchanged.
inline int mkdir(std::string const &path) {
  const char *const dir = path.c_str();
#if defined(_WIN32)
  return _mkdir(dir);
#else
  return ::mkdir(dir, 0777);
#endif
}
// Creates every directory prefix of `path` that ends at a '/' separator.
// Whatever follows the last '/' is not created, and empty components (a leading '/' or "//")
// are skipped. Only the outcome of the last mkdir attempt is examined.
// Returns 0 if that attempt succeeded (or none was made) or failed with EEXIST, -1 otherwise.
inline int mkpath(std::string const &path) {
  int last_result = 0;
  size_t begin = 0;
  for (size_t slash = path.find('/', begin); slash != std::string::npos;
       slash = path.find('/', begin)) {
    const bool empty_component = (slash == begin);
    if (!empty_component) {
      last_result = mkdir(path.substr(0, slash));
    }
    begin = slash + 1;
  }
  if (last_result == 0)
    return 0;
  return errno == EEXIST ? 0 : -1;
}
// Returns the st_mtime value reported by stat() for `path`, converted to int, or 0 when
// stat() fails (for example because the path does not exist).
inline int mtime(std::string const &path) {
  struct stat info;
  const bool found = (stat(path.c_str(), &info) == 0);
  if (!found)
    return 0;
  return info.st_mtime;
}
} // namespace tools
} // namespace triton
#endif

View File

@@ -1,81 +0,0 @@
#pragma once
#ifndef _TRITON_TOOLS_THREAD_POOL_H_
#define _TRITON_TOOLS_THREAD_POOL_H_
#include <condition_variable>
#include <functional>
#include <future>
#include <memory>
#include <mutex>
#include <queue>
#include <stdexcept>
#include <thread>
#include <vector>
// Fixed-size pool of worker threads that run queued tasks in FIFO order.
class ThreadPool {
public:
  // Starts `threads` workers; each one runs until the pool is destroyed.
  ThreadPool(size_t threads) : stop(false) {
    for (size_t remaining = threads; remaining > 0; --remaining)
      workers.emplace_back([this] { work(); });
  }

  // Queues the call f(args...) and returns a future for its result.
  // Throws std::runtime_error if the pool is already shutting down.
  template <class F, class... Args>
  auto enqueue(F &&f, Args &&...args)
      -> std::future<typename std::result_of<F(Args...)>::type> {
    typedef typename std::result_of<F(Args...)>::type result_t;
    typedef std::packaged_task<result_t()> job_t;

    // The packaged task lives in a shared_ptr so the queued closure stays copyable.
    std::shared_ptr<job_t> job = std::make_shared<job_t>(
        std::bind(std::forward<F>(f), std::forward<Args>(args)...));
    std::future<result_t> pending = job->get_future();
    {
      std::lock_guard<std::mutex> guard(queue_mutex);
      // don't allow enqueueing after stopping the pool
      if (stop)
        throw std::runtime_error("enqueue on stopped ThreadPool");
      tasks.emplace([job]() { (*job)(); });
    }
    condition.notify_one();
    return pending;
  }

  // Signals shutdown, lets the workers drain the remaining queue, and joins them all.
  ~ThreadPool() {
    {
      std::lock_guard<std::mutex> guard(queue_mutex);
      stop = true;
    }
    condition.notify_all();
    for (std::vector<std::thread>::iterator it = workers.begin();
         it != workers.end(); ++it)
      it->join();
  }

private:
  // Body of every worker thread: pop and run tasks until the pool is stopped and the
  // queue is empty.
  void work() {
    while (true) {
      std::function<void()> job;
      {
        std::unique_lock<std::mutex> guard(queue_mutex);
        while (!stop && tasks.empty())
          condition.wait(guard);
        if (stop && tasks.empty())
          return;
        job = std::move(tasks.front());
        tasks.pop();
      }
      // Run the task outside the lock so other workers can dequeue meanwhile.
      job();
    }
  }

  // need to keep track of threads so we can join them
  std::vector<std::thread> workers;
  // the task queue
  std::queue<std::function<void()>> tasks;
  // synchronization
  std::mutex queue_mutex;
  std::condition_variable condition;
  bool stop;
};
#endif