Files
triton/include/isaac/driver/dispatch.h
2017-01-17 20:33:46 -05:00

258 lines
12 KiB
C++

/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ISAAC_DRIVER_DISPATCHER_H
#define ISAAC_DRIVER_DISPATCHER_H
#include <type_traits>
#include <dlfcn.h>
//OpenCL Backend
#include "isaac/driver/external/CL/cl.h"
#include "isaac/driver/external/CL/cl_ext.h"
//CUDA Backend
#include "isaac/driver/external/CUDA/cuda.h"
#include "isaac/driver/external/CUDA/nvrtc.h"
#include "isaac/driver/external/CUDA/cublas.h"
//Exceptions
#include "isaac/driver/common.h"
#include <iostream>
namespace isaac
{
namespace driver
{
class Context;
template<class T> void check(T){}
void check(nvrtcResult err);
void check(CUresult err);
void check(cublasStatus_t err);
void check(cl_int err);
void check_destruction(CUresult);
class dispatch
{
private:
template <class F>
struct return_type;
template <class R, class... A>
struct return_type<R (*)(A...)>
{ typedef R type; };
typedef bool (*f_init_t)();
template<f_init_t initializer, typename FunPtrT, typename... Args>
static typename return_type<FunPtrT>::type f_impl(void*& lib_h, FunPtrT, void*& cache, const char * name, Args... args)
{
initializer();
if(cache == nullptr)
cache = dlsym(lib_h, name);
FunPtrT fptr;
*reinterpret_cast<void **>(&fptr) = cache;
typename return_type<FunPtrT>::type res = (*fptr)(args...);
check(res);
return res;
}
public:
static bool clinit();
static bool cublasinit();
static bool nvrtcinit();
static bool cuinit();
static void release();
//OpenCL
static cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *);
static cl_int clEnqueueNDRangeKernel(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *);
static cl_int clSetKernelArg(cl_kernel, cl_uint, size_t, const void *);
static cl_int clReleaseMemObject(cl_mem);
static cl_int clFinish(cl_command_queue);
static cl_int clGetMemObjectInfo(cl_mem, cl_mem_info, size_t, void *, size_t *);
static cl_int clGetCommandQueueInfo(cl_command_queue, cl_command_queue_info, size_t, void *, size_t *);
static cl_int clReleaseContext(cl_context);
static cl_int clReleaseEvent(cl_event);
static cl_int clEnqueueWriteBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *);
static cl_int clEnqueueReadBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void *, cl_uint, const cl_event *, cl_event *);
static cl_int clGetProgramBuildInfo(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *);
static cl_int clReleaseDevice(cl_device_id);
static cl_context clCreateContext(const cl_context_properties *, cl_uint, const cl_device_id *, void (*)(const char *, const void *, size_t, void *), void *, cl_int *);
static cl_int clGetDeviceIDs(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *);
static cl_int clGetContextInfo(cl_context, cl_context_info, size_t, void *, size_t *);
static cl_int clGetDeviceInfo(cl_device_id, cl_device_info, size_t, void *, size_t *);
static cl_int clReleaseCommandQueue(cl_command_queue);
static cl_int clGetPlatformIDs(cl_uint, cl_platform_id *, cl_uint *);
static cl_int clGetPlatformInfo(cl_platform_id, cl_platform_info, size_t, void *, size_t *);
static cl_int clGetEventProfilingInfo(cl_event, cl_profiling_info, size_t, void *, size_t *);
static cl_program clCreateProgramWithBinary(cl_context, cl_uint, const cl_device_id *, const size_t *, const unsigned char **, cl_int *, cl_int *);
static cl_command_queue clCreateCommandQueue(cl_context, cl_device_id, cl_command_queue_properties, cl_int *);
static cl_int clRetainEvent(cl_event);
static cl_int clReleaseProgram(cl_program);
static cl_int clFlush(cl_command_queue);
static cl_int clGetProgramInfo(cl_program, cl_program_info, size_t, void *, size_t *);
static cl_int clGetKernelInfo(cl_kernel, cl_kernel_info, size_t, void *, size_t *);
static cl_int clGetKernelWorkGroupInfo(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void *, size_t *);
static cl_kernel clCreateKernel(cl_program, const char *, cl_int *);
static cl_mem clCreateBuffer(cl_context, cl_mem_flags, size_t, void *, cl_int *);
static cl_program clCreateProgramWithSource(cl_context, cl_uint, const char **, const size_t *, cl_int *);
static cl_int clReleaseKernel(cl_kernel);
//CUDA
static CUresult cuCtxDestroy_v2(CUcontext ctx);
static CUresult cuEventCreate(CUevent *phEvent, unsigned int Flags);
static CUresult cuDeviceGet(CUdevice *device, int ordinal);
static CUresult cuMemcpyDtoH_v2(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount);
static CUresult cuStreamCreate(CUstream *phStream, unsigned int Flags);
static CUresult cuEventElapsedTime(float *pMilliseconds, CUevent hStart, CUevent hEnd);
static CUresult cuMemFree_v2(CUdeviceptr dptr);
static CUresult cuMemcpyDtoHAsync_v2(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
static CUresult cuDriverGetVersion(int *driverVersion);
static CUresult cuDeviceGetName(char *name, int len, CUdevice dev);
static CUresult cuMemcpyHtoDAsync_v2(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream);
static CUresult cuModuleLoad(CUmodule *module, const char *fname);
static CUresult cuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra);
static CUresult cuModuleUnload(CUmodule hmod);
static CUresult cuModuleLoadDataEx(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
static CUresult cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
static CUresult cuDeviceGetCount(int *count);
static CUresult cuMemcpyHtoD_v2(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
static CUresult cuInit(unsigned int Flags);
static CUresult cuEventRecord(CUevent hEvent, CUstream hStream);
static CUresult cuCtxCreate_v2(CUcontext *pctx, unsigned int flags, CUdevice dev);
static CUresult cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, const char *name);
static CUresult cuStreamSynchronize(CUstream hStream);
static CUresult cuStreamDestroy_v2(CUstream hStream);
static CUresult cuEventDestroy_v2(CUevent hEvent);
static CUresult cuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize);
static CUresult cuPointerGetAttribute(void * data, CUpointer_attribute attribute, CUdeviceptr ptr);
static CUresult cuCtxGetDevice(CUdevice* result);
static nvrtcResult nvrtcCompileProgram(nvrtcProgram prog, int numOptions, const char **options);
static nvrtcResult nvrtcGetProgramLogSize(nvrtcProgram prog, size_t *logSizeRet);
static nvrtcResult nvrtcGetPTX(nvrtcProgram prog, char *ptx);
static nvrtcResult nvrtcGetPTXSize(nvrtcProgram prog, size_t *ptxSizeRet);
static nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char **headers, const char **includeNames);
static nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char *log);
static cublasHandle_t cublasHandle(Context const & ctx);
static cublasStatus_t cublasCreate_v2(cublasHandle_t* h);
static cublasStatus_t cublasGetStream_v2(cublasHandle_t h, cudaStream_t *streamId);
static cublasStatus_t cublasSetStream_v2(cublasHandle_t h, cudaStream_t streamId);
static cublasStatus_t cublasSgemm_v2 (cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, float* alpha, const float *A, int lda, const float *B, int ldb, float* beta, float *C, int ldc);
static cublasStatus_t cublasDgemm_v2 (cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, double* alpha, const double *A, int lda, const double *B, int ldb, double* beta, double *C, int ldc);
private:
static void* opencl_;
static void* cuda_;
static void* nvrtc_;
static void* cublas_;
//OpenCL
static void* clBuildProgram_;
static void* clEnqueueNDRangeKernel_;
static void* clSetKernelArg_;
static void* clReleaseMemObject_;
static void* clFinish_;
static void* clGetMemObjectInfo_;
static void* clGetCommandQueueInfo_;
static void* clReleaseContext_;
static void* clReleaseEvent_;
static void* clEnqueueWriteBuffer_;
static void* clEnqueueReadBuffer_;
static void* clGetProgramBuildInfo_;
static void* clReleaseDevice_;
static void* clCreateContext_;
static void* clGetDeviceIDs_;
static void* clGetContextInfo_;
static void* clGetDeviceInfo_;
static void* clReleaseCommandQueue_;
static void* clGetPlatformIDs_;
static void* clGetPlatformInfo_;
static void* clGetEventProfilingInfo_;
static void* clCreateProgramWithBinary_;
static void* clCreateCommandQueue_;
static void* clRetainEvent_;
static void* clReleaseProgram_;
static void* clFlush_;
static void* clGetProgramInfo_;
static void* clGetKernelInfo_;
static void* clGetKernelWorkGroupInfo_;
static void* clCreateKernel_;
static void* clCreateBuffer_;
static void* clCreateProgramWithSource_;
static void* clReleaseKernel_;
//CUDA
static void* cuCtxDestroy_v2_;
static void* cuEventCreate_;
static void* cuDeviceGet_;
static void* cuMemcpyDtoH_v2_;
static void* cuStreamCreate_;
static void* cuEventElapsedTime_;
static void* cuMemFree_v2_;
static void* cuMemcpyDtoHAsync_v2_;
static void* cuDriverGetVersion_;
static void* cuDeviceGetName_;
static void* cuMemcpyHtoDAsync_v2_;
static void* cuModuleLoad_;
static void* cuLaunchKernel_;
static void* cuModuleUnload_;
static void* cuModuleLoadDataEx_;
static void* cuDeviceGetAttribute_;
static void* cuDeviceGetCount_;
static void* cuMemcpyHtoD_v2_;
static void* cuInit_;
static void* cuEventRecord_;
static void* cuCtxCreate_v2_;
static void* cuModuleGetFunction_;
static void* cuStreamSynchronize_;
static void* cuStreamDestroy_v2_;
static void* cuEventDestroy_v2_;
static void* cuMemAlloc_v2_;
static void* cuPointerGetAttribute_;
static void* cuCtxGetDevice_;
static void* nvrtcCompileProgram_;
static void* nvrtcGetProgramLogSize_;
static void* nvrtcGetPTX_;
static void* nvrtcGetPTXSize_;
static void* nvrtcCreateProgram_;
static void* nvrtcGetProgramLog_;
static void* cublasCreate_v2_;
static void* cublasGetStream_v2_;
static void* cublasSetStream_v2_;
static void* cublasSgemm_v2_;
static void* cublasDgemm_v2_;
};
}
}
#endif