[driver] adding opencl in the driver API

This commit is contained in:
Philippe Tillet
2019-03-18 23:12:14 -07:00
parent b73c3bdd25
commit 02775a226e
41 changed files with 28700 additions and 398 deletions

View File

@@ -2,5 +2,5 @@ foreach(PROG matrix)
add_executable(${PROG} ${PROG}.cpp) add_executable(${PROG} ${PROG}.cpp)
set_target_properties(${PROG} PROPERTIES OUTPUT_NAME ${PROG}) set_target_properties(${PROG} PROPERTIES OUTPUT_NAME ${PROG})
include_directories(/usr/local/cuda/include/) include_directories(/usr/local/cuda/include/)
target_link_libraries(${PROG} triton cuda) target_link_libraries(${PROG} triton)
endforeach(PROG) endforeach(PROG)

View File

@@ -87,7 +87,7 @@ T min(std::vector<T> x)
template<class OP, class SYNC> template<class OP, class SYNC>
double bench(OP const & op, SYNC const & sync, triton::driver::device const & device) double bench(OP const & op, SYNC const & sync, triton::driver::cu_device const & device)
{ {
timer tmr; timer tmr;
std::vector<size_t> times; std::vector<size_t> times;
@@ -108,6 +108,7 @@ double bench(OP const & op, SYNC const & sync, triton::driver::device const & de
int main() { int main() {
// initialize default compute device // initialize default compute device
auto context = triton::driver::backend::contexts::get_default(); auto context = triton::driver::backend::contexts::get_default();
exit(EXIT_SUCCESS);
triton::jit jit(context); triton::jit jit(context);
// matrix multiplication parameters // matrix multiplication parameters
@@ -123,10 +124,10 @@ int main() {
hb[i] = 1; hb[i] = 1;
for(size_t i = 0; i < hc.size(); i++) for(size_t i = 0; i < hc.size(); i++)
hc[i] = 0; hc[i] = 0;
triton::driver::buffer dc(context, hc.size()*4); triton::driver::cu_buffer dc(context, hc.size()*4);
triton::driver::buffer da(context, ha.size()*4); triton::driver::cu_buffer da(context, ha.size()*4);
triton::driver::buffer db(context, hb.size()*4); triton::driver::cu_buffer db(context, hb.size()*4);
triton::driver::stream stream(context); triton::driver::cu_stream stream(context);
stream.write(da, true, 0, ha); stream.write(da, true, 0, ha);
stream.write(db, true, 0, hb); stream.write(db, true, 0, hb);
stream.write(dc, true, 0, hc); stream.write(dc, true, 0, hc);
@@ -134,7 +135,7 @@ int main() {
// benchmark a given matrix multiplication kernel // benchmark a given matrix multiplication kernel
auto benchmark = [&](triton::driver::kernel kernel, auto benchmark = [&](triton::driver::cu_kernel kernel,
triton::jit::launch_information info) { triton::jit::launch_information info) {
// launch info // launch info
unsigned TM = info.global_range_size[0]; unsigned TM = info.global_range_size[0];
@@ -165,7 +166,7 @@ int main() {
// benchmark // benchmark
double ts = bench([&](){stream.enqueue(kernel, grid, {nthreads, 1, 1});}, double ts = bench([&](){stream.enqueue(kernel, grid, {nthreads, 1, 1});},
[&](){ stream.synchronize(); }, [&](){ stream.synchronize(); },
context.device()); context->device());
ts = ts * 1e-9; ts = ts * 1e-9;
double tflops = 2*M*N*K / ts * 1e-12; double tflops = 2*M*N*K / ts * 1e-12;
return tflops; return tflops;
@@ -183,7 +184,7 @@ int main() {
// jit.autotune(src, benchmark); // jit.autotune(src, benchmark);
jit.add_module(src, params); jit.add_module(src, params);
triton::driver::kernel kernel = jit.get_function("matmul"); triton::driver::cu_kernel kernel = jit.get_function("matmul");
triton::jit::launch_information info = jit.get_launch_info("matmul"); triton::jit::launch_information info = jit.get_launch_info("matmul");
std::cout << benchmark(kernel, info) << std::endl; std::cout << benchmark(kernel, info) << std::endl;
stream.read(dc, true, 0, hc); stream.read(dc, true, 0, hc);

View File

@@ -26,6 +26,7 @@
#include <map> #include <map>
#include <list> #include <list>
#include <vector> #include <vector>
#include "triton/driver/context.h"
namespace triton namespace triton
@@ -44,68 +45,104 @@ class kernel;
struct backend struct backend
{ {
// platforms
class platforms
{
friend class backend;
private:
static void init();
public:
static void get(std::vector<driver::platform*> &results);
private:
static std::vector<driver::platform*> cache_;
};
// devices
class devices
{
friend class backend;
private:
static void init(const std::vector<platform *> &platforms);
public:
static void get(std::vector<driver::device*>& devs);
private:
static std::vector<driver::device*> cache_;
};
// modules
class modules class modules
{ {
friend class backend; friend class backend;
public: public:
static void release(); static void release();
static module& get(driver::stream const & stream, std::string const & name, std::string const &src); static driver::module* get(driver::stream* stream, std::string const & name, std::string const &src);
private: private:
static std::map<std::tuple<stream, std::string>, module * > cache_; static std::map<std::tuple<driver::stream*, std::string>, driver::module*> cache_;
}; };
// kernels
class kernels class kernels
{ {
friend class backend; friend class backend;
public: public:
static void release(); static void release();
static kernel & get(driver::module const & program, std::string const & name); static driver::kernel* get(driver::module* mod, const std::string & name);
private: private:
static std::map<std::tuple<module, std::string>, kernel * > cache_; static std::map<std::tuple<module*, std::string>, driver::kernel*> cache_;
}; };
// contexts
class contexts class contexts
{ {
friend class backend; friend class backend;
private: private:
static void init(std::vector<platform> const &); static void init(const std::vector<device *> &);
static void release(); static void release();
public: public:
static driver::context const & get_default(); static driver::context* get_default();
template<class T>
static driver::context const & import(T ctx) static driver::context* import(CUcontext ctx)
{ {
for(driver::context const * x: cache_) for(driver::context* x: cache_){
if((T)*x==ctx) driver::cu_context* cu_x = (driver::cu_context*)x;
return *x; if(*cu_x->cu()==ctx)
cache_.emplace_back(new driver::context(ctx, false)); return x;
return *cache_.back();
} }
static void get(std::list<context const *> &); cache_.emplace_back(new driver::cu_context(ctx, false));
return cache_.back();
}
static void get(std::list<driver::context*> &);
private: private:
static std::list<context const *> cache_; static std::list<driver::context*> cache_;
}; };
// streams
class streams class streams
{ {
friend class backend; friend class backend;
private: private:
static void init(std::list<context const *> const &); static void init(std::list<context*> const &);
static void release(); static void release();
public: public:
static void get(driver::context const &, std::vector<stream *> &streams); static void get(driver::context*, std::vector<driver::stream *> &streams);
static stream & get(driver::context const &, unsigned int id = 0); static driver::stream* get(driver::context*, unsigned int id = 0);
static stream & get_default(); static driver::stream* get_default();
private: private:
static std::map< context, std::vector<stream*> > cache_; static std::map<driver::context*, std::vector<driver::stream*> > cache_;
}; };
static void init(); static void init();
static void release(); static void release();
static void synchronize(triton::driver::context *);
static std::vector<device> devices();
static std::vector<platform> platforms();
static void synchronize(driver::context const &);
static unsigned int default_device; static unsigned int default_device;
}; };

View File

@@ -31,21 +31,33 @@ namespace triton
namespace driver namespace driver
{ {
class stream; class cu_stream;
// Buffer // Base
class buffer: public handle_interface<buffer, CUdeviceptr> class buffer : public polymorphic_resource<CUdeviceptr, cl_mem> {
public:
buffer(driver::context* ctx, CUdeviceptr cl, bool take_ownership);
buffer(driver::context* ctx, cl_mem cl, bool take_ownership);
driver::context* context();
protected:
driver::context* context_;
};
// OpenCL
class ocl_buffer: public buffer
{ {
public: public:
buffer(driver::context const & context, size_t size); ocl_buffer(driver::context* context, size_t size);
buffer(driver::context const & context, CUdeviceptr cu, bool take_ownership); };
void set_zero(stream const & queue, size_t size);
handle<CUdeviceptr> const & cu() const;
handle<CUdeviceptr> & cu();
private: // CUDA
context context_; class cu_buffer: public buffer
handle<CUdeviceptr> cu_; {
public:
cu_buffer(driver::context* context, size_t size);
cu_buffer(driver::context* context, CUdeviceptr cu, bool take_ownership);
void set_zero(cu_stream const & queue, size_t size);
}; };
} }

View File

@@ -31,35 +31,50 @@ namespace triton
namespace driver namespace driver
{ {
class context: public handle_interface<context, CUcontext> class context: public polymorphic_resource<CUcontext, cl_context>{
{ protected:
private:
static std::string get_cache_path(); static std::string get_cache_path();
static CUdevice device(CUcontext);
public: public:
//Constructors context(driver::device *dev, CUcontext cu, bool take_ownership);
explicit context(CUcontext context, bool take_ownership = true); context(driver::device *dev, cl_context cl, bool take_ownership);
explicit context(driver::device const & dvc); driver::device* device() const;
//Accessors
driver::device const & device() const;
std::string const & cache_path() const; std::string const & cache_path() const;
handle<CUcontext> const & cu() const;
private: protected:
handle<CUcontext> cu_; driver::device* dev_;
driver::device dvc_;
std::string cache_path_; std::string cache_path_;
}; };
class ContextSwitcher{ // CUDA
class cu_context: public context {
public: public:
ContextSwitcher(driver::context const & ctx); class context_switcher{
~ContextSwitcher(); public:
context_switcher(driver::context const & ctx);
~context_switcher();
private: private:
driver::context const & ctx_; driver::cu_context const & ctx_;
}; };
private:
static CUdevice get_device_of(CUcontext);
public:
//Constructors
cu_context(CUcontext cu, bool take_ownership = true);
cu_context(driver::device* dev);
};
// OpenCL
class ocl_context: public context {
public:
ocl_context(driver::device* dev);
};
} }
} }

View File

@@ -84,7 +84,7 @@ inline void cublasGemmEx(cublasHandle_t handle, cudaDataType cudt, cublasOperati
/* Simplified API for default GEMM */ /* Simplified API for default GEMM */
inline void cublasGemm(DType dtype, stream& stream, char cAT, char cBT, int32_t M, int32_t N, int32_t K, scalar alpha, buffer const & A, int32_t lda, buffer const & B, int32_t ldb, scalar beta, buffer& C, int32_t ldc, cublasGemmAlgo_t* fastest = NULL, cublasGemmAlgo_t algo = CUBLAS_GEMM_DFALT){ inline void cublasGemm(DType dtype, stream& stream, char cAT, char cBT, int32_t M, int32_t N, int32_t K, scalar alpha, cu_buffer const & A, int32_t lda, cu_buffer const & B, int32_t ldb, scalar beta, cu_buffer& C, int32_t ldc, cublasGemmAlgo_t* fastest = NULL, cublasGemmAlgo_t algo = CUBLAS_GEMM_DFALT){
ContextSwitcher ctx_switch(stream.context()); ContextSwitcher ctx_switch(stream.context());
cublasHandle_t handle = dispatch::cublasHandle(stream.context()); cublasHandle_t handle = dispatch::cublasHandle(stream.context());
dispatch::cublasSetStream_v2(handle, (CUstream)stream); dispatch::cublasSetStream_v2(handle, (CUstream)stream);
@@ -112,7 +112,7 @@ inline cudnnTensorFormat_t format(cudnnDataType_t cutype){
} }
inline void cudnnConv(DType dtype, stream& stream, int32_t D, int32_t H, int32_t W, int32_t N, int32_t K, int32_t M, int32_t P, int32_t Q, int32_t C, int32_t T, int32_t R, int32_t S, inline void cudnnConv(DType dtype, stream& stream, int32_t D, int32_t H, int32_t W, int32_t N, int32_t K, int32_t M, int32_t P, int32_t Q, int32_t C, int32_t T, int32_t R, int32_t S,
int32_t pad_d, int32_t pad_h, int32_t pad_w, int32_t stride_d, int32_t stride_h, int32_t stride_w, scalar alpha, buffer const & I, buffer const & F, scalar beta, buffer const & O){ int32_t pad_d, int32_t pad_h, int32_t pad_w, int32_t stride_d, int32_t stride_h, int32_t stride_w, scalar alpha, cu_buffer const & I, cu_buffer const & F, scalar beta, cu_buffer const & O){
driver::driver::context const & ctx = stream.context(); driver::driver::context const & ctx = stream.context();
ContextSwitcher switch_ctx(ctx); ContextSwitcher switch_ctx(ctx);
@@ -154,7 +154,7 @@ inline void cudnnConv(DType dtype, stream& stream, int32_t D, int32_t H, int32_t
size_t workspace_size; size_t workspace_size;
dispatch::cudnnGetConvolutionForwardWorkspaceSize(handle, tI, tF, conv, tO, algo, &workspace_size); dispatch::cudnnGetConvolutionForwardWorkspaceSize(handle, tI, tF, conv, tO, algo, &workspace_size);
static buffer work(ctx, 1024*1024*64); static cu_buffer work(ctx, 1024*1024*64);
CUdeviceptr twork = work; CUdeviceptr twork = work;
CUdeviceptr pI = I, pF = F, pO = O; CUdeviceptr pI = I, pF = F, pO = O;
dispatch::cudnnConvolutionForward(handle, alpha.data(), tI, (void*)pI, tF, (void*)pF, conv, algo, (void*)twork, workspace_size, beta.data(), tO, (void*)pO); dispatch::cudnnConvolutionForward(handle, alpha.data(), tI, (void*)pI, tF, (void*)pF, conv, algo, (void*)twork, workspace_size, beta.data(), tO, (void*)pO);
@@ -162,7 +162,7 @@ inline void cudnnConv(DType dtype, stream& stream, int32_t D, int32_t H, int32_t
inline void cudnnPool(DType dtype, stream& stream, int32_t D, int32_t H, int32_t W, int32_t N, int32_t K, int32_t M, int32_t P, int32_t Q, int32_t T, int32_t R, int32_t S, inline void cudnnPool(DType dtype, stream& stream, int32_t D, int32_t H, int32_t W, int32_t N, int32_t K, int32_t M, int32_t P, int32_t Q, int32_t T, int32_t R, int32_t S,
int32_t pad_d, int32_t pad_h, int32_t pad_w, int32_t stride_d, int32_t stride_h, int32_t stride_w, scalar alpha, buffer const & I, scalar beta, buffer const & O){ int32_t pad_d, int32_t pad_h, int32_t pad_w, int32_t stride_d, int32_t stride_h, int32_t stride_w, scalar alpha, cu_buffer const & I, scalar beta, cu_buffer const & O){
driver::driver::context const & ctx = stream.context(); driver::driver::context const & ctx = stream.context();
ContextSwitcher switch_ctx(ctx); ContextSwitcher switch_ctx(ctx);
@@ -200,11 +200,11 @@ inline void cudnnPool(DType dtype, stream& stream, int32_t D, int32_t H, int32_t
dispatch::cudnnPoolingForward(handle, desc, alpha.data(), tI, (void*)pI, beta.data(), tO, (void*)pO); dispatch::cudnnPoolingForward(handle, desc, alpha.data(), tI, (void*)pI, beta.data(), tO, (void*)pO);
} }
inline void cudnnTransformTensor(driver::stream & stream, inline void cudnnTransformTensor(driver::cu_stream & stream,
DType in_dtype, DType out_dtype, DType in_dtype, DType out_dtype,
cudnnTensorFormat_t in_layout, cudnnTensorFormat_t out_layout, cudnnTensorFormat_t in_layout, cudnnTensorFormat_t out_layout,
int32_t N, int32_t C, int32_t D, int32_t H, int32_t W, int32_t N, int32_t C, int32_t D, int32_t H, int32_t W,
scalar alpha, driver::buffer const & I, scalar beta, driver::buffer& O) scalar alpha, driver::cu_buffer const & I, scalar beta, driver::cu_buffer& O)
{ {
cudnnHandle_t handle = dispatch::cudnnHandle(stream.context()); cudnnHandle_t handle = dispatch::cudnnHandle(stream.context());
dispatch::cudnnSetStream(handle, (CUstream)stream); dispatch::cudnnSetStream(handle, (CUstream)stream);

View File

@@ -32,9 +32,20 @@ namespace triton
namespace driver namespace driver
{ {
// Device // Base device
class device: public handle_interface<device, CUdevice> class device: public polymorphic_resource<CUdevice, cl_device_id>{
{ public:
using polymorphic_resource::polymorphic_resource;
};
// OpenCL device
class ocl_device: public device {
public:
ocl_device(cl_device_id cl, bool take_ownership = true): device(cl, take_ownership) { }
};
// CUDA device
class cu_device: public device {
public: public:
//Supported architectures //Supported architectures
enum class Architecture{ enum class Architecture{
@@ -61,14 +72,12 @@ private:
inline nvmlDevice_t nvml_device() const; inline nvmlDevice_t nvml_device() const;
public: public:
device(CUdevice cu = CUdevice(), bool take_ownership = true): cu_(cu, take_ownership){} cu_device(CUdevice cu = CUdevice(), bool take_ownership = true): device(cu, take_ownership){}
//Accessors //Accessors
Architecture architecture() const; Architecture architecture() const;
handle<CUdevice> const & cu() const;
//Informations //Informations
std::string infos() const; std::string infos() const;
size_t address_bits() const; size_t address_bits() const;
driver::platform platform() const;
std::vector<size_t> max_block_dim() const; std::vector<size_t> max_block_dim() const;
size_t max_threads_per_block() const; size_t max_threads_per_block() const;
size_t max_shared_memory() const; size_t max_shared_memory() const;
@@ -87,7 +96,6 @@ public:
size_t max_mem_clock() const; size_t max_mem_clock() const;
private: private:
handle<CUdevice> cu_;
std::shared_ptr<std::pair<size_t, size_t>> interpreted_as_; std::shared_ptr<std::pair<size_t, size_t>> interpreted_as_;
}; };

View File

@@ -28,10 +28,11 @@
//CUDA Backend //CUDA Backend
#include "triton/external/CUDA/cuda.h" #include "triton/external/CUDA/cuda.h"
#include "triton/external/CUDA/nvrtc.h"
#include "triton/external/CUDA/cublas_v2.h" #include "triton/external/CUDA/cublas_v2.h"
#include "triton/external/CUDA/cudnn.h" #include "triton/external/CUDA/cudnn.h"
#include "triton/external/CUDA/nvml.h" #include "triton/external/CUDA/nvml.h"
#include "triton/external/CL/cl.h"
#include "triton/external/CL/cl_ext.h"
//Exceptions //Exceptions
#include <iostream> #include <iostream>
@@ -42,10 +43,9 @@ namespace triton
namespace driver namespace driver
{ {
class context; class cu_context;
template<class T> void check(T){} template<class T> void check(T){}
void check(nvrtcResult err);
void check(CUresult err); void check(CUresult err);
void check(cublasStatus_t err); void check(cublasStatus_t err);
void check(cudnnStatus_t err); void check(cudnnStatus_t err);
@@ -79,14 +79,48 @@ private:
} }
public: public:
static bool nvrtcinit(); static bool clinit();
static bool nvmlinit(); static bool nvmlinit();
static bool cuinit(); static bool cuinit();
static bool cublasinit(); static bool cublasinit();
static bool cudnninit(); static bool cudnninit();
static void release(); static void release();
//OpenCL
static cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *);
static cl_int clEnqueueNDRangeKernel(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *);
static cl_int clSetKernelArg(cl_kernel, cl_uint, size_t, const void *);
static cl_int clReleaseMemObject(cl_mem);
static cl_int clFinish(cl_command_queue);
static cl_int clGetMemObjectInfo(cl_mem, cl_mem_info, size_t, void *, size_t *);
static cl_int clGetCommandQueueInfo(cl_command_queue, cl_command_queue_info, size_t, void *, size_t *);
static cl_int clReleaseContext(cl_context);
static cl_int clReleaseEvent(cl_event);
static cl_int clEnqueueWriteBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *);
static cl_int clEnqueueReadBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void *, cl_uint, const cl_event *, cl_event *);
static cl_int clGetProgramBuildInfo(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *);
static cl_int clReleaseDevice(cl_device_id);
static cl_context clCreateContext(const cl_context_properties *, cl_uint, const cl_device_id *, void (*)(const char *, const void *, size_t, void *), void *, cl_int *);
static cl_int clGetDeviceIDs(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *);
static cl_int clGetContextInfo(cl_context, cl_context_info, size_t, void *, size_t *);
static cl_int clGetDeviceInfo(cl_device_id, cl_device_info, size_t, void *, size_t *);
static cl_int clReleaseCommandQueue(cl_command_queue);
static cl_int clGetPlatformIDs(cl_uint, cl_platform_id *, cl_uint *);
static cl_int clGetPlatformInfo(cl_platform_id, cl_platform_info, size_t, void *, size_t *);
static cl_int clGetEventProfilingInfo(cl_event, cl_profiling_info, size_t, void *, size_t *);
static cl_program clCreateProgramWithBinary(cl_context, cl_uint, const cl_device_id *, const size_t *, const unsigned char **, cl_int *, cl_int *);
static cl_command_queue clCreateCommandQueue(cl_context, cl_device_id, cl_command_queue_properties, cl_int *);
static cl_int clRetainEvent(cl_event);
static cl_int clReleaseProgram(cl_program);
static cl_int clFlush(cl_command_queue);
static cl_int clGetProgramInfo(cl_program, cl_program_info, size_t, void *, size_t *);
static cl_int clGetKernelInfo(cl_kernel, cl_kernel_info, size_t, void *, size_t *);
static cl_int clGetKernelWorkGroupInfo(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void *, size_t *);
static cl_kernel clCreateKernel(cl_program, const char *, cl_int *);
static cl_mem clCreateBuffer(cl_context, cl_mem_flags, size_t, void *, cl_int *);
static cl_program clCreateProgramWithSource(cl_context, cl_uint, const char **, const size_t *, cl_int *);
static cl_int clReleaseKernel(cl_kernel);
//CUDA //CUDA
static CUresult cuCtxGetCurrent(CUcontext *pctx); static CUresult cuCtxGetCurrent(CUcontext *pctx);
static CUresult cuCtxSetCurrent(CUcontext ctx); static CUresult cuCtxSetCurrent(CUcontext ctx);
@@ -130,14 +164,7 @@ public:
static nvmlReturn_t nvmlDeviceGetClockInfo(nvmlDevice_t device, nvmlClockType_t type, unsigned int *clock); static nvmlReturn_t nvmlDeviceGetClockInfo(nvmlDevice_t device, nvmlClockType_t type, unsigned int *clock);
static nvmlReturn_t nvmlDeviceGetMaxClockInfo(nvmlDevice_t device, nvmlClockType_t type, unsigned int *clock); static nvmlReturn_t nvmlDeviceGetMaxClockInfo(nvmlDevice_t device, nvmlClockType_t type, unsigned int *clock);
static nvrtcResult nvrtcCompileProgram(nvrtcProgram prog, int numOptions, const char **options); static cublasHandle_t cublasHandle(driver::cu_context const & ctx);
static nvrtcResult nvrtcGetProgramLogSize(nvrtcProgram prog, size_t *logSizeRet);
static nvrtcResult nvrtcGetPTX(nvrtcProgram prog, char *ptx);
static nvrtcResult nvrtcGetPTXSize(nvrtcProgram prog, size_t *ptxSizeRet);
static nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char **headers, const char **includeNames);
static nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char *log);
static cublasHandle_t cublasHandle(driver::context const & ctx);
static cublasStatus_t cublasCreate_v2(cublasHandle_t* h); static cublasStatus_t cublasCreate_v2(cublasHandle_t* h);
static cublasStatus_t cublasGetStream_v2(cublasHandle_t h, cudaStream_t *streamId); static cublasStatus_t cublasGetStream_v2(cublasHandle_t h, cudaStream_t *streamId);
static cublasStatus_t cublasSetStream_v2(cublasHandle_t h, cudaStream_t streamId); static cublasStatus_t cublasSetStream_v2(cublasHandle_t h, cudaStream_t streamId);
@@ -146,7 +173,7 @@ public:
static cublasStatus_t cublasHgemm (cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, half* alpha, const half *A, int lda, const half *B, int ldb, half* beta, half *C, int ldc); static cublasStatus_t cublasHgemm (cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, half* alpha, const half *A, int lda, const half *B, int ldb, half* beta, half *C, int ldc);
static cublasStatus_t cublasGemmEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void *alpha, const void *A, cudaDataType Atype, int lda, const void *B, cudaDataType Btype, int ldb, const void *beta, void *C, cudaDataType Ctype, int ldc, cudaDataType computeType, cublasGemmAlgo_t algo); static cublasStatus_t cublasGemmEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void *alpha, const void *A, cudaDataType Atype, int lda, const void *B, cudaDataType Btype, int ldb, const void *beta, void *C, cudaDataType Ctype, int ldc, cudaDataType computeType, cublasGemmAlgo_t algo);
static cudnnHandle_t cudnnHandle(driver::context const & ctx); static cudnnHandle_t cudnnHandle(driver::cu_context const & ctx);
static cudnnStatus_t cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc); static cudnnStatus_t cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc);
static cudnnStatus_t cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t* convDesc); static cudnnStatus_t cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t* convDesc);
static cudnnStatus_t cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc); static cudnnStatus_t cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc);
@@ -167,13 +194,50 @@ public:
static cudnnStatus_t cudnnTransformTensor(cudnnHandle_t handle, const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, const cudnnTensorDescriptor_t yDesc, void *y); static cudnnStatus_t cudnnTransformTensor(cudnnHandle_t handle, const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, const cudnnTensorDescriptor_t yDesc, void *y);
private: private:
// Libraries
static void* opencl_;
static void* cuda_; static void* cuda_;
static void* nvrtc_;
static void* nvml_; static void* nvml_;
static void* cublas_; static void* cublas_;
static void* cudnn_; static void* cudnn_;
//CUDA // OpenCL functions
static void* clBuildProgram_;
static void* clEnqueueNDRangeKernel_;
static void* clSetKernelArg_;
static void* clReleaseMemObject_;
static void* clFinish_;
static void* clGetMemObjectInfo_;
static void* clGetCommandQueueInfo_;
static void* clReleaseContext_;
static void* clReleaseEvent_;
static void* clEnqueueWriteBuffer_;
static void* clEnqueueReadBuffer_;
static void* clGetProgramBuildInfo_;
static void* clReleaseDevice_;
static void* clCreateContext_;
static void* clGetDeviceIDs_;
static void* clGetContextInfo_;
static void* clGetDeviceInfo_;
static void* clReleaseCommandQueue_;
static void* clGetPlatformIDs_;
static void* clGetPlatformInfo_;
static void* clGetEventProfilingInfo_;
static void* clCreateProgramWithBinary_;
static void* clCreateCommandQueue_;
static void* clRetainEvent_;
static void* clReleaseProgram_;
static void* clFlush_;
static void* clGetProgramInfo_;
static void* clGetKernelInfo_;
static void* clGetKernelWorkGroupInfo_;
static void* clCreateKernel_;
static void* clCreateBuffer_;
static void* clCreateProgramWithSource_;
static void* clReleaseKernel_;
// CUDA functions
static void* cuCtxGetCurrent_; static void* cuCtxGetCurrent_;
static void* cuCtxSetCurrent_; static void* cuCtxSetCurrent_;
static void* cuCtxDestroy_v2_; static void* cuCtxDestroy_v2_;
@@ -188,7 +252,6 @@ private:
static void* cuDeviceGetName_; static void* cuDeviceGetName_;
static void* cuDeviceGetPCIBusId_; static void* cuDeviceGetPCIBusId_;
static void* cuModuleGetGlobal_v2_; static void* cuModuleGetGlobal_v2_;
static void* cuMemcpyHtoDAsync_v2_; static void* cuMemcpyHtoDAsync_v2_;
static void* cuModuleLoad_; static void* cuModuleLoad_;
static void* cuLaunchKernel_; static void* cuLaunchKernel_;
@@ -210,19 +273,12 @@ private:
static void* cuMemsetD8Async_; static void* cuMemsetD8Async_;
static void* cuCtxPushCurrent_v2_; static void* cuCtxPushCurrent_v2_;
static void* cuCtxPopCurrent_v2_; static void* cuCtxPopCurrent_v2_;
// NVML
static void* nvmlInit_v2_; static void* nvmlInit_v2_;
static void* nvmlDeviceGetHandleByPciBusId_v2_; static void* nvmlDeviceGetHandleByPciBusId_v2_;
static void* nvmlDeviceGetClockInfo_; static void* nvmlDeviceGetClockInfo_;
static void* nvmlDeviceGetMaxClockInfo_; static void* nvmlDeviceGetMaxClockInfo_;
// cuBLAS
static void* nvrtcCompileProgram_;
static void* nvrtcGetProgramLogSize_;
static void* nvrtcGetPTX_;
static void* nvrtcGetPTXSize_;
static void* nvrtcCreateProgram_;
static void* nvrtcGetProgramLog_;
static void* cublasCreate_v2_; static void* cublasCreate_v2_;
static void* cublasGetStream_v2_; static void* cublasGetStream_v2_;
static void* cublasSetStream_v2_; static void* cublasSetStream_v2_;
@@ -230,7 +286,7 @@ private:
static void* cublasSgemm_v2_; static void* cublasSgemm_v2_;
static void* cublasDgemm_v2_; static void* cublasDgemm_v2_;
static void* cublasGemmEx_; static void* cublasGemmEx_;
// cuDNN
static void* cudnnCreateConvolutionDescriptor_; static void* cudnnCreateConvolutionDescriptor_;
static void* cudnnCreatePoolingDescriptor_; static void* cudnnCreatePoolingDescriptor_;
static void* cudnnCreateTensorDescriptor_; static void* cudnnCreateTensorDescriptor_;

View File

@@ -32,7 +32,7 @@ namespace driver
{ {
// Event // Event
class Event: public handle_interface<Event, cu_event_t> class Event
{ {
public: public:
float elapsed_time() const; float elapsed_time() const;

View File

@@ -41,8 +41,8 @@ struct cu_event_t{
CUevent second; CUevent second;
}; };
struct cu_platform{ struct CUPlatform{
cu_platform() : status_(dispatch::cuInit(0)) { } CUPlatform() : status_(dispatch::cuInit(0)) { }
operator bool() const { return status_; } operator bool() const { return status_; }
private: private:
CUresult status_; CUresult status_;
@@ -76,6 +76,22 @@ protected:
bool has_ownership_; bool has_ownership_;
}; };
template<class CUType, class CLType>
class polymorphic_resource {
public:
polymorphic_resource(CUType cu, bool take_ownership): cu_(cu, take_ownership){}
polymorphic_resource(CLType cl, bool take_ownership): cl_(cl, take_ownership){}
handle<CUType> cu() { return cu_; }
handle<CLType> cl() { return cl_; }
const handle<CUType>& cu() const { return cu_; }
const handle<CLType>& cl() const { return cl_; }
protected:
handle<CLType> cl_;
handle<CUType> cu_;
};
} }
} }

View File

@@ -34,28 +34,38 @@ namespace triton
namespace driver namespace driver
{ {
class buffer; class cu_buffer;
// Kernel // Base
class kernel: public handle_interface<kernel, CUfunction> class kernel: public polymorphic_resource<CUfunction, cl_kernel> {
{ public:
kernel(driver::module* program, CUfunction fn, bool has_ownership);
kernel(driver::module* program, cl_kernel fn, bool has_ownership);
driver::module* module();
private:
driver::module* program_;
};
// OpenCL
class ocl_kernel: public kernel {
};
// CUDA
class cu_kernel: public kernel {
public: public:
//Constructors //Constructors
kernel(driver::module const & program, const char * name); cu_kernel(driver::module* program, const char * name);
//Accessors
handle<CUfunction> const & cu() const;
driver::module const & module() const;
//Arguments setters //Arguments setters
void setArg(unsigned int index, std::size_t size, void* ptr); void setArg(unsigned int index, std::size_t size, void* ptr);
void setArg(unsigned int index, buffer const &); void setArg(unsigned int index, cu_buffer const &);
template<class T> void setArg(unsigned int index, T value) { setArg(index, sizeof(T), (void*)&value); } template<class T> void setArg(unsigned int index, T value) { setArg(index, sizeof(T), (void*)&value); }
//Arguments getters //Arguments getters
void* const* cu_params() const; void* const* cu_params() const;
private: private:
handle<CUfunction> cu_; handle<CUfunction> cu_;
driver::module program_; driver::cu_module* program_;
unsigned int address_bits_;
std::vector<std::shared_ptr<void> > cu_params_store_; std::vector<std::shared_ptr<void> > cu_params_store_;
std::vector<void*> cu_params_; std::vector<void*> cu_params_;
}; };

View File

@@ -39,25 +39,30 @@ namespace triton
namespace driver namespace driver
{ {
class context; class cu_context;
class device; class cu_device;
class module: public handle_interface<module, CUmodule> class module: public polymorphic_resource<CUmodule, cl_program> {
{ public:
static std::string header(device const & device); module(driver::context* ctx, CUmodule mod, bool has_ownership);
module(driver::context* ctx, cl_program mod, bool has_ownership);
driver::context* context() const;
protected:
driver::context* ctx_;
};
class cu_module: public module {
static std::string header(driver::cu_device const & device);
std::string compile_llvm_module(llvm::Module* module); std::string compile_llvm_module(llvm::Module* module);
void init_llvm(); void init_llvm();
public: public:
module(driver::context const & context, llvm::Module *module); cu_module(driver::context* context, llvm::Module *module);
module(driver::context const & context, const std::string& source); cu_module(driver::context* context, const std::string& source);
driver::context const & context() const; cu_buffer symbol(const char * name) const;
handle<CUmodule> const & cu() const;
buffer symbol(const char * name) const;
private: private:
handle<CUmodule> cu_;
driver::context context_;
std::string source_; std::string source_;
}; };

View File

@@ -39,12 +39,37 @@ class device;
class platform class platform
{ {
public: public:
// Constructor
platform(const std::string& name): name_(name){ }
// Accessors // Accessors
std::string name() const { return "CUDA"; } std::string name() const { return name_; }
std::string version() const; // Virtual methods
std::vector<device> devices() const; virtual std::string version() const = 0;
virtual void devices(std::vector<driver::device *> &devices) const = 0;
private: private:
handle<cu_platform> cu_; std::string name_;
};
class cu_platform: public platform
{
public:
cu_platform(): platform("CUDA") { }
std::string version() const;
void devices(std::vector<driver::device*> &devices) const;
private:
handle<CUPlatform> cu_;
};
class cl_platform: public platform
{
public:
cl_platform(cl_platform_id cl): platform("OpenCL"), cl_(cl) { }
std::string version() const;
void devices(std::vector<driver::device*> &devices) const;
private:
handle<cl_platform_id> cl_;
}; };
} }

View File

@@ -35,43 +35,55 @@ namespace triton
namespace driver namespace driver
{ {
class kernel; class cu_kernel;
class Event; class Event;
class Range; class Range;
class buffer; class cu_buffer;
// Command Queue // Base
class stream: public handle_interface<stream, CUstream> class stream: public polymorphic_resource<CUstream, cl_command_queue> {
{ public:
stream(driver::context *ctx, CUstream, bool has_ownership);
stream(driver::context *ctx, cl_command_queue, bool has_ownership);
driver::context* context() const;
virtual void synchronize() = 0;
protected:
driver::context *ctx_;
};
// OpenCL
class cl_stream: public stream {
public: public:
// Constructors // Constructors
stream(CUstream stream, bool take_ownership); cl_stream(driver::context *ctx);
stream(driver::context const & context);
//Accessors // Synchronize
handle<CUstream> const & cu() const; void synchronize();
driver::context const & context() const; };
// CUDA
class cu_stream: public stream {
public:
//Constructors
cu_stream(CUstream str, bool take_ownership);
cu_stream(driver::context* context);
//Synchronize //Synchronize
void synchronize(); void synchronize();
//Enqueue //Enqueue
void enqueue(kernel const & kernel, std::array<size_t, 3> grid, std::array<size_t, 3> block, std::vector<Event> const * = NULL, Event *event = NULL); void enqueue(cu_kernel const & cu_kernel, std::array<size_t, 3> grid, std::array<size_t, 3> block, std::vector<Event> const * = NULL, Event *event = NULL);
// Write // Write
void write(driver::buffer const & buffer, bool blocking, std::size_t offset, std::size_t size, void const* ptr); void write(driver::cu_buffer const & cu_buffer, bool blocking, std::size_t offset, std::size_t size, void const* ptr);
template<class T> void write(driver::cu_buffer const & buffer, bool blocking, std::size_t offset, std::vector<T> const & x)
template<class T> void write(driver::buffer const & buffer, bool blocking, std::size_t offset, std::vector<T> const & x)
{ write(buffer, blocking, offset, x.size()*sizeof(T), x.data()); } { write(buffer, blocking, offset, x.size()*sizeof(T), x.data()); }
// Read // Read
void read(driver::buffer const & buffer, bool blocking, std::size_t offset, std::size_t size, void* ptr); void read(driver::cu_buffer const & cu_buffer, bool blocking, std::size_t offset, std::size_t size, void* ptr);
template<class T> void read(driver::cu_buffer const & buffer, bool blocking, std::size_t offset, std::vector<T>& x)
template<class T> void read(driver::buffer const & buffer, bool blocking, std::size_t offset, std::vector<T>& x)
{ read(buffer, blocking, offset, x.size()*sizeof(T), x.data()); } { read(buffer, blocking, offset, x.size()*sizeof(T), x.data()); }
private:
driver::context context_;
handle<CUstream> cu_;
}; };

1468
include/triton/external/CL/cl.h vendored Normal file

File diff suppressed because it is too large Load Diff

12947
include/triton/external/CL/cl.hpp vendored Normal file

File diff suppressed because it is too large Load Diff

9677
include/triton/external/CL/cl2.hpp vendored Normal file

File diff suppressed because it is too large Load Diff

131
include/triton/external/CL/cl_d3d10.h vendored Normal file
View File

@@ -0,0 +1,131 @@
/**********************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
#ifndef __OPENCL_CL_D3D10_H
#define __OPENCL_CL_D3D10_H
#include <d3d10.h>
#include <CL/cl.h>
#include <CL/cl_platform.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************************************************
* cl_khr_d3d10_sharing */
#define cl_khr_d3d10_sharing 1
typedef cl_uint cl_d3d10_device_source_khr;
typedef cl_uint cl_d3d10_device_set_khr;
/******************************************************************************/
/* Error Codes */
#define CL_INVALID_D3D10_DEVICE_KHR -1002
#define CL_INVALID_D3D10_RESOURCE_KHR -1003
#define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004
#define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005
/* cl_d3d10_device_source_nv */
#define CL_D3D10_DEVICE_KHR 0x4010
#define CL_D3D10_DXGI_ADAPTER_KHR 0x4011
/* cl_d3d10_device_set_nv */
#define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012
#define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013
/* cl_context_info */
#define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014
#define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C
/* cl_mem_info */
#define CL_MEM_D3D10_RESOURCE_KHR 0x4015
/* cl_image_info */
#define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017
#define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018
/******************************************************************************/
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)(
cl_platform_id platform,
cl_d3d10_device_source_khr d3d_device_source,
void * d3d_object,
cl_d3d10_device_set_khr d3d_device_set,
cl_uint num_entries,
cl_device_id * devices,
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D10Buffer * resource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D10Texture2D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D10Texture3D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_D3D10_H */

131
include/triton/external/CL/cl_d3d11.h vendored Normal file
View File

@@ -0,0 +1,131 @@
/**********************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
#ifndef __OPENCL_CL_D3D11_H
#define __OPENCL_CL_D3D11_H
#include <d3d11.h>
#include <CL/cl.h>
#include <CL/cl_platform.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************************************************
* cl_khr_d3d11_sharing */
#define cl_khr_d3d11_sharing 1
typedef cl_uint cl_d3d11_device_source_khr;
typedef cl_uint cl_d3d11_device_set_khr;
/******************************************************************************/
/* Error Codes */
#define CL_INVALID_D3D11_DEVICE_KHR -1006
#define CL_INVALID_D3D11_RESOURCE_KHR -1007
#define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR -1008
#define CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR -1009
/* cl_d3d11_device_source */
#define CL_D3D11_DEVICE_KHR 0x4019
#define CL_D3D11_DXGI_ADAPTER_KHR 0x401A
/* cl_d3d11_device_set */
#define CL_PREFERRED_DEVICES_FOR_D3D11_KHR 0x401B
#define CL_ALL_DEVICES_FOR_D3D11_KHR 0x401C
/* cl_context_info */
#define CL_CONTEXT_D3D11_DEVICE_KHR 0x401D
#define CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR 0x402D
/* cl_mem_info */
#define CL_MEM_D3D11_RESOURCE_KHR 0x401E
/* cl_image_info */
#define CL_IMAGE_D3D11_SUBRESOURCE_KHR 0x401F
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR 0x4020
#define CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR 0x4021
/******************************************************************************/
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)(
cl_platform_id platform,
cl_d3d11_device_source_khr d3d_device_source,
void * d3d_object,
cl_d3d11_device_set_khr d3d_device_set,
cl_uint num_entries,
cl_device_id * devices,
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D11Buffer * resource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D11Texture2D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D11Texture3D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_D3D11_H */

View File

@@ -0,0 +1,132 @@
/**********************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_H
#define __OPENCL_CL_DX9_MEDIA_SHARING_H
#include <CL/cl.h>
#include <CL/cl_platform.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************************************************/
/* cl_khr_dx9_media_sharing */
#define cl_khr_dx9_media_sharing 1
typedef cl_uint cl_dx9_media_adapter_type_khr;
typedef cl_uint cl_dx9_media_adapter_set_khr;
#if defined(_WIN32)
#include <d3d9.h>
typedef struct _cl_dx9_surface_info_khr
{
IDirect3DSurface9 *resource;
HANDLE shared_handle;
} cl_dx9_surface_info_khr;
#endif
/******************************************************************************/
/* Error Codes */
#define CL_INVALID_DX9_MEDIA_ADAPTER_KHR -1010
#define CL_INVALID_DX9_MEDIA_SURFACE_KHR -1011
#define CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR -1012
#define CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR -1013
/* cl_media_adapter_type_khr */
#define CL_ADAPTER_D3D9_KHR 0x2020
#define CL_ADAPTER_D3D9EX_KHR 0x2021
#define CL_ADAPTER_DXVA_KHR 0x2022
/* cl_media_adapter_set_khr */
#define CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2023
#define CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2024
/* cl_context_info */
#define CL_CONTEXT_ADAPTER_D3D9_KHR 0x2025
#define CL_CONTEXT_ADAPTER_D3D9EX_KHR 0x2026
#define CL_CONTEXT_ADAPTER_DXVA_KHR 0x2027
/* cl_mem_info */
#define CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR 0x2028
#define CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR 0x2029
/* cl_image_info */
#define CL_IMAGE_DX9_MEDIA_PLANE_KHR 0x202A
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR 0x202B
#define CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR 0x202C
/******************************************************************************/
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_fn)(
cl_platform_id platform,
cl_uint num_media_adapters,
cl_dx9_media_adapter_type_khr * media_adapter_type,
void * media_adapters,
cl_dx9_media_adapter_set_khr media_adapter_set,
cl_uint num_entries,
cl_device_id * devices,
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)(
cl_context context,
cl_mem_flags flags,
cl_dx9_media_adapter_type_khr adapter_type,
void * surface_info,
cl_uint plane,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_H */

View File

@@ -0,0 +1,182 @@
/**********************************************************************************
* Copyright (c) 2008-2016 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/*****************************************************************************\
Copyright (c) 2013-2016 Intel Corporation All Rights Reserved.
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
File Name: cl_dx9_media_sharing_intel.h
Abstract:
Notes:
\*****************************************************************************/
#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H
#define __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H
#include <CL/cl.h>
#include <CL/cl_platform.h>
#include <d3d9.h>
#include <dxvahd.h>
#include <wtypes.h>
#include <d3d9types.h>
#ifdef __cplusplus
extern "C" {
#endif
/***************************************
* cl_intel_dx9_media_sharing extension *
****************************************/
#define cl_intel_dx9_media_sharing 1
typedef cl_uint cl_dx9_device_source_intel;
typedef cl_uint cl_dx9_device_set_intel;
/* error codes */
#define CL_INVALID_DX9_DEVICE_INTEL -1010
#define CL_INVALID_DX9_RESOURCE_INTEL -1011
#define CL_DX9_RESOURCE_ALREADY_ACQUIRED_INTEL -1012
#define CL_DX9_RESOURCE_NOT_ACQUIRED_INTEL -1013
/* cl_dx9_device_source_intel */
#define CL_D3D9_DEVICE_INTEL 0x4022
#define CL_D3D9EX_DEVICE_INTEL 0x4070
#define CL_DXVA_DEVICE_INTEL 0x4071
/* cl_dx9_device_set_intel */
#define CL_PREFERRED_DEVICES_FOR_DX9_INTEL 0x4024
#define CL_ALL_DEVICES_FOR_DX9_INTEL 0x4025
/* cl_context_info */
#define CL_CONTEXT_D3D9_DEVICE_INTEL 0x4026
#define CL_CONTEXT_D3D9EX_DEVICE_INTEL 0x4072
#define CL_CONTEXT_DXVA_DEVICE_INTEL 0x4073
/* cl_mem_info */
#define CL_MEM_DX9_RESOURCE_INTEL 0x4027
#define CL_MEM_DX9_SHARED_HANDLE_INTEL 0x4074
/* cl_image_info */
#define CL_IMAGE_DX9_PLANE_INTEL 0x4075
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL 0x402A
#define CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL 0x402B
/******************************************************************************/
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceIDsFromDX9INTEL(
cl_platform_id /* platform */,
cl_dx9_device_source_intel /* dx9_device_source */,
void* /* dx9_object */,
cl_dx9_device_set_intel /* dx9_device_set */,
cl_uint /* num_entries */,
cl_device_id* /* devices */,
cl_uint* /* num_devices */) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int (CL_API_CALL* clGetDeviceIDsFromDX9INTEL_fn)(
cl_platform_id /* platform */,
cl_dx9_device_source_intel /* dx9_device_source */,
void* /* dx9_object */,
cl_dx9_device_set_intel /* dx9_device_set */,
cl_uint /* num_entries */,
cl_device_id* /* devices */,
cl_uint* /* num_devices */) CL_EXT_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromDX9MediaSurfaceINTEL(
cl_context /* context */,
cl_mem_flags /* flags */,
IDirect3DSurface9* /* resource */,
HANDLE /* sharedHandle */,
UINT /* plane */,
cl_int* /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceINTEL_fn)(
cl_context /* context */,
cl_mem_flags /* flags */,
IDirect3DSurface9* /* resource */,
HANDLE /* sharedHandle */,
UINT /* plane */,
cl_int* /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireDX9ObjectsINTEL(
cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem* /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9ObjectsINTEL_fn)(
cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem* /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */) CL_EXT_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseDX9ObjectsINTEL(
cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
cl_mem* /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9ObjectsINTEL_fn)(
cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
cl_mem* /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */) CL_EXT_SUFFIX__VERSION_1_1;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H */

136
include/triton/external/CL/cl_egl.h vendored Normal file
View File

@@ -0,0 +1,136 @@
/*******************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
#ifndef __OPENCL_CL_EGL_H
#define __OPENCL_CL_EGL_H
#ifdef __APPLE__
#else
#include <CL/cl.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* Command type for events created with clEnqueueAcquireEGLObjectsKHR */
#define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR 0x202F
#define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR 0x202D
#define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR 0x202E
/* Error type for clCreateFromEGLImageKHR */
#define CL_INVALID_EGL_OBJECT_KHR -1093
#define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR -1092
/* CLeglImageKHR is an opaque handle to an EGLImage */
typedef void* CLeglImageKHR;
/* CLeglDisplayKHR is an opaque handle to an EGLDisplay */
typedef void* CLeglDisplayKHR;
/* CLeglSyncKHR is an opaque handle to an EGLSync object */
typedef void* CLeglSyncKHR;
/* properties passed to clCreateFromEGLImageKHR */
typedef intptr_t cl_egl_image_properties_khr;
#define cl_khr_egl_image 1
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromEGLImageKHR(cl_context /* context */,
CLeglDisplayKHR /* egldisplay */,
CLeglImageKHR /* eglimage */,
cl_mem_flags /* flags */,
const cl_egl_image_properties_khr * /* properties */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)(
cl_context context,
CLeglDisplayKHR egldisplay,
CLeglImageKHR eglimage,
cl_mem_flags flags,
const cl_egl_image_properties_khr * properties,
cl_int * errcode_ret);
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireEGLObjectsKHR(cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem * /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event);
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseEGLObjectsKHR(cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem * /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event);
#define cl_khr_egl_event 1
extern CL_API_ENTRY cl_event CL_API_CALL
clCreateEventFromEGLSyncKHR(cl_context /* context */,
CLeglSyncKHR /* sync */,
CLeglDisplayKHR /* display */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)(
cl_context context,
CLeglSyncKHR sync,
CLeglDisplayKHR display,
cl_int * errcode_ret);
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_EGL_H */

670
include/triton/external/CL/cl_ext.h vendored Normal file
View File

@@ -0,0 +1,670 @@
/*******************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
/* $Revision: 11928 $ on $Date: 2010-07-13 09:04:56 -0700 (Tue, 13 Jul 2010) $ */
/* cl_ext.h contains OpenCL extensions which don't have external */
/* (OpenGL, D3D) dependencies. */
#ifndef __CL_EXT_H
#define __CL_EXT_H
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __APPLE__
#include <OpenCL/cl.h>
#include <AvailabilityMacros.h>
#else
#include <CL/cl.h>
#endif
/* cl_khr_fp64 extension - no extension #define since it has no functions */
#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
/* cl_khr_fp16 extension - no extension #define since it has no functions */
#define CL_DEVICE_HALF_FP_CONFIG 0x1033
/* Memory object destruction
*
* Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR
*
* Registers a user callback function that will be called when the memory object is deleted and its resources
* freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback
* stack associated with memobj. The registered user callback functions are called in the reverse order in
* which they were registered. The user callback functions are called and then the memory object is deleted
* and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be
* notified when the memory referenced by host_ptr, specified when the memory object is created and used as
* the storage bits for the memory object, can be reused or freed.
*
* The application may not call CL api's with the cl_mem object passed to the pfn_notify.
*
* Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
* before using.
*/
#define cl_APPLE_SetMemObjectDestructor 1
cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem /* memobj */,
void (* /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
void * /*user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
/* Context Logging Functions
*
* The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
* Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
* before using.
*
* clLogMessagesToSystemLog fowards on all log messages to the Apple System Logger
*/
#define cl_APPLE_ContextLoggingFunctions 1
extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * /* errstr */,
const void * /* private_info */,
size_t /* cb */,
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */
extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * /* errstr */,
const void * /* private_info */,
size_t /* cb */,
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */
extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * /* errstr */,
const void * /* private_info */,
size_t /* cb */,
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
/************************
* cl_khr_icd extension *
************************/
#define cl_khr_icd 1
/* cl_platform_info */
#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920
/* Additional Error Codes */
#define CL_PLATFORM_NOT_FOUND_KHR -1001
extern CL_API_ENTRY cl_int CL_API_CALL
clIcdGetPlatformIDsKHR(cl_uint /* num_entries */,
cl_platform_id * /* platforms */,
cl_uint * /* num_platforms */);
typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(
cl_uint /* num_entries */,
cl_platform_id * /* platforms */,
cl_uint * /* num_platforms */);
/* Extension: cl_khr_image2D_buffer
*
* This extension allows a 2D image to be created from a cl_mem buffer without a copy.
* The type associated with a 2D image created from a buffer in an OpenCL program is image2d_t.
* Both the sampler and sampler-less read_image built-in functions are supported for 2D images
* and 2D images created from a buffer. Similarly, the write_image built-ins are also supported
* for 2D images created from a buffer.
*
* When the 2D image from buffer is created, the client must specify the width,
* height, image format (i.e. channel order and channel data type) and optionally the row pitch
*
* The pitch specified must be a multiple of CL_DEVICE_IMAGE_PITCH_ALIGNMENT pixels.
* The base address of the buffer must be aligned to CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT pixels.
*/
/*************************************
* cl_khr_initalize_memory extension *
*************************************/
#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x2030
/**************************************
* cl_khr_terminate_context extension *
**************************************/
#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x2031
#define CL_CONTEXT_TERMINATE_KHR 0x2032
#define cl_khr_terminate_context 1
extern CL_API_ENTRY cl_int CL_API_CALL clTerminateContextKHR(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clTerminateContextKHR_fn)(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
/*
* Extension: cl_khr_spir
*
* This extension adds support to create an OpenCL program object from a
* Standard Portable Intermediate Representation (SPIR) instance
*/
#define CL_DEVICE_SPIR_VERSIONS 0x40E0
#define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE 0x40E1
/*****************************************
* cl_khr_create_command_queue extension *
*****************************************/
#define cl_khr_create_command_queue 1
typedef cl_bitfield cl_queue_properties_khr;
extern CL_API_ENTRY cl_command_queue CL_API_CALL
clCreateCommandQueueWithPropertiesKHR( cl_context /* context */,
cl_device_id /* device */,
const cl_queue_properties_khr* /* properties */,
cl_int* /* errcode_ret */ ) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_command_queue
(CL_API_CALL *clCreateCommandQueueWithPropertiesKHR_fn)( cl_context /* context */,
cl_device_id /* device */,
const cl_queue_properties_khr* /* properties */,
cl_int* /* errcode_ret */ ) CL_EXT_SUFFIX__VERSION_1_2;
/******************************************
* cl_nv_device_attribute_query extension *
******************************************/
/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001
#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002
#define CL_DEVICE_WARP_SIZE_NV 0x4003
#define CL_DEVICE_GPU_OVERLAP_NV 0x4004
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005
#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006
/*********************************
* cl_amd_device_memory_flags *
*********************************/
#define cl_amd_device_memory_flags 1
#define CL_MEM_USE_PERSISTENT_MEM_AMD (1 << 6) // Alloc from GPU's CPU visible heap
/* cl_device_info */
#define CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT 0x4032
/*********************************
* cl_amd_device_attribute_query *
*********************************/
#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036
#define CL_DEVICE_TOPOLOGY_AMD 0x4037
#define CL_DEVICE_BOARD_NAME_AMD 0x4038
#define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD 0x4039
#define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD 0x4040
#define CL_DEVICE_SIMD_WIDTH_AMD 0x4041
#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042
#define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043
#define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD 0x4044
#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD 0x4045
#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD 0x4046
#define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD 0x4047
#define CL_DEVICE_LOCAL_MEM_BANKS_AMD 0x4048
typedef union
{
struct { cl_uint type; cl_uint data[5]; } raw;
struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie;
} cl_device_topology_amd;
#define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD 1
/**************************
* cl_amd_offline_devices *
**************************/
#define CL_CONTEXT_OFFLINE_DEVICES_AMD 0x403F
/*********************************
* cl_arm_printf extension
*********************************/
#define CL_PRINTF_CALLBACK_ARM 0x40B0
#define CL_PRINTF_BUFFERSIZE_ARM 0x40B1
#ifdef CL_VERSION_1_1
/***********************************
* cl_ext_device_fission extension *
***********************************/
#define cl_ext_device_fission 1
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int
(CL_API_CALL *clReleaseDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int
(CL_API_CALL *clRetainDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
typedef cl_ulong cl_device_partition_property_ext;
extern CL_API_ENTRY cl_int CL_API_CALL
clCreateSubDevicesEXT( cl_device_id /*in_device*/,
const cl_device_partition_property_ext * /* properties */,
cl_uint /*num_entries*/,
cl_device_id * /*out_devices*/,
cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int
( CL_API_CALL * clCreateSubDevicesEXT_fn)( cl_device_id /*in_device*/,
const cl_device_partition_property_ext * /* properties */,
cl_uint /*num_entries*/,
cl_device_id * /*out_devices*/,
cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
/* cl_device_partition_property_ext */
#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050
#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051
#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053
/* clDeviceGetInfo selectors */
#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054
#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055
#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056
#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057
#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058
/* error codes */
#define CL_DEVICE_PARTITION_FAILED_EXT -1057
#define CL_INVALID_PARTITION_COUNT_EXT -1058
#define CL_INVALID_PARTITION_NAME_EXT -1059
/* CL_AFFINITY_DOMAINs */
#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1
#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2
#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3
#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4
#define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10
#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100
/* cl_device_partition_property_ext list terminators */
#define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0)
#define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0)
#define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1)
/* cl_ext_atomic_counters_32 and cl_ext_atomic_counters_64 extensions
* no extension #define since they have no functions
*/
#define CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT 0x4032
/*********************************
* cl_qcom_ext_host_ptr extension
*********************************/
#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29)
#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0
#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1
#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2
#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3
#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4
#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5
#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6
#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7
typedef cl_uint cl_image_pitch_info_qcom;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceImageInfoQCOM(cl_device_id device,
size_t image_width,
size_t image_height,
const cl_image_format *image_format,
cl_image_pitch_info_qcom param_name,
size_t param_value_size,
void *param_value,
size_t *param_value_size_ret);
typedef struct _cl_mem_ext_host_ptr
{
/* Type of external memory allocation. */
/* Legal values will be defined in layered extensions. */
cl_uint allocation_type;
/* Host cache policy for this external memory allocation. */
cl_uint host_cache_policy;
} cl_mem_ext_host_ptr;
/*********************************
* cl_qcom_ion_host_ptr extension
*********************************/
#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8
typedef struct _cl_mem_ion_host_ptr
{
/* Type of external memory allocation. */
/* Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. */
cl_mem_ext_host_ptr ext_host_ptr;
/* ION file descriptor */
int ion_filedesc;
/* Host pointer to the ION allocated memory */
void* ion_hostptr;
} cl_mem_ion_host_ptr;
#endif /* CL_VERSION_1_1 */
#if defined(CL_VERSION_1_2)
/******************************************
* cl_img_yuv_image extension *
******************************************/
/* Image formats used in clCreateImage */
#define CL_NV21_IMG 0x40D0
#define CL_YV12_IMG 0x40D1
/******************************************
* cl_img_cached_allocations extension *
******************************************/
/* Flag values used by clCreteBuffer */
#define CL_MEM_USE_UNCACHED_CPU_MEMORY_IMG (1 << 26)
#define CL_MEM_USE_CACHED_CPU_MEMORY_IMG (1 << 27)
/******************************************
* cl_img_use_gralloc_ptr extension *
******************************************/
/* Flag values used by clCreteBuffer */
#define CL_MEM_USE_GRALLOC_PTR_IMG (1 << 28)
/* To be used by clGetEventInfo: */
#define CL_COMMAND_ACQUIRE_GRALLOC_OBJECTS_IMG 0x40D2
#define CL_COMMAND_RELEASE_GRALLOC_OBJECTS_IMG 0x40D3
/* Error code from clEnqueueReleaseGrallocObjectsIMG */
#define CL_GRALLOC_RESOURCE_NOT_ACQUIRED_IMG 0x40D4
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireGrallocObjectsIMG(cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem * /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseGrallocObjectsIMG(cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem * /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_2;
#endif /* CL_VERSION_1_2 */
#ifdef CL_VERSION_2_0
/*********************************
* cl_khr_subgroups extension
*********************************/
#define cl_khr_subgroups 1
/* cl_kernel_sub_group_info is declared in CL.h. */
/* cl_kernel_sub_group_info */
#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR 0x2033
#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR 0x2034
extern CL_API_ENTRY cl_int CL_API_CALL
clGetKernelSubGroupInfoKHR(cl_kernel /* in_kernel */,
cl_device_id /*in_device*/,
cl_kernel_sub_group_info /* param_name */,
size_t /*input_value_size*/,
const void * /*input_value*/,
size_t /*param_value_size*/,
void* /*param_value*/,
size_t* /*param_value_size_ret*/ ) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED;
typedef CL_API_ENTRY cl_int
( CL_API_CALL * clGetKernelSubGroupInfoKHR_fn)(cl_kernel /* in_kernel */,
cl_device_id /*in_device*/,
cl_kernel_sub_group_info /* param_name */,
size_t /*input_value_size*/,
const void * /*input_value*/,
size_t /*param_value_size*/,
void* /*param_value*/,
size_t* /*param_value_size_ret*/ ) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED;
#endif /* CL_VERSION_2_0 */
#ifdef CL_VERSION_2_1
/*********************************
* cl_khr_priority_hints extension
*********************************/
#define cl_khr_priority_hints 1
typedef cl_uint cl_queue_priority_khr;
/* cl_command_queue_properties */
#define CL_QUEUE_PRIORITY_KHR 0x1096
/* cl_queue_priority_khr */
#define CL_QUEUE_PRIORITY_HIGH_KHR (1<<0)
#define CL_QUEUE_PRIORITY_MED_KHR (1<<1)
#define CL_QUEUE_PRIORITY_LOW_KHR (1<<2)
#endif /* CL_VERSION_2_1 */
#ifdef CL_VERSION_2_1
/*********************************
* cl_khr_throttle_hints extension
*********************************/
#define cl_khr_throttle_hints 1
typedef cl_uint cl_queue_throttle_khr;
/* cl_command_queue_properties */
#define CL_QUEUE_THROTTLE_KHR 0x1097
/* cl_queue_throttle_khr */
#define CL_QUEUE_THROTTLE_HIGH_KHR (1<<0)
#define CL_QUEUE_THROTTLE_MED_KHR (1<<1)
#define CL_QUEUE_THROTTLE_LOW_KHR (1<<2)
#endif /* CL_VERSION_2_1 */
#ifdef CL_VERSION_2_2
/*********************************
* cl_khr_subgroup_named_barrier
*********************************/
#define cl_khr_subgroup_named_barrier 1
/* cl_device_info */
#define CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR 0x2035
#endif /* CL_VERSION_2_2 */
/**********************************
* cl_arm_import_memory extension *
**********************************/
#ifdef CL_VERSION_1_0
typedef intptr_t cl_import_properties_arm;
/* Default and valid proporties name for cl_arm_import_memory */
#define CL_IMPORT_TYPE_ARM 0x40B2
/* Host process memory type default value for CL_IMPORT_TYPE_ARM property */
#define CL_IMPORT_TYPE_HOST_ARM 0x40B3
/* DMA BUF memory type value for CL_IMPORT_TYPE_ARM property */
#define CL_IMPORT_TYPE_DMA_BUF_ARM 0x40B4
/* Secure DMA BUF memory type value for CL_IMPORT_TYPE_ARM property */
#define CL_IMPORT_TYPE_SECURE_ARM 0x40B5
/* This extension adds a new function that allows for direct memory import into
* OpenCL via the clImportMemoryARM function.
*
* Memory imported through this interface will be mapped into the device's page
* tables directly, providing zero copy access. It will never fall back to copy
* operations and aliased buffers.
*
* Types of memory supported for import are specified as additional extension
* strings.
*
* This extension produces cl_mem allocations which are compatible with all other
* users of cl_mem in the standard API.
*
* This extension maps pages with the same properties as the normal buffer creation
* function clCreateBuffer.
*/
extern CL_API_ENTRY cl_mem CL_API_CALL
clImportMemoryARM( cl_context context,
cl_mem_flags flags,
const cl_import_properties_arm *properties,
void *memory,
size_t size,
cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_0;
#endif /* CL_VERSION_1_0 */
/******************************************
* cl_arm_shared_virtual_memory extension *
******************************************/
#ifdef CL_VERSION_1_2
/* Used by clGetDeviceInfo */
#define CL_DEVICE_SVM_CAPABILITIES_ARM 0x40B6
/* Used by clGetMemObjectInfo */
#define CL_MEM_USES_SVM_POINTER_ARM 0x40B7
/* Used by clSetKernelExecInfoARM: */
#define CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM 0x40B8
#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM 0x40B9
/* To be used by clGetEventInfo: */
#define CL_COMMAND_SVM_FREE_ARM 0x40BA
#define CL_COMMAND_SVM_MEMCPY_ARM 0x40BB
#define CL_COMMAND_SVM_MEMFILL_ARM 0x40BC
#define CL_COMMAND_SVM_MAP_ARM 0x40BD
#define CL_COMMAND_SVM_UNMAP_ARM 0x40BE
/* Flag values returned by clGetDeviceInfo with CL_DEVICE_SVM_CAPABILITIES_ARM as the param_name. */
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_ARM (1 << 0)
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_ARM (1 << 1)
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_ARM (1 << 2)
#define CL_DEVICE_SVM_ATOMICS_ARM (1 << 3)
/* Flag values used by clSVMAllocARM: */
#define CL_MEM_SVM_FINE_GRAIN_BUFFER_ARM (1 << 10)
#define CL_MEM_SVM_ATOMICS_ARM (1 << 11)
typedef cl_bitfield cl_svm_mem_flags_arm;
typedef cl_uint cl_kernel_exec_info_arm;
typedef cl_bitfield cl_device_svm_capabilities_arm;
extern CL_API_ENTRY void * CL_API_CALL
clSVMAllocARM(cl_context /* context */,
cl_svm_mem_flags_arm /* flags */,
size_t /* size */,
cl_uint /* alignment */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY void CL_API_CALL
clSVMFreeARM(cl_context /* context */,
void * /* svm_pointer */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMFreeARM(cl_command_queue /* command_queue */,
cl_uint /* num_svm_pointers */,
void *[] /* svm_pointers[] */,
void (CL_CALLBACK * /*pfn_free_func*/)(cl_command_queue /* queue */,
cl_uint /* num_svm_pointers */,
void *[] /* svm_pointers[] */,
void * /* user_data */),
void * /* user_data */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMMemcpyARM(cl_command_queue /* command_queue */,
cl_bool /* blocking_copy */,
void * /* dst_ptr */,
const void * /* src_ptr */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMMemFillARM(cl_command_queue /* command_queue */,
void * /* svm_ptr */,
const void * /* pattern */,
size_t /* pattern_size */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMMapARM(cl_command_queue /* command_queue */,
cl_bool /* blocking_map */,
cl_map_flags /* flags */,
void * /* svm_ptr */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMUnmapARM(cl_command_queue /* command_queue */,
void * /* svm_ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clSetKernelArgSVMPointerARM(cl_kernel /* kernel */,
cl_uint /* arg_index */,
const void * /* arg_value */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clSetKernelExecInfoARM(cl_kernel /* kernel */,
cl_kernel_exec_info_arm /* param_name */,
size_t /* param_value_size */,
const void * /* param_value */) CL_EXT_SUFFIX__VERSION_1_2;
#endif /* CL_VERSION_1_2 */
#ifdef __cplusplus
}
#endif
#endif /* __CL_EXT_H */

View File

@@ -0,0 +1,429 @@
/*******************************************************************************
* Copyright (c) 2008-2017 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
/*****************************************************************************\
Copyright (c) 2013-2017 Intel Corporation All Rights Reserved.
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
File Name: cl_ext_intel.h
Abstract:
Notes:
\*****************************************************************************/
#ifndef __CL_EXT_INTEL_H
#define __CL_EXT_INTEL_H
#ifdef __APPLE__
#include <OpenCL/cl.h>
#include <OpenCL/cl_platform.h>
#else
#include <CL/cl.h>
#include <CL/cl_platform.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/***************************************
* cl_intel_thread_local_exec extension *
****************************************/
#define cl_intel_thread_local_exec 1
#define CL_QUEUE_THREAD_LOCAL_EXEC_ENABLE_INTEL (((cl_bitfield)1) << 31)
/***********************************************
* cl_intel_device_partition_by_names extension *
************************************************/
#define cl_intel_device_partition_by_names 1
#define CL_DEVICE_PARTITION_BY_NAMES_INTEL 0x4052
#define CL_PARTITION_BY_NAMES_LIST_END_INTEL -1
/************************************************
* cl_intel_accelerator extension *
* cl_intel_motion_estimation extension *
* cl_intel_advanced_motion_estimation extension *
*************************************************/
#define cl_intel_accelerator 1
#define cl_intel_motion_estimation 1
#define cl_intel_advanced_motion_estimation 1
typedef struct _cl_accelerator_intel* cl_accelerator_intel;
typedef cl_uint cl_accelerator_type_intel;
typedef cl_uint cl_accelerator_info_intel;
typedef struct _cl_motion_estimation_desc_intel {
cl_uint mb_block_type;
cl_uint subpixel_mode;
cl_uint sad_adjust_mode;
cl_uint search_path_type;
} cl_motion_estimation_desc_intel;
/* error codes */
#define CL_INVALID_ACCELERATOR_INTEL -1094
#define CL_INVALID_ACCELERATOR_TYPE_INTEL -1095
#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL -1096
#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL -1097
/* cl_accelerator_type_intel */
#define CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL 0x0
/* cl_accelerator_info_intel */
#define CL_ACCELERATOR_DESCRIPTOR_INTEL 0x4090
#define CL_ACCELERATOR_REFERENCE_COUNT_INTEL 0x4091
#define CL_ACCELERATOR_CONTEXT_INTEL 0x4092
#define CL_ACCELERATOR_TYPE_INTEL 0x4093
/* cl_motion_detect_desc_intel flags */
#define CL_ME_MB_TYPE_16x16_INTEL 0x0
#define CL_ME_MB_TYPE_8x8_INTEL 0x1
#define CL_ME_MB_TYPE_4x4_INTEL 0x2
#define CL_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0
#define CL_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1
#define CL_ME_SUBPIXEL_MODE_QPEL_INTEL 0x2
#define CL_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0
#define CL_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x1
#define CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL 0x0
#define CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL 0x1
#define CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL 0x5
#define CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL 0x0
#define CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL 0x1
#define CL_ME_LUMA_INTRA_PREDICT_ENABLED_INTEL 0x2
#define CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL 0x4
#define CL_ME_FORWARD_INPUT_MODE_INTEL 0x1
#define CL_ME_BACKWARD_INPUT_MODE_INTEL 0x2
#define CL_ME_BIDIRECTION_INPUT_MODE_INTEL 0x3
#define CL_ME_BIDIR_WEIGHT_QUARTER_INTEL 16
#define CL_ME_BIDIR_WEIGHT_THIRD_INTEL 21
#define CL_ME_BIDIR_WEIGHT_HALF_INTEL 32
#define CL_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 43
#define CL_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 48
#define CL_ME_COST_PENALTY_NONE_INTEL 0x0
#define CL_ME_COST_PENALTY_LOW_INTEL 0x1
#define CL_ME_COST_PENALTY_NORMAL_INTEL 0x2
#define CL_ME_COST_PENALTY_HIGH_INTEL 0x3
#define CL_ME_COST_PRECISION_QPEL_INTEL 0x0
#define CL_ME_COST_PRECISION_HPEL_INTEL 0x1
#define CL_ME_COST_PRECISION_PEL_INTEL 0x2
#define CL_ME_COST_PRECISION_DPEL_INTEL 0x3
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
#define CL_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2
#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3
#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
#define CL_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8
#define CL_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0
#define CL_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
#define CL_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2
#define CL_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3
/* cl_device_info */
#define CL_DEVICE_ME_VERSION_INTEL 0x407E
#define CL_ME_VERSION_LEGACY_INTEL 0x0
#define CL_ME_VERSION_ADVANCED_VER_1_INTEL 0x1
#define CL_ME_VERSION_ADVANCED_VER_2_INTEL 0x2
extern CL_API_ENTRY cl_accelerator_intel CL_API_CALL
clCreateAcceleratorINTEL(
cl_context /* context */,
cl_accelerator_type_intel /* accelerator_type */,
size_t /* descriptor_size */,
const void* /* descriptor */,
cl_int* /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_accelerator_intel (CL_API_CALL *clCreateAcceleratorINTEL_fn)(
cl_context /* context */,
cl_accelerator_type_intel /* accelerator_type */,
size_t /* descriptor_size */,
const void* /* descriptor */,
cl_int* /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetAcceleratorInfoINTEL(
cl_accelerator_intel /* accelerator */,
cl_accelerator_info_intel /* param_name */,
size_t /* param_value_size */,
void* /* param_value */,
size_t* /* param_value_size_ret */) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetAcceleratorInfoINTEL_fn)(
cl_accelerator_intel /* accelerator */,
cl_accelerator_info_intel /* param_name */,
size_t /* param_value_size */,
void* /* param_value */,
size_t* /* param_value_size_ret */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainAcceleratorINTEL(
cl_accelerator_intel /* accelerator */) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clRetainAcceleratorINTEL_fn)(
cl_accelerator_intel /* accelerator */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseAcceleratorINTEL(
cl_accelerator_intel /* accelerator */) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clReleaseAcceleratorINTEL_fn)(
cl_accelerator_intel /* accelerator */) CL_EXT_SUFFIX__VERSION_1_2;
/******************************************
* cl_intel_simultaneous_sharing extension *
*******************************************/
#define cl_intel_simultaneous_sharing 1
#define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL 0x4104
#define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL 0x4105
/***********************************
* cl_intel_egl_image_yuv extension *
************************************/
#define cl_intel_egl_image_yuv 1
#define CL_EGL_YUV_PLANE_INTEL 0x4107
/********************************
* cl_intel_packed_yuv extension *
*********************************/
#define cl_intel_packed_yuv 1
#define CL_YUYV_INTEL 0x4076
#define CL_UYVY_INTEL 0x4077
#define CL_YVYU_INTEL 0x4078
#define CL_VYUY_INTEL 0x4079
/********************************************
* cl_intel_required_subgroup_size extension *
*********************************************/
#define cl_intel_required_subgroup_size 1
#define CL_DEVICE_SUB_GROUP_SIZES_INTEL 0x4108
#define CL_KERNEL_SPILL_MEM_SIZE_INTEL 0x4109
#define CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL 0x410A
/****************************************
* cl_intel_driver_diagnostics extension *
*****************************************/
#define cl_intel_driver_diagnostics 1
typedef cl_uint cl_diagnostics_verbose_level;
#define CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL 0x4106
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL ( 0xff )
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL ( 1 )
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL ( 1 << 1 )
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL ( 1 << 2 )
/********************************
* cl_intel_planar_yuv extension *
*********************************/
#define CL_NV12_INTEL 0x410E
#define CL_MEM_NO_ACCESS_INTEL ( 1 << 24 )
#define CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL ( 1 << 25 )
#define CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL 0x417E
#define CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL 0x417F
/*******************************************************
* cl_intel_device_side_avc_motion_estimation extension *
********************************************************/
#define CL_DEVICE_AVC_ME_VERSION_INTEL 0x410B
#define CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL 0x410C
#define CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL 0x410D
#define CL_AVC_ME_VERSION_0_INTEL 0x0; // No support.
#define CL_AVC_ME_VERSION_1_INTEL 0x1; // First supported version.
#define CL_AVC_ME_MAJOR_16x16_INTEL 0x0
#define CL_AVC_ME_MAJOR_16x8_INTEL 0x1
#define CL_AVC_ME_MAJOR_8x16_INTEL 0x2
#define CL_AVC_ME_MAJOR_8x8_INTEL 0x3
#define CL_AVC_ME_MINOR_8x8_INTEL 0x0
#define CL_AVC_ME_MINOR_8x4_INTEL 0x1
#define CL_AVC_ME_MINOR_4x8_INTEL 0x2
#define CL_AVC_ME_MINOR_4x4_INTEL 0x3
#define CL_AVC_ME_MAJOR_FORWARD_INTEL 0x0
#define CL_AVC_ME_MAJOR_BACKWARD_INTEL 0x1
#define CL_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2
#define CL_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0
#define CL_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E
#define CL_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D
#define CL_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B
#define CL_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77
#define CL_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F
#define CL_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F
#define CL_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F
#define CL_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0
#define CL_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1
#define CL_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2
#define CL_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3
#define CL_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4
#define CL_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5
#define CL_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6
#define CL_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7
#define CL_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8
#define CL_AVC_ME_SEARCH_WINDOW_16x12_RADIUS_INTEL 0x9
#define CL_AVC_ME_SEARCH_WINDOW_4x4_RADIUS_INTEL 0x2
#define CL_AVC_ME_SEARCH_WINDOW_2x2_RADIUS_INTEL 0xa
#define CL_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0
#define CL_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2
#define CL_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0
#define CL_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1
#define CL_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3
#define CL_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0
#define CL_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1
#define CL_AVC_ME_COST_PRECISION_PEL_INTEL 0x2
#define CL_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3
#define CL_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10
#define CL_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15
#define CL_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20
#define CL_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B
#define CL_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30
#define CL_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0
#define CL_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2
#define CL_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4
#define CL_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8
#define CL_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0
#define CL_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000
#define CL_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL ( 0x1 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL ( 0x2 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL ( 0x3 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL ( 0x55 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL ( 0xAA << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL ( 0xFF << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL ( 0x1 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL ( 0x2 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL ( 0x1 << 26 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL ( 0x2 << 26 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL ( 0x1 << 28 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL ( 0x2 << 28 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL ( 0x1 << 30 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL ( 0x2 << 30 )
#define CL_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00
#define CL_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80
#define CL_AVC_ME_INTRA_16x16_INTEL 0x0
#define CL_AVC_ME_INTRA_8x8_INTEL 0x1
#define CL_AVC_ME_INTRA_4x4_INTEL 0x2
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3
#define CL_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3
#define CL_AVC_ME_FRAME_FORWARD_INTEL 0x1
#define CL_AVC_ME_FRAME_BACKWARD_INTEL 0x2
#define CL_AVC_ME_FRAME_DUAL_INTEL 0x3
#define CL_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0
#define CL_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1
#define CL_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2
#define CL_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0
#define CL_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1
#ifdef __cplusplus
}
#endif
#endif /* __CL_EXT_INTEL_H */

167
include/triton/external/CL/cl_gl.h vendored Normal file
View File

@@ -0,0 +1,167 @@
/**********************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
#ifndef __OPENCL_CL_GL_H
#define __OPENCL_CL_GL_H
#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
typedef cl_uint cl_gl_object_type;
typedef cl_uint cl_gl_texture_info;
typedef cl_uint cl_gl_platform_info;
typedef struct __GLsync *cl_GLsync;
/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */
#define CL_GL_OBJECT_BUFFER 0x2000
#define CL_GL_OBJECT_TEXTURE2D 0x2001
#define CL_GL_OBJECT_TEXTURE3D 0x2002
#define CL_GL_OBJECT_RENDERBUFFER 0x2003
#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E
#define CL_GL_OBJECT_TEXTURE1D 0x200F
#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010
#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011
/* cl_gl_texture_info */
#define CL_GL_TEXTURE_TARGET 0x2004
#define CL_GL_MIPMAP_LEVEL 0x2005
#define CL_GL_NUM_SAMPLES 0x2012
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLBuffer(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLuint /* bufobj */,
int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLTexture(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLenum /* target */,
cl_GLint /* miplevel */,
cl_GLuint /* texture */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLRenderbuffer(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLuint /* renderbuffer */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLObjectInfo(cl_mem /* memobj */,
cl_gl_object_type * /* gl_object_type */,
cl_GLuint * /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLTextureInfo(cl_mem /* memobj */,
cl_gl_texture_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireGLObjects(cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem * /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseGLObjects(cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem * /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
/* Deprecated OpenCL 1.1 APIs */
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
clCreateFromGLTexture2D(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLenum /* target */,
cl_GLint /* miplevel */,
cl_GLuint /* texture */,
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
clCreateFromGLTexture3D(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLenum /* target */,
cl_GLint /* miplevel */,
cl_GLuint /* texture */,
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
/* cl_khr_gl_sharing extension */
#define cl_khr_gl_sharing 1
typedef cl_uint cl_gl_context_info;
/* Additional Error Codes */
#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000
/* cl_gl_context_info */
#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006
#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007
/* Additional cl_context_properties */
#define CL_GL_CONTEXT_KHR 0x2008
#define CL_EGL_DISPLAY_KHR 0x2009
#define CL_GLX_DISPLAY_KHR 0x200A
#define CL_WGL_HDC_KHR 0x200B
#define CL_CGL_SHAREGROUP_KHR 0x200C
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLContextInfoKHR(const cl_context_properties * /* properties */,
cl_gl_context_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
const cl_context_properties * properties,
cl_gl_context_info param_name,
size_t param_value_size,
void * param_value,
size_t * param_value_size_ret);
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_GL_H */

74
include/triton/external/CL/cl_gl_ext.h vendored Normal file
View File

@@ -0,0 +1,74 @@
/**********************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
/* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have */
/* OpenGL dependencies. */
#ifndef __OPENCL_CL_GL_EXT_H
#define __OPENCL_CL_GL_EXT_H
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __APPLE__
#include <OpenCL/cl_gl.h>
#else
#include <CL/cl_gl.h>
#endif
/*
* For each extension, follow this template
* cl_VEN_extname extension */
/* #define cl_VEN_extname 1
* ... define new types, if any
* ... define new tokens, if any
* ... define new APIs, if any
*
* If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header
* This allows us to avoid having to decide whether to include GL headers or GLES here.
*/
/*
* cl_khr_gl_event extension
* See section 9.9 in the OpenCL 1.1 spec for more information
*/
#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D
extern CL_API_ENTRY cl_event CL_API_CALL
clCreateEventFromGLsyncKHR(cl_context /* context */,
cl_GLsync /* cl_GLsync */,
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_GL_EXT_H */

1458
include/triton/external/CL/cl_platform.h vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,172 @@
/**********************************************************************************
* Copyright (c) 2008-2016 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/*****************************************************************************\
Copyright (c) 2013-2016 Intel Corporation All Rights Reserved.
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
File Name: cl_va_api_media_sharing_intel.h
Abstract:
Notes:
\*****************************************************************************/
#ifndef __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
#define __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
#include <CL/cl.h>
#include <CL/cl_platform.h>
#include <va/va.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************
* cl_intel_va_api_media_sharing extension *
*******************************************/
#define cl_intel_va_api_media_sharing 1
/* error codes */
#define CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL -1098
#define CL_INVALID_VA_API_MEDIA_SURFACE_INTEL -1099
#define CL_VA_API_MEDIA_SURFACE_ALREADY_ACQUIRED_INTEL -1100
#define CL_VA_API_MEDIA_SURFACE_NOT_ACQUIRED_INTEL -1101
/* cl_va_api_device_source_intel */
#define CL_VA_API_DISPLAY_INTEL 0x4094
/* cl_va_api_device_set_intel */
#define CL_PREFERRED_DEVICES_FOR_VA_API_INTEL 0x4095
#define CL_ALL_DEVICES_FOR_VA_API_INTEL 0x4096
/* cl_context_info */
#define CL_CONTEXT_VA_API_DISPLAY_INTEL 0x4097
/* cl_mem_info */
#define CL_MEM_VA_API_MEDIA_SURFACE_INTEL 0x4098
/* cl_image_info */
#define CL_IMAGE_VA_API_PLANE_INTEL 0x4099
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_VA_API_MEDIA_SURFACES_INTEL 0x409A
#define CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL 0x409B
typedef cl_uint cl_va_api_device_source_intel;
typedef cl_uint cl_va_api_device_set_intel;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceIDsFromVA_APIMediaAdapterINTEL(
cl_platform_id /* platform */,
cl_va_api_device_source_intel /* media_adapter_type */,
void* /* media_adapter */,
cl_va_api_device_set_intel /* media_adapter_set */,
cl_uint /* num_entries */,
cl_device_id* /* devices */,
cl_uint* /* num_devices */) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL * clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)(
cl_platform_id /* platform */,
cl_va_api_device_source_intel /* media_adapter_type */,
void* /* media_adapter */,
cl_va_api_device_set_intel /* media_adapter_set */,
cl_uint /* num_entries */,
cl_device_id* /* devices */,
cl_uint* /* num_devices */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromVA_APIMediaSurfaceINTEL(
cl_context /* context */,
cl_mem_flags /* flags */,
VASurfaceID* /* surface */,
cl_uint /* plane */,
cl_int* /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_mem (CL_API_CALL * clCreateFromVA_APIMediaSurfaceINTEL_fn)(
cl_context /* context */,
cl_mem_flags /* flags */,
VASurfaceID* /* surface */,
cl_uint /* plane */,
cl_int* /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireVA_APIMediaSurfacesINTEL(
cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem* /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)(
cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem* /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseVA_APIMediaSurfacesINTEL(
cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem* /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)(
cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem* /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */) CL_EXT_SUFFIX__VERSION_1_2;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H */

59
include/triton/external/CL/opencl.h vendored Normal file
View File

@@ -0,0 +1,59 @@
/*******************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
#ifndef __OPENCL_H
#define __OPENCL_H
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __APPLE__
#include <OpenCL/cl.h>
#include <OpenCL/cl_gl.h>
#include <OpenCL/cl_gl_ext.h>
#include <OpenCL/cl_ext.h>
#else
#include <CL/cl.h>
#include <CL/cl_gl.h>
#include <CL/cl_gl_ext.h>
#include <CL/cl_ext.h>
#endif
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_H */

View File

@@ -39,7 +39,7 @@ public:
std::vector<unsigned> global_range_size; std::vector<unsigned> global_range_size;
unsigned num_threads; unsigned num_threads;
}; };
typedef std::function<double(driver::kernel, launch_information)> benchmark_t; typedef std::function<double(driver::cu_kernel, launch_information)> benchmark_t;
struct passes_wrapper { struct passes_wrapper {
passes_wrapper(): shared(&buffer_info), liveness(&buffer_info), passes_wrapper(): shared(&buffer_info), liveness(&buffer_info),
@@ -74,17 +74,17 @@ private:
std::unique_ptr<ir::module> make_triton_module(const std::string &src); std::unique_ptr<ir::module> make_triton_module(const std::string &src);
public: public:
jit(driver::context context); jit(driver::context* context);
void autotune(const std::string &src, benchmark_t benchmark); void autotune(const std::string &src, benchmark_t benchmark);
void add_module(ir::module &module, const std::vector<unsigned>& params = {}); void add_module(ir::module &module, const std::vector<unsigned>& params = {});
void add_module(const std::string &src, const std::vector<unsigned>& params = {}); void add_module(const std::string &src, const std::vector<unsigned>& params = {});
driver::kernel get_function(const std::string &name); driver::cu_kernel get_function(const std::string &name);
launch_information get_launch_info(const std::string &name); launch_information get_launch_info(const std::string &name);
unsigned get_int(const std::string &name); unsigned get_int(const std::string &name);
private: private:
std::vector<driver::module> modules_; std::vector<driver::cu_module> modules_;
driver::context driver_context_; driver::context* driver_context_;
llvm::LLVMContext llvm_context_; llvm::LLVMContext llvm_context_;
ir::context triton_context_; ir::context triton_context_;
std::map<std::string, launch_information> launch_info_map_; std::map<std::string, launch_information> launch_info_map_;

View File

@@ -37,6 +37,58 @@ namespace triton
namespace driver namespace driver
{ {
/*-----------------------------------*/
//----------- Platforms ------------*/
/*-----------------------------------*/
void backend::platforms::init() {
if(!cache_.empty())
return;
//if CUDA is here
if(dispatch::cuinit()){
cache_.push_back(new cu_platform());
}
//if OpenCL is here
if(dispatch::clinit()){
cl_uint num_platforms;
dispatch::clGetPlatformIDs(0, nullptr, &num_platforms);
std::vector<cl_platform_id> ids(num_platforms);
dispatch::clGetPlatformIDs(num_platforms, ids.data(), nullptr);
for(cl_platform_id id: ids)
cache_.push_back(new cl_platform(id));
}
if(cache_.empty())
throw std::runtime_error("ISAAC: No backend available. Make sure CUDA is available in your library path");
}
void backend::platforms::get(std::vector<platform *> &results) {
std::copy(cache_.begin(), cache_.end(), std::back_inserter(results));
}
std::vector<driver::platform*> backend::platforms::cache_;
/*-----------------------------------*/
//----------- Devices --------------*/
/*-----------------------------------*/
void backend::devices::init(std::vector<platform*> const & platforms) {
if(!cache_.empty())
return;
for(driver::platform* pf: platforms)
pf->devices(cache_);
if(cache_.empty())
throw std::runtime_error("ISAAC: No device available. Make sure that your platform is configured properly");
}
void backend::devices::get(std::vector<device*> &devs) {
std::copy(cache_.begin(), cache_.end(), std::back_inserter(devs));
}
std::vector<driver::device*> backend::devices::cache_;
/*-----------------------------------*/ /*-----------------------------------*/
//---------- Modules ----------------*/ //---------- Modules ----------------*/
/*-----------------------------------*/ /*-----------------------------------*/
@@ -47,14 +99,14 @@ void backend::modules::release(){
cache_.clear(); cache_.clear();
} }
module& backend::modules::get(driver::stream const & stream, std::string const & name, std::string const & src){ driver::module* backend::modules::get(driver::stream* stream, std::string const & name, std::string const & src){
std::tuple<driver::stream, std::string> key(stream, name); std::tuple<driver::stream*, std::string> key(stream, name);
if(cache_.find(key)==cache_.end()) if(cache_.find(key)==cache_.end())
return *cache_.insert(std::make_pair(key, new module(stream.context(), src))).first->second; return &*cache_.insert(std::make_pair(key, new driver::cu_module(((driver::cu_stream*)stream)->context(), src))).first->second;
return *cache_.at(key); return &*cache_.at(key);
} }
std::map<std::tuple<stream, std::string>, module * > backend::modules::cache_; std::map<std::tuple<driver::stream*, std::string>, driver::module*> backend::modules::cache_;
/*-----------------------------------*/ /*-----------------------------------*/
//----------- Kernels --------------*/ //----------- Kernels --------------*/
@@ -66,23 +118,23 @@ void backend::kernels::release(){
cache_.clear(); cache_.clear();
} }
kernel & backend::kernels::get(driver::module const & program, std::string const & name){ driver::kernel* backend::kernels::get(driver::module *mod, std::string const & name){
std::tuple<module, std::string> key(program, name); std::tuple<driver::module*, std::string> key(mod, name);
if(cache_.find(key)==cache_.end()) if(cache_.find(key)==cache_.end())
return *cache_.insert(std::make_pair(key, new kernel(program, name.c_str()))).first->second; return &*cache_.insert(std::make_pair(key, new driver::cu_kernel((driver::cu_module*)mod, name.c_str()))).first->second;
return *cache_.at(key); return cache_.at(key);
} }
std::map<std::tuple<module, std::string>, kernel * > backend::kernels::cache_; std::map<std::tuple<driver::module*, std::string>, driver::kernel*> backend::kernels::cache_;
/*-----------------------------------*/ /*-----------------------------------*/
//------------ Queues --------------*/ //------------ Queues --------------*/
/*-----------------------------------*/ /*-----------------------------------*/
void backend::streams::init(std::list<const context *> const & contexts){ void backend::streams::init(std::list<driver::context*> const & contexts){
for(context const * ctx : contexts) for(driver::context* ctx : contexts)
if(cache_.find(*ctx)==cache_.end()) if(cache_.find(ctx)==cache_.end())
cache_.insert(std::make_pair(*ctx, std::vector<stream*>{new stream(*ctx)})); cache_.insert(std::make_pair(ctx, std::vector<driver::stream*>{new driver::cu_stream(ctx)}));
} }
void backend::streams::release(){ void backend::streams::release(){
@@ -92,33 +144,31 @@ void backend::streams::release(){
cache_.clear(); cache_.clear();
} }
stream & backend::streams::get_default() driver::stream* backend::streams::get_default()
{ return get(contexts::get_default(), 0); } { return get(contexts::get_default(), 0); }
stream & backend::streams::get(driver::context const & context, unsigned int id){ driver::stream* backend::streams::get(driver::context* context, unsigned int id){
init(std::list<driver::context const *>(1,&context)); init(std::list<driver::context*>(1,context));
for(auto & x : cache_) for(auto & x : cache_)
if(x.first==context) if(x.first==context)
return *x.second[id]; return x.second[id];
throw; throw;
} }
void backend::streams::get(driver::context const & context, std::vector<stream*> & queues){ void backend::streams::get(driver::context* context, std::vector<driver::stream*> & queues){
init(std::list<driver::context const *>(1,&context)); init(std::list<driver::context*>(1,context));
queues = cache_.at(context); queues = cache_.at(context);
} }
std::map<context, std::vector<stream*> > backend::streams::cache_; std::map<driver::context*, std::vector<driver::stream*>> backend::streams::cache_;
/*-----------------------------------*/ /*-----------------------------------*/
//------------ Contexts ------------*/ //------------ Contexts ------------*/
/*-----------------------------------*/ /*-----------------------------------*/
void backend::contexts::init(std::vector<platform> const & platforms){ void backend::contexts::init(std::vector<driver::device*> const & devices){
for(platform const & platform: platforms){ for(driver::device* dvc: devices)
for(device const & device: platform.devices()) cache_.push_back(new cu_context(dvc));
cache_.push_back(new context(device));
}
} }
void backend::contexts::release(){ void backend::contexts::release(){
@@ -127,19 +177,19 @@ void backend::contexts::release(){
cache_.clear(); cache_.clear();
} }
driver::context const & backend::contexts::get_default(){ driver::context* backend::contexts::get_default(){
backend::init(); backend::init();
std::list<context const *>::const_iterator it = cache_.begin(); auto it = cache_.begin();
std::advance(it, default_device); std::advance(it, default_device);
return **it; return *it;
} }
void backend::contexts::get(std::list<context const *> & contexts){ void backend::contexts::get(std::list<driver::context*> & contexts){
backend::init(); backend::init();
contexts = cache_; contexts = cache_;
} }
std::list<context const *> backend::contexts::cache_; std::list<driver::context*> backend::contexts::cache_;
@@ -147,28 +197,8 @@ std::list<context const *> backend::contexts::cache_;
//------------ General -------------*/ //------------ General -------------*/
/*-----------------------------------*/ /*-----------------------------------*/
std::vector<device> backend::devices(){ void backend::synchronize(driver::context* context){
std::vector<platform> platforms = backend::platforms(); for(driver::stream * queue: streams::cache_.at(context))
std::vector<device> result;
for(platform const & platform: platforms){
auto devices = platform.devices();
result.insert(result.end(), devices.begin(), devices.end());
}
return result;
}
std::vector<platform> backend::platforms(){
std::vector<platform> platforms;
//if CUDA is here
if(dispatch::cuinit())
platforms.push_back(platform());
if(platforms.empty())
throw std::runtime_error("ISAAC: No backend available. Make sure CUDA is available in your library path");
return platforms;
}
void backend::synchronize(driver::context const & context){
for(stream * queue: streams::cache_.at(context))
queue->synchronize(); queue->synchronize();
} }
@@ -184,8 +214,13 @@ void backend::release(){
void backend::init(){ void backend::init(){
if(!contexts::cache_.empty()) if(!contexts::cache_.empty())
return; return;
std::vector<platform> platforms = backend::platforms(); // initialize platforms
contexts::init(platforms); backend::platforms::init();
// initialize devices
backend::devices::init(platforms::cache_);
// initialize contexts
backend::contexts::init(devices::cache_);
// initialize streams
streams::init(contexts::cache_); streams::init(contexts::cache_);
} }

View File

@@ -33,28 +33,46 @@ namespace triton
namespace driver namespace driver
{ {
buffer::buffer(driver::context const & context, size_t size) : context_(context)
{ //
ContextSwitcher ctx_switch(context_);
buffer::buffer(driver::context* ctx, CUdeviceptr cu, bool take_ownership)
: polymorphic_resource(cu, take_ownership), context_(ctx) { }
buffer::buffer(driver::context* ctx, cl_mem cl, bool take_ownership)
: polymorphic_resource(cl, take_ownership), context_(ctx) { }
driver::context* buffer::context() {
return context_;
}
//
ocl_buffer::ocl_buffer(driver::context* context, size_t size)
: buffer(context, cl_mem(), true){
cl_int err;
dispatch::clCreateBuffer(*context->cl(), CL_MEM_READ_WRITE, size, NULL, &err);
}
//
cu_buffer::cu_buffer(driver::context* context, size_t size)
: buffer(context, CUdeviceptr(), true) {
cu_context::context_switcher ctx_switch(*context_);
dispatch::cuMemAlloc(&*cu_, size); dispatch::cuMemAlloc(&*cu_, size);
} }
buffer::buffer(driver::context const & context, CUdeviceptr cu, bool take_ownership): cu_buffer::cu_buffer(driver::context* context, CUdeviceptr cu, bool take_ownership)
context_(context), cu_(cu, take_ownership) : buffer(context, cu, take_ownership){
{ } }
void buffer::set_zero(stream const & queue, size_t size) void cu_buffer::set_zero(cu_stream const & queue, size_t size)
{ {
ContextSwitcher ctx_switch(context_); cu_context::context_switcher ctx_switch(*context_);
dispatch::cuMemsetD8Async(*cu_, 0, size, queue); dispatch::cuMemsetD8Async(*cu_, 0, size, *queue.cu());
} }
handle<CUdeviceptr> const & buffer::cu() const
{ return cu_; }
handle<CUdeviceptr> & buffer::cu()
{ return cu_; }
} }
} }

View File

@@ -35,9 +35,28 @@ namespace triton
namespace driver namespace driver
{ {
/* ------------------------ */
// BASE //
/* ------------------------ */
context::context(driver::device *dev, CUcontext cu, bool take_ownership):
polymorphic_resource(cu, take_ownership),
dev_(dev), cache_path_(get_cache_path()) {
}
context::context(driver::device *dev, cl_context cl, bool take_ownership):
polymorphic_resource(cl, take_ownership),
dev_(dev), cache_path_(get_cache_path()){
}
driver::device* context::device() const {
return dev_;
}
std::string context::get_cache_path(){ std::string context::get_cache_path(){
//user-specified cache path //user-specified cache path
std::string result = tools::getenv("ISAAC_CACHE_PATH"); std::string result = tools::getenv("TRITON_CACHE_PATH");
if(!result.empty()){ if(!result.empty()){
if(tools::mkpath(result)==0) if(tools::mkpath(result)==0)
return result; return result;
@@ -46,7 +65,7 @@ std::string context::get_cache_path(){
result = tools::getenv("HOME"); result = tools::getenv("HOME");
if(!result.empty()) if(!result.empty())
{ {
result = result + "/.isaac/cache/"; result = result + "/.triton/cache/";
if(tools::mkpath(result)==0) if(tools::mkpath(result)==0)
return result; return result;
} }
@@ -54,7 +73,28 @@ std::string context::get_cache_path(){
return ""; return "";
} }
CUdevice context::device(CUcontext context){ std::string const & context::cache_path() const{
return cache_path_;
}
/* ------------------------ */
// CUDA //
/* ------------------------ */
// RAII context switcher
cu_context::context_switcher::context_switcher(const context &ctx): ctx_((const cu_context&)ctx) {
dispatch::cuCtxPushCurrent_v2(*ctx_.cu());
}
cu_context::context_switcher::~context_switcher() {
CUcontext tmp;
dispatch::cuCtxPopCurrent_v2(&tmp);
assert(tmp==(CUcontext)ctx_ && "Switching back to invalid context!");
}
// import CUdevice
CUdevice cu_context::get_device_of(CUcontext context){
dispatch::cuCtxPushCurrent_v2(context); dispatch::cuCtxPushCurrent_v2(context);
CUdevice res; CUdevice res;
dispatch::cuCtxGetDevice(&res); dispatch::cuCtxGetDevice(&res);
@@ -62,35 +102,24 @@ CUdevice context::device(CUcontext context){
return res; return res;
} }
context::context(CUcontext context, bool take_ownership): cu_(context, take_ownership), dvc_(device(context), false), cache_path_(get_cache_path()) // wrapper for cuda context
{ } cu_context::cu_context(CUcontext context, bool take_ownership): driver::context(new driver::cu_device(get_device_of(context), false),
context, take_ownership) {
}
context::context(driver::device const & device): dvc_(device), cache_path_(get_cache_path()) cu_context::cu_context(driver::device* device): context(device, CUcontext(), true){
{ dispatch::cuCtxCreate(&*cu_, CU_CTX_SCHED_AUTO, *((driver::cu_device*)dev_)->cu());
dispatch::cuCtxCreate(&*cu_, CU_CTX_SCHED_AUTO, (CUdevice)device);
dispatch::cuCtxPopCurrent_v2(NULL); dispatch::cuCtxPopCurrent_v2(NULL);
} }
device const & context::device() const
{ return dvc_; }
std::string const & context::cache_path() const /* ------------------------ */
{ return cache_path_; } // OpenCL //
/* ------------------------ */
handle<CUcontext> const & context::cu() const ocl_context::ocl_context(driver::device* dev): context(dev, cl_context(), true) {
{ return cu_; } cl_int err;
*cl_ = dispatch::clCreateContext(nullptr, 1, &*dev->cl(), nullptr, nullptr, &err);
/* Context Switcher */
ContextSwitcher::ContextSwitcher(driver::context const & ctx): ctx_(ctx)
{
dispatch::cuCtxPushCurrent_v2(ctx_);
}
ContextSwitcher::~ContextSwitcher()
{
CUcontext tmp;
dispatch::cuCtxPopCurrent_v2(&tmp);
assert(tmp==(CUcontext)ctx_ && "Switching back to invalid context!");
} }

View File

@@ -34,34 +34,34 @@ namespace triton
namespace driver namespace driver
{ {
/* Architecture [NVidia] */
device::Architecture device::nv_arch(std::pair<unsigned int, unsigned int> sm) const{ /* ------------------------ */
switch(sm.first) // CUDA //
{ /* ------------------------ */
// Architecture
cu_device::Architecture cu_device::nv_arch(std::pair<unsigned int, unsigned int> sm) const {
switch(sm.first) {
case 7: case 7:
switch(sm.second) switch(sm.second){
{
case 0: return Architecture::SM_7_0; case 0: return Architecture::SM_7_0;
} }
case 6: case 6:
switch(sm.second) switch(sm.second){
{
case 0: return Architecture::SM_6_0; case 0: return Architecture::SM_6_0;
case 1: return Architecture::SM_6_1; case 1: return Architecture::SM_6_1;
} }
case 5: case 5:
switch(sm.second) switch(sm.second){
{
case 0: return Architecture::SM_5_0; case 0: return Architecture::SM_5_0;
case 2: return Architecture::SM_5_2; case 2: return Architecture::SM_5_2;
default: return Architecture::UNKNOWN; default: return Architecture::UNKNOWN;
} }
case 3: case 3:
switch(sm.second) switch(sm.second){
{
case 0: return Architecture::SM_3_0; case 0: return Architecture::SM_3_0;
case 5: return Architecture::SM_3_5; case 5: return Architecture::SM_3_5;
case 7: return Architecture::SM_3_7; case 7: return Architecture::SM_3_7;
@@ -69,8 +69,7 @@ device::Architecture device::nv_arch(std::pair<unsigned int, unsigned int> sm) c
} }
case 2: case 2:
switch(sm.second) switch(sm.second){
{
case 0: return Architecture::SM_2_0; case 0: return Architecture::SM_2_0;
case 1: return Architecture::SM_2_1; case 1: return Architecture::SM_2_1;
default: return Architecture::UNKNOWN; default: return Architecture::UNKNOWN;
@@ -80,14 +79,16 @@ device::Architecture device::nv_arch(std::pair<unsigned int, unsigned int> sm) c
} }
} }
// information query
template<CUdevice_attribute attr> template<CUdevice_attribute attr>
int device::cuGetInfo() const{ int cu_device::cuGetInfo() const{
int res; int res;
dispatch::cuDeviceGetAttribute(&res, attr, *cu_); dispatch::cuDeviceGetAttribute(&res, attr, *cu_);
return res; return res;
} }
nvmlDevice_t device::nvml_device() const{ // convert to nvml
nvmlDevice_t cu_device::nvml_device() const{
std::map<std::string, nvmlDevice_t> map; std::map<std::string, nvmlDevice_t> map;
std::string key = pci_bus_id(); std::string key = pci_bus_id();
if(map.find(key)==map.end()){ if(map.find(key)==map.end()){
@@ -98,34 +99,37 @@ nvmlDevice_t device::nvml_device() const{
return map.at(key); return map.at(key);
} }
/* Architecture */ // architecture
device::Architecture device::architecture() const cu_device::Architecture cu_device::architecture() const{
{ return nv_arch(compute_capability()); } return nv_arch(compute_capability());
}
/* Attributes */ // number of address bits
size_t device::address_bits() const size_t cu_device::address_bits() const{
{ return sizeof(size_t)*8; } return sizeof(size_t)*8;
}
driver::platform device::platform() const // name
{ return platform(); } std::string cu_device::name() const {
std::string device::name() const{
char tmp[128]; char tmp[128];
dispatch::cuDeviceGetName(tmp, 128, *cu_); dispatch::cuDeviceGetName(tmp, 128, *cu_);
return std::string(tmp); return std::string(tmp);
} }
std::string device::pci_bus_id() const{ // PCI bus ID
std::string cu_device::pci_bus_id() const{
char tmp[128]; char tmp[128];
dispatch::cuDeviceGetPCIBusId(tmp, 128, *cu_); dispatch::cuDeviceGetPCIBusId(tmp, 128, *cu_);
return std::string(tmp); return std::string(tmp);
} }
void device::interpret_as(std::pair<size_t, size_t> cc){ // force the device to be interpreted as a particular cc
void cu_device::interpret_as(std::pair<size_t, size_t> cc){
interpreted_as_ = std::make_shared<std::pair<size_t, size_t>>(cc); interpreted_as_ = std::make_shared<std::pair<size_t, size_t>>(cc);
} }
std::pair<size_t, size_t> device::compute_capability() const{ // compute capability
std::pair<size_t, size_t> cu_device::compute_capability() const {
if(interpreted_as_) if(interpreted_as_)
return *interpreted_as_; return *interpreted_as_;
size_t _major = cuGetInfo<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR>(); size_t _major = cuGetInfo<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR>();
@@ -133,17 +137,24 @@ std::pair<size_t, size_t> device::compute_capability() const{
return std::make_pair(_major, _minor); return std::make_pair(_major, _minor);
} }
size_t device::max_threads_per_block() const // maximum number of threads per block
{ return cuGetInfo<CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK>(); } size_t cu_device::max_threads_per_block() const {
return cuGetInfo<CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK>();
}
size_t device::max_shared_memory() const // maximum amount of shared memory per block
{ return cuGetInfo<CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK>(); } size_t cu_device::max_shared_memory() const {
return cuGetInfo<CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK>();
}
size_t device::warp_size() const // warp size
{ return cuGetInfo<CU_DEVICE_ATTRIBUTE_WARP_SIZE>(); } size_t cu_device::warp_size() const {
return cuGetInfo<CU_DEVICE_ATTRIBUTE_WARP_SIZE>();
}
std::vector<size_t> device::max_block_dim() const{ // maximum block dimensions
std::vector<size_t> cu_device::max_block_dim() const {
std::vector<size_t> result(3); std::vector<size_t> result(3);
result[0] = cuGetInfo<CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X>(); result[0] = cuGetInfo<CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X>();
result[1] = cuGetInfo<CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y>(); result[1] = cuGetInfo<CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y>();
@@ -151,36 +162,39 @@ std::vector<size_t> device::max_block_dim() const{
return result; return result;
} }
size_t device::current_sm_clock() const{ // current SM clock
size_t cu_device::current_sm_clock() const{
unsigned int result; unsigned int result;
dispatch::nvmlDeviceGetClockInfo(nvml_device(), NVML_CLOCK_SM, &result); dispatch::nvmlDeviceGetClockInfo(nvml_device(), NVML_CLOCK_SM, &result);
return result; return result;
} }
size_t device::max_sm_clock() const{ // max SM clock
size_t cu_device::max_sm_clock() const{
unsigned int result; unsigned int result;
dispatch::nvmlDeviceGetMaxClockInfo(nvml_device(), NVML_CLOCK_SM, &result); dispatch::nvmlDeviceGetMaxClockInfo(nvml_device(), NVML_CLOCK_SM, &result);
return result; return result;
} }
// current memory clock
size_t device::current_mem_clock() const{ size_t cu_device::current_mem_clock() const{
unsigned int result; unsigned int result;
dispatch::nvmlDeviceGetClockInfo(nvml_device(), NVML_CLOCK_MEM, &result); dispatch::nvmlDeviceGetClockInfo(nvml_device(), NVML_CLOCK_MEM, &result);
return result; return result;
} }
size_t device::max_mem_clock() const{ // max memory clock
size_t cu_device::max_mem_clock() const{
unsigned int result; unsigned int result;
dispatch::nvmlDeviceGetMaxClockInfo(nvml_device(), NVML_CLOCK_MEM, &result); dispatch::nvmlDeviceGetMaxClockInfo(nvml_device(), NVML_CLOCK_MEM, &result);
return result; return result;
} }
/* Infos */ // print infos
std::string device::infos() const{ std::string cu_device::infos() const{
std::ostringstream oss; std::ostringstream oss;
std::vector<size_t> max_wi_sizes = max_block_dim(); std::vector<size_t> max_wi_sizes = max_block_dim();
oss << "Platform: " << platform().name() << std::endl; oss << "Platform: CUDA" << std::endl;
oss << "Name: " << name() << std::endl; oss << "Name: " << name() << std::endl;
oss << "Maximum total work-group size: " << max_threads_per_block() << std::endl; oss << "Maximum total work-group size: " << max_threads_per_block() << std::endl;
oss << "Maximum individual work-group sizes: " << max_wi_sizes[0] << ", " << max_wi_sizes[1] << ", " << max_wi_sizes[2] << std::endl; oss << "Maximum individual work-group sizes: " << max_wi_sizes[0] << ", " << max_wi_sizes[1] << ", " << max_wi_sizes[2] << std::endl;
@@ -188,9 +202,6 @@ std::string device::infos() const{
return oss.str(); return oss.str();
} }
handle<CUdevice> const & device::cu() const
{ return cu_; }
} }
} }

View File

@@ -72,6 +72,17 @@ namespace driver
#define DEFINE19(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15, t16, t17, t18, t19) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i, t10 j, t11 k, t12 l, t13 m, t14 n, t15 o, t16 p, t17 q, t18 r, t19 s)\ #define DEFINE19(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15, t16, t17, t18, t19) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i, t10 j, t11 k, t12 l, t13 m, t14 n, t15 o, t16 p, t17 q, t18 r, t19 s)\
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s); } {return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s); }
//Specialized helpers for OpenCL
#define OCL_DEFINE1(ret, fname, t1) DEFINE1(clinit, opencl_, ret, fname, t1)
#define OCL_DEFINE2(ret, fname, t1, t2) DEFINE2(clinit, opencl_, ret, fname, t1, t2)
#define OCL_DEFINE3(ret, fname, t1, t2, t3) DEFINE3(clinit, opencl_, ret, fname, t1, t2, t3)
#define OCL_DEFINE4(ret, fname, t1, t2, t3, t4) DEFINE4(clinit, opencl_, ret, fname, t1, t2, t3, t4)
#define OCL_DEFINE5(ret, fname, t1, t2, t3, t4, t5) DEFINE5(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5)
#define OCL_DEFINE6(ret, fname, t1, t2, t3, t4, t5, t6) DEFINE6(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5, t6)
#define OCL_DEFINE7(ret, fname, t1, t2, t3, t4, t5, t6, t7) DEFINE7(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5, t6, t7)
#define OCL_DEFINE8(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) DEFINE8(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8)
#define OCL_DEFINE9(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) DEFINE9(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9)
//Specialized helpers for CUDA //Specialized helpers for CUDA
#define CUDA_DEFINE1(ret, fname, t1) DEFINE1(cuinit, cuda_, ret, fname, t1) #define CUDA_DEFINE1(ret, fname, t1) DEFINE1(cuinit, cuda_, ret, fname, t1)
#define CUDA_DEFINE2(ret, fname, t1, t2) DEFINE2(cuinit, cuda_, ret, fname, t1, t2) #define CUDA_DEFINE2(ret, fname, t1, t2) DEFINE2(cuinit, cuda_, ret, fname, t1, t2)
@@ -104,15 +115,24 @@ namespace driver
#define CUDNN_DEFINE13(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13) DEFINE13(cudnninit, cudnn_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13) #define CUDNN_DEFINE13(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13) DEFINE13(cudnninit, cudnn_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13)
bool dispatch::clinit()
{
if(opencl_==nullptr)
opencl_ = dlopen("libOpenCL.so", RTLD_LAZY);
return opencl_ != nullptr;
}
bool dispatch::cuinit(){ bool dispatch::cuinit(){
if(cuda_==nullptr) if(cuda_==nullptr)
cuda_ = dlopen("libcuda.so", RTLD_LAZY); cuda_ = dlopen("libcuda.so", RTLD_LAZY);
if(cuda_ == nullptr)
return false;
CUresult (*fptr)(unsigned int); CUresult (*fptr)(unsigned int);
cuInit_ = dlsym(cuda_, "cuInit"); cuInit_ = dlsym(cuda_, "cuInit");
*reinterpret_cast<void **>(&fptr) = cuInit_; *reinterpret_cast<void **>(&fptr) = cuInit_;
CUresult res = (*fptr)(0); CUresult res = (*fptr)(0);
check(res); check(res);
return cuda_ != nullptr; return true;
} }
bool dispatch::nvmlinit(){ bool dispatch::nvmlinit(){
@@ -180,17 +200,17 @@ NVML_DEFINE2(nvmlReturn_t, nvmlDeviceGetHandleByPciBusId_v2, const char *, nvmlD
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetClockInfo, nvmlDevice_t, nvmlClockType_t, unsigned int*) NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetClockInfo, nvmlDevice_t, nvmlClockType_t, unsigned int*)
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetMaxClockInfo, nvmlDevice_t, nvmlClockType_t, unsigned int*) NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetMaxClockInfo, nvmlDevice_t, nvmlClockType_t, unsigned int*)
cublasHandle_t dispatch::cublasHandle(driver::context const & ctx){ cublasHandle_t dispatch::cublasHandle(const cu_context &ctx){
static std::map<context, cublasHandle_t> handles; static std::map<CUcontext, cublasHandle_t> handles;
auto pr = handles.insert({ctx, cublasHandle_t()}); auto pr = handles.insert({*ctx.cu(), cublasHandle_t()});
if(pr.second) if(pr.second)
cublasCreate_v2(&pr.first->second); cublasCreate_v2(&pr.first->second);
return pr.first->second; return pr.first->second;
} }
cudnnHandle_t dispatch::cudnnHandle(driver::context const & ctx){ cudnnHandle_t dispatch::cudnnHandle(driver::cu_context const & ctx){
static std::map<context, cudnnHandle_t> handles; static std::map<CUcontext, cudnnHandle_t> handles;
auto pr = handles.insert({ctx, cudnnHandle_t()}); auto pr = handles.insert({*ctx.cu(), cudnnHandle_t()});
if(pr.second) if(pr.second)
cudnnCreate(&pr.first->second); cudnnCreate(&pr.first->second);
return pr.first->second; return pr.first->second;
@@ -231,16 +251,51 @@ CUDNN_DEFINE13(cudnnStatus_t, cudnnConvolutionForward, cudnnHandle_t, const void
CUDNN_DEFINE2(cudnnStatus_t, cudnnSetStream, cudnnHandle_t, cudaStream_t) CUDNN_DEFINE2(cudnnStatus_t, cudnnSetStream, cudnnHandle_t, cudaStream_t)
CUDNN_DEFINE7(cudnnStatus_t, cudnnTransformTensor, cudnnHandle_t, const void*, const cudnnTensorDescriptor_t, const void*, const void*, const cudnnTensorDescriptor_t, void*) CUDNN_DEFINE7(cudnnStatus_t, cudnnTransformTensor, cudnnHandle_t, const void*, const cudnnTensorDescriptor_t, const void*, const void*, const cudnnTensorDescriptor_t, void*)
// OpenCL
cl_int dispatch::clBuildProgram(cl_program a, cl_uint b, const cl_device_id * c, const char * d, void (*e)(cl_program, void *), void * f)
{ return f_impl<dispatch::clinit>(opencl_, clBuildProgram, clBuildProgram_, "clBuildProgram", a, b, c, d, e, f); }
cl_context dispatch::clCreateContext(const cl_context_properties * a, cl_uint b, const cl_device_id * c, void (*d)(const char *, const void *, size_t, void *), void * e, cl_int * f)
{ return f_impl<dispatch::clinit>(opencl_, dispatch::clCreateContext, dispatch::clCreateContext_, "clCreateContext", a, b, c, d, e, f); }
OCL_DEFINE9(cl_int, clEnqueueNDRangeKernel, cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*)
OCL_DEFINE4(cl_int, clSetKernelArg, cl_kernel, cl_uint, size_t, const void *)
OCL_DEFINE1(cl_int, clReleaseMemObject, cl_mem)
OCL_DEFINE1(cl_int, clFinish, cl_command_queue)
OCL_DEFINE5(cl_int, clGetMemObjectInfo, cl_mem, cl_mem_info, size_t, void *, size_t *)
OCL_DEFINE5(cl_int, clGetCommandQueueInfo, cl_command_queue, cl_command_queue_info, size_t, void *, size_t *)
OCL_DEFINE1(cl_int, clReleaseContext, cl_context)
OCL_DEFINE1(cl_int, clReleaseEvent, cl_event)
OCL_DEFINE9(cl_int, clEnqueueWriteBuffer, cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *)
OCL_DEFINE9(cl_int, clEnqueueReadBuffer, cl_command_queue, cl_mem, cl_bool, size_t, size_t, void *, cl_uint, const cl_event *, cl_event *)
OCL_DEFINE6(cl_int, clGetProgramBuildInfo, cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *)
OCL_DEFINE1(cl_int, clReleaseDevice, cl_device_id)
OCL_DEFINE5(cl_int, clGetDeviceIDs, cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *)
OCL_DEFINE5(cl_int, clGetContextInfo, cl_context, cl_context_info, size_t, void *, size_t *)
OCL_DEFINE5(cl_int, clGetDeviceInfo, cl_device_id, cl_device_info, size_t, void *, size_t *)
OCL_DEFINE1(cl_int, clReleaseCommandQueue, cl_command_queue)
OCL_DEFINE3(cl_int, clGetPlatformIDs, cl_uint, cl_platform_id *, cl_uint *)
OCL_DEFINE5(cl_int, clGetPlatformInfo, cl_platform_id, cl_platform_info, size_t, void *, size_t *)
OCL_DEFINE5(cl_int, clGetEventProfilingInfo, cl_event, cl_profiling_info, size_t, void *, size_t *)
OCL_DEFINE7(cl_program, clCreateProgramWithBinary, cl_context, cl_uint, const cl_device_id *, const size_t *, const unsigned char **, cl_int *, cl_int *)
OCL_DEFINE4(cl_command_queue, clCreateCommandQueue, cl_context, cl_device_id, cl_command_queue_properties, cl_int *)
OCL_DEFINE1(cl_int, clRetainEvent, cl_event)
OCL_DEFINE1(cl_int, clReleaseProgram, cl_program)
OCL_DEFINE1(cl_int, clFlush, cl_command_queue)
OCL_DEFINE5(cl_int, clGetProgramInfo, cl_program, cl_program_info, size_t, void *, size_t *)
OCL_DEFINE5(cl_int, clGetKernelInfo, cl_kernel, cl_kernel_info, size_t, void *, size_t *)
OCL_DEFINE6(cl_int, clGetKernelWorkGroupInfo, cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void *, size_t *)
OCL_DEFINE3(cl_kernel, clCreateKernel, cl_program, const char *, cl_int *)
OCL_DEFINE5(cl_mem, clCreateBuffer, cl_context, cl_mem_flags, size_t, void *, cl_int *)
OCL_DEFINE5(cl_program, clCreateProgramWithSource, cl_context, cl_uint, const char **, const size_t *, cl_int *)
OCL_DEFINE1(cl_int, clReleaseKernel, cl_kernel)
// Release
void dispatch::release(){ void dispatch::release(){
if(cuda_){ if(cuda_){
dlclose(cuda_); dlclose(cuda_);
cuda_ = nullptr; cuda_ = nullptr;
} }
if(nvrtc_){
dlclose(nvrtc_);
nvrtc_ = nullptr;
}
if(cublas_){ if(cublas_){
dlclose(cublas_); dlclose(cublas_);
cublas_ = nullptr; cublas_ = nullptr;
@@ -251,12 +306,47 @@ void dispatch::release(){
} }
} }
void * dispatch::opencl_;
void* dispatch::cuda_; void* dispatch::cuda_;
void* dispatch::nvrtc_;
void* dispatch::nvml_; void* dispatch::nvml_;
void* dispatch::cublas_; void* dispatch::cublas_;
void* dispatch::cudnn_; void* dispatch::cudnn_;
//OpenCL
void* dispatch::clBuildProgram_;
void* dispatch::clEnqueueNDRangeKernel_;
void* dispatch::clSetKernelArg_;
void* dispatch::clReleaseMemObject_;
void* dispatch::clFinish_;
void* dispatch::clGetMemObjectInfo_;
void* dispatch::clGetCommandQueueInfo_;
void* dispatch::clReleaseContext_;
void* dispatch::clReleaseEvent_;
void* dispatch::clEnqueueWriteBuffer_;
void* dispatch::clEnqueueReadBuffer_;
void* dispatch::clGetProgramBuildInfo_;
void* dispatch::clReleaseDevice_;
void* dispatch::clCreateContext_;
void* dispatch::clGetDeviceIDs_;
void* dispatch::clGetContextInfo_;
void* dispatch::clGetDeviceInfo_;
void* dispatch::clReleaseCommandQueue_;
void* dispatch::clGetPlatformIDs_;
void* dispatch::clGetPlatformInfo_;
void* dispatch::clGetEventProfilingInfo_;
void* dispatch::clCreateProgramWithBinary_;
void* dispatch::clCreateCommandQueue_;
void* dispatch::clRetainEvent_;
void* dispatch::clReleaseProgram_;
void* dispatch::clFlush_;
void* dispatch::clGetProgramInfo_;
void* dispatch::clGetKernelInfo_;
void* dispatch::clGetKernelWorkGroupInfo_;
void* dispatch::clCreateKernel_;
void* dispatch::clCreateBuffer_;
void* dispatch::clCreateProgramWithSource_;
void* dispatch::clReleaseKernel_;
//CUDA //CUDA
void* dispatch::cuCtxGetCurrent_; void* dispatch::cuCtxGetCurrent_;
void* dispatch::cuCtxSetCurrent_; void* dispatch::cuCtxSetCurrent_;
@@ -295,13 +385,6 @@ void* dispatch::cuMemsetD8Async_;
void* dispatch::cuCtxPushCurrent_v2_; void* dispatch::cuCtxPushCurrent_v2_;
void* dispatch::cuCtxPopCurrent_v2_; void* dispatch::cuCtxPopCurrent_v2_;
void* dispatch::nvrtcCompileProgram_;
void* dispatch::nvrtcGetProgramLogSize_;
void* dispatch::nvrtcGetPTX_;
void* dispatch::nvrtcGetPTXSize_;
void* dispatch::nvrtcCreateProgram_;
void* dispatch::nvrtcGetProgramLog_;
void* dispatch::nvmlInit_v2_; void* dispatch::nvmlInit_v2_;
void* dispatch::nvmlDeviceGetHandleByPciBusId_v2_; void* dispatch::nvmlDeviceGetHandleByPciBusId_v2_;
void* dispatch::nvmlDeviceGetClockInfo_; void* dispatch::nvmlDeviceGetClockInfo_;

View File

@@ -30,6 +30,9 @@ namespace triton
namespace driver namespace driver
{ {
//OpenCL
inline void _delete(cl_platform_id) { }
inline void _delete(cl_device_id x) { dispatch::clReleaseDevice(x); }
//CUDA //CUDA
inline void _delete(CUcontext x) { dispatch::cuCtxDestroy(x); } inline void _delete(CUcontext x) { dispatch::cuCtxDestroy(x); }
inline void _delete(CUdeviceptr x) { dispatch::cuMemFree(x); } inline void _delete(CUdeviceptr x) { dispatch::cuMemFree(x); }
@@ -39,7 +42,7 @@ inline void _delete(CUevent x) { dispatch::cuEventDestroy(x); }
inline void _delete(CUfunction) { } inline void _delete(CUfunction) { }
inline void _delete(CUmodule x) { dispatch::cuModuleUnload(x); } inline void _delete(CUmodule x) { dispatch::cuModuleUnload(x); }
inline void _delete(cu_event_t x) { _delete(x.first); _delete(x.second); } inline void _delete(cu_event_t x) { _delete(x.first); _delete(x.second); }
inline void _delete(cu_platform){} inline void _delete(CUPlatform){}
//Constructor //Constructor
template<class CUType> template<class CUType>
@@ -60,7 +63,10 @@ template class handle<CUdevice>;
template class handle<cu_event_t>; template class handle<cu_event_t>;
template class handle<CUfunction>; template class handle<CUfunction>;
template class handle<CUmodule>; template class handle<CUmodule>;
template class handle<cu_platform>; template class handle<CUPlatform>;
template class handle<cl_platform_id>;
template class handle<cl_device_id>;
} }
} }

View File

@@ -32,13 +32,39 @@ namespace triton
namespace driver namespace driver
{ {
kernel::kernel(driver::module const & program, const char * name) : program_(program), address_bits_(program.context().device().address_bits()){
cu_params_store_.reserve(64); /* ------------------------ */
cu_params_.reserve(64); // Base //
dispatch::cuModuleGetFunction(&*cu_, program, name); /* ------------------------ */
kernel::kernel(driver::module *program, CUfunction fn, bool has_ownership):
polymorphic_resource(fn, has_ownership), program_(program){
} }
void kernel::setArg(unsigned int index, std::size_t size, void* ptr){ kernel::kernel(driver::module *program, cl_kernel fn, bool has_ownership):
polymorphic_resource(fn, has_ownership), program_(program){
}
driver::module* kernel::module() {
return program_;
}
/* ------------------------ */
// OpenCL //
/* ------------------------ */
/* ------------------------ */
// CUDA //
/* ------------------------ */
cu_kernel::cu_kernel(driver::module *program, const char * name) : kernel(program, CUfunction(), true) {
cu_params_store_.reserve(64);
cu_params_.reserve(64);
dispatch::cuModuleGetFunction(&*cu_, *program->cu(), name);
}
void cu_kernel::setArg(unsigned int index, std::size_t size, void* ptr){
if(index + 1> cu_params_store_.size()){ if(index + 1> cu_params_store_.size()){
cu_params_store_.resize(index+1); cu_params_store_.resize(index+1);
cu_params_.resize(index+1); cu_params_.resize(index+1);
@@ -48,18 +74,12 @@ void kernel::setArg(unsigned int index, std::size_t size, void* ptr){
cu_params_[index] = cu_params_store_[index].get(); cu_params_[index] = cu_params_store_[index].get();
} }
void kernel::setArg(unsigned int index, buffer const & data) void cu_kernel::setArg(unsigned int index, cu_buffer const & data)
{ return setArg(index, (CUdeviceptr)data);} { return setArg(index, data.cu());}
void* const* kernel::cu_params() const void* const* cu_kernel::cu_params() const
{ return cu_params_.data(); } { return cu_params_.data(); }
handle<CUfunction> const & kernel::cu() const
{ return cu_; }
driver::module const & kernel::module() const
{ return program_; }
} }

View File

@@ -46,9 +46,34 @@ namespace triton
namespace driver namespace driver
{ {
std::string module::compile_llvm_module(llvm::Module* module) { /* ------------------------ */
init_llvm(); // Base //
/* ------------------------ */
module::module(driver::context* ctx, CUmodule mod, bool has_ownership)
: polymorphic_resource(mod, has_ownership), ctx_(ctx) {
}
module::module(driver::context* ctx, cl_program mod, bool has_ownership)
: polymorphic_resource(mod, has_ownership), ctx_(ctx) {
}
driver::context* module::context() const {
return ctx_;
}
/* ------------------------ */
// OpenCL //
/* ------------------------ */
/* ------------------------ */
// CUDA //
/* ------------------------ */
std::string cu_module::compile_llvm_module(llvm::Module* module) {
init_llvm();
// create machine // create machine
module->setTargetTriple("nvptx64-nvidia-cuda"); module->setTargetTriple("nvptx64-nvidia-cuda");
std::string error; std::string error;
@@ -67,18 +92,17 @@ std::string module::compile_llvm_module(llvm::Module* module) {
layout += "-p3:32:32-p4:32:32-p5:32:32"; layout += "-p3:32:32-p4:32:32-p5:32:32";
layout += "-i64:64-i128:128-v16:16-v32:32-n16:32:64"; layout += "-i64:64-i128:128-v16:16-v32:32-n16:32:64";
module->setDataLayout(layout); module->setDataLayout(layout);
// emit machine code // emit machine code
llvm::legacy::PassManager pass; llvm::legacy::PassManager pass;
llvm::SmallVector<char, 0> buffer; llvm::SmallVector<char, 0> buffer;
llvm::raw_svector_ostream stream(buffer); llvm::raw_svector_ostream stream(buffer);
machine->addPassesToEmitFile(pass, stream, nullptr, llvm::TargetMachine::CGFT_AssemblyFile); machine->addPassesToEmitFile(pass, stream, nullptr, llvm::TargetMachine::CGFT_AssemblyFile);
pass.run(*module); pass.run(*module);
// done
return std::string(buffer.begin(), buffer.end()); return std::string(buffer.begin(), buffer.end());
} }
void module::init_llvm() { void cu_module::init_llvm() {
static bool init = false; static bool init = false;
if(!init){ if(!init){
llvm::InitializeAllTargetInfos(); llvm::InitializeAllTargetInfos();
@@ -90,10 +114,10 @@ void module::init_llvm() {
} }
} }
module::module(driver::context const & context, llvm::Module* ll_module): module(context, compile_llvm_module(ll_module)){ } cu_module::cu_module(driver::context * context, llvm::Module* ll_module): cu_module(context, compile_llvm_module(ll_module)) { }
module::module(driver::context const & context, std::string const & source) : context_(context), source_(source){ cu_module::cu_module(driver::context * context, std::string const & source) : module(context, CUmodule(), true), source_(source){
ContextSwitcher ctx_switch(context_); cu_context::context_switcher ctx_switch(*context);
// JIT compile source-code // JIT compile source-code
CUjit_option opt[] = {CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, CU_JIT_ERROR_LOG_BUFFER}; CUjit_option opt[] = {CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, CU_JIT_ERROR_LOG_BUFFER};
unsigned int errbufsize = 8096; unsigned int errbufsize = 8096;
@@ -108,17 +132,11 @@ module::module(driver::context const & context, std::string const & source) : co
} }
} }
driver::context const & module::context() const cu_buffer cu_module::symbol(const char *name) const{
{ return context_; }
handle<CUmodule> const & module::cu() const
{ return cu_; }
buffer module::symbol(const char *name) const{
CUdeviceptr handle; CUdeviceptr handle;
size_t size; size_t size;
dispatch::cuModuleGetGlobal_v2(&handle, &size, *cu_, name); dispatch::cuModuleGetGlobal_v2(&handle, &size, *cu_, name);
return buffer(context_, handle, false); return cu_buffer(ctx_, handle, false);
} }

View File

@@ -31,22 +31,46 @@ namespace triton
namespace driver namespace driver
{ {
std::string platform::version() const{
/* ------------------------ */
// CUDA //
/* ------------------------ */
std::string cu_platform::version() const{
int version; int version;
dispatch::cuDriverGetVersion(&version); dispatch::cuDriverGetVersion(&version);
return std::to_string(version); return std::to_string(version);
} }
std::vector<device> platform::devices() const{ void cu_platform::devices(std::vector<device *> &devices) const{
std::vector<device> devices;
int N; int N;
dispatch::cuDeviceGetCount(&N); dispatch::cuDeviceGetCount(&N);
for(int i = 0 ; i < N ; ++i){ for(int i = 0 ; i < N ; ++i){
CUdevice dvc; CUdevice dvc;
dispatch::cuDeviceGet(&dvc, i); dispatch::cuDeviceGet(&dvc, i);
devices.push_back(driver::device(dvc)); devices.push_back(new driver::cu_device(dvc));
} }
return devices; }
/* ------------------------ */
// OpenCL //
/* ------------------------ */
std::string cl_platform::version() const {
size_t size;
dispatch::clGetPlatformInfo(*cl_, CL_PLATFORM_VERSION, 0, nullptr, &size);
std::string result(size, 0);
dispatch::clGetPlatformInfo(*cl_, CL_PLATFORM_VERSION, size, (void*)&*result.begin(), nullptr);
return result;
}
void cl_platform::devices(std::vector<device*> &devices) const{
cl_uint num_devices;
dispatch::clGetDeviceIDs(*cl_, CL_DEVICE_TYPE_GPU, 0, nullptr, &num_devices);
std::vector<cl_device_id> ids(num_devices);
dispatch::clGetDeviceIDs(*cl_, CL_DEVICE_TYPE_GPU, num_devices, ids.data(), nullptr);
for(cl_device_id id: ids)
devices.push_back(new driver::ocl_device(id));
} }
} }

View File

@@ -38,57 +38,84 @@ namespace triton
namespace driver namespace driver
{ {
inline CUcontext cucontext(){ /* ------------------------ */
// Base //
/* ------------------------ */
stream::stream(driver::context *ctx, CUstream cu, bool has_ownership)
: polymorphic_resource(cu, has_ownership), ctx_(ctx) {
}
stream::stream(driver::context *ctx, cl_command_queue cl, bool has_ownership)
: polymorphic_resource(cl, has_ownership), ctx_(ctx) {
}
driver::context* stream::context() const {
return ctx_;
}
/* ------------------------ */
// OpenCL //
/* ------------------------ */
void cl_stream::synchronize() {
dispatch::clFinish(*cl_);
}
/* ------------------------ */
// CUDA //
/* ------------------------ */
inline CUcontext get_context() {
CUcontext result; CUcontext result;
dispatch::cuCtxGetCurrent(&result); dispatch::cuCtxGetCurrent(&result);
return result; return result;
} }
stream::stream(CUstream stream, bool take_ownership): context_(cucontext(), take_ownership), cu_(stream, take_ownership) cu_stream::cu_stream(CUstream str, bool take_ownership):
{} stream(backend::contexts::import(get_context()), str, take_ownership) {
}
stream::stream(driver::context const & context): context_(context), cu_(CUstream(), true) cu_stream::cu_stream(driver::context *context): stream((driver::cu_context*)context, CUstream(), true) {
{ cu_context::context_switcher ctx_switch(*ctx_);
ContextSwitcher ctx_switch(context_);
dispatch::cuStreamCreate(&*cu_, 0); dispatch::cuStreamCreate(&*cu_, 0);
} }
void stream::synchronize() void cu_stream::synchronize() {
{ cu_context::context_switcher ctx_switch(*ctx_);
ContextSwitcher ctx_switch(context_);
dispatch::cuStreamSynchronize(*cu_); dispatch::cuStreamSynchronize(*cu_);
} }
driver::context const & stream::context() const void cu_stream::enqueue(driver::cu_kernel const & kernel, std::array<size_t, 3> grid, std::array<size_t, 3> block, std::vector<Event> const *, Event* event) {
{ return context_; } cu_context::context_switcher ctx_switch(*ctx_);
void stream::enqueue(kernel const & kernel, std::array<size_t, 3> grid, std::array<size_t, 3> block, std::vector<Event> const *, Event* event){
ContextSwitcher ctx_switch(context_);
if(event) if(event)
dispatch::cuEventRecord(((cu_event_t)*event).first, *cu_); dispatch::cuEventRecord(event->cu()->first, *cu_);
dispatch::cuLaunchKernel(kernel, grid[0], grid[1], grid[2], block[0], block[1], block[2], 0, *cu_,(void**)kernel.cu_params(), NULL); dispatch::cuLaunchKernel(*kernel.cu(), grid[0], grid[1], grid[2], block[0], block[1], block[2], 0, *cu_,(void**)kernel.cu_params(), NULL);
if(event) if(event)
dispatch::cuEventRecord(((cu_event_t)*event).second, *cu_); dispatch::cuEventRecord(event->cu()->second, *cu_);
} }
void stream::write(buffer const & buffer, bool blocking, std::size_t offset, std::size_t size, void const* ptr){ void cu_stream::write(driver::cu_buffer const & buffer, bool blocking, std::size_t offset, std::size_t size, void const* ptr) {
ContextSwitcher ctx_switch(context_); cu_context::context_switcher ctx_switch(*ctx_);
if(blocking) if(blocking)
dispatch::cuMemcpyHtoD(buffer + offset, ptr, size); dispatch::cuMemcpyHtoD(*buffer.cu() + offset, ptr, size);
else else
dispatch::cuMemcpyHtoDAsync(buffer + offset, ptr, size, *cu_); dispatch::cuMemcpyHtoDAsync(*buffer.cu() + offset, ptr, size, *cu_);
} }
void stream::read(buffer const & buffer, bool blocking, std::size_t offset, std::size_t size, void* ptr){ void cu_stream::read(driver::cu_buffer const & buffer, bool blocking, std::size_t offset, std::size_t size, void* ptr) {
ContextSwitcher ctx_switch(context_); cu_context::context_switcher ctx_switch(*ctx_);
if(blocking) if(blocking)
dispatch::cuMemcpyDtoH(ptr, buffer + offset, size); dispatch::cuMemcpyDtoH(ptr, *buffer.cu() + offset, size);
else else
dispatch::cuMemcpyDtoHAsync(ptr, buffer + offset, size, *cu_); dispatch::cuMemcpyDtoHAsync(ptr, *buffer.cu() + offset, size, *cu_);
} }
handle<CUstream> const & stream::cu() const
{ return cu_; }
} }

View File

@@ -89,7 +89,7 @@ std::unique_ptr<ir::module> jit::make_triton_module(const std::string &src) {
} }
jit::jit(driver::context context): driver_context_(context) { jit::jit(driver::context *context): driver_context_(context) {
} }
@@ -131,15 +131,15 @@ void jit::autotune(const std::string &src, benchmark_t benchmark) {
} }
passes.tune.init(tt_module); passes.tune.init(tt_module);
passes.init(tt_module); passes.init(tt_module);
const driver::device &device = driver_context_.device(); driver::cu_device* device = (driver::cu_device*)driver_context_->device();
if(passes.allocation.get_allocated_size() > device.max_shared_memory()) if(passes.allocation.get_allocated_size() > device->max_shared_memory())
return; return;
if(passes.tune.get_num_threads() > device.max_threads_per_block()) if(passes.tune.get_num_threads() > device->max_threads_per_block())
return; return;
// Compile // Compile
auto ll_module = make_llvm_module(tt_module, passes); auto ll_module = make_llvm_module(tt_module, passes);
driver::module module(driver_context_, &*ll_module); driver::cu_module module(driver_context_, &*ll_module);
driver::kernel kernel(module, "matmul"); driver::cu_kernel kernel(&module, "matmul");
launch_information info = launch_info_map_.at("matmul"); launch_information info = launch_info_map_.at("matmul");
for(unsigned p: params) for(unsigned p: params)
std::cout << p << " " << std::flush; std::cout << p << " " << std::flush;
@@ -166,12 +166,13 @@ void jit::add_module(ir::module &tt_module, const std::vector<unsigned> &params)
passes.tune.check_constraints(errors); passes.tune.check_constraints(errors);
if(errors.size()) if(errors.size())
throw std::runtime_error("invalid parameters"); throw std::runtime_error("invalid parameters");
if(passes.allocation.get_allocated_size() > driver_context_.device().max_shared_memory()) driver::cu_device* device = (driver::cu_device*)driver_context_->device();
if(passes.allocation.get_allocated_size() > device->max_shared_memory())
throw std::runtime_error("invalid parameters"); throw std::runtime_error("invalid parameters");
// triton module -> llvm module // triton module -> llvm module
auto ll_module = make_llvm_module(tt_module, passes); auto ll_module = make_llvm_module(tt_module, passes);
// llvm module -> machine code // llvm module -> machine code
modules_.push_back(driver::module(driver_context_, &*ll_module)); modules_.push_back(driver::cu_module(driver_context_, &*ll_module));
// add globals // add globals
for(auto x: tt_module.globals()) for(auto x: tt_module.globals())
global_ints_[x.first] = ((ir::metaparameter*)x.second)->get_value(); global_ints_[x.first] = ((ir::metaparameter*)x.second)->get_value();
@@ -182,8 +183,8 @@ void jit::add_module(const std::string &src, const std::vector<unsigned> &params
add_module(*ptt_module, params); add_module(*ptt_module, params);
} }
driver::kernel jit::get_function(const std::string &name) { driver::cu_kernel jit::get_function(const std::string &name) {
return driver::kernel(modules_.front(), name.c_str()); return driver::cu_kernel(&modules_.front(), name.c_str());
} }
jit::launch_information jit::get_launch_info(const std::string &name) { jit::launch_information jit::get_launch_info(const std::string &name) {