[driver] adding opencl in the driver API
This commit is contained in:
@@ -2,5 +2,5 @@ foreach(PROG matrix)
|
||||
add_executable(${PROG} ${PROG}.cpp)
|
||||
set_target_properties(${PROG} PROPERTIES OUTPUT_NAME ${PROG})
|
||||
include_directories(/usr/local/cuda/include/)
|
||||
target_link_libraries(${PROG} triton cuda)
|
||||
target_link_libraries(${PROG} triton)
|
||||
endforeach(PROG)
|
||||
|
@@ -87,7 +87,7 @@ T min(std::vector<T> x)
|
||||
|
||||
|
||||
template<class OP, class SYNC>
|
||||
double bench(OP const & op, SYNC const & sync, triton::driver::device const & device)
|
||||
double bench(OP const & op, SYNC const & sync, triton::driver::cu_device const & device)
|
||||
{
|
||||
timer tmr;
|
||||
std::vector<size_t> times;
|
||||
@@ -108,6 +108,7 @@ double bench(OP const & op, SYNC const & sync, triton::driver::device const & de
|
||||
int main() {
|
||||
// initialize default compute device
|
||||
auto context = triton::driver::backend::contexts::get_default();
|
||||
exit(EXIT_SUCCESS);
|
||||
triton::jit jit(context);
|
||||
|
||||
// matrix multiplication parameters
|
||||
@@ -123,10 +124,10 @@ int main() {
|
||||
hb[i] = 1;
|
||||
for(size_t i = 0; i < hc.size(); i++)
|
||||
hc[i] = 0;
|
||||
triton::driver::buffer dc(context, hc.size()*4);
|
||||
triton::driver::buffer da(context, ha.size()*4);
|
||||
triton::driver::buffer db(context, hb.size()*4);
|
||||
triton::driver::stream stream(context);
|
||||
triton::driver::cu_buffer dc(context, hc.size()*4);
|
||||
triton::driver::cu_buffer da(context, ha.size()*4);
|
||||
triton::driver::cu_buffer db(context, hb.size()*4);
|
||||
triton::driver::cu_stream stream(context);
|
||||
stream.write(da, true, 0, ha);
|
||||
stream.write(db, true, 0, hb);
|
||||
stream.write(dc, true, 0, hc);
|
||||
@@ -134,7 +135,7 @@ int main() {
|
||||
|
||||
|
||||
// benchmark a given matrix multiplication kernel
|
||||
auto benchmark = [&](triton::driver::kernel kernel,
|
||||
auto benchmark = [&](triton::driver::cu_kernel kernel,
|
||||
triton::jit::launch_information info) {
|
||||
// launch info
|
||||
unsigned TM = info.global_range_size[0];
|
||||
@@ -165,7 +166,7 @@ int main() {
|
||||
// benchmark
|
||||
double ts = bench([&](){stream.enqueue(kernel, grid, {nthreads, 1, 1});},
|
||||
[&](){ stream.synchronize(); },
|
||||
context.device());
|
||||
context->device());
|
||||
ts = ts * 1e-9;
|
||||
double tflops = 2*M*N*K / ts * 1e-12;
|
||||
return tflops;
|
||||
@@ -183,7 +184,7 @@ int main() {
|
||||
|
||||
// jit.autotune(src, benchmark);
|
||||
jit.add_module(src, params);
|
||||
triton::driver::kernel kernel = jit.get_function("matmul");
|
||||
triton::driver::cu_kernel kernel = jit.get_function("matmul");
|
||||
triton::jit::launch_information info = jit.get_launch_info("matmul");
|
||||
std::cout << benchmark(kernel, info) << std::endl;
|
||||
stream.read(dc, true, 0, hc);
|
||||
|
@@ -26,6 +26,7 @@
|
||||
#include <map>
|
||||
#include <list>
|
||||
#include <vector>
|
||||
#include "triton/driver/context.h"
|
||||
|
||||
|
||||
namespace triton
|
||||
@@ -44,68 +45,104 @@ class kernel;
|
||||
struct backend
|
||||
{
|
||||
|
||||
// platforms
|
||||
class platforms
|
||||
{
|
||||
friend class backend;
|
||||
private:
|
||||
static void init();
|
||||
|
||||
public:
|
||||
static void get(std::vector<driver::platform*> &results);
|
||||
|
||||
private:
|
||||
static std::vector<driver::platform*> cache_;
|
||||
};
|
||||
|
||||
// devices
|
||||
class devices
|
||||
{
|
||||
friend class backend;
|
||||
|
||||
private:
|
||||
static void init(const std::vector<platform *> &platforms);
|
||||
|
||||
public:
|
||||
static void get(std::vector<driver::device*>& devs);
|
||||
|
||||
private:
|
||||
static std::vector<driver::device*> cache_;
|
||||
};
|
||||
|
||||
// modules
|
||||
class modules
|
||||
{
|
||||
friend class backend;
|
||||
|
||||
public:
|
||||
static void release();
|
||||
static module& get(driver::stream const & stream, std::string const & name, std::string const &src);
|
||||
static driver::module* get(driver::stream* stream, std::string const & name, std::string const &src);
|
||||
|
||||
private:
|
||||
static std::map<std::tuple<stream, std::string>, module * > cache_;
|
||||
static std::map<std::tuple<driver::stream*, std::string>, driver::module*> cache_;
|
||||
};
|
||||
|
||||
// kernels
|
||||
class kernels
|
||||
{
|
||||
friend class backend;
|
||||
public:
|
||||
static void release();
|
||||
static kernel & get(driver::module const & program, std::string const & name);
|
||||
static driver::kernel* get(driver::module* mod, const std::string & name);
|
||||
private:
|
||||
static std::map<std::tuple<module, std::string>, kernel * > cache_;
|
||||
static std::map<std::tuple<module*, std::string>, driver::kernel*> cache_;
|
||||
};
|
||||
|
||||
// contexts
|
||||
class contexts
|
||||
{
|
||||
friend class backend;
|
||||
private:
|
||||
static void init(std::vector<platform> const &);
|
||||
static void init(const std::vector<device *> &);
|
||||
static void release();
|
||||
public:
|
||||
static driver::context const & get_default();
|
||||
template<class T>
|
||||
static driver::context const & import(T ctx)
|
||||
static driver::context* get_default();
|
||||
|
||||
static driver::context* import(CUcontext ctx)
|
||||
{
|
||||
for(driver::context const * x: cache_)
|
||||
if((T)*x==ctx)
|
||||
return *x;
|
||||
cache_.emplace_back(new driver::context(ctx, false));
|
||||
return *cache_.back();
|
||||
for(driver::context* x: cache_){
|
||||
driver::cu_context* cu_x = (driver::cu_context*)x;
|
||||
if(*cu_x->cu()==ctx)
|
||||
return x;
|
||||
}
|
||||
static void get(std::list<context const *> &);
|
||||
cache_.emplace_back(new driver::cu_context(ctx, false));
|
||||
return cache_.back();
|
||||
}
|
||||
|
||||
static void get(std::list<driver::context*> &);
|
||||
|
||||
private:
|
||||
static std::list<context const *> cache_;
|
||||
static std::list<driver::context*> cache_;
|
||||
};
|
||||
|
||||
// streams
|
||||
class streams
|
||||
{
|
||||
friend class backend;
|
||||
private:
|
||||
static void init(std::list<context const *> const &);
|
||||
static void init(std::list<context*> const &);
|
||||
static void release();
|
||||
public:
|
||||
static void get(driver::context const &, std::vector<stream *> &streams);
|
||||
static stream & get(driver::context const &, unsigned int id = 0);
|
||||
static stream & get_default();
|
||||
static void get(driver::context*, std::vector<driver::stream *> &streams);
|
||||
static driver::stream* get(driver::context*, unsigned int id = 0);
|
||||
static driver::stream* get_default();
|
||||
private:
|
||||
static std::map< context, std::vector<stream*> > cache_;
|
||||
static std::map<driver::context*, std::vector<driver::stream*> > cache_;
|
||||
};
|
||||
|
||||
static void init();
|
||||
static void release();
|
||||
|
||||
static std::vector<device> devices();
|
||||
static std::vector<platform> platforms();
|
||||
static void synchronize(driver::context const &);
|
||||
static void synchronize(triton::driver::context *);
|
||||
|
||||
static unsigned int default_device;
|
||||
};
|
||||
|
@@ -31,21 +31,33 @@ namespace triton
|
||||
namespace driver
|
||||
{
|
||||
|
||||
class stream;
|
||||
class cu_stream;
|
||||
|
||||
// Buffer
|
||||
class buffer: public handle_interface<buffer, CUdeviceptr>
|
||||
// Base
|
||||
class buffer : public polymorphic_resource<CUdeviceptr, cl_mem> {
|
||||
public:
|
||||
buffer(driver::context* ctx, CUdeviceptr cl, bool take_ownership);
|
||||
buffer(driver::context* ctx, cl_mem cl, bool take_ownership);
|
||||
driver::context* context();
|
||||
|
||||
protected:
|
||||
driver::context* context_;
|
||||
};
|
||||
|
||||
// OpenCL
|
||||
class ocl_buffer: public buffer
|
||||
{
|
||||
public:
|
||||
buffer(driver::context const & context, size_t size);
|
||||
buffer(driver::context const & context, CUdeviceptr cu, bool take_ownership);
|
||||
void set_zero(stream const & queue, size_t size);
|
||||
handle<CUdeviceptr> const & cu() const;
|
||||
handle<CUdeviceptr> & cu();
|
||||
ocl_buffer(driver::context* context, size_t size);
|
||||
};
|
||||
|
||||
private:
|
||||
context context_;
|
||||
handle<CUdeviceptr> cu_;
|
||||
// CUDA
|
||||
class cu_buffer: public buffer
|
||||
{
|
||||
public:
|
||||
cu_buffer(driver::context* context, size_t size);
|
||||
cu_buffer(driver::context* context, CUdeviceptr cu, bool take_ownership);
|
||||
void set_zero(cu_stream const & queue, size_t size);
|
||||
};
|
||||
|
||||
}
|
||||
|
@@ -31,35 +31,50 @@ namespace triton
|
||||
namespace driver
|
||||
{
|
||||
|
||||
class context: public handle_interface<context, CUcontext>
|
||||
{
|
||||
private:
|
||||
class context: public polymorphic_resource<CUcontext, cl_context>{
|
||||
protected:
|
||||
static std::string get_cache_path();
|
||||
static CUdevice device(CUcontext);
|
||||
|
||||
public:
|
||||
//Constructors
|
||||
explicit context(CUcontext context, bool take_ownership = true);
|
||||
explicit context(driver::device const & dvc);
|
||||
//Accessors
|
||||
driver::device const & device() const;
|
||||
context(driver::device *dev, CUcontext cu, bool take_ownership);
|
||||
context(driver::device *dev, cl_context cl, bool take_ownership);
|
||||
driver::device* device() const;
|
||||
std::string const & cache_path() const;
|
||||
handle<CUcontext> const & cu() const;
|
||||
|
||||
private:
|
||||
handle<CUcontext> cu_;
|
||||
driver::device dvc_;
|
||||
protected:
|
||||
driver::device* dev_;
|
||||
std::string cache_path_;
|
||||
};
|
||||
|
||||
class ContextSwitcher{
|
||||
// CUDA
|
||||
class cu_context: public context {
|
||||
public:
|
||||
ContextSwitcher(driver::context const & ctx);
|
||||
~ContextSwitcher();
|
||||
class context_switcher{
|
||||
public:
|
||||
context_switcher(driver::context const & ctx);
|
||||
~context_switcher();
|
||||
private:
|
||||
driver::cu_context const & ctx_;
|
||||
};
|
||||
|
||||
private:
|
||||
driver::context const & ctx_;
|
||||
static CUdevice get_device_of(CUcontext);
|
||||
|
||||
public:
|
||||
//Constructors
|
||||
cu_context(CUcontext cu, bool take_ownership = true);
|
||||
cu_context(driver::device* dev);
|
||||
};
|
||||
|
||||
// OpenCL
|
||||
class ocl_context: public context {
|
||||
public:
|
||||
ocl_context(driver::device* dev);
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -84,7 +84,7 @@ inline void cublasGemmEx(cublasHandle_t handle, cudaDataType cudt, cublasOperati
|
||||
|
||||
|
||||
/* Simplified API for default GEMM */
|
||||
inline void cublasGemm(DType dtype, stream& stream, char cAT, char cBT, int32_t M, int32_t N, int32_t K, scalar alpha, buffer const & A, int32_t lda, buffer const & B, int32_t ldb, scalar beta, buffer& C, int32_t ldc, cublasGemmAlgo_t* fastest = NULL, cublasGemmAlgo_t algo = CUBLAS_GEMM_DFALT){
|
||||
inline void cublasGemm(DType dtype, stream& stream, char cAT, char cBT, int32_t M, int32_t N, int32_t K, scalar alpha, cu_buffer const & A, int32_t lda, cu_buffer const & B, int32_t ldb, scalar beta, cu_buffer& C, int32_t ldc, cublasGemmAlgo_t* fastest = NULL, cublasGemmAlgo_t algo = CUBLAS_GEMM_DFALT){
|
||||
ContextSwitcher ctx_switch(stream.context());
|
||||
cublasHandle_t handle = dispatch::cublasHandle(stream.context());
|
||||
dispatch::cublasSetStream_v2(handle, (CUstream)stream);
|
||||
@@ -112,7 +112,7 @@ inline cudnnTensorFormat_t format(cudnnDataType_t cutype){
|
||||
}
|
||||
|
||||
inline void cudnnConv(DType dtype, stream& stream, int32_t D, int32_t H, int32_t W, int32_t N, int32_t K, int32_t M, int32_t P, int32_t Q, int32_t C, int32_t T, int32_t R, int32_t S,
|
||||
int32_t pad_d, int32_t pad_h, int32_t pad_w, int32_t stride_d, int32_t stride_h, int32_t stride_w, scalar alpha, buffer const & I, buffer const & F, scalar beta, buffer const & O){
|
||||
int32_t pad_d, int32_t pad_h, int32_t pad_w, int32_t stride_d, int32_t stride_h, int32_t stride_w, scalar alpha, cu_buffer const & I, cu_buffer const & F, scalar beta, cu_buffer const & O){
|
||||
driver::driver::context const & ctx = stream.context();
|
||||
ContextSwitcher switch_ctx(ctx);
|
||||
|
||||
@@ -154,7 +154,7 @@ inline void cudnnConv(DType dtype, stream& stream, int32_t D, int32_t H, int32_t
|
||||
|
||||
size_t workspace_size;
|
||||
dispatch::cudnnGetConvolutionForwardWorkspaceSize(handle, tI, tF, conv, tO, algo, &workspace_size);
|
||||
static buffer work(ctx, 1024*1024*64);
|
||||
static cu_buffer work(ctx, 1024*1024*64);
|
||||
CUdeviceptr twork = work;
|
||||
CUdeviceptr pI = I, pF = F, pO = O;
|
||||
dispatch::cudnnConvolutionForward(handle, alpha.data(), tI, (void*)pI, tF, (void*)pF, conv, algo, (void*)twork, workspace_size, beta.data(), tO, (void*)pO);
|
||||
@@ -162,7 +162,7 @@ inline void cudnnConv(DType dtype, stream& stream, int32_t D, int32_t H, int32_t
|
||||
|
||||
|
||||
inline void cudnnPool(DType dtype, stream& stream, int32_t D, int32_t H, int32_t W, int32_t N, int32_t K, int32_t M, int32_t P, int32_t Q, int32_t T, int32_t R, int32_t S,
|
||||
int32_t pad_d, int32_t pad_h, int32_t pad_w, int32_t stride_d, int32_t stride_h, int32_t stride_w, scalar alpha, buffer const & I, scalar beta, buffer const & O){
|
||||
int32_t pad_d, int32_t pad_h, int32_t pad_w, int32_t stride_d, int32_t stride_h, int32_t stride_w, scalar alpha, cu_buffer const & I, scalar beta, cu_buffer const & O){
|
||||
driver::driver::context const & ctx = stream.context();
|
||||
ContextSwitcher switch_ctx(ctx);
|
||||
|
||||
@@ -200,11 +200,11 @@ inline void cudnnPool(DType dtype, stream& stream, int32_t D, int32_t H, int32_t
|
||||
dispatch::cudnnPoolingForward(handle, desc, alpha.data(), tI, (void*)pI, beta.data(), tO, (void*)pO);
|
||||
}
|
||||
|
||||
inline void cudnnTransformTensor(driver::stream & stream,
|
||||
inline void cudnnTransformTensor(driver::cu_stream & stream,
|
||||
DType in_dtype, DType out_dtype,
|
||||
cudnnTensorFormat_t in_layout, cudnnTensorFormat_t out_layout,
|
||||
int32_t N, int32_t C, int32_t D, int32_t H, int32_t W,
|
||||
scalar alpha, driver::buffer const & I, scalar beta, driver::buffer& O)
|
||||
scalar alpha, driver::cu_buffer const & I, scalar beta, driver::cu_buffer& O)
|
||||
{
|
||||
cudnnHandle_t handle = dispatch::cudnnHandle(stream.context());
|
||||
dispatch::cudnnSetStream(handle, (CUstream)stream);
|
||||
|
@@ -32,9 +32,20 @@ namespace triton
|
||||
namespace driver
|
||||
{
|
||||
|
||||
// Device
|
||||
class device: public handle_interface<device, CUdevice>
|
||||
{
|
||||
// Base device
|
||||
class device: public polymorphic_resource<CUdevice, cl_device_id>{
|
||||
public:
|
||||
using polymorphic_resource::polymorphic_resource;
|
||||
};
|
||||
|
||||
// OpenCL device
|
||||
class ocl_device: public device {
|
||||
public:
|
||||
ocl_device(cl_device_id cl, bool take_ownership = true): device(cl, take_ownership) { }
|
||||
};
|
||||
|
||||
// CUDA device
|
||||
class cu_device: public device {
|
||||
public:
|
||||
//Supported architectures
|
||||
enum class Architecture{
|
||||
@@ -61,14 +72,12 @@ private:
|
||||
inline nvmlDevice_t nvml_device() const;
|
||||
|
||||
public:
|
||||
device(CUdevice cu = CUdevice(), bool take_ownership = true): cu_(cu, take_ownership){}
|
||||
cu_device(CUdevice cu = CUdevice(), bool take_ownership = true): device(cu, take_ownership){}
|
||||
//Accessors
|
||||
Architecture architecture() const;
|
||||
handle<CUdevice> const & cu() const;
|
||||
//Informations
|
||||
std::string infos() const;
|
||||
size_t address_bits() const;
|
||||
driver::platform platform() const;
|
||||
std::vector<size_t> max_block_dim() const;
|
||||
size_t max_threads_per_block() const;
|
||||
size_t max_shared_memory() const;
|
||||
@@ -87,7 +96,6 @@ public:
|
||||
size_t max_mem_clock() const;
|
||||
|
||||
private:
|
||||
handle<CUdevice> cu_;
|
||||
std::shared_ptr<std::pair<size_t, size_t>> interpreted_as_;
|
||||
};
|
||||
|
||||
|
@@ -28,10 +28,11 @@
|
||||
|
||||
//CUDA Backend
|
||||
#include "triton/external/CUDA/cuda.h"
|
||||
#include "triton/external/CUDA/nvrtc.h"
|
||||
#include "triton/external/CUDA/cublas_v2.h"
|
||||
#include "triton/external/CUDA/cudnn.h"
|
||||
#include "triton/external/CUDA/nvml.h"
|
||||
#include "triton/external/CL/cl.h"
|
||||
#include "triton/external/CL/cl_ext.h"
|
||||
|
||||
//Exceptions
|
||||
#include <iostream>
|
||||
@@ -42,10 +43,9 @@ namespace triton
|
||||
namespace driver
|
||||
{
|
||||
|
||||
class context;
|
||||
class cu_context;
|
||||
|
||||
template<class T> void check(T){}
|
||||
void check(nvrtcResult err);
|
||||
void check(CUresult err);
|
||||
void check(cublasStatus_t err);
|
||||
void check(cudnnStatus_t err);
|
||||
@@ -79,14 +79,48 @@ private:
|
||||
}
|
||||
|
||||
public:
|
||||
static bool nvrtcinit();
|
||||
static bool clinit();
|
||||
static bool nvmlinit();
|
||||
static bool cuinit();
|
||||
static bool cublasinit();
|
||||
static bool cudnninit();
|
||||
|
||||
static void release();
|
||||
|
||||
//OpenCL
|
||||
static cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *);
|
||||
static cl_int clEnqueueNDRangeKernel(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *);
|
||||
static cl_int clSetKernelArg(cl_kernel, cl_uint, size_t, const void *);
|
||||
static cl_int clReleaseMemObject(cl_mem);
|
||||
static cl_int clFinish(cl_command_queue);
|
||||
static cl_int clGetMemObjectInfo(cl_mem, cl_mem_info, size_t, void *, size_t *);
|
||||
static cl_int clGetCommandQueueInfo(cl_command_queue, cl_command_queue_info, size_t, void *, size_t *);
|
||||
static cl_int clReleaseContext(cl_context);
|
||||
static cl_int clReleaseEvent(cl_event);
|
||||
static cl_int clEnqueueWriteBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *);
|
||||
static cl_int clEnqueueReadBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void *, cl_uint, const cl_event *, cl_event *);
|
||||
static cl_int clGetProgramBuildInfo(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *);
|
||||
static cl_int clReleaseDevice(cl_device_id);
|
||||
static cl_context clCreateContext(const cl_context_properties *, cl_uint, const cl_device_id *, void (*)(const char *, const void *, size_t, void *), void *, cl_int *);
|
||||
static cl_int clGetDeviceIDs(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *);
|
||||
static cl_int clGetContextInfo(cl_context, cl_context_info, size_t, void *, size_t *);
|
||||
static cl_int clGetDeviceInfo(cl_device_id, cl_device_info, size_t, void *, size_t *);
|
||||
static cl_int clReleaseCommandQueue(cl_command_queue);
|
||||
static cl_int clGetPlatformIDs(cl_uint, cl_platform_id *, cl_uint *);
|
||||
static cl_int clGetPlatformInfo(cl_platform_id, cl_platform_info, size_t, void *, size_t *);
|
||||
static cl_int clGetEventProfilingInfo(cl_event, cl_profiling_info, size_t, void *, size_t *);
|
||||
static cl_program clCreateProgramWithBinary(cl_context, cl_uint, const cl_device_id *, const size_t *, const unsigned char **, cl_int *, cl_int *);
|
||||
static cl_command_queue clCreateCommandQueue(cl_context, cl_device_id, cl_command_queue_properties, cl_int *);
|
||||
static cl_int clRetainEvent(cl_event);
|
||||
static cl_int clReleaseProgram(cl_program);
|
||||
static cl_int clFlush(cl_command_queue);
|
||||
static cl_int clGetProgramInfo(cl_program, cl_program_info, size_t, void *, size_t *);
|
||||
static cl_int clGetKernelInfo(cl_kernel, cl_kernel_info, size_t, void *, size_t *);
|
||||
static cl_int clGetKernelWorkGroupInfo(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void *, size_t *);
|
||||
static cl_kernel clCreateKernel(cl_program, const char *, cl_int *);
|
||||
static cl_mem clCreateBuffer(cl_context, cl_mem_flags, size_t, void *, cl_int *);
|
||||
static cl_program clCreateProgramWithSource(cl_context, cl_uint, const char **, const size_t *, cl_int *);
|
||||
static cl_int clReleaseKernel(cl_kernel);
|
||||
|
||||
//CUDA
|
||||
static CUresult cuCtxGetCurrent(CUcontext *pctx);
|
||||
static CUresult cuCtxSetCurrent(CUcontext ctx);
|
||||
@@ -130,14 +164,7 @@ public:
|
||||
static nvmlReturn_t nvmlDeviceGetClockInfo(nvmlDevice_t device, nvmlClockType_t type, unsigned int *clock);
|
||||
static nvmlReturn_t nvmlDeviceGetMaxClockInfo(nvmlDevice_t device, nvmlClockType_t type, unsigned int *clock);
|
||||
|
||||
static nvrtcResult nvrtcCompileProgram(nvrtcProgram prog, int numOptions, const char **options);
|
||||
static nvrtcResult nvrtcGetProgramLogSize(nvrtcProgram prog, size_t *logSizeRet);
|
||||
static nvrtcResult nvrtcGetPTX(nvrtcProgram prog, char *ptx);
|
||||
static nvrtcResult nvrtcGetPTXSize(nvrtcProgram prog, size_t *ptxSizeRet);
|
||||
static nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char **headers, const char **includeNames);
|
||||
static nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char *log);
|
||||
|
||||
static cublasHandle_t cublasHandle(driver::context const & ctx);
|
||||
static cublasHandle_t cublasHandle(driver::cu_context const & ctx);
|
||||
static cublasStatus_t cublasCreate_v2(cublasHandle_t* h);
|
||||
static cublasStatus_t cublasGetStream_v2(cublasHandle_t h, cudaStream_t *streamId);
|
||||
static cublasStatus_t cublasSetStream_v2(cublasHandle_t h, cudaStream_t streamId);
|
||||
@@ -146,7 +173,7 @@ public:
|
||||
static cublasStatus_t cublasHgemm (cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, half* alpha, const half *A, int lda, const half *B, int ldb, half* beta, half *C, int ldc);
|
||||
static cublasStatus_t cublasGemmEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void *alpha, const void *A, cudaDataType Atype, int lda, const void *B, cudaDataType Btype, int ldb, const void *beta, void *C, cudaDataType Ctype, int ldc, cudaDataType computeType, cublasGemmAlgo_t algo);
|
||||
|
||||
static cudnnHandle_t cudnnHandle(driver::context const & ctx);
|
||||
static cudnnHandle_t cudnnHandle(driver::cu_context const & ctx);
|
||||
static cudnnStatus_t cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc);
|
||||
static cudnnStatus_t cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t* convDesc);
|
||||
static cudnnStatus_t cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc);
|
||||
@@ -167,13 +194,50 @@ public:
|
||||
static cudnnStatus_t cudnnTransformTensor(cudnnHandle_t handle, const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, const cudnnTensorDescriptor_t yDesc, void *y);
|
||||
|
||||
private:
|
||||
|
||||
// Libraries
|
||||
static void* opencl_;
|
||||
static void* cuda_;
|
||||
static void* nvrtc_;
|
||||
static void* nvml_;
|
||||
static void* cublas_;
|
||||
static void* cudnn_;
|
||||
|
||||
//CUDA
|
||||
// OpenCL functions
|
||||
static void* clBuildProgram_;
|
||||
static void* clEnqueueNDRangeKernel_;
|
||||
static void* clSetKernelArg_;
|
||||
static void* clReleaseMemObject_;
|
||||
static void* clFinish_;
|
||||
static void* clGetMemObjectInfo_;
|
||||
static void* clGetCommandQueueInfo_;
|
||||
static void* clReleaseContext_;
|
||||
static void* clReleaseEvent_;
|
||||
static void* clEnqueueWriteBuffer_;
|
||||
static void* clEnqueueReadBuffer_;
|
||||
static void* clGetProgramBuildInfo_;
|
||||
static void* clReleaseDevice_;
|
||||
static void* clCreateContext_;
|
||||
static void* clGetDeviceIDs_;
|
||||
static void* clGetContextInfo_;
|
||||
static void* clGetDeviceInfo_;
|
||||
static void* clReleaseCommandQueue_;
|
||||
static void* clGetPlatformIDs_;
|
||||
static void* clGetPlatformInfo_;
|
||||
static void* clGetEventProfilingInfo_;
|
||||
static void* clCreateProgramWithBinary_;
|
||||
static void* clCreateCommandQueue_;
|
||||
static void* clRetainEvent_;
|
||||
static void* clReleaseProgram_;
|
||||
static void* clFlush_;
|
||||
static void* clGetProgramInfo_;
|
||||
static void* clGetKernelInfo_;
|
||||
static void* clGetKernelWorkGroupInfo_;
|
||||
static void* clCreateKernel_;
|
||||
static void* clCreateBuffer_;
|
||||
static void* clCreateProgramWithSource_;
|
||||
static void* clReleaseKernel_;
|
||||
|
||||
// CUDA functions
|
||||
static void* cuCtxGetCurrent_;
|
||||
static void* cuCtxSetCurrent_;
|
||||
static void* cuCtxDestroy_v2_;
|
||||
@@ -188,7 +252,6 @@ private:
|
||||
static void* cuDeviceGetName_;
|
||||
static void* cuDeviceGetPCIBusId_;
|
||||
static void* cuModuleGetGlobal_v2_;
|
||||
|
||||
static void* cuMemcpyHtoDAsync_v2_;
|
||||
static void* cuModuleLoad_;
|
||||
static void* cuLaunchKernel_;
|
||||
@@ -210,19 +273,12 @@ private:
|
||||
static void* cuMemsetD8Async_;
|
||||
static void* cuCtxPushCurrent_v2_;
|
||||
static void* cuCtxPopCurrent_v2_;
|
||||
|
||||
// NVML
|
||||
static void* nvmlInit_v2_;
|
||||
static void* nvmlDeviceGetHandleByPciBusId_v2_;
|
||||
static void* nvmlDeviceGetClockInfo_;
|
||||
static void* nvmlDeviceGetMaxClockInfo_;
|
||||
|
||||
static void* nvrtcCompileProgram_;
|
||||
static void* nvrtcGetProgramLogSize_;
|
||||
static void* nvrtcGetPTX_;
|
||||
static void* nvrtcGetPTXSize_;
|
||||
static void* nvrtcCreateProgram_;
|
||||
static void* nvrtcGetProgramLog_;
|
||||
|
||||
// cuBLAS
|
||||
static void* cublasCreate_v2_;
|
||||
static void* cublasGetStream_v2_;
|
||||
static void* cublasSetStream_v2_;
|
||||
@@ -230,7 +286,7 @@ private:
|
||||
static void* cublasSgemm_v2_;
|
||||
static void* cublasDgemm_v2_;
|
||||
static void* cublasGemmEx_;
|
||||
|
||||
// cuDNN
|
||||
static void* cudnnCreateConvolutionDescriptor_;
|
||||
static void* cudnnCreatePoolingDescriptor_;
|
||||
static void* cudnnCreateTensorDescriptor_;
|
||||
|
@@ -32,7 +32,7 @@ namespace driver
|
||||
{
|
||||
|
||||
// Event
|
||||
class Event: public handle_interface<Event, cu_event_t>
|
||||
class Event
|
||||
{
|
||||
public:
|
||||
float elapsed_time() const;
|
||||
|
@@ -41,8 +41,8 @@ struct cu_event_t{
|
||||
CUevent second;
|
||||
};
|
||||
|
||||
struct cu_platform{
|
||||
cu_platform() : status_(dispatch::cuInit(0)) { }
|
||||
struct CUPlatform{
|
||||
CUPlatform() : status_(dispatch::cuInit(0)) { }
|
||||
operator bool() const { return status_; }
|
||||
private:
|
||||
CUresult status_;
|
||||
@@ -76,6 +76,22 @@ protected:
|
||||
bool has_ownership_;
|
||||
};
|
||||
|
||||
template<class CUType, class CLType>
|
||||
class polymorphic_resource {
|
||||
public:
|
||||
polymorphic_resource(CUType cu, bool take_ownership): cu_(cu, take_ownership){}
|
||||
polymorphic_resource(CLType cl, bool take_ownership): cl_(cl, take_ownership){}
|
||||
|
||||
handle<CUType> cu() { return cu_; }
|
||||
handle<CLType> cl() { return cl_; }
|
||||
const handle<CUType>& cu() const { return cu_; }
|
||||
const handle<CLType>& cl() const { return cl_; }
|
||||
|
||||
protected:
|
||||
handle<CLType> cl_;
|
||||
handle<CUType> cu_;
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -34,28 +34,38 @@ namespace triton
|
||||
namespace driver
|
||||
{
|
||||
|
||||
class buffer;
|
||||
class cu_buffer;
|
||||
|
||||
// Kernel
|
||||
class kernel: public handle_interface<kernel, CUfunction>
|
||||
{
|
||||
// Base
|
||||
class kernel: public polymorphic_resource<CUfunction, cl_kernel> {
|
||||
public:
|
||||
kernel(driver::module* program, CUfunction fn, bool has_ownership);
|
||||
kernel(driver::module* program, cl_kernel fn, bool has_ownership);
|
||||
driver::module* module();
|
||||
|
||||
private:
|
||||
driver::module* program_;
|
||||
};
|
||||
|
||||
// OpenCL
|
||||
class ocl_kernel: public kernel {
|
||||
};
|
||||
|
||||
// CUDA
|
||||
class cu_kernel: public kernel {
|
||||
public:
|
||||
//Constructors
|
||||
kernel(driver::module const & program, const char * name);
|
||||
//Accessors
|
||||
handle<CUfunction> const & cu() const;
|
||||
driver::module const & module() const;
|
||||
cu_kernel(driver::module* program, const char * name);
|
||||
//Arguments setters
|
||||
void setArg(unsigned int index, std::size_t size, void* ptr);
|
||||
void setArg(unsigned int index, buffer const &);
|
||||
void setArg(unsigned int index, cu_buffer const &);
|
||||
template<class T> void setArg(unsigned int index, T value) { setArg(index, sizeof(T), (void*)&value); }
|
||||
//Arguments getters
|
||||
void* const* cu_params() const;
|
||||
|
||||
private:
|
||||
handle<CUfunction> cu_;
|
||||
driver::module program_;
|
||||
unsigned int address_bits_;
|
||||
driver::cu_module* program_;
|
||||
std::vector<std::shared_ptr<void> > cu_params_store_;
|
||||
std::vector<void*> cu_params_;
|
||||
};
|
||||
|
@@ -39,25 +39,30 @@ namespace triton
|
||||
namespace driver
|
||||
{
|
||||
|
||||
class context;
|
||||
class device;
|
||||
class cu_context;
|
||||
class cu_device;
|
||||
|
||||
class module: public handle_interface<module, CUmodule>
|
||||
{
|
||||
static std::string header(device const & device);
|
||||
class module: public polymorphic_resource<CUmodule, cl_program> {
|
||||
public:
|
||||
module(driver::context* ctx, CUmodule mod, bool has_ownership);
|
||||
module(driver::context* ctx, cl_program mod, bool has_ownership);
|
||||
driver::context* context() const;
|
||||
|
||||
protected:
|
||||
driver::context* ctx_;
|
||||
};
|
||||
|
||||
class cu_module: public module {
|
||||
static std::string header(driver::cu_device const & device);
|
||||
std::string compile_llvm_module(llvm::Module* module);
|
||||
void init_llvm();
|
||||
|
||||
public:
|
||||
module(driver::context const & context, llvm::Module *module);
|
||||
module(driver::context const & context, const std::string& source);
|
||||
driver::context const & context() const;
|
||||
handle<CUmodule> const & cu() const;
|
||||
buffer symbol(const char * name) const;
|
||||
cu_module(driver::context* context, llvm::Module *module);
|
||||
cu_module(driver::context* context, const std::string& source);
|
||||
cu_buffer symbol(const char * name) const;
|
||||
|
||||
private:
|
||||
handle<CUmodule> cu_;
|
||||
driver::context context_;
|
||||
std::string source_;
|
||||
};
|
||||
|
||||
|
@@ -39,12 +39,37 @@ class device;
|
||||
class platform
|
||||
{
|
||||
public:
|
||||
//Accessors
|
||||
std::string name() const { return "CUDA"; }
|
||||
std::string version() const;
|
||||
std::vector<device> devices() const;
|
||||
// Constructor
|
||||
platform(const std::string& name): name_(name){ }
|
||||
// Accessors
|
||||
std::string name() const { return name_; }
|
||||
// Virtual methods
|
||||
virtual std::string version() const = 0;
|
||||
virtual void devices(std::vector<driver::device *> &devices) const = 0;
|
||||
private:
|
||||
handle<cu_platform> cu_;
|
||||
std::string name_;
|
||||
};
|
||||
|
||||
class cu_platform: public platform
|
||||
{
|
||||
public:
|
||||
cu_platform(): platform("CUDA") { }
|
||||
std::string version() const;
|
||||
void devices(std::vector<driver::device*> &devices) const;
|
||||
|
||||
private:
|
||||
handle<CUPlatform> cu_;
|
||||
};
|
||||
|
||||
class cl_platform: public platform
|
||||
{
|
||||
public:
|
||||
cl_platform(cl_platform_id cl): platform("OpenCL"), cl_(cl) { }
|
||||
std::string version() const;
|
||||
void devices(std::vector<driver::device*> &devices) const;
|
||||
|
||||
private:
|
||||
handle<cl_platform_id> cl_;
|
||||
};
|
||||
|
||||
}
|
||||
|
@@ -35,43 +35,55 @@ namespace triton
|
||||
namespace driver
|
||||
{
|
||||
|
||||
class kernel;
|
||||
class cu_kernel;
|
||||
class Event;
|
||||
class Range;
|
||||
class buffer;
|
||||
class cu_buffer;
|
||||
|
||||
// Command Queue
|
||||
class stream: public handle_interface<stream, CUstream>
|
||||
{
|
||||
// Base
|
||||
class stream: public polymorphic_resource<CUstream, cl_command_queue> {
|
||||
public:
|
||||
stream(driver::context *ctx, CUstream, bool has_ownership);
|
||||
stream(driver::context *ctx, cl_command_queue, bool has_ownership);
|
||||
driver::context* context() const;
|
||||
virtual void synchronize() = 0;
|
||||
|
||||
protected:
|
||||
driver::context *ctx_;
|
||||
};
|
||||
|
||||
// OpenCL
|
||||
class cl_stream: public stream {
|
||||
public:
|
||||
// Constructors
|
||||
cl_stream(driver::context *ctx);
|
||||
|
||||
// Synchronize
|
||||
void synchronize();
|
||||
};
|
||||
|
||||
// CUDA
|
||||
class cu_stream: public stream {
|
||||
public:
|
||||
//Constructors
|
||||
stream(CUstream stream, bool take_ownership);
|
||||
stream(driver::context const & context);
|
||||
|
||||
//Accessors
|
||||
handle<CUstream> const & cu() const;
|
||||
driver::context const & context() const;
|
||||
cu_stream(CUstream str, bool take_ownership);
|
||||
cu_stream(driver::context* context);
|
||||
|
||||
//Synchronize
|
||||
void synchronize();
|
||||
|
||||
//Enqueue
|
||||
void enqueue(kernel const & kernel, std::array<size_t, 3> grid, std::array<size_t, 3> block, std::vector<Event> const * = NULL, Event *event = NULL);
|
||||
void enqueue(cu_kernel const & cu_kernel, std::array<size_t, 3> grid, std::array<size_t, 3> block, std::vector<Event> const * = NULL, Event *event = NULL);
|
||||
|
||||
// Write
|
||||
void write(driver::buffer const & buffer, bool blocking, std::size_t offset, std::size_t size, void const* ptr);
|
||||
|
||||
template<class T> void write(driver::buffer const & buffer, bool blocking, std::size_t offset, std::vector<T> const & x)
|
||||
void write(driver::cu_buffer const & cu_buffer, bool blocking, std::size_t offset, std::size_t size, void const* ptr);
|
||||
template<class T> void write(driver::cu_buffer const & buffer, bool blocking, std::size_t offset, std::vector<T> const & x)
|
||||
{ write(buffer, blocking, offset, x.size()*sizeof(T), x.data()); }
|
||||
|
||||
// Read
|
||||
void read(driver::buffer const & buffer, bool blocking, std::size_t offset, std::size_t size, void* ptr);
|
||||
|
||||
template<class T> void read(driver::buffer const & buffer, bool blocking, std::size_t offset, std::vector<T>& x)
|
||||
void read(driver::cu_buffer const & cu_buffer, bool blocking, std::size_t offset, std::size_t size, void* ptr);
|
||||
template<class T> void read(driver::cu_buffer const & buffer, bool blocking, std::size_t offset, std::vector<T>& x)
|
||||
{ read(buffer, blocking, offset, x.size()*sizeof(T), x.data()); }
|
||||
private:
|
||||
driver::context context_;
|
||||
handle<CUstream> cu_;
|
||||
};
|
||||
|
||||
|
||||
|
1468
include/triton/external/CL/cl.h
vendored
Normal file
1468
include/triton/external/CL/cl.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
12947
include/triton/external/CL/cl.hpp
vendored
Normal file
12947
include/triton/external/CL/cl.hpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
9677
include/triton/external/CL/cl2.hpp
vendored
Normal file
9677
include/triton/external/CL/cl2.hpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
131
include/triton/external/CL/cl_d3d10.h
vendored
Normal file
131
include/triton/external/CL/cl_d3d10.h
vendored
Normal file
@@ -0,0 +1,131 @@
|
||||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2015 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
|
||||
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
|
||||
|
||||
#ifndef __OPENCL_CL_D3D10_H
|
||||
#define __OPENCL_CL_D3D10_H
|
||||
|
||||
#include <d3d10.h>
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/******************************************************************************
|
||||
* cl_khr_d3d10_sharing */
|
||||
#define cl_khr_d3d10_sharing 1
|
||||
|
||||
typedef cl_uint cl_d3d10_device_source_khr;
|
||||
typedef cl_uint cl_d3d10_device_set_khr;
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/* Error Codes */
|
||||
#define CL_INVALID_D3D10_DEVICE_KHR -1002
|
||||
#define CL_INVALID_D3D10_RESOURCE_KHR -1003
|
||||
#define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004
|
||||
#define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005
|
||||
|
||||
/* cl_d3d10_device_source_nv */
|
||||
#define CL_D3D10_DEVICE_KHR 0x4010
|
||||
#define CL_D3D10_DXGI_ADAPTER_KHR 0x4011
|
||||
|
||||
/* cl_d3d10_device_set_nv */
|
||||
#define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012
|
||||
#define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014
|
||||
#define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_D3D10_RESOURCE_KHR 0x4015
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017
|
||||
#define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)(
|
||||
cl_platform_id platform,
|
||||
cl_d3d10_device_source_khr d3d_device_source,
|
||||
void * d3d_object,
|
||||
cl_d3d10_device_set_khr d3d_device_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id * devices,
|
||||
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D10Buffer * resource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D10Texture2D * resource,
|
||||
UINT subresource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D10Texture3D * resource,
|
||||
UINT subresource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_D3D10_H */
|
||||
|
131
include/triton/external/CL/cl_d3d11.h
vendored
Normal file
131
include/triton/external/CL/cl_d3d11.h
vendored
Normal file
@@ -0,0 +1,131 @@
|
||||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2015 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
|
||||
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
|
||||
|
||||
#ifndef __OPENCL_CL_D3D11_H
|
||||
#define __OPENCL_CL_D3D11_H
|
||||
|
||||
#include <d3d11.h>
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/******************************************************************************
|
||||
* cl_khr_d3d11_sharing */
|
||||
#define cl_khr_d3d11_sharing 1
|
||||
|
||||
typedef cl_uint cl_d3d11_device_source_khr;
|
||||
typedef cl_uint cl_d3d11_device_set_khr;
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/* Error Codes */
|
||||
#define CL_INVALID_D3D11_DEVICE_KHR -1006
|
||||
#define CL_INVALID_D3D11_RESOURCE_KHR -1007
|
||||
#define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR -1008
|
||||
#define CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR -1009
|
||||
|
||||
/* cl_d3d11_device_source */
|
||||
#define CL_D3D11_DEVICE_KHR 0x4019
|
||||
#define CL_D3D11_DXGI_ADAPTER_KHR 0x401A
|
||||
|
||||
/* cl_d3d11_device_set */
|
||||
#define CL_PREFERRED_DEVICES_FOR_D3D11_KHR 0x401B
|
||||
#define CL_ALL_DEVICES_FOR_D3D11_KHR 0x401C
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_D3D11_DEVICE_KHR 0x401D
|
||||
#define CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR 0x402D
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_D3D11_RESOURCE_KHR 0x401E
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_D3D11_SUBRESOURCE_KHR 0x401F
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR 0x4020
|
||||
#define CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR 0x4021
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)(
|
||||
cl_platform_id platform,
|
||||
cl_d3d11_device_source_khr d3d_device_source,
|
||||
void * d3d_object,
|
||||
cl_d3d11_device_set_khr d3d_device_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id * devices,
|
||||
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D11Buffer * resource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D11Texture2D * resource,
|
||||
UINT subresource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D11Texture3D * resource,
|
||||
UINT subresource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_D3D11_H */
|
||||
|
132
include/triton/external/CL/cl_dx9_media_sharing.h
vendored
Normal file
132
include/triton/external/CL/cl_dx9_media_sharing.h
vendored
Normal file
@@ -0,0 +1,132 @@
|
||||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2015 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
|
||||
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
|
||||
|
||||
#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_H
|
||||
#define __OPENCL_CL_DX9_MEDIA_SHARING_H
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/******************************************************************************/
|
||||
/* cl_khr_dx9_media_sharing */
|
||||
#define cl_khr_dx9_media_sharing 1
|
||||
|
||||
typedef cl_uint cl_dx9_media_adapter_type_khr;
|
||||
typedef cl_uint cl_dx9_media_adapter_set_khr;
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include <d3d9.h>
|
||||
typedef struct _cl_dx9_surface_info_khr
|
||||
{
|
||||
IDirect3DSurface9 *resource;
|
||||
HANDLE shared_handle;
|
||||
} cl_dx9_surface_info_khr;
|
||||
#endif
|
||||
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/* Error Codes */
|
||||
#define CL_INVALID_DX9_MEDIA_ADAPTER_KHR -1010
|
||||
#define CL_INVALID_DX9_MEDIA_SURFACE_KHR -1011
|
||||
#define CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR -1012
|
||||
#define CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR -1013
|
||||
|
||||
/* cl_media_adapter_type_khr */
|
||||
#define CL_ADAPTER_D3D9_KHR 0x2020
|
||||
#define CL_ADAPTER_D3D9EX_KHR 0x2021
|
||||
#define CL_ADAPTER_DXVA_KHR 0x2022
|
||||
|
||||
/* cl_media_adapter_set_khr */
|
||||
#define CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2023
|
||||
#define CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2024
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_ADAPTER_D3D9_KHR 0x2025
|
||||
#define CL_CONTEXT_ADAPTER_D3D9EX_KHR 0x2026
|
||||
#define CL_CONTEXT_ADAPTER_DXVA_KHR 0x2027
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR 0x2028
|
||||
#define CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR 0x2029
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_DX9_MEDIA_PLANE_KHR 0x202A
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR 0x202B
|
||||
#define CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR 0x202C
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_fn)(
|
||||
cl_platform_id platform,
|
||||
cl_uint num_media_adapters,
|
||||
cl_dx9_media_adapter_type_khr * media_adapter_type,
|
||||
void * media_adapters,
|
||||
cl_dx9_media_adapter_set_khr media_adapter_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id * devices,
|
||||
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_dx9_media_adapter_type_khr adapter_type,
|
||||
void * surface_info,
|
||||
cl_uint plane,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_H */
|
||||
|
182
include/triton/external/CL/cl_dx9_media_sharing_intel.h
vendored
Normal file
182
include/triton/external/CL/cl_dx9_media_sharing_intel.h
vendored
Normal file
@@ -0,0 +1,182 @@
|
||||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2016 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
/*****************************************************************************\
|
||||
|
||||
Copyright (c) 2013-2016 Intel Corporation All Rights Reserved.
|
||||
|
||||
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
|
||||
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
File Name: cl_dx9_media_sharing_intel.h
|
||||
|
||||
Abstract:
|
||||
|
||||
Notes:
|
||||
|
||||
\*****************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H
|
||||
#define __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
#include <d3d9.h>
|
||||
#include <dxvahd.h>
|
||||
#include <wtypes.h>
|
||||
#include <d3d9types.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/***************************************
|
||||
* cl_intel_dx9_media_sharing extension *
|
||||
****************************************/
|
||||
|
||||
#define cl_intel_dx9_media_sharing 1
|
||||
|
||||
typedef cl_uint cl_dx9_device_source_intel;
|
||||
typedef cl_uint cl_dx9_device_set_intel;
|
||||
|
||||
/* error codes */
|
||||
#define CL_INVALID_DX9_DEVICE_INTEL -1010
|
||||
#define CL_INVALID_DX9_RESOURCE_INTEL -1011
|
||||
#define CL_DX9_RESOURCE_ALREADY_ACQUIRED_INTEL -1012
|
||||
#define CL_DX9_RESOURCE_NOT_ACQUIRED_INTEL -1013
|
||||
|
||||
/* cl_dx9_device_source_intel */
|
||||
#define CL_D3D9_DEVICE_INTEL 0x4022
|
||||
#define CL_D3D9EX_DEVICE_INTEL 0x4070
|
||||
#define CL_DXVA_DEVICE_INTEL 0x4071
|
||||
|
||||
/* cl_dx9_device_set_intel */
|
||||
#define CL_PREFERRED_DEVICES_FOR_DX9_INTEL 0x4024
|
||||
#define CL_ALL_DEVICES_FOR_DX9_INTEL 0x4025
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_D3D9_DEVICE_INTEL 0x4026
|
||||
#define CL_CONTEXT_D3D9EX_DEVICE_INTEL 0x4072
|
||||
#define CL_CONTEXT_DXVA_DEVICE_INTEL 0x4073
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_DX9_RESOURCE_INTEL 0x4027
|
||||
#define CL_MEM_DX9_SHARED_HANDLE_INTEL 0x4074
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_DX9_PLANE_INTEL 0x4075
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL 0x402A
|
||||
#define CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL 0x402B
|
||||
/******************************************************************************/
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetDeviceIDsFromDX9INTEL(
|
||||
cl_platform_id /* platform */,
|
||||
cl_dx9_device_source_intel /* dx9_device_source */,
|
||||
void* /* dx9_object */,
|
||||
cl_dx9_device_set_intel /* dx9_device_set */,
|
||||
cl_uint /* num_entries */,
|
||||
cl_device_id* /* devices */,
|
||||
cl_uint* /* num_devices */) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL* clGetDeviceIDsFromDX9INTEL_fn)(
|
||||
cl_platform_id /* platform */,
|
||||
cl_dx9_device_source_intel /* dx9_device_source */,
|
||||
void* /* dx9_object */,
|
||||
cl_dx9_device_set_intel /* dx9_device_set */,
|
||||
cl_uint /* num_entries */,
|
||||
cl_device_id* /* devices */,
|
||||
cl_uint* /* num_devices */) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromDX9MediaSurfaceINTEL(
|
||||
cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
IDirect3DSurface9* /* resource */,
|
||||
HANDLE /* sharedHandle */,
|
||||
UINT /* plane */,
|
||||
cl_int* /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceINTEL_fn)(
|
||||
cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
IDirect3DSurface9* /* resource */,
|
||||
HANDLE /* sharedHandle */,
|
||||
UINT /* plane */,
|
||||
cl_int* /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireDX9ObjectsINTEL(
|
||||
cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_objects */,
|
||||
const cl_mem* /* mem_objects */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */,
|
||||
cl_event* /* event */) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9ObjectsINTEL_fn)(
|
||||
cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_objects */,
|
||||
const cl_mem* /* mem_objects */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */,
|
||||
cl_event* /* event */) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseDX9ObjectsINTEL(
|
||||
cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_objects */,
|
||||
cl_mem* /* mem_objects */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */,
|
||||
cl_event* /* event */) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9ObjectsINTEL_fn)(
|
||||
cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_objects */,
|
||||
cl_mem* /* mem_objects */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */,
|
||||
cl_event* /* event */) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H */
|
||||
|
136
include/triton/external/CL/cl_egl.h
vendored
Normal file
136
include/triton/external/CL/cl_egl.h
vendored
Normal file
@@ -0,0 +1,136 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2015 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_EGL_H
|
||||
#define __OPENCL_CL_EGL_H
|
||||
|
||||
#ifdef __APPLE__
|
||||
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/* Command type for events created with clEnqueueAcquireEGLObjectsKHR */
|
||||
#define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR 0x202F
|
||||
#define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR 0x202D
|
||||
#define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR 0x202E
|
||||
|
||||
/* Error type for clCreateFromEGLImageKHR */
|
||||
#define CL_INVALID_EGL_OBJECT_KHR -1093
|
||||
#define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR -1092
|
||||
|
||||
/* CLeglImageKHR is an opaque handle to an EGLImage */
|
||||
typedef void* CLeglImageKHR;
|
||||
|
||||
/* CLeglDisplayKHR is an opaque handle to an EGLDisplay */
|
||||
typedef void* CLeglDisplayKHR;
|
||||
|
||||
/* CLeglSyncKHR is an opaque handle to an EGLSync object */
|
||||
typedef void* CLeglSyncKHR;
|
||||
|
||||
/* properties passed to clCreateFromEGLImageKHR */
|
||||
typedef intptr_t cl_egl_image_properties_khr;
|
||||
|
||||
|
||||
#define cl_khr_egl_image 1
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromEGLImageKHR(cl_context /* context */,
|
||||
CLeglDisplayKHR /* egldisplay */,
|
||||
CLeglImageKHR /* eglimage */,
|
||||
cl_mem_flags /* flags */,
|
||||
const cl_egl_image_properties_khr * /* properties */,
|
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)(
|
||||
cl_context context,
|
||||
CLeglDisplayKHR egldisplay,
|
||||
CLeglImageKHR eglimage,
|
||||
cl_mem_flags flags,
|
||||
const cl_egl_image_properties_khr * properties,
|
||||
cl_int * errcode_ret);
|
||||
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireEGLObjectsKHR(cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_objects */,
|
||||
const cl_mem * /* mem_objects */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event);
|
||||
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseEGLObjectsKHR(cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_objects */,
|
||||
const cl_mem * /* mem_objects */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event);
|
||||
|
||||
|
||||
#define cl_khr_egl_event 1
|
||||
|
||||
extern CL_API_ENTRY cl_event CL_API_CALL
|
||||
clCreateEventFromEGLSyncKHR(cl_context /* context */,
|
||||
CLeglSyncKHR /* sync */,
|
||||
CLeglDisplayKHR /* display */,
|
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)(
|
||||
cl_context context,
|
||||
CLeglSyncKHR sync,
|
||||
CLeglDisplayKHR display,
|
||||
cl_int * errcode_ret);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_EGL_H */
|
670
include/triton/external/CL/cl_ext.h
vendored
Normal file
670
include/triton/external/CL/cl_ext.h
vendored
Normal file
@@ -0,0 +1,670 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2015 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
******************************************************************************/
|
||||
|
||||
/* $Revision: 11928 $ on $Date: 2010-07-13 09:04:56 -0700 (Tue, 13 Jul 2010) $ */
|
||||
|
||||
/* cl_ext.h contains OpenCL extensions which don't have external */
|
||||
/* (OpenGL, D3D) dependencies. */
|
||||
|
||||
#ifndef __CL_EXT_H
|
||||
#define __CL_EXT_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/cl.h>
|
||||
#include <AvailabilityMacros.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
/* cl_khr_fp64 extension - no extension #define since it has no functions */
|
||||
#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
|
||||
|
||||
/* cl_khr_fp16 extension - no extension #define since it has no functions */
|
||||
#define CL_DEVICE_HALF_FP_CONFIG 0x1033
|
||||
|
||||
/* Memory object destruction
|
||||
*
|
||||
* Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR
|
||||
*
|
||||
* Registers a user callback function that will be called when the memory object is deleted and its resources
|
||||
* freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback
|
||||
* stack associated with memobj. The registered user callback functions are called in the reverse order in
|
||||
* which they were registered. The user callback functions are called and then the memory object is deleted
|
||||
* and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be
|
||||
* notified when the memory referenced by host_ptr, specified when the memory object is created and used as
|
||||
* the storage bits for the memory object, can be reused or freed.
|
||||
*
|
||||
* The application may not call CL api's with the cl_mem object passed to the pfn_notify.
|
||||
*
|
||||
* Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
|
||||
* before using.
|
||||
*/
|
||||
#define cl_APPLE_SetMemObjectDestructor 1
|
||||
cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem /* memobj */,
|
||||
void (* /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
|
||||
void * /*user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
|
||||
/* Context Logging Functions
|
||||
*
|
||||
* The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
|
||||
* Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
|
||||
* before using.
|
||||
*
|
||||
* clLogMessagesToSystemLog fowards on all log messages to the Apple System Logger
|
||||
*/
|
||||
#define cl_APPLE_ContextLoggingFunctions 1
|
||||
extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * /* errstr */,
|
||||
const void * /* private_info */,
|
||||
size_t /* cb */,
|
||||
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */
|
||||
extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * /* errstr */,
|
||||
const void * /* private_info */,
|
||||
size_t /* cb */,
|
||||
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */
|
||||
extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * /* errstr */,
|
||||
const void * /* private_info */,
|
||||
size_t /* cb */,
|
||||
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
|
||||
/************************
|
||||
* cl_khr_icd extension *
|
||||
************************/
|
||||
#define cl_khr_icd 1
|
||||
|
||||
/* cl_platform_info */
|
||||
#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920
|
||||
|
||||
/* Additional Error Codes */
|
||||
#define CL_PLATFORM_NOT_FOUND_KHR -1001
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clIcdGetPlatformIDsKHR(cl_uint /* num_entries */,
|
||||
cl_platform_id * /* platforms */,
|
||||
cl_uint * /* num_platforms */);
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(
|
||||
cl_uint /* num_entries */,
|
||||
cl_platform_id * /* platforms */,
|
||||
cl_uint * /* num_platforms */);
|
||||
|
||||
|
||||
/* Extension: cl_khr_image2D_buffer
|
||||
*
|
||||
* This extension allows a 2D image to be created from a cl_mem buffer without a copy.
|
||||
* The type associated with a 2D image created from a buffer in an OpenCL program is image2d_t.
|
||||
* Both the sampler and sampler-less read_image built-in functions are supported for 2D images
|
||||
* and 2D images created from a buffer. Similarly, the write_image built-ins are also supported
|
||||
* for 2D images created from a buffer.
|
||||
*
|
||||
* When the 2D image from buffer is created, the client must specify the width,
|
||||
* height, image format (i.e. channel order and channel data type) and optionally the row pitch
|
||||
*
|
||||
* The pitch specified must be a multiple of CL_DEVICE_IMAGE_PITCH_ALIGNMENT pixels.
|
||||
* The base address of the buffer must be aligned to CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT pixels.
|
||||
*/
|
||||
|
||||
/*************************************
|
||||
* cl_khr_initalize_memory extension *
|
||||
*************************************/
|
||||
|
||||
#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x2030
|
||||
|
||||
|
||||
/**************************************
|
||||
* cl_khr_terminate_context extension *
|
||||
**************************************/
|
||||
|
||||
#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x2031
|
||||
#define CL_CONTEXT_TERMINATE_KHR 0x2032
|
||||
|
||||
#define cl_khr_terminate_context 1
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clTerminateContextKHR(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clTerminateContextKHR_fn)(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
|
||||
/*
|
||||
* Extension: cl_khr_spir
|
||||
*
|
||||
* This extension adds support to create an OpenCL program object from a
|
||||
* Standard Portable Intermediate Representation (SPIR) instance
|
||||
*/
|
||||
|
||||
#define CL_DEVICE_SPIR_VERSIONS 0x40E0
|
||||
#define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE 0x40E1
|
||||
|
||||
|
||||
/*****************************************
|
||||
* cl_khr_create_command_queue extension *
|
||||
*****************************************/
|
||||
#define cl_khr_create_command_queue 1
|
||||
|
||||
typedef cl_bitfield cl_queue_properties_khr;
|
||||
|
||||
extern CL_API_ENTRY cl_command_queue CL_API_CALL
|
||||
clCreateCommandQueueWithPropertiesKHR( cl_context /* context */,
|
||||
cl_device_id /* device */,
|
||||
const cl_queue_properties_khr* /* properties */,
|
||||
cl_int* /* errcode_ret */ ) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
typedef CL_API_ENTRY cl_command_queue
|
||||
(CL_API_CALL *clCreateCommandQueueWithPropertiesKHR_fn)( cl_context /* context */,
|
||||
cl_device_id /* device */,
|
||||
const cl_queue_properties_khr* /* properties */,
|
||||
cl_int* /* errcode_ret */ ) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
|
||||
/******************************************
|
||||
* cl_nv_device_attribute_query extension *
|
||||
******************************************/
|
||||
/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
|
||||
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
|
||||
#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001
|
||||
#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002
|
||||
#define CL_DEVICE_WARP_SIZE_NV 0x4003
|
||||
#define CL_DEVICE_GPU_OVERLAP_NV 0x4004
|
||||
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005
|
||||
#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006
|
||||
|
||||
/*********************************
|
||||
* cl_amd_device_memory_flags *
|
||||
*********************************/
|
||||
#define cl_amd_device_memory_flags 1
|
||||
|
||||
#define CL_MEM_USE_PERSISTENT_MEM_AMD (1 << 6) // Alloc from GPU's CPU visible heap
|
||||
|
||||
/* cl_device_info */
|
||||
#define CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT 0x4032
|
||||
|
||||
/*********************************
|
||||
* cl_amd_device_attribute_query *
|
||||
*********************************/
|
||||
#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036
|
||||
#define CL_DEVICE_TOPOLOGY_AMD 0x4037
|
||||
#define CL_DEVICE_BOARD_NAME_AMD 0x4038
|
||||
#define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD 0x4039
|
||||
#define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD 0x4040
|
||||
#define CL_DEVICE_SIMD_WIDTH_AMD 0x4041
|
||||
#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042
|
||||
#define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043
|
||||
#define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD 0x4044
|
||||
#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD 0x4045
|
||||
#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD 0x4046
|
||||
#define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD 0x4047
|
||||
#define CL_DEVICE_LOCAL_MEM_BANKS_AMD 0x4048
|
||||
|
||||
typedef union
|
||||
{
|
||||
struct { cl_uint type; cl_uint data[5]; } raw;
|
||||
struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie;
|
||||
} cl_device_topology_amd;
|
||||
|
||||
#define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD 1
|
||||
|
||||
|
||||
/**************************
|
||||
* cl_amd_offline_devices *
|
||||
**************************/
|
||||
#define CL_CONTEXT_OFFLINE_DEVICES_AMD 0x403F
|
||||
|
||||
/*********************************
|
||||
* cl_arm_printf extension
|
||||
*********************************/
|
||||
#define CL_PRINTF_CALLBACK_ARM 0x40B0
|
||||
#define CL_PRINTF_BUFFERSIZE_ARM 0x40B1
|
||||
|
||||
#ifdef CL_VERSION_1_1
|
||||
/***********************************
|
||||
* cl_ext_device_fission extension *
|
||||
***********************************/
|
||||
#define cl_ext_device_fission 1
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clReleaseDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL *clReleaseDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clRetainDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL *clRetainDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef cl_ulong cl_device_partition_property_ext;
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clCreateSubDevicesEXT( cl_device_id /*in_device*/,
|
||||
const cl_device_partition_property_ext * /* properties */,
|
||||
cl_uint /*num_entries*/,
|
||||
cl_device_id * /*out_devices*/,
|
||||
cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
( CL_API_CALL * clCreateSubDevicesEXT_fn)( cl_device_id /*in_device*/,
|
||||
const cl_device_partition_property_ext * /* properties */,
|
||||
cl_uint /*num_entries*/,
|
||||
cl_device_id * /*out_devices*/,
|
||||
cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
/* cl_device_partition_property_ext */
|
||||
#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050
|
||||
#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051
|
||||
#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052
|
||||
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053
|
||||
|
||||
/* clDeviceGetInfo selectors */
|
||||
#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054
|
||||
#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055
|
||||
#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056
|
||||
#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057
|
||||
#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058
|
||||
|
||||
/* error codes */
|
||||
#define CL_DEVICE_PARTITION_FAILED_EXT -1057
|
||||
#define CL_INVALID_PARTITION_COUNT_EXT -1058
|
||||
#define CL_INVALID_PARTITION_NAME_EXT -1059
|
||||
|
||||
/* CL_AFFINITY_DOMAINs */
|
||||
#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1
|
||||
#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2
|
||||
#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3
|
||||
#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4
|
||||
#define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10
|
||||
#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100
|
||||
|
||||
/* cl_device_partition_property_ext list terminators */
|
||||
#define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0)
|
||||
#define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0)
|
||||
#define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1)
|
||||
|
||||
/* cl_ext_atomic_counters_32 and cl_ext_atomic_counters_64 extensions
|
||||
* no extension #define since they have no functions
|
||||
*/
|
||||
#define CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT 0x4032
|
||||
|
||||
/*********************************
|
||||
* cl_qcom_ext_host_ptr extension
|
||||
*********************************/
|
||||
|
||||
#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29)
|
||||
|
||||
#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0
|
||||
#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1
|
||||
#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2
|
||||
#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3
|
||||
#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4
|
||||
#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5
|
||||
#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6
|
||||
#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7
|
||||
|
||||
typedef cl_uint cl_image_pitch_info_qcom;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetDeviceImageInfoQCOM(cl_device_id device,
|
||||
size_t image_width,
|
||||
size_t image_height,
|
||||
const cl_image_format *image_format,
|
||||
cl_image_pitch_info_qcom param_name,
|
||||
size_t param_value_size,
|
||||
void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
typedef struct _cl_mem_ext_host_ptr
|
||||
{
|
||||
/* Type of external memory allocation. */
|
||||
/* Legal values will be defined in layered extensions. */
|
||||
cl_uint allocation_type;
|
||||
|
||||
/* Host cache policy for this external memory allocation. */
|
||||
cl_uint host_cache_policy;
|
||||
|
||||
} cl_mem_ext_host_ptr;
|
||||
|
||||
/*********************************
|
||||
* cl_qcom_ion_host_ptr extension
|
||||
*********************************/
|
||||
|
||||
#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8
|
||||
|
||||
typedef struct _cl_mem_ion_host_ptr
|
||||
{
|
||||
/* Type of external memory allocation. */
|
||||
/* Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. */
|
||||
cl_mem_ext_host_ptr ext_host_ptr;
|
||||
|
||||
/* ION file descriptor */
|
||||
int ion_filedesc;
|
||||
|
||||
/* Host pointer to the ION allocated memory */
|
||||
void* ion_hostptr;
|
||||
|
||||
} cl_mem_ion_host_ptr;
|
||||
|
||||
#endif /* CL_VERSION_1_1 */
|
||||
|
||||
#if defined(CL_VERSION_1_2)
|
||||
|
||||
/******************************************
|
||||
* cl_img_yuv_image extension *
|
||||
******************************************/
|
||||
|
||||
/* Image formats used in clCreateImage */
|
||||
#define CL_NV21_IMG 0x40D0
|
||||
#define CL_YV12_IMG 0x40D1
|
||||
|
||||
/******************************************
|
||||
* cl_img_cached_allocations extension *
|
||||
******************************************/
|
||||
|
||||
/* Flag values used by clCreteBuffer */
|
||||
#define CL_MEM_USE_UNCACHED_CPU_MEMORY_IMG (1 << 26)
|
||||
#define CL_MEM_USE_CACHED_CPU_MEMORY_IMG (1 << 27)
|
||||
|
||||
/******************************************
|
||||
* cl_img_use_gralloc_ptr extension *
|
||||
******************************************/
|
||||
|
||||
/* Flag values used by clCreteBuffer */
|
||||
#define CL_MEM_USE_GRALLOC_PTR_IMG (1 << 28)
|
||||
|
||||
/* To be used by clGetEventInfo: */
|
||||
#define CL_COMMAND_ACQUIRE_GRALLOC_OBJECTS_IMG 0x40D2
|
||||
#define CL_COMMAND_RELEASE_GRALLOC_OBJECTS_IMG 0x40D3
|
||||
|
||||
/* Error code from clEnqueueReleaseGrallocObjectsIMG */
|
||||
#define CL_GRALLOC_RESOURCE_NOT_ACQUIRED_IMG 0x40D4
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireGrallocObjectsIMG(cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_objects */,
|
||||
const cl_mem * /* mem_objects */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseGrallocObjectsIMG(cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_objects */,
|
||||
const cl_mem * /* mem_objects */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
#endif /* CL_VERSION_1_2 */
|
||||
|
||||
#ifdef CL_VERSION_2_0
|
||||
/*********************************
|
||||
* cl_khr_subgroups extension
|
||||
*********************************/
|
||||
#define cl_khr_subgroups 1
|
||||
|
||||
/* cl_kernel_sub_group_info is declared in CL.h. */
|
||||
|
||||
/* cl_kernel_sub_group_info */
|
||||
#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR 0x2033
|
||||
#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR 0x2034
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetKernelSubGroupInfoKHR(cl_kernel /* in_kernel */,
|
||||
cl_device_id /*in_device*/,
|
||||
cl_kernel_sub_group_info /* param_name */,
|
||||
size_t /*input_value_size*/,
|
||||
const void * /*input_value*/,
|
||||
size_t /*param_value_size*/,
|
||||
void* /*param_value*/,
|
||||
size_t* /*param_value_size_ret*/ ) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
( CL_API_CALL * clGetKernelSubGroupInfoKHR_fn)(cl_kernel /* in_kernel */,
|
||||
cl_device_id /*in_device*/,
|
||||
cl_kernel_sub_group_info /* param_name */,
|
||||
size_t /*input_value_size*/,
|
||||
const void * /*input_value*/,
|
||||
size_t /*param_value_size*/,
|
||||
void* /*param_value*/,
|
||||
size_t* /*param_value_size_ret*/ ) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED;
|
||||
#endif /* CL_VERSION_2_0 */
|
||||
|
||||
#ifdef CL_VERSION_2_1
|
||||
/*********************************
|
||||
* cl_khr_priority_hints extension
|
||||
*********************************/
|
||||
#define cl_khr_priority_hints 1
|
||||
|
||||
typedef cl_uint cl_queue_priority_khr;
|
||||
|
||||
/* cl_command_queue_properties */
|
||||
#define CL_QUEUE_PRIORITY_KHR 0x1096
|
||||
|
||||
/* cl_queue_priority_khr */
|
||||
#define CL_QUEUE_PRIORITY_HIGH_KHR (1<<0)
|
||||
#define CL_QUEUE_PRIORITY_MED_KHR (1<<1)
|
||||
#define CL_QUEUE_PRIORITY_LOW_KHR (1<<2)
|
||||
|
||||
#endif /* CL_VERSION_2_1 */
|
||||
|
||||
#ifdef CL_VERSION_2_1
|
||||
/*********************************
|
||||
* cl_khr_throttle_hints extension
|
||||
*********************************/
|
||||
#define cl_khr_throttle_hints 1
|
||||
|
||||
typedef cl_uint cl_queue_throttle_khr;
|
||||
|
||||
/* cl_command_queue_properties */
|
||||
#define CL_QUEUE_THROTTLE_KHR 0x1097
|
||||
|
||||
/* cl_queue_throttle_khr */
|
||||
#define CL_QUEUE_THROTTLE_HIGH_KHR (1<<0)
|
||||
#define CL_QUEUE_THROTTLE_MED_KHR (1<<1)
|
||||
#define CL_QUEUE_THROTTLE_LOW_KHR (1<<2)
|
||||
|
||||
#endif /* CL_VERSION_2_1 */
|
||||
|
||||
#ifdef CL_VERSION_2_2
|
||||
/*********************************
|
||||
* cl_khr_subgroup_named_barrier
|
||||
*********************************/
|
||||
#define cl_khr_subgroup_named_barrier 1
|
||||
|
||||
/* cl_device_info */
|
||||
#define CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR 0x2035
|
||||
|
||||
#endif /* CL_VERSION_2_2 */
|
||||
|
||||
/**********************************
|
||||
* cl_arm_import_memory extension *
|
||||
**********************************/
|
||||
|
||||
#ifdef CL_VERSION_1_0
|
||||
|
||||
typedef intptr_t cl_import_properties_arm;
|
||||
|
||||
/* Default and valid proporties name for cl_arm_import_memory */
|
||||
#define CL_IMPORT_TYPE_ARM 0x40B2
|
||||
|
||||
/* Host process memory type default value for CL_IMPORT_TYPE_ARM property */
|
||||
#define CL_IMPORT_TYPE_HOST_ARM 0x40B3
|
||||
|
||||
/* DMA BUF memory type value for CL_IMPORT_TYPE_ARM property */
|
||||
#define CL_IMPORT_TYPE_DMA_BUF_ARM 0x40B4
|
||||
|
||||
/* Secure DMA BUF memory type value for CL_IMPORT_TYPE_ARM property */
|
||||
#define CL_IMPORT_TYPE_SECURE_ARM 0x40B5
|
||||
|
||||
/* This extension adds a new function that allows for direct memory import into
|
||||
* OpenCL via the clImportMemoryARM function.
|
||||
*
|
||||
* Memory imported through this interface will be mapped into the device's page
|
||||
* tables directly, providing zero copy access. It will never fall back to copy
|
||||
* operations and aliased buffers.
|
||||
*
|
||||
* Types of memory supported for import are specified as additional extension
|
||||
* strings.
|
||||
*
|
||||
* This extension produces cl_mem allocations which are compatible with all other
|
||||
* users of cl_mem in the standard API.
|
||||
*
|
||||
* This extension maps pages with the same properties as the normal buffer creation
|
||||
* function clCreateBuffer.
|
||||
*/
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clImportMemoryARM( cl_context context,
|
||||
cl_mem_flags flags,
|
||||
const cl_import_properties_arm *properties,
|
||||
void *memory,
|
||||
size_t size,
|
||||
cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
|
||||
#endif /* CL_VERSION_1_0 */
|
||||
|
||||
/******************************************
|
||||
* cl_arm_shared_virtual_memory extension *
|
||||
******************************************/
|
||||
|
||||
#ifdef CL_VERSION_1_2
|
||||
|
||||
/* Used by clGetDeviceInfo */
|
||||
#define CL_DEVICE_SVM_CAPABILITIES_ARM 0x40B6
|
||||
|
||||
/* Used by clGetMemObjectInfo */
|
||||
#define CL_MEM_USES_SVM_POINTER_ARM 0x40B7
|
||||
|
||||
/* Used by clSetKernelExecInfoARM: */
|
||||
#define CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM 0x40B8
|
||||
#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM 0x40B9
|
||||
|
||||
/* To be used by clGetEventInfo: */
|
||||
#define CL_COMMAND_SVM_FREE_ARM 0x40BA
|
||||
#define CL_COMMAND_SVM_MEMCPY_ARM 0x40BB
|
||||
#define CL_COMMAND_SVM_MEMFILL_ARM 0x40BC
|
||||
#define CL_COMMAND_SVM_MAP_ARM 0x40BD
|
||||
#define CL_COMMAND_SVM_UNMAP_ARM 0x40BE
|
||||
|
||||
/* Flag values returned by clGetDeviceInfo with CL_DEVICE_SVM_CAPABILITIES_ARM as the param_name. */
|
||||
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_ARM (1 << 0)
|
||||
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_ARM (1 << 1)
|
||||
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_ARM (1 << 2)
|
||||
#define CL_DEVICE_SVM_ATOMICS_ARM (1 << 3)
|
||||
|
||||
/* Flag values used by clSVMAllocARM: */
|
||||
#define CL_MEM_SVM_FINE_GRAIN_BUFFER_ARM (1 << 10)
|
||||
#define CL_MEM_SVM_ATOMICS_ARM (1 << 11)
|
||||
|
||||
typedef cl_bitfield cl_svm_mem_flags_arm;
|
||||
typedef cl_uint cl_kernel_exec_info_arm;
|
||||
typedef cl_bitfield cl_device_svm_capabilities_arm;
|
||||
|
||||
extern CL_API_ENTRY void * CL_API_CALL
|
||||
clSVMAllocARM(cl_context /* context */,
|
||||
cl_svm_mem_flags_arm /* flags */,
|
||||
size_t /* size */,
|
||||
cl_uint /* alignment */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY void CL_API_CALL
|
||||
clSVMFreeARM(cl_context /* context */,
|
||||
void * /* svm_pointer */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueSVMFreeARM(cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_svm_pointers */,
|
||||
void *[] /* svm_pointers[] */,
|
||||
void (CL_CALLBACK * /*pfn_free_func*/)(cl_command_queue /* queue */,
|
||||
cl_uint /* num_svm_pointers */,
|
||||
void *[] /* svm_pointers[] */,
|
||||
void * /* user_data */),
|
||||
void * /* user_data */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueSVMMemcpyARM(cl_command_queue /* command_queue */,
|
||||
cl_bool /* blocking_copy */,
|
||||
void * /* dst_ptr */,
|
||||
const void * /* src_ptr */,
|
||||
size_t /* size */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueSVMMemFillARM(cl_command_queue /* command_queue */,
|
||||
void * /* svm_ptr */,
|
||||
const void * /* pattern */,
|
||||
size_t /* pattern_size */,
|
||||
size_t /* size */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueSVMMapARM(cl_command_queue /* command_queue */,
|
||||
cl_bool /* blocking_map */,
|
||||
cl_map_flags /* flags */,
|
||||
void * /* svm_ptr */,
|
||||
size_t /* size */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueSVMUnmapARM(cl_command_queue /* command_queue */,
|
||||
void * /* svm_ptr */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clSetKernelArgSVMPointerARM(cl_kernel /* kernel */,
|
||||
cl_uint /* arg_index */,
|
||||
const void * /* arg_value */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clSetKernelExecInfoARM(cl_kernel /* kernel */,
|
||||
cl_kernel_exec_info_arm /* param_name */,
|
||||
size_t /* param_value_size */,
|
||||
const void * /* param_value */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
#endif /* CL_VERSION_1_2 */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* __CL_EXT_H */
|
429
include/triton/external/CL/cl_ext_intel.h
vendored
Normal file
429
include/triton/external/CL/cl_ext_intel.h
vendored
Normal file
@@ -0,0 +1,429 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2017 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
******************************************************************************/
|
||||
/*****************************************************************************\
|
||||
|
||||
Copyright (c) 2013-2017 Intel Corporation All Rights Reserved.
|
||||
|
||||
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
|
||||
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
File Name: cl_ext_intel.h
|
||||
|
||||
Abstract:
|
||||
|
||||
Notes:
|
||||
|
||||
\*****************************************************************************/
|
||||
|
||||
#ifndef __CL_EXT_INTEL_H
|
||||
#define __CL_EXT_INTEL_H
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/cl.h>
|
||||
#include <OpenCL/cl_platform.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/***************************************
|
||||
* cl_intel_thread_local_exec extension *
|
||||
****************************************/
|
||||
|
||||
#define cl_intel_thread_local_exec 1
|
||||
|
||||
#define CL_QUEUE_THREAD_LOCAL_EXEC_ENABLE_INTEL (((cl_bitfield)1) << 31)
|
||||
|
||||
/***********************************************
|
||||
* cl_intel_device_partition_by_names extension *
|
||||
************************************************/
|
||||
|
||||
#define cl_intel_device_partition_by_names 1
|
||||
|
||||
#define CL_DEVICE_PARTITION_BY_NAMES_INTEL 0x4052
|
||||
#define CL_PARTITION_BY_NAMES_LIST_END_INTEL -1
|
||||
|
||||
/************************************************
|
||||
* cl_intel_accelerator extension *
|
||||
* cl_intel_motion_estimation extension *
|
||||
* cl_intel_advanced_motion_estimation extension *
|
||||
*************************************************/
|
||||
|
||||
#define cl_intel_accelerator 1
|
||||
#define cl_intel_motion_estimation 1
|
||||
#define cl_intel_advanced_motion_estimation 1
|
||||
|
||||
typedef struct _cl_accelerator_intel* cl_accelerator_intel;
|
||||
typedef cl_uint cl_accelerator_type_intel;
|
||||
typedef cl_uint cl_accelerator_info_intel;
|
||||
|
||||
typedef struct _cl_motion_estimation_desc_intel {
|
||||
cl_uint mb_block_type;
|
||||
cl_uint subpixel_mode;
|
||||
cl_uint sad_adjust_mode;
|
||||
cl_uint search_path_type;
|
||||
} cl_motion_estimation_desc_intel;
|
||||
|
||||
/* error codes */
|
||||
#define CL_INVALID_ACCELERATOR_INTEL -1094
|
||||
#define CL_INVALID_ACCELERATOR_TYPE_INTEL -1095
|
||||
#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL -1096
|
||||
#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL -1097
|
||||
|
||||
/* cl_accelerator_type_intel */
|
||||
#define CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL 0x0
|
||||
|
||||
/* cl_accelerator_info_intel */
|
||||
#define CL_ACCELERATOR_DESCRIPTOR_INTEL 0x4090
|
||||
#define CL_ACCELERATOR_REFERENCE_COUNT_INTEL 0x4091
|
||||
#define CL_ACCELERATOR_CONTEXT_INTEL 0x4092
|
||||
#define CL_ACCELERATOR_TYPE_INTEL 0x4093
|
||||
|
||||
/* cl_motion_detect_desc_intel flags */
|
||||
#define CL_ME_MB_TYPE_16x16_INTEL 0x0
|
||||
#define CL_ME_MB_TYPE_8x8_INTEL 0x1
|
||||
#define CL_ME_MB_TYPE_4x4_INTEL 0x2
|
||||
|
||||
#define CL_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0
|
||||
#define CL_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1
|
||||
#define CL_ME_SUBPIXEL_MODE_QPEL_INTEL 0x2
|
||||
|
||||
#define CL_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0
|
||||
#define CL_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x1
|
||||
|
||||
#define CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL 0x0
|
||||
#define CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL 0x1
|
||||
#define CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL 0x5
|
||||
|
||||
#define CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL 0x0
|
||||
#define CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL 0x1
|
||||
#define CL_ME_LUMA_INTRA_PREDICT_ENABLED_INTEL 0x2
|
||||
#define CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL 0x4
|
||||
|
||||
#define CL_ME_FORWARD_INPUT_MODE_INTEL 0x1
|
||||
#define CL_ME_BACKWARD_INPUT_MODE_INTEL 0x2
|
||||
#define CL_ME_BIDIRECTION_INPUT_MODE_INTEL 0x3
|
||||
|
||||
#define CL_ME_BIDIR_WEIGHT_QUARTER_INTEL 16
|
||||
#define CL_ME_BIDIR_WEIGHT_THIRD_INTEL 21
|
||||
#define CL_ME_BIDIR_WEIGHT_HALF_INTEL 32
|
||||
#define CL_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 43
|
||||
#define CL_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 48
|
||||
|
||||
#define CL_ME_COST_PENALTY_NONE_INTEL 0x0
|
||||
#define CL_ME_COST_PENALTY_LOW_INTEL 0x1
|
||||
#define CL_ME_COST_PENALTY_NORMAL_INTEL 0x2
|
||||
#define CL_ME_COST_PENALTY_HIGH_INTEL 0x3
|
||||
|
||||
#define CL_ME_COST_PRECISION_QPEL_INTEL 0x0
|
||||
#define CL_ME_COST_PRECISION_HPEL_INTEL 0x1
|
||||
#define CL_ME_COST_PRECISION_PEL_INTEL 0x2
|
||||
#define CL_ME_COST_PRECISION_DPEL_INTEL 0x3
|
||||
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3
|
||||
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8
|
||||
|
||||
#define CL_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0
|
||||
#define CL_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
|
||||
#define CL_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2
|
||||
#define CL_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3
|
||||
|
||||
/* cl_device_info */
|
||||
#define CL_DEVICE_ME_VERSION_INTEL 0x407E
|
||||
|
||||
#define CL_ME_VERSION_LEGACY_INTEL 0x0
|
||||
#define CL_ME_VERSION_ADVANCED_VER_1_INTEL 0x1
|
||||
#define CL_ME_VERSION_ADVANCED_VER_2_INTEL 0x2
|
||||
|
||||
extern CL_API_ENTRY cl_accelerator_intel CL_API_CALL
|
||||
clCreateAcceleratorINTEL(
|
||||
cl_context /* context */,
|
||||
cl_accelerator_type_intel /* accelerator_type */,
|
||||
size_t /* descriptor_size */,
|
||||
const void* /* descriptor */,
|
||||
cl_int* /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_accelerator_intel (CL_API_CALL *clCreateAcceleratorINTEL_fn)(
|
||||
cl_context /* context */,
|
||||
cl_accelerator_type_intel /* accelerator_type */,
|
||||
size_t /* descriptor_size */,
|
||||
const void* /* descriptor */,
|
||||
cl_int* /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetAcceleratorInfoINTEL(
|
||||
cl_accelerator_intel /* accelerator */,
|
||||
cl_accelerator_info_intel /* param_name */,
|
||||
size_t /* param_value_size */,
|
||||
void* /* param_value */,
|
||||
size_t* /* param_value_size_ret */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetAcceleratorInfoINTEL_fn)(
|
||||
cl_accelerator_intel /* accelerator */,
|
||||
cl_accelerator_info_intel /* param_name */,
|
||||
size_t /* param_value_size */,
|
||||
void* /* param_value */,
|
||||
size_t* /* param_value_size_ret */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clRetainAcceleratorINTEL(
|
||||
cl_accelerator_intel /* accelerator */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clRetainAcceleratorINTEL_fn)(
|
||||
cl_accelerator_intel /* accelerator */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clReleaseAcceleratorINTEL(
|
||||
cl_accelerator_intel /* accelerator */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clReleaseAcceleratorINTEL_fn)(
|
||||
cl_accelerator_intel /* accelerator */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
/******************************************
|
||||
* cl_intel_simultaneous_sharing extension *
|
||||
*******************************************/
|
||||
|
||||
#define cl_intel_simultaneous_sharing 1
|
||||
|
||||
#define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL 0x4104
|
||||
#define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL 0x4105
|
||||
|
||||
/***********************************
|
||||
* cl_intel_egl_image_yuv extension *
|
||||
************************************/
|
||||
|
||||
#define cl_intel_egl_image_yuv 1
|
||||
|
||||
#define CL_EGL_YUV_PLANE_INTEL 0x4107
|
||||
|
||||
/********************************
|
||||
* cl_intel_packed_yuv extension *
|
||||
*********************************/
|
||||
|
||||
#define cl_intel_packed_yuv 1
|
||||
|
||||
#define CL_YUYV_INTEL 0x4076
|
||||
#define CL_UYVY_INTEL 0x4077
|
||||
#define CL_YVYU_INTEL 0x4078
|
||||
#define CL_VYUY_INTEL 0x4079
|
||||
|
||||
/********************************************
|
||||
* cl_intel_required_subgroup_size extension *
|
||||
*********************************************/
|
||||
|
||||
#define cl_intel_required_subgroup_size 1
|
||||
|
||||
#define CL_DEVICE_SUB_GROUP_SIZES_INTEL 0x4108
|
||||
#define CL_KERNEL_SPILL_MEM_SIZE_INTEL 0x4109
|
||||
#define CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL 0x410A
|
||||
|
||||
/****************************************
|
||||
* cl_intel_driver_diagnostics extension *
|
||||
*****************************************/
|
||||
|
||||
#define cl_intel_driver_diagnostics 1
|
||||
|
||||
typedef cl_uint cl_diagnostics_verbose_level;
|
||||
|
||||
#define CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL 0x4106
|
||||
|
||||
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL ( 0xff )
|
||||
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL ( 1 )
|
||||
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL ( 1 << 1 )
|
||||
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL ( 1 << 2 )
|
||||
|
||||
/********************************
|
||||
* cl_intel_planar_yuv extension *
|
||||
*********************************/
|
||||
|
||||
#define CL_NV12_INTEL 0x410E
|
||||
|
||||
#define CL_MEM_NO_ACCESS_INTEL ( 1 << 24 )
|
||||
#define CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL ( 1 << 25 )
|
||||
|
||||
#define CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL 0x417E
|
||||
#define CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL 0x417F
|
||||
|
||||
/*******************************************************
|
||||
* cl_intel_device_side_avc_motion_estimation extension *
|
||||
********************************************************/
|
||||
|
||||
#define CL_DEVICE_AVC_ME_VERSION_INTEL 0x410B
|
||||
#define CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL 0x410C
|
||||
#define CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL 0x410D
|
||||
|
||||
#define CL_AVC_ME_VERSION_0_INTEL 0x0; // No support.
|
||||
#define CL_AVC_ME_VERSION_1_INTEL 0x1; // First supported version.
|
||||
|
||||
#define CL_AVC_ME_MAJOR_16x16_INTEL 0x0
|
||||
#define CL_AVC_ME_MAJOR_16x8_INTEL 0x1
|
||||
#define CL_AVC_ME_MAJOR_8x16_INTEL 0x2
|
||||
#define CL_AVC_ME_MAJOR_8x8_INTEL 0x3
|
||||
|
||||
#define CL_AVC_ME_MINOR_8x8_INTEL 0x0
|
||||
#define CL_AVC_ME_MINOR_8x4_INTEL 0x1
|
||||
#define CL_AVC_ME_MINOR_4x8_INTEL 0x2
|
||||
#define CL_AVC_ME_MINOR_4x4_INTEL 0x3
|
||||
|
||||
#define CL_AVC_ME_MAJOR_FORWARD_INTEL 0x0
|
||||
#define CL_AVC_ME_MAJOR_BACKWARD_INTEL 0x1
|
||||
#define CL_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2
|
||||
|
||||
#define CL_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0
|
||||
#define CL_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E
|
||||
#define CL_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D
|
||||
#define CL_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B
|
||||
#define CL_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77
|
||||
#define CL_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F
|
||||
#define CL_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F
|
||||
#define CL_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F
|
||||
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_16x12_RADIUS_INTEL 0x9
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_4x4_RADIUS_INTEL 0x2
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_2x2_RADIUS_INTEL 0xa
|
||||
|
||||
#define CL_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0
|
||||
#define CL_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2
|
||||
|
||||
#define CL_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0
|
||||
#define CL_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1
|
||||
#define CL_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3
|
||||
|
||||
#define CL_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0
|
||||
#define CL_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1
|
||||
#define CL_AVC_ME_COST_PRECISION_PEL_INTEL 0x2
|
||||
#define CL_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3
|
||||
|
||||
#define CL_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10
|
||||
#define CL_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15
|
||||
#define CL_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20
|
||||
#define CL_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B
|
||||
#define CL_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30
|
||||
|
||||
#define CL_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0
|
||||
#define CL_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2
|
||||
#define CL_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4
|
||||
#define CL_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8
|
||||
|
||||
#define CL_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0
|
||||
#define CL_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000
|
||||
|
||||
#define CL_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL ( 0x1 << 24 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL ( 0x2 << 24 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL ( 0x3 << 24 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL ( 0x55 << 24 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL ( 0xAA << 24 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL ( 0xFF << 24 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL ( 0x1 << 24 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL ( 0x2 << 24 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL ( 0x1 << 26 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL ( 0x2 << 26 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL ( 0x1 << 28 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL ( 0x2 << 28 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL ( 0x1 << 30 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL ( 0x2 << 30 )
|
||||
|
||||
#define CL_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00
|
||||
#define CL_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80
|
||||
|
||||
#define CL_AVC_ME_INTRA_16x16_INTEL 0x0
|
||||
#define CL_AVC_ME_INTRA_8x8_INTEL 0x1
|
||||
#define CL_AVC_ME_INTRA_4x4_INTEL 0x2
|
||||
|
||||
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6
|
||||
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5
|
||||
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3
|
||||
|
||||
#define CL_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60
|
||||
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10
|
||||
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8
|
||||
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4
|
||||
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8
|
||||
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0
|
||||
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
|
||||
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2
|
||||
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3
|
||||
|
||||
#define CL_AVC_ME_FRAME_FORWARD_INTEL 0x1
|
||||
#define CL_AVC_ME_FRAME_BACKWARD_INTEL 0x2
|
||||
#define CL_AVC_ME_FRAME_DUAL_INTEL 0x3
|
||||
|
||||
#define CL_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0
|
||||
#define CL_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1
|
||||
#define CL_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2
|
||||
|
||||
#define CL_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0
|
||||
#define CL_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __CL_EXT_INTEL_H */
|
||||
|
167
include/triton/external/CL/cl_gl.h
vendored
Normal file
167
include/triton/external/CL/cl_gl.h
vendored
Normal file
@@ -0,0 +1,167 @@
|
||||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2015 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_GL_H
|
||||
#define __OPENCL_CL_GL_H
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/cl.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef cl_uint cl_gl_object_type;
|
||||
typedef cl_uint cl_gl_texture_info;
|
||||
typedef cl_uint cl_gl_platform_info;
|
||||
typedef struct __GLsync *cl_GLsync;
|
||||
|
||||
/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */
|
||||
#define CL_GL_OBJECT_BUFFER 0x2000
|
||||
#define CL_GL_OBJECT_TEXTURE2D 0x2001
|
||||
#define CL_GL_OBJECT_TEXTURE3D 0x2002
|
||||
#define CL_GL_OBJECT_RENDERBUFFER 0x2003
|
||||
#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E
|
||||
#define CL_GL_OBJECT_TEXTURE1D 0x200F
|
||||
#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010
|
||||
#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011
|
||||
|
||||
/* cl_gl_texture_info */
|
||||
#define CL_GL_TEXTURE_TARGET 0x2004
|
||||
#define CL_GL_MIPMAP_LEVEL 0x2005
|
||||
#define CL_GL_NUM_SAMPLES 0x2012
|
||||
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLBuffer(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
cl_GLuint /* bufobj */,
|
||||
int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
cl_GLenum /* target */,
|
||||
cl_GLint /* miplevel */,
|
||||
cl_GLuint /* texture */,
|
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLRenderbuffer(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
cl_GLuint /* renderbuffer */,
|
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetGLObjectInfo(cl_mem /* memobj */,
|
||||
cl_gl_object_type * /* gl_object_type */,
|
||||
cl_GLuint * /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetGLTextureInfo(cl_mem /* memobj */,
|
||||
cl_gl_texture_info /* param_name */,
|
||||
size_t /* param_value_size */,
|
||||
void * /* param_value */,
|
||||
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireGLObjects(cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_objects */,
|
||||
const cl_mem * /* mem_objects */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseGLObjects(cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_objects */,
|
||||
const cl_mem * /* mem_objects */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
|
||||
/* Deprecated OpenCL 1.1 APIs */
|
||||
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture2D(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
cl_GLenum /* target */,
|
||||
cl_GLint /* miplevel */,
|
||||
cl_GLuint /* texture */,
|
||||
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||
|
||||
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture3D(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
cl_GLenum /* target */,
|
||||
cl_GLint /* miplevel */,
|
||||
cl_GLuint /* texture */,
|
||||
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||
|
||||
/* cl_khr_gl_sharing extension */
|
||||
|
||||
#define cl_khr_gl_sharing 1
|
||||
|
||||
typedef cl_uint cl_gl_context_info;
|
||||
|
||||
/* Additional Error Codes */
|
||||
#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000
|
||||
|
||||
/* cl_gl_context_info */
|
||||
#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006
|
||||
#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007
|
||||
|
||||
/* Additional cl_context_properties */
|
||||
#define CL_GL_CONTEXT_KHR 0x2008
|
||||
#define CL_EGL_DISPLAY_KHR 0x2009
|
||||
#define CL_GLX_DISPLAY_KHR 0x200A
|
||||
#define CL_WGL_HDC_KHR 0x200B
|
||||
#define CL_CGL_SHAREGROUP_KHR 0x200C
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetGLContextInfoKHR(const cl_context_properties * /* properties */,
|
||||
cl_gl_context_info /* param_name */,
|
||||
size_t /* param_value_size */,
|
||||
void * /* param_value */,
|
||||
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
|
||||
const cl_context_properties * properties,
|
||||
cl_gl_context_info param_name,
|
||||
size_t param_value_size,
|
||||
void * param_value,
|
||||
size_t * param_value_size_ret);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_GL_H */
|
74
include/triton/external/CL/cl_gl_ext.h
vendored
Normal file
74
include/triton/external/CL/cl_gl_ext.h
vendored
Normal file
@@ -0,0 +1,74 @@
|
||||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2015 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
|
||||
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
|
||||
|
||||
/* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have */
|
||||
/* OpenGL dependencies. */
|
||||
|
||||
#ifndef __OPENCL_CL_GL_EXT_H
|
||||
#define __OPENCL_CL_GL_EXT_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/cl_gl.h>
|
||||
#else
|
||||
#include <CL/cl_gl.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* For each extension, follow this template
|
||||
* cl_VEN_extname extension */
|
||||
/* #define cl_VEN_extname 1
|
||||
* ... define new types, if any
|
||||
* ... define new tokens, if any
|
||||
* ... define new APIs, if any
|
||||
*
|
||||
* If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header
|
||||
* This allows us to avoid having to decide whether to include GL headers or GLES here.
|
||||
*/
|
||||
|
||||
/*
|
||||
* cl_khr_gl_event extension
|
||||
* See section 9.9 in the OpenCL 1.1 spec for more information
|
||||
*/
|
||||
#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D
|
||||
|
||||
extern CL_API_ENTRY cl_event CL_API_CALL
|
||||
clCreateEventFromGLsyncKHR(cl_context /* context */,
|
||||
cl_GLsync /* cl_GLsync */,
|
||||
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_GL_EXT_H */
|
1458
include/triton/external/CL/cl_platform.h
vendored
Normal file
1458
include/triton/external/CL/cl_platform.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
172
include/triton/external/CL/cl_va_api_media_sharing_intel.h
vendored
Normal file
172
include/triton/external/CL/cl_va_api_media_sharing_intel.h
vendored
Normal file
@@ -0,0 +1,172 @@
|
||||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2016 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
/*****************************************************************************\
|
||||
|
||||
Copyright (c) 2013-2016 Intel Corporation All Rights Reserved.
|
||||
|
||||
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
|
||||
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
File Name: cl_va_api_media_sharing_intel.h
|
||||
|
||||
Abstract:
|
||||
|
||||
Notes:
|
||||
|
||||
\*****************************************************************************/
|
||||
|
||||
|
||||
#ifndef __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
|
||||
#define __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
#include <va/va.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/******************************************
|
||||
* cl_intel_va_api_media_sharing extension *
|
||||
*******************************************/
|
||||
|
||||
#define cl_intel_va_api_media_sharing 1
|
||||
|
||||
/* error codes */
|
||||
#define CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL -1098
|
||||
#define CL_INVALID_VA_API_MEDIA_SURFACE_INTEL -1099
|
||||
#define CL_VA_API_MEDIA_SURFACE_ALREADY_ACQUIRED_INTEL -1100
|
||||
#define CL_VA_API_MEDIA_SURFACE_NOT_ACQUIRED_INTEL -1101
|
||||
|
||||
/* cl_va_api_device_source_intel */
|
||||
#define CL_VA_API_DISPLAY_INTEL 0x4094
|
||||
|
||||
/* cl_va_api_device_set_intel */
|
||||
#define CL_PREFERRED_DEVICES_FOR_VA_API_INTEL 0x4095
|
||||
#define CL_ALL_DEVICES_FOR_VA_API_INTEL 0x4096
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_VA_API_DISPLAY_INTEL 0x4097
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_VA_API_MEDIA_SURFACE_INTEL 0x4098
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_VA_API_PLANE_INTEL 0x4099
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_VA_API_MEDIA_SURFACES_INTEL 0x409A
|
||||
#define CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL 0x409B
|
||||
|
||||
typedef cl_uint cl_va_api_device_source_intel;
|
||||
typedef cl_uint cl_va_api_device_set_intel;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetDeviceIDsFromVA_APIMediaAdapterINTEL(
|
||||
cl_platform_id /* platform */,
|
||||
cl_va_api_device_source_intel /* media_adapter_type */,
|
||||
void* /* media_adapter */,
|
||||
cl_va_api_device_set_intel /* media_adapter_set */,
|
||||
cl_uint /* num_entries */,
|
||||
cl_device_id* /* devices */,
|
||||
cl_uint* /* num_devices */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL * clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)(
|
||||
cl_platform_id /* platform */,
|
||||
cl_va_api_device_source_intel /* media_adapter_type */,
|
||||
void* /* media_adapter */,
|
||||
cl_va_api_device_set_intel /* media_adapter_set */,
|
||||
cl_uint /* num_entries */,
|
||||
cl_device_id* /* devices */,
|
||||
cl_uint* /* num_devices */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromVA_APIMediaSurfaceINTEL(
|
||||
cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
VASurfaceID* /* surface */,
|
||||
cl_uint /* plane */,
|
||||
cl_int* /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL * clCreateFromVA_APIMediaSurfaceINTEL_fn)(
|
||||
cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
VASurfaceID* /* surface */,
|
||||
cl_uint /* plane */,
|
||||
cl_int* /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireVA_APIMediaSurfacesINTEL(
|
||||
cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_objects */,
|
||||
const cl_mem* /* mem_objects */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */,
|
||||
cl_event* /* event */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)(
|
||||
cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_objects */,
|
||||
const cl_mem* /* mem_objects */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */,
|
||||
cl_event* /* event */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseVA_APIMediaSurfacesINTEL(
|
||||
cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_objects */,
|
||||
const cl_mem* /* mem_objects */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */,
|
||||
cl_event* /* event */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)(
|
||||
cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_objects */,
|
||||
const cl_mem* /* mem_objects */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */,
|
||||
cl_event* /* event */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H */
|
||||
|
59
include/triton/external/CL/opencl.h
vendored
Normal file
59
include/triton/external/CL/opencl.h
vendored
Normal file
@@ -0,0 +1,59 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2015 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
******************************************************************************/
|
||||
|
||||
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
|
||||
|
||||
#ifndef __OPENCL_H
|
||||
#define __OPENCL_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef __APPLE__
|
||||
|
||||
#include <OpenCL/cl.h>
|
||||
#include <OpenCL/cl_gl.h>
|
||||
#include <OpenCL/cl_gl_ext.h>
|
||||
#include <OpenCL/cl_ext.h>
|
||||
|
||||
#else
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_gl.h>
|
||||
#include <CL/cl_gl_ext.h>
|
||||
#include <CL/cl_ext.h>
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_H */
|
||||
|
@@ -39,7 +39,7 @@ public:
|
||||
std::vector<unsigned> global_range_size;
|
||||
unsigned num_threads;
|
||||
};
|
||||
typedef std::function<double(driver::kernel, launch_information)> benchmark_t;
|
||||
typedef std::function<double(driver::cu_kernel, launch_information)> benchmark_t;
|
||||
|
||||
struct passes_wrapper {
|
||||
passes_wrapper(): shared(&buffer_info), liveness(&buffer_info),
|
||||
@@ -74,17 +74,17 @@ private:
|
||||
std::unique_ptr<ir::module> make_triton_module(const std::string &src);
|
||||
|
||||
public:
|
||||
jit(driver::context context);
|
||||
jit(driver::context* context);
|
||||
void autotune(const std::string &src, benchmark_t benchmark);
|
||||
void add_module(ir::module &module, const std::vector<unsigned>& params = {});
|
||||
void add_module(const std::string &src, const std::vector<unsigned>& params = {});
|
||||
driver::kernel get_function(const std::string &name);
|
||||
driver::cu_kernel get_function(const std::string &name);
|
||||
launch_information get_launch_info(const std::string &name);
|
||||
unsigned get_int(const std::string &name);
|
||||
|
||||
private:
|
||||
std::vector<driver::module> modules_;
|
||||
driver::context driver_context_;
|
||||
std::vector<driver::cu_module> modules_;
|
||||
driver::context* driver_context_;
|
||||
llvm::LLVMContext llvm_context_;
|
||||
ir::context triton_context_;
|
||||
std::map<std::string, launch_information> launch_info_map_;
|
||||
|
@@ -37,6 +37,58 @@ namespace triton
|
||||
namespace driver
|
||||
{
|
||||
|
||||
/*-----------------------------------*/
|
||||
//----------- Platforms ------------*/
|
||||
/*-----------------------------------*/
|
||||
|
||||
void backend::platforms::init() {
|
||||
if(!cache_.empty())
|
||||
return;
|
||||
//if CUDA is here
|
||||
if(dispatch::cuinit()){
|
||||
cache_.push_back(new cu_platform());
|
||||
}
|
||||
//if OpenCL is here
|
||||
if(dispatch::clinit()){
|
||||
cl_uint num_platforms;
|
||||
dispatch::clGetPlatformIDs(0, nullptr, &num_platforms);
|
||||
std::vector<cl_platform_id> ids(num_platforms);
|
||||
dispatch::clGetPlatformIDs(num_platforms, ids.data(), nullptr);
|
||||
for(cl_platform_id id: ids)
|
||||
cache_.push_back(new cl_platform(id));
|
||||
}
|
||||
if(cache_.empty())
|
||||
throw std::runtime_error("ISAAC: No backend available. Make sure CUDA is available in your library path");
|
||||
}
|
||||
|
||||
void backend::platforms::get(std::vector<platform *> &results) {
|
||||
std::copy(cache_.begin(), cache_.end(), std::back_inserter(results));
|
||||
}
|
||||
|
||||
std::vector<driver::platform*> backend::platforms::cache_;
|
||||
|
||||
|
||||
/*-----------------------------------*/
|
||||
//----------- Devices --------------*/
|
||||
/*-----------------------------------*/
|
||||
|
||||
void backend::devices::init(std::vector<platform*> const & platforms) {
|
||||
if(!cache_.empty())
|
||||
return;
|
||||
for(driver::platform* pf: platforms)
|
||||
pf->devices(cache_);
|
||||
if(cache_.empty())
|
||||
throw std::runtime_error("ISAAC: No device available. Make sure that your platform is configured properly");
|
||||
}
|
||||
|
||||
void backend::devices::get(std::vector<device*> &devs) {
|
||||
std::copy(cache_.begin(), cache_.end(), std::back_inserter(devs));
|
||||
}
|
||||
|
||||
std::vector<driver::device*> backend::devices::cache_;
|
||||
|
||||
|
||||
|
||||
/*-----------------------------------*/
|
||||
//---------- Modules ----------------*/
|
||||
/*-----------------------------------*/
|
||||
@@ -47,14 +99,14 @@ void backend::modules::release(){
|
||||
cache_.clear();
|
||||
}
|
||||
|
||||
module& backend::modules::get(driver::stream const & stream, std::string const & name, std::string const & src){
|
||||
std::tuple<driver::stream, std::string> key(stream, name);
|
||||
driver::module* backend::modules::get(driver::stream* stream, std::string const & name, std::string const & src){
|
||||
std::tuple<driver::stream*, std::string> key(stream, name);
|
||||
if(cache_.find(key)==cache_.end())
|
||||
return *cache_.insert(std::make_pair(key, new module(stream.context(), src))).first->second;
|
||||
return *cache_.at(key);
|
||||
return &*cache_.insert(std::make_pair(key, new driver::cu_module(((driver::cu_stream*)stream)->context(), src))).first->second;
|
||||
return &*cache_.at(key);
|
||||
}
|
||||
|
||||
std::map<std::tuple<stream, std::string>, module * > backend::modules::cache_;
|
||||
std::map<std::tuple<driver::stream*, std::string>, driver::module*> backend::modules::cache_;
|
||||
|
||||
/*-----------------------------------*/
|
||||
//----------- Kernels --------------*/
|
||||
@@ -66,23 +118,23 @@ void backend::kernels::release(){
|
||||
cache_.clear();
|
||||
}
|
||||
|
||||
kernel & backend::kernels::get(driver::module const & program, std::string const & name){
|
||||
std::tuple<module, std::string> key(program, name);
|
||||
driver::kernel* backend::kernels::get(driver::module *mod, std::string const & name){
|
||||
std::tuple<driver::module*, std::string> key(mod, name);
|
||||
if(cache_.find(key)==cache_.end())
|
||||
return *cache_.insert(std::make_pair(key, new kernel(program, name.c_str()))).first->second;
|
||||
return *cache_.at(key);
|
||||
return &*cache_.insert(std::make_pair(key, new driver::cu_kernel((driver::cu_module*)mod, name.c_str()))).first->second;
|
||||
return cache_.at(key);
|
||||
}
|
||||
|
||||
std::map<std::tuple<module, std::string>, kernel * > backend::kernels::cache_;
|
||||
std::map<std::tuple<driver::module*, std::string>, driver::kernel*> backend::kernels::cache_;
|
||||
|
||||
/*-----------------------------------*/
|
||||
//------------ Queues --------------*/
|
||||
/*-----------------------------------*/
|
||||
|
||||
void backend::streams::init(std::list<const context *> const & contexts){
|
||||
for(context const * ctx : contexts)
|
||||
if(cache_.find(*ctx)==cache_.end())
|
||||
cache_.insert(std::make_pair(*ctx, std::vector<stream*>{new stream(*ctx)}));
|
||||
void backend::streams::init(std::list<driver::context*> const & contexts){
|
||||
for(driver::context* ctx : contexts)
|
||||
if(cache_.find(ctx)==cache_.end())
|
||||
cache_.insert(std::make_pair(ctx, std::vector<driver::stream*>{new driver::cu_stream(ctx)}));
|
||||
}
|
||||
|
||||
void backend::streams::release(){
|
||||
@@ -92,33 +144,31 @@ void backend::streams::release(){
|
||||
cache_.clear();
|
||||
}
|
||||
|
||||
stream & backend::streams::get_default()
|
||||
driver::stream* backend::streams::get_default()
|
||||
{ return get(contexts::get_default(), 0); }
|
||||
|
||||
stream & backend::streams::get(driver::context const & context, unsigned int id){
|
||||
init(std::list<driver::context const *>(1,&context));
|
||||
driver::stream* backend::streams::get(driver::context* context, unsigned int id){
|
||||
init(std::list<driver::context*>(1,context));
|
||||
for(auto & x : cache_)
|
||||
if(x.first==context)
|
||||
return *x.second[id];
|
||||
return x.second[id];
|
||||
throw;
|
||||
}
|
||||
|
||||
void backend::streams::get(driver::context const & context, std::vector<stream*> & queues){
|
||||
init(std::list<driver::context const *>(1,&context));
|
||||
void backend::streams::get(driver::context* context, std::vector<driver::stream*> & queues){
|
||||
init(std::list<driver::context*>(1,context));
|
||||
queues = cache_.at(context);
|
||||
}
|
||||
|
||||
std::map<context, std::vector<stream*> > backend::streams::cache_;
|
||||
std::map<driver::context*, std::vector<driver::stream*>> backend::streams::cache_;
|
||||
|
||||
/*-----------------------------------*/
|
||||
//------------ Contexts ------------*/
|
||||
/*-----------------------------------*/
|
||||
|
||||
void backend::contexts::init(std::vector<platform> const & platforms){
|
||||
for(platform const & platform: platforms){
|
||||
for(device const & device: platform.devices())
|
||||
cache_.push_back(new context(device));
|
||||
}
|
||||
void backend::contexts::init(std::vector<driver::device*> const & devices){
|
||||
for(driver::device* dvc: devices)
|
||||
cache_.push_back(new cu_context(dvc));
|
||||
}
|
||||
|
||||
void backend::contexts::release(){
|
||||
@@ -127,19 +177,19 @@ void backend::contexts::release(){
|
||||
cache_.clear();
|
||||
}
|
||||
|
||||
driver::context const & backend::contexts::get_default(){
|
||||
driver::context* backend::contexts::get_default(){
|
||||
backend::init();
|
||||
std::list<context const *>::const_iterator it = cache_.begin();
|
||||
auto it = cache_.begin();
|
||||
std::advance(it, default_device);
|
||||
return **it;
|
||||
return *it;
|
||||
}
|
||||
|
||||
void backend::contexts::get(std::list<context const *> & contexts){
|
||||
void backend::contexts::get(std::list<driver::context*> & contexts){
|
||||
backend::init();
|
||||
contexts = cache_;
|
||||
}
|
||||
|
||||
std::list<context const *> backend::contexts::cache_;
|
||||
std::list<driver::context*> backend::contexts::cache_;
|
||||
|
||||
|
||||
|
||||
@@ -147,28 +197,8 @@ std::list<context const *> backend::contexts::cache_;
|
||||
//------------ General -------------*/
|
||||
/*-----------------------------------*/
|
||||
|
||||
std::vector<device> backend::devices(){
|
||||
std::vector<platform> platforms = backend::platforms();
|
||||
std::vector<device> result;
|
||||
for(platform const & platform: platforms){
|
||||
auto devices = platform.devices();
|
||||
result.insert(result.end(), devices.begin(), devices.end());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<platform> backend::platforms(){
|
||||
std::vector<platform> platforms;
|
||||
//if CUDA is here
|
||||
if(dispatch::cuinit())
|
||||
platforms.push_back(platform());
|
||||
if(platforms.empty())
|
||||
throw std::runtime_error("ISAAC: No backend available. Make sure CUDA is available in your library path");
|
||||
return platforms;
|
||||
}
|
||||
|
||||
void backend::synchronize(driver::context const & context){
|
||||
for(stream * queue: streams::cache_.at(context))
|
||||
void backend::synchronize(driver::context* context){
|
||||
for(driver::stream * queue: streams::cache_.at(context))
|
||||
queue->synchronize();
|
||||
}
|
||||
|
||||
@@ -184,8 +214,13 @@ void backend::release(){
|
||||
void backend::init(){
|
||||
if(!contexts::cache_.empty())
|
||||
return;
|
||||
std::vector<platform> platforms = backend::platforms();
|
||||
contexts::init(platforms);
|
||||
// initialize platforms
|
||||
backend::platforms::init();
|
||||
// initialize devices
|
||||
backend::devices::init(platforms::cache_);
|
||||
// initialize contexts
|
||||
backend::contexts::init(devices::cache_);
|
||||
// initialize streams
|
||||
streams::init(contexts::cache_);
|
||||
}
|
||||
|
||||
|
@@ -33,28 +33,46 @@ namespace triton
|
||||
namespace driver
|
||||
{
|
||||
|
||||
buffer::buffer(driver::context const & context, size_t size) : context_(context)
|
||||
{
|
||||
ContextSwitcher ctx_switch(context_);
|
||||
|
||||
//
|
||||
|
||||
buffer::buffer(driver::context* ctx, CUdeviceptr cu, bool take_ownership)
|
||||
: polymorphic_resource(cu, take_ownership), context_(ctx) { }
|
||||
|
||||
buffer::buffer(driver::context* ctx, cl_mem cl, bool take_ownership)
|
||||
: polymorphic_resource(cl, take_ownership), context_(ctx) { }
|
||||
|
||||
driver::context* buffer::context() {
|
||||
return context_;
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
ocl_buffer::ocl_buffer(driver::context* context, size_t size)
|
||||
: buffer(context, cl_mem(), true){
|
||||
cl_int err;
|
||||
dispatch::clCreateBuffer(*context->cl(), CL_MEM_READ_WRITE, size, NULL, &err);
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
|
||||
cu_buffer::cu_buffer(driver::context* context, size_t size)
|
||||
: buffer(context, CUdeviceptr(), true) {
|
||||
cu_context::context_switcher ctx_switch(*context_);
|
||||
dispatch::cuMemAlloc(&*cu_, size);
|
||||
}
|
||||
|
||||
buffer::buffer(driver::context const & context, CUdeviceptr cu, bool take_ownership):
|
||||
context_(context), cu_(cu, take_ownership)
|
||||
{ }
|
||||
cu_buffer::cu_buffer(driver::context* context, CUdeviceptr cu, bool take_ownership)
|
||||
: buffer(context, cu, take_ownership){
|
||||
}
|
||||
|
||||
void buffer::set_zero(stream const & queue, size_t size)
|
||||
void cu_buffer::set_zero(cu_stream const & queue, size_t size)
|
||||
{
|
||||
ContextSwitcher ctx_switch(context_);
|
||||
dispatch::cuMemsetD8Async(*cu_, 0, size, queue);
|
||||
cu_context::context_switcher ctx_switch(*context_);
|
||||
dispatch::cuMemsetD8Async(*cu_, 0, size, *queue.cu());
|
||||
}
|
||||
|
||||
handle<CUdeviceptr> const & buffer::cu() const
|
||||
{ return cu_; }
|
||||
|
||||
handle<CUdeviceptr> & buffer::cu()
|
||||
{ return cu_; }
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -35,9 +35,28 @@ namespace triton
|
||||
namespace driver
|
||||
{
|
||||
|
||||
/* ------------------------ */
|
||||
// BASE //
|
||||
/* ------------------------ */
|
||||
|
||||
context::context(driver::device *dev, CUcontext cu, bool take_ownership):
|
||||
polymorphic_resource(cu, take_ownership),
|
||||
dev_(dev), cache_path_(get_cache_path()) {
|
||||
}
|
||||
|
||||
context::context(driver::device *dev, cl_context cl, bool take_ownership):
|
||||
polymorphic_resource(cl, take_ownership),
|
||||
dev_(dev), cache_path_(get_cache_path()){
|
||||
|
||||
}
|
||||
|
||||
driver::device* context::device() const {
|
||||
return dev_;
|
||||
}
|
||||
|
||||
std::string context::get_cache_path(){
|
||||
//user-specified cache path
|
||||
std::string result = tools::getenv("ISAAC_CACHE_PATH");
|
||||
std::string result = tools::getenv("TRITON_CACHE_PATH");
|
||||
if(!result.empty()){
|
||||
if(tools::mkpath(result)==0)
|
||||
return result;
|
||||
@@ -46,7 +65,7 @@ std::string context::get_cache_path(){
|
||||
result = tools::getenv("HOME");
|
||||
if(!result.empty())
|
||||
{
|
||||
result = result + "/.isaac/cache/";
|
||||
result = result + "/.triton/cache/";
|
||||
if(tools::mkpath(result)==0)
|
||||
return result;
|
||||
}
|
||||
@@ -54,7 +73,28 @@ std::string context::get_cache_path(){
|
||||
return "";
|
||||
}
|
||||
|
||||
CUdevice context::device(CUcontext context){
|
||||
std::string const & context::cache_path() const{
|
||||
return cache_path_;
|
||||
}
|
||||
|
||||
|
||||
/* ------------------------ */
|
||||
// CUDA //
|
||||
/* ------------------------ */
|
||||
|
||||
// RAII context switcher
|
||||
cu_context::context_switcher::context_switcher(const context &ctx): ctx_((const cu_context&)ctx) {
|
||||
dispatch::cuCtxPushCurrent_v2(*ctx_.cu());
|
||||
}
|
||||
|
||||
cu_context::context_switcher::~context_switcher() {
|
||||
CUcontext tmp;
|
||||
dispatch::cuCtxPopCurrent_v2(&tmp);
|
||||
assert(tmp==(CUcontext)ctx_ && "Switching back to invalid context!");
|
||||
}
|
||||
|
||||
// import CUdevice
|
||||
CUdevice cu_context::get_device_of(CUcontext context){
|
||||
dispatch::cuCtxPushCurrent_v2(context);
|
||||
CUdevice res;
|
||||
dispatch::cuCtxGetDevice(&res);
|
||||
@@ -62,35 +102,24 @@ CUdevice context::device(CUcontext context){
|
||||
return res;
|
||||
}
|
||||
|
||||
context::context(CUcontext context, bool take_ownership): cu_(context, take_ownership), dvc_(device(context), false), cache_path_(get_cache_path())
|
||||
{ }
|
||||
// wrapper for cuda context
|
||||
cu_context::cu_context(CUcontext context, bool take_ownership): driver::context(new driver::cu_device(get_device_of(context), false),
|
||||
context, take_ownership) {
|
||||
}
|
||||
|
||||
context::context(driver::device const & device): dvc_(device), cache_path_(get_cache_path())
|
||||
{
|
||||
dispatch::cuCtxCreate(&*cu_, CU_CTX_SCHED_AUTO, (CUdevice)device);
|
||||
cu_context::cu_context(driver::device* device): context(device, CUcontext(), true){
|
||||
dispatch::cuCtxCreate(&*cu_, CU_CTX_SCHED_AUTO, *((driver::cu_device*)dev_)->cu());
|
||||
dispatch::cuCtxPopCurrent_v2(NULL);
|
||||
}
|
||||
|
||||
device const & context::device() const
|
||||
{ return dvc_; }
|
||||
|
||||
std::string const & context::cache_path() const
|
||||
{ return cache_path_; }
|
||||
/* ------------------------ */
|
||||
// OpenCL //
|
||||
/* ------------------------ */
|
||||
|
||||
handle<CUcontext> const & context::cu() const
|
||||
{ return cu_; }
|
||||
|
||||
/* Context Switcher */
|
||||
ContextSwitcher::ContextSwitcher(driver::context const & ctx): ctx_(ctx)
|
||||
{
|
||||
dispatch::cuCtxPushCurrent_v2(ctx_);
|
||||
}
|
||||
|
||||
ContextSwitcher::~ContextSwitcher()
|
||||
{
|
||||
CUcontext tmp;
|
||||
dispatch::cuCtxPopCurrent_v2(&tmp);
|
||||
assert(tmp==(CUcontext)ctx_ && "Switching back to invalid context!");
|
||||
ocl_context::ocl_context(driver::device* dev): context(dev, cl_context(), true) {
|
||||
cl_int err;
|
||||
*cl_ = dispatch::clCreateContext(nullptr, 1, &*dev->cl(), nullptr, nullptr, &err);
|
||||
}
|
||||
|
||||
|
||||
|
@@ -34,34 +34,34 @@ namespace triton
|
||||
namespace driver
|
||||
{
|
||||
|
||||
/* Architecture [NVidia] */
|
||||
device::Architecture device::nv_arch(std::pair<unsigned int, unsigned int> sm) const{
|
||||
switch(sm.first)
|
||||
{
|
||||
|
||||
/* ------------------------ */
|
||||
// CUDA //
|
||||
/* ------------------------ */
|
||||
|
||||
// Architecture
|
||||
cu_device::Architecture cu_device::nv_arch(std::pair<unsigned int, unsigned int> sm) const {
|
||||
switch(sm.first) {
|
||||
case 7:
|
||||
switch(sm.second)
|
||||
{
|
||||
switch(sm.second){
|
||||
case 0: return Architecture::SM_7_0;
|
||||
}
|
||||
|
||||
case 6:
|
||||
switch(sm.second)
|
||||
{
|
||||
switch(sm.second){
|
||||
case 0: return Architecture::SM_6_0;
|
||||
case 1: return Architecture::SM_6_1;
|
||||
}
|
||||
|
||||
case 5:
|
||||
switch(sm.second)
|
||||
{
|
||||
switch(sm.second){
|
||||
case 0: return Architecture::SM_5_0;
|
||||
case 2: return Architecture::SM_5_2;
|
||||
default: return Architecture::UNKNOWN;
|
||||
}
|
||||
|
||||
case 3:
|
||||
switch(sm.second)
|
||||
{
|
||||
switch(sm.second){
|
||||
case 0: return Architecture::SM_3_0;
|
||||
case 5: return Architecture::SM_3_5;
|
||||
case 7: return Architecture::SM_3_7;
|
||||
@@ -69,8 +69,7 @@ device::Architecture device::nv_arch(std::pair<unsigned int, unsigned int> sm) c
|
||||
}
|
||||
|
||||
case 2:
|
||||
switch(sm.second)
|
||||
{
|
||||
switch(sm.second){
|
||||
case 0: return Architecture::SM_2_0;
|
||||
case 1: return Architecture::SM_2_1;
|
||||
default: return Architecture::UNKNOWN;
|
||||
@@ -80,14 +79,16 @@ device::Architecture device::nv_arch(std::pair<unsigned int, unsigned int> sm) c
|
||||
}
|
||||
}
|
||||
|
||||
// information query
|
||||
template<CUdevice_attribute attr>
|
||||
int device::cuGetInfo() const{
|
||||
int cu_device::cuGetInfo() const{
|
||||
int res;
|
||||
dispatch::cuDeviceGetAttribute(&res, attr, *cu_);
|
||||
return res;
|
||||
}
|
||||
|
||||
nvmlDevice_t device::nvml_device() const{
|
||||
// convert to nvml
|
||||
nvmlDevice_t cu_device::nvml_device() const{
|
||||
std::map<std::string, nvmlDevice_t> map;
|
||||
std::string key = pci_bus_id();
|
||||
if(map.find(key)==map.end()){
|
||||
@@ -98,34 +99,37 @@ nvmlDevice_t device::nvml_device() const{
|
||||
return map.at(key);
|
||||
}
|
||||
|
||||
/* Architecture */
|
||||
device::Architecture device::architecture() const
|
||||
{ return nv_arch(compute_capability()); }
|
||||
// architecture
|
||||
cu_device::Architecture cu_device::architecture() const{
|
||||
return nv_arch(compute_capability());
|
||||
}
|
||||
|
||||
/* Attributes */
|
||||
size_t device::address_bits() const
|
||||
{ return sizeof(size_t)*8; }
|
||||
// number of address bits
|
||||
size_t cu_device::address_bits() const{
|
||||
return sizeof(size_t)*8;
|
||||
}
|
||||
|
||||
driver::platform device::platform() const
|
||||
{ return platform(); }
|
||||
|
||||
std::string device::name() const{
|
||||
// name
|
||||
std::string cu_device::name() const {
|
||||
char tmp[128];
|
||||
dispatch::cuDeviceGetName(tmp, 128, *cu_);
|
||||
return std::string(tmp);
|
||||
}
|
||||
|
||||
std::string device::pci_bus_id() const{
|
||||
// PCI bus ID
|
||||
std::string cu_device::pci_bus_id() const{
|
||||
char tmp[128];
|
||||
dispatch::cuDeviceGetPCIBusId(tmp, 128, *cu_);
|
||||
return std::string(tmp);
|
||||
}
|
||||
|
||||
void device::interpret_as(std::pair<size_t, size_t> cc){
|
||||
// force the device to be interpreted as a particular cc
|
||||
void cu_device::interpret_as(std::pair<size_t, size_t> cc){
|
||||
interpreted_as_ = std::make_shared<std::pair<size_t, size_t>>(cc);
|
||||
}
|
||||
|
||||
std::pair<size_t, size_t> device::compute_capability() const{
|
||||
// compute capability
|
||||
std::pair<size_t, size_t> cu_device::compute_capability() const {
|
||||
if(interpreted_as_)
|
||||
return *interpreted_as_;
|
||||
size_t _major = cuGetInfo<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR>();
|
||||
@@ -133,17 +137,24 @@ std::pair<size_t, size_t> device::compute_capability() const{
|
||||
return std::make_pair(_major, _minor);
|
||||
}
|
||||
|
||||
size_t device::max_threads_per_block() const
|
||||
{ return cuGetInfo<CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK>(); }
|
||||
// maximum number of threads per block
|
||||
size_t cu_device::max_threads_per_block() const {
|
||||
return cuGetInfo<CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK>();
|
||||
}
|
||||
|
||||
size_t device::max_shared_memory() const
|
||||
{ return cuGetInfo<CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK>(); }
|
||||
// maximum amount of shared memory per block
|
||||
size_t cu_device::max_shared_memory() const {
|
||||
return cuGetInfo<CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK>();
|
||||
}
|
||||
|
||||
size_t device::warp_size() const
|
||||
{ return cuGetInfo<CU_DEVICE_ATTRIBUTE_WARP_SIZE>(); }
|
||||
// warp size
|
||||
size_t cu_device::warp_size() const {
|
||||
return cuGetInfo<CU_DEVICE_ATTRIBUTE_WARP_SIZE>();
|
||||
}
|
||||
|
||||
|
||||
std::vector<size_t> device::max_block_dim() const{
|
||||
// maximum block dimensions
|
||||
std::vector<size_t> cu_device::max_block_dim() const {
|
||||
std::vector<size_t> result(3);
|
||||
result[0] = cuGetInfo<CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X>();
|
||||
result[1] = cuGetInfo<CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y>();
|
||||
@@ -151,36 +162,39 @@ std::vector<size_t> device::max_block_dim() const{
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t device::current_sm_clock() const{
|
||||
// current SM clock
|
||||
size_t cu_device::current_sm_clock() const{
|
||||
unsigned int result;
|
||||
dispatch::nvmlDeviceGetClockInfo(nvml_device(), NVML_CLOCK_SM, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t device::max_sm_clock() const{
|
||||
// max SM clock
|
||||
size_t cu_device::max_sm_clock() const{
|
||||
unsigned int result;
|
||||
dispatch::nvmlDeviceGetMaxClockInfo(nvml_device(), NVML_CLOCK_SM, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
size_t device::current_mem_clock() const{
|
||||
// current memory clock
|
||||
size_t cu_device::current_mem_clock() const{
|
||||
unsigned int result;
|
||||
dispatch::nvmlDeviceGetClockInfo(nvml_device(), NVML_CLOCK_MEM, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t device::max_mem_clock() const{
|
||||
// max memory clock
|
||||
size_t cu_device::max_mem_clock() const{
|
||||
unsigned int result;
|
||||
dispatch::nvmlDeviceGetMaxClockInfo(nvml_device(), NVML_CLOCK_MEM, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Infos */
|
||||
std::string device::infos() const{
|
||||
// print infos
|
||||
std::string cu_device::infos() const{
|
||||
std::ostringstream oss;
|
||||
std::vector<size_t> max_wi_sizes = max_block_dim();
|
||||
oss << "Platform: " << platform().name() << std::endl;
|
||||
oss << "Platform: CUDA" << std::endl;
|
||||
oss << "Name: " << name() << std::endl;
|
||||
oss << "Maximum total work-group size: " << max_threads_per_block() << std::endl;
|
||||
oss << "Maximum individual work-group sizes: " << max_wi_sizes[0] << ", " << max_wi_sizes[1] << ", " << max_wi_sizes[2] << std::endl;
|
||||
@@ -188,9 +202,6 @@ std::string device::infos() const{
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
handle<CUdevice> const & device::cu() const
|
||||
{ return cu_; }
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -72,6 +72,17 @@ namespace driver
|
||||
#define DEFINE19(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15, t16, t17, t18, t19) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i, t10 j, t11 k, t12 l, t13 m, t14 n, t15 o, t16 p, t17 q, t18 r, t19 s)\
|
||||
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s); }
|
||||
|
||||
//Specialized helpers for OpenCL
|
||||
#define OCL_DEFINE1(ret, fname, t1) DEFINE1(clinit, opencl_, ret, fname, t1)
|
||||
#define OCL_DEFINE2(ret, fname, t1, t2) DEFINE2(clinit, opencl_, ret, fname, t1, t2)
|
||||
#define OCL_DEFINE3(ret, fname, t1, t2, t3) DEFINE3(clinit, opencl_, ret, fname, t1, t2, t3)
|
||||
#define OCL_DEFINE4(ret, fname, t1, t2, t3, t4) DEFINE4(clinit, opencl_, ret, fname, t1, t2, t3, t4)
|
||||
#define OCL_DEFINE5(ret, fname, t1, t2, t3, t4, t5) DEFINE5(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5)
|
||||
#define OCL_DEFINE6(ret, fname, t1, t2, t3, t4, t5, t6) DEFINE6(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5, t6)
|
||||
#define OCL_DEFINE7(ret, fname, t1, t2, t3, t4, t5, t6, t7) DEFINE7(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5, t6, t7)
|
||||
#define OCL_DEFINE8(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) DEFINE8(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8)
|
||||
#define OCL_DEFINE9(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) DEFINE9(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9)
|
||||
|
||||
//Specialized helpers for CUDA
|
||||
#define CUDA_DEFINE1(ret, fname, t1) DEFINE1(cuinit, cuda_, ret, fname, t1)
|
||||
#define CUDA_DEFINE2(ret, fname, t1, t2) DEFINE2(cuinit, cuda_, ret, fname, t1, t2)
|
||||
@@ -104,15 +115,24 @@ namespace driver
|
||||
#define CUDNN_DEFINE13(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13) DEFINE13(cudnninit, cudnn_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13)
|
||||
|
||||
|
||||
bool dispatch::clinit()
|
||||
{
|
||||
if(opencl_==nullptr)
|
||||
opencl_ = dlopen("libOpenCL.so", RTLD_LAZY);
|
||||
return opencl_ != nullptr;
|
||||
}
|
||||
|
||||
bool dispatch::cuinit(){
|
||||
if(cuda_==nullptr)
|
||||
cuda_ = dlopen("libcuda.so", RTLD_LAZY);
|
||||
if(cuda_ == nullptr)
|
||||
return false;
|
||||
CUresult (*fptr)(unsigned int);
|
||||
cuInit_ = dlsym(cuda_, "cuInit");
|
||||
*reinterpret_cast<void **>(&fptr) = cuInit_;
|
||||
CUresult res = (*fptr)(0);
|
||||
check(res);
|
||||
return cuda_ != nullptr;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool dispatch::nvmlinit(){
|
||||
@@ -180,17 +200,17 @@ NVML_DEFINE2(nvmlReturn_t, nvmlDeviceGetHandleByPciBusId_v2, const char *, nvmlD
|
||||
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetClockInfo, nvmlDevice_t, nvmlClockType_t, unsigned int*)
|
||||
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetMaxClockInfo, nvmlDevice_t, nvmlClockType_t, unsigned int*)
|
||||
|
||||
cublasHandle_t dispatch::cublasHandle(driver::context const & ctx){
|
||||
static std::map<context, cublasHandle_t> handles;
|
||||
auto pr = handles.insert({ctx, cublasHandle_t()});
|
||||
cublasHandle_t dispatch::cublasHandle(const cu_context &ctx){
|
||||
static std::map<CUcontext, cublasHandle_t> handles;
|
||||
auto pr = handles.insert({*ctx.cu(), cublasHandle_t()});
|
||||
if(pr.second)
|
||||
cublasCreate_v2(&pr.first->second);
|
||||
return pr.first->second;
|
||||
}
|
||||
|
||||
cudnnHandle_t dispatch::cudnnHandle(driver::context const & ctx){
|
||||
static std::map<context, cudnnHandle_t> handles;
|
||||
auto pr = handles.insert({ctx, cudnnHandle_t()});
|
||||
cudnnHandle_t dispatch::cudnnHandle(driver::cu_context const & ctx){
|
||||
static std::map<CUcontext, cudnnHandle_t> handles;
|
||||
auto pr = handles.insert({*ctx.cu(), cudnnHandle_t()});
|
||||
if(pr.second)
|
||||
cudnnCreate(&pr.first->second);
|
||||
return pr.first->second;
|
||||
@@ -231,16 +251,51 @@ CUDNN_DEFINE13(cudnnStatus_t, cudnnConvolutionForward, cudnnHandle_t, const void
|
||||
CUDNN_DEFINE2(cudnnStatus_t, cudnnSetStream, cudnnHandle_t, cudaStream_t)
|
||||
CUDNN_DEFINE7(cudnnStatus_t, cudnnTransformTensor, cudnnHandle_t, const void*, const cudnnTensorDescriptor_t, const void*, const void*, const cudnnTensorDescriptor_t, void*)
|
||||
|
||||
// OpenCL
|
||||
cl_int dispatch::clBuildProgram(cl_program a, cl_uint b, const cl_device_id * c, const char * d, void (*e)(cl_program, void *), void * f)
|
||||
{ return f_impl<dispatch::clinit>(opencl_, clBuildProgram, clBuildProgram_, "clBuildProgram", a, b, c, d, e, f); }
|
||||
|
||||
cl_context dispatch::clCreateContext(const cl_context_properties * a, cl_uint b, const cl_device_id * c, void (*d)(const char *, const void *, size_t, void *), void * e, cl_int * f)
|
||||
{ return f_impl<dispatch::clinit>(opencl_, dispatch::clCreateContext, dispatch::clCreateContext_, "clCreateContext", a, b, c, d, e, f); }
|
||||
|
||||
OCL_DEFINE9(cl_int, clEnqueueNDRangeKernel, cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*)
|
||||
OCL_DEFINE4(cl_int, clSetKernelArg, cl_kernel, cl_uint, size_t, const void *)
|
||||
OCL_DEFINE1(cl_int, clReleaseMemObject, cl_mem)
|
||||
OCL_DEFINE1(cl_int, clFinish, cl_command_queue)
|
||||
OCL_DEFINE5(cl_int, clGetMemObjectInfo, cl_mem, cl_mem_info, size_t, void *, size_t *)
|
||||
OCL_DEFINE5(cl_int, clGetCommandQueueInfo, cl_command_queue, cl_command_queue_info, size_t, void *, size_t *)
|
||||
OCL_DEFINE1(cl_int, clReleaseContext, cl_context)
|
||||
OCL_DEFINE1(cl_int, clReleaseEvent, cl_event)
|
||||
OCL_DEFINE9(cl_int, clEnqueueWriteBuffer, cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *)
|
||||
OCL_DEFINE9(cl_int, clEnqueueReadBuffer, cl_command_queue, cl_mem, cl_bool, size_t, size_t, void *, cl_uint, const cl_event *, cl_event *)
|
||||
OCL_DEFINE6(cl_int, clGetProgramBuildInfo, cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *)
|
||||
OCL_DEFINE1(cl_int, clReleaseDevice, cl_device_id)
|
||||
OCL_DEFINE5(cl_int, clGetDeviceIDs, cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *)
|
||||
OCL_DEFINE5(cl_int, clGetContextInfo, cl_context, cl_context_info, size_t, void *, size_t *)
|
||||
OCL_DEFINE5(cl_int, clGetDeviceInfo, cl_device_id, cl_device_info, size_t, void *, size_t *)
|
||||
OCL_DEFINE1(cl_int, clReleaseCommandQueue, cl_command_queue)
|
||||
OCL_DEFINE3(cl_int, clGetPlatformIDs, cl_uint, cl_platform_id *, cl_uint *)
|
||||
OCL_DEFINE5(cl_int, clGetPlatformInfo, cl_platform_id, cl_platform_info, size_t, void *, size_t *)
|
||||
OCL_DEFINE5(cl_int, clGetEventProfilingInfo, cl_event, cl_profiling_info, size_t, void *, size_t *)
|
||||
OCL_DEFINE7(cl_program, clCreateProgramWithBinary, cl_context, cl_uint, const cl_device_id *, const size_t *, const unsigned char **, cl_int *, cl_int *)
|
||||
OCL_DEFINE4(cl_command_queue, clCreateCommandQueue, cl_context, cl_device_id, cl_command_queue_properties, cl_int *)
|
||||
OCL_DEFINE1(cl_int, clRetainEvent, cl_event)
|
||||
OCL_DEFINE1(cl_int, clReleaseProgram, cl_program)
|
||||
OCL_DEFINE1(cl_int, clFlush, cl_command_queue)
|
||||
OCL_DEFINE5(cl_int, clGetProgramInfo, cl_program, cl_program_info, size_t, void *, size_t *)
|
||||
OCL_DEFINE5(cl_int, clGetKernelInfo, cl_kernel, cl_kernel_info, size_t, void *, size_t *)
|
||||
OCL_DEFINE6(cl_int, clGetKernelWorkGroupInfo, cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void *, size_t *)
|
||||
OCL_DEFINE3(cl_kernel, clCreateKernel, cl_program, const char *, cl_int *)
|
||||
OCL_DEFINE5(cl_mem, clCreateBuffer, cl_context, cl_mem_flags, size_t, void *, cl_int *)
|
||||
OCL_DEFINE5(cl_program, clCreateProgramWithSource, cl_context, cl_uint, const char **, const size_t *, cl_int *)
|
||||
OCL_DEFINE1(cl_int, clReleaseKernel, cl_kernel)
|
||||
|
||||
// Release
|
||||
void dispatch::release(){
|
||||
if(cuda_){
|
||||
dlclose(cuda_);
|
||||
cuda_ = nullptr;
|
||||
}
|
||||
if(nvrtc_){
|
||||
dlclose(nvrtc_);
|
||||
nvrtc_ = nullptr;
|
||||
}
|
||||
if(cublas_){
|
||||
dlclose(cublas_);
|
||||
cublas_ = nullptr;
|
||||
@@ -251,12 +306,47 @@ void dispatch::release(){
|
||||
}
|
||||
}
|
||||
|
||||
void * dispatch::opencl_;
|
||||
void* dispatch::cuda_;
|
||||
void* dispatch::nvrtc_;
|
||||
void* dispatch::nvml_;
|
||||
void* dispatch::cublas_;
|
||||
void* dispatch::cudnn_;
|
||||
|
||||
//OpenCL
|
||||
void* dispatch::clBuildProgram_;
|
||||
void* dispatch::clEnqueueNDRangeKernel_;
|
||||
void* dispatch::clSetKernelArg_;
|
||||
void* dispatch::clReleaseMemObject_;
|
||||
void* dispatch::clFinish_;
|
||||
void* dispatch::clGetMemObjectInfo_;
|
||||
void* dispatch::clGetCommandQueueInfo_;
|
||||
void* dispatch::clReleaseContext_;
|
||||
void* dispatch::clReleaseEvent_;
|
||||
void* dispatch::clEnqueueWriteBuffer_;
|
||||
void* dispatch::clEnqueueReadBuffer_;
|
||||
void* dispatch::clGetProgramBuildInfo_;
|
||||
void* dispatch::clReleaseDevice_;
|
||||
void* dispatch::clCreateContext_;
|
||||
void* dispatch::clGetDeviceIDs_;
|
||||
void* dispatch::clGetContextInfo_;
|
||||
void* dispatch::clGetDeviceInfo_;
|
||||
void* dispatch::clReleaseCommandQueue_;
|
||||
void* dispatch::clGetPlatformIDs_;
|
||||
void* dispatch::clGetPlatformInfo_;
|
||||
void* dispatch::clGetEventProfilingInfo_;
|
||||
void* dispatch::clCreateProgramWithBinary_;
|
||||
void* dispatch::clCreateCommandQueue_;
|
||||
void* dispatch::clRetainEvent_;
|
||||
void* dispatch::clReleaseProgram_;
|
||||
void* dispatch::clFlush_;
|
||||
void* dispatch::clGetProgramInfo_;
|
||||
void* dispatch::clGetKernelInfo_;
|
||||
void* dispatch::clGetKernelWorkGroupInfo_;
|
||||
void* dispatch::clCreateKernel_;
|
||||
void* dispatch::clCreateBuffer_;
|
||||
void* dispatch::clCreateProgramWithSource_;
|
||||
void* dispatch::clReleaseKernel_;
|
||||
|
||||
//CUDA
|
||||
void* dispatch::cuCtxGetCurrent_;
|
||||
void* dispatch::cuCtxSetCurrent_;
|
||||
@@ -295,13 +385,6 @@ void* dispatch::cuMemsetD8Async_;
|
||||
void* dispatch::cuCtxPushCurrent_v2_;
|
||||
void* dispatch::cuCtxPopCurrent_v2_;
|
||||
|
||||
void* dispatch::nvrtcCompileProgram_;
|
||||
void* dispatch::nvrtcGetProgramLogSize_;
|
||||
void* dispatch::nvrtcGetPTX_;
|
||||
void* dispatch::nvrtcGetPTXSize_;
|
||||
void* dispatch::nvrtcCreateProgram_;
|
||||
void* dispatch::nvrtcGetProgramLog_;
|
||||
|
||||
void* dispatch::nvmlInit_v2_;
|
||||
void* dispatch::nvmlDeviceGetHandleByPciBusId_v2_;
|
||||
void* dispatch::nvmlDeviceGetClockInfo_;
|
||||
|
@@ -30,6 +30,9 @@ namespace triton
|
||||
namespace driver
|
||||
{
|
||||
|
||||
//OpenCL
|
||||
inline void _delete(cl_platform_id) { }
|
||||
inline void _delete(cl_device_id x) { dispatch::clReleaseDevice(x); }
|
||||
//CUDA
|
||||
inline void _delete(CUcontext x) { dispatch::cuCtxDestroy(x); }
|
||||
inline void _delete(CUdeviceptr x) { dispatch::cuMemFree(x); }
|
||||
@@ -39,7 +42,7 @@ inline void _delete(CUevent x) { dispatch::cuEventDestroy(x); }
|
||||
inline void _delete(CUfunction) { }
|
||||
inline void _delete(CUmodule x) { dispatch::cuModuleUnload(x); }
|
||||
inline void _delete(cu_event_t x) { _delete(x.first); _delete(x.second); }
|
||||
inline void _delete(cu_platform){}
|
||||
inline void _delete(CUPlatform){}
|
||||
|
||||
//Constructor
|
||||
template<class CUType>
|
||||
@@ -60,7 +63,10 @@ template class handle<CUdevice>;
|
||||
template class handle<cu_event_t>;
|
||||
template class handle<CUfunction>;
|
||||
template class handle<CUmodule>;
|
||||
template class handle<cu_platform>;
|
||||
template class handle<CUPlatform>;
|
||||
|
||||
template class handle<cl_platform_id>;
|
||||
template class handle<cl_device_id>;
|
||||
|
||||
}
|
||||
}
|
||||
|
@@ -32,13 +32,39 @@ namespace triton
|
||||
namespace driver
|
||||
{
|
||||
|
||||
kernel::kernel(driver::module const & program, const char * name) : program_(program), address_bits_(program.context().device().address_bits()){
|
||||
cu_params_store_.reserve(64);
|
||||
cu_params_.reserve(64);
|
||||
dispatch::cuModuleGetFunction(&*cu_, program, name);
|
||||
|
||||
/* ------------------------ */
|
||||
// Base //
|
||||
/* ------------------------ */
|
||||
|
||||
kernel::kernel(driver::module *program, CUfunction fn, bool has_ownership):
|
||||
polymorphic_resource(fn, has_ownership), program_(program){
|
||||
}
|
||||
|
||||
void kernel::setArg(unsigned int index, std::size_t size, void* ptr){
|
||||
kernel::kernel(driver::module *program, cl_kernel fn, bool has_ownership):
|
||||
polymorphic_resource(fn, has_ownership), program_(program){
|
||||
}
|
||||
|
||||
driver::module* kernel::module() {
|
||||
return program_;
|
||||
}
|
||||
|
||||
/* ------------------------ */
|
||||
// OpenCL //
|
||||
/* ------------------------ */
|
||||
|
||||
|
||||
/* ------------------------ */
|
||||
// CUDA //
|
||||
/* ------------------------ */
|
||||
|
||||
cu_kernel::cu_kernel(driver::module *program, const char * name) : kernel(program, CUfunction(), true) {
|
||||
cu_params_store_.reserve(64);
|
||||
cu_params_.reserve(64);
|
||||
dispatch::cuModuleGetFunction(&*cu_, *program->cu(), name);
|
||||
}
|
||||
|
||||
void cu_kernel::setArg(unsigned int index, std::size_t size, void* ptr){
|
||||
if(index + 1> cu_params_store_.size()){
|
||||
cu_params_store_.resize(index+1);
|
||||
cu_params_.resize(index+1);
|
||||
@@ -48,18 +74,12 @@ void kernel::setArg(unsigned int index, std::size_t size, void* ptr){
|
||||
cu_params_[index] = cu_params_store_[index].get();
|
||||
}
|
||||
|
||||
void kernel::setArg(unsigned int index, buffer const & data)
|
||||
{ return setArg(index, (CUdeviceptr)data);}
|
||||
void cu_kernel::setArg(unsigned int index, cu_buffer const & data)
|
||||
{ return setArg(index, data.cu());}
|
||||
|
||||
void* const* kernel::cu_params() const
|
||||
void* const* cu_kernel::cu_params() const
|
||||
{ return cu_params_.data(); }
|
||||
|
||||
handle<CUfunction> const & kernel::cu() const
|
||||
{ return cu_; }
|
||||
|
||||
driver::module const & kernel::module() const
|
||||
{ return program_; }
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
@@ -46,9 +46,34 @@ namespace triton
|
||||
namespace driver
|
||||
{
|
||||
|
||||
std::string module::compile_llvm_module(llvm::Module* module) {
|
||||
init_llvm();
|
||||
/* ------------------------ */
|
||||
// Base //
|
||||
/* ------------------------ */
|
||||
|
||||
module::module(driver::context* ctx, CUmodule mod, bool has_ownership)
|
||||
: polymorphic_resource(mod, has_ownership), ctx_(ctx) {
|
||||
}
|
||||
|
||||
module::module(driver::context* ctx, cl_program mod, bool has_ownership)
|
||||
: polymorphic_resource(mod, has_ownership), ctx_(ctx) {
|
||||
}
|
||||
|
||||
driver::context* module::context() const {
|
||||
return ctx_;
|
||||
}
|
||||
|
||||
|
||||
/* ------------------------ */
|
||||
// OpenCL //
|
||||
/* ------------------------ */
|
||||
|
||||
|
||||
/* ------------------------ */
|
||||
// CUDA //
|
||||
/* ------------------------ */
|
||||
|
||||
std::string cu_module::compile_llvm_module(llvm::Module* module) {
|
||||
init_llvm();
|
||||
// create machine
|
||||
module->setTargetTriple("nvptx64-nvidia-cuda");
|
||||
std::string error;
|
||||
@@ -67,18 +92,17 @@ std::string module::compile_llvm_module(llvm::Module* module) {
|
||||
layout += "-p3:32:32-p4:32:32-p5:32:32";
|
||||
layout += "-i64:64-i128:128-v16:16-v32:32-n16:32:64";
|
||||
module->setDataLayout(layout);
|
||||
|
||||
// emit machine code
|
||||
llvm::legacy::PassManager pass;
|
||||
llvm::SmallVector<char, 0> buffer;
|
||||
llvm::raw_svector_ostream stream(buffer);
|
||||
machine->addPassesToEmitFile(pass, stream, nullptr, llvm::TargetMachine::CGFT_AssemblyFile);
|
||||
pass.run(*module);
|
||||
|
||||
// done
|
||||
return std::string(buffer.begin(), buffer.end());
|
||||
}
|
||||
|
||||
void module::init_llvm() {
|
||||
void cu_module::init_llvm() {
|
||||
static bool init = false;
|
||||
if(!init){
|
||||
llvm::InitializeAllTargetInfos();
|
||||
@@ -90,10 +114,10 @@ void module::init_llvm() {
|
||||
}
|
||||
}
|
||||
|
||||
module::module(driver::context const & context, llvm::Module* ll_module): module(context, compile_llvm_module(ll_module)){ }
|
||||
cu_module::cu_module(driver::context * context, llvm::Module* ll_module): cu_module(context, compile_llvm_module(ll_module)) { }
|
||||
|
||||
module::module(driver::context const & context, std::string const & source) : context_(context), source_(source){
|
||||
ContextSwitcher ctx_switch(context_);
|
||||
cu_module::cu_module(driver::context * context, std::string const & source) : module(context, CUmodule(), true), source_(source){
|
||||
cu_context::context_switcher ctx_switch(*context);
|
||||
// JIT compile source-code
|
||||
CUjit_option opt[] = {CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, CU_JIT_ERROR_LOG_BUFFER};
|
||||
unsigned int errbufsize = 8096;
|
||||
@@ -108,17 +132,11 @@ module::module(driver::context const & context, std::string const & source) : co
|
||||
}
|
||||
}
|
||||
|
||||
driver::context const & module::context() const
|
||||
{ return context_; }
|
||||
|
||||
handle<CUmodule> const & module::cu() const
|
||||
{ return cu_; }
|
||||
|
||||
buffer module::symbol(const char *name) const{
|
||||
cu_buffer cu_module::symbol(const char *name) const{
|
||||
CUdeviceptr handle;
|
||||
size_t size;
|
||||
dispatch::cuModuleGetGlobal_v2(&handle, &size, *cu_, name);
|
||||
return buffer(context_, handle, false);
|
||||
return cu_buffer(ctx_, handle, false);
|
||||
}
|
||||
|
||||
|
||||
|
@@ -31,22 +31,46 @@ namespace triton
|
||||
namespace driver
|
||||
{
|
||||
|
||||
std::string platform::version() const{
|
||||
|
||||
/* ------------------------ */
|
||||
// CUDA //
|
||||
/* ------------------------ */
|
||||
|
||||
std::string cu_platform::version() const{
|
||||
int version;
|
||||
dispatch::cuDriverGetVersion(&version);
|
||||
return std::to_string(version);
|
||||
}
|
||||
|
||||
std::vector<device> platform::devices() const{
|
||||
std::vector<device> devices;
|
||||
void cu_platform::devices(std::vector<device *> &devices) const{
|
||||
int N;
|
||||
dispatch::cuDeviceGetCount(&N);
|
||||
for(int i = 0 ; i < N ; ++i){
|
||||
CUdevice dvc;
|
||||
dispatch::cuDeviceGet(&dvc, i);
|
||||
devices.push_back(driver::device(dvc));
|
||||
devices.push_back(new driver::cu_device(dvc));
|
||||
}
|
||||
return devices;
|
||||
}
|
||||
|
||||
/* ------------------------ */
|
||||
// OpenCL //
|
||||
/* ------------------------ */
|
||||
|
||||
std::string cl_platform::version() const {
|
||||
size_t size;
|
||||
dispatch::clGetPlatformInfo(*cl_, CL_PLATFORM_VERSION, 0, nullptr, &size);
|
||||
std::string result(size, 0);
|
||||
dispatch::clGetPlatformInfo(*cl_, CL_PLATFORM_VERSION, size, (void*)&*result.begin(), nullptr);
|
||||
return result;
|
||||
}
|
||||
|
||||
void cl_platform::devices(std::vector<device*> &devices) const{
|
||||
cl_uint num_devices;
|
||||
dispatch::clGetDeviceIDs(*cl_, CL_DEVICE_TYPE_GPU, 0, nullptr, &num_devices);
|
||||
std::vector<cl_device_id> ids(num_devices);
|
||||
dispatch::clGetDeviceIDs(*cl_, CL_DEVICE_TYPE_GPU, num_devices, ids.data(), nullptr);
|
||||
for(cl_device_id id: ids)
|
||||
devices.push_back(new driver::ocl_device(id));
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -38,57 +38,84 @@ namespace triton
|
||||
namespace driver
|
||||
{
|
||||
|
||||
inline CUcontext cucontext(){
|
||||
/* ------------------------ */
|
||||
// Base //
|
||||
/* ------------------------ */
|
||||
|
||||
stream::stream(driver::context *ctx, CUstream cu, bool has_ownership)
|
||||
: polymorphic_resource(cu, has_ownership), ctx_(ctx) {
|
||||
|
||||
}
|
||||
|
||||
stream::stream(driver::context *ctx, cl_command_queue cl, bool has_ownership)
|
||||
: polymorphic_resource(cl, has_ownership), ctx_(ctx) {
|
||||
|
||||
}
|
||||
|
||||
driver::context* stream::context() const {
|
||||
return ctx_;
|
||||
}
|
||||
|
||||
|
||||
/* ------------------------ */
|
||||
// OpenCL //
|
||||
/* ------------------------ */
|
||||
|
||||
|
||||
void cl_stream::synchronize() {
|
||||
dispatch::clFinish(*cl_);
|
||||
}
|
||||
|
||||
|
||||
/* ------------------------ */
|
||||
// CUDA //
|
||||
/* ------------------------ */
|
||||
|
||||
inline CUcontext get_context() {
|
||||
CUcontext result;
|
||||
dispatch::cuCtxGetCurrent(&result);
|
||||
return result;
|
||||
}
|
||||
|
||||
stream::stream(CUstream stream, bool take_ownership): context_(cucontext(), take_ownership), cu_(stream, take_ownership)
|
||||
{}
|
||||
cu_stream::cu_stream(CUstream str, bool take_ownership):
|
||||
stream(backend::contexts::import(get_context()), str, take_ownership) {
|
||||
}
|
||||
|
||||
stream::stream(driver::context const & context): context_(context), cu_(CUstream(), true)
|
||||
{
|
||||
ContextSwitcher ctx_switch(context_);
|
||||
cu_stream::cu_stream(driver::context *context): stream((driver::cu_context*)context, CUstream(), true) {
|
||||
cu_context::context_switcher ctx_switch(*ctx_);
|
||||
dispatch::cuStreamCreate(&*cu_, 0);
|
||||
}
|
||||
|
||||
void stream::synchronize()
|
||||
{
|
||||
ContextSwitcher ctx_switch(context_);
|
||||
void cu_stream::synchronize() {
|
||||
cu_context::context_switcher ctx_switch(*ctx_);
|
||||
dispatch::cuStreamSynchronize(*cu_);
|
||||
}
|
||||
|
||||
driver::context const & stream::context() const
|
||||
{ return context_; }
|
||||
|
||||
void stream::enqueue(kernel const & kernel, std::array<size_t, 3> grid, std::array<size_t, 3> block, std::vector<Event> const *, Event* event){
|
||||
ContextSwitcher ctx_switch(context_);
|
||||
void cu_stream::enqueue(driver::cu_kernel const & kernel, std::array<size_t, 3> grid, std::array<size_t, 3> block, std::vector<Event> const *, Event* event) {
|
||||
cu_context::context_switcher ctx_switch(*ctx_);
|
||||
if(event)
|
||||
dispatch::cuEventRecord(((cu_event_t)*event).first, *cu_);
|
||||
dispatch::cuLaunchKernel(kernel, grid[0], grid[1], grid[2], block[0], block[1], block[2], 0, *cu_,(void**)kernel.cu_params(), NULL);
|
||||
dispatch::cuEventRecord(event->cu()->first, *cu_);
|
||||
dispatch::cuLaunchKernel(*kernel.cu(), grid[0], grid[1], grid[2], block[0], block[1], block[2], 0, *cu_,(void**)kernel.cu_params(), NULL);
|
||||
if(event)
|
||||
dispatch::cuEventRecord(((cu_event_t)*event).second, *cu_);
|
||||
dispatch::cuEventRecord(event->cu()->second, *cu_);
|
||||
}
|
||||
|
||||
void stream::write(buffer const & buffer, bool blocking, std::size_t offset, std::size_t size, void const* ptr){
|
||||
ContextSwitcher ctx_switch(context_);
|
||||
void cu_stream::write(driver::cu_buffer const & buffer, bool blocking, std::size_t offset, std::size_t size, void const* ptr) {
|
||||
cu_context::context_switcher ctx_switch(*ctx_);
|
||||
if(blocking)
|
||||
dispatch::cuMemcpyHtoD(buffer + offset, ptr, size);
|
||||
dispatch::cuMemcpyHtoD(*buffer.cu() + offset, ptr, size);
|
||||
else
|
||||
dispatch::cuMemcpyHtoDAsync(buffer + offset, ptr, size, *cu_);
|
||||
dispatch::cuMemcpyHtoDAsync(*buffer.cu() + offset, ptr, size, *cu_);
|
||||
}
|
||||
|
||||
void stream::read(buffer const & buffer, bool blocking, std::size_t offset, std::size_t size, void* ptr){
|
||||
ContextSwitcher ctx_switch(context_);
|
||||
void cu_stream::read(driver::cu_buffer const & buffer, bool blocking, std::size_t offset, std::size_t size, void* ptr) {
|
||||
cu_context::context_switcher ctx_switch(*ctx_);
|
||||
if(blocking)
|
||||
dispatch::cuMemcpyDtoH(ptr, buffer + offset, size);
|
||||
dispatch::cuMemcpyDtoH(ptr, *buffer.cu() + offset, size);
|
||||
else
|
||||
dispatch::cuMemcpyDtoHAsync(ptr, buffer + offset, size, *cu_);
|
||||
dispatch::cuMemcpyDtoHAsync(ptr, *buffer.cu() + offset, size, *cu_);
|
||||
}
|
||||
|
||||
handle<CUstream> const & stream::cu() const
|
||||
{ return cu_; }
|
||||
|
||||
}
|
||||
|
||||
|
21
lib/jit.cpp
21
lib/jit.cpp
@@ -89,7 +89,7 @@ std::unique_ptr<ir::module> jit::make_triton_module(const std::string &src) {
|
||||
}
|
||||
|
||||
|
||||
jit::jit(driver::context context): driver_context_(context) {
|
||||
jit::jit(driver::context *context): driver_context_(context) {
|
||||
}
|
||||
|
||||
|
||||
@@ -131,15 +131,15 @@ void jit::autotune(const std::string &src, benchmark_t benchmark) {
|
||||
}
|
||||
passes.tune.init(tt_module);
|
||||
passes.init(tt_module);
|
||||
const driver::device &device = driver_context_.device();
|
||||
if(passes.allocation.get_allocated_size() > device.max_shared_memory())
|
||||
driver::cu_device* device = (driver::cu_device*)driver_context_->device();
|
||||
if(passes.allocation.get_allocated_size() > device->max_shared_memory())
|
||||
return;
|
||||
if(passes.tune.get_num_threads() > device.max_threads_per_block())
|
||||
if(passes.tune.get_num_threads() > device->max_threads_per_block())
|
||||
return;
|
||||
// Compile
|
||||
auto ll_module = make_llvm_module(tt_module, passes);
|
||||
driver::module module(driver_context_, &*ll_module);
|
||||
driver::kernel kernel(module, "matmul");
|
||||
driver::cu_module module(driver_context_, &*ll_module);
|
||||
driver::cu_kernel kernel(&module, "matmul");
|
||||
launch_information info = launch_info_map_.at("matmul");
|
||||
for(unsigned p: params)
|
||||
std::cout << p << " " << std::flush;
|
||||
@@ -166,12 +166,13 @@ void jit::add_module(ir::module &tt_module, const std::vector<unsigned> ¶ms)
|
||||
passes.tune.check_constraints(errors);
|
||||
if(errors.size())
|
||||
throw std::runtime_error("invalid parameters");
|
||||
if(passes.allocation.get_allocated_size() > driver_context_.device().max_shared_memory())
|
||||
driver::cu_device* device = (driver::cu_device*)driver_context_->device();
|
||||
if(passes.allocation.get_allocated_size() > device->max_shared_memory())
|
||||
throw std::runtime_error("invalid parameters");
|
||||
// triton module -> llvm module
|
||||
auto ll_module = make_llvm_module(tt_module, passes);
|
||||
// llvm module -> machine code
|
||||
modules_.push_back(driver::module(driver_context_, &*ll_module));
|
||||
modules_.push_back(driver::cu_module(driver_context_, &*ll_module));
|
||||
// add globals
|
||||
for(auto x: tt_module.globals())
|
||||
global_ints_[x.first] = ((ir::metaparameter*)x.second)->get_value();
|
||||
@@ -182,8 +183,8 @@ void jit::add_module(const std::string &src, const std::vector<unsigned> ¶ms
|
||||
add_module(*ptt_module, params);
|
||||
}
|
||||
|
||||
driver::kernel jit::get_function(const std::string &name) {
|
||||
return driver::kernel(modules_.front(), name.c_str());
|
||||
driver::cu_kernel jit::get_function(const std::string &name) {
|
||||
return driver::cu_kernel(&modules_.front(), name.c_str());
|
||||
}
|
||||
|
||||
jit::launch_information jit::get_launch_info(const std::string &name) {
|
||||
|
Reference in New Issue
Block a user