[DRIVER] Removed OpenCL support
There is no plan to support OpenCL anytime soon (Vulkan would be preferred). Removing the adequate portion of the driver code
This commit is contained in:
@@ -14,10 +14,9 @@ namespace driver
|
|||||||
class stream;
|
class stream;
|
||||||
|
|
||||||
// Base
|
// Base
|
||||||
class buffer : public polymorphic_resource<CUdeviceptr, cl_mem, host_buffer_t> {
|
class buffer : public polymorphic_resource<CUdeviceptr, host_buffer_t> {
|
||||||
public:
|
public:
|
||||||
buffer(driver::context* ctx, size_t size, CUdeviceptr cl, bool take_ownership);
|
buffer(driver::context* ctx, size_t size, CUdeviceptr cl, bool take_ownership);
|
||||||
buffer(driver::context* ctx, size_t size, cl_mem cl, bool take_ownership);
|
|
||||||
buffer(driver::context* ctx, size_t size, host_buffer_t hst, bool take_ownership);
|
buffer(driver::context* ctx, size_t size, host_buffer_t hst, bool take_ownership);
|
||||||
uintptr_t addr_as_uintptr_t();
|
uintptr_t addr_as_uintptr_t();
|
||||||
static buffer* create(driver::context* ctx, size_t size);
|
static buffer* create(driver::context* ctx, size_t size);
|
||||||
@@ -36,13 +35,6 @@ public:
|
|||||||
host_buffer(driver::context* context, size_t size);
|
host_buffer(driver::context* context, size_t size);
|
||||||
};
|
};
|
||||||
|
|
||||||
// OpenCL
|
|
||||||
class ocl_buffer: public buffer
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
ocl_buffer(driver::context* context, size_t size);
|
|
||||||
};
|
|
||||||
|
|
||||||
// CUDA
|
// CUDA
|
||||||
class cu_buffer: public buffer
|
class cu_buffer: public buffer
|
||||||
{
|
{
|
||||||
|
@@ -11,13 +11,12 @@ namespace triton
|
|||||||
namespace driver
|
namespace driver
|
||||||
{
|
{
|
||||||
|
|
||||||
class context: public polymorphic_resource<CUcontext, cl_context, host_context_t>{
|
class context: public polymorphic_resource<CUcontext, host_context_t>{
|
||||||
protected:
|
protected:
|
||||||
static std::string get_cache_path();
|
static std::string get_cache_path();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
context(driver::device *dev, CUcontext cu, bool take_ownership);
|
context(driver::device *dev, CUcontext cu, bool take_ownership);
|
||||||
context(driver::device *dev, cl_context cl, bool take_ownership);
|
|
||||||
context(driver::device *dev, host_context_t hst, bool take_ownership);
|
context(driver::device *dev, host_context_t hst, bool take_ownership);
|
||||||
driver::device* device() const;
|
driver::device* device() const;
|
||||||
std::string const & cache_path() const;
|
std::string const & cache_path() const;
|
||||||
@@ -55,15 +54,6 @@ public:
|
|||||||
cu_context(driver::device* dev);
|
cu_context(driver::device* dev);
|
||||||
};
|
};
|
||||||
|
|
||||||
// OpenCL
|
|
||||||
class ocl_context: public context {
|
|
||||||
public:
|
|
||||||
ocl_context(driver::device* dev);
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -20,7 +20,7 @@ namespace driver
|
|||||||
class context;
|
class context;
|
||||||
|
|
||||||
// Base device
|
// Base device
|
||||||
class device: public polymorphic_resource<CUdevice, cl_device_id, host_device_t>{
|
class device: public polymorphic_resource<CUdevice, host_device_t>{
|
||||||
public:
|
public:
|
||||||
using polymorphic_resource::polymorphic_resource;
|
using polymorphic_resource::polymorphic_resource;
|
||||||
virtual size_t max_threads_per_block() const = 0;
|
virtual size_t max_threads_per_block() const = 0;
|
||||||
@@ -37,15 +37,6 @@ public:
|
|||||||
std::unique_ptr<codegen::target> make_target() const;
|
std::unique_ptr<codegen::target> make_target() const;
|
||||||
};
|
};
|
||||||
|
|
||||||
// OpenCL device
|
|
||||||
class ocl_device: public device {
|
|
||||||
public:
|
|
||||||
ocl_device(cl_device_id cl, bool take_ownership = true): device(cl, take_ownership) { }
|
|
||||||
size_t max_threads_per_block() const;
|
|
||||||
size_t max_shared_memory() const;
|
|
||||||
std::unique_ptr<codegen::target> make_target() const;
|
|
||||||
};
|
|
||||||
|
|
||||||
// CUDA device
|
// CUDA device
|
||||||
class cu_device: public device {
|
class cu_device: public device {
|
||||||
public:
|
public:
|
||||||
|
@@ -9,8 +9,6 @@
|
|||||||
//CUDA Backend
|
//CUDA Backend
|
||||||
#include "triton/external/CUDA/cuda.h"
|
#include "triton/external/CUDA/cuda.h"
|
||||||
#include "triton/external/CUDA/nvml.h"
|
#include "triton/external/CUDA/nvml.h"
|
||||||
#include "triton/external/CL/cl.h"
|
|
||||||
#include "triton/external/CL/cl_ext.h"
|
|
||||||
|
|
||||||
//Exceptions
|
//Exceptions
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
@@ -30,7 +28,6 @@ class cu_context;
|
|||||||
|
|
||||||
template<class T> void check(T){}
|
template<class T> void check(T){}
|
||||||
void check(CUresult err);
|
void check(CUresult err);
|
||||||
void check(cl_int err);
|
|
||||||
|
|
||||||
class dispatch
|
class dispatch
|
||||||
{
|
{
|
||||||
@@ -61,48 +58,11 @@ protected:
|
|||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static bool clinit();
|
|
||||||
static bool nvmlinit();
|
static bool nvmlinit();
|
||||||
static bool cuinit();
|
static bool cuinit();
|
||||||
static bool spvllvminit();
|
static bool spvllvminit();
|
||||||
static void release();
|
static void release();
|
||||||
|
|
||||||
// OpenCL
|
|
||||||
static cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *);
|
|
||||||
static cl_int clEnqueueNDRangeKernel(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *);
|
|
||||||
static cl_int clSetKernelArg(cl_kernel, cl_uint, size_t, const void *);
|
|
||||||
static cl_int clReleaseMemObject(cl_mem);
|
|
||||||
static cl_int clFinish(cl_command_queue);
|
|
||||||
static cl_int clGetMemObjectInfo(cl_mem, cl_mem_info, size_t, void *, size_t *);
|
|
||||||
static cl_int clGetCommandQueueInfo(cl_command_queue, cl_command_queue_info, size_t, void *, size_t *);
|
|
||||||
static cl_int clReleaseContext(cl_context);
|
|
||||||
static cl_int clReleaseEvent(cl_event);
|
|
||||||
static cl_int clEnqueueWriteBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *);
|
|
||||||
static cl_int clEnqueueReadBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void *, cl_uint, const cl_event *, cl_event *);
|
|
||||||
static cl_int clGetProgramBuildInfo(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *);
|
|
||||||
static cl_int clReleaseDevice(cl_device_id);
|
|
||||||
static cl_context clCreateContext(const cl_context_properties *, cl_uint, const cl_device_id *, void (*)(const char *, const void *, size_t, void *), void *, cl_int *);
|
|
||||||
static cl_int clGetDeviceIDs(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *);
|
|
||||||
static cl_int clGetContextInfo(cl_context, cl_context_info, size_t, void *, size_t *);
|
|
||||||
static cl_int clGetDeviceInfo(cl_device_id, cl_device_info, size_t, void *, size_t *);
|
|
||||||
static cl_int clReleaseCommandQueue(cl_command_queue);
|
|
||||||
static cl_int clGetPlatformIDs(cl_uint, cl_platform_id *, cl_uint *);
|
|
||||||
static cl_int clGetPlatformInfo(cl_platform_id, cl_platform_info, size_t, void *, size_t *);
|
|
||||||
static cl_int clGetEventProfilingInfo(cl_event, cl_profiling_info, size_t, void *, size_t *);
|
|
||||||
static cl_program clCreateProgramWithBinary(cl_context, cl_uint, const cl_device_id *, const size_t *, const unsigned char **, cl_int *, cl_int *);
|
|
||||||
static cl_command_queue clCreateCommandQueue(cl_context, cl_device_id, cl_command_queue_properties, cl_int *);
|
|
||||||
static cl_int clRetainEvent(cl_event);
|
|
||||||
static cl_int clReleaseProgram(cl_program);
|
|
||||||
static cl_int clFlush(cl_command_queue);
|
|
||||||
static cl_int clGetProgramInfo(cl_program, cl_program_info, size_t, void *, size_t *);
|
|
||||||
static cl_int clGetKernelInfo(cl_kernel, cl_kernel_info, size_t, void *, size_t *);
|
|
||||||
static cl_int clGetKernelWorkGroupInfo(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void *, size_t *);
|
|
||||||
static cl_kernel clCreateKernel(cl_program, const char *, cl_int *);
|
|
||||||
static cl_int clCreateKernelsInProgram(cl_program, cl_uint, cl_kernel*, cl_uint*);
|
|
||||||
static cl_mem clCreateBuffer(cl_context, cl_mem_flags, size_t, void *, cl_int *);
|
|
||||||
static cl_program clCreateProgramWithSource(cl_context, cl_uint, const char **, const size_t *, cl_int *);
|
|
||||||
static cl_int clReleaseKernel(cl_kernel);
|
|
||||||
|
|
||||||
// CUDA
|
// CUDA
|
||||||
static CUresult cuCtxGetCurrent(CUcontext *pctx);
|
static CUresult cuCtxGetCurrent(CUcontext *pctx);
|
||||||
static CUresult cuCtxSetCurrent(CUcontext ctx);
|
static CUresult cuCtxSetCurrent(CUcontext ctx);
|
||||||
@@ -157,7 +117,6 @@ public:
|
|||||||
private:
|
private:
|
||||||
|
|
||||||
// Libraries
|
// Libraries
|
||||||
static void* opencl_;
|
|
||||||
static void* cuda_;
|
static void* cuda_;
|
||||||
static void* nvml_;
|
static void* nvml_;
|
||||||
static void* vulkan_;
|
static void* vulkan_;
|
||||||
@@ -165,41 +124,6 @@ private:
|
|||||||
static void* spvcross_;
|
static void* spvcross_;
|
||||||
static void* opengl_;
|
static void* opengl_;
|
||||||
|
|
||||||
// OpenCL functions
|
|
||||||
static void* clBuildProgram_;
|
|
||||||
static void* clEnqueueNDRangeKernel_;
|
|
||||||
static void* clSetKernelArg_;
|
|
||||||
static void* clReleaseMemObject_;
|
|
||||||
static void* clFinish_;
|
|
||||||
static void* clGetMemObjectInfo_;
|
|
||||||
static void* clGetCommandQueueInfo_;
|
|
||||||
static void* clReleaseContext_;
|
|
||||||
static void* clReleaseEvent_;
|
|
||||||
static void* clEnqueueWriteBuffer_;
|
|
||||||
static void* clEnqueueReadBuffer_;
|
|
||||||
static void* clGetProgramBuildInfo_;
|
|
||||||
static void* clReleaseDevice_;
|
|
||||||
static void* clCreateContext_;
|
|
||||||
static void* clGetDeviceIDs_;
|
|
||||||
static void* clGetContextInfo_;
|
|
||||||
static void* clGetDeviceInfo_;
|
|
||||||
static void* clReleaseCommandQueue_;
|
|
||||||
static void* clGetPlatformIDs_;
|
|
||||||
static void* clGetPlatformInfo_;
|
|
||||||
static void* clGetEventProfilingInfo_;
|
|
||||||
static void* clCreateProgramWithBinary_;
|
|
||||||
static void* clCreateCommandQueue_;
|
|
||||||
static void* clRetainEvent_;
|
|
||||||
static void* clReleaseProgram_;
|
|
||||||
static void* clFlush_;
|
|
||||||
static void* clGetProgramInfo_;
|
|
||||||
static void* clGetKernelInfo_;
|
|
||||||
static void* clGetKernelWorkGroupInfo_;
|
|
||||||
static void* clCreateKernel_;
|
|
||||||
static void* clCreateKernelsInProgram_;
|
|
||||||
static void* clCreateBuffer_;
|
|
||||||
static void* clCreateProgramWithSource_;
|
|
||||||
static void* clReleaseKernel_;
|
|
||||||
|
|
||||||
// CUDA functions
|
// CUDA functions
|
||||||
static void* cuCtxGetCurrent_;
|
static void* cuCtxGetCurrent_;
|
||||||
|
@@ -141,66 +141,6 @@ namespace triton
|
|||||||
ISAAC_CREATE_CUDNN_EXCEPTION(runtime_fp_overflow ,"runtime fp overflow");
|
ISAAC_CREATE_CUDNN_EXCEPTION(runtime_fp_overflow ,"runtime fp overflow");
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace ocl
|
|
||||||
{
|
|
||||||
|
|
||||||
class base: public std::exception{};
|
|
||||||
|
|
||||||
#define ISAAC_CREATE_CL_EXCEPTION(name, msg) class name: public base { public: const char * what() const throw(){ return "OpenCL: Error- " msg; } }
|
|
||||||
|
|
||||||
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(device_not_found, "device not found");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(device_not_available, "device not available");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(compiler_not_available, "compiler not available");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(mem_object_allocation_failure, "object allocation failure");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(out_of_resources, "launch out of resources");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(out_of_host_memory, "out of host memory");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(profiling_info_not_available, "profiling info not available");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(mem_copy_overlap, "mem copy overlap");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(image_format_mismatch, "image format mismatch");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(image_format_not_supported, "image format not supported");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(build_program_failure, "build program failure");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(map_failure, "map failure");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_value, "invalid value");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_device_type, "invalid device type");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_platform, "invalid platform");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_device, "invalid device");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_context, "invalid context");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_queue_properties, "invalid queue properties");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_command_queue, "invalid command queue");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_host_ptr, "invalid host pointer");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_mem_object, "invalid mem object");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_image_format_descriptor, "invalid image format descriptor");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_image_size, "invalid image size");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_sampler, "invalid sampler");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_binary, "invalid binary");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_build_options, "invalid build options");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_program, "invalid program");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_program_executable, "invalid program executable");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_kernel_name, "invalid kernel name");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_kernel_definition, "invalid kernel definition");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_kernel, "invalid kernel");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_arg_index, "invalid arg index");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_arg_value, "invalid arg value");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_arg_size, "invalid arg size");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_kernel_args, "invalid kernel args");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_work_dimension, "invalid work dimension");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_work_group_size, "invalid work group size");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_work_item_size, "invalid work item size");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_global_offset, "invalid global offset");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_event_wait_list, "invalid event wait list");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_event, "invalid event");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_operation, "invalid operation");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_gl_object, "invalid GL object");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_buffer_size, "invalid buffer size");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_mip_level, "invalid MIP level");
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_global_work_size, "invalid global work size");
|
|
||||||
#ifdef CL_INVALID_PROPERTY
|
|
||||||
ISAAC_CREATE_CL_EXCEPTION(invalid_property, "invalid property");
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -33,7 +33,6 @@ namespace driver
|
|||||||
|
|
||||||
enum backend_t {
|
enum backend_t {
|
||||||
CUDA,
|
CUDA,
|
||||||
OpenCL,
|
|
||||||
Host
|
Host
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -120,24 +119,20 @@ protected:
|
|||||||
bool has_ownership_;
|
bool has_ownership_;
|
||||||
};
|
};
|
||||||
|
|
||||||
template<class CUType, class CLType, class HostType>
|
template<class CUType, class HostType>
|
||||||
class polymorphic_resource {
|
class polymorphic_resource {
|
||||||
public:
|
public:
|
||||||
polymorphic_resource(CUType cu, bool take_ownership): cu_(cu, take_ownership), backend_(CUDA){}
|
polymorphic_resource(CUType cu, bool take_ownership): cu_(cu, take_ownership), backend_(CUDA){}
|
||||||
polymorphic_resource(CLType cl, bool take_ownership): cl_(cl, take_ownership), backend_(OpenCL){}
|
|
||||||
polymorphic_resource(HostType hst, bool take_ownership): hst_(hst, take_ownership), backend_(Host){}
|
polymorphic_resource(HostType hst, bool take_ownership): hst_(hst, take_ownership), backend_(Host){}
|
||||||
virtual ~polymorphic_resource() { }
|
virtual ~polymorphic_resource() { }
|
||||||
|
|
||||||
handle<CUType> cu() { return cu_; }
|
handle<CUType> cu() { return cu_; }
|
||||||
handle<CLType> cl() { return cl_; }
|
|
||||||
handle<HostType> hst() { return hst_; }
|
handle<HostType> hst() { return hst_; }
|
||||||
const handle<CUType>& cu() const { return cu_; }
|
const handle<CUType>& cu() const { return cu_; }
|
||||||
const handle<CLType>& cl() const { return cl_; }
|
|
||||||
const handle<HostType>& hst() const { return hst_; }
|
const handle<HostType>& hst() const { return hst_; }
|
||||||
backend_t backend() { return backend_; }
|
backend_t backend() { return backend_; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
handle<CLType> cl_;
|
|
||||||
handle<CUType> cu_;
|
handle<CUType> cu_;
|
||||||
handle<HostType> hst_;
|
handle<HostType> hst_;
|
||||||
backend_t backend_;
|
backend_t backend_;
|
||||||
|
@@ -21,10 +21,9 @@ namespace driver
|
|||||||
class cu_buffer;
|
class cu_buffer;
|
||||||
|
|
||||||
// Base
|
// Base
|
||||||
class kernel: public polymorphic_resource<CUfunction, cl_kernel, host_function_t> {
|
class kernel: public polymorphic_resource<CUfunction, host_function_t> {
|
||||||
public:
|
public:
|
||||||
kernel(driver::module* program, CUfunction fn, bool has_ownership);
|
kernel(driver::module* program, CUfunction fn, bool has_ownership);
|
||||||
kernel(driver::module* program, cl_kernel fn, bool has_ownership);
|
|
||||||
kernel(driver::module* program, host_function_t fn, bool has_ownership);
|
kernel(driver::module* program, host_function_t fn, bool has_ownership);
|
||||||
// Getters
|
// Getters
|
||||||
driver::module* module();
|
driver::module* module();
|
||||||
@@ -53,17 +52,6 @@ private:
|
|||||||
std::vector<void*> params_;
|
std::vector<void*> params_;
|
||||||
};
|
};
|
||||||
|
|
||||||
// OpenCL
|
|
||||||
class ocl_kernel: public kernel {
|
|
||||||
public:
|
|
||||||
//Constructors
|
|
||||||
ocl_kernel(driver::module* program, const char* name);
|
|
||||||
// Arguments setters
|
|
||||||
void setArg(unsigned int index, std::size_t size, void* ptr);
|
|
||||||
void setArg(unsigned int index, driver::buffer* buffer);
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
// CUDA
|
// CUDA
|
||||||
class cu_kernel: public kernel {
|
class cu_kernel: public kernel {
|
||||||
public:
|
public:
|
||||||
|
@@ -25,7 +25,7 @@ class cu_context;
|
|||||||
class cu_device;
|
class cu_device;
|
||||||
|
|
||||||
// Base
|
// Base
|
||||||
class module: public polymorphic_resource<CUmodule, cl_program, host_module_t> {
|
class module: public polymorphic_resource<CUmodule, host_module_t> {
|
||||||
protected:
|
protected:
|
||||||
void init_llvm();
|
void init_llvm();
|
||||||
|
|
||||||
@@ -36,7 +36,6 @@ protected:
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
module(driver::context* ctx, CUmodule mod, bool has_ownership);
|
module(driver::context* ctx, CUmodule mod, bool has_ownership);
|
||||||
module(driver::context* ctx, cl_program mod, bool has_ownership);
|
|
||||||
module(driver::context* ctx, host_module_t mod, bool has_ownership);
|
module(driver::context* ctx, host_module_t mod, bool has_ownership);
|
||||||
static module* create(driver::context* ctx, std::unique_ptr<llvm::Module> src);
|
static module* create(driver::context* ctx, std::unique_ptr<llvm::Module> src);
|
||||||
driver::context* context() const;
|
driver::context* context() const;
|
||||||
@@ -59,13 +58,6 @@ public:
|
|||||||
std::unique_ptr<buffer> symbol(const char * name) const;
|
std::unique_ptr<buffer> symbol(const char * name) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
// OpenCL
|
|
||||||
class ocl_module: public module{
|
|
||||||
public:
|
|
||||||
ocl_module(driver::context* context, std::unique_ptr<llvm::Module> module);
|
|
||||||
std::unique_ptr<buffer> symbol(const char * name) const;
|
|
||||||
};
|
|
||||||
|
|
||||||
// CUDA
|
// CUDA
|
||||||
class cu_module: public module {
|
class cu_module: public module {
|
||||||
std::string compile_llvm_module(std::unique_ptr<llvm::Module> module, driver::device* device);
|
std::string compile_llvm_module(std::unique_ptr<llvm::Module> module, driver::device* device);
|
||||||
|
@@ -42,18 +42,6 @@ private:
|
|||||||
handle<CUPlatform> cu_;
|
handle<CUPlatform> cu_;
|
||||||
};
|
};
|
||||||
|
|
||||||
// OpenCL
|
|
||||||
class cl_platform: public platform
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
cl_platform(cl_platform_id cl): platform("OpenCL"), cl_(cl) { }
|
|
||||||
std::string version() const;
|
|
||||||
void devices(std::vector<driver::device*> &devices) const;
|
|
||||||
|
|
||||||
private:
|
|
||||||
handle<cl_platform_id> cl_;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Host
|
// Host
|
||||||
class host_platform: public platform
|
class host_platform: public platform
|
||||||
{
|
{
|
||||||
|
@@ -21,10 +21,9 @@ class Range;
|
|||||||
class cu_buffer;
|
class cu_buffer;
|
||||||
|
|
||||||
// Base
|
// Base
|
||||||
class stream: public polymorphic_resource<CUstream, cl_command_queue, host_stream_t> {
|
class stream: public polymorphic_resource<CUstream, host_stream_t> {
|
||||||
public:
|
public:
|
||||||
stream(driver::context *ctx, CUstream, bool has_ownership);
|
stream(driver::context *ctx, CUstream, bool has_ownership);
|
||||||
stream(driver::context *ctx, cl_command_queue, bool has_ownership);
|
|
||||||
stream(driver::context *ctx, host_stream_t, bool has_ownership);
|
stream(driver::context *ctx, host_stream_t, bool has_ownership);
|
||||||
// factory
|
// factory
|
||||||
static driver::stream* create(driver::context* ctx);
|
static driver::stream* create(driver::context* ctx);
|
||||||
@@ -58,19 +57,6 @@ public:
|
|||||||
void read(driver::buffer* buf, bool blocking, std::size_t offset, std::size_t size, void* ptr);
|
void read(driver::buffer* buf, bool blocking, std::size_t offset, std::size_t size, void* ptr);
|
||||||
};
|
};
|
||||||
|
|
||||||
// OpenCL
|
|
||||||
class cl_stream: public stream {
|
|
||||||
public:
|
|
||||||
// Constructors
|
|
||||||
cl_stream(driver::context *ctx);
|
|
||||||
|
|
||||||
// Overridden
|
|
||||||
void synchronize();
|
|
||||||
void enqueue(driver::kernel* kernel, std::array<size_t, 3> grid, std::array<size_t, 3> block, std::vector<event> const *, event *event, void **args, size_t args_size);
|
|
||||||
void write(driver::buffer* buf, bool blocking, std::size_t offset, std::size_t size, void const* ptr);
|
|
||||||
void read(driver::buffer* buf, bool blocking, std::size_t offset, std::size_t size, void* ptr);
|
|
||||||
};
|
|
||||||
|
|
||||||
// CUDA
|
// CUDA
|
||||||
class cu_stream: public stream {
|
class cu_stream: public stream {
|
||||||
public:
|
public:
|
||||||
|
@@ -38,9 +38,6 @@ namespace driver
|
|||||||
buffer::buffer(driver::context* ctx, size_t size, CUdeviceptr cu, bool take_ownership)
|
buffer::buffer(driver::context* ctx, size_t size, CUdeviceptr cu, bool take_ownership)
|
||||||
: polymorphic_resource(cu, take_ownership), context_(ctx), size_(size) { }
|
: polymorphic_resource(cu, take_ownership), context_(ctx), size_(size) { }
|
||||||
|
|
||||||
buffer::buffer(driver::context* ctx, size_t size, cl_mem cl, bool take_ownership)
|
|
||||||
: polymorphic_resource(cl, take_ownership), context_(ctx), size_(size) { }
|
|
||||||
|
|
||||||
buffer::buffer(driver::context* ctx, size_t size, host_buffer_t hst, bool take_ownership)
|
buffer::buffer(driver::context* ctx, size_t size, host_buffer_t hst, bool take_ownership)
|
||||||
: polymorphic_resource(hst, take_ownership), context_(ctx), size_(size) { }
|
: polymorphic_resource(hst, take_ownership), context_(ctx), size_(size) { }
|
||||||
|
|
||||||
@@ -65,7 +62,6 @@ uintptr_t buffer::addr_as_uintptr_t() {
|
|||||||
buffer* buffer::create(driver::context* ctx, size_t size) {
|
buffer* buffer::create(driver::context* ctx, size_t size) {
|
||||||
switch(ctx->backend()){
|
switch(ctx->backend()){
|
||||||
case CUDA: return new cu_buffer(ctx, size);
|
case CUDA: return new cu_buffer(ctx, size);
|
||||||
case OpenCL: return new ocl_buffer(ctx, size);
|
|
||||||
case Host: return new host_buffer(ctx, size);
|
case Host: return new host_buffer(ctx, size);
|
||||||
default: throw std::runtime_error("unknown backend");
|
default: throw std::runtime_error("unknown backend");
|
||||||
}
|
}
|
||||||
@@ -78,15 +74,6 @@ host_buffer::host_buffer(driver::context *context, size_t size)
|
|||||||
hst_->data = new char[size];
|
hst_->data = new char[size];
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
|
||||||
|
|
||||||
ocl_buffer::ocl_buffer(driver::context* context, size_t size)
|
|
||||||
: buffer(context, size, cl_mem(), true){
|
|
||||||
cl_int err;
|
|
||||||
*cl_ = dispatch::clCreateBuffer(*context->cl(), CL_MEM_READ_WRITE, size, NULL, &err);
|
|
||||||
check(err);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
||||||
|
@@ -41,11 +41,6 @@ context::context(driver::device *dev, CUcontext cu, bool take_ownership):
|
|||||||
dev_(dev), cache_path_(get_cache_path()) {
|
dev_(dev), cache_path_(get_cache_path()) {
|
||||||
}
|
}
|
||||||
|
|
||||||
context::context(driver::device *dev, cl_context cl, bool take_ownership):
|
|
||||||
polymorphic_resource(cl, take_ownership),
|
|
||||||
dev_(dev), cache_path_(get_cache_path()){
|
|
||||||
}
|
|
||||||
|
|
||||||
context::context(driver::device *dev, host_context_t hst, bool take_ownership):
|
context::context(driver::device *dev, host_context_t hst, bool take_ownership):
|
||||||
polymorphic_resource(hst, take_ownership),
|
polymorphic_resource(hst, take_ownership),
|
||||||
dev_(dev), cache_path_(get_cache_path()){
|
dev_(dev), cache_path_(get_cache_path()){
|
||||||
@@ -54,7 +49,6 @@ context::context(driver::device *dev, host_context_t hst, bool take_ownership):
|
|||||||
context* context::create(driver::device *dev){
|
context* context::create(driver::device *dev){
|
||||||
switch(dev->backend()){
|
switch(dev->backend()){
|
||||||
case CUDA: return new cu_context(dev);
|
case CUDA: return new cu_context(dev);
|
||||||
case OpenCL: return new ocl_context(dev);
|
|
||||||
case Host: return new host_context(dev);
|
case Host: return new host_context(dev);
|
||||||
default: throw std::runtime_error("unknown backend");
|
default: throw std::runtime_error("unknown backend");
|
||||||
}
|
}
|
||||||
@@ -131,17 +125,5 @@ cu_context::cu_context(driver::device* device): context(device, CUcontext(), tru
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* ------------------------ */
|
|
||||||
// OpenCL //
|
|
||||||
/* ------------------------ */
|
|
||||||
|
|
||||||
ocl_context::ocl_context(driver::device* dev): context(dev, cl_context(), true) {
|
|
||||||
cl_int err;
|
|
||||||
*cl_ = dispatch::clCreateContext(nullptr, 1, &*dev->cl(), nullptr, nullptr, &err);
|
|
||||||
check(err);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -44,25 +44,6 @@ std::unique_ptr<codegen::target> host_device::make_target() const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* ------------------------ */
|
|
||||||
// OpenCL //
|
|
||||||
/* ------------------------ */
|
|
||||||
|
|
||||||
// maximum amount of shared memory per block
|
|
||||||
size_t ocl_device::max_shared_memory() const {
|
|
||||||
throw std::runtime_error("not implemented");
|
|
||||||
// return ocl::info<CL_DEVICE_LOCAL_MEM_SIZE>(*cl_);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t ocl_device::max_threads_per_block() const {
|
|
||||||
throw std::runtime_error("not implemented");
|
|
||||||
// return ocl::info<CL_DEVICE_MAX_WORK_ITEM_SIZES>(*cl_).at(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::unique_ptr<codegen::target> ocl_device::make_target() const {
|
|
||||||
return std::unique_ptr<codegen::amd_cl_target>(new codegen::amd_cl_target());
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ------------------------ */
|
/* ------------------------ */
|
||||||
// CUDA //
|
// CUDA //
|
||||||
/* ------------------------ */
|
/* ------------------------ */
|
||||||
|
@@ -72,17 +72,6 @@ namespace driver
|
|||||||
#define DEFINE19(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15, t16, t17, t18, t19) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i, t10 j, t11 k, t12 l, t13 m, t14 n, t15 o, t16 p, t17 q, t18 r, t19 s)\
|
#define DEFINE19(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15, t16, t17, t18, t19) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i, t10 j, t11 k, t12 l, t13 m, t14 n, t15 o, t16 p, t17 q, t18 r, t19 s)\
|
||||||
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s); }
|
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s); }
|
||||||
|
|
||||||
//Specialized helpers for OpenCL
|
|
||||||
#define OCL_DEFINE1(ret, fname, t1) DEFINE1(clinit, opencl_, ret, fname, t1)
|
|
||||||
#define OCL_DEFINE2(ret, fname, t1, t2) DEFINE2(clinit, opencl_, ret, fname, t1, t2)
|
|
||||||
#define OCL_DEFINE3(ret, fname, t1, t2, t3) DEFINE3(clinit, opencl_, ret, fname, t1, t2, t3)
|
|
||||||
#define OCL_DEFINE4(ret, fname, t1, t2, t3, t4) DEFINE4(clinit, opencl_, ret, fname, t1, t2, t3, t4)
|
|
||||||
#define OCL_DEFINE5(ret, fname, t1, t2, t3, t4, t5) DEFINE5(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5)
|
|
||||||
#define OCL_DEFINE6(ret, fname, t1, t2, t3, t4, t5, t6) DEFINE6(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5, t6)
|
|
||||||
#define OCL_DEFINE7(ret, fname, t1, t2, t3, t4, t5, t6, t7) DEFINE7(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5, t6, t7)
|
|
||||||
#define OCL_DEFINE8(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) DEFINE8(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8)
|
|
||||||
#define OCL_DEFINE9(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) DEFINE9(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9)
|
|
||||||
|
|
||||||
//Specialized helpers for CUDA
|
//Specialized helpers for CUDA
|
||||||
#define CUDA_DEFINE1(ret, fname, t1) DEFINE1(cuinit, cuda_, ret, fname, t1)
|
#define CUDA_DEFINE1(ret, fname, t1) DEFINE1(cuinit, cuda_, ret, fname, t1)
|
||||||
#define CUDA_DEFINE2(ret, fname, t1, t2) DEFINE2(cuinit, cuda_, ret, fname, t1, t2)
|
#define CUDA_DEFINE2(ret, fname, t1, t2) DEFINE2(cuinit, cuda_, ret, fname, t1, t2)
|
||||||
@@ -101,12 +90,6 @@ namespace driver
|
|||||||
#define NVML_DEFINE2(ret, fname, t1, t2) DEFINE2(nvmlinit, nvml_, ret, fname, t1, t2)
|
#define NVML_DEFINE2(ret, fname, t1, t2) DEFINE2(nvmlinit, nvml_, ret, fname, t1, t2)
|
||||||
#define NVML_DEFINE3(ret, fname, t1, t2, t3) DEFINE3(nvmlinit, nvml_, ret, fname, t1, t2, t3)
|
#define NVML_DEFINE3(ret, fname, t1, t2, t3) DEFINE3(nvmlinit, nvml_, ret, fname, t1, t2, t3)
|
||||||
|
|
||||||
bool dispatch::clinit()
|
|
||||||
{
|
|
||||||
if(opencl_==nullptr)
|
|
||||||
opencl_ = dlopen("libOpenCL.so", RTLD_LAZY);
|
|
||||||
return opencl_ != nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool dispatch::cuinit(){
|
bool dispatch::cuinit(){
|
||||||
if(cuda_==nullptr){
|
if(cuda_==nullptr){
|
||||||
@@ -189,46 +172,6 @@ NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetClockInfo, nvmlDevice_t, nvmlClockType_t
|
|||||||
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetMaxClockInfo, nvmlDevice_t, nvmlClockType_t, unsigned int*)
|
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetMaxClockInfo, nvmlDevice_t, nvmlClockType_t, unsigned int*)
|
||||||
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceSetApplicationsClocks, nvmlDevice_t, unsigned int, unsigned int)
|
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceSetApplicationsClocks, nvmlDevice_t, unsigned int, unsigned int)
|
||||||
|
|
||||||
// OpenCL
|
|
||||||
cl_int dispatch::clBuildProgram(cl_program a, cl_uint b, const cl_device_id * c, const char * d, void (*e)(cl_program, void *), void * f)
|
|
||||||
{ return f_impl<dispatch::clinit>(opencl_, clBuildProgram, clBuildProgram_, "clBuildProgram", a, b, c, d, e, f); }
|
|
||||||
|
|
||||||
cl_context dispatch::clCreateContext(const cl_context_properties * a, cl_uint b, const cl_device_id * c, void (*d)(const char *, const void *, size_t, void *), void * e, cl_int * f)
|
|
||||||
{ return f_impl<dispatch::clinit>(opencl_, dispatch::clCreateContext, dispatch::clCreateContext_, "clCreateContext", a, b, c, d, e, f); }
|
|
||||||
|
|
||||||
OCL_DEFINE9(cl_int, clEnqueueNDRangeKernel, cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*)
|
|
||||||
OCL_DEFINE4(cl_int, clSetKernelArg, cl_kernel, cl_uint, size_t, const void *)
|
|
||||||
OCL_DEFINE1(cl_int, clReleaseMemObject, cl_mem)
|
|
||||||
OCL_DEFINE1(cl_int, clFinish, cl_command_queue)
|
|
||||||
OCL_DEFINE5(cl_int, clGetMemObjectInfo, cl_mem, cl_mem_info, size_t, void *, size_t *)
|
|
||||||
OCL_DEFINE5(cl_int, clGetCommandQueueInfo, cl_command_queue, cl_command_queue_info, size_t, void *, size_t *)
|
|
||||||
OCL_DEFINE1(cl_int, clReleaseContext, cl_context)
|
|
||||||
OCL_DEFINE1(cl_int, clReleaseEvent, cl_event)
|
|
||||||
OCL_DEFINE9(cl_int, clEnqueueWriteBuffer, cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *)
|
|
||||||
OCL_DEFINE9(cl_int, clEnqueueReadBuffer, cl_command_queue, cl_mem, cl_bool, size_t, size_t, void *, cl_uint, const cl_event *, cl_event *)
|
|
||||||
OCL_DEFINE6(cl_int, clGetProgramBuildInfo, cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *)
|
|
||||||
OCL_DEFINE1(cl_int, clReleaseDevice, cl_device_id)
|
|
||||||
OCL_DEFINE5(cl_int, clGetDeviceIDs, cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *)
|
|
||||||
OCL_DEFINE5(cl_int, clGetContextInfo, cl_context, cl_context_info, size_t, void *, size_t *)
|
|
||||||
OCL_DEFINE5(cl_int, clGetDeviceInfo, cl_device_id, cl_device_info, size_t, void *, size_t *)
|
|
||||||
OCL_DEFINE1(cl_int, clReleaseCommandQueue, cl_command_queue)
|
|
||||||
OCL_DEFINE3(cl_int, clGetPlatformIDs, cl_uint, cl_platform_id *, cl_uint *)
|
|
||||||
OCL_DEFINE5(cl_int, clGetPlatformInfo, cl_platform_id, cl_platform_info, size_t, void *, size_t *)
|
|
||||||
OCL_DEFINE5(cl_int, clGetEventProfilingInfo, cl_event, cl_profiling_info, size_t, void *, size_t *)
|
|
||||||
OCL_DEFINE7(cl_program, clCreateProgramWithBinary, cl_context, cl_uint, const cl_device_id *, const size_t *, const unsigned char **, cl_int *, cl_int *)
|
|
||||||
OCL_DEFINE4(cl_command_queue, clCreateCommandQueue, cl_context, cl_device_id, cl_command_queue_properties, cl_int *)
|
|
||||||
OCL_DEFINE1(cl_int, clRetainEvent, cl_event)
|
|
||||||
OCL_DEFINE1(cl_int, clReleaseProgram, cl_program)
|
|
||||||
OCL_DEFINE1(cl_int, clFlush, cl_command_queue)
|
|
||||||
OCL_DEFINE5(cl_int, clGetProgramInfo, cl_program, cl_program_info, size_t, void *, size_t *)
|
|
||||||
OCL_DEFINE5(cl_int, clGetKernelInfo, cl_kernel, cl_kernel_info, size_t, void *, size_t *)
|
|
||||||
OCL_DEFINE6(cl_int, clGetKernelWorkGroupInfo, cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void *, size_t *)
|
|
||||||
OCL_DEFINE3(cl_kernel, clCreateKernel, cl_program, const char *, cl_int *)
|
|
||||||
OCL_DEFINE4(cl_int, clCreateKernelsInProgram, cl_program, cl_uint, cl_kernel*, cl_uint*)
|
|
||||||
OCL_DEFINE5(cl_mem, clCreateBuffer, cl_context, cl_mem_flags, size_t, void *, cl_int *)
|
|
||||||
OCL_DEFINE5(cl_program, clCreateProgramWithSource, cl_context, cl_uint, const char **, const size_t *, cl_int *)
|
|
||||||
OCL_DEFINE1(cl_int, clReleaseKernel, cl_kernel)
|
|
||||||
|
|
||||||
// LLVM to SPIR-V
|
// LLVM to SPIR-V
|
||||||
int dispatch::initializeLLVMToSPIRVPass(llvm::PassRegistry ®istry){
|
int dispatch::initializeLLVMToSPIRVPass(llvm::PassRegistry ®istry){
|
||||||
return f_impl<dispatch::spvllvminit>(spvllvm_, initializeLLVMToSPIRVPass, initializeLLVMToSPIRVPass_, "initializeLLVMToSPIRVPass", std::ref(registry));
|
return f_impl<dispatch::spvllvminit>(spvllvm_, initializeLLVMToSPIRVPass, initializeLLVMToSPIRVPass_, "initializeLLVMToSPIRVPass", std::ref(registry));
|
||||||
@@ -246,47 +189,10 @@ void dispatch::release(){
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void * dispatch::opencl_;
|
|
||||||
void* dispatch::cuda_;
|
void* dispatch::cuda_;
|
||||||
void* dispatch::nvml_;
|
void* dispatch::nvml_;
|
||||||
void* dispatch::spvllvm_;
|
void* dispatch::spvllvm_;
|
||||||
|
|
||||||
//OpenCL
|
|
||||||
void* dispatch::clBuildProgram_;
|
|
||||||
void* dispatch::clEnqueueNDRangeKernel_;
|
|
||||||
void* dispatch::clSetKernelArg_;
|
|
||||||
void* dispatch::clReleaseMemObject_;
|
|
||||||
void* dispatch::clFinish_;
|
|
||||||
void* dispatch::clGetMemObjectInfo_;
|
|
||||||
void* dispatch::clGetCommandQueueInfo_;
|
|
||||||
void* dispatch::clReleaseContext_;
|
|
||||||
void* dispatch::clReleaseEvent_;
|
|
||||||
void* dispatch::clEnqueueWriteBuffer_;
|
|
||||||
void* dispatch::clEnqueueReadBuffer_;
|
|
||||||
void* dispatch::clGetProgramBuildInfo_;
|
|
||||||
void* dispatch::clReleaseDevice_;
|
|
||||||
void* dispatch::clCreateContext_;
|
|
||||||
void* dispatch::clGetDeviceIDs_;
|
|
||||||
void* dispatch::clGetContextInfo_;
|
|
||||||
void* dispatch::clGetDeviceInfo_;
|
|
||||||
void* dispatch::clReleaseCommandQueue_;
|
|
||||||
void* dispatch::clGetPlatformIDs_;
|
|
||||||
void* dispatch::clGetPlatformInfo_;
|
|
||||||
void* dispatch::clGetEventProfilingInfo_;
|
|
||||||
void* dispatch::clCreateProgramWithBinary_;
|
|
||||||
void* dispatch::clCreateCommandQueue_;
|
|
||||||
void* dispatch::clRetainEvent_;
|
|
||||||
void* dispatch::clReleaseProgram_;
|
|
||||||
void* dispatch::clFlush_;
|
|
||||||
void* dispatch::clGetProgramInfo_;
|
|
||||||
void* dispatch::clGetKernelInfo_;
|
|
||||||
void* dispatch::clGetKernelWorkGroupInfo_;
|
|
||||||
void* dispatch::clCreateKernel_;
|
|
||||||
void* dispatch::clCreateKernelsInProgram_;
|
|
||||||
void* dispatch::clCreateBuffer_;
|
|
||||||
void* dispatch::clCreateProgramWithSource_;
|
|
||||||
void* dispatch::clReleaseKernel_;
|
|
||||||
|
|
||||||
//CUDA
|
//CUDA
|
||||||
void* dispatch::cuCtxGetCurrent_;
|
void* dispatch::cuCtxGetCurrent_;
|
||||||
void* dispatch::cuCtxSetCurrent_;
|
void* dispatch::cuCtxSetCurrent_;
|
||||||
|
@@ -94,67 +94,6 @@ void check(CUresult err)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void check(cl_int err)
|
|
||||||
{
|
|
||||||
using namespace exception::ocl;
|
|
||||||
switch(err)
|
|
||||||
{
|
|
||||||
case CL_SUCCESS: break;
|
|
||||||
case CL_DEVICE_NOT_FOUND: throw device_not_found();
|
|
||||||
case CL_DEVICE_NOT_AVAILABLE: throw device_not_available();
|
|
||||||
case CL_COMPILER_NOT_AVAILABLE: throw compiler_not_available();
|
|
||||||
case CL_MEM_OBJECT_ALLOCATION_FAILURE: throw mem_object_allocation_failure();
|
|
||||||
case CL_OUT_OF_RESOURCES: throw out_of_resources();
|
|
||||||
case CL_OUT_OF_HOST_MEMORY: throw out_of_host_memory();
|
|
||||||
case CL_PROFILING_INFO_NOT_AVAILABLE: throw profiling_info_not_available();
|
|
||||||
case CL_MEM_COPY_OVERLAP: throw mem_copy_overlap();
|
|
||||||
case CL_IMAGE_FORMAT_MISMATCH: throw image_format_mismatch();
|
|
||||||
case CL_IMAGE_FORMAT_NOT_SUPPORTED: throw image_format_not_supported();
|
|
||||||
case CL_BUILD_PROGRAM_FAILURE: throw build_program_failure();
|
|
||||||
case CL_MAP_FAILURE: throw map_failure();
|
|
||||||
|
|
||||||
case CL_INVALID_VALUE: throw invalid_value();
|
|
||||||
case CL_INVALID_DEVICE_TYPE: throw invalid_device_type();
|
|
||||||
case CL_INVALID_PLATFORM: throw invalid_platform();
|
|
||||||
case CL_INVALID_DEVICE: throw invalid_device();
|
|
||||||
case CL_INVALID_CONTEXT: throw invalid_context();
|
|
||||||
case CL_INVALID_QUEUE_PROPERTIES: throw invalid_queue_properties();
|
|
||||||
case CL_INVALID_COMMAND_QUEUE: throw invalid_command_queue();
|
|
||||||
case CL_INVALID_HOST_PTR: throw invalid_host_ptr();
|
|
||||||
case CL_INVALID_MEM_OBJECT: throw invalid_mem_object();
|
|
||||||
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: throw invalid_image_format_descriptor();
|
|
||||||
case CL_INVALID_IMAGE_SIZE: throw invalid_image_size();
|
|
||||||
case CL_INVALID_SAMPLER: throw invalid_sampler();
|
|
||||||
case CL_INVALID_BINARY: throw invalid_binary();
|
|
||||||
case CL_INVALID_BUILD_OPTIONS: throw invalid_build_options();
|
|
||||||
case CL_INVALID_PROGRAM: throw invalid_program();
|
|
||||||
case CL_INVALID_PROGRAM_EXECUTABLE: throw invalid_program_executable();
|
|
||||||
case CL_INVALID_KERNEL_NAME: throw invalid_kernel_name();
|
|
||||||
case CL_INVALID_KERNEL_DEFINITION: throw invalid_kernel_definition();
|
|
||||||
case CL_INVALID_KERNEL: throw invalid_kernel();
|
|
||||||
case CL_INVALID_ARG_INDEX: throw invalid_arg_index();
|
|
||||||
case CL_INVALID_ARG_VALUE: throw invalid_arg_value();
|
|
||||||
case CL_INVALID_ARG_SIZE: throw invalid_arg_size();
|
|
||||||
case CL_INVALID_KERNEL_ARGS: throw invalid_kernel_args();
|
|
||||||
case CL_INVALID_WORK_DIMENSION: throw invalid_work_dimension();
|
|
||||||
case CL_INVALID_WORK_GROUP_SIZE: throw invalid_work_group_size();
|
|
||||||
case CL_INVALID_WORK_ITEM_SIZE: throw invalid_work_item_size();
|
|
||||||
case CL_INVALID_GLOBAL_OFFSET: throw invalid_global_offset();
|
|
||||||
case CL_INVALID_EVENT_WAIT_LIST: throw invalid_event_wait_list();
|
|
||||||
case CL_INVALID_EVENT: throw invalid_event();
|
|
||||||
case CL_INVALID_OPERATION: throw invalid_operation();
|
|
||||||
case CL_INVALID_GL_OBJECT: throw invalid_gl_object();
|
|
||||||
case CL_INVALID_BUFFER_SIZE: throw invalid_buffer_size();
|
|
||||||
case CL_INVALID_MIP_LEVEL: throw invalid_mip_level();
|
|
||||||
case CL_INVALID_GLOBAL_WORK_SIZE: throw invalid_global_work_size();
|
|
||||||
#ifdef CL_INVALID_PROPERTY
|
|
||||||
case CL_INVALID_PROPERTY: throw invalid_property();
|
|
||||||
#endif
|
|
||||||
default: throw;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -38,15 +38,6 @@ inline void _delete(host_stream_t) { }
|
|||||||
inline void _delete(host_buffer_t x) { if(x.data) delete[] x.data; }
|
inline void _delete(host_buffer_t x) { if(x.data) delete[] x.data; }
|
||||||
inline void _delete(host_function_t) { }
|
inline void _delete(host_function_t) { }
|
||||||
|
|
||||||
//OpenCL
|
|
||||||
inline void _delete(cl_platform_id) { }
|
|
||||||
inline void _delete(cl_device_id x) { dispatch::clReleaseDevice(x); }
|
|
||||||
inline void _delete(cl_context x) { dispatch::clReleaseContext(x); }
|
|
||||||
inline void _delete(cl_program x) { dispatch::clReleaseProgram(x); }
|
|
||||||
inline void _delete(cl_kernel x) { dispatch::clReleaseKernel(x); }
|
|
||||||
inline void _delete(cl_command_queue x) { dispatch::clReleaseCommandQueue(x); }
|
|
||||||
inline void _delete(cl_mem x) { dispatch::clReleaseMemObject(x); }
|
|
||||||
|
|
||||||
//CUDA
|
//CUDA
|
||||||
inline void _delete(CUcontext x) { dispatch::cuCtxDestroy(x); }
|
inline void _delete(CUcontext x) { dispatch::cuCtxDestroy(x); }
|
||||||
inline void _delete(CUdeviceptr x) { dispatch::cuMemFree(x); }
|
inline void _delete(CUdeviceptr x) { dispatch::cuMemFree(x); }
|
||||||
@@ -87,14 +78,6 @@ template class handle<CUfunction>;
|
|||||||
template class handle<CUmodule>;
|
template class handle<CUmodule>;
|
||||||
template class handle<CUPlatform>;
|
template class handle<CUPlatform>;
|
||||||
|
|
||||||
template class handle<cl_platform_id>;
|
|
||||||
template class handle<cl_device_id>;
|
|
||||||
template class handle<cl_context>;
|
|
||||||
template class handle<cl_program>;
|
|
||||||
template class handle<cl_command_queue>;
|
|
||||||
template class handle<cl_mem>;
|
|
||||||
template class handle<cl_kernel>;
|
|
||||||
|
|
||||||
template class handle<host_platform_t>;
|
template class handle<host_platform_t>;
|
||||||
template class handle<host_device_t>;
|
template class handle<host_device_t>;
|
||||||
template class handle<host_context_t>;
|
template class handle<host_context_t>;
|
||||||
|
@@ -39,9 +39,6 @@ kernel::kernel(driver::module *program, CUfunction fn, bool has_ownership):
|
|||||||
polymorphic_resource(fn, has_ownership), program_(program){
|
polymorphic_resource(fn, has_ownership), program_(program){
|
||||||
}
|
}
|
||||||
|
|
||||||
kernel::kernel(driver::module *program, cl_kernel fn, bool has_ownership):
|
|
||||||
polymorphic_resource(fn, has_ownership), program_(program){
|
|
||||||
}
|
|
||||||
|
|
||||||
kernel::kernel(driver::module *program, host_function_t fn, bool has_ownership):
|
kernel::kernel(driver::module *program, host_function_t fn, bool has_ownership):
|
||||||
polymorphic_resource(fn, has_ownership), program_(program){
|
polymorphic_resource(fn, has_ownership), program_(program){
|
||||||
@@ -50,7 +47,6 @@ kernel::kernel(driver::module *program, host_function_t fn, bool has_ownership):
|
|||||||
kernel* kernel::create(driver::module* program, const char* name) {
|
kernel* kernel::create(driver::module* program, const char* name) {
|
||||||
switch(program->backend()){
|
switch(program->backend()){
|
||||||
case CUDA: return new cu_kernel(program, name);
|
case CUDA: return new cu_kernel(program, name);
|
||||||
case OpenCL: return new ocl_kernel(program, name);
|
|
||||||
case Host: return new host_kernel(program, name);
|
case Host: return new host_kernel(program, name);
|
||||||
default: throw std::runtime_error("unknown backend");
|
default: throw std::runtime_error("unknown backend");
|
||||||
}
|
}
|
||||||
@@ -89,31 +85,6 @@ const std::vector<void *> &host_kernel::params(){
|
|||||||
return params_;
|
return params_;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ------------------------ */
|
|
||||||
// OpenCL //
|
|
||||||
/* ------------------------ */
|
|
||||||
|
|
||||||
ocl_kernel::ocl_kernel(driver::module* program, const char* name): kernel(program, cl_kernel(), true) {
|
|
||||||
// cl_uint res;
|
|
||||||
// check(dispatch::clCreateKernelsInProgram(*program->cl(), 0, NULL, &res));
|
|
||||||
// std::cout << res << std::endl;
|
|
||||||
cl_int err;
|
|
||||||
*cl_ = dispatch::clCreateKernel(*program->cl(), "matmul", &err);
|
|
||||||
check(err);
|
|
||||||
}
|
|
||||||
|
|
||||||
void ocl_kernel::setArg(unsigned int index, std::size_t size, void* ptr) {
|
|
||||||
check(dispatch::clSetKernelArg(*cl_, index, size, ptr));
|
|
||||||
}
|
|
||||||
|
|
||||||
void ocl_kernel::setArg(unsigned int index, driver::buffer* buffer) {
|
|
||||||
if(buffer)
|
|
||||||
check(dispatch::clSetKernelArg(*cl_, index, sizeof(cl_mem), (void*)&*buffer->cl()));
|
|
||||||
else
|
|
||||||
kernel::setArg(index, (std::ptrdiff_t)0);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* ------------------------ */
|
/* ------------------------ */
|
||||||
// CUDA //
|
// CUDA //
|
||||||
/* ------------------------ */
|
/* ------------------------ */
|
||||||
|
@@ -64,10 +64,6 @@ module::module(driver::context* ctx, CUmodule mod, bool has_ownership)
|
|||||||
: polymorphic_resource(mod, has_ownership), ctx_(ctx) {
|
: polymorphic_resource(mod, has_ownership), ctx_(ctx) {
|
||||||
}
|
}
|
||||||
|
|
||||||
module::module(driver::context* ctx, cl_program mod, bool has_ownership)
|
|
||||||
: polymorphic_resource(mod, has_ownership), ctx_(ctx) {
|
|
||||||
}
|
|
||||||
|
|
||||||
module::module(driver::context* ctx, host_module_t mod, bool has_ownership)
|
module::module(driver::context* ctx, host_module_t mod, bool has_ownership)
|
||||||
: polymorphic_resource(mod, has_ownership), ctx_(ctx) {
|
: polymorphic_resource(mod, has_ownership), ctx_(ctx) {
|
||||||
}
|
}
|
||||||
@@ -79,7 +75,6 @@ driver::context* module::context() const {
|
|||||||
module* module::create(driver::context* ctx, std::unique_ptr<llvm::Module> src) {
|
module* module::create(driver::context* ctx, std::unique_ptr<llvm::Module> src) {
|
||||||
switch(ctx->backend()){
|
switch(ctx->backend()){
|
||||||
case CUDA: return new cu_module(ctx, std::move(src));
|
case CUDA: return new cu_module(ctx, std::move(src));
|
||||||
case OpenCL: return new ocl_module(ctx, std::move(src));
|
|
||||||
case Host: return new host_module(ctx, std::move(src));
|
case Host: return new host_module(ctx, std::move(src));
|
||||||
default: throw std::runtime_error("unknown backend");
|
default: throw std::runtime_error("unknown backend");
|
||||||
}
|
}
|
||||||
@@ -214,42 +209,6 @@ std::unique_ptr<buffer> host_module::symbol(const char *name) const {
|
|||||||
throw std::runtime_error("not implemented");
|
throw std::runtime_error("not implemented");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* ------------------------ */
|
|
||||||
// OpenCL //
|
|
||||||
/* ------------------------ */
|
|
||||||
|
|
||||||
ocl_module::ocl_module(driver::context * context, std::unique_ptr<llvm::Module> src): module(context, cl_program(), true) {
|
|
||||||
throw std::runtime_error("not supported");
|
|
||||||
// init_llvm();
|
|
||||||
// llvm::SmallVector<char, 0> buffer;
|
|
||||||
// module::compile_llvm_module(src, "amdgcn-amd-amdhsa-amdgizcl", "gfx902", "", buffer, "code-object-v3", Object);
|
|
||||||
// std::ofstream output("/tmp/tmp.o", std::ios::binary);
|
|
||||||
// std::copy(buffer.begin(), buffer.end(), std::ostreambuf_iterator<char>(output));
|
|
||||||
// system("ld.lld-8 /tmp/tmp.o -shared -o /tmp/tmp.o");
|
|
||||||
// std::ifstream input("/tmp/tmp.o", std::ios::in | std::ios::binary );
|
|
||||||
// std::vector<unsigned char> in_buffer(std::istreambuf_iterator<char>(input), {});
|
|
||||||
// size_t sizes[] = {in_buffer.size()};
|
|
||||||
// const unsigned char* data[] = {(unsigned char*)in_buffer.data()};
|
|
||||||
// cl_int status;
|
|
||||||
// cl_int err;
|
|
||||||
// *cl_ = dispatch::clCreateProgramWithBinary(*context->cl(), 1, &*context->device()->cl(), sizes, data, &status, &err);
|
|
||||||
// check(status);
|
|
||||||
// check(err);
|
|
||||||
// try{
|
|
||||||
// dispatch::clBuildProgram(*cl_, 1, &*context->device()->cl(), NULL, NULL, NULL);
|
|
||||||
// }
|
|
||||||
// catch(...){
|
|
||||||
// char log[2048];
|
|
||||||
// dispatch::clGetProgramBuildInfo(*cl_, *context->device()->cl(), CL_PROGRAM_BUILD_LOG, 1024, log, NULL);
|
|
||||||
// throw;
|
|
||||||
// }
|
|
||||||
}
|
|
||||||
|
|
||||||
std::unique_ptr<buffer> ocl_module::symbol(const char *name) const {
|
|
||||||
throw std::runtime_error("not implemented");
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ------------------------ */
|
/* ------------------------ */
|
||||||
// CUDA //
|
// CUDA //
|
||||||
/* ------------------------ */
|
/* ------------------------ */
|
||||||
|
@@ -51,27 +51,6 @@ void cu_platform::devices(std::vector<device *> &devices) const{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ------------------------ */
|
|
||||||
// OpenCL //
|
|
||||||
/* ------------------------ */
|
|
||||||
|
|
||||||
std::string cl_platform::version() const {
|
|
||||||
size_t size;
|
|
||||||
check(dispatch::clGetPlatformInfo(*cl_, CL_PLATFORM_VERSION, 0, nullptr, &size));
|
|
||||||
std::string result(size, 0);
|
|
||||||
check(dispatch::clGetPlatformInfo(*cl_, CL_PLATFORM_VERSION, size, (void*)&*result.begin(), nullptr));
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
void cl_platform::devices(std::vector<device*> &devices) const{
|
|
||||||
cl_uint num_devices;
|
|
||||||
check(dispatch::clGetDeviceIDs(*cl_, CL_DEVICE_TYPE_GPU, 0, nullptr, &num_devices));
|
|
||||||
std::vector<cl_device_id> ids(num_devices);
|
|
||||||
check(dispatch::clGetDeviceIDs(*cl_, CL_DEVICE_TYPE_GPU, num_devices, ids.data(), nullptr));
|
|
||||||
for(cl_device_id id: ids)
|
|
||||||
devices.push_back(new driver::ocl_device(id));
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ------------------------ */
|
/* ------------------------ */
|
||||||
// Host //
|
// Host //
|
||||||
/* ------------------------ */
|
/* ------------------------ */
|
||||||
|
@@ -46,9 +46,6 @@ stream::stream(driver::context *ctx, CUstream cu, bool has_ownership)
|
|||||||
: polymorphic_resource(cu, has_ownership), ctx_(ctx) {
|
: polymorphic_resource(cu, has_ownership), ctx_(ctx) {
|
||||||
}
|
}
|
||||||
|
|
||||||
stream::stream(driver::context *ctx, cl_command_queue cl, bool has_ownership)
|
|
||||||
: polymorphic_resource(cl, has_ownership), ctx_(ctx) {
|
|
||||||
}
|
|
||||||
|
|
||||||
stream::stream(driver::context *ctx, host_stream_t cl, bool has_ownership)
|
stream::stream(driver::context *ctx, host_stream_t cl, bool has_ownership)
|
||||||
: polymorphic_resource(cl, has_ownership), ctx_(ctx) {
|
: polymorphic_resource(cl, has_ownership), ctx_(ctx) {
|
||||||
@@ -57,7 +54,6 @@ stream::stream(driver::context *ctx, host_stream_t cl, bool has_ownership)
|
|||||||
driver::stream* stream::create(driver::context* ctx) {
|
driver::stream* stream::create(driver::context* ctx) {
|
||||||
switch(ctx->backend()){
|
switch(ctx->backend()){
|
||||||
case CUDA: return new cu_stream(ctx);
|
case CUDA: return new cu_stream(ctx);
|
||||||
case OpenCL: return new cl_stream(ctx);
|
|
||||||
case Host: return new host_stream(ctx);
|
case Host: return new host_stream(ctx);
|
||||||
default: throw std::runtime_error("unknown backend");
|
default: throw std::runtime_error("unknown backend");
|
||||||
}
|
}
|
||||||
@@ -97,33 +93,6 @@ void host_stream::read(driver::buffer* buffer, bool blocking, std::size_t offset
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* ------------------------ */
|
|
||||||
// OpenCL //
|
|
||||||
/* ------------------------ */
|
|
||||||
|
|
||||||
cl_stream::cl_stream(driver::context *ctx): stream(ctx, cl_command_queue(), true) {
|
|
||||||
cl_int err;
|
|
||||||
*cl_ = dispatch::clCreateCommandQueue(*ctx->cl(), *ctx->device()->cl(), 0, &err);
|
|
||||||
check(err);
|
|
||||||
}
|
|
||||||
|
|
||||||
void cl_stream::synchronize() {
|
|
||||||
check(dispatch::clFinish(*cl_));
|
|
||||||
}
|
|
||||||
|
|
||||||
void cl_stream::enqueue(driver::kernel* kernel, std::array<size_t, 3> grid, std::array<size_t, 3> block, std::vector<event> const *, event* event, void **args, size_t args_size) {
|
|
||||||
std::array<size_t, 3> global = {grid[0]*block[0], grid[1]*block[1], grid[2]*block[2]};
|
|
||||||
check(dispatch::clEnqueueNDRangeKernel(*cl_, *kernel->cl(), grid.size(), NULL, (const size_t*)global.data(), (const size_t*)block.data(), 0, NULL, NULL));
|
|
||||||
}
|
|
||||||
|
|
||||||
void cl_stream::write(driver::buffer* buffer, bool blocking, std::size_t offset, std::size_t size, void const* ptr) {
|
|
||||||
check(dispatch::clEnqueueWriteBuffer(*cl_, *buffer->cl(), blocking?CL_TRUE:CL_FALSE, offset, size, ptr, 0, NULL, NULL));
|
|
||||||
}
|
|
||||||
|
|
||||||
void cl_stream::read(driver::buffer* buffer, bool blocking, std::size_t offset, std::size_t size, void* ptr) {
|
|
||||||
check(dispatch::clEnqueueReadBuffer(*cl_, *buffer->cl(), blocking?CL_TRUE:CL_FALSE, offset, size, ptr, 0, NULL, NULL));
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ------------------------ */
|
/* ------------------------ */
|
||||||
// CUDA //
|
// CUDA //
|
||||||
/* ------------------------ */
|
/* ------------------------ */
|
||||||
|
@@ -10,7 +10,7 @@ namespace triton{
|
|||||||
namespace ir{
|
namespace ir{
|
||||||
|
|
||||||
builder::builder(context &ctx):
|
builder::builder(context &ctx):
|
||||||
ctx_(ctx), block_(nullptr), insert_point_(nullptr) {}
|
ctx_(ctx), block_(nullptr) {}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// utilities
|
// utilities
|
||||||
|
@@ -3,6 +3,7 @@
|
|||||||
#include <regex>
|
#include <regex>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <sstream>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include "triton/codegen/analysis/axes.h"
|
#include "triton/codegen/analysis/axes.h"
|
||||||
#include "triton/codegen/analysis/allocation.h"
|
#include "triton/codegen/analysis/allocation.h"
|
||||||
|
Reference in New Issue
Block a user