diff --git a/include/triton/driver/buffer.h b/include/triton/driver/buffer.h index 3817ca4dd..f64e81fc2 100755 --- a/include/triton/driver/buffer.h +++ b/include/triton/driver/buffer.h @@ -14,10 +14,9 @@ namespace driver class stream; // Base -class buffer : public polymorphic_resource { +class buffer : public polymorphic_resource { public: buffer(driver::context* ctx, size_t size, CUdeviceptr cl, bool take_ownership); - buffer(driver::context* ctx, size_t size, cl_mem cl, bool take_ownership); buffer(driver::context* ctx, size_t size, host_buffer_t hst, bool take_ownership); uintptr_t addr_as_uintptr_t(); static buffer* create(driver::context* ctx, size_t size); @@ -36,13 +35,6 @@ public: host_buffer(driver::context* context, size_t size); }; -// OpenCL -class ocl_buffer: public buffer -{ -public: - ocl_buffer(driver::context* context, size_t size); -}; - // CUDA class cu_buffer: public buffer { diff --git a/include/triton/driver/context.h b/include/triton/driver/context.h index 9e368972d..d893ee87a 100755 --- a/include/triton/driver/context.h +++ b/include/triton/driver/context.h @@ -11,13 +11,12 @@ namespace triton namespace driver { -class context: public polymorphic_resource{ +class context: public polymorphic_resource{ protected: static std::string get_cache_path(); public: context(driver::device *dev, CUcontext cu, bool take_ownership); - context(driver::device *dev, cl_context cl, bool take_ownership); context(driver::device *dev, host_context_t hst, bool take_ownership); driver::device* device() const; std::string const & cache_path() const; @@ -55,15 +54,6 @@ public: cu_context(driver::device* dev); }; -// OpenCL -class ocl_context: public context { -public: - ocl_context(driver::device* dev); -}; - - - - } } diff --git a/include/triton/driver/device.h b/include/triton/driver/device.h index df119a272..8110c0bc7 100755 --- a/include/triton/driver/device.h +++ b/include/triton/driver/device.h @@ -20,7 +20,7 @@ namespace driver class context; // Base device -class device: public polymorphic_resource{ +class device: public polymorphic_resource{ public: using polymorphic_resource::polymorphic_resource; virtual size_t max_threads_per_block() const = 0; @@ -37,15 +37,6 @@ public: std::unique_ptr make_target() const; }; -// OpenCL device -class ocl_device: public device { -public: - ocl_device(cl_device_id cl, bool take_ownership = true): device(cl, take_ownership) { } - size_t max_threads_per_block() const; - size_t max_shared_memory() const; - std::unique_ptr make_target() const; -}; - // CUDA device class cu_device: public device { public: diff --git a/include/triton/driver/dispatch.h b/include/triton/driver/dispatch.h index ed717a7fb..2518c8005 100755 --- a/include/triton/driver/dispatch.h +++ b/include/triton/driver/dispatch.h @@ -9,8 +9,6 @@ //CUDA Backend #include "triton/external/CUDA/cuda.h" #include "triton/external/CUDA/nvml.h" -#include "triton/external/CL/cl.h" -#include "triton/external/CL/cl_ext.h" //Exceptions #include @@ -30,7 +28,6 @@ class cu_context; template void check(T){} void check(CUresult err); -void check(cl_int err); class dispatch { @@ -61,48 +58,11 @@ protected: } public: - static bool clinit(); static bool nvmlinit(); static bool cuinit(); static bool spvllvminit(); static void release(); - // OpenCL - static cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *); - static cl_int clEnqueueNDRangeKernel(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *); - static cl_int clSetKernelArg(cl_kernel, cl_uint, size_t, const void *); - static cl_int clReleaseMemObject(cl_mem); - static cl_int clFinish(cl_command_queue); - static cl_int clGetMemObjectInfo(cl_mem, cl_mem_info, size_t, void *, size_t *); - static cl_int clGetCommandQueueInfo(cl_command_queue, cl_command_queue_info, size_t, void *, size_t *); - static cl_int clReleaseContext(cl_context); - static cl_int clReleaseEvent(cl_event); - static cl_int clEnqueueWriteBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *); - static cl_int clEnqueueReadBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void *, cl_uint, const cl_event *, cl_event *); - static cl_int clGetProgramBuildInfo(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *); - static cl_int clReleaseDevice(cl_device_id); - static cl_context clCreateContext(const cl_context_properties *, cl_uint, const cl_device_id *, void (*)(const char *, const void *, size_t, void *), void *, cl_int *); - static cl_int clGetDeviceIDs(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *); - static cl_int clGetContextInfo(cl_context, cl_context_info, size_t, void *, size_t *); - static cl_int clGetDeviceInfo(cl_device_id, cl_device_info, size_t, void *, size_t *); - static cl_int clReleaseCommandQueue(cl_command_queue); - static cl_int clGetPlatformIDs(cl_uint, cl_platform_id *, cl_uint *); - static cl_int clGetPlatformInfo(cl_platform_id, cl_platform_info, size_t, void *, size_t *); - static cl_int clGetEventProfilingInfo(cl_event, cl_profiling_info, size_t, void *, size_t *); - static cl_program clCreateProgramWithBinary(cl_context, cl_uint, const cl_device_id *, const size_t *, const unsigned char **, cl_int *, cl_int *); - static cl_command_queue clCreateCommandQueue(cl_context, cl_device_id, cl_command_queue_properties, cl_int *); - static cl_int clRetainEvent(cl_event); - static cl_int clReleaseProgram(cl_program); - static cl_int clFlush(cl_command_queue); - static cl_int clGetProgramInfo(cl_program, cl_program_info, size_t, void *, size_t *); - static cl_int clGetKernelInfo(cl_kernel, cl_kernel_info, size_t, void *, size_t *); - static cl_int clGetKernelWorkGroupInfo(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void *, size_t *); - static cl_kernel clCreateKernel(cl_program, const char *, cl_int *); - static cl_int clCreateKernelsInProgram(cl_program, cl_uint, cl_kernel*, cl_uint*); - static cl_mem clCreateBuffer(cl_context, cl_mem_flags, size_t, void *, cl_int *); - static cl_program clCreateProgramWithSource(cl_context, cl_uint, const char **, const size_t *, cl_int *); - static cl_int clReleaseKernel(cl_kernel); - // CUDA static CUresult cuCtxGetCurrent(CUcontext *pctx); static CUresult cuCtxSetCurrent(CUcontext ctx); @@ -157,7 +117,6 @@ public: private: // Libraries - static void* opencl_; static void* cuda_; static void* nvml_; static void* vulkan_; @@ -165,41 +124,6 @@ private: static void* spvcross_; static void* opengl_; - // OpenCL functions - static void* clBuildProgram_; - static void* clEnqueueNDRangeKernel_; - static void* clSetKernelArg_; - static void* clReleaseMemObject_; - static void* clFinish_; - static void* clGetMemObjectInfo_; - static void* clGetCommandQueueInfo_; - static void* clReleaseContext_; - static void* clReleaseEvent_; - static void* clEnqueueWriteBuffer_; - static void* clEnqueueReadBuffer_; - static void* clGetProgramBuildInfo_; - static void* clReleaseDevice_; - static void* clCreateContext_; - static void* clGetDeviceIDs_; - static void* clGetContextInfo_; - static void* clGetDeviceInfo_; - static void* clReleaseCommandQueue_; - static void* clGetPlatformIDs_; - static void* clGetPlatformInfo_; - static void* clGetEventProfilingInfo_; - static void* clCreateProgramWithBinary_; - static void* clCreateCommandQueue_; - static void* clRetainEvent_; - static void* clReleaseProgram_; - static void* clFlush_; - static void* clGetProgramInfo_; - static void* clGetKernelInfo_; - static void* clGetKernelWorkGroupInfo_; - static void* clCreateKernel_; - static void* clCreateKernelsInProgram_; - static void* clCreateBuffer_; - static void* clCreateProgramWithSource_; - static void* clReleaseKernel_; // CUDA functions static void* cuCtxGetCurrent_; diff --git a/include/triton/driver/error.h b/include/triton/driver/error.h index 5091faf3e..affbae94a 100755 --- a/include/triton/driver/error.h +++ b/include/triton/driver/error.h @@ -141,66 +141,6 @@ namespace triton ISAAC_CREATE_CUDNN_EXCEPTION(runtime_fp_overflow ,"runtime fp overflow"); } - namespace ocl - { - - class base: public std::exception{}; - -#define ISAAC_CREATE_CL_EXCEPTION(name, msg) class name: public base { public: const char * what() const throw(){ return "OpenCL: Error- " msg; } } - - - ISAAC_CREATE_CL_EXCEPTION(device_not_found, "device not found"); - ISAAC_CREATE_CL_EXCEPTION(device_not_available, "device not available"); - ISAAC_CREATE_CL_EXCEPTION(compiler_not_available, "compiler not available"); - ISAAC_CREATE_CL_EXCEPTION(mem_object_allocation_failure, "object allocation failure"); - ISAAC_CREATE_CL_EXCEPTION(out_of_resources, "launch out of resources"); - ISAAC_CREATE_CL_EXCEPTION(out_of_host_memory, "out of host memory"); - ISAAC_CREATE_CL_EXCEPTION(profiling_info_not_available, "profiling info not available"); - ISAAC_CREATE_CL_EXCEPTION(mem_copy_overlap, "mem copy overlap"); - ISAAC_CREATE_CL_EXCEPTION(image_format_mismatch, "image format mismatch"); - ISAAC_CREATE_CL_EXCEPTION(image_format_not_supported, "image format not supported"); - ISAAC_CREATE_CL_EXCEPTION(build_program_failure, "build program failure"); - ISAAC_CREATE_CL_EXCEPTION(map_failure, "map failure"); - ISAAC_CREATE_CL_EXCEPTION(invalid_value, "invalid value"); - ISAAC_CREATE_CL_EXCEPTION(invalid_device_type, "invalid device type"); - ISAAC_CREATE_CL_EXCEPTION(invalid_platform, "invalid platform"); - ISAAC_CREATE_CL_EXCEPTION(invalid_device, "invalid device"); - ISAAC_CREATE_CL_EXCEPTION(invalid_context, "invalid context"); - ISAAC_CREATE_CL_EXCEPTION(invalid_queue_properties, "invalid queue properties"); - ISAAC_CREATE_CL_EXCEPTION(invalid_command_queue, "invalid command queue"); - ISAAC_CREATE_CL_EXCEPTION(invalid_host_ptr, "invalid host pointer"); - ISAAC_CREATE_CL_EXCEPTION(invalid_mem_object, "invalid mem object"); - ISAAC_CREATE_CL_EXCEPTION(invalid_image_format_descriptor, "invalid image format descriptor"); - ISAAC_CREATE_CL_EXCEPTION(invalid_image_size, "invalid image size"); - ISAAC_CREATE_CL_EXCEPTION(invalid_sampler, "invalid sampler"); - ISAAC_CREATE_CL_EXCEPTION(invalid_binary, "invalid binary"); - ISAAC_CREATE_CL_EXCEPTION(invalid_build_options, "invalid build options"); - ISAAC_CREATE_CL_EXCEPTION(invalid_program, "invalid program"); - ISAAC_CREATE_CL_EXCEPTION(invalid_program_executable, "invalid program executable"); - ISAAC_CREATE_CL_EXCEPTION(invalid_kernel_name, "invalid kernel name"); - ISAAC_CREATE_CL_EXCEPTION(invalid_kernel_definition, "invalid kernel definition"); - ISAAC_CREATE_CL_EXCEPTION(invalid_kernel, "invalid kernel"); - ISAAC_CREATE_CL_EXCEPTION(invalid_arg_index, "invalid arg index"); - ISAAC_CREATE_CL_EXCEPTION(invalid_arg_value, "invalid arg value"); - ISAAC_CREATE_CL_EXCEPTION(invalid_arg_size, "invalid arg size"); - ISAAC_CREATE_CL_EXCEPTION(invalid_kernel_args, "invalid kernel args"); - ISAAC_CREATE_CL_EXCEPTION(invalid_work_dimension, "invalid work dimension"); - ISAAC_CREATE_CL_EXCEPTION(invalid_work_group_size, "invalid work group size"); - ISAAC_CREATE_CL_EXCEPTION(invalid_work_item_size, "invalid work item size"); - ISAAC_CREATE_CL_EXCEPTION(invalid_global_offset, "invalid global offset"); - ISAAC_CREATE_CL_EXCEPTION(invalid_event_wait_list, "invalid event wait list"); - ISAAC_CREATE_CL_EXCEPTION(invalid_event, "invalid event"); - ISAAC_CREATE_CL_EXCEPTION(invalid_operation, "invalid operation"); - ISAAC_CREATE_CL_EXCEPTION(invalid_gl_object, "invalid GL object"); - ISAAC_CREATE_CL_EXCEPTION(invalid_buffer_size, "invalid buffer size"); - ISAAC_CREATE_CL_EXCEPTION(invalid_mip_level, "invalid MIP level"); - ISAAC_CREATE_CL_EXCEPTION(invalid_global_work_size, "invalid global work size"); -#ifdef CL_INVALID_PROPERTY - ISAAC_CREATE_CL_EXCEPTION(invalid_property, "invalid property"); -#endif - } - - } } } diff --git a/include/triton/driver/handle.h b/include/triton/driver/handle.h index 2e512ddde..d750aeba5 100755 --- a/include/triton/driver/handle.h +++ b/include/triton/driver/handle.h @@ -33,7 +33,6 @@ namespace driver enum backend_t { CUDA, - OpenCL, Host }; @@ -120,24 +119,20 @@ protected: bool has_ownership_; }; -template +template class polymorphic_resource { public: polymorphic_resource(CUType cu, bool take_ownership): cu_(cu, take_ownership), backend_(CUDA){} - polymorphic_resource(CLType cl, bool take_ownership): cl_(cl, take_ownership), backend_(OpenCL){} polymorphic_resource(HostType hst, bool take_ownership): hst_(hst, take_ownership), backend_(Host){} virtual ~polymorphic_resource() { } handle cu() { return cu_; } - handle cl() { return cl_; } handle hst() { return hst_; } const handle& cu() const { return cu_; } - const handle& cl() const { return cl_; } const handle& hst() const { return hst_; } backend_t backend() { return backend_; } protected: - handle cl_; handle cu_; handle hst_; backend_t backend_; diff --git a/include/triton/driver/kernel.h b/include/triton/driver/kernel.h index b45755ee7..0aa7efc5e 100755 --- a/include/triton/driver/kernel.h +++ b/include/triton/driver/kernel.h @@ -21,10 +21,9 @@ namespace driver class cu_buffer; // Base -class kernel: public polymorphic_resource { +class kernel: public polymorphic_resource { public: kernel(driver::module* program, CUfunction fn, bool has_ownership); - kernel(driver::module* program, cl_kernel fn, bool has_ownership); kernel(driver::module* program, host_function_t fn, bool has_ownership); // Getters driver::module* module(); @@ -53,17 +52,6 @@ private: std::vector params_; }; -// OpenCL -class ocl_kernel: public kernel { -public: - //Constructors - ocl_kernel(driver::module* program, const char* name); - // Arguments setters - void setArg(unsigned int index, std::size_t size, void* ptr); - void setArg(unsigned int index, driver::buffer* buffer); - -}; - // CUDA class cu_kernel: public kernel { public: diff --git a/include/triton/driver/module.h b/include/triton/driver/module.h index 2e6a390e9..991af82ae 100755 --- a/include/triton/driver/module.h +++ b/include/triton/driver/module.h @@ -25,7 +25,7 @@ class cu_context; class cu_device; // Base -class module: public polymorphic_resource { +class module: public polymorphic_resource { protected: void init_llvm(); @@ -36,7 +36,6 @@ protected: public: module(driver::context* ctx, CUmodule mod, bool has_ownership); - module(driver::context* ctx, cl_program mod, bool has_ownership); module(driver::context* ctx, host_module_t mod, bool has_ownership); static module* create(driver::context* ctx, std::unique_ptr src); driver::context* context() const; @@ -59,13 +58,6 @@ public: std::unique_ptr symbol(const char * name) const; }; -// OpenCL -class ocl_module: public module{ -public: - ocl_module(driver::context* context, std::unique_ptr module); - std::unique_ptr symbol(const char * name) const; -}; - // CUDA class cu_module: public module { std::string compile_llvm_module(std::unique_ptr module, driver::device* device); diff --git a/include/triton/driver/platform.h b/include/triton/driver/platform.h index ff4e83b9d..8f64bf3f5 100755 --- a/include/triton/driver/platform.h +++ b/include/triton/driver/platform.h @@ -42,18 +42,6 @@ private: handle cu_; }; -// OpenCL -class cl_platform: public platform -{ -public: - cl_platform(cl_platform_id cl): platform("OpenCL"), cl_(cl) { } - std::string version() const; - void devices(std::vector &devices) const; - -private: - handle cl_; -}; - // Host class host_platform: public platform { diff --git a/include/triton/driver/stream.h b/include/triton/driver/stream.h index 7b70fd584..df4c6ad5f 100755 --- a/include/triton/driver/stream.h +++ b/include/triton/driver/stream.h @@ -21,10 +21,9 @@ class Range; class cu_buffer; // Base -class stream: public polymorphic_resource { +class stream: public polymorphic_resource { public: stream(driver::context *ctx, CUstream, bool has_ownership); - stream(driver::context *ctx, cl_command_queue, bool has_ownership); stream(driver::context *ctx, host_stream_t, bool has_ownership); // factory static driver::stream* create(driver::context* ctx); @@ -58,19 +57,6 @@ public: void read(driver::buffer* buf, bool blocking, std::size_t offset, std::size_t size, void* ptr); }; -// OpenCL -class cl_stream: public stream { -public: - // Constructors - cl_stream(driver::context *ctx); - - // Overridden - void synchronize(); - void enqueue(driver::kernel* kernel, std::array grid, std::array block, std::vector const *, event *event, void **args, size_t args_size); - void write(driver::buffer* buf, bool blocking, std::size_t offset, std::size_t size, void const* ptr); - void read(driver::buffer* buf, bool blocking, std::size_t offset, std::size_t size, void* ptr); -}; - // CUDA class cu_stream: public stream { public: diff --git a/lib/driver/buffer.cc b/lib/driver/buffer.cc index f188d7483..7cbefad45 100755 --- a/lib/driver/buffer.cc +++ b/lib/driver/buffer.cc @@ -38,9 +38,6 @@ namespace driver buffer::buffer(driver::context* ctx, size_t size, CUdeviceptr cu, bool take_ownership) : polymorphic_resource(cu, take_ownership), context_(ctx), size_(size) { } -buffer::buffer(driver::context* ctx, size_t size, cl_mem cl, bool take_ownership) - : polymorphic_resource(cl, take_ownership), context_(ctx), size_(size) { } - buffer::buffer(driver::context* ctx, size_t size, host_buffer_t hst, bool take_ownership) : polymorphic_resource(hst, take_ownership), context_(ctx), size_(size) { } @@ -65,7 +62,6 @@ uintptr_t buffer::addr_as_uintptr_t() { buffer* buffer::create(driver::context* ctx, size_t size) { switch(ctx->backend()){ case CUDA: return new cu_buffer(ctx, size); - case OpenCL: return new ocl_buffer(ctx, size); case Host: return new host_buffer(ctx, size); default: throw std::runtime_error("unknown backend"); } @@ -78,15 +74,6 @@ host_buffer::host_buffer(driver::context *context, size_t size) hst_->data = new char[size]; } -// - -ocl_buffer::ocl_buffer(driver::context* context, size_t size) - : buffer(context, size, cl_mem(), true){ - cl_int err; - *cl_ = dispatch::clCreateBuffer(*context->cl(), CL_MEM_READ_WRITE, size, NULL, &err); - check(err); -} - // diff --git a/lib/driver/context.cc b/lib/driver/context.cc index 473cfaac7..8f538cae2 100755 --- a/lib/driver/context.cc +++ b/lib/driver/context.cc @@ -41,11 +41,6 @@ context::context(driver::device *dev, CUcontext cu, bool take_ownership): dev_(dev), cache_path_(get_cache_path()) { } -context::context(driver::device *dev, cl_context cl, bool take_ownership): - polymorphic_resource(cl, take_ownership), - dev_(dev), cache_path_(get_cache_path()){ -} - context::context(driver::device *dev, host_context_t hst, bool take_ownership): polymorphic_resource(hst, take_ownership), dev_(dev), cache_path_(get_cache_path()){ @@ -54,7 +49,6 @@ context::context(driver::device *dev, host_context_t hst, bool take_ownership): context* context::create(driver::device *dev){ switch(dev->backend()){ case CUDA: return new cu_context(dev); - case OpenCL: return new ocl_context(dev); case Host: return new host_context(dev); default: throw std::runtime_error("unknown backend"); } @@ -131,17 +125,5 @@ cu_context::cu_context(driver::device* device): context(device, CUcontext(), tru } -/* ------------------------ */ -// OpenCL // -/* ------------------------ */ - -ocl_context::ocl_context(driver::device* dev): context(dev, cl_context(), true) { - cl_int err; - *cl_ = dispatch::clCreateContext(nullptr, 1, &*dev->cl(), nullptr, nullptr, &err); - check(err); -} - - - } } diff --git a/lib/driver/device.cc b/lib/driver/device.cc index 3f82e2f33..53ed3007d 100755 --- a/lib/driver/device.cc +++ b/lib/driver/device.cc @@ -44,25 +44,6 @@ std::unique_ptr host_device::make_target() const { } -/* ------------------------ */ -// OpenCL // -/* ------------------------ */ - -// maximum amount of shared memory per block -size_t ocl_device::max_shared_memory() const { - throw std::runtime_error("not implemented"); -// return ocl::info(*cl_); -} - -size_t ocl_device::max_threads_per_block() const { - throw std::runtime_error("not implemented"); -// return ocl::info(*cl_).at(0); -} - -std::unique_ptr ocl_device::make_target() const { - return std::unique_ptr(new codegen::amd_cl_target()); -} - /* ------------------------ */ // CUDA // /* ------------------------ */ diff --git a/lib/driver/dispatch.cc b/lib/driver/dispatch.cc index fd6ca7bcb..d62d9ec18 100755 --- a/lib/driver/dispatch.cc +++ b/lib/driver/dispatch.cc @@ -72,17 +72,6 @@ namespace driver #define DEFINE19(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15, t16, t17, t18, t19) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i, t10 j, t11 k, t12 l, t13 m, t14 n, t15 o, t16 p, t17 q, t18 r, t19 s)\ {return f_impl(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s); } -//Specialized helpers for OpenCL -#define OCL_DEFINE1(ret, fname, t1) DEFINE1(clinit, opencl_, ret, fname, t1) -#define OCL_DEFINE2(ret, fname, t1, t2) DEFINE2(clinit, opencl_, ret, fname, t1, t2) -#define OCL_DEFINE3(ret, fname, t1, t2, t3) DEFINE3(clinit, opencl_, ret, fname, t1, t2, t3) -#define OCL_DEFINE4(ret, fname, t1, t2, t3, t4) DEFINE4(clinit, opencl_, ret, fname, t1, t2, t3, t4) -#define OCL_DEFINE5(ret, fname, t1, t2, t3, t4, t5) DEFINE5(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5) -#define OCL_DEFINE6(ret, fname, t1, t2, t3, t4, t5, t6) DEFINE6(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5, t6) -#define OCL_DEFINE7(ret, fname, t1, t2, t3, t4, t5, t6, t7) DEFINE7(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5, t6, t7) -#define OCL_DEFINE8(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) DEFINE8(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) -#define OCL_DEFINE9(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) DEFINE9(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) - //Specialized helpers for CUDA #define CUDA_DEFINE1(ret, fname, t1) DEFINE1(cuinit, cuda_, ret, fname, t1) #define CUDA_DEFINE2(ret, fname, t1, t2) DEFINE2(cuinit, cuda_, ret, fname, t1, t2) @@ -101,12 +90,6 @@ namespace driver #define NVML_DEFINE2(ret, fname, t1, t2) DEFINE2(nvmlinit, nvml_, ret, fname, t1, t2) #define NVML_DEFINE3(ret, fname, t1, t2, t3) DEFINE3(nvmlinit, nvml_, ret, fname, t1, t2, t3) -bool dispatch::clinit() -{ - if(opencl_==nullptr) - opencl_ = dlopen("libOpenCL.so", RTLD_LAZY); - return opencl_ != nullptr; -} bool dispatch::cuinit(){ if(cuda_==nullptr){ @@ -189,46 +172,6 @@ NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetClockInfo, nvmlDevice_t, nvmlClockType_t NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetMaxClockInfo, nvmlDevice_t, nvmlClockType_t, unsigned int*) NVML_DEFINE3(nvmlReturn_t, nvmlDeviceSetApplicationsClocks, nvmlDevice_t, unsigned int, unsigned int) -// OpenCL -cl_int dispatch::clBuildProgram(cl_program a, cl_uint b, const cl_device_id * c, const char * d, void (*e)(cl_program, void *), void * f) -{ return f_impl(opencl_, clBuildProgram, clBuildProgram_, "clBuildProgram", a, b, c, d, e, f); } - -cl_context dispatch::clCreateContext(const cl_context_properties * a, cl_uint b, const cl_device_id * c, void (*d)(const char *, const void *, size_t, void *), void * e, cl_int * f) -{ return f_impl(opencl_, dispatch::clCreateContext, dispatch::clCreateContext_, "clCreateContext", a, b, c, d, e, f); } - -OCL_DEFINE9(cl_int, clEnqueueNDRangeKernel, cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*) -OCL_DEFINE4(cl_int, clSetKernelArg, cl_kernel, cl_uint, size_t, const void *) -OCL_DEFINE1(cl_int, clReleaseMemObject, cl_mem) -OCL_DEFINE1(cl_int, clFinish, cl_command_queue) -OCL_DEFINE5(cl_int, clGetMemObjectInfo, cl_mem, cl_mem_info, size_t, void *, size_t *) -OCL_DEFINE5(cl_int, clGetCommandQueueInfo, cl_command_queue, cl_command_queue_info, size_t, void *, size_t *) -OCL_DEFINE1(cl_int, clReleaseContext, cl_context) -OCL_DEFINE1(cl_int, clReleaseEvent, cl_event) -OCL_DEFINE9(cl_int, clEnqueueWriteBuffer, cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *) -OCL_DEFINE9(cl_int, clEnqueueReadBuffer, cl_command_queue, cl_mem, cl_bool, size_t, size_t, void *, cl_uint, const cl_event *, cl_event *) -OCL_DEFINE6(cl_int, clGetProgramBuildInfo, cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *) -OCL_DEFINE1(cl_int, clReleaseDevice, cl_device_id) -OCL_DEFINE5(cl_int, clGetDeviceIDs, cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *) -OCL_DEFINE5(cl_int, clGetContextInfo, cl_context, cl_context_info, size_t, void *, size_t *) -OCL_DEFINE5(cl_int, clGetDeviceInfo, cl_device_id, cl_device_info, size_t, void *, size_t *) -OCL_DEFINE1(cl_int, clReleaseCommandQueue, cl_command_queue) -OCL_DEFINE3(cl_int, clGetPlatformIDs, cl_uint, cl_platform_id *, cl_uint *) -OCL_DEFINE5(cl_int, clGetPlatformInfo, cl_platform_id, cl_platform_info, size_t, void *, size_t *) -OCL_DEFINE5(cl_int, clGetEventProfilingInfo, cl_event, cl_profiling_info, size_t, void *, size_t *) -OCL_DEFINE7(cl_program, clCreateProgramWithBinary, cl_context, cl_uint, const cl_device_id *, const size_t *, const unsigned char **, cl_int *, cl_int *) -OCL_DEFINE4(cl_command_queue, clCreateCommandQueue, cl_context, cl_device_id, cl_command_queue_properties, cl_int *) -OCL_DEFINE1(cl_int, clRetainEvent, cl_event) -OCL_DEFINE1(cl_int, clReleaseProgram, cl_program) -OCL_DEFINE1(cl_int, clFlush, cl_command_queue) -OCL_DEFINE5(cl_int, clGetProgramInfo, cl_program, cl_program_info, size_t, void *, size_t *) -OCL_DEFINE5(cl_int, clGetKernelInfo, cl_kernel, cl_kernel_info, size_t, void *, size_t *) -OCL_DEFINE6(cl_int, clGetKernelWorkGroupInfo, cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void *, size_t *) -OCL_DEFINE3(cl_kernel, clCreateKernel, cl_program, const char *, cl_int *) -OCL_DEFINE4(cl_int, clCreateKernelsInProgram, cl_program, cl_uint, cl_kernel*, cl_uint*) -OCL_DEFINE5(cl_mem, clCreateBuffer, cl_context, cl_mem_flags, size_t, void *, cl_int *) -OCL_DEFINE5(cl_program, clCreateProgramWithSource, cl_context, cl_uint, const char **, const size_t *, cl_int *) -OCL_DEFINE1(cl_int, clReleaseKernel, cl_kernel) - // LLVM to SPIR-V int dispatch::initializeLLVMToSPIRVPass(llvm::PassRegistry ®istry){ return f_impl(spvllvm_, initializeLLVMToSPIRVPass, initializeLLVMToSPIRVPass_, "initializeLLVMToSPIRVPass", std::ref(registry)); @@ -246,47 +189,10 @@ void dispatch::release(){ } } -void * dispatch::opencl_; void* dispatch::cuda_; void* dispatch::nvml_; void* dispatch::spvllvm_; -//OpenCL -void* dispatch::clBuildProgram_; -void* dispatch::clEnqueueNDRangeKernel_; -void* dispatch::clSetKernelArg_; -void* dispatch::clReleaseMemObject_; -void* dispatch::clFinish_; -void* dispatch::clGetMemObjectInfo_; -void* dispatch::clGetCommandQueueInfo_; -void* dispatch::clReleaseContext_; -void* dispatch::clReleaseEvent_; -void* dispatch::clEnqueueWriteBuffer_; -void* dispatch::clEnqueueReadBuffer_; -void* dispatch::clGetProgramBuildInfo_; -void* dispatch::clReleaseDevice_; -void* dispatch::clCreateContext_; -void* dispatch::clGetDeviceIDs_; -void* dispatch::clGetContextInfo_; -void* dispatch::clGetDeviceInfo_; -void* dispatch::clReleaseCommandQueue_; -void* dispatch::clGetPlatformIDs_; -void* dispatch::clGetPlatformInfo_; -void* dispatch::clGetEventProfilingInfo_; -void* dispatch::clCreateProgramWithBinary_; -void* dispatch::clCreateCommandQueue_; -void* dispatch::clRetainEvent_; -void* dispatch::clReleaseProgram_; -void* dispatch::clFlush_; -void* dispatch::clGetProgramInfo_; -void* dispatch::clGetKernelInfo_; -void* dispatch::clGetKernelWorkGroupInfo_; -void* dispatch::clCreateKernel_; -void* dispatch::clCreateKernelsInProgram_; -void* dispatch::clCreateBuffer_; -void* dispatch::clCreateProgramWithSource_; -void* dispatch::clReleaseKernel_; - //CUDA void* dispatch::cuCtxGetCurrent_; void* dispatch::cuCtxSetCurrent_; diff --git a/lib/driver/error.cc b/lib/driver/error.cc index ea7d1721a..e40c317b7 100755 --- a/lib/driver/error.cc +++ b/lib/driver/error.cc @@ -94,67 +94,6 @@ void check(CUresult err) } } -void check(cl_int err) -{ - using namespace exception::ocl; - switch(err) - { - case CL_SUCCESS: break; - case CL_DEVICE_NOT_FOUND: throw device_not_found(); - case CL_DEVICE_NOT_AVAILABLE: throw device_not_available(); - case CL_COMPILER_NOT_AVAILABLE: throw compiler_not_available(); - case CL_MEM_OBJECT_ALLOCATION_FAILURE: throw mem_object_allocation_failure(); - case CL_OUT_OF_RESOURCES: throw out_of_resources(); - case CL_OUT_OF_HOST_MEMORY: throw out_of_host_memory(); - case CL_PROFILING_INFO_NOT_AVAILABLE: throw profiling_info_not_available(); - case CL_MEM_COPY_OVERLAP: throw mem_copy_overlap(); - case CL_IMAGE_FORMAT_MISMATCH: throw image_format_mismatch(); - case CL_IMAGE_FORMAT_NOT_SUPPORTED: throw image_format_not_supported(); - case CL_BUILD_PROGRAM_FAILURE: throw build_program_failure(); - case CL_MAP_FAILURE: throw map_failure(); - - case CL_INVALID_VALUE: throw invalid_value(); - case CL_INVALID_DEVICE_TYPE: throw invalid_device_type(); - case CL_INVALID_PLATFORM: throw invalid_platform(); - case CL_INVALID_DEVICE: throw invalid_device(); - case CL_INVALID_CONTEXT: throw invalid_context(); - case CL_INVALID_QUEUE_PROPERTIES: throw invalid_queue_properties(); - case CL_INVALID_COMMAND_QUEUE: throw invalid_command_queue(); - case CL_INVALID_HOST_PTR: throw invalid_host_ptr(); - case CL_INVALID_MEM_OBJECT: throw invalid_mem_object(); - case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: throw invalid_image_format_descriptor(); - case CL_INVALID_IMAGE_SIZE: throw invalid_image_size(); - case CL_INVALID_SAMPLER: throw invalid_sampler(); - case CL_INVALID_BINARY: throw invalid_binary(); - case CL_INVALID_BUILD_OPTIONS: throw invalid_build_options(); - case CL_INVALID_PROGRAM: throw invalid_program(); - case CL_INVALID_PROGRAM_EXECUTABLE: throw invalid_program_executable(); - case CL_INVALID_KERNEL_NAME: throw invalid_kernel_name(); - case CL_INVALID_KERNEL_DEFINITION: throw invalid_kernel_definition(); - case CL_INVALID_KERNEL: throw invalid_kernel(); - case CL_INVALID_ARG_INDEX: throw invalid_arg_index(); - case CL_INVALID_ARG_VALUE: throw invalid_arg_value(); - case CL_INVALID_ARG_SIZE: throw invalid_arg_size(); - case CL_INVALID_KERNEL_ARGS: throw invalid_kernel_args(); - case CL_INVALID_WORK_DIMENSION: throw invalid_work_dimension(); - case CL_INVALID_WORK_GROUP_SIZE: throw invalid_work_group_size(); - case CL_INVALID_WORK_ITEM_SIZE: throw invalid_work_item_size(); - case CL_INVALID_GLOBAL_OFFSET: throw invalid_global_offset(); - case CL_INVALID_EVENT_WAIT_LIST: throw invalid_event_wait_list(); - case CL_INVALID_EVENT: throw invalid_event(); - case CL_INVALID_OPERATION: throw invalid_operation(); - case CL_INVALID_GL_OBJECT: throw invalid_gl_object(); - case CL_INVALID_BUFFER_SIZE: throw invalid_buffer_size(); - case CL_INVALID_MIP_LEVEL: throw invalid_mip_level(); - case CL_INVALID_GLOBAL_WORK_SIZE: throw invalid_global_work_size(); - #ifdef CL_INVALID_PROPERTY - case CL_INVALID_PROPERTY: throw invalid_property(); - #endif - default: throw; - } -} - - } } diff --git a/lib/driver/handle.cc b/lib/driver/handle.cc index 8899eb30e..f14800e99 100755 --- a/lib/driver/handle.cc +++ b/lib/driver/handle.cc @@ -38,15 +38,6 @@ inline void _delete(host_stream_t) { } inline void _delete(host_buffer_t x) { if(x.data) delete[] x.data; } inline void _delete(host_function_t) { } -//OpenCL -inline void _delete(cl_platform_id) { } -inline void _delete(cl_device_id x) { dispatch::clReleaseDevice(x); } -inline void _delete(cl_context x) { dispatch::clReleaseContext(x); } -inline void _delete(cl_program x) { dispatch::clReleaseProgram(x); } -inline void _delete(cl_kernel x) { dispatch::clReleaseKernel(x); } -inline void _delete(cl_command_queue x) { dispatch::clReleaseCommandQueue(x); } -inline void _delete(cl_mem x) { dispatch::clReleaseMemObject(x); } - //CUDA inline void _delete(CUcontext x) { dispatch::cuCtxDestroy(x); } inline void _delete(CUdeviceptr x) { dispatch::cuMemFree(x); } @@ -87,14 +78,6 @@ template class handle; template class handle; template class handle; -template class handle; -template class handle; -template class handle; -template class handle; -template class handle; -template class handle; -template class handle; - template class handle; template class handle; template class handle; diff --git a/lib/driver/kernel.cc b/lib/driver/kernel.cc index e8bed34bc..05d8b7b3c 100755 --- a/lib/driver/kernel.cc +++ b/lib/driver/kernel.cc @@ -39,9 +39,6 @@ kernel::kernel(driver::module *program, CUfunction fn, bool has_ownership): polymorphic_resource(fn, has_ownership), program_(program){ } -kernel::kernel(driver::module *program, cl_kernel fn, bool has_ownership): - polymorphic_resource(fn, has_ownership), program_(program){ -} kernel::kernel(driver::module *program, host_function_t fn, bool has_ownership): polymorphic_resource(fn, has_ownership), program_(program){ @@ -50,7 +47,6 @@ kernel::kernel(driver::module *program, host_function_t fn, bool has_ownership): kernel* kernel::create(driver::module* program, const char* name) { switch(program->backend()){ case CUDA: return new cu_kernel(program, name); - case OpenCL: return new ocl_kernel(program, name); case Host: return new host_kernel(program, name); default: throw std::runtime_error("unknown backend"); } @@ -89,31 +85,6 @@ const std::vector &host_kernel::params(){ return params_; } -/* ------------------------ */ -// OpenCL // -/* ------------------------ */ - -ocl_kernel::ocl_kernel(driver::module* program, const char* name): kernel(program, cl_kernel(), true) { -// cl_uint res; -// check(dispatch::clCreateKernelsInProgram(*program->cl(), 0, NULL, &res)); -// std::cout << res << std::endl; - cl_int err; - *cl_ = dispatch::clCreateKernel(*program->cl(), "matmul", &err); - check(err); -} - -void ocl_kernel::setArg(unsigned int index, std::size_t size, void* ptr) { - check(dispatch::clSetKernelArg(*cl_, index, size, ptr)); -} - -void ocl_kernel::setArg(unsigned int index, driver::buffer* buffer) { - if(buffer) - check(dispatch::clSetKernelArg(*cl_, index, sizeof(cl_mem), (void*)&*buffer->cl())); - else - kernel::setArg(index, (std::ptrdiff_t)0); -} - - /* ------------------------ */ // CUDA // /* ------------------------ */ diff --git a/lib/driver/module.cc b/lib/driver/module.cc index 57f206c8c..78f42d9a5 100755 --- a/lib/driver/module.cc +++ b/lib/driver/module.cc @@ -64,10 +64,6 @@ module::module(driver::context* ctx, CUmodule mod, bool has_ownership) : polymorphic_resource(mod, has_ownership), ctx_(ctx) { } -module::module(driver::context* ctx, cl_program mod, bool has_ownership) - : polymorphic_resource(mod, has_ownership), ctx_(ctx) { -} - module::module(driver::context* ctx, host_module_t mod, bool has_ownership) : polymorphic_resource(mod, has_ownership), ctx_(ctx) { } @@ -79,7 +75,6 @@ driver::context* module::context() const { module* module::create(driver::context* ctx, std::unique_ptr src) { switch(ctx->backend()){ case CUDA: return new cu_module(ctx, std::move(src)); - case OpenCL: return new ocl_module(ctx, std::move(src)); case Host: return new host_module(ctx, std::move(src)); default: throw std::runtime_error("unknown backend"); } @@ -214,42 +209,6 @@ std::unique_ptr host_module::symbol(const char *name) const { throw std::runtime_error("not implemented"); } - -/* ------------------------ */ -// OpenCL // -/* ------------------------ */ - -ocl_module::ocl_module(driver::context * context, std::unique_ptr src): module(context, cl_program(), true) { - throw std::runtime_error("not supported"); -// init_llvm(); -// llvm::SmallVector buffer; -// module::compile_llvm_module(src, "amdgcn-amd-amdhsa-amdgizcl", "gfx902", "", buffer, "code-object-v3", Object); -// std::ofstream output("/tmp/tmp.o", std::ios::binary); -// std::copy(buffer.begin(), buffer.end(), std::ostreambuf_iterator(output)); -// system("ld.lld-8 /tmp/tmp.o -shared -o /tmp/tmp.o"); -// std::ifstream input("/tmp/tmp.o", std::ios::in | std::ios::binary ); -// std::vector in_buffer(std::istreambuf_iterator(input), {}); -// size_t sizes[] = {in_buffer.size()}; -// const unsigned char* data[] = {(unsigned char*)in_buffer.data()}; -// cl_int status; -// cl_int err; -// *cl_ = dispatch::clCreateProgramWithBinary(*context->cl(), 1, &*context->device()->cl(), sizes, data, &status, &err); -// check(status); -// check(err); -// try{ -// dispatch::clBuildProgram(*cl_, 1, &*context->device()->cl(), NULL, NULL, NULL); -// } -// catch(...){ -// char log[2048]; -// dispatch::clGetProgramBuildInfo(*cl_, *context->device()->cl(), CL_PROGRAM_BUILD_LOG, 1024, log, NULL); -// throw; -// } -} - -std::unique_ptr ocl_module::symbol(const char *name) const { - throw std::runtime_error("not implemented"); -} - /* ------------------------ */ // CUDA // /* ------------------------ */ diff --git a/lib/driver/platform.cc b/lib/driver/platform.cc index 90cb1913b..f6333cd5a 100755 --- a/lib/driver/platform.cc +++ b/lib/driver/platform.cc @@ -51,27 +51,6 @@ void cu_platform::devices(std::vector &devices) const{ } } -/* ------------------------ */ -// OpenCL // -/* ------------------------ */ - -std::string cl_platform::version() const { - size_t size; - check(dispatch::clGetPlatformInfo(*cl_, CL_PLATFORM_VERSION, 0, nullptr, &size)); - std::string result(size, 0); - check(dispatch::clGetPlatformInfo(*cl_, CL_PLATFORM_VERSION, size, (void*)&*result.begin(), nullptr)); - return result; -} - -void cl_platform::devices(std::vector &devices) const{ - cl_uint num_devices; - check(dispatch::clGetDeviceIDs(*cl_, CL_DEVICE_TYPE_GPU, 0, nullptr, &num_devices)); - std::vector ids(num_devices); - check(dispatch::clGetDeviceIDs(*cl_, CL_DEVICE_TYPE_GPU, num_devices, ids.data(), nullptr)); - for(cl_device_id id: ids) - devices.push_back(new driver::ocl_device(id)); -} - /* ------------------------ */ // Host // /* ------------------------ */ diff --git a/lib/driver/stream.cc b/lib/driver/stream.cc index f1501bb2c..31f35cab3 100755 --- a/lib/driver/stream.cc +++ b/lib/driver/stream.cc @@ -46,9 +46,6 @@ stream::stream(driver::context *ctx, CUstream cu, bool has_ownership) : polymorphic_resource(cu, has_ownership), ctx_(ctx) { } -stream::stream(driver::context *ctx, cl_command_queue cl, bool has_ownership) - : polymorphic_resource(cl, has_ownership), ctx_(ctx) { -} stream::stream(driver::context *ctx, host_stream_t cl, bool has_ownership) : polymorphic_resource(cl, has_ownership), ctx_(ctx) { @@ -57,7 +54,6 @@ stream::stream(driver::context *ctx, host_stream_t cl, bool has_ownership) driver::stream* stream::create(driver::context* ctx) { switch(ctx->backend()){ case CUDA: return new cu_stream(ctx); - case OpenCL: return new cl_stream(ctx); case Host: return new host_stream(ctx); default: throw std::runtime_error("unknown backend"); } @@ -97,33 +93,6 @@ void host_stream::read(driver::buffer* buffer, bool blocking, std::size_t offset } -/* ------------------------ */ -// OpenCL // -/* ------------------------ */ - -cl_stream::cl_stream(driver::context *ctx): stream(ctx, cl_command_queue(), true) { - cl_int err; - *cl_ = dispatch::clCreateCommandQueue(*ctx->cl(), *ctx->device()->cl(), 0, &err); - check(err); -} - -void cl_stream::synchronize() { - check(dispatch::clFinish(*cl_)); -} - -void cl_stream::enqueue(driver::kernel* kernel, std::array grid, std::array block, std::vector const *, event* event, void **args, size_t args_size) { - std::array global = {grid[0]*block[0], grid[1]*block[1], grid[2]*block[2]}; - check(dispatch::clEnqueueNDRangeKernel(*cl_, *kernel->cl(), grid.size(), NULL, (const size_t*)global.data(), (const size_t*)block.data(), 0, NULL, NULL)); -} - -void cl_stream::write(driver::buffer* buffer, bool blocking, std::size_t offset, std::size_t size, void const* ptr) { - check(dispatch::clEnqueueWriteBuffer(*cl_, *buffer->cl(), blocking?CL_TRUE:CL_FALSE, offset, size, ptr, 0, NULL, NULL)); -} - -void cl_stream::read(driver::buffer* buffer, bool blocking, std::size_t offset, std::size_t size, void* ptr) { - check(dispatch::clEnqueueReadBuffer(*cl_, *buffer->cl(), blocking?CL_TRUE:CL_FALSE, offset, size, ptr, 0, NULL, NULL)); -} - /* ------------------------ */ // CUDA // /* ------------------------ */ diff --git a/lib/ir/builder.cc b/lib/ir/builder.cc index 5f67b93af..50404df46 100644 --- a/lib/ir/builder.cc +++ b/lib/ir/builder.cc @@ -10,7 +10,7 @@ namespace triton{ namespace ir{ builder::builder(context &ctx): - ctx_(ctx), block_(nullptr), insert_point_(nullptr) {} + ctx_(ctx), block_(nullptr) {} //===----------------------------------------------------------------------===// // utilities diff --git a/lib/runtime/function.cc b/lib/runtime/function.cc index db514a4a8..4d8edc523 100644 --- a/lib/runtime/function.cc +++ b/lib/runtime/function.cc @@ -3,6 +3,7 @@ #include #include #include +#include #include #include "triton/codegen/analysis/axes.h" #include "triton/codegen/analysis/allocation.h"