Code quality: removed dependencies on the C++ OpenCL wrapper
This commit is contained in:
@@ -4,7 +4,6 @@
|
||||
|
||||
#include <list>
|
||||
#include <set>
|
||||
#include <CL/cl.hpp>
|
||||
|
||||
#include "isaac/types.h"
|
||||
#include "isaac/backend/parse.h"
|
||||
|
@@ -19,17 +19,17 @@ class ISAACAPI Buffer
|
||||
friend class CommandQueue;
|
||||
friend class Kernel;
|
||||
public:
|
||||
Buffer(cl::Buffer const & Buffer);
|
||||
Buffer(cl_mem Buffer);
|
||||
Buffer(Context const & context, std::size_t size);
|
||||
Context const & context() const;
|
||||
bool operator<(Buffer const &) const;
|
||||
bool operator==(Buffer const &) const;
|
||||
HANDLE_TYPE(cl::Buffer, CUdeviceptr)& handle();
|
||||
HANDLE_TYPE(cl::Buffer, CUdeviceptr) const & handle() const;
|
||||
HANDLE_TYPE(cl_mem, CUdeviceptr)& handle();
|
||||
HANDLE_TYPE(cl_mem, CUdeviceptr) const & handle() const;
|
||||
private:
|
||||
backend_type backend_;
|
||||
Context context_;
|
||||
HANDLE_TYPE(cl::Buffer, CUdeviceptr) h_;
|
||||
HANDLE_TYPE(cl_mem, CUdeviceptr) h_;
|
||||
};
|
||||
|
||||
}
|
||||
|
@@ -23,7 +23,7 @@ class Buffer;
|
||||
class ISAACAPI CommandQueue
|
||||
{
|
||||
public:
|
||||
CommandQueue(cl::CommandQueue const & queue);
|
||||
CommandQueue(cl_command_queue const & queue);
|
||||
CommandQueue(Context const & context, Device const & device, cl_command_queue_properties properties = 0);
|
||||
Context const & context() const;
|
||||
Device const & device() const;
|
||||
@@ -33,12 +33,12 @@ public:
|
||||
void read(Buffer const & buffer, bool blocking, std::size_t offset, std::size_t size, void* ptr);
|
||||
bool operator==(CommandQueue const & other) const;
|
||||
bool operator<(CommandQueue const & other) const;
|
||||
HANDLE_TYPE(cl::CommandQueue, CUstream)& handle();
|
||||
HANDLE_TYPE(cl_command_queue, CUstream)& handle();
|
||||
private:
|
||||
backend_type backend_;
|
||||
Context context_;
|
||||
Device device_;
|
||||
HANDLE_TYPE(cl::CommandQueue, CUstream) h_;
|
||||
HANDLE_TYPE(cl_command_queue, CUstream) h_;
|
||||
};
|
||||
|
||||
|
||||
|
@@ -1,7 +1,11 @@
|
||||
#ifndef ISAAC_DRIVER_COMMON_H
|
||||
#define ISAAC_DRIVER_COMMON_H
|
||||
|
||||
#include <CL/cl.hpp>
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_ext.h>
|
||||
#include <exception>
|
||||
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
#include <cuda.h>
|
||||
#include <nvrtc.h>
|
||||
|
@@ -21,7 +21,7 @@ class ISAACAPI Context
|
||||
friend class Buffer;
|
||||
|
||||
public:
|
||||
explicit Context(cl::Context const & context);
|
||||
explicit Context(cl_context const & context);
|
||||
explicit Context(Device const & device);
|
||||
backend_type backend() const;
|
||||
Device const & device() const;
|
||||
@@ -32,7 +32,7 @@ private:
|
||||
Device device_;
|
||||
|
||||
std::string cache_path_;
|
||||
HANDLE_TYPE(cl::Context, CUcontext) h_;
|
||||
HANDLE_TYPE(cl_context, CUcontext) h_;
|
||||
};
|
||||
|
||||
}
|
||||
|
@@ -37,7 +37,7 @@ public:
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
Device(int ordinal);
|
||||
#endif
|
||||
Device(cl::Device const & device);
|
||||
Device(cl_device_id const & device);
|
||||
backend_type backend() const;
|
||||
size_t clock_rate() const;
|
||||
unsigned int address_bits() const;
|
||||
@@ -56,7 +56,7 @@ public:
|
||||
|
||||
private:
|
||||
backend_type backend_;
|
||||
HANDLE_TYPE(cl::Device, CUdevice) h_;
|
||||
HANDLE_TYPE(cl_device_id, CUdevice) h_;
|
||||
};
|
||||
|
||||
}
|
||||
|
@@ -16,17 +16,16 @@ class ISAACAPI Event
|
||||
{
|
||||
friend class CommandQueue;
|
||||
public:
|
||||
Event(cl::Event const & event);
|
||||
Event(cl_event const & event);
|
||||
Event(backend_type backend);
|
||||
long elapsed_time() const;
|
||||
operator cl::Event();
|
||||
|
||||
HANDLE_TYPE(cl_event, cu_event_t)& handle();
|
||||
private:
|
||||
backend_type backend_;
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
typedef std::pair<CUevent, CUevent> cu_event_t;
|
||||
#endif
|
||||
HANDLE_TYPE(cl::Event, cu_event_t) h_;
|
||||
HANDLE_TYPE(cl_event, cu_event_t) h_;
|
||||
};
|
||||
|
||||
}
|
||||
|
@@ -34,7 +34,7 @@ private:
|
||||
std::vector<std::shared_ptr<void> > cu_params_store_;
|
||||
std::vector<void*> cu_params_;
|
||||
#endif
|
||||
HANDLE_TYPE(cl::Kernel, CUfunction) h_;
|
||||
HANDLE_TYPE(cl_kernel, CUfunction) h_;
|
||||
};
|
||||
|
||||
}
|
||||
|
@@ -15,7 +15,7 @@ class ISAACAPI NDRange
|
||||
{
|
||||
public:
|
||||
NDRange(size_t size0 = 1, size_t size1 = 1, size_t size2 = 1);
|
||||
operator cl::NDRange() const;
|
||||
size_t dimension() const;
|
||||
operator const size_t*() const;
|
||||
private:
|
||||
size_t sizes_[3];
|
||||
|
@@ -1,6 +1,9 @@
|
||||
#ifndef ISAAC_DRIVER_PLATFORM_H
|
||||
#define ISAAC_DRIVER_PLATFORM_H
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include "isaac/defines.h"
|
||||
#include "isaac/driver/common.h"
|
||||
|
||||
@@ -19,15 +22,13 @@ public:
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
Platform(backend_type);
|
||||
#endif
|
||||
Platform(cl::Platform const &);
|
||||
Platform(cl_platform_id const &);
|
||||
std::string name() const;
|
||||
std::string version() const;
|
||||
std::vector<Device> devices() const;
|
||||
static std::vector<Platform> get();
|
||||
|
||||
void devices(std::vector<Device> &) const;
|
||||
private:
|
||||
backend_type backend_;
|
||||
cl::Platform cl_platform_;
|
||||
cl_platform_id cl_platform_;
|
||||
};
|
||||
|
||||
}
|
||||
|
@@ -24,7 +24,7 @@ private:
|
||||
backend_type backend_;
|
||||
Context context_;
|
||||
std::string source_;
|
||||
HANDLE_TYPE(cl::Program, CUmodule) h_;
|
||||
HANDLE_TYPE(cl_program, CUmodule) h_;
|
||||
};
|
||||
|
||||
}
|
||||
|
@@ -1,7 +1,6 @@
|
||||
#ifndef _ISAAC_SCHEDULER_EXECUTE_H
|
||||
#define _ISAAC_SCHEDULER_EXECUTE_H
|
||||
|
||||
#include <CL/cl.hpp>
|
||||
#include "isaac/model/model.h"
|
||||
#include "isaac/symbolic/expression.h"
|
||||
|
||||
|
@@ -1,7 +1,6 @@
|
||||
#ifndef ISAAC_TYPES_H
|
||||
#define ISAAC_TYPES_H
|
||||
|
||||
#include <CL/cl.hpp>
|
||||
#include <list>
|
||||
#include "isaac/defines.h"
|
||||
#include "isaac/exception/unknown_datatype.h"
|
||||
|
@@ -1,3 +1,5 @@
|
||||
#include <cstring>
|
||||
|
||||
#include "isaac/array.h"
|
||||
#include "isaac/backend/parse.h"
|
||||
#include "isaac/exception/operation_not_supported.h"
|
||||
|
@@ -1,3 +1,4 @@
|
||||
#include <cstring>
|
||||
#include "isaac/backend/templates/axpy.h"
|
||||
#include "isaac/backend/keywords.h"
|
||||
#include "isaac/driver/backend.h"
|
||||
|
@@ -1,6 +1,6 @@
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include "isaac/backend/templates/dot.h"
|
||||
#include <CL/cl.hpp>
|
||||
#include "isaac/tools/to_string.hpp"
|
||||
#include "isaac/tools/make_map.hpp"
|
||||
#include "isaac/tools/make_vector.hpp"
|
||||
|
@@ -1,3 +1,4 @@
|
||||
#include <cstring>
|
||||
#include "isaac/array.h"
|
||||
#include "isaac/backend/templates/gemm.h"
|
||||
#include "isaac/backend/keywords.h"
|
||||
|
@@ -1,3 +1,4 @@
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include "isaac/backend/stream.h"
|
||||
#include "isaac/backend/keywords.h"
|
||||
|
@@ -1,9 +1,10 @@
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include "isaac/backend/templates/ger.h"
|
||||
#include "isaac/tools/make_map.hpp"
|
||||
#include "isaac/tools/make_vector.hpp"
|
||||
#include "isaac/symbolic/io.h"
|
||||
#include "isaac/backend/keywords.h"
|
||||
#include <iostream>
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
|
@@ -1,6 +1,7 @@
|
||||
#include "isaac/driver/backend.h"
|
||||
#include <assert.h>
|
||||
#include <stdexcept>
|
||||
#include <vector>
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
@@ -30,13 +31,17 @@ void queues_type::cuinit()
|
||||
|
||||
void queues_type::clinit()
|
||||
{
|
||||
std::vector<cl::Platform> platforms;
|
||||
cl::Platform::get(&platforms);
|
||||
for(auto & p : platforms)
|
||||
cl_uint nplatforms;
|
||||
ocl::check(clGetPlatformIDs(0, NULL, &nplatforms));
|
||||
std::vector<cl_platform_id> platforms(nplatforms);
|
||||
ocl::check(clGetPlatformIDs(nplatforms, platforms.data(), NULL));
|
||||
for(cl_platform_id p : platforms)
|
||||
{
|
||||
std::vector<cl::Device> devices;
|
||||
p.getDevices(CL_DEVICE_TYPE_ALL, &devices);
|
||||
for(auto & d : devices)
|
||||
cl_uint ndevices;
|
||||
ocl::check(clGetDeviceIDs(p, CL_DEVICE_TYPE_ALL, 0, NULL, &ndevices));
|
||||
std::vector<cl_device_id> devices(ndevices);
|
||||
ocl::check(clGetDeviceIDs(p, CL_DEVICE_TYPE_ALL, ndevices, devices.data(), NULL));
|
||||
for(cl_device_id d : devices)
|
||||
append(Context(Device(d)));
|
||||
}
|
||||
}
|
||||
|
@@ -1,5 +1,6 @@
|
||||
#include "isaac/driver/buffer.h"
|
||||
#include <iostream>
|
||||
#include "isaac/driver/buffer.h"
|
||||
#include "helpers/ocl/infos.hpp"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
@@ -7,7 +8,7 @@ namespace isaac
|
||||
namespace driver
|
||||
{
|
||||
|
||||
Buffer::Buffer(cl::Buffer const & buffer) : backend_(OPENCL), context_(buffer.getInfo<CL_MEM_CONTEXT>()), h_(backend_)
|
||||
Buffer::Buffer(cl_mem buffer) : backend_(OPENCL), context_(ocl::info<CL_MEM_CONTEXT>(buffer)), h_(backend_)
|
||||
{
|
||||
h_.cl() = buffer;
|
||||
}
|
||||
@@ -24,7 +25,7 @@ Buffer::Buffer(Context const & context, std::size_t size) : backend_(context.bac
|
||||
#endif
|
||||
case OPENCL:
|
||||
cl_int err;
|
||||
h_.cl() = cl::Buffer(context.h_.cl(), CL_MEM_READ_WRITE, size, NULL, &err);
|
||||
h_.cl() = clCreateBuffer(context.h_.cl(), CL_MEM_READ_WRITE, size, NULL, &err);
|
||||
ocl::check(err);
|
||||
break;
|
||||
default:
|
||||
@@ -41,10 +42,10 @@ bool Buffer::operator==(Buffer const & other) const
|
||||
bool Buffer::operator<(Buffer const & other) const
|
||||
{ return h_<other.h_; }
|
||||
|
||||
HANDLE_TYPE(cl::Buffer, CUdeviceptr) & Buffer::handle()
|
||||
HANDLE_TYPE(cl_mem, CUdeviceptr) & Buffer::handle()
|
||||
{ return h_; }
|
||||
|
||||
HANDLE_TYPE(cl::Buffer, CUdeviceptr) const & Buffer::handle() const
|
||||
HANDLE_TYPE(cl_mem, CUdeviceptr) const & Buffer::handle() const
|
||||
{ return h_; }
|
||||
|
||||
}
|
||||
|
@@ -1,3 +1,5 @@
|
||||
#include <iostream>
|
||||
|
||||
#include "isaac/driver/command_queue.h"
|
||||
#include "isaac/driver/common.h"
|
||||
#include "isaac/driver/context.h"
|
||||
@@ -6,14 +8,16 @@
|
||||
#include "isaac/driver/kernel.h"
|
||||
#include "isaac/driver/ndrange.h"
|
||||
#include "isaac/driver/buffer.h"
|
||||
#include <iostream>
|
||||
|
||||
#include "helpers/ocl/infos.hpp"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
|
||||
namespace driver
|
||||
{
|
||||
|
||||
CommandQueue::CommandQueue(cl::CommandQueue const & queue) : backend_(OPENCL), context_(queue.getInfo<CL_QUEUE_CONTEXT>()), device_(queue.getInfo<CL_QUEUE_DEVICE>()), h_(backend_)
|
||||
CommandQueue::CommandQueue(cl_command_queue const & queue) : backend_(OPENCL), context_(ocl::info<CL_QUEUE_CONTEXT>(queue)), device_(ocl::info<CL_QUEUE_DEVICE>(queue)), h_(backend_)
|
||||
{
|
||||
h_.cl() = queue;
|
||||
}
|
||||
@@ -28,10 +32,12 @@ CommandQueue::CommandQueue(Context const & context, Device const & device, cl_co
|
||||
break;
|
||||
#endif
|
||||
case OPENCL:
|
||||
{
|
||||
cl_int err;
|
||||
h_.cl() = cl::CommandQueue(context.h_.cl(), device.h_.cl(), properties, &err);
|
||||
h_.cl() = clCreateCommandQueue(context.h_.cl(), device.h_.cl(), properties, &err);
|
||||
ocl::check(err);
|
||||
break;
|
||||
}
|
||||
default: throw;
|
||||
}
|
||||
}
|
||||
@@ -49,7 +55,7 @@ void CommandQueue::synchronize()
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA: cuda::check(cuStreamSynchronize(*h_.cu)); break;
|
||||
#endif
|
||||
case OPENCL: h_.cl().finish(); break;
|
||||
case OPENCL: ocl::check(clFinish(h_.cl())); break;
|
||||
default: throw;
|
||||
}
|
||||
}
|
||||
@@ -68,7 +74,7 @@ Event CommandQueue::enqueue(Kernel const & kernel, NDRange global, driver::NDRan
|
||||
break;
|
||||
#endif
|
||||
case OPENCL:
|
||||
ocl::check(h_.cl().enqueueNDRangeKernel(kernel.h_.cl(), cl::NullRange, (cl::NDRange)global, (cl::NDRange)local, NULL, &event.h_.cl()));
|
||||
ocl::check(clEnqueueNDRangeKernel(h_.cl(), kernel.h_.cl(), global.dimension(), NULL, (const size_t *)global, (const size_t *) local, 0, NULL, &event.h_.cl()));
|
||||
break;
|
||||
default: throw;
|
||||
}
|
||||
@@ -88,7 +94,7 @@ void CommandQueue::write(Buffer const & buffer, bool blocking, std::size_t offse
|
||||
break;
|
||||
#endif
|
||||
case OPENCL:
|
||||
h_.cl().enqueueWriteBuffer(buffer.h_.cl(), blocking, offset, size, ptr);
|
||||
clEnqueueWriteBuffer(h_.cl(), buffer.h_.cl(), blocking, offset, size, ptr, 0, NULL, NULL);
|
||||
break;
|
||||
default: throw;
|
||||
}
|
||||
@@ -107,7 +113,7 @@ void CommandQueue::read(Buffer const & buffer, bool blocking, std::size_t offset
|
||||
break;
|
||||
#endif
|
||||
case OPENCL:
|
||||
h_.cl().enqueueReadBuffer(buffer.h_.cl(), blocking, offset, size, ptr);
|
||||
clEnqueueReadBuffer(h_.cl(), buffer.h_.cl(), blocking, offset, size, ptr, 0, NULL, NULL);
|
||||
break;
|
||||
default: throw;
|
||||
}
|
||||
@@ -119,7 +125,7 @@ bool CommandQueue::operator==(CommandQueue const & other) const
|
||||
bool CommandQueue::operator<(CommandQueue const & other) const
|
||||
{ return h_ < other.h_; }
|
||||
|
||||
HANDLE_TYPE(cl::CommandQueue, CUstream) & CommandQueue::handle()
|
||||
HANDLE_TYPE(cl_command_queue, CUstream) & CommandQueue::handle()
|
||||
{ return h_; }
|
||||
|
||||
}
|
||||
|
@@ -1,5 +1,6 @@
|
||||
#include "isaac/driver/context.h"
|
||||
#include <iostream>
|
||||
#include "isaac/driver/context.h"
|
||||
#include "helpers/ocl/infos.hpp"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
@@ -7,7 +8,7 @@ namespace isaac
|
||||
namespace driver
|
||||
{
|
||||
|
||||
Context::Context(cl::Context const & context) : backend_(OPENCL), device_(context.getInfo<CL_CONTEXT_DEVICES>()[0]), h_(backend_)
|
||||
Context::Context(cl_context const & context) : backend_(OPENCL), device_(ocl::info<CL_CONTEXT_DEVICES>(context)[0]), h_(backend_)
|
||||
{
|
||||
h_.cl() = context;
|
||||
}
|
||||
@@ -30,7 +31,7 @@ Context::Context(Device const & device) : backend_(device.backend_), device_(dev
|
||||
#endif
|
||||
case OPENCL:
|
||||
cl_int err;
|
||||
h_.cl() = cl::Context(std::vector<cl::Device>(1, device_.h_.cl()), NULL, NULL, NULL, &err);
|
||||
h_.cl() = clCreateContext(NULL, 1, &device_.h_.cl(), NULL, NULL, &err);
|
||||
ocl::check(err);
|
||||
break;
|
||||
default:
|
||||
|
@@ -1,5 +1,6 @@
|
||||
#include "isaac/driver/device.h"
|
||||
#include <algorithm>
|
||||
#include "helpers/ocl/infos.hpp"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
@@ -23,7 +24,7 @@ Device::Device(int ordinal): backend_(CUDA), h_(backend_)
|
||||
#endif
|
||||
|
||||
|
||||
Device::Device(cl::Device const & device) : backend_(OPENCL), h_(backend_)
|
||||
Device::Device(cl_device_id const & device) : backend_(OPENCL), h_(backend_)
|
||||
{ h_.cl() = device; }
|
||||
|
||||
backend_type Device::backend() const
|
||||
@@ -36,7 +37,7 @@ unsigned int Device::address_bits() const
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA: return sizeof(long long)*8;
|
||||
#endif
|
||||
case OPENCL: return h_.cl().getInfo<CL_DEVICE_ADDRESS_BITS>();
|
||||
case OPENCL: return ocl::info<CL_DEVICE_ADDRESS_BITS>(h_.cl());
|
||||
default: throw;
|
||||
}
|
||||
|
||||
@@ -50,7 +51,7 @@ driver::Platform Device::platform() const
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA: return Platform(CUDA);
|
||||
#endif
|
||||
case OPENCL: return Platform(h_.cl().getInfo<CL_DEVICE_PLATFORM>());
|
||||
case OPENCL: return Platform(ocl::info<CL_DEVICE_PLATFORM>(h_.cl()));
|
||||
default: throw;
|
||||
}
|
||||
}
|
||||
@@ -65,7 +66,7 @@ std::string Device::name() const
|
||||
cuda::check(cuDeviceGetName(tmp, 128, *h_.cu));
|
||||
return std::string(tmp);
|
||||
#endif
|
||||
case OPENCL: return h_.cl().getInfo<CL_DEVICE_NAME>();
|
||||
case OPENCL: return ocl::info<CL_DEVICE_NAME>(h_.cl());
|
||||
default: throw;
|
||||
}
|
||||
}
|
||||
@@ -77,7 +78,7 @@ std::string Device::vendor_str() const
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA: return "NVidia";
|
||||
#endif
|
||||
case OPENCL: return h_.cl().getInfo<CL_DEVICE_VENDOR>();
|
||||
case OPENCL: return ocl::info<CL_DEVICE_VENDOR>(h_.cl());
|
||||
default: throw;
|
||||
}
|
||||
}
|
||||
@@ -111,7 +112,7 @@ std::vector<size_t> Device::max_work_item_sizes() const
|
||||
}
|
||||
#endif
|
||||
case OPENCL:
|
||||
return h_.cl().getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>();
|
||||
return ocl::info<CL_DEVICE_MAX_WORK_ITEM_SIZES>(h_.cl());
|
||||
default:
|
||||
throw;
|
||||
}
|
||||
@@ -124,7 +125,7 @@ device_type Device::type() const
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA: return DEVICE_TYPE_GPU;
|
||||
#endif
|
||||
case OPENCL: return static_cast<device_type>(h_.cl().getInfo<CL_DEVICE_TYPE>());
|
||||
case OPENCL: return static_cast<device_type>(ocl::info<CL_DEVICE_TYPE>(h_.cl()));
|
||||
default: throw;
|
||||
}
|
||||
}
|
||||
@@ -138,7 +139,7 @@ std::string Device::extensions() const
|
||||
return "";
|
||||
#endif
|
||||
case OPENCL:
|
||||
return h_.cl().getInfo<CL_DEVICE_EXTENSIONS>();
|
||||
return ocl::info<CL_DEVICE_EXTENSIONS>(h_.cl());
|
||||
default: throw;
|
||||
}
|
||||
}
|
||||
@@ -155,7 +156,7 @@ std::string Device::extensions() const
|
||||
switch(backend_)\
|
||||
{\
|
||||
CUDACASE(CUNAME)\
|
||||
case OPENCL: return h_.cl().getInfo<CLNAME>();\
|
||||
case OPENCL: return ocl::info<CLNAME>(h_.cl());\
|
||||
default: throw;\
|
||||
}\
|
||||
}\
|
||||
@@ -163,7 +164,7 @@ std::string Device::extensions() const
|
||||
|
||||
WRAP_ATTRIBUTE(size_t, max_work_group_size, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, CL_DEVICE_MAX_WORK_GROUP_SIZE)
|
||||
WRAP_ATTRIBUTE(size_t, local_mem_size, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, CL_DEVICE_LOCAL_MEM_SIZE)
|
||||
WRAP_ATTRIBUTE(size_t, warp_wavefront_size, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, CL_DEVICE_WAVEFRONT_WIDTH_AMD)
|
||||
WRAP_ATTRIBUTE(size_t, warp_wavefront_size, CU_DEVICE_ATTRIBUTE_WARP_SIZE, CL_DEVICE_WAVEFRONT_WIDTH_AMD)
|
||||
WRAP_ATTRIBUTE(size_t, clock_rate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, CL_DEVICE_MAX_CLOCK_FREQUENCY)
|
||||
|
||||
|
||||
@@ -172,7 +173,7 @@ std::pair<unsigned int, unsigned int> Device::nv_compute_capability() const
|
||||
switch(backend_)
|
||||
{
|
||||
case OPENCL:
|
||||
return std::pair<unsigned int, unsigned int>( h_.cl().getInfo<CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV>(), h_.cl().getInfo<CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV> ());
|
||||
return std::pair<unsigned int, unsigned int>(ocl::info<CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV>(h_.cl()), ocl::info<CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV>(h_.cl()));
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA:
|
||||
return std::pair<unsigned int, unsigned int>(cuGetInfo<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR>(), cuGetInfo<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR>());
|
||||
|
@@ -1,4 +1,5 @@
|
||||
#include "isaac/driver/event.h"
|
||||
#include "helpers/ocl/infos.hpp"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
@@ -21,7 +22,7 @@ Event::Event(backend_type backend) : backend_(backend), h_(backend_)
|
||||
}
|
||||
}
|
||||
|
||||
Event::Event(cl::Event const & event) : backend_(OPENCL), h_(backend_)
|
||||
Event::Event(cl_event const & event) : backend_(OPENCL), h_(backend_)
|
||||
{
|
||||
h_.cl() = event;
|
||||
}
|
||||
@@ -37,16 +38,15 @@ long Event::elapsed_time() const
|
||||
return 1e6*time;
|
||||
#endif
|
||||
case OPENCL:
|
||||
return (h_.cl().getProfilingInfo<CL_PROFILING_COMMAND_END>() - h_.cl().getProfilingInfo<CL_PROFILING_COMMAND_START>());
|
||||
return ocl::info<CL_PROFILING_COMMAND_END>(h_.cl()) - ocl::info<CL_PROFILING_COMMAND_START>(h_.cl());
|
||||
default:
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
Event::operator cl::Event()
|
||||
{
|
||||
return h_.cl();
|
||||
}
|
||||
HANDLE_TYPE(cl_event, cu_event_t) & Event::handle()
|
||||
{ return h_; }
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -55,7 +55,7 @@ bool Handle<CLType, CUType>::operator==(Handle const & other) const
|
||||
return cu()==other.cu();
|
||||
#endif
|
||||
if(backend_==OPENCL && other.backend_==OPENCL)
|
||||
return cl()()==other.cl()();
|
||||
return cl()==other.cl();
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -67,7 +67,7 @@ bool Handle<CLType, CUType>::operator<(Handle const & other) const
|
||||
return (*cu_)<(*other.cu_);
|
||||
#endif
|
||||
if(backend_==OPENCL && other.backend_==OPENCL)
|
||||
return (*cl_)()<(*other.cl_)();
|
||||
return (*cl_)<(*other.cl_);
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
if(backend_==CUDA && other.backend_==OPENCL)
|
||||
return true;
|
||||
@@ -105,21 +105,21 @@ CUType & Handle<CLType, CUType>::cu()
|
||||
return *cu_;
|
||||
}
|
||||
|
||||
template class Handle<cl::Buffer, CUdeviceptr>;
|
||||
template class Handle<cl::CommandQueue, CUstream>;
|
||||
template class Handle<cl::Context, CUcontext>;
|
||||
template class Handle<cl::Device, CUdevice>;
|
||||
template class Handle<cl::Event, std::pair<CUevent, CUevent> >;
|
||||
template class Handle<cl::Kernel, CUfunction>;
|
||||
template class Handle<cl::Program, CUmodule>;
|
||||
template class Handle<cl_mem, CUdeviceptr>;
|
||||
template class Handle<cl_command_queue, CUstream>;
|
||||
template class Handle<cl_context, CUcontext>;
|
||||
template class Handle<cl_device_id, CUdevice>;
|
||||
template class Handle<cl_event, std::pair<CUevent, CUevent> >;
|
||||
template class Handle<cl_kernel, CUfunction>;
|
||||
template class Handle<cl_program, CUmodule>;
|
||||
#else
|
||||
template class Handle<cl::Buffer, void>;
|
||||
template class Handle<cl::CommandQueue, void>;
|
||||
template class Handle<cl::Context, void>;
|
||||
template class Handle<cl::Device, void>;
|
||||
template class Handle<cl::Event, void>;
|
||||
template class Handle<cl::Kernel, void>;
|
||||
template class Handle<cl::Program, void>;
|
||||
template class Handle<cl_mem, void>;
|
||||
template class Handle<cl_command_queue, void>;
|
||||
template class Handle<cl_context, void>;
|
||||
template class Handle<cl_device_id, void>;
|
||||
template class Handle<cl_event, void>;
|
||||
template class Handle<cl_kernel, void>;
|
||||
template class Handle<cl_program, void>;
|
||||
#endif
|
||||
|
||||
|
||||
|
399
lib/driver/helpers/ocl/infos.hpp
Normal file
399
lib/driver/helpers/ocl/infos.hpp
Normal file
@@ -0,0 +1,399 @@
|
||||
#ifndef ISAAC_DRIVER_HELPERS_OCL_INFOS_HPP_
|
||||
#define ISAAC_DRIVER_HELPERS_OCL_INFOS_HPP_
|
||||
|
||||
/* =========================================================================
|
||||
Copyright (c) 2010-2012, Institute for Microelectronics,
|
||||
Institute for Analysis and Scientific Computing,
|
||||
TU Wien.
|
||||
|
||||
-----------------
|
||||
ViennaCL - The Vienna Computing Library
|
||||
-----------------
|
||||
|
||||
Project Head: Karl Rupp rupp@iue.tuwien.ac.at
|
||||
|
||||
(A list of authors and contributors can be found in the PDF manual)
|
||||
|
||||
License: MIT (X11), see file LICENSE in the base directory
|
||||
============================================================================= */
|
||||
|
||||
|
||||
#include "isaac/driver/common.h"
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
namespace driver
|
||||
{
|
||||
namespace ocl
|
||||
{
|
||||
|
||||
/** @brief Implementation details for the OpenCL managment layer in ViennaCL */
|
||||
namespace detail{
|
||||
|
||||
/** @brief Helper class for obtaining informations from the OpenCL backend. Deprecated! */
|
||||
template<typename T>
|
||||
struct info;
|
||||
|
||||
/** \cond */
|
||||
template<>
|
||||
struct info<cl_mem>
|
||||
{
|
||||
typedef cl_mem_info type;
|
||||
|
||||
static void get(cl_mem handle, cl_mem_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret)
|
||||
{
|
||||
cl_int err = clGetMemObjectInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct info<cl_device_id>
|
||||
{
|
||||
typedef cl_device_info type;
|
||||
|
||||
static void get(cl_device_id handle, cl_device_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret)
|
||||
{
|
||||
cl_int err = clGetDeviceInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct info<cl_kernel>
|
||||
{
|
||||
typedef cl_kernel_info type;
|
||||
|
||||
static void get(cl_kernel handle, cl_kernel_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
|
||||
cl_int err = clGetKernelInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
|
||||
static void get(cl_kernel handle, cl_device_id dev_id, cl_kernel_work_group_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
|
||||
cl_int err = clGetKernelWorkGroupInfo(handle, dev_id, param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct info<cl_context>
|
||||
{
|
||||
typedef cl_context_info type;
|
||||
|
||||
static void get(cl_context handle, cl_context_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
|
||||
cl_int err = clGetContextInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct info<cl_program>
|
||||
{
|
||||
typedef cl_program_info type;
|
||||
|
||||
static void get(cl_program handle, cl_program_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
|
||||
cl_int err = clGetProgramInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
|
||||
static void get(cl_program handle, cl_device_id device, cl_program_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
|
||||
cl_int err = clGetProgramBuildInfo(handle,device,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<>
|
||||
struct info<cl_event>
|
||||
{
|
||||
typedef cl_profiling_info type;
|
||||
static void get(cl_event handle, cl_profiling_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
|
||||
cl_int err = clGetEventProfilingInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct info<cl_command_queue>
|
||||
{
|
||||
typedef cl_command_queue_info type;
|
||||
static void get(cl_command_queue handle, cl_profiling_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
|
||||
cl_int err = clGetCommandQueueInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct info<cl_platform_id>
|
||||
{
|
||||
typedef cl_command_queue_info type;
|
||||
static void get(cl_platform_id handle, cl_profiling_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
|
||||
cl_int err = clGetPlatformInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
};
|
||||
|
||||
//Info getter
|
||||
//Some intelligence is needed for some types
|
||||
template<class RES_T>
|
||||
struct get_info_impl{
|
||||
|
||||
template<class MEM_T, class INFO_T>
|
||||
RES_T operator()(MEM_T const & mem, INFO_T const & info){
|
||||
RES_T res;
|
||||
detail::info<MEM_T>::get(mem,info,sizeof(RES_T),&res,NULL);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<class MEM_T, class ARG_MEM_T, class INFO_T>
|
||||
RES_T operator()(MEM_T const & mem, ARG_MEM_T const & arg_mem, INFO_T const & info){
|
||||
RES_T res;
|
||||
detail::info<MEM_T>::get(mem,arg_mem, info,sizeof(RES_T),&res,NULL);
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct get_info_impl<std::string>{
|
||||
|
||||
template<class MEM_T, class INFO_T>
|
||||
std::string operator()(const MEM_T &mem, const INFO_T &info){
|
||||
char buff[1024];
|
||||
detail::info<MEM_T>::get(mem,info,1024,buff,NULL);
|
||||
return std::string(buff);
|
||||
}
|
||||
|
||||
template<class MEM_T, class ARG_MEM_T, class INFO_T>
|
||||
std::string operator()(MEM_T const & mem, ARG_MEM_T const & arg_mem, INFO_T const & info){
|
||||
char buff[1024];
|
||||
detail::info<MEM_T>::get(mem,arg_mem,info,1024,buff,NULL);
|
||||
return std::string(buff);
|
||||
}
|
||||
};
|
||||
|
||||
template<class T>
|
||||
struct get_info_impl<std::vector<T> >
|
||||
{
|
||||
template<class MEM_T, class INFO_T>
|
||||
std::vector<T> operator()(const MEM_T &mem, const INFO_T &info)
|
||||
{
|
||||
size_t vec_size;
|
||||
detail::info<MEM_T>::get(mem,info,0,NULL,&vec_size);
|
||||
std::vector<T> res(vec_size/sizeof(T));
|
||||
detail::info<MEM_T>::get(mem,info,vec_size,res.data(),NULL);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<class MEM_T, class ARG_MEM_T, class INFO_T>
|
||||
std::vector<T> operator()(MEM_T const & mem, ARG_MEM_T const & arg_mem, INFO_T const & info)
|
||||
{
|
||||
size_t vec_size;
|
||||
detail::info<MEM_T>::get(mem,arg_mem,info,0,NULL,&vec_size);
|
||||
std::vector<T> res(vec_size/sizeof(T));
|
||||
detail::info<MEM_T>::get(mem,arg_mem,info,vec_size,res.data(),NULL);
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, typename info<T>::type param>
|
||||
struct return_type;
|
||||
/** \endcond */
|
||||
|
||||
/** \cond */
|
||||
#define SET_INFO_RETURN_TYPE(DATA_TYPE,NAME,RETURN_TYPE) template<> struct return_type<DATA_TYPE, NAME> { typedef RETURN_TYPE Result; }
|
||||
|
||||
SET_INFO_RETURN_TYPE(cl_command_queue, CL_QUEUE_CONTEXT, cl_context);
|
||||
SET_INFO_RETURN_TYPE(cl_command_queue, CL_QUEUE_DEVICE, cl_device_id);
|
||||
SET_INFO_RETURN_TYPE(cl_command_queue, CL_QUEUE_REFERENCE_COUNT, cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_command_queue, CL_QUEUE_PROPERTIES, cl_command_queue_properties);
|
||||
|
||||
SET_INFO_RETURN_TYPE(cl_context, CL_CONTEXT_DEVICES, std::vector<cl_device_id>);
|
||||
SET_INFO_RETURN_TYPE(cl_context, CL_CONTEXT_NUM_DEVICES, cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_context, CL_CONTEXT_REFERENCE_COUNT, cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_context, CL_CONTEXT_PROPERTIES, cl_context_properties);
|
||||
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_ADDRESS_BITS, cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_AVAILABLE, cl_bool);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_COMPILER_AVAILABLE, cl_bool);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_WAVEFRONT_WIDTH_AMD, cl_uint);
|
||||
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_ENDIAN_LITTLE, cl_bool);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_EXTENSIONS, std::string);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong);
|
||||
//SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_IMAGE_SUPPORT, cl_bool);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_IMAGE2D_MAX_HEIGHT , size_t);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_IMAGE2D_MAX_WIDTH , size_t);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_IMAGE3D_MAX_DEPTH , size_t);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_IMAGE3D_MAX_HEIGHT , size_t);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_IMAGE3D_MAX_WIDTH , size_t);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY , cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_COMPUTE_UNITS , cl_uint); //The minimum value is 1
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_CONSTANT_ARGS , cl_uint); //The minimum value is 8
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE , cl_ulong); //The minimum value is 64 KB
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE , cl_ulong); //The minimum value is max (1/4th of CL_DEVICE_GLOBAL_MEM_SIZE, 128*1024*1024)
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_PARAMETER_SIZE , size_t);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_READ_IMAGE_ARGS , cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_SAMPLERS , cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE , size_t);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS , cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES , std::vector<size_t>);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_WRITE_IMAGE_ARGS , cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MEM_BASE_ADDR_ALIGN , cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE , cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_NAME , std::string);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PLATFORM , cl_platform_id);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR , cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT , cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT , cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT , cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE , cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PROFILE , std::string);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PROFILING_TIMER_RESOLUTION , size_t);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_QUEUE_PROPERTIES , cl_command_queue_properties);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_SINGLE_FP_CONFIG , cl_device_fp_config);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_TYPE , cl_device_type);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_VENDOR , std::string);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_VENDOR_ID , cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_VERSION , std::string);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DRIVER_VERSION , std::string);
|
||||
|
||||
SET_INFO_RETURN_TYPE(cl_event, CL_PROFILING_COMMAND_QUEUED, cl_ulong);
|
||||
SET_INFO_RETURN_TYPE(cl_event, CL_PROFILING_COMMAND_SUBMIT, cl_ulong);
|
||||
SET_INFO_RETURN_TYPE(cl_event, CL_PROFILING_COMMAND_START, cl_ulong);
|
||||
SET_INFO_RETURN_TYPE(cl_event, CL_PROFILING_COMMAND_END, cl_ulong);
|
||||
|
||||
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_FUNCTION_NAME, std::string);
|
||||
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_NUM_ARGS, cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_REFERENCE_COUNT, cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_CONTEXT, cl_context);
|
||||
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_PROGRAM, cl_program);
|
||||
|
||||
|
||||
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_WORK_GROUP_SIZE, size_t);
|
||||
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_COMPILE_WORK_GROUP_SIZE, std::vector<size_t>);
|
||||
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong);
|
||||
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, size_t);
|
||||
|
||||
SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_TYPE, cl_mem_object_type);
|
||||
SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_FLAGS, cl_mem_flags);
|
||||
SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_SIZE, size_t);
|
||||
SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_HOST_PTR, void*);
|
||||
SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_MAP_COUNT, cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_REFERENCE_COUNT, cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_CONTEXT, cl_context);
|
||||
|
||||
SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_CONTEXT,cl_context);
|
||||
SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_DEVICES,std::vector<cl_device_id>);
|
||||
SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_NUM_DEVICES,cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_SOURCE,std::string);
|
||||
SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_BINARY_SIZES,std::vector<size_t>);
|
||||
SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_BINARIES,std::vector<unsigned char*>);
|
||||
//Build
|
||||
SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_BUILD_STATUS, cl_build_status);
|
||||
SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_BUILD_OPTIONS, std::string);
|
||||
SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_BUILD_LOG, std::string);
|
||||
|
||||
SET_INFO_RETURN_TYPE(cl_platform_id,CL_PLATFORM_PROFILE, std::string);
|
||||
SET_INFO_RETURN_TYPE(cl_platform_id,CL_PLATFORM_VERSION, std::string);
|
||||
SET_INFO_RETURN_TYPE(cl_platform_id,CL_PLATFORM_NAME, std::string);
|
||||
SET_INFO_RETURN_TYPE(cl_platform_id,CL_PLATFORM_VENDOR, std::string);
|
||||
SET_INFO_RETURN_TYPE(cl_platform_id,CL_PLATFORM_EXTENSIONS, std::string);
|
||||
|
||||
#undef SET_INFO_RETURN_TYPE
|
||||
|
||||
/** \endcond */
|
||||
}
|
||||
|
||||
template<cl_device_info param>
|
||||
typename detail::return_type<cl_device_id, param>::Result info(cl_device_id const & handle){
|
||||
typedef typename detail::return_type<cl_device_id, param>::Result res_t;
|
||||
return detail::get_info_impl<res_t>()(handle,param);
|
||||
}
|
||||
|
||||
template<cl_mem_info param>
|
||||
typename detail::return_type<cl_mem, param>::Result info(cl_mem const & handle){
|
||||
typedef typename detail::return_type<cl_mem, param>::Result res_t;
|
||||
return detail::get_info_impl<res_t>()(handle,param);
|
||||
}
|
||||
|
||||
//Program
|
||||
|
||||
template<cl_program_info param>
|
||||
typename detail::return_type<cl_program, param>::Result info(cl_program const & handle){
|
||||
typedef typename detail::return_type<cl_program, param>::Result res_t;
|
||||
return detail::get_info_impl<res_t>()(handle,param);
|
||||
}
|
||||
|
||||
template<cl_program_build_info param>
|
||||
typename detail::return_type<cl_program, param>::Result info(cl_program const & phandle, cl_device_id const & dhandle){
|
||||
typedef typename detail::return_type<cl_program, param>::Result res_t;
|
||||
return detail::get_info_impl<res_t>()(phandle,dhandle,param);
|
||||
}
|
||||
|
||||
//Kernel
|
||||
template<cl_kernel_info param>
|
||||
typename detail::return_type<cl_kernel, param>::Result info(cl_kernel const & handle){
|
||||
typedef typename detail::return_type<cl_kernel, param>::Result res_t;
|
||||
return detail::get_info_impl<res_t>()(handle,param);
|
||||
}
|
||||
|
||||
template<cl_kernel_work_group_info param>
|
||||
typename detail::return_type<cl_kernel, param>::Result info(cl_kernel const & khandle, cl_device_id const & dhandle){
|
||||
typedef typename detail::return_type<cl_kernel, param>::Result res_t;
|
||||
return detail::get_info_impl<res_t>()(khandle,dhandle,param);
|
||||
}
|
||||
|
||||
//Context
|
||||
template<cl_context_info param>
|
||||
typename detail::return_type<cl_context, param>::Result info(cl_context const & handle){
|
||||
typedef typename detail::return_type<cl_context, param>::Result res_t;
|
||||
return detail::get_info_impl<res_t>()(handle,param);
|
||||
}
|
||||
|
||||
//Event
|
||||
template<cl_profiling_info param>
|
||||
typename detail::return_type<cl_event, param>::Result info(cl_event const & handle){
|
||||
typedef typename detail::return_type<cl_event, param>::Result res_t;
|
||||
return detail::get_info_impl<res_t>()(handle,param);
|
||||
}
|
||||
|
||||
//Command queue
|
||||
template<cl_command_queue_info param>
|
||||
typename detail::return_type<cl_command_queue, param>::Result info(cl_command_queue const & handle){
|
||||
typedef typename detail::return_type<cl_command_queue, param>::Result res_t;
|
||||
return detail::get_info_impl<res_t>()(handle,param);
|
||||
}
|
||||
|
||||
//Plaftform
|
||||
template<cl_platform_info param>
|
||||
typename detail::return_type<cl_platform_id, param>::Result info(cl_platform_id const & handle){
|
||||
typedef typename detail::return_type<cl_platform_id, param>::Result res_t;
|
||||
return detail::get_info_impl<res_t>()(handle,param);
|
||||
}
|
||||
|
||||
template<class OCL_TYPE, typename detail::info<OCL_TYPE>::type param>
|
||||
typename detail::return_type<OCL_TYPE, param>::Result info(OCL_TYPE const & handle){
|
||||
return info(handle.get());
|
||||
}
|
||||
|
||||
template<class OCL_TYPE, class OCL_TYPE_ARG, typename detail::info<OCL_TYPE>::type param>
|
||||
typename detail::return_type<OCL_TYPE, param>::Result info(OCL_TYPE const & handle, OCL_TYPE_ARG const & arg_handle){
|
||||
return info(handle.get(), arg_handle.get());
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // INFOS_HPP
|
@@ -20,7 +20,9 @@ Kernel::Kernel(Program const & program, const char * name) : backend_(program.ba
|
||||
break;
|
||||
#endif
|
||||
case OPENCL:
|
||||
h_.cl() = cl::Kernel(program.h_.cl(), name);
|
||||
cl_int err;
|
||||
h_.cl() = clCreateKernel(program.h_.cl(), name, &err);
|
||||
ocl::check(err);
|
||||
break;
|
||||
default:
|
||||
throw;
|
||||
@@ -44,7 +46,7 @@ void Kernel::setArg(unsigned int index, std::size_t size, void* ptr)
|
||||
break;
|
||||
#endif
|
||||
case OPENCL:
|
||||
h_.cl().setArg(index, size, ptr);
|
||||
ocl::check(clSetKernelArg(h_.cl(), index, size, ptr));
|
||||
break;
|
||||
default:
|
||||
throw;
|
||||
@@ -61,7 +63,9 @@ void Kernel::setArg(unsigned int index, Buffer const & data)
|
||||
setArg(index, sizeof(CUdeviceptr), data.h_.cu.get()); break;
|
||||
}
|
||||
#endif
|
||||
case OPENCL: h_.cl().setArg(index, data.h_.cl()); break;
|
||||
case OPENCL:
|
||||
ocl::check(clSetKernelArg(h_.cl(), index, sizeof(cl_mem), (void*)&data.h_.cl()));
|
||||
break;
|
||||
default: throw;
|
||||
}
|
||||
}
|
||||
@@ -81,12 +85,12 @@ void Kernel::setSizeArg(unsigned int index, size_t N)
|
||||
case OPENCL:
|
||||
if(address_bits_==32){
|
||||
int32_t NN = N;
|
||||
h_.cl().setArg(index, 4, &NN);
|
||||
ocl::check(clSetKernelArg(h_.cl(), index, 4, &NN));
|
||||
}
|
||||
else if(address_bits_==64)
|
||||
{
|
||||
int64_t NN = N;
|
||||
h_.cl().setArg(index, 8, &NN);
|
||||
ocl::check(clSetKernelArg(h_.cl(), index, 8, &NN));
|
||||
}
|
||||
else
|
||||
throw;
|
||||
|
@@ -13,9 +13,9 @@ NDRange::NDRange(size_t size0, size_t size1, size_t size2)
|
||||
sizes_[2] = size2;
|
||||
}
|
||||
|
||||
NDRange::operator cl::NDRange() const
|
||||
size_t NDRange::dimension() const
|
||||
{
|
||||
return cl::NDRange(sizes_[0], sizes_[1], sizes_[2]);
|
||||
return (int)(sizes_[0]>1) + (int)(sizes_[1]>1) + (int)(sizes_[2]>1);
|
||||
}
|
||||
|
||||
NDRange::operator const size_t*() const
|
||||
|
@@ -1,6 +1,9 @@
|
||||
#include "isaac/driver/platform.h"
|
||||
#include "isaac/driver/device.h"
|
||||
#include "isaac/tools/to_string.hpp"
|
||||
|
||||
#include "helpers/ocl/infos.hpp"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
|
||||
@@ -11,7 +14,7 @@ namespace driver
|
||||
Platform::Platform(backend_type backend): backend_(backend){}
|
||||
#endif
|
||||
|
||||
Platform::Platform(cl::Platform const & platform) : backend_(OPENCL)
|
||||
Platform::Platform(cl_platform_id const & platform) : backend_(OPENCL)
|
||||
{
|
||||
cl_platform_ = platform;
|
||||
}
|
||||
@@ -26,7 +29,7 @@ std::string Platform::version() const
|
||||
cuDriverGetVersion(&version);
|
||||
return tools::to_string(version);
|
||||
#endif
|
||||
case OPENCL: return cl_platform_.getInfo<CL_PLATFORM_VERSION>();
|
||||
case OPENCL: return ocl::info<CL_PLATFORM_VERSION>(cl_platform_);
|
||||
default: throw;
|
||||
}
|
||||
}
|
||||
@@ -38,27 +41,13 @@ std::string Platform::name() const
|
||||
case CUDA: return "CUDA";
|
||||
#endif
|
||||
|
||||
case OPENCL: return cl_platform_.getInfo<CL_PLATFORM_NAME>();
|
||||
case OPENCL: return ocl::info<CL_PLATFORM_NAME>(cl_platform_);
|
||||
default: throw;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<Platform> Platform::get()
|
||||
void Platform::devices(std::vector<Device> & devices) const
|
||||
{
|
||||
std::vector<Platform> result;
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
result.push_back(Platform(CUDA));
|
||||
#endif
|
||||
std::vector<cl::Platform> clresult;
|
||||
cl::Platform::get(&clresult);
|
||||
for(cl::Platform const & p : clresult)
|
||||
result.push_back(Platform(p));
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<Device> Platform::devices() const
|
||||
{
|
||||
std::vector<Device> result;
|
||||
switch(backend_)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
@@ -67,17 +56,17 @@ std::vector<Device> Platform::devices() const
|
||||
int N;
|
||||
cuda::check(cuDeviceGetCount(&N));
|
||||
for(int i = 0 ; i < N ; ++i)
|
||||
result.push_back(Device(i));
|
||||
return result;
|
||||
devices.push_back(Device(i));
|
||||
}
|
||||
#endif
|
||||
case OPENCL:
|
||||
{
|
||||
std::vector<cl::Device> clDevices;
|
||||
cl_platform_.getDevices(CL_DEVICE_TYPE_ALL, &clDevices);
|
||||
for(cl::Device const & d: clDevices)
|
||||
result.push_back(Device(d));
|
||||
return result;
|
||||
cl_uint ndevices;
|
||||
ocl::check(clGetDeviceIDs(cl_platform_, CL_DEVICE_TYPE_ALL, 0, NULL, &ndevices));
|
||||
std::vector<cl_device_id> device_ids(ndevices);
|
||||
ocl::check(clGetDeviceIDs(cl_platform_, CL_DEVICE_TYPE_ALL, ndevices, device_ids.data(), NULL));
|
||||
for(cl_device_id d : device_ids)
|
||||
devices.push_back(Device(d));
|
||||
}
|
||||
default:
|
||||
throw;
|
||||
|
@@ -8,6 +8,7 @@
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
#include "helpers/cuda/vector.hpp"
|
||||
#endif
|
||||
#include "helpers/ocl/infos.hpp"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
@@ -95,49 +96,56 @@ Program::Program(Context const & context, std::string const & source) : backend_
|
||||
#endif
|
||||
case OPENCL:
|
||||
{
|
||||
std::vector<cl::Device> devices = context_.h_.cl().getInfo<CL_CONTEXT_DEVICES>();
|
||||
cl_int err;
|
||||
std::vector<cl_device_id> devices = ocl::info<CL_CONTEXT_DEVICES>(context_.h_.cl());
|
||||
|
||||
std::string prefix;
|
||||
for(std::vector<cl::Device >::const_iterator it = devices.begin(); it != devices.end(); ++it)
|
||||
prefix += it->getInfo<CL_DEVICE_NAME>() + it->getInfo<CL_DEVICE_VENDOR>() + it->getInfo<CL_DEVICE_VERSION>();
|
||||
for(cl_device_id dev: devices)
|
||||
prefix += ocl::info<CL_DEVICE_NAME>(dev) + ocl::info<CL_DEVICE_VENDOR>(dev) + ocl::info<CL_DEVICE_VERSION>(dev);
|
||||
std::string sha1 = tools::sha1(prefix + source);
|
||||
std::string fname(cache_path + sha1);
|
||||
|
||||
//Load cached program
|
||||
const char * build_opt = "";
|
||||
if(cache_path.size())
|
||||
{
|
||||
std::ifstream cached(fname, std::ios::binary);
|
||||
if (cached)
|
||||
{
|
||||
std::size_t len;
|
||||
std::vector<char> buffer;
|
||||
std::vector<unsigned char> buffer;
|
||||
cached.read((char*)&len, sizeof(std::size_t));
|
||||
buffer.resize(len);
|
||||
cached.read((char*)buffer.data(), std::streamsize(len));
|
||||
char* cbuffer = buffer.data();
|
||||
h_.cl() = cl::Program(context_.h_.cl(), devices, cl::Program::Binaries(1, std::make_pair(cbuffer, len)));
|
||||
h_.cl().build();
|
||||
unsigned char* cbuffer = buffer.data();
|
||||
h_.cl() = clCreateProgramWithBinary(context_.h_.cl(), devices.size(), devices.data(), &len, (const unsigned char **)&cbuffer, NULL, &err);
|
||||
ocl::check(err);
|
||||
ocl::check(clBuildProgram(h_.cl(), devices.size(), devices.data(), build_opt, NULL, NULL));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
h_.cl() = cl::Program(context_.h_.cl(), source);
|
||||
std::size_t srclen = source.size();
|
||||
const char * csrc = source.c_str();
|
||||
h_.cl() = clCreateProgramWithSource(context_.h_.cl(), 1, &csrc, &srclen, &err);
|
||||
try{
|
||||
ocl::check(h_.cl().build(devices));
|
||||
ocl::check(clBuildProgram(h_.cl(), devices.size(), devices.data(), build_opt, NULL, NULL));
|
||||
}catch(ocl::exception::build_program_failure const & e){
|
||||
for(std::vector< cl::Device >::const_iterator it = devices.begin(); it != devices.end(); ++it)
|
||||
std::cout << "Device : " << it->getInfo<CL_DEVICE_NAME>()
|
||||
<< "Build Status = " << h_.cl().getBuildInfo<CL_PROGRAM_BUILD_STATUS>(*it) << std::endl
|
||||
<< "Build Log = " << h_.cl().getBuildInfo<CL_PROGRAM_BUILD_LOG>(*it) << std::endl;
|
||||
for(std::vector<cl_device_id>::const_iterator it = devices.begin(); it != devices.end(); ++it)
|
||||
{
|
||||
std::cout << "Device : " << ocl::info<CL_DEVICE_NAME>(*it)
|
||||
<< "Build Status = " << ocl::info<CL_PROGRAM_BUILD_STATUS>(h_.cl(), *it) << std::endl
|
||||
<< "Build Log = " << ocl::info<CL_PROGRAM_BUILD_LOG>(h_.cl(),*it) << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
//Save cached program
|
||||
if (cache_path.size())
|
||||
{
|
||||
std::ofstream cached(fname.c_str(),std::ios::binary);
|
||||
std::vector<std::size_t> sizes = h_.cl().getInfo<CL_PROGRAM_BINARY_SIZES>();
|
||||
std::vector<std::size_t> sizes = ocl::info<CL_PROGRAM_BINARY_SIZES>(h_.cl());
|
||||
cached.write((char*)&sizes[0], sizeof(std::size_t));
|
||||
std::vector<char*> binaries = h_.cl().getInfo<CL_PROGRAM_BINARIES>();
|
||||
std::vector<unsigned char*> binaries = ocl::info<CL_PROGRAM_BINARIES>(h_.cl());
|
||||
cached.write((char*)binaries[0], std::streamsize(sizes[0]));
|
||||
}
|
||||
break;
|
||||
|
@@ -4,7 +4,6 @@
|
||||
#include <stdexcept>
|
||||
#include "isaac/types.h"
|
||||
#include "isaac/array.h"
|
||||
#include <CL/cl.hpp>
|
||||
#include "isaac/model/model.h"
|
||||
#include "isaac/symbolic/expression.h"
|
||||
#include "isaac/symbolic/preset.h"
|
||||
|
@@ -2,7 +2,6 @@
|
||||
#include <vector>
|
||||
#include "isaac/array.h"
|
||||
#include "isaac/value_scalar.h"
|
||||
#include <CL/cl.hpp>
|
||||
#include "isaac/symbolic/expression.h"
|
||||
#include "isaac/symbolic/preset.h"
|
||||
|
||||
|
@@ -1,5 +1,3 @@
|
||||
#include "CL/cl.hpp"
|
||||
|
||||
#include "isaac/wrap/clBLAS.h"
|
||||
#include "isaac/array.h"
|
||||
#include "isaac/symbolic/execute.h"
|
||||
@@ -26,18 +24,17 @@ extern "C"
|
||||
{
|
||||
std::vector<is::driver::Event> waitlist;
|
||||
for(cl_uint i = 0 ; i < numEventsInWaitList ; ++i)
|
||||
waitlist.push_back(cl::Event(eventWaitList[i]));
|
||||
waitlist.push_back(eventWaitList[i]);
|
||||
for(cl_uint i = 0 ; i < numCommandQueues ; ++i)
|
||||
{
|
||||
std::list<is::driver::Event> levents;
|
||||
cl::CommandQueue queuepp(commandQueues[i]);
|
||||
is::driver::CommandQueue queue(queuepp);
|
||||
is::driver::CommandQueue queue(commandQueues[i]);
|
||||
clRetainCommandQueue(commandQueues[i]);
|
||||
is::execution_options_type options(queue, &levents, &waitlist);
|
||||
is::execute(is::control(operation, options), is::models(options.queue(context)));
|
||||
if(events)
|
||||
{
|
||||
events[i] = static_cast<cl::Event>(levents.front())();
|
||||
events[i] = levents.front().handle().cl();
|
||||
clRetainEvent(events[i]);
|
||||
}
|
||||
}
|
||||
@@ -57,9 +54,9 @@ extern "C"
|
||||
cl_uint numEventsInWaitList, const cl_event *eventWaitList, \
|
||||
cl_event *events) \
|
||||
{ \
|
||||
is::array x(N, TYPE_ISAAC, cl::Buffer(mx), offx, incx); \
|
||||
is::array x(N, TYPE_ISAAC, mx, offx, incx); \
|
||||
clRetainMemObject(mx); \
|
||||
is::array y(N, TYPE_ISAAC, cl::Buffer(my), offy, incy); \
|
||||
is::array y(N, TYPE_ISAAC, my, offy, incy); \
|
||||
clRetainMemObject(my); \
|
||||
execute(is::assign(y, alpha*x + y), y.context(), numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events); \
|
||||
return clblasSuccess; \
|
||||
@@ -75,7 +72,7 @@ extern "C"
|
||||
cl_uint numCommandQueues, cl_command_queue *commandQueues,\
|
||||
cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *events)\
|
||||
{\
|
||||
is::array x(N, TYPE_ISAAC, cl::Buffer(mx), offx, incx);\
|
||||
is::array x(N, TYPE_ISAAC, mx, offx, incx);\
|
||||
clRetainMemObject(mx);\
|
||||
execute(is::assign(x, alpha*x), x.context(), numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
|
||||
return clblasSuccess;\
|
||||
@@ -92,9 +89,9 @@ extern "C"
|
||||
cl_uint numCommandQueues, cl_command_queue *commandQueues,\
|
||||
cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *events)\
|
||||
{\
|
||||
const is::array x(N, TYPE_ISAAC, cl::Buffer(mx), offx, incx);\
|
||||
const is::array x(N, TYPE_ISAAC, mx, offx, incx);\
|
||||
clRetainMemObject(mx);\
|
||||
is::array y(N, TYPE_ISAAC, cl::Buffer(my), offy, incy);\
|
||||
is::array y(N, TYPE_ISAAC, my, offy, incy);\
|
||||
clRetainMemObject(my);\
|
||||
execute(is::assign(y, x), y.context(), numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
|
||||
return clblasSuccess;\
|
||||
@@ -112,11 +109,11 @@ extern "C"
|
||||
cl_command_queue *commandQueues, cl_uint numEventsInWaitList, \
|
||||
const cl_event *eventWaitList, cl_event *events) \
|
||||
{ \
|
||||
is::array x(N, TYPE_ISAAC, cl::Buffer(mx), offx, incx); \
|
||||
is::array x(N, TYPE_ISAAC, mx, offx, incx); \
|
||||
clRetainMemObject(mx); \
|
||||
is::array y(N, TYPE_ISAAC, cl::Buffer(my), offy, incy); \
|
||||
is::array y(N, TYPE_ISAAC, my, offy, incy); \
|
||||
clRetainMemObject(my); \
|
||||
is::scalar s(TYPE_ISAAC, cl::Buffer(dotProduct), offDP); \
|
||||
is::scalar s(TYPE_ISAAC, dotProduct, offDP); \
|
||||
clRetainMemObject(dotProduct); \
|
||||
execute(is::assign(s, dot(x,y)), s.context(), numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events); \
|
||||
return clblasSuccess; \
|
||||
@@ -132,9 +129,9 @@ extern "C"
|
||||
cl_mem /*scratchBuff*/, cl_uint numCommandQueues, cl_command_queue *commandQueues,\
|
||||
cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *events)\
|
||||
{\
|
||||
is::array x(N, TYPE_ISAAC, cl::Buffer(mx), offx, incx);\
|
||||
is::array x(N, TYPE_ISAAC, mx, offx, incx);\
|
||||
clRetainMemObject(mx);\
|
||||
is::scalar s(TYPE_ISAAC, cl::Buffer(asum), offAsum);\
|
||||
is::scalar s(TYPE_ISAAC, asum, offAsum);\
|
||||
clRetainMemObject(asum);\
|
||||
execute(is::assign(s, sum(abs(x))), s.context(), numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);\
|
||||
return clblasSuccess;\
|
||||
@@ -159,14 +156,14 @@ extern "C"
|
||||
std::swap(M, N);\
|
||||
transA = (transA==clblasTrans)?clblasNoTrans:clblasTrans;\
|
||||
}\
|
||||
is::array A(M, N, TYPE_ISAAC, cl::Buffer(mA), offA, lda);\
|
||||
is::array A(M, N, TYPE_ISAAC, mA, offA, lda);\
|
||||
clRetainMemObject(mA);\
|
||||
\
|
||||
is::int_t sx = N, sy = M;\
|
||||
if(transA) std::swap(sx, sy);\
|
||||
is::array x(sx, TYPE_ISAAC, cl::Buffer(mx), offx, incx);\
|
||||
is::array x(sx, TYPE_ISAAC, mx, offx, incx);\
|
||||
clRetainMemObject(mx);\
|
||||
is::array y(sy, TYPE_ISAAC, cl::Buffer(my), offy, incy);\
|
||||
is::array y(sy, TYPE_ISAAC, my, offy, incy);\
|
||||
clRetainMemObject(my);\
|
||||
\
|
||||
is::driver::Context const & context = A.context();\
|
||||
@@ -207,11 +204,11 @@ extern "C"
|
||||
if(transA==clblasTrans) std::swap(As1, As2);\
|
||||
if(transB==clblasTrans) std::swap(Bs1, Bs2);\
|
||||
/*Struct*/\
|
||||
is::array A(As1, As2, TYPE_ISAAC, cl::Buffer(mA), offA, lda);\
|
||||
is::array A(As1, As2, TYPE_ISAAC, mA, offA, lda);\
|
||||
clRetainMemObject(mA);\
|
||||
is::array B(Bs1, Bs2, TYPE_ISAAC, cl::Buffer(mB), offB, ldb);\
|
||||
is::array B(Bs1, Bs2, TYPE_ISAAC, mB, offB, ldb);\
|
||||
clRetainMemObject(mB);\
|
||||
is::array C(M, N, TYPE_ISAAC, cl::Buffer(mC), offC, ldc);\
|
||||
is::array C(M, N, TYPE_ISAAC, mC, offC, ldc);\
|
||||
clRetainMemObject(mC);\
|
||||
is::driver::Context const & context = C.context();\
|
||||
/*Operation*/\
|
||||
|
@@ -115,7 +115,7 @@ def main():
|
||||
include =' src/include'.split() + ['external/boost/include', os.path.join(find_module("numpy")[1], "core", "include")]
|
||||
|
||||
#Source files
|
||||
src = 'src/lib/array.cpp src/lib/value_scalar.cpp src/lib/wrap/clBLAS.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/preset.cpp src/lib/symbolic/io.cpp src/lib/symbolic/expression.cpp src/lib/model/model.cpp src/lib/model/predictors/random_forest.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/driver/context.cpp src/lib/driver/program.cpp src/lib/driver/backend.cpp src/lib/driver/platform.cpp src/lib/driver/ndrange.cpp src/lib/driver/kernel.cpp src/lib/driver/handle.cpp src/lib/driver/event.cpp src/lib/driver/device.cpp src/lib/driver/command_queue.cpp src/lib/driver/check.cpp src/lib/driver/buffer.cpp src/lib/backend/parse.cpp src/lib/backend/mapped_object.cpp src/lib/backend/templates/gemm.cpp src/lib/backend/templates/base.cpp src/lib/backend/templates/axpy.cpp src/lib/backend/templates/ger.cpp src/lib/backend/templates/gemv.cpp src/lib/backend/templates/dot.cpp src/lib/backend/stream.cpp src/lib/backend/keywords.cpp src/lib/backend/binder.cpp '.split() + [os.path.join('src', 'wrap', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'model.cpp', 'exceptions.cpp']]
|
||||
src = 'src/lib/symbolic/preset.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/io.cpp src/lib/symbolic/expression.cpp src/lib/model/model.cpp src/lib/model/predictors/random_forest.cpp src/lib/backend/templates/gemv.cpp src/lib/backend/templates/axpy.cpp src/lib/backend/templates/gemm.cpp src/lib/backend/templates/ger.cpp src/lib/backend/templates/dot.cpp src/lib/backend/templates/base.cpp src/lib/backend/mapped_object.cpp src/lib/backend/stream.cpp src/lib/backend/parse.cpp src/lib/backend/keywords.cpp src/lib/backend/binder.cpp src/lib/array.cpp src/lib/value_scalar.cpp src/lib/driver/backend.cpp src/lib/driver/device.cpp src/lib/driver/kernel.cpp src/lib/driver/buffer.cpp src/lib/driver/platform.cpp src/lib/driver/check.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/context.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/handle.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/wrap/clBLAS.cpp '.split() + [os.path.join('src', 'wrap', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'model.cpp', 'exceptions.cpp']]
|
||||
boostsrc = 'external/boost/libs/'
|
||||
for s in ['numpy','python','smart_ptr','system','thread']:
|
||||
src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]
|
||||
|
@@ -17,7 +17,7 @@ void test_element_wise_vector(T epsilon, simple_vector_base<T> & cx, simple_vect
|
||||
isc::numeric_type dtype = x.dtype();
|
||||
isc::driver::Context const & ctx = x.context();
|
||||
isc::driver::CommandQueue queue = isc::driver::queues[ctx][0];
|
||||
cl_command_queue clqueue = queue.handle().cl()();
|
||||
cl_command_queue clqueue = queue.handle().cl();
|
||||
int_t N = cz.size();
|
||||
|
||||
T aa = -4.378, bb=3.5;
|
||||
|
@@ -16,7 +16,7 @@ enum interface_t
|
||||
CPP
|
||||
};
|
||||
|
||||
#define CHANDLE(X) X.data().handle().cl()()
|
||||
#define CHANDLE(X) X.data().handle().cl()
|
||||
#define OFF(X) X.start()[0] + X.start()[1]*X.ld()
|
||||
#define LD(X) X.ld()
|
||||
|
||||
|
@@ -16,7 +16,7 @@ void test_reduction(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T
|
||||
isc::driver::Context const & ctx = x.context();
|
||||
int_t N = cx.size();
|
||||
isc::driver::CommandQueue queue = isc::driver::queues[ctx][0];
|
||||
cl_command_queue clqueue = queue.handle().cl()();
|
||||
cl_command_queue clqueue = queue.handle().cl();
|
||||
isc::array scratch(N, x.dtype());
|
||||
|
||||
unsigned int failure_count = 0;
|
||||
|
@@ -55,7 +55,7 @@ void test_impl(T epsilon, simple_matrix_base<T> & cC, simple_matrix_base<T> cons
|
||||
|
||||
if(interf==clBLAS)
|
||||
{
|
||||
cl_command_queue clqueue = queue.handle().cl()();
|
||||
cl_command_queue clqueue = queue.handle().cl();
|
||||
|
||||
//// //Row-major
|
||||
RUN_TEST("GEMM(ROW, N, N)", BLAS<T>::F(clblasSgemm,clblasDgemm)(clblasRowMajor, clblasNoTrans, clblasNoTrans, N, M, K, alpha, CHANDLE(B), OFF(B), LD(B),
|
||||
|
@@ -47,7 +47,7 @@ void test_row_wise_reduction(T epsilon, simple_vector_base<T> & cy, simple_matri
|
||||
|
||||
if(interf==clBLAS)
|
||||
{
|
||||
cl_command_queue clqueue = queue.handle().cl()();
|
||||
cl_command_queue clqueue = queue.handle().cl();
|
||||
|
||||
|
||||
TEST_OPERATION("GEMV(ROW, NoTrans)", M, N, yi+=cA(i,j)*cx[j], cy[i] = alpha*yi + beta*cy[i],
|
||||
|
Reference in New Issue
Block a user