Driver: made cl and cu attributes private in Handle<>

This commit is contained in:
Philippe Tillet
2015-07-23 09:39:13 -07:00
parent 3e7791ad07
commit a2b533b9a8
15 changed files with 80 additions and 62 deletions

View File

@@ -95,7 +95,8 @@ void bench(isc::numeric_type dtype, std::string operation)
//
// MACROS FOR BENCHMARKING
//
// Expands to the result of calling operator() on the OpenCL object reached through X.handle().
// (This diff view lists the pre-change `.cl` member form followed by the post-change `.cl()` accessor form.)
#define CL_HANDLE(X) (*X.handle().cl)()
#define CL_HANDLE(X) X.handle().cl()()
#define BENCHMARK_ISAAC(OP, PERF) \
{\
std::vector<long> times;\

View File

@@ -38,11 +38,16 @@ public:
Handle(backend_type backend);
bool operator==(Handle const & other) const;
bool operator<(Handle const & other) const;
// Accessors for the OpenCL-side object of this handle: a mutable overload and a
// read-only overload for use through const handles.
CLType & cl();
CLType const & cl() const;
#ifdef ISAAC_WITH_CUDA
// Mutable accessor for the CUDA-side object; only declared when ISAAC_WITH_CUDA is defined.
// The return type names the CUType template parameter (identifiers are case-sensitive,
// so the spelling must match the parameter exactly).
CUType & cu();
#endif
~Handle();
public:
std::shared_ptr<CLType> cl;
std::shared_ptr<CUType> cu;
private:
std::shared_ptr<CLType> cl_;
std::shared_ptr<CUType> cu_;
private:
backend_type backend_;
};

View File

@@ -9,7 +9,7 @@ namespace driver
// Wraps an existing cl::Buffer: the backend is fixed to OPENCL, the context is taken
// from the buffer's CL_MEM_CONTEXT info, and the buffer is assigned into the OpenCL
// side of the handle h_.
// (This diff view lists the pre-change `*h_.cl` assignment followed by the post-change `h_.cl()` one.)
Buffer::Buffer(cl::Buffer const & buffer) : backend_(OPENCL), context_(buffer.getInfo<CL_MEM_CONTEXT>()), h_(backend_)
{
*h_.cl = buffer;
h_.cl() = buffer;
}
Buffer::Buffer(Context const & context, std::size_t size) : backend_(context.backend_), context_(context), h_(backend_)
@@ -24,7 +24,7 @@ Buffer::Buffer(Context const & context, std::size_t size) : backend_(context.bac
#endif
case OPENCL:
cl_int err;
*h_.cl = cl::Buffer(*context.h_.cl, CL_MEM_READ_WRITE, size, NULL, &err);
h_.cl() = cl::Buffer(context.h_.cl(), CL_MEM_READ_WRITE, size, NULL, &err);
ocl::check(err);
break;
default:

View File

@@ -15,7 +15,7 @@ namespace driver
// Wraps an existing cl::CommandQueue: the backend is fixed to OPENCL, the context and
// device are taken from the queue's CL_QUEUE_CONTEXT and CL_QUEUE_DEVICE info, and the
// queue is assigned into the OpenCL side of the handle h_.
// (This diff view lists the pre-change `*h_.cl` assignment followed by the post-change `h_.cl()` one.)
CommandQueue::CommandQueue(cl::CommandQueue const & queue) : backend_(OPENCL), context_(queue.getInfo<CL_QUEUE_CONTEXT>()), device_(queue.getInfo<CL_QUEUE_DEVICE>()), h_(backend_)
{
*h_.cl = queue;
h_.cl() = queue;
}
CommandQueue::CommandQueue(Context const & context, Device const & device, cl_command_queue_properties properties): backend_(device.backend_), context_(context), device_(device), h_(backend_)
@@ -29,7 +29,7 @@ CommandQueue::CommandQueue(Context const & context, Device const & device, cl_co
#endif
case OPENCL:
cl_int err;
*h_.cl = cl::CommandQueue(*context.h_.cl, *device.h_.cl, properties, &err);
h_.cl() = cl::CommandQueue(context.h_.cl(), device.h_.cl(), properties, &err);
ocl::check(err);
break;
default: throw;
@@ -49,7 +49,7 @@ void CommandQueue::synchronize()
#ifdef ISAAC_WITH_CUDA
case CUDA: cuda::check(cuStreamSynchronize(*h_.cu)); break;
#endif
case OPENCL: h_.cl->finish(); break;
case OPENCL: h_.cl().finish(); break;
default: throw;
}
}
@@ -68,7 +68,7 @@ Event CommandQueue::enqueue(Kernel const & kernel, NDRange global, driver::NDRan
break;
#endif
case OPENCL:
ocl::check(h_.cl->enqueueNDRangeKernel(*kernel.h_.cl, cl::NullRange, (cl::NDRange)global, (cl::NDRange)local, NULL, event.h_.cl.get()));
ocl::check(h_.cl().enqueueNDRangeKernel(kernel.h_.cl(), cl::NullRange, (cl::NDRange)global, (cl::NDRange)local, NULL, &event.h_.cl()));
break;
default: throw;
}
@@ -88,7 +88,7 @@ void CommandQueue::write(Buffer const & buffer, bool blocking, std::size_t offse
break;
#endif
case OPENCL:
h_.cl->enqueueWriteBuffer(*buffer.h_.cl, blocking, offset, size, ptr);
h_.cl().enqueueWriteBuffer(buffer.h_.cl(), blocking, offset, size, ptr);
break;
default: throw;
}
@@ -107,7 +107,7 @@ void CommandQueue::read(Buffer const & buffer, bool blocking, std::size_t offset
break;
#endif
case OPENCL:
h_.cl->enqueueReadBuffer(*buffer.h_.cl, blocking, offset, size, ptr);
h_.cl().enqueueReadBuffer(buffer.h_.cl(), blocking, offset, size, ptr);
break;
default: throw;
}

View File

@@ -9,7 +9,7 @@ namespace driver
// Wraps an existing cl::Context: the backend is fixed to OPENCL, device_ is built from
// the first entry of the context's CL_CONTEXT_DEVICES list, and the context is assigned
// into the OpenCL side of the handle h_.
// (This diff view lists the pre-change `*h_.cl` assignment followed by the post-change `h_.cl()` one.)
Context::Context(cl::Context const & context) : backend_(OPENCL), device_(context.getInfo<CL_CONTEXT_DEVICES>()[0]), h_(backend_)
{
*h_.cl = context;
h_.cl() = context;
}
Context::Context(Device const & device) : backend_(device.backend_), device_(device), h_(backend_)
@@ -30,7 +30,7 @@ Context::Context(Device const & device) : backend_(device.backend_), device_(dev
#endif
case OPENCL:
cl_int err;
*h_.cl = cl::Context(std::vector<cl::Device>(1, *device_.h_.cl), NULL, NULL, NULL, &err);
h_.cl() = cl::Context(std::vector<cl::Device>(1, device_.h_.cl()), NULL, NULL, NULL, &err);
ocl::check(err);
break;
default:

View File

@@ -24,7 +24,7 @@ Device::Device(int ordinal): backend_(CUDA), h_(backend_)
// Wraps an existing cl::Device: the backend is fixed to OPENCL and the device is
// assigned into the OpenCL side of the handle h_.
// (This diff view lists the pre-change `*h_.cl` body followed by the post-change `h_.cl()` body.)
Device::Device(cl::Device const & device) : backend_(OPENCL), h_(backend_)
{ *h_.cl = device; }
{ h_.cl() = device; }
// Returns the backend tag stored in backend_ for this device.
backend_type Device::backend() const
{
    return this->backend_;
}
@@ -36,7 +36,7 @@ unsigned int Device::address_bits() const
#ifdef ISAAC_WITH_CUDA
case CUDA: return sizeof(long long)*8;
#endif
case OPENCL: return h_.cl->getInfo<CL_DEVICE_ADDRESS_BITS>();
case OPENCL: return h_.cl().getInfo<CL_DEVICE_ADDRESS_BITS>();
default: throw;
}
@@ -50,7 +50,7 @@ driver::Platform Device::platform() const
#ifdef ISAAC_WITH_CUDA
case CUDA: return Platform(CUDA);
#endif
case OPENCL: return Platform(h_.cl->getInfo<CL_DEVICE_PLATFORM>());
case OPENCL: return Platform(h_.cl().getInfo<CL_DEVICE_PLATFORM>());
default: throw;
}
}
@@ -65,7 +65,7 @@ std::string Device::name() const
cuda::check(cuDeviceGetName(tmp, 128, *h_.cu));
return std::string(tmp);
#endif
case OPENCL: return h_.cl->getInfo<CL_DEVICE_NAME>();
case OPENCL: return h_.cl().getInfo<CL_DEVICE_NAME>();
default: throw;
}
}
@@ -77,7 +77,7 @@ std::string Device::vendor_str() const
#ifdef ISAAC_WITH_CUDA
case CUDA: return "NVidia";
#endif
case OPENCL: return h_.cl->getInfo<CL_DEVICE_VENDOR>();
case OPENCL: return h_.cl().getInfo<CL_DEVICE_VENDOR>();
default: throw;
}
}
@@ -111,7 +111,7 @@ std::vector<size_t> Device::max_work_item_sizes() const
}
#endif
case OPENCL:
return h_.cl->getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>();
return h_.cl().getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>();
default:
throw;
}
@@ -124,7 +124,7 @@ device_type Device::type() const
#ifdef ISAAC_WITH_CUDA
case CUDA: return DEVICE_TYPE_GPU;
#endif
case OPENCL: return static_cast<device_type>(h_.cl->getInfo<CL_DEVICE_TYPE>());
case OPENCL: return static_cast<device_type>(h_.cl().getInfo<CL_DEVICE_TYPE>());
default: throw;
}
}
@@ -138,7 +138,7 @@ std::string Device::extensions() const
return "";
#endif
case OPENCL:
return h_.cl->getInfo<CL_DEVICE_EXTENSIONS>();
return h_.cl().getInfo<CL_DEVICE_EXTENSIONS>();
default: throw;
}
}
@@ -155,7 +155,7 @@ std::string Device::extensions() const
switch(backend_)\
{\
CUDACASE(CUNAME)\
case OPENCL: return h_.cl->getInfo<CLNAME>();\
case OPENCL: return h_.cl().getInfo<CLNAME>();\
default: throw;\
}\
}\
@@ -172,7 +172,7 @@ std::pair<unsigned int, unsigned int> Device::nv_compute_capability() const
switch(backend_)
{
case OPENCL:
return std::pair<unsigned int, unsigned int>( h_.cl->getInfo<CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV>(), h_.cl->getInfo<CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV> ());
return std::pair<unsigned int, unsigned int>( h_.cl().getInfo<CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV>(), h_.cl().getInfo<CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV> ());
#ifdef ISAAC_WITH_CUDA
case CUDA:
return std::pair<unsigned int, unsigned int>(cuGetInfo<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR>(), cuGetInfo<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR>());

View File

@@ -23,7 +23,7 @@ Event::Event(backend_type backend) : backend_(backend), h_(backend_)
// Wraps an existing cl::Event: the backend is fixed to OPENCL and the event is
// assigned into the OpenCL side of the handle h_.
// (This diff view lists the pre-change `*h_.cl` assignment followed by the post-change `h_.cl()` one.)
Event::Event(cl::Event const & event) : backend_(OPENCL), h_(backend_)
{
*h_.cl = event;
h_.cl() = event;
}
long Event::elapsed_time() const
@@ -37,7 +37,7 @@ long Event::elapsed_time() const
return 1e6*time;
#endif
case OPENCL:
return (h_.cl->getProfilingInfo<CL_PROFILING_COMMAND_END>() - h_.cl->getProfilingInfo<CL_PROFILING_COMMAND_START>());
return (h_.cl().getProfilingInfo<CL_PROFILING_COMMAND_END>() - h_.cl().getProfilingInfo<CL_PROFILING_COMMAND_START>());
default:
throw;
}
@@ -45,7 +45,7 @@ long Event::elapsed_time() const
// Conversion to cl::Event: returns, by value, the OpenCL event held in the handle h_.
// (This diff view lists the pre-change `*h_.cl` return followed by the post-change `h_.cl()` one.)
Event::operator cl::Event()
{
return *h_.cl;
return h_.cl();
}
}

View File

@@ -41,9 +41,9 @@ Handle<CLType, CUType>::Handle(backend_type backend): backend_(backend)
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA: cu.reset(new CUType());
case CUDA: cu_.reset(new CUType());
#endif
case OPENCL: cl.reset(new CLType());
case OPENCL: cl_.reset(new CLType());
}
}
@@ -52,10 +52,10 @@ bool Handle<CLType, CUType>::operator==(Handle const & other) const
{
// Two handles compare equal only when both use the same backend and their
// underlying objects compare equal; in every other case the result is false.
#ifdef ISAAC_WITH_CUDA
if(backend_==CUDA && other.backend_==CUDA)
return (*cu)==(*other.cu);
// Dereference cu_ directly: this function and `other` are const, and the only
// cu() accessor declared for Handle is non-const, so it cannot be called here.
return (*cu_)==(*other.cu_);
#endif
if(backend_==OPENCL && other.backend_==OPENCL)
return (*cl)()==(*other.cl)();
return cl()()==other.cl()();
return false;
}
@@ -64,10 +64,10 @@ bool Handle<CLType, CUType>::operator<(Handle const & other) const
{
#ifdef ISAAC_WITH_CUDA
if(backend_==CUDA && other.backend_==CUDA)
return (*cu)<(*other.cu);
return (*cu_)<(*other.cu_);
#endif
if(backend_==OPENCL && other.backend_==OPENCL)
return (*cl)()<(*other.cl)();
return (*cl_)()<(*other.cl_)();
#ifdef ISAAC_WITH_CUDA
if(backend_==CUDA && other.backend_==OPENCL)
return true;
@@ -78,21 +78,33 @@ bool Handle<CLType, CUType>::operator<(Handle const & other) const
// Destructor. When the CUDA-side pointer is non-null and this handle is its only
// owner (unique()), and the backend is CUDA, the pointed-to object is passed to
// _delete; for any other backend nothing is done explicitly here.
// NOTE(review): _delete is defined outside this view — presumably it releases the
// underlying CUDA resource; confirm.
// NOTE(review): std::shared_ptr::unique() is deprecated in C++17 and removed in
// C++20; use_count() == 1 is the replacement if the project moves to a newer standard.
// (This diff view lists each changed line twice: pre-change `cu`, then post-change `cu_`.)
template<class CLType, class CUType>
Handle<CLType, CUType>::~Handle()
{
if(cu && cu.unique())
if(cu_ && cu_.unique())
{
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA: _delete(*cu); break;
case CUDA: _delete(*cu_); break;
#endif
default: break;
}
}
}
// Mutable access to the OpenCL-side object referenced by cl_.
template<class CLType, class CUType>
CLType & Handle<CLType, CUType>::cl()
{
    return *(this->cl_);
}
// Read-only access to the OpenCL-side object referenced by cl_.
template<class CLType, class CUType>
CLType const & Handle<CLType, CUType>::cl() const
{
    return *(this->cl_);
}
#ifdef ISAAC_WITH_CUDA
// Mutable access to the CUDA-side object referenced by cu_.
template<class CLType, class CUType>
CUType & Handle<CLType, CUType>::cu()
{ return *(this->cu_); }
template class Handle<cl::Buffer, CUdeviceptr>;
template class Handle<cl::CommandQueue, CUstream>;
template class Handle<cl::Context, CUcontext>;

View File

@@ -20,7 +20,7 @@ Kernel::Kernel(Program const & program, const char * name) : backend_(program.ba
break;
#endif
case OPENCL:
*h_.cl = cl::Kernel(*program.h_.cl, name);
h_.cl() = cl::Kernel(program.h_.cl(), name);
break;
default:
throw;
@@ -44,7 +44,7 @@ void Kernel::setArg(unsigned int index, std::size_t size, void* ptr)
break;
#endif
case OPENCL:
h_.cl->setArg(index, size, ptr);
h_.cl().setArg(index, size, ptr);
break;
default:
throw;
@@ -61,7 +61,7 @@ void Kernel::setArg(unsigned int index, Buffer const & data)
setArg(index, sizeof(CUdeviceptr), data.h_.cu.get()); break;
}
#endif
case OPENCL: h_.cl->setArg(index, *data.h_.cl); break;
case OPENCL: h_.cl().setArg(index, data.h_.cl()); break;
default: throw;
}
}
@@ -81,12 +81,12 @@ void Kernel::setSizeArg(unsigned int index, size_t N)
case OPENCL:
if(address_bits_==32){
int32_t NN = N;
h_.cl->setArg(index, 4, &NN);
h_.cl().setArg(index, 4, &NN);
}
else if(address_bits_==64)
{
int64_t NN = N;
h_.cl->setArg(index, 8, &NN);
h_.cl().setArg(index, 8, &NN);
}
else
throw;

View File

@@ -95,7 +95,7 @@ Program::Program(Context const & context, std::string const & source) : backend_
#endif
case OPENCL:
{
std::vector<cl::Device> devices = context_.h_.cl->getInfo<CL_CONTEXT_DEVICES>();
std::vector<cl::Device> devices = context_.h_.cl().getInfo<CL_CONTEXT_DEVICES>();
std::string prefix;
for(std::vector<cl::Device >::const_iterator it = devices.begin(); it != devices.end(); ++it)
@@ -115,29 +115,29 @@ Program::Program(Context const & context, std::string const & source) : backend_
buffer.resize(len);
cached.read((char*)buffer.data(), std::streamsize(len));
char* cbuffer = buffer.data();
*h_.cl = cl::Program(*context_.h_.cl, devices, cl::Program::Binaries(1, std::make_pair(cbuffer, len)));
h_.cl->build();
h_.cl() = cl::Program(context_.h_.cl(), devices, cl::Program::Binaries(1, std::make_pair(cbuffer, len)));
h_.cl().build();
return;
}
}
*h_.cl = cl::Program(*context_.h_.cl, source);
h_.cl() = cl::Program(context_.h_.cl(), source);
try{
ocl::check(h_.cl->build(devices));
ocl::check(h_.cl().build(devices));
}catch(ocl::exception::build_program_failure const & e){
for(std::vector< cl::Device >::const_iterator it = devices.begin(); it != devices.end(); ++it)
std::cout << "Device : " << it->getInfo<CL_DEVICE_NAME>()
<< "Build Status = " << h_.cl->getBuildInfo<CL_PROGRAM_BUILD_STATUS>(*it) << std::endl
<< "Build Log = " << h_.cl->getBuildInfo<CL_PROGRAM_BUILD_LOG>(*it) << std::endl;
<< "Build Status = " << h_.cl().getBuildInfo<CL_PROGRAM_BUILD_STATUS>(*it) << std::endl
<< "Build Log = " << h_.cl().getBuildInfo<CL_PROGRAM_BUILD_LOG>(*it) << std::endl;
}
//Save cached program
if (cache_path.size())
{
std::ofstream cached(fname.c_str(),std::ios::binary);
std::vector<std::size_t> sizes = h_.cl->getInfo<CL_PROGRAM_BINARY_SIZES>();
std::vector<std::size_t> sizes = h_.cl().getInfo<CL_PROGRAM_BINARY_SIZES>();
cached.write((char*)&sizes[0], sizeof(std::size_t));
std::vector<char*> binaries = h_.cl->getInfo<CL_PROGRAM_BINARIES>();
std::vector<char*> binaries = h_.cl().getInfo<CL_PROGRAM_BINARIES>();
cached.write((char*)binaries[0], std::streamsize(sizes[0]));
}
break;

View File

@@ -17,7 +17,7 @@ void test_element_wise_vector(T epsilon, simple_vector_base<T> & cx, simple_vect
isc::numeric_type dtype = x.dtype();
isc::driver::Context const & ctx = x.context();
isc::driver::CommandQueue queue = isc::driver::queues[ctx][0];
cl_command_queue clqueue = (*queue.handle().cl)();
cl_command_queue clqueue = queue.handle().cl()();
int_t N = cz.size();
T aa = -4.378, bb=3.5;
@@ -45,15 +45,15 @@ void test_element_wise_vector(T epsilon, simple_vector_base<T> & cx, simple_vect
}
#define PREFIX "[C]"
RUN_TEST_VECTOR_AXPY("AXPY", cz[i] = a*cx[i] + cz[i], BLAS<T>::F(clblasSaxpy, clblasDaxpy)(N, a, (*x.data().handle().cl)(), x.start()[0], x.stride()[0],
(*z.data().handle().cl)(), z.start()[0], z.stride()[0],
RUN_TEST_VECTOR_AXPY("AXPY", cz[i] = a*cx[i] + cz[i], BLAS<T>::F(clblasSaxpy, clblasDaxpy)(N, a, CHANDLE(x), x.start()[0], x.stride()[0],
CHANDLE(z), z.start()[0], z.stride()[0],
1, &clqueue, 0, NULL, NULL));
RUN_TEST_VECTOR_AXPY("COPY", cz[i] = cx[i], BLAS<T>::F(clblasScopy, clblasDcopy)(N, (*x.data().handle().cl)(), x.start()[0], x.stride()[0],
(*z.data().handle().cl)(), z.start()[0], z.stride()[0],
RUN_TEST_VECTOR_AXPY("COPY", cz[i] = cx[i], BLAS<T>::F(clblasScopy, clblasDcopy)(N, CHANDLE(x), x.start()[0], x.stride()[0],
CHANDLE(z), z.start()[0], z.stride()[0],
1, &clqueue, 0, NULL, NULL));
RUN_TEST_VECTOR_AXPY("SCAL", cz[i] = a*cz[i], BLAS<T>::F(clblasSscal, clblasDscal)(N, a, (*z.data().handle().cl)(), z.start()[0], z.stride()[0],
RUN_TEST_VECTOR_AXPY("SCAL", cz[i] = a*cz[i], BLAS<T>::F(clblasSscal, clblasDscal)(N, a, CHANDLE(z), z.start()[0], z.stride()[0],
1, &clqueue, 0, NULL, NULL));

View File

@@ -16,7 +16,7 @@ enum interface_t
CPP
};
// Expands to the result of calling operator() on the OpenCL object reached through
// X.data().handle().
// (This diff view lists the pre-change `.cl` member form followed by the post-change `.cl()` accessor form.)
#define CHANDLE(X) (*X.data().handle().cl)()
#define CHANDLE(X) X.data().handle().cl()()
// Expands to X.start()[0] + X.start()[1]*X.ld().
// Both the argument and the whole expansion are parenthesized so the macro keeps its
// meaning inside larger expressions (e.g. 2*OFF(a)) and with argument expressions
// such as *ptr.
#define OFF(X) ((X).start()[0] + (X).start()[1]*(X).ld())
// Expands to X.ld(); the argument is parenthesized so expressions such as *ptr
// can be passed without the member call binding to the wrong operand.
#define LD(X) (X).ld()

View File

@@ -16,7 +16,7 @@ void test_reduction(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T
isc::driver::Context const & ctx = x.context();
int_t N = cx.size();
isc::driver::CommandQueue queue = isc::driver::queues[ctx][0];
cl_command_queue clqueue = (*queue.handle().cl)();
cl_command_queue clqueue = queue.handle().cl()();
isc::array scratch(N, x.dtype());
unsigned int failure_count = 0;
@@ -45,10 +45,10 @@ void test_reduction(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T
cout << endl;
#define PREFIX "[C]"
RUN_TEST("DOT", cs+=cx[i]*cy[i], 0, cs, BLAS<T>::F(clblasSdot, clblasDdot)(N, (*ds.data().handle().cl)(), 0, (*x.data().handle().cl)(), x.start()[0], x.stride()[0],
(*y.data().handle().cl)(), y.start()[0], y.stride()[0],
RUN_TEST("DOT", cs+=cx[i]*cy[i], 0, cs, BLAS<T>::F(clblasSdot, clblasDdot)(N, CHANDLE(ds), 0, CHANDLE(x), x.start()[0], x.stride()[0],
CHANDLE(y), y.start()[0], y.stride()[0],
CHANDLE(scratch), 1, &clqueue, 0, NULL, NULL));
RUN_TEST("ASUM", cs+=std::fabs(cx[i]), 0, cs, BLAS<T>::F(clblasSasum, clblasDasum)(N, (*ds.data().handle().cl)(), 0, (*x.data().handle().cl)(), x.start()[0], x.stride()[0],
RUN_TEST("ASUM", cs+=std::fabs(cx[i]), 0, cs, BLAS<T>::F(clblasSasum, clblasDasum)(N, CHANDLE(ds), 0, CHANDLE(x), x.start()[0], x.stride()[0],
CHANDLE(scratch), 1, &clqueue, 0, NULL, NULL));
#undef PREFIX
#define PREFIX "[C++]"

View File

@@ -55,7 +55,7 @@ void test_impl(T epsilon, simple_matrix_base<T> & cC, simple_matrix_base<T> cons
if(interf==clBLAS)
{
cl_command_queue clqueue = (*queue.handle().cl)();
cl_command_queue clqueue = queue.handle().cl()();
//// //Row-major
RUN_TEST("GEMM(ROW, N, N)", BLAS<T>::F(clblasSgemm,clblasDgemm)(clblasRowMajor, clblasNoTrans, clblasNoTrans, N, M, K, alpha, CHANDLE(B), OFF(B), LD(B),

View File

@@ -47,7 +47,7 @@ void test_row_wise_reduction(T epsilon, simple_vector_base<T> & cy, simple_matri
if(interf==clBLAS)
{
cl_command_queue clqueue = (*queue.handle().cl)();
cl_command_queue clqueue = queue.handle().cl()();
TEST_OPERATION("GEMV(ROW, NoTrans)", M, N, yi+=cA(i,j)*cx[j], cy[i] = alpha*yi + beta*cy[i],