Added a control flow API
This commit is contained in:
@@ -32,7 +32,7 @@ void bench(ad::numeric_type dtype)
|
|||||||
times.clear();\
|
times.clear();\
|
||||||
total_time = 0;\
|
total_time = 0;\
|
||||||
OP;\
|
OP;\
|
||||||
while(total_time < 5e-1){\
|
while(total_time < 1e-1){\
|
||||||
timer.start(); \
|
timer.start(); \
|
||||||
OP;\
|
OP;\
|
||||||
times.push_back(timer.get());\
|
times.push_back(timer.get());\
|
||||||
|
@@ -5,38 +5,47 @@
|
|||||||
|
|
||||||
namespace ad = atidlas;
|
namespace ad = atidlas;
|
||||||
|
|
||||||
|
#ifdef BENCH_CUBLAS
|
||||||
|
__global__ void dummy(){}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
for(ad::cl_ext::queues_t::iterator it = ad::cl_ext::queues.begin() ; it != ad::cl_ext::queues.end() ; ++it)
|
for(ad::cl_ext::queues_t::iterator it = ad::cl_ext::queues.begin() ; it != ad::cl_ext::queues.end() ; ++it)
|
||||||
{
|
{
|
||||||
ad::array x(10, ad::FLOAT_TYPE, it->first);
|
cl::CommandQueue queue = it->second[0];
|
||||||
cl::Device device = it->second[0].getInfo<CL_QUEUE_DEVICE>();
|
cl::Device device = queue.getInfo<CL_QUEUE_DEVICE>();
|
||||||
ad::tools::timer t;
|
cl::Program program("__kernel void dummy(){}");
|
||||||
|
program.build();
|
||||||
|
cl::Kernel kernel(program, "dummy");
|
||||||
|
|
||||||
|
cl::NDRange offset = cl::NullRange;
|
||||||
|
cl::NDRange global(1);
|
||||||
|
cl::NDRange local(1);
|
||||||
|
|
||||||
|
cl::Event event;
|
||||||
std::cout << "Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
|
std::cout << "Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
|
||||||
std::cout << "-------------------------" << std::endl;
|
std::cout << "-------------------------" << std::endl;
|
||||||
x = x + x;
|
|
||||||
ad::cl_ext::synchronize(x.context());
|
|
||||||
t.start();\
|
|
||||||
for(unsigned int i = 0 ; i < 100 ; ++i){
|
|
||||||
x = x + x;
|
|
||||||
ad::cl_ext::synchronize(x.context());
|
|
||||||
}
|
|
||||||
std::cout << "Kernel launch overhead: " << t.get()/100 << std::endl;
|
|
||||||
std::cout << "Expression tree creation:" << std::endl;
|
|
||||||
#define BENCH(CREATE, STR) \
|
|
||||||
{\
|
|
||||||
ad::array_expression tmp1(CREATE);\
|
|
||||||
t.start();\
|
|
||||||
for(unsigned int i = 0 ; i < 1000 ; ++i)\
|
|
||||||
ad::array_expression tmp2(CREATE);\
|
|
||||||
std::cout << STR << ": " << t.get()/1000 << std::endl;\
|
|
||||||
}
|
|
||||||
|
|
||||||
BENCH(x + x, "2 terms");
|
queue.enqueueNDRangeKernel(kernel, offset, global, local, NULL, &event);
|
||||||
BENCH(x + x + x, "3 terms");
|
queue.flush();
|
||||||
BENCH(x + x + x + x, "4 terms");
|
queue.finish();
|
||||||
BENCH(x + x + x + x + x, "5 terms");
|
|
||||||
#undef BENCH
|
float time = event.getProfilingInfo<CL_PROFILING_COMMAND_START>() - event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
|
||||||
|
std::cout << "Kernel launch overhead: " << time << std::endl;
|
||||||
|
|
||||||
|
#ifdef BENCH_CUBLAS
|
||||||
|
cudaEvent_t start, stop;
|
||||||
|
cudaEventCreate(&start);
|
||||||
|
cudaEventCreate(&stop);
|
||||||
|
cudaEventRecord(start);
|
||||||
|
dummy<<1, 1>>>();
|
||||||
|
cudaEventRecord(stop);
|
||||||
|
cudaEventSynchronize();
|
||||||
|
cudaEventElapsedTime(&time, start, stop);
|
||||||
|
std::cout << "CUDA Kernel launch overhead: " << time << std::endl;
|
||||||
|
#endif
|
||||||
std::cout << "-------------------------" << std::endl;
|
std::cout << "-------------------------" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1,16 +1,8 @@
|
|||||||
file(GLOB SYSTEM_STUDIO_ROOT /opt/intel/system_studio_*)
|
file(GLOB SYSTEM_STUDIO_ROOT /opt/intel/system_studio_*)
|
||||||
|
|
||||||
find_path(MKL_INCLUDE_DIR mkl_blas.h
|
find_path(MKL_INCLUDE_DIR mkl_blas.h HINTS ${SYSTEM_STUDIO_ROOT}/mkl/include/)
|
||||||
HINTS
|
find_library(MKL_LIBRARIES NAMES mkl_core HINTS ${SYSTEM_STUDIO_ROOT}/mkl/lib/intel64/)
|
||||||
${SYSTEM_STUDIO_ROOT}/mkl/include/)
|
find_library(ICC_LIBRARIES NAMES iomp5 HINTS ${SYSTEM_STUDIO_ROOT}/compiler/lib/intel64/)
|
||||||
|
|
||||||
find_library(MKL_LIBRARIES NAMES mkl_core
|
|
||||||
HINTS
|
|
||||||
${SYSTEM_STUDIO_ROOT}/mkl/lib/intel64/)
|
|
||||||
|
|
||||||
find_library(ICC_LIBRARIES NAMES iomp5
|
|
||||||
HINTS
|
|
||||||
${SYSTEM_STUDIO_ROOT}/compiler/lib/intel64/)
|
|
||||||
|
|
||||||
if(ICC_LIBRARIES)
|
if(ICC_LIBRARIES)
|
||||||
set(OMP_LIBRARIES ${ICC_LIBRARIES})
|
set(OMP_LIBRARIES ${ICC_LIBRARIES})
|
||||||
@@ -18,7 +10,6 @@ else()
|
|||||||
set(OMP_LIBRARIES gomp)
|
set(OMP_LIBRARIES gomp)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
||||||
if(MKL_LIBRARIES AND OMP_LIBRARIES)
|
if(MKL_LIBRARIES AND OMP_LIBRARIES)
|
||||||
set(MKL_LIBRARIES mkl_intel_lp64 mkl_avx mkl_intel_thread ${MKL_LIBRARIES} ${OMP_LIBRARIES} pthread)
|
set(MKL_LIBRARIES mkl_intel_lp64 mkl_avx mkl_intel_thread ${MKL_LIBRARIES} ${OMP_LIBRARIES} pthread)
|
||||||
endif()
|
endif()
|
||||||
|
@@ -1,88 +1,9 @@
|
|||||||
# - Find the OpenCL headers and library
|
file(GLOB AMDAPPSDK_ROOT /opt/AMDAPPSDK*)
|
||||||
#
|
|
||||||
# Defines the following if found:
|
|
||||||
# OPENCL_FOUND : TRUE if found, FALSE otherwise
|
|
||||||
# OPENCL_INCLUDE_DIRS : Include directories for OpenCL
|
|
||||||
# OPENCL_LIBRARIES : The libraries to link against
|
|
||||||
#
|
|
||||||
# The user can set the OPENCLROOT environment variable to help finding OpenCL
|
|
||||||
# if it is installed in a non-standard place.
|
|
||||||
|
|
||||||
set(ENV_ATISTREAMSDKROOT $ENV{ATISTREAMSDKROOT})
|
find_package(CUDA QUIET)
|
||||||
if(ENV_ATISTREAMSDKROOT)
|
find_path(OPENCL_INCLUDE_DIR CL/cl.hpp HINTS ${AMDAPPSDK_ROOT}/include/ ${CUDA_SDK_ROOT_DIR}/include)
|
||||||
set(ENV_OPENCLROOT $ENV{ATISTREAMSDKROOT})
|
find_library(OPENCL_LIBRARIES NAMES OpenCL HINTS ${AMDAPPSDK_ROOT}/lib/x86_64/ ${CUDA_SDK_ROOT_DIR}/lib64)
|
||||||
endif(ENV_ATISTREAMSDKROOT)
|
|
||||||
|
|
||||||
set(ENV_AMDAPPSDKROOT $ENV{AMDAPPSDKROOT})
|
|
||||||
if(ENV_AMDAPPSDKROOT)
|
|
||||||
set(ENV_OPENCLROOT $ENV{AMDAPPSDKROOT})
|
|
||||||
endif(ENV_AMDAPPSDKROOT)
|
|
||||||
|
|
||||||
set(ENV_INTELOCLSDKROOT $ENV{INTELOCLSDKROOT})
|
|
||||||
if(ENV_INTELOCLSDKROOT)
|
|
||||||
set(ENV_OPENCLROOT $ENV{INTELOCLSDKROOT})
|
|
||||||
endif(ENV_INTELOCLSDKROOT)
|
|
||||||
|
|
||||||
set(ENV_OPENCLROOT2 $ENV{OPENCLROOT})
|
|
||||||
if(ENV_OPENCLROOT2)
|
|
||||||
set(ENV_OPENCLROOT $ENV{OPENCLROOT})
|
|
||||||
endif(ENV_OPENCLROOT2)
|
|
||||||
|
|
||||||
if(ENV_OPENCLROOT)
|
|
||||||
find_path(
|
|
||||||
OPENCL_INCLUDE_DIR
|
|
||||||
NAMES CL/cl.h OpenCL/cl.h
|
|
||||||
PATHS ${ENV_OPENCLROOT}/include
|
|
||||||
#NO_DEFAULT_PATH #uncomment this is you wish to surpress the use of default paths for OpenCL
|
|
||||||
)
|
|
||||||
|
|
||||||
if (("${CMAKE_SYSTEM_NAME}" MATCHES "Linux") OR (${CMAKE_SYSTEM_NAME} MATCHES "Windows"))
|
|
||||||
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
|
|
||||||
set(OPENCL_LIB_SEARCH_PATH
|
|
||||||
${OPENCL_LIB_SEARCH_PATH}
|
|
||||||
${ENV_OPENCLROOT}/lib/x86)
|
|
||||||
else(CMAKE_SIZEOF_VOID_P EQUAL 4)
|
|
||||||
set(OPENCL_LIB_SEARCH_PATH
|
|
||||||
${OPENCL_LIB_SEARCH_PATH}
|
|
||||||
${ENV_OPENCLROOT}/lib/x86_64)
|
|
||||||
endif(CMAKE_SIZEOF_VOID_P EQUAL 4)
|
|
||||||
endif(("${CMAKE_SYSTEM_NAME}" MATCHES "Linux") OR (${CMAKE_SYSTEM_NAME} MATCHES "Windows"))
|
|
||||||
find_library(
|
|
||||||
OPENCL_LIBRARY
|
|
||||||
NAMES OpenCL
|
|
||||||
PATHS ${OPENCL_LIB_SEARCH_PATH}
|
|
||||||
#NO_DEFAULT_PATH #uncomment this is you wish to surpress the use of default paths for OpenCL
|
|
||||||
)
|
|
||||||
else(ENV_OPENCLROOT)
|
|
||||||
find_path(
|
|
||||||
OPENCL_INCLUDE_DIR
|
|
||||||
NAMES CL/cl.h OpenCL/cl.h
|
|
||||||
PATHS ${PROJECT_SOURCE_DIR} #use the CL/ include folder provided with ViennaCL
|
|
||||||
)
|
|
||||||
|
|
||||||
find_library(
|
|
||||||
OPENCL_LIBRARY
|
|
||||||
NAMES OpenCL
|
|
||||||
)
|
|
||||||
endif(ENV_OPENCLROOT)
|
|
||||||
|
|
||||||
include(FindPackageHandleStandardArgs)
|
include(FindPackageHandleStandardArgs)
|
||||||
find_package_handle_standard_args(
|
find_package_handle_standard_args(OpenCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIR)
|
||||||
OPENCL
|
mark_as_advanced(OpenCL)
|
||||||
DEFAULT_MSG
|
|
||||||
OPENCL_LIBRARY OPENCL_INCLUDE_DIR
|
|
||||||
)
|
|
||||||
|
|
||||||
if(OPENCL_FOUND)
|
|
||||||
set(OPENCL_INCLUDE_DIRS ${OPENCL_INCLUDE_DIR})
|
|
||||||
set(OPENCL_LIBRARIES ${OPENCL_LIBRARY})
|
|
||||||
else(OPENCL_FOUND)
|
|
||||||
set(OPENCL_INCLUDE_DIRS)
|
|
||||||
set(OPENCL_LIBRARIES)
|
|
||||||
endif(OPENCL_FOUND)
|
|
||||||
|
|
||||||
mark_as_advanced(
|
|
||||||
OPENCL_INCLUDE_DIR
|
|
||||||
OPENCL_LIBRARY
|
|
||||||
)
|
|
||||||
|
|
||||||
|
@@ -15,6 +15,7 @@ class scalar;
|
|||||||
class array: public obj_base
|
class array: public obj_base
|
||||||
{
|
{
|
||||||
friend array reshape(array const &, int_t, int_t);
|
friend array reshape(array const &, int_t, int_t);
|
||||||
|
friend array reshape(array_expression const &, int_t, int_t);
|
||||||
static array_infos init_infos(numeric_type dtype, cl_mem data, int_t shape1, int_t shape2, int_t start1, int_t start2, int_t stride1, int_t stride2, int_t ld);
|
static array_infos init_infos(numeric_type dtype, cl_mem data, int_t shape1, int_t shape2, int_t start1, int_t start2, int_t stride1, int_t stride2, int_t ld);
|
||||||
public:
|
public:
|
||||||
//1D Constructors
|
//1D Constructors
|
||||||
@@ -31,7 +32,7 @@ public:
|
|||||||
|
|
||||||
//General constructor
|
//General constructor
|
||||||
array(numeric_type dtype, cl::Buffer data, slice const & s1, slice const & s2, int_t ld, cl::Context context = cl_ext::default_context());
|
array(numeric_type dtype, cl::Buffer data, slice const & s1, slice const & s2, int_t ld, cl::Context context = cl_ext::default_context());
|
||||||
array(array_expression const & proxy);
|
array(control const & proxy);
|
||||||
array(array const &);
|
array(array const &);
|
||||||
|
|
||||||
//Getters
|
//Getters
|
||||||
@@ -50,7 +51,7 @@ public:
|
|||||||
|
|
||||||
//Numeric operators
|
//Numeric operators
|
||||||
array& operator=(array const &);
|
array& operator=(array const &);
|
||||||
array& operator=(array_expression const &);
|
array& operator=(control const &);
|
||||||
template<class T> array & operator=(std::vector<T> const & rhs);
|
template<class T> array & operator=(std::vector<T> const & rhs);
|
||||||
|
|
||||||
array_expression operator-();
|
array_expression operator-();
|
||||||
@@ -90,7 +91,7 @@ public:
|
|||||||
explicit scalar(numeric_type dtype, cl::Buffer const & data, int_t offset, cl::Context context = cl_ext::default_context());
|
explicit scalar(numeric_type dtype, cl::Buffer const & data, int_t offset, cl::Context context = cl_ext::default_context());
|
||||||
explicit scalar(value_scalar value, cl::Context context = cl_ext::default_context());
|
explicit scalar(value_scalar value, cl::Context context = cl_ext::default_context());
|
||||||
explicit scalar(numeric_type dtype, cl::Context context = cl_ext::default_context());
|
explicit scalar(numeric_type dtype, cl::Context context = cl_ext::default_context());
|
||||||
scalar(array_expression const & proxy);
|
scalar(control const & proxy);
|
||||||
scalar& operator=(value_scalar const &);
|
scalar& operator=(value_scalar const &);
|
||||||
// scalar& operator=(scalar const & s);
|
// scalar& operator=(scalar const & s);
|
||||||
using array::operator =;
|
using array::operator =;
|
||||||
|
@@ -7,25 +7,25 @@
|
|||||||
|
|
||||||
namespace atidlas
|
namespace atidlas
|
||||||
{
|
{
|
||||||
|
|
||||||
namespace cl_ext
|
namespace cl_ext
|
||||||
{
|
{
|
||||||
|
|
||||||
typedef std::map<std::pair<cl_program, unsigned int>, cl::Kernel> kernels_t;
|
typedef std::map<std::pair<cl_program, unsigned int>, cl::Kernel> kernels_t;
|
||||||
typedef std::vector<std::pair<cl::Context, std::vector<cl::CommandQueue> > > queues_t;
|
typedef std::vector<std::pair<cl::Context, std::vector<cl::CommandQueue> > > queues_t;
|
||||||
|
|
||||||
queues_t init_queues();
|
extern kernels_t kernels;
|
||||||
|
extern queues_t queues;
|
||||||
|
extern unsigned int default_context_idx;
|
||||||
|
extern cl_command_queue_properties queue_properties;
|
||||||
|
|
||||||
|
|
||||||
void synchronize(cl::Context const & context);
|
void synchronize(cl::Context const & context);
|
||||||
cl::Context default_context();
|
cl::Context default_context();
|
||||||
cl::CommandQueue & get_queue(cl::Context const &, std::size_t);
|
cl::CommandQueue & get_queue(cl::Context const &, std::size_t);
|
||||||
cl::Device get_device(cl::CommandQueue &);
|
cl::Device get_device(cl::CommandQueue &);
|
||||||
std::vector<cl::CommandQueue> & get_queues(cl::Context const & ctx);
|
std::vector<cl::CommandQueue> & get_queues(cl::Context const & ctx);
|
||||||
extern unsigned int default_context_idx;
|
|
||||||
extern kernels_t kernels;
|
|
||||||
extern queues_t queues;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -208,6 +208,27 @@ private:
|
|||||||
size4 shape_;
|
size4 shape_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class control
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
control(array_expression const & x, cl::Event* event = NULL, std::vector<cl::Event>* dependencies = NULL,
|
||||||
|
cl::CommandQueue* queue = NULL, operation_cache* cache = NULL) : x_(x), event_(event), dependencies_(dependencies), queue_(queue), cache_(cache){}
|
||||||
|
|
||||||
|
array_expression const & expression() const { return x_; }
|
||||||
|
cl::Event* event() const { return event_; }
|
||||||
|
std::vector<cl::Event>* dependencies() const { return dependencies_; }
|
||||||
|
cl::CommandQueue* queue() const { return queue_; }
|
||||||
|
operation_cache* cache() const { return cache_; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
array_expression const & x_;
|
||||||
|
|
||||||
|
cl::Event* event_;
|
||||||
|
std::vector<cl::Event>* dependencies_;
|
||||||
|
cl::CommandQueue* queue_;
|
||||||
|
operation_cache* cache_;
|
||||||
|
};
|
||||||
|
|
||||||
class expressions_tuple
|
class expressions_tuple
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
|
@@ -96,9 +96,9 @@ context_(context), data_(data),
|
|||||||
infos_(init_infos(dtype, data_(), s1.size, s2.size, s1.start, s2.start, s1.stride, s2.stride, ld))
|
infos_(init_infos(dtype, data_(), s1.size, s2.size, s1.start, s2.start, s1.stride, s2.stride, ld))
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
array::array(array_expression const & x):
|
array::array(control const & x):
|
||||||
context_(x.context()), data_(context_, CL_MEM_READ_WRITE, size_of(x.dtype())*prod(x.shape())),
|
context_(x.expression().context()), data_(context_, CL_MEM_READ_WRITE, size_of(x.expression().dtype())*prod(x.expression().shape())),
|
||||||
infos_(init_infos(x.dtype(), data_(), x.shape()._1, x.shape()._2, 0, 0, 1, 1, x.shape()._1))
|
infos_(init_infos(x.expression().dtype(), data_(), x.expression().shape()._1, x.expression().shape()._2, 0, 0, 1, 1, x.expression().shape()._1))
|
||||||
{
|
{
|
||||||
*this = x;
|
*this = x;
|
||||||
}
|
}
|
||||||
@@ -151,8 +151,10 @@ array & array::operator=(array const & rhs)
|
|||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
array & array::operator=(array_expression const & rhs)
|
array & array::operator=(control const & x)
|
||||||
{
|
{
|
||||||
|
array_expression const & rhs = x.expression();
|
||||||
|
|
||||||
assert(dtype() == rhs.dtype());
|
assert(dtype() == rhs.dtype());
|
||||||
array_expression expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ASSIGN_TYPE), dtype(), shape());
|
array_expression expression(*this, rhs, op_element(OPERATOR_BINARY_TYPE_FAMILY, OPERATOR_ASSIGN_TYPE), dtype(), shape());
|
||||||
cl::CommandQueue & queue = cl_ext::get_queue(context_, 0);
|
cl::CommandQueue & queue = cl_ext::get_queue(context_, 0);
|
||||||
@@ -293,7 +295,7 @@ scalar::scalar(value_scalar value, cl::Context context) : array(1, value.dtype()
|
|||||||
scalar::scalar(numeric_type dtype, cl::Context context) : array(1, dtype, context)
|
scalar::scalar(numeric_type dtype, cl::Context context) : array(1, dtype, context)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
scalar::scalar(array_expression const & proxy) : array(proxy){ }
|
scalar::scalar(control const &proxy) : array(proxy){ }
|
||||||
|
|
||||||
template<class T>
|
template<class T>
|
||||||
T scalar::cast() const
|
T scalar::cast() const
|
||||||
@@ -710,6 +712,13 @@ array reshape(array const & a, int_t size1, int_t size2)
|
|||||||
return tmp;
|
return tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
array reshape(array_expression const & a, int_t size1, int_t size2)
|
||||||
|
{
|
||||||
|
array tmp(a);
|
||||||
|
tmp.infos_.shape1 = size1;
|
||||||
|
tmp.infos_.shape2 = size2;
|
||||||
|
return tmp;
|
||||||
|
}
|
||||||
|
|
||||||
#define DEFINE_DOT(LTYPE, RTYPE) \
|
#define DEFINE_DOT(LTYPE, RTYPE) \
|
||||||
array_expression dot(LTYPE const & x, RTYPE const & y)\
|
array_expression dot(LTYPE const & x, RTYPE const & y)\
|
||||||
|
@@ -8,6 +8,11 @@ namespace atidlas
|
|||||||
namespace cl_ext
|
namespace cl_ext
|
||||||
{
|
{
|
||||||
|
|
||||||
|
cl_command_queue_properties queue_properties = 0;
|
||||||
|
unsigned int default_context_idx = 0;
|
||||||
|
queues_t queues;
|
||||||
|
kernels_t kernels;
|
||||||
|
|
||||||
void synchronize(cl::Context const & context)
|
void synchronize(cl::Context const & context)
|
||||||
{
|
{
|
||||||
std::vector<cl::CommandQueue> & q = get_queues(context);
|
std::vector<cl::CommandQueue> & q = get_queues(context);
|
||||||
@@ -15,52 +20,43 @@ void synchronize(cl::Context const & context)
|
|||||||
it->finish();
|
it->finish();
|
||||||
}
|
}
|
||||||
|
|
||||||
queues_t init_queues()
|
void init_queues()
|
||||||
{
|
{
|
||||||
queues_t result;
|
std::vector<cl::Platform> platforms;
|
||||||
|
cl::Platform::get(&platforms);
|
||||||
std::vector<cl::Platform> platforms;
|
for(std::vector<cl::Platform>::iterator it = platforms.begin() ; it != platforms.end() ; ++it)
|
||||||
cl::Platform::get(&platforms);
|
|
||||||
|
|
||||||
for(std::vector<cl::Platform>::iterator it = platforms.begin() ; it != platforms.end() ; ++it)
|
|
||||||
{
|
|
||||||
std::vector<cl::Device> devices;
|
|
||||||
it->getDevices(CL_DEVICE_TYPE_ALL, &devices);
|
|
||||||
for(std::vector<cl::Device>::iterator itt = devices.begin() ; itt != devices.end() ; ++itt)
|
|
||||||
{
|
{
|
||||||
std::vector<cl::Device> current(1, *itt);
|
std::vector<cl::Device> devices;
|
||||||
cl::Context context(current);
|
it->getDevices(CL_DEVICE_TYPE_ALL, &devices);
|
||||||
cl::CommandQueue queue(context, *itt);
|
for(std::vector<cl::Device>::iterator itt = devices.begin() ; itt != devices.end() ; ++itt)
|
||||||
result.push_back(std::make_pair(context, std::vector<cl::CommandQueue>(1, queue)));
|
queues.push_back(std::make_pair(cl::Context(std::vector<cl::Device>(1, *itt)), std::vector<cl::CommandQueue>()));
|
||||||
}
|
}
|
||||||
}
|
for(queues_t::iterator it = queues.begin() ; it != queues.end() ; ++it)
|
||||||
|
it->second.push_back(cl::CommandQueue(it->first, it->first.getInfo<CL_CONTEXT_DEVICES>()[0], queue_properties));
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
cl::Context default_context()
|
cl::Context default_context()
|
||||||
{
|
{
|
||||||
return queues[default_context_idx].second.front().getInfo<CL_QUEUE_CONTEXT>();
|
if(queues.empty())
|
||||||
|
init_queues();
|
||||||
|
return queues.begin()->first;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<cl::CommandQueue> & get_queues(cl::Context const & ctx)
|
std::vector<cl::CommandQueue> & get_queues(cl::Context const & ctx)
|
||||||
{
|
{
|
||||||
|
if(queues.empty())
|
||||||
|
init_queues();
|
||||||
for(queues_t::iterator it = queues.begin() ; it != queues.end() ; ++it)
|
for(queues_t::iterator it = queues.begin() ; it != queues.end() ; ++it)
|
||||||
if(it->first()==ctx())
|
if(it->first()==ctx()) return it->second;
|
||||||
return it->second;
|
throw std::out_of_range("No such context registered in the backend. Please run atidlas::cl_ext:;register(context, queues)");
|
||||||
queues.push_back(std::make_pair(ctx, std::vector<cl::CommandQueue>(1, cl::CommandQueue(ctx, ctx.getInfo<CL_CONTEXT_DEVICES>()[0]))));
|
|
||||||
return queues.back().second;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cl::CommandQueue & get_queue(cl::Context const & ctx, std::size_t idx)
|
cl::CommandQueue & get_queue(cl::Context const & ctx, std::size_t idx)
|
||||||
{ return get_queues(ctx)[idx]; }
|
{
|
||||||
|
return get_queues(ctx)[idx];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
unsigned int default_context_idx = 0;
|
|
||||||
|
|
||||||
queues_t queues = init_queues();
|
|
||||||
kernels_t kernels = kernels_t();
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user