Driver: now loading the backend dynamically on Linux

Philippe Tillet
2015-08-25 12:41:21 -04:00
parent 868df9dad2
commit 67a35a62bd
44 changed files with 11808 additions and 13703 deletions
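Summary of the change: instead of linking ISAAC against libOpenCL/libcuda/libnvrtc at build time (and gating CUDA support behind ISAAC_WITH_CUDA), the driver now dlopen()s the backend libraries at runtime and resolves each entry point with dlsym() on first use. A minimal, free-standing sketch of that pattern follows; the helper names are illustrative only, the commit's actual implementation is the dispatch class added in include/isaac/driver/dispatch.h further down.

// Sketch: resolve an OpenCL entry point at runtime instead of at link time.
// Hypothetical helper names; the real code caches one pointer per function
// inside the dispatch class.
#include <dlfcn.h>
#include <stdexcept>
#include "isaac/driver/external/CL/cl.h"   // vendored OpenCL header used by this commit

static void* opencl_handle()
{
  // dlopen once, lazily; returns nullptr when libOpenCL.so is not installed.
  static void* handle = dlopen("libOpenCL.so", RTLD_LAZY);
  return handle;
}

static cl_int my_clGetPlatformIDs(cl_uint num_entries, cl_platform_id* platforms, cl_uint* num_platforms)
{
  typedef cl_int (*fn_t)(cl_uint, cl_platform_id*, cl_uint*);
  // Resolve the symbol the first time it is needed, then reuse the pointer.
  static fn_t fn = opencl_handle() ? (fn_t)dlsym(opencl_handle(), "clGetPlatformIDs") : nullptr;
  if(!fn)
    throw std::runtime_error("OpenCL runtime not available");
  return fn(num_entries, platforms, num_platforms);
}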


@@ -8,14 +8,6 @@ add_custom_target( MAKE_HEADERS_VISIBLE SOURCES ${MAKE_HEADERS_VISIBLE_SRC} )
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}/lib/external/)
#CUDA and OpenCL
find_package(CUDA QUIET)
find_package(OpenCL QUIET REQUIRED)
if(CUDA_FOUND)
set(BACKEND_DEFINES "-DISAAC_WITH_CUDA")
include_directories(${CUDA_INCLUDE_DIRS})
endif()
#Compiler flags
add_definitions(${BACKEND_DEFINES})
if(WIN32)


@@ -51,5 +51,5 @@ foreach(PROG blas)
add_executable(${PROG}-bench ${PROG}.cpp)
set_target_properties(${PROG}-bench PROPERTIES COMPILE_FLAGS "${BLAS_DEF_STR}")
endif()
target_link_libraries(${PROG}-bench ${BLAS_LIBS} isaac ${OPENCL_LIBRARIES} )
target_link_libraries(${PROG}-bench ${BLAS_LIBS} isaac)
endforeach(PROG)

File diff suppressed because it is too large


@@ -1,126 +0,0 @@
/**********************************************************************************
* Copyright (c) 2008-2012 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
#ifndef __OPENCL_CL_D3D10_H
#define __OPENCL_CL_D3D10_H
#include <d3d10.h>
#include <CL/cl.h>
#include <CL/cl_platform.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************************************************
* cl_khr_d3d10_sharing */
#define cl_khr_d3d10_sharing 1
typedef cl_uint cl_d3d10_device_source_khr;
typedef cl_uint cl_d3d10_device_set_khr;
/******************************************************************************/
// Error Codes
#define CL_INVALID_D3D10_DEVICE_KHR -1002
#define CL_INVALID_D3D10_RESOURCE_KHR -1003
#define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004
#define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005
// cl_d3d10_device_source_nv
#define CL_D3D10_DEVICE_KHR 0x4010
#define CL_D3D10_DXGI_ADAPTER_KHR 0x4011
// cl_d3d10_device_set_nv
#define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012
#define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013
// cl_context_info
#define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014
#define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C
// cl_mem_info
#define CL_MEM_D3D10_RESOURCE_KHR 0x4015
// cl_image_info
#define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016
// cl_command_type
#define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017
#define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018
/******************************************************************************/
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)(
cl_platform_id platform,
cl_d3d10_device_source_khr d3d_device_source,
void * d3d_object,
cl_d3d10_device_set_khr d3d_device_set,
cl_uint num_entries,
cl_device_id * devices,
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D10Buffer * resource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D10Texture2D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D10Texture3D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
#ifdef __cplusplus
}
#endif
#endif // __OPENCL_CL_D3D10_H


@@ -1,162 +0,0 @@
/**********************************************************************************
* Copyright (c) 2008 - 2012 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
#ifndef __OPENCL_CL_GL_H
#define __OPENCL_CL_GL_H
#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
typedef cl_uint cl_gl_object_type;
typedef cl_uint cl_gl_texture_info;
typedef cl_uint cl_gl_platform_info;
typedef struct __GLsync *cl_GLsync;
/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */
#define CL_GL_OBJECT_BUFFER 0x2000
#define CL_GL_OBJECT_TEXTURE2D 0x2001
#define CL_GL_OBJECT_TEXTURE3D 0x2002
#define CL_GL_OBJECT_RENDERBUFFER 0x2003
#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E
#define CL_GL_OBJECT_TEXTURE1D 0x200F
#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010
#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011
/* cl_gl_texture_info */
#define CL_GL_TEXTURE_TARGET 0x2004
#define CL_GL_MIPMAP_LEVEL 0x2005
#define CL_GL_NUM_SAMPLES 0x2012
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLBuffer(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLuint /* bufobj */,
int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLTexture(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLenum /* target */,
cl_GLint /* miplevel */,
cl_GLuint /* texture */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLRenderbuffer(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLuint /* renderbuffer */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLObjectInfo(cl_mem /* memobj */,
cl_gl_object_type * /* gl_object_type */,
cl_GLuint * /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLTextureInfo(cl_mem /* memobj */,
cl_gl_texture_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireGLObjects(cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem * /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseGLObjects(cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem * /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
// Deprecated OpenCL 1.1 APIs
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
clCreateFromGLTexture2D(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLenum /* target */,
cl_GLint /* miplevel */,
cl_GLuint /* texture */,
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
clCreateFromGLTexture3D(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLenum /* target */,
cl_GLint /* miplevel */,
cl_GLuint /* texture */,
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
/* cl_khr_gl_sharing extension */
#define cl_khr_gl_sharing 1
typedef cl_uint cl_gl_context_info;
/* Additional Error Codes */
#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000
/* cl_gl_context_info */
#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006
#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007
/* Additional cl_context_properties */
#define CL_GL_CONTEXT_KHR 0x2008
#define CL_EGL_DISPLAY_KHR 0x2009
#define CL_GLX_DISPLAY_KHR 0x200A
#define CL_WGL_HDC_KHR 0x200B
#define CL_CGL_SHAREGROUP_KHR 0x200C
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLContextInfoKHR(const cl_context_properties * /* properties */,
cl_gl_context_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
const cl_context_properties * properties,
cl_gl_context_info param_name,
size_t param_value_size,
void * param_value,
size_t * param_value_size_ret);
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_GL_H */


@@ -1,69 +0,0 @@
/**********************************************************************************
* Copyright (c) 2008-2012 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
/* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have */
/* OpenGL dependencies. */
#ifndef __OPENCL_CL_GL_EXT_H
#define __OPENCL_CL_GL_EXT_H
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __APPLE__
#include <OpenCL/cl_gl.h>
#else
#include <CL/cl_gl.h>
#endif
/*
* For each extension, follow this template
* cl_VEN_extname extension */
/* #define cl_VEN_extname 1
* ... define new types, if any
* ... define new tokens, if any
* ... define new APIs, if any
*
* If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header
* This allows us to avoid having to decide whether to include GL headers or GLES here.
*/
/*
* cl_khr_gl_event extension
* See section 9.9 in the OpenCL 1.1 spec for more information
*/
#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D
extern CL_API_ENTRY cl_event CL_API_CALL
clCreateEventFromGLsyncKHR(cl_context /* context */,
cl_GLsync /* cl_GLsync */,
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_GL_EXT_H */

File diff suppressed because it is too large


@@ -1,54 +0,0 @@
/*******************************************************************************
* Copyright (c) 2008-2012 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
#ifndef __OPENCL_H
#define __OPENCL_H
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __APPLE__
#include <OpenCL/cl.h>
#include <OpenCL/cl_gl.h>
#include <OpenCL/cl_gl_ext.h>
#include <OpenCL/cl_ext.h>
#else
#include <CL/cl.h>
#include <CL/cl_gl.h>
#include <CL/cl_gl_ext.h>
#include <CL/cl_ext.h>
#endif
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_H */


@@ -8,7 +8,7 @@
#include "isaac/common/expression_type.h"
#include "isaac/common/numeric_type.h"
#include "isaac/driver/common.h"
#include "isaac/driver/dispatch.h"
#include "isaac/defines.h"
#include "isaac/types.h"


@@ -2,13 +2,7 @@
#define ISAAC_DRIVER_COMMON_H
#include <exception>
//OpenCL Backend
#include "isaac/driver/external/CL/cl.h"
#include "isaac/driver/external/CL/cl_ext.h"
//CUDA Backend
#include "isaac/driver/external/CUDA/cuda.h"
#include "isaac/driver/external/CUDA/nvrtc.h"
#include "isaac/driver/dispatch.h"
#include "isaac/defines.h"
DISABLE_MSVC_WARNING_C4275
@@ -20,10 +14,8 @@ namespace driver
enum backend_type
{
OPENCL
#ifdef ISAAC_WITH_CUDA
,CUDA
#endif
OPENCL,
CUDA
};


@@ -26,6 +26,7 @@ public:
CPU = CL_DEVICE_TYPE_CPU,
ACCELERATOR = CL_DEVICE_TYPE_ACCELERATOR
};
enum class Vendor
{
AMD,
@@ -42,14 +43,11 @@ public:
};
private:
#ifdef ISAAC_WITH_CUDA
template<CUdevice_attribute attr>
int cuGetInfo() const;
#endif
public:
#ifdef ISAAC_WITH_CUDA
Device(int ordinal);
#endif
Device(cl_device_id const & device, bool take_ownership = true);
bool operator==(Device const &) const;


@@ -0,0 +1,203 @@
#ifndef ISAAC_DRIVER_DISPATCHER_H
#define ISAAC_DRIVER_DISPATCHER_H
#include <type_traits>
#include <dlfcn.h>
//OpenCL Backend
#include "isaac/driver/external/CL/cl.h"
#include "isaac/driver/external/CL/cl_ext.h"
//CUDA Backend
#include "isaac/driver/external/CUDA/cuda.h"
#include "isaac/driver/external/CUDA/nvrtc.h"
#include <iostream>
namespace isaac
{
namespace driver
{
class dispatch
{
private:
template <class F>
struct return_type;
template <class R, class... A>
struct return_type<R (*)(A...)>
{
typedef R type;
};
typedef bool (*f_init_t)();
template<f_init_t initializer, typename FunPtrT, typename... Args>
static typename return_type<FunPtrT>::type f_impl(void*& lib_h, FunPtrT, void*& cache, const char * name, Args... args)
{
initializer();
if(cache == nullptr)
cache = dlsym(lib_h, name);
return (*(FunPtrT)cache)(args...);
}
public:
static bool clinit();
static bool cuinit();
static bool nvrtcinit();
static void release();
//OpenCL
static cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *);
static cl_int clEnqueueNDRangeKernel(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *);
static cl_int clSetKernelArg(cl_kernel, cl_uint, size_t, const void *);
static cl_int clReleaseMemObject(cl_mem);
static cl_int clFinish(cl_command_queue);
static cl_int clGetMemObjectInfo(cl_mem, cl_mem_info, size_t, void *, size_t *);
static cl_int clGetCommandQueueInfo(cl_command_queue, cl_command_queue_info, size_t, void *, size_t *);
static cl_int clReleaseContext(cl_context);
static cl_int clReleaseEvent(cl_event);
static cl_int clEnqueueWriteBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *);
static cl_int clEnqueueReadBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void *, cl_uint, const cl_event *, cl_event *);
static cl_int clGetProgramBuildInfo(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *);
static cl_int clReleaseDevice(cl_device_id);
static cl_context clCreateContext(const cl_context_properties *, cl_uint, const cl_device_id *, void (*)(const char *, const void *, size_t, void *), void *, cl_int *);
static cl_int clGetDeviceIDs(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *);
static cl_int clGetContextInfo(cl_context, cl_context_info, size_t, void *, size_t *);
static cl_int clGetDeviceInfo(cl_device_id, cl_device_info, size_t, void *, size_t *);
static cl_int clReleaseCommandQueue(cl_command_queue);
static cl_int clGetPlatformIDs(cl_uint, cl_platform_id *, cl_uint *);
static cl_int clGetPlatformInfo(cl_platform_id, cl_platform_info, size_t, void *, size_t *);
static cl_int clGetEventProfilingInfo(cl_event, cl_profiling_info, size_t, void *, size_t *);
static cl_program clCreateProgramWithBinary(cl_context, cl_uint, const cl_device_id *, const size_t *, const unsigned char **, cl_int *, cl_int *);
static cl_command_queue clCreateCommandQueue(cl_context, cl_device_id, cl_command_queue_properties, cl_int *);
static cl_int clRetainEvent(cl_event);
static cl_int clReleaseProgram(cl_program);
static cl_int clFlush(cl_command_queue);
static cl_int clGetProgramInfo(cl_program, cl_program_info, size_t, void *, size_t *);
static cl_int clGetKernelInfo(cl_kernel, cl_kernel_info, size_t, void *, size_t *);
static cl_int clGetKernelWorkGroupInfo(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void *, size_t *);
static cl_kernel clCreateKernel(cl_program, const char *, cl_int *);
static cl_mem clCreateBuffer(cl_context, cl_mem_flags, size_t, void *, cl_int *);
static cl_program clCreateProgramWithSource(cl_context, cl_uint, const char **, const size_t *, cl_int *);
static cl_int clReleaseKernel(cl_kernel);
//CUDA
static CUresult cuCtxDestroy_v2(CUcontext ctx);
static CUresult cuEventCreate(CUevent *phEvent, unsigned int Flags);
static CUresult cuDeviceGet(CUdevice *device, int ordinal);
static CUresult cuMemcpyDtoH_v2(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount);
static CUresult cuStreamCreate(CUstream *phStream, unsigned int Flags);
static CUresult cuEventElapsedTime(float *pMilliseconds, CUevent hStart, CUevent hEnd);
static CUresult cuMemFree_v2(CUdeviceptr dptr);
static CUresult cuMemcpyDtoHAsync_v2(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
static CUresult cuDriverGetVersion(int *driverVersion);
static CUresult cuDeviceGetName(char *name, int len, CUdevice dev);
static CUresult cuMemcpyHtoDAsync_v2(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream);
static CUresult cuModuleLoad(CUmodule *module, const char *fname);
static CUresult cuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra);
static CUresult cuModuleUnload(CUmodule hmod);
static CUresult cuModuleLoadDataEx(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
static CUresult cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
static CUresult cuDeviceGetCount(int *count);
static CUresult cuMemcpyHtoD_v2(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
static CUresult cuInit(unsigned int Flags);
static CUresult cuEventRecord(CUevent hEvent, CUstream hStream);
static CUresult cuCtxCreate_v2(CUcontext *pctx, unsigned int flags, CUdevice dev);
static CUresult cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, const char *name);
static CUresult cuStreamSynchronize(CUstream hStream);
static CUresult cuStreamDestroy_v2(CUstream hStream);
static CUresult cuEventDestroy_v2(CUevent hEvent);
static CUresult cuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize);
static nvrtcResult nvrtcCompileProgram(nvrtcProgram prog, int numOptions, const char **options);
static nvrtcResult nvrtcGetProgramLogSize(nvrtcProgram prog, size_t *logSizeRet);
static nvrtcResult nvrtcGetPTX(nvrtcProgram prog, char *ptx);
static nvrtcResult nvrtcGetPTXSize(nvrtcProgram prog, size_t *ptxSizeRet);
static nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char **headers, const char **includeNames);
static nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char *log);
private:
static void* opencl_;
static void* cuda_;
static void* nvrtc_;
//OpenCL
static void* clBuildProgram_;
static void* clEnqueueNDRangeKernel_;
static void* clSetKernelArg_;
static void* clReleaseMemObject_;
static void* clFinish_;
static void* clGetMemObjectInfo_;
static void* clGetCommandQueueInfo_;
static void* clReleaseContext_;
static void* clReleaseEvent_;
static void* clEnqueueWriteBuffer_;
static void* clEnqueueReadBuffer_;
static void* clGetProgramBuildInfo_;
static void* clReleaseDevice_;
static void* clCreateContext_;
static void* clGetDeviceIDs_;
static void* clGetContextInfo_;
static void* clGetDeviceInfo_;
static void* clReleaseCommandQueue_;
static void* clGetPlatformIDs_;
static void* clGetPlatformInfo_;
static void* clGetEventProfilingInfo_;
static void* clCreateProgramWithBinary_;
static void* clCreateCommandQueue_;
static void* clRetainEvent_;
static void* clReleaseProgram_;
static void* clFlush_;
static void* clGetProgramInfo_;
static void* clGetKernelInfo_;
static void* clGetKernelWorkGroupInfo_;
static void* clCreateKernel_;
static void* clCreateBuffer_;
static void* clCreateProgramWithSource_;
static void* clReleaseKernel_;
//CUDA
static void* cuCtxDestroy_v2_;
static void* cuEventCreate_;
static void* cuDeviceGet_;
static void* cuMemcpyDtoH_v2_;
static void* cuStreamCreate_;
static void* cuEventElapsedTime_;
static void* cuMemFree_v2_;
static void* cuMemcpyDtoHAsync_v2_;
static void* cuDriverGetVersion_;
static void* cuDeviceGetName_;
static void* cuMemcpyHtoDAsync_v2_;
static void* cuModuleLoad_;
static void* cuLaunchKernel_;
static void* cuModuleUnload_;
static void* cuModuleLoadDataEx_;
static void* cuDeviceGetAttribute_;
static void* cuDeviceGetCount_;
static void* cuMemcpyHtoD_v2_;
static void* cuInit_;
static void* cuEventRecord_;
static void* cuCtxCreate_v2_;
static void* cuModuleGetFunction_;
static void* cuStreamSynchronize_;
static void* cuStreamDestroy_v2_;
static void* cuEventDestroy_v2_;
static void* cuMemAlloc_v2_;
static void* nvrtcCompileProgram_;
static void* nvrtcGetProgramLogSize_;
static void* nvrtcGetPTX_;
static void* nvrtcGetPTXSize_;
static void* nvrtcCreateProgram_;
static void* nvrtcGetProgramLog_;
};
}
}
#endif
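
With this header in place, whether a backend exists becomes a runtime question rather than a compile-time #ifdef. A hedged usage sketch, assuming the header above is on the include path:

// Probe backend availability at runtime: each *init() dlopen()s the
// corresponding library on first call and returns false if it is absent.
#include <iostream>
#include "isaac/driver/dispatch.h"

int main()
{
  using isaac::driver::dispatch;
  std::cout << "libcuda.so   loaded: " << dispatch::cuinit()    << "\n"
            << "libnvrtc.so  loaded: " << dispatch::nvrtcinit() << "\n"
            << "libOpenCL.so loaded: " << dispatch::clinit()    << "\n";
  return 0;
}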

10796 include/isaac/driver/external/CUDA/cuda.h vendored Normal file

File diff suppressed because it is too large


@@ -0,0 +1,392 @@
/*
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO LICENSEE:
*
* This source code and/or documentation ("Licensed Deliverables") are
* subject to NVIDIA intellectual property rights under U.S. and
* international Copyright laws.
*
* These Licensed Deliverables contained herein is PROPRIETARY and
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
* conditions of a form of NVIDIA software license agreement by and
* between NVIDIA and Licensee ("License Agreement") or electronically
* accepted by Licensee. Notwithstanding any terms or conditions to
* the contrary in the License Agreement, reproduction or disclosure
* of the Licensed Deliverables to any third party without the express
* written consent of NVIDIA is prohibited.
*
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THESE LICENSED DELIVERABLES.
*
* U.S. Government End Users. These Licensed Deliverables are a
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
* 1995), consisting of "commercial computer software" and "commercial
* computer software documentation" as such terms are used in 48
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
* U.S. Government End Users acquire the Licensed Deliverables with
* only those rights set forth herein.
*
* Any use of the Licensed Deliverables in individual and commercial
* software must include, in the user documentation and internal
* comments to the code, the above Disclaimer and U.S. Government End
* Users Notice.
*/
#ifndef __NVRTC_H__
#define __NVRTC_H__
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
#include <stdlib.h>
/*****************************//**
*
* \defgroup error Error Handling
*
********************************/
/**
* \ingroup error
* \brief CUDA Online Compiler API call result code.
*/
typedef enum {
NVRTC_SUCCESS = 0,
NVRTC_ERROR_OUT_OF_MEMORY = 1,
NVRTC_ERROR_PROGRAM_CREATION_FAILURE = 2,
NVRTC_ERROR_INVALID_INPUT = 3,
NVRTC_ERROR_INVALID_PROGRAM = 4,
NVRTC_ERROR_INVALID_OPTION = 5,
NVRTC_ERROR_COMPILATION = 6,
NVRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7
} nvrtcResult;
/**
* \ingroup error
* \brief ::nvrtcGetErrorString is a helper function that stringifies the
* given #nvrtcResult code, e.g., \link #nvrtcResult NVRTC_SUCCESS
* \endlink to \c "NVRTC_SUCCESS". For unrecognized enumeration
* values, it returns \c "NVRTC_ERROR unknown".
*
* \param [in] result CUDA Online Compiler API result code.
* \return Message string for the given #nvrtcResult code.
*/
const char *nvrtcGetErrorString(nvrtcResult result);
/****************************************//**
*
* \defgroup query General Information Query
*
*******************************************/
/**
* \ingroup query
* \brief ::nvrtcVersion sets the output parameters \p major and \p minor
* with the CUDA Online Compiler version number.
*
* \param [out] major CUDA Online Compiler major version number.
* \param [out] minor CUDA Online Compiler minor version number.
* \return
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
*
*/
nvrtcResult nvrtcVersion(int *major, int *minor);
/********************************//**
*
* \defgroup compilation Compilation
*
***********************************/
/**
* \ingroup compilation
* \brief ::nvrtcProgram is the unit of compilation, and an opaque handle for
* a program.
*
* To compile a CUDA program string, an instance of nvrtcProgram must be
* created first with ::nvrtcCreateProgram, then compiled with
* ::nvrtcCompileProgram.
*/
typedef struct _nvrtcProgram *nvrtcProgram;
/**
* \ingroup compilation
* \brief ::nvrtcCreateProgram creates an instance of ::nvrtcProgram with the
* given input parameters, and sets the output parameter \p prog with
* it.
*
* \param [out] prog CUDA Online Compiler program.
* \param [in] src CUDA program source.
* \param [in] name CUDA program name.\n
* \p name can be \c NULL; \c "default_program" is
* used when \p name is \c NULL.
* \param [in] numHeaders Number of headers used.\n
* \p numHeaders must be greater than or equal to 0.
* \param [in] headers Sources of the headers.\n
* \p headers can be \c NULL when \p numHeaders is
* 0.
* \param [in] includeNames Name of each header by which they can be
* included in the CUDA program source.\n
* \p includeNames can be \c NULL when \p numHeaders
* is 0.
* \return
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
* - \link #nvrtcResult NVRTC_ERROR_OUT_OF_MEMORY \endlink
* - \link #nvrtcResult NVRTC_ERROR_PROGRAM_CREATION_FAILURE \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
*
* \see ::nvrtcDestroyProgram
*/
nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog,
const char *src,
const char *name,
int numHeaders,
const char **headers,
const char **includeNames);
/**
* \ingroup compilation
* \brief ::nvrtcDestroyProgram destroys the given program.
*
* \param [in] prog CUDA Online Compiler program.
* \return
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
*
* \see ::nvrtcCreateProgram
*/
nvrtcResult nvrtcDestroyProgram(nvrtcProgram *prog);
/**
* \ingroup compilation
* \brief ::nvrtcCompileProgram compiles the given program.
*
* The valid compiler options are:
*
* - Compilation targets
* - --gpu-architecture=<em>\<GPU architecture name\></em> (-arch)\n
* Specify the name of the class of GPU architectures for which the
* input must be compiled.\n
* - Valid <em>GPU architecture name</em>s:
* - compute_20
* - compute_30
* - compute_35
* - compute_50
* - Default: compute_20
* - Separate compilation / whole-program compilation
* - --device-c (-dc)\n
* Generate relocatable code that can be linked with other relocatable
* device code. It is equivalent to --relocatable-device-code=true.
* - --device-w (-dw)\n
* Generate non-relocatable code. It is equivalent to
* --relocatable-device-code=false.
* - --relocatable-device-code=<em>[true, false]</em> (-rdc)\n
* Enable (disable) the generation of relocatable device code.
* - Default: false
* - Debugging support
* - --device-debug (-G)\n
* Generate debug information.
* - --generate-line-info (-lineinfo)\n
* Generate line-number information.
* - Code generation
* - --maxrregcount=<em>\<N\></em> (-maxrregcount)\n
* Specify the maximum amount of registers that GPU functions can use.
* Until a function-specific limit, a higher value will generally
* increase the performance of individual GPU threads that execute this
* function. However, because thread registers are allocated from a
* global register pool on each GPU, a higher value of this option will
* also reduce the maximum thread block size, thereby reducing the amount
* of thread parallelism. Hence, a good maxrregcount value is the result
* of a trade-off. If this option is not specified, then no maximum is
* assumed. Value less than the minimum registers required by ABI will
* be bumped up by the compiler to ABI minimum limit.
* - --ftz=<em>[true, false]</em> (-ftz)\n
* When performing single-precision floating-point operations, flush
* denormal values to zero or preserve denormal values. --use_fast_math
* implies --ftz=true.
* - Default: false
* - --prec-sqrt=<em>[true, false]</em> (-prec-sqrt)\n
* For single-precision floating-point square root, use IEEE
* round-to-nearest mode or use a faster approximation. --use_fast_math
* implies --prec-sqrt=false.
* - Default: true
* - --prec-div=<em>[true, false]</em> (-prec-div)\n
* For single-precision floating-point division and reciprocals, use IEEE
* round-to-nearest mode or use a faster approximation. --use_fast_math
* implies --prec-div=false.
* - Default: true
* - --fmad=<em>[true, false]</em> (-fmad)\n
* Enables (disables) the contraction of floating-point multiplies and
* adds/subtracts into floating-point multiply-add operations (FMAD,
* FFMA, or DFMA). --use_fast_math implies --fmad=true.
* - Default: true
* - --use_fast_math (-use_fast_math)\n
* Make use of fast math operations. --use_fast_math implies --ftz=true
* --prec-div=false --prec-sqrt=false --fmad=true.
* - Preprocessing
* - --define-macro=<em>\<macrodef\></em> (-D)\n
* <em>macrodef</em> can be either <em>name</em> or
* <em>name=definitions</em>.
* - <em>name</em>\n
* Predefine <em>name</em> as a macro with definition 1.
* - <em>name=definition</em>\n
* The contents of <em>definition</em> are tokenized and preprocessed
* as if they appeared during translation phase three in a \c \#define
* directive. In particular, the definition will be truncated by
* embedded new line characters.
* - --undefine-macro=<em>\<name\></em> (-U)\n
* Cancel any previous definition of \em name.
* - --include-path=<em>\<dir\></em> (-I)\n
* Add the directory <em>dir</em> to the list of directories to be
* searched for headers. These paths are searched after the list of
* headers given to ::nvrtcCreateProgram.
* - --pre-include=<em>\<header\></em> (-include)\n
* Preinclude <em>header</em> during preprocessing.
* - Language Dialect
* - --std=c++11 (-std=c++11)\n
* Set language dialect to C++11.
* - --builtin-move-forward=<em>[true, false]</em> (-builtin-move-forward)\n
* Provide builtin definitions of std::move and std::forward, when C++11
* language dialect is selected.
* - Default : true
* - --builtin-initializer-list=<em>[true, false]</em> (-builtin-initializer-list)\n
* Provide builtin definitions of std::initializer_list class and member
* functions when C++11 language dialect is selected.
* - Default : true
* - Misc
* - --disable-warnings (-w)\n
* Inhibit all warning messages.
* - --restrict (-restrict)\n
* Programmer assertion that all kernel pointer parameters are restrict
* pointers.
* - --device-as-default-execution-space
* (-default-device)\n
* Treat entities with no execution space annotation as \c __device__
* entities.
*
* \param [in] prog CUDA Online Compiler program.
* \param [in] numOptions Number of compiler options passed.
* \param [in] options Compiler options in the form of C string array.\n
* \p options can be \c NULL when \p numOptions is 0.
*
* \return
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
* - \link #nvrtcResult NVRTC_ERROR_OUT_OF_MEMORY \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_OPTION \endlink
* - \link #nvrtcResult NVRTC_ERROR_COMPILATION \endlink
* - \link #nvrtcResult NVRTC_ERROR_BUILTIN_OPERATION_FAILURE \endlink
*/
nvrtcResult nvrtcCompileProgram(nvrtcProgram prog,
int numOptions, const char **options);
/**
* \ingroup compilation
* \brief ::nvrtcGetPTXSize sets \p ptxSizeRet with the size of the PTX
* generated by the previous compilation of \p prog (including the
* trailing \c NULL).
*
* \param [in] prog CUDA Online Compiler program.
* \param [out] ptxSizeRet Size of the generated PTX (including the trailing
* \c NULL).
* \return
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
*
* \see ::nvrtcGetPTX
*/
nvrtcResult nvrtcGetPTXSize(nvrtcProgram prog, size_t *ptxSizeRet);
/**
* \ingroup compilation
* \brief ::nvrtcGetPTX stores the PTX generated by the previous compilation
* of \p prog in the memory pointed by \p ptx.
*
* \param [in] prog CUDA Online Compiler program.
* \param [out] ptx Compiled result.
* \return
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
*
* \see ::nvrtcGetPTXSize
*/
nvrtcResult nvrtcGetPTX(nvrtcProgram prog, char *ptx);
/**
* \ingroup compilation
* \brief ::nvrtcGetProgramLogSize sets \p logSizeRet with the size of the
* log generated by the previous compilation of \p prog (including the
* trailing \c NULL).
*
* Note that compilation log may be generated with warnings and informative
* messages, even when the compilation of \p prog succeeds.
*
* \param [in] prog CUDA Online Compiler program.
* \param [out] logSizeRet Size of the compilation log
* (including the trailing \c NULL).
* \return
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
*
* \see ::nvrtcGetProgramLog
*/
nvrtcResult nvrtcGetProgramLogSize(nvrtcProgram prog, size_t *logSizeRet);
/**
* \ingroup compilation
* \brief ::nvrtcGetProgramLog stores the log generated by the previous
* compilation of \p prog in the memory pointed by \p log.
*
* \param [in] prog CUDA Online Compiler program.
* \param [out] log Compilation log.
* \return
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
*
* \see ::nvrtcGetProgramLogSize
*/
nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char *log);
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif /* __NVRTC_H__ */
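
For reference, a minimal sketch of the compile flow these declarations describe, assuming an NVRTC from CUDA >= 7.0 is available (the commit itself loads libnvrtc.so dynamically instead of linking it); compute_30 is just one example from the option list above:

// Compile CUDA C source to PTX with the NVRTC API declared above.
#include <stdexcept>
#include <string>
#include "isaac/driver/external/CUDA/nvrtc.h"

static void check(nvrtcResult res)
{
  if(res != NVRTC_SUCCESS)
    throw std::runtime_error(nvrtcGetErrorString(res));
}

std::string compile_to_ptx(const char* src)
{
  nvrtcProgram prog;
  check(nvrtcCreateProgram(&prog, src, "program.cu", 0, NULL, NULL));
  const char* options[] = {"--gpu-architecture=compute_30"};
  check(nvrtcCompileProgram(prog, 1, options));
  size_t size;
  check(nvrtcGetPTXSize(prog, &size));
  std::string ptx(size, '\0');
  check(nvrtcGetPTX(prog, &ptx[0]));
  check(nvrtcDestroyProgram(&prog));
  return ptx;  // size includes the trailing NUL reported by nvrtcGetPTXSize
}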


@@ -12,26 +12,18 @@ namespace isaac
namespace driver
{
#ifdef ISAAC_WITH_CUDA
struct cu_event_t{
operator bool() const { return first && second; }
CUevent first;
CUevent second;
};
#endif
#ifdef ISAAC_WITH_CUDA
#define HANDLE_TYPE(CLTYPE, CUTYPE) Handle<CLTYPE, CUTYPE>
#else
#define HANDLE_TYPE(CLTYPE, CUTYPE) Handle<CLTYPE, void>
#endif
template<class CLType, class CUType>
class ISAACAPI Handle
{
private:
#ifdef ISAAC_WITH_CUDA
static void _delete(CUcontext x);
static void _delete(CUdeviceptr x);
static void _delete(CUstream x);
@@ -40,7 +32,6 @@ private:
static void _delete(CUfunction);
static void _delete(CUmodule x);
static void _delete(cu_event_t x);
#endif
static void release(cl_context x);
static void release(cl_mem x);
@@ -56,10 +47,8 @@ public:
bool operator<(Handle const & other) const;
CLType & cl();
CLType const & cl() const;
#ifdef ISAAC_WITH_CUDA
CUType & cu();
CUType const & cu() const;
#endif
~Handle();
private:


@@ -30,10 +30,8 @@ public:
private:
backend_type backend_;
unsigned int address_bits_;
#ifdef ISAAC_WITH_CUDA
std::vector<std::shared_ptr<void> > cu_params_store_;
std::vector<void*> cu_params_;
#endif
HANDLE_TYPE(cl_kernel, CUfunction) h_;
};


@@ -19,9 +19,7 @@ class ISAACAPI Platform
{
private:
public:
#ifdef ISAAC_WITH_CUDA
Platform(backend_type);
#endif
Platform(cl_platform_id const &);
std::string name() const;
std::string version() const;


@@ -21,10 +21,8 @@ static inline std::string size_type(driver::Device const & device)
{
switch(device.backend())
{
#ifdef ISAAC_WITH_CUDA
case driver::CUDA:
return "int";
#endif
case driver::OPENCL:
return "int";
default:


@@ -18,19 +18,13 @@ else()
endif()
add_dependencies(isaac database)
#Linkage
if(OPENCL_FOUND)
target_link_libraries(isaac ${OPENCL_LIBRARIES})
endif()
if(CUDA_FOUND)
cuda_find_library_local_first(CUDA_NVRTC_LIBRARY nvrtc "\"nvrtc\" library")
target_link_libraries(isaac ${CUDA_CUDA_LIBRARY} ${CUDA_NVRTC_LIBRARY})
#Cuda JIT headers to file
set(CUDA_HELPERS_PATH ${CMAKE_CURRENT_SOURCE_DIR}/driver/helpers/cuda/)
file(GLOB_RECURSE CUDA_HELPERS_SRC ${CUDA_HELPERS_PATH}/*.cu)
CODE_TO_H(SOURCES ${CUDA_HELPERS_SRC} VARNAME kernel_files EXTENSION "hpp"
OUTPUT_DIR ${CUDA_HELPERS_PATH} NAMESPACE "isaac helpers cuda" TARGET cuda_headers EOF "0")
endif()
target_link_libraries(isaac "dl")
#Cuda JIT headers to file
set(CUDA_HELPERS_PATH ${CMAKE_CURRENT_SOURCE_DIR}/driver/helpers/cuda/)
file(GLOB_RECURSE CUDA_HELPERS_SRC ${CUDA_HELPERS_PATH}/*.cu)
CODE_TO_H(SOURCES ${CUDA_HELPERS_SRC} VARNAME kernel_files EXTENSION "hpp"
OUTPUT_DIR ${CUDA_HELPERS_PATH} NAMESPACE "isaac helpers cuda" TARGET cuda_headers EOF "0")
#Installation
install(TARGETS isaac LIBRARY DESTINATION lib ARCHIVE DESTINATION lib)


@@ -127,15 +127,28 @@ std::list<Context const *> backend::contexts::cache_;
void backend::platforms(std::vector<Platform> & platforms)
{
#ifdef ISAAC_WITH_CUDA
platforms.push_back(Platform(CUDA));
#endif
cl_uint nplatforms;
ocl::check(clGetPlatformIDs(0, NULL, &nplatforms));
std::vector<cl_platform_id> clplatforms(nplatforms);
ocl::check(clGetPlatformIDs(nplatforms, clplatforms.data(), NULL));
for(cl_platform_id p: clplatforms)
platforms.push_back(Platform(p));
//if cuda is here
if(dispatch::cuinit())
{
if(dispatch::nvrtcinit())
platforms.push_back(Platform(CUDA));
else
throw std::runtime_error("ISAAC: Unable to find NVRTC. Make sure you are using CUDA >= 7.0");
}
//if OpenCL is here
if(dispatch::clinit())
{
cl_uint nplatforms;
ocl::check(dispatch::dispatch::clGetPlatformIDs(0, NULL, &nplatforms));
std::vector<cl_platform_id> clplatforms(nplatforms);
ocl::check(dispatch::dispatch::clGetPlatformIDs(nplatforms, clplatforms.data(), NULL));
for(cl_platform_id p: clplatforms)
platforms.push_back(Platform(p));
}
if(platforms.empty())
throw std::runtime_error("ISAAC: No backend available. Make sure OpenCL and/or CUDA are available in your library path");
}
void backend::synchronize(Context const & context)


@@ -18,14 +18,12 @@ Buffer::Buffer(Context const & context, size_t size) : backend_(context.backend_
{
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA:
cuda::check(cuMemAlloc(&h_.cu(), size));
cuda::check(dispatch::cuMemAlloc(&h_.cu(), size));
break;
#endif
case OPENCL:
cl_int err;
h_.cl() = clCreateBuffer(context.h_.cl(), CL_MEM_READ_WRITE, size, NULL, &err);
h_.cl() = dispatch::clCreateBuffer(context.h_.cl(), CL_MEM_READ_WRITE, size, NULL, &err);
ocl::check(err);
break;
default:


@@ -5,8 +5,6 @@ namespace isaac
namespace driver
{
#ifdef ISAAC_WITH_CUDA
namespace nvrtc
{
@@ -101,8 +99,6 @@ void check(CUresult err)
}
}
#endif
namespace ocl
{


@@ -27,15 +27,14 @@ CommandQueue::CommandQueue(Context const & context, Device const & device, cl_co
{
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA:
cuda::check(cuStreamCreate(&h_.cu(), 0));
cuda::check(dispatch::cuStreamCreate(&h_.cu(), 0));
break;
#endif
case OPENCL:
{
cl_int err;
h_.cl() = clCreateCommandQueue(context.h_.cl(), device.h_.cl(), properties, &err);
h_.cl() = dispatch::clCreateCommandQueue(context.h_.cl(), device.h_.cl(), properties, &err);
ocl::check(err);
break;
}
@@ -57,10 +56,8 @@ void CommandQueue::synchronize()
{
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA: cuda::check(cuStreamSynchronize(h_.cu())); break;
#endif
case OPENCL: ocl::check(clFinish(h_.cl())); break;
case CUDA: cuda::check(dispatch::cuStreamSynchronize(h_.cu())); break;
case OPENCL: ocl::check(dispatch::clFinish(h_.cl())); break;
default: throw;
}
}
@@ -70,16 +67,14 @@ Event CommandQueue::enqueue(Kernel const & kernel, NDRange global, driver::NDRan
Event event(backend_);
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA:
cuda::check(cuEventRecord(event.h_.cu().first, h_.cu()));
cuda::check(cuLaunchKernel(kernel.h_.cu(), global[0]/local[0], global[1]/local[1], global[2]/local[2],
cuda::check(dispatch::cuEventRecord(event.h_.cu().first, h_.cu()));
cuda::check(dispatch::cuLaunchKernel(kernel.h_.cu(), global[0]/local[0], global[1]/local[1], global[2]/local[2],
local[0], local[1], local[2], 0, h_.cu(),(void**)&kernel.cu_params_[0], NULL));
cuda::check(cuEventRecord(event.h_.cu().second, h_.cu()));
cuda::check(dispatch::cuEventRecord(event.h_.cu().second, h_.cu()));
break;
#endif
case OPENCL:
ocl::check(clEnqueueNDRangeKernel(h_.cl(), kernel.h_.cl(), global.dimension(), NULL, (const size_t *)global, (const size_t *) local, 0, NULL, &event.handle().cl()));
ocl::check(dispatch::clEnqueueNDRangeKernel(h_.cl(), kernel.h_.cl(), global.dimension(), NULL, (const size_t *)global, (const size_t *) local, 0, NULL, &event.handle().cl()));
break;
default: throw;
}
@@ -90,16 +85,14 @@ void CommandQueue::write(Buffer const & buffer, bool blocking, std::size_t offse
{
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA:
if(blocking)
cuda::check(cuMemcpyHtoD(buffer.h_.cu() + offset, ptr, size));
cuda::check(dispatch::cuMemcpyHtoD(buffer.h_.cu() + offset, ptr, size));
else
cuda::check(cuMemcpyHtoDAsync(buffer.h_.cu() + offset, ptr, size, h_.cu()));
cuda::check(dispatch::cuMemcpyHtoDAsync(buffer.h_.cu() + offset, ptr, size, h_.cu()));
break;
#endif
case OPENCL:
ocl::check(clEnqueueWriteBuffer(h_.cl(), buffer.h_.cl(), blocking?CL_TRUE:CL_FALSE, offset, size, ptr, 0, NULL, NULL));
ocl::check(dispatch::clEnqueueWriteBuffer(h_.cl(), buffer.h_.cl(), blocking?CL_TRUE:CL_FALSE, offset, size, ptr, 0, NULL, NULL));
break;
default: throw;
}
@@ -109,16 +102,14 @@ void CommandQueue::read(Buffer const & buffer, bool blocking, std::size_t offset
{
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA:
if(blocking)
cuda::check(cuMemcpyDtoH(ptr, buffer.h_.cu() + offset, size));
cuda::check(dispatch::cuMemcpyDtoH(ptr, buffer.h_.cu() + offset, size));
else
cuda::check(cuMemcpyDtoHAsync(ptr, buffer.h_.cu() + offset, size, h_.cu()));
cuda::check(dispatch::cuMemcpyDtoHAsync(ptr, buffer.h_.cu() + offset, size, h_.cu()));
break;
#endif
case OPENCL:
ocl::check(clEnqueueReadBuffer(h_.cl(), buffer.h_.cl(), blocking?CL_TRUE:CL_FALSE, offset, size, ptr, 0, NULL, NULL));
ocl::check(dispatch::clEnqueueReadBuffer(h_.cl(), buffer.h_.cl(), blocking?CL_TRUE:CL_FALSE, offset, size, ptr, 0, NULL, NULL));
break;
default: throw;
}


@@ -21,14 +21,12 @@ Context::Context(Device const & device) : backend_(device.backend_), device_(dev
{
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA:
cuda::check(cuCtxCreate(&h_.cu(), CU_CTX_SCHED_AUTO, device.h_.cu()));
cuda::check(dispatch::cuCtxCreate(&h_.cu(), CU_CTX_SCHED_AUTO, device.h_.cu()));
break;
#endif
case OPENCL:
cl_int err;
h_.cl() = clCreateContext(NULL, 1, &device_.h_.cl(), NULL, NULL, &err);
h_.cl() = dispatch::clCreateContext(NULL, 1, &device_.h_.cl(), NULL, NULL, &err);
ocl::check(err);
break;
default:


@@ -13,23 +13,19 @@ namespace driver
{
#ifdef ISAAC_WITH_CUDA
template<CUdevice_attribute attr>
int Device::cuGetInfo() const
{
int res;
cuda::check(cuDeviceGetAttribute(&res, attr, h_.cu()));
cuda::check(dispatch::cuDeviceGetAttribute(&res, attr, h_.cu()));
return res;
}
Device::Device(int ordinal): backend_(CUDA), h_(backend_, true)
{
cuda::check(cuDeviceGet(&h_.cu(), ordinal));
cuda::check(dispatch::cuDeviceGet(&h_.cu(), ordinal));
}
#endif
Device::Device(cl_device_id const & device, bool take_ownership) : backend_(OPENCL), h_(backend_, take_ownership)
{
h_.cl() = device;
@@ -85,9 +81,7 @@ unsigned int Device::address_bits() const
{
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA: return sizeof(long long)*8;
#endif
case CUDA: return sizeof(size_t)*8;
case OPENCL: return ocl::info<CL_DEVICE_ADDRESS_BITS>(h_.cl());
default: throw;
}
@@ -99,9 +93,7 @@ driver::Platform Device::platform() const
{
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA: return Platform(CUDA);
#endif
case OPENCL: return Platform(ocl::info<CL_DEVICE_PLATFORM>(h_.cl()));
default: throw;
}
@@ -111,13 +103,12 @@ std::string Device::name() const
{
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA:
char tmp[128];
cuda::check(cuDeviceGetName(tmp, 128, h_.cu()));
cuda::check(dispatch::cuDeviceGetName(tmp, 128, h_.cu()));
return std::string(tmp);
#endif
case OPENCL: return ocl::info<CL_DEVICE_NAME>(h_.cl());
case OPENCL:
return ocl::info<CL_DEVICE_NAME>(h_.cl());
default: throw;
}
}
@@ -126,10 +117,10 @@ std::string Device::vendor_str() const
{
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA: return "NVidia";
#endif
case OPENCL: return ocl::info<CL_DEVICE_VENDOR>(h_.cl());
case CUDA:
return "NVidia";
case OPENCL:
return ocl::info<CL_DEVICE_VENDOR>(h_.cl());
default: throw;
}
}
@@ -139,7 +130,6 @@ std::vector<size_t> Device::max_work_item_sizes() const
{
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA:
{
std::vector<size_t> result(3);
@@ -148,7 +138,6 @@ std::vector<size_t> Device::max_work_item_sizes() const
result[2] = cuGetInfo<CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z>();
return result;
}
#endif
case OPENCL:
return ocl::info<CL_DEVICE_MAX_WORK_ITEM_SIZES>(h_.cl());
default:
@@ -160,9 +149,7 @@ Device::Type Device::type() const
{
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA: return Type::GPU;
#endif
case OPENCL: return static_cast<Type>(ocl::info<CL_DEVICE_TYPE>(h_.cl()));
default: throw;
}
@@ -172,10 +159,8 @@ std::string Device::extensions() const
{
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA:
return "";
#endif
case OPENCL:
return ocl::info<CL_DEVICE_EXTENSIONS>(h_.cl());
default: throw;
@@ -188,10 +173,8 @@ std::pair<unsigned int, unsigned int> Device::nv_compute_capability() const
{
case OPENCL:
return std::pair<unsigned int, unsigned int>(ocl::info<CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV>(h_.cl()), ocl::info<CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV>(h_.cl()));
#ifdef ISAAC_WITH_CUDA
case CUDA:
return std::pair<unsigned int, unsigned int>(cuGetInfo<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR>(), cuGetInfo<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR>());
#endif
default:
throw;
}
@@ -203,12 +186,8 @@ bool Device::fp64_support() const
{
case OPENCL:
return extensions().find("cl_khr_fp64")!=std::string::npos;
#ifdef ISAAC_WITH_CUDA
case CUDA:
return true;
#endif
default:
throw;
}
@@ -230,19 +209,12 @@ std::string Device::infos() const
}
// Properties
#ifdef ISAAC_WITH_CUDA
#define CUDACASE(CUNAME) case CUDA: return cuGetInfo<CUNAME>();
#else
#define CUDACASE(CUNAME)
#endif\
#define WRAP_ATTRIBUTE(ret, fname, CUNAME, CLNAME) \
ret Device::fname() const\
{\
switch(backend_)\
{\
CUDACASE(CUNAME)\
case CUDA: return cuGetInfo<CUNAME>();\
case OPENCL: return static_cast<ret>(ocl::info<CLNAME>(h_.cl()));\
default: throw;\
}\

257 lib/driver/dispatch.cpp Normal file

@@ -0,0 +1,257 @@
#include "isaac/driver/dispatch.h"
namespace isaac
{
namespace driver
{
//Helpers for function definition
#define DEFINE1(init, hlib, ret, fname, t1) ret dispatch::fname(t1 a)\
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a); }
#define DEFINE2(init, hlib, ret, fname, t1, t2) ret dispatch::fname(t1 a, t2 b)\
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b); }
#define DEFINE3(init, hlib, ret, fname, t1, t2, t3) ret dispatch::fname(t1 a, t2 b, t3 c)\
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c); }
#define DEFINE4(init, hlib, ret, fname, t1, t2, t3, t4) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d)\
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d); }
#define DEFINE5(init, hlib, ret, fname, t1, t2, t3, t4, t5) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e)\
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e); }
#define DEFINE6(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f)\
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f); }
#define DEFINE7(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g)\
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g); }
#define DEFINE8(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h)\
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h); }
#define DEFINE9(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i)\
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h, i); }
#define DEFINE10(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i, t10 j)\
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h, i, j); }
#define DEFINE11(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i, t10 j, t11 k)\
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h, i, j, k); }
//Specialized helpers for OpenCL
#define OCL_DEFINE1(ret, fname, t1) DEFINE1(clinit, opencl_, ret, fname, t1)
#define OCL_DEFINE2(ret, fname, t1, t2) DEFINE2(clinit, opencl_, ret, fname, t1, t2)
#define OCL_DEFINE3(ret, fname, t1, t2, t3) DEFINE3(clinit, opencl_, ret, fname, t1, t2, t3)
#define OCL_DEFINE4(ret, fname, t1, t2, t3, t4) DEFINE4(clinit, opencl_, ret, fname, t1, t2, t3, t4)
#define OCL_DEFINE5(ret, fname, t1, t2, t3, t4, t5) DEFINE5(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5)
#define OCL_DEFINE6(ret, fname, t1, t2, t3, t4, t5, t6) DEFINE6(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5, t6)
#define OCL_DEFINE7(ret, fname, t1, t2, t3, t4, t5, t6, t7) DEFINE7(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5, t6, t7)
#define OCL_DEFINE8(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) DEFINE8(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8)
#define OCL_DEFINE9(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) DEFINE9(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9)
//Specialized helpers for CUDA
#define CUDA_DEFINE1(ret, fname, t1) DEFINE1(cuinit, cuda_, ret, fname, t1)
#define CUDA_DEFINE2(ret, fname, t1, t2) DEFINE2(cuinit, cuda_, ret, fname, t1, t2)
#define CUDA_DEFINE3(ret, fname, t1, t2, t3) DEFINE3(cuinit, cuda_, ret, fname, t1, t2, t3)
#define CUDA_DEFINE4(ret, fname, t1, t2, t3, t4) DEFINE4(cuinit, cuda_, ret, fname, t1, t2, t3, t4)
#define CUDA_DEFINE5(ret, fname, t1, t2, t3, t4, t5) DEFINE5(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5)
#define CUDA_DEFINE6(ret, fname, t1, t2, t3, t4, t5, t6) DEFINE6(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6)
#define CUDA_DEFINE7(ret, fname, t1, t2, t3, t4, t5, t6, t7) DEFINE7(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7)
#define CUDA_DEFINE8(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) DEFINE8(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8)
#define CUDA_DEFINE9(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) DEFINE9(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9)
#define CUDA_DEFINE10(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) DEFINE10(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10)
#define CUDA_DEFINE11(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) DEFINE11(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11)
#define NVRTC_DEFINE1(ret, fname, t1) DEFINE1(nvrtcinit, nvrtc_, ret, fname, t1)
#define NVRTC_DEFINE2(ret, fname, t1, t2) DEFINE2(nvrtcinit, nvrtc_, ret, fname, t1, t2)
#define NVRTC_DEFINE3(ret, fname, t1, t2, t3) DEFINE3(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3)
#define NVRTC_DEFINE4(ret, fname, t1, t2, t3, t4) DEFINE4(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4)
#define NVRTC_DEFINE5(ret, fname, t1, t2, t3, t4, t5) DEFINE5(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5)
#define NVRTC_DEFINE6(ret, fname, t1, t2, t3, t4, t5, t6) DEFINE6(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6)
#define NVRTC_DEFINE7(ret, fname, t1, t2, t3, t4, t5, t6, t7) DEFINE7(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7)
#define NVRTC_DEFINE8(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) DEFINE8(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8)
#define NVRTC_DEFINE9(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) DEFINE9(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9)
#define NVRTC_DEFINE10(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) DEFINE10(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10)
#define NVRTC_DEFINE11(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) DEFINE11(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11)
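//Illustration (non-normative): assuming f_impl lazily loads the library through the given
//init function, looks the symbol up by the string literal (e.g. via dlsym) on first use and
//caches it in the matching fname_ pointer, a helper such as
//  OCL_DEFINE1(cl_int, clReleaseEvent, cl_event)
//expands to:
//  cl_int dispatch::clReleaseEvent(cl_event a)
//  { return f_impl<dispatch::clinit>(opencl_, clReleaseEvent, clReleaseEvent_, "clReleaseEvent", a); }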
bool dispatch::clinit()
{
if(opencl_==nullptr)
opencl_ = dlopen("libOpenCL.so", RTLD_LAZY);
return opencl_ != nullptr;
}
bool dispatch::cuinit()
{
if(cuda_==nullptr)
cuda_ = dlopen("libcuda.so", RTLD_LAZY);
return cuda_ != nullptr;
}
bool dispatch::nvrtcinit()
{
if(nvrtc_==nullptr)
nvrtc_ = dlopen("libnvrtc.so", RTLD_LAZY);
return nvrtc_ != nullptr;
}
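//Note: the libraries are opened with RTLD_LAZY, so their symbols are only resolved when first
//used. Illustrative usage (a sketch, not part of the public API): callers can probe a backend
//before taking its code path, e.g.
//  if(dispatch::cuinit() && dispatch::nvrtcinit())
//    { /* CUDA driver + NVRTC available: CUDA code paths may be taken */ }
//  else if(dispatch::clinit())
//    { /* fall back to OpenCL */ }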
//OpenCL
cl_int dispatch::clBuildProgram(cl_program a, cl_uint b, const cl_device_id * c, const char * d, void (*e)(cl_program, void *), void * f)
{ return f_impl<dispatch::clinit>(opencl_, clBuildProgram, clBuildProgram_, "clBuildProgram", a, b, c, d, e, f); }
cl_context dispatch::clCreateContext(const cl_context_properties * a, cl_uint b, const cl_device_id * c, void (*d)(const char *, const void *, size_t, void *), void * e, cl_int * f)
{ return f_impl<dispatch::clinit>(opencl_, dispatch::clCreateContext, dispatch::clCreateContext_, "clCreateContext", a, b, c, d, e, f); }
OCL_DEFINE9(cl_int, clEnqueueNDRangeKernel, cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*)
OCL_DEFINE4(cl_int, clSetKernelArg, cl_kernel, cl_uint, size_t, const void *)
OCL_DEFINE1(cl_int, clReleaseMemObject, cl_mem)
OCL_DEFINE1(cl_int, clFinish, cl_command_queue)
OCL_DEFINE5(cl_int, clGetMemObjectInfo, cl_mem, cl_mem_info, size_t, void *, size_t *)
OCL_DEFINE5(cl_int, clGetCommandQueueInfo, cl_command_queue, cl_command_queue_info, size_t, void *, size_t *)
OCL_DEFINE1(cl_int, clReleaseContext, cl_context)
OCL_DEFINE1(cl_int, clReleaseEvent, cl_event)
OCL_DEFINE9(cl_int, clEnqueueWriteBuffer, cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *)
OCL_DEFINE9(cl_int, clEnqueueReadBuffer, cl_command_queue, cl_mem, cl_bool, size_t, size_t, void *, cl_uint, const cl_event *, cl_event *)
OCL_DEFINE6(cl_int, clGetProgramBuildInfo, cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *)
OCL_DEFINE1(cl_int, clReleaseDevice, cl_device_id)
OCL_DEFINE5(cl_int, clGetDeviceIDs, cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *)
OCL_DEFINE5(cl_int, clGetContextInfo, cl_context, cl_context_info, size_t, void *, size_t *)
OCL_DEFINE5(cl_int, clGetDeviceInfo, cl_device_id, cl_device_info, size_t, void *, size_t *)
OCL_DEFINE1(cl_int, clReleaseCommandQueue, cl_command_queue)
OCL_DEFINE3(cl_int, clGetPlatformIDs, cl_uint, cl_platform_id *, cl_uint *)
OCL_DEFINE5(cl_int, clGetPlatformInfo, cl_platform_id, cl_platform_info, size_t, void *, size_t *)
OCL_DEFINE5(cl_int, clGetEventProfilingInfo, cl_event, cl_profiling_info, size_t, void *, size_t *)
OCL_DEFINE7(cl_program, clCreateProgramWithBinary, cl_context, cl_uint, const cl_device_id *, const size_t *, const unsigned char **, cl_int *, cl_int *)
OCL_DEFINE4(cl_command_queue, clCreateCommandQueue, cl_context, cl_device_id, cl_command_queue_properties, cl_int *)
OCL_DEFINE1(cl_int, clRetainEvent, cl_event)
OCL_DEFINE1(cl_int, clReleaseProgram, cl_program)
OCL_DEFINE1(cl_int, clFlush, cl_command_queue)
OCL_DEFINE5(cl_int, clGetProgramInfo, cl_program, cl_program_info, size_t, void *, size_t *)
OCL_DEFINE5(cl_int, clGetKernelInfo, cl_kernel, cl_kernel_info, size_t, void *, size_t *)
OCL_DEFINE6(cl_int, clGetKernelWorkGroupInfo, cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void *, size_t *)
OCL_DEFINE3(cl_kernel, clCreateKernel, cl_program, const char *, cl_int *)
OCL_DEFINE5(cl_mem, clCreateBuffer, cl_context, cl_mem_flags, size_t, void *, cl_int *)
OCL_DEFINE5(cl_program, clCreateProgramWithSource, cl_context, cl_uint, const char **, const size_t *, cl_int *)
OCL_DEFINE1(cl_int, clReleaseKernel, cl_kernel)
//CUDA
CUDA_DEFINE1(CUresult, cuCtxDestroy_v2, CUcontext)
CUDA_DEFINE2(CUresult, cuEventCreate, CUevent *, unsigned int)
CUDA_DEFINE2(CUresult, cuDeviceGet, CUdevice *, int)
CUDA_DEFINE3(CUresult, cuMemcpyDtoH_v2, void *, CUdeviceptr, size_t)
CUDA_DEFINE2(CUresult, cuStreamCreate, CUstream *, unsigned int)
CUDA_DEFINE3(CUresult, cuEventElapsedTime, float *, CUevent, CUevent)
CUDA_DEFINE1(CUresult, cuMemFree_v2, CUdeviceptr)
CUDA_DEFINE4(CUresult, cuMemcpyDtoHAsync_v2, void *, CUdeviceptr, size_t, CUstream)
CUDA_DEFINE1(CUresult, cuDriverGetVersion, int *)
CUDA_DEFINE3(CUresult, cuDeviceGetName, char *, int, CUdevice)
CUDA_DEFINE4(CUresult, cuMemcpyHtoDAsync_v2, CUdeviceptr, const void *, size_t, CUstream)
CUDA_DEFINE2(CUresult, cuModuleLoad, CUmodule *, const char *)
CUDA_DEFINE11(CUresult, cuLaunchKernel, CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, CUstream, void **, void **)
CUDA_DEFINE1(CUresult, cuModuleUnload, CUmodule)
CUDA_DEFINE5(CUresult, cuModuleLoadDataEx, CUmodule *, const void *, unsigned int, CUjit_option *, void **)
CUDA_DEFINE3(CUresult, cuDeviceGetAttribute, int *, CUdevice_attribute, CUdevice)
CUDA_DEFINE1(CUresult, cuDeviceGetCount, int *)
CUDA_DEFINE3(CUresult, cuMemcpyHtoD_v2, CUdeviceptr, const void *, size_t)
CUDA_DEFINE1(CUresult, cuInit, unsigned int)
CUDA_DEFINE2(CUresult, cuEventRecord, CUevent, CUstream)
CUDA_DEFINE3(CUresult, cuCtxCreate_v2, CUcontext *, unsigned int, CUdevice)
CUDA_DEFINE3(CUresult, cuModuleGetFunction, CUfunction *, CUmodule, const char *)
CUDA_DEFINE1(CUresult, cuStreamSynchronize, CUstream)
CUDA_DEFINE1(CUresult, cuStreamDestroy_v2, CUstream)
CUDA_DEFINE1(CUresult, cuEventDestroy_v2, CUevent)
CUDA_DEFINE2(CUresult, cuMemAlloc_v2, CUdeviceptr*, size_t)
NVRTC_DEFINE3(nvrtcResult, nvrtcCompileProgram, nvrtcProgram, int, const char **)
NVRTC_DEFINE2(nvrtcResult, nvrtcGetProgramLogSize, nvrtcProgram, size_t *)
NVRTC_DEFINE2(nvrtcResult, nvrtcGetPTX, nvrtcProgram, char *)
NVRTC_DEFINE2(nvrtcResult, nvrtcGetPTXSize, nvrtcProgram, size_t *)
NVRTC_DEFINE6(nvrtcResult, nvrtcCreateProgram, nvrtcProgram *, const char *, const char *, int, const char **, const char **)
NVRTC_DEFINE2(nvrtcResult, nvrtcGetProgramLog, nvrtcProgram, char *)
void dispatch::release()
{
if(opencl_)
dlclose(opencl_);
if(cuda_)
dlclose(cuda_);
if(nvrtc_)
dlclose(nvrtc_);
}
void * dispatch::opencl_;
void * dispatch::cuda_;
void * dispatch::nvrtc_;
//OpenCL
void* dispatch::clBuildProgram_;
void* dispatch::clEnqueueNDRangeKernel_;
void* dispatch::clSetKernelArg_;
void* dispatch::clReleaseMemObject_;
void* dispatch::clFinish_;
void* dispatch::clGetMemObjectInfo_;
void* dispatch::clGetCommandQueueInfo_;
void* dispatch::clReleaseContext_;
void* dispatch::clReleaseEvent_;
void* dispatch::clEnqueueWriteBuffer_;
void* dispatch::clEnqueueReadBuffer_;
void* dispatch::clGetProgramBuildInfo_;
void* dispatch::clReleaseDevice_;
void* dispatch::clCreateContext_;
void* dispatch::clGetDeviceIDs_;
void* dispatch::clGetContextInfo_;
void* dispatch::clGetDeviceInfo_;
void* dispatch::clReleaseCommandQueue_;
void* dispatch::clGetPlatformIDs_;
void* dispatch::clGetPlatformInfo_;
void* dispatch::clGetEventProfilingInfo_;
void* dispatch::clCreateProgramWithBinary_;
void* dispatch::clCreateCommandQueue_;
void* dispatch::clRetainEvent_;
void* dispatch::clReleaseProgram_;
void* dispatch::clFlush_;
void* dispatch::clGetProgramInfo_;
void* dispatch::clGetKernelInfo_;
void* dispatch::clGetKernelWorkGroupInfo_;
void* dispatch::clCreateKernel_;
void* dispatch::clCreateBuffer_;
void* dispatch::clCreateProgramWithSource_;
void* dispatch::clReleaseKernel_;
//CUDA
void* dispatch::cuCtxDestroy_v2_;
void* dispatch::cuEventCreate_;
void* dispatch::cuDeviceGet_;
void* dispatch::cuMemcpyDtoH_v2_;
void* dispatch::cuStreamCreate_;
void* dispatch::cuEventElapsedTime_;
void* dispatch::cuMemFree_v2_;
void* dispatch::cuMemcpyDtoHAsync_v2_;
void* dispatch::cuDriverGetVersion_;
void* dispatch::cuDeviceGetName_;
void* dispatch::cuMemcpyHtoDAsync_v2_;
void* dispatch::cuModuleLoad_;
void* dispatch::cuLaunchKernel_;
void* dispatch::cuModuleUnload_;
void* dispatch::cuModuleLoadDataEx_;
void* dispatch::cuDeviceGetAttribute_;
void* dispatch::cuDeviceGetCount_;
void* dispatch::cuMemcpyHtoD_v2_;
void* dispatch::cuInit_;
void* dispatch::cuEventRecord_;
void* dispatch::cuCtxCreate_v2_;
void* dispatch::cuModuleGetFunction_;
void* dispatch::cuStreamSynchronize_;
void* dispatch::cuStreamDestroy_v2_;
void* dispatch::cuEventDestroy_v2_;
void* dispatch::cuMemAlloc_v2_;
void* dispatch::nvrtcCompileProgram_;
void* dispatch::nvrtcGetProgramLogSize_;
void* dispatch::nvrtcGetPTX_;
void* dispatch::nvrtcGetPTXSize_;
void* dispatch::nvrtcCreateProgram_;
void* dispatch::nvrtcGetProgramLog_;
}
}

View File

@@ -11,14 +11,14 @@ Event::Event(backend_type backend) : backend_(backend), h_(backend_, true)
{
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA:
cuda::check(cuEventCreate(&h_.cu().first, CU_EVENT_DEFAULT));
cuda::check(cuEventCreate(&h_.cu().second, CU_EVENT_DEFAULT));
cuda::check(dispatch::cuEventCreate(&h_.cu().first, CU_EVENT_DEFAULT));
cuda::check(dispatch::cuEventCreate(&h_.cu().second, CU_EVENT_DEFAULT));
break;
#endif
case OPENCL: break;
default: throw;
case OPENCL:
break;
default:
throw;
}
}
@@ -31,12 +31,10 @@ long Event::elapsed_time() const
{
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA:
float time;
cuda::check(cuEventElapsedTime(&time, h_.cu().first, h_.cu().second));
cuda::check(dispatch::cuEventElapsedTime(&time, h_.cu().first, h_.cu().second));
return 1e6*time;
#endif
case OPENCL:
return static_cast<long>(ocl::info<CL_PROFILING_COMMAND_END>(h_.cl()) - ocl::info<CL_PROFILING_COMMAND_START>(h_.cl()));
default:

View File

@@ -7,63 +7,59 @@ namespace isaac
namespace driver
{
#ifdef ISAAC_WITH_CUDA
//CUDA
template<class CLType, class CUType>
void Handle<CLType, CUType>::_delete(CUcontext x) { cuda::check(dispatch::cuCtxDestroy(x)); }
template<class CLType, class CUType>
void Handle<CLType, CUType>::_delete(CUcontext x) { cuCtxDestroy(x); }
void Handle<CLType, CUType>::_delete(CUdeviceptr x) { cuda::check(dispatch::cuMemFree(x)); }
template<class CLType, class CUType>
void Handle<CLType, CUType>::_delete(CUdeviceptr x) { cuMemFree(x); }
void Handle<CLType, CUType>::_delete(CUstream x) { cuda::check(dispatch::cuStreamDestroy(x)); }
template<class CLType, class CUType>
void Handle<CLType, CUType>::_delete(CUstream x) { cuStreamDestroy(x); }
void Handle<CLType, CUType>::_delete(CUdevice) { }
template<class CLType, class CUType>
void Handle<CLType, CUType>::_delete(CUdevice) { std::cout << "CUdevice" << std::endl;}
template<class CLType, class CUType>
void Handle<CLType, CUType>::_delete(CUevent x) { cuEventDestroy(x); }
void Handle<CLType, CUType>::_delete(CUevent x) { cuda::check(dispatch::cuEventDestroy(x)); }
template<class CLType, class CUType>
void Handle<CLType, CUType>::_delete(CUfunction) { }
template<class CLType, class CUType>
void Handle<CLType, CUType>::_delete(CUmodule x) { cuModuleUnload(x); }
void Handle<CLType, CUType>::_delete(CUmodule x) { cuda::check(dispatch::cuModuleUnload(x)); }
template<class CLType, class CUType>
void Handle<CLType, CUType>::_delete(cu_event_t x) { _delete(x.first); _delete(x.second); }
#endif
//OpenCL
template<class CLType, class CUType>
void Handle<CLType, CUType>::release(cl_context x) { ocl::check(dispatch::clReleaseContext(x)); }
template<class CLType, class CUType>
void Handle<CLType, CUType>::release(cl_context x) { ocl::check(clReleaseContext(x)); }
void Handle<CLType, CUType>::release(cl_mem x) { ocl::check(dispatch::clReleaseMemObject(x)); }
template<class CLType, class CUType>
void Handle<CLType, CUType>::release(cl_mem x) { ocl::check(clReleaseMemObject(x)); }
void Handle<CLType, CUType>::release(cl_command_queue x) { ocl::check(dispatch::clReleaseCommandQueue(x)); }
template<class CLType, class CUType>
void Handle<CLType, CUType>::release(cl_command_queue x) { ocl::check(clReleaseCommandQueue(x)); }
void Handle<CLType, CUType>::release(cl_device_id x) { ocl::check(dispatch::clReleaseDevice(x)); }
template<class CLType, class CUType>
void Handle<CLType, CUType>::release(cl_device_id x) { ocl::check(clReleaseDevice(x)); }
void Handle<CLType, CUType>::release(cl_event x) { ocl::check(dispatch::clReleaseEvent(x)); }
template<class CLType, class CUType>
void Handle<CLType, CUType>::release(cl_event x) { ocl::check(clReleaseEvent(x)); }
void Handle<CLType, CUType>::release(cl_kernel x) { ocl::check(dispatch::clReleaseKernel(x)); }
template<class CLType, class CUType>
void Handle<CLType, CUType>::release(cl_kernel x) { ocl::check(clReleaseKernel(x)); }
template<class CLType, class CUType>
void Handle<CLType, CUType>::release(cl_program x) { ocl::check(clReleaseProgram(x)); }
void Handle<CLType, CUType>::release(cl_program x) { ocl::check(dispatch::clReleaseProgram(x)); }
template<class CLType, class CUType>
Handle<CLType, CUType>::Handle(backend_type backend, bool take_ownership): backend_(backend), has_ownership_(take_ownership)
{
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA: cu_.reset(new CUType());
#endif
case OPENCL: cl_.reset(new CLType());
}
}
@@ -71,10 +67,8 @@ Handle<CLType, CUType>::Handle(backend_type backend, bool take_ownership): backe
template<class CLType, class CUType>
bool Handle<CLType, CUType>::operator==(Handle const & other) const
{
#ifdef ISAAC_WITH_CUDA
if(backend_==CUDA && other.backend_==CUDA)
return cu()==other.cu();
#endif
if(backend_==OPENCL && other.backend_==OPENCL)
return cl()==other.cl();
return false;
@@ -83,28 +77,22 @@ bool Handle<CLType, CUType>::operator==(Handle const & other) const
template<class CLType, class CUType>
bool Handle<CLType, CUType>::operator<(Handle const & other) const
{
#ifdef ISAAC_WITH_CUDA
if(backend_==CUDA && other.backend_==CUDA)
return (*cu_)<(*other.cu_);
#endif
if(backend_==OPENCL && other.backend_==OPENCL)
return (*cl_)<(*other.cl_);
#ifdef ISAAC_WITH_CUDA
if(backend_==CUDA && other.backend_==OPENCL)
return true;
#endif
return false;
}
template<class CLType, class CUType>
Handle<CLType, CUType>::~Handle()
{
#ifdef ISAAC_WITH_CUDA
if(has_ownership_ && cu_ && cu_.unique() && *cu_){
if(backend_==CUDA && has_ownership_ && cu_ && cu_.unique() && *cu_){
_delete(*cu_);
}
#endif
if(has_ownership_ && cl_ && cl_.unique() && *cl_)
if(backend_==OPENCL && has_ownership_ && cl_ && cl_.unique() && *cl_)
release(*cl_);
}
@@ -116,7 +104,6 @@ template<class CLType, class CUType>
CLType const & Handle<CLType, CUType>::cl() const
{ return *cl_; }
#ifdef ISAAC_WITH_CUDA
template<class CLType, class CUType>
CUType & Handle<CLType, CUType>::cu()
{
@@ -136,16 +123,6 @@ template class Handle<cl_device_id, CUdevice>;
template class Handle<cl_event, cu_event_t>;
template class Handle<cl_kernel, CUfunction>;
template class Handle<cl_program, CUmodule>;
#else
template class Handle<cl_mem, void>;
template class Handle<cl_command_queue, void>;
template class Handle<cl_context, void>;
template class Handle<cl_device_id, void>;
template class Handle<cl_event, void>;
template class Handle<cl_kernel, void>;
template class Handle<cl_program, void>;
#endif
}
}

View File

@@ -44,7 +44,7 @@ struct info<cl_mem>
static void get(cl_mem handle, cl_mem_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret)
{
cl_int err = clGetMemObjectInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
cl_int err = dispatch::clGetMemObjectInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
check(err);
}
};
@@ -56,7 +56,7 @@ struct info<cl_device_id>
static void get(cl_device_id handle, cl_device_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret)
{
cl_int err = clGetDeviceInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
cl_int err = dispatch::clGetDeviceInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
check(err);
}
};
@@ -67,12 +67,12 @@ struct info<cl_kernel>
typedef cl_kernel_info type;
static void get(cl_kernel handle, cl_kernel_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
cl_int err = clGetKernelInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
cl_int err = dispatch::clGetKernelInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
check(err);
}
static void get(cl_kernel handle, cl_device_id dev_id, cl_kernel_work_group_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
cl_int err = clGetKernelWorkGroupInfo(handle, dev_id, param_name,param_value_size,param_value,param_value_size_ret);
cl_int err = dispatch::clGetKernelWorkGroupInfo(handle, dev_id, param_name,param_value_size,param_value,param_value_size_ret);
check(err);
}
};
@@ -83,7 +83,7 @@ struct info<cl_context>
typedef cl_context_info type;
static void get(cl_context handle, cl_context_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
cl_int err = clGetContextInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
cl_int err = dispatch::clGetContextInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
check(err);
}
};
@@ -94,12 +94,12 @@ struct info<cl_program>
typedef cl_program_info type;
static void get(cl_program handle, cl_program_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
cl_int err = clGetProgramInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
cl_int err = dispatch::clGetProgramInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
check(err);
}
static void get(cl_program handle, cl_device_id device, cl_program_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
cl_int err = clGetProgramBuildInfo(handle,device,param_name,param_value_size,param_value,param_value_size_ret);
cl_int err = dispatch::clGetProgramBuildInfo(handle,device,param_name,param_value_size,param_value,param_value_size_ret);
check(err);
}
};
@@ -110,7 +110,7 @@ struct info<cl_event>
{
typedef cl_profiling_info type;
static void get(cl_event handle, cl_profiling_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
cl_int err = clGetEventProfilingInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
cl_int err = dispatch::clGetEventProfilingInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
check(err);
}
};
@@ -120,7 +120,7 @@ struct info<cl_command_queue>
{
typedef cl_command_queue_info type;
static void get(cl_command_queue handle, cl_profiling_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
cl_int err = clGetCommandQueueInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
cl_int err = dispatch::clGetCommandQueueInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
check(err);
}
};
@@ -130,7 +130,7 @@ struct info<cl_platform_id>
{
typedef cl_command_queue_info type;
static void get(cl_platform_id handle, cl_profiling_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
cl_int err = clGetPlatformInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
cl_int err = dispatch::clGetPlatformInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
check(err);
}
};
@@ -343,7 +343,7 @@ inline typename detail::return_type<cl_program, CL_PROGRAM_BINARIES>::Result inf
std::vector<size_t> sizes = info<CL_PROGRAM_BINARY_SIZES>(handle);
for(size_t s: sizes)
res.push_back(new unsigned char[s]);
clGetProgramInfo(handle, CL_PROGRAM_BINARIES, sizeof(unsigned char**), (void*)res.data(), NULL);
dispatch::clGetProgramInfo(handle, CL_PROGRAM_BINARIES, sizeof(unsigned char**), (void*)res.data(), NULL);
return res;
}

View File

@@ -13,16 +13,14 @@ Kernel::Kernel(Program const & program, const char * name) : backend_(program.ba
{
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA:
cu_params_store_.reserve(32);
cu_params_.reserve(32);
cuda::check(cuModuleGetFunction(&h_.cu(), program.h_.cu(), name));\
cu_params_store_.reserve(64);
cu_params_.reserve(64);
cuda::check(dispatch::cuModuleGetFunction(&h_.cu(), program.h_.cu(), name));
break;
#endif
case OPENCL:
cl_int err;
h_.cl() = clCreateKernel(program.h_.cl(), name, &err);
h_.cl() = dispatch::clCreateKernel(program.h_.cl(), name, &err);
ocl::check(err);
break;
default:
@@ -34,7 +32,6 @@ void Kernel::setArg(unsigned int index, std::size_t size, void* ptr)
{
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA:
if(index + 1> cu_params_store_.size())
{
@@ -45,9 +42,8 @@ void Kernel::setArg(unsigned int index, std::size_t size, void* ptr)
memcpy(cu_params_store_[index].get(), ptr, size);
cu_params_[index] = cu_params_store_[index].get();
break;
#endif
case OPENCL:
ocl::check(clSetKernelArg(h_.cl(), index, size, ptr));
ocl::check(dispatch::clSetKernelArg(h_.cl(), index, size, ptr));
break;
default:
throw;
@@ -58,14 +54,12 @@ void Kernel::setArg(unsigned int index, Buffer const & data)
{
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA:
{
setArg(index, sizeof(CUdeviceptr), (void*)&data.h_.cu()); break;
}
#endif
case OPENCL:
ocl::check(clSetKernelArg(h_.cl(), index, sizeof(cl_mem), (void*)&data.h_.cl()));
ocl::check(dispatch::clSetKernelArg(h_.cl(), index, sizeof(cl_mem), (void*)&data.h_.cl()));
break;
default: throw;
}
@@ -75,14 +69,12 @@ void Kernel::setSizeArg(unsigned int index, size_t N)
{
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA:
{
int NN = static_cast<cl_int>(N);
setArg(index, sizeof(int), &NN);
break;
}
#endif
case OPENCL:
{
cl_int NN = static_cast<cl_int>(N);

View File

@@ -11,13 +11,11 @@ namespace isaac
namespace driver
{
#ifdef ISAAC_WITH_CUDA
Platform::Platform(backend_type backend): backend_(backend)
{
if(backend==CUDA)
cuInit(0);
dispatch::cuInit(0);
}
#endif
Platform::Platform(cl_platform_id const & platform) : backend_(OPENCL)
{
@@ -28,13 +26,12 @@ std::string Platform::version() const
{
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA:
int version;
cuDriverGetVersion(&version);
dispatch::cuDriverGetVersion(&version);
return tools::to_string(version);
#endif
case OPENCL: return ocl::info<CL_PLATFORM_VERSION>(cl_platform_);
case OPENCL:
return ocl::info<CL_PLATFORM_VERSION>(cl_platform_);
default: throw;
}
}
@@ -42,10 +39,7 @@ std::string Platform::name() const
{
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA: return "CUDA";
#endif
case OPENCL: return ocl::info<CL_PLATFORM_NAME>(cl_platform_);
default: throw;
}
@@ -60,22 +54,20 @@ void Platform::devices(std::vector<Device> & devices) const
{
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA:
{
int N;
cuda::check(cuDeviceGetCount(&N));
cuda::check(dispatch::cuDeviceGetCount(&N));
for(int i = 0 ; i < N ; ++i)
devices.push_back(Device(i));
break;
}
#endif
case OPENCL:
{
cl_uint ndevices;
ocl::check(clGetDeviceIDs(cl_platform_, CL_DEVICE_TYPE_ALL, 0, NULL, &ndevices));
ocl::check(dispatch::clGetDeviceIDs(cl_platform_, CL_DEVICE_TYPE_ALL, 0, NULL, &ndevices));
std::vector<cl_device_id> device_ids(ndevices);
ocl::check(clGetDeviceIDs(cl_platform_, CL_DEVICE_TYPE_ALL, ndevices, device_ids.data(), NULL));
ocl::check(dispatch::clGetDeviceIDs(cl_platform_, CL_DEVICE_TYPE_ALL, ndevices, device_ids.data(), NULL));
for(cl_device_id d : device_ids)
devices.push_back(Device(d));
break;

View File

@@ -4,9 +4,7 @@
#include "isaac/driver/program.h"
#include "isaac/driver/context.h"
#ifdef ISAAC_WITH_CUDA
#include "helpers/cuda/vector.hpp"
#endif
#include "helpers/ocl/infos.hpp"
#include "sha1.hpp"
@@ -23,7 +21,6 @@ Program::Program(Context const & context, std::string const & source) : backend_
std::string cache_path = context.cache_path_;
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA:
{
@@ -34,7 +31,7 @@ Program::Program(Context const & context, std::string const & source) : backend_
//Load cached program
if(cache_path.size() && std::ifstream(fname, std::ios::binary))
{
cuda::check(cuModuleLoad(&h_.cu(), fname.c_str()));
cuda::check(dispatch::cuModuleLoad(&h_.cu(), fname.c_str()));
break;
}
@@ -43,25 +40,25 @@ Program::Program(Context const & context, std::string const & source) : backend_
const char * includes[] = {"helper_math.h"};
const char * src[] = {helpers::cuda::vector};
nvrtc::check(nvrtcCreateProgram(&prog, source.c_str(), NULL, 1, src, includes));
nvrtc::check(dispatch::nvrtcCreateProgram(&prog, source.c_str(), NULL, 1, src, includes));
try{
const char * options[] = {"--gpu-architecture=compute_52", "--restrict"};
nvrtc::check(nvrtcCompileProgram(prog, 2, options));
nvrtc::check(dispatch::nvrtcCompileProgram(prog, 2, options));
}catch(nvrtc::exception::compilation const &)
{
size_t logsize;
nvrtc::check(nvrtcGetProgramLogSize(prog, &logsize));
nvrtc::check(dispatch::nvrtcGetProgramLogSize(prog, &logsize));
std::string log(logsize, 0);
nvrtc::check(nvrtcGetProgramLog(prog, (char*)log.data()));
nvrtc::check(dispatch::nvrtcGetProgramLog(prog, (char*)log.data()));
std::cout << "Compilation failed:" << std::endl;
std::cout << log << std::endl;
}
size_t ptx_size;
nvrtc::check(nvrtcGetPTXSize(prog, &ptx_size));
nvrtc::check(dispatch::nvrtcGetPTXSize(prog, &ptx_size));
std::vector<char> ptx(ptx_size);
nvrtc::check(nvrtcGetPTX(prog, ptx.data()));
cuda::check(cuModuleLoadDataEx(&h_.cu(), ptx.data(), 0, NULL, NULL));
nvrtc::check(dispatch::nvrtcGetPTX(prog, ptx.data()));
cuda::check(dispatch::cuModuleLoadDataEx(&h_.cu(), ptx.data(), 0, NULL, NULL));
//Save cached program
if (cache_path.size())
@@ -77,8 +74,8 @@ Program::Program(Context const & context, std::string const & source) : backend_
// oss.close();
// system(("/usr/local/cuda-7.0/bin/nvcc " + sha1 + ".cu -gencode arch=compute_50,code=sm_50 -cubin").c_str());
// system(("perl /home/philippe/Development/maxas/maxas.pl -e " + sha1 + ".cubin > " + sha1 + ".sass").c_str());
// system(("perl /home/philippe/Development/maxas/maxas.pl -i --noreuse" + sha1 + ".sass " + sha1 + ".cubin").c_str());
// system(("perl /maxas.pl -e " + sha1 + ".cubin > " + sha1 + ".sass").c_str());
// system(("perl /maxas.pl -i --noreuse" + sha1 + ".sass " + sha1 + ".cubin").c_str());
// std::ifstream ifs(sha1 + ".cubin");
// std::cout << sha1 << std::endl;
@@ -90,11 +87,10 @@ Program::Program(Context const & context, std::string const & source) : backend_
// str.assign((std::istreambuf_iterator<char>(ifs)),
// std::istreambuf_iterator<char>());
// cuda::check(cuModuleLoadDataEx(&h_.cu(), str.c_str(), 0, NULL, NULL));
// cuda::check(dispatch::cuModuleLoadDataEx(&h_.cu(), str.c_str(), 0, NULL, NULL));
break;
}
#endif
case OPENCL:
{
cl_int err;
@@ -118,18 +114,18 @@ Program::Program(Context const & context, std::string const & source) : backend_
buffer.resize(len);
cached.read((char*)buffer.data(), std::streamsize(len));
char* cbuffer = buffer.data();
h_.cl() = clCreateProgramWithBinary(context_.h_.cl(), static_cast<cl_uint>(devices.size()), devices.data(), &len, (const unsigned char **)&cbuffer, NULL, &err);
h_.cl() = dispatch::clCreateProgramWithBinary(context_.h_.cl(), static_cast<cl_uint>(devices.size()), devices.data(), &len, (const unsigned char **)&cbuffer, NULL, &err);
ocl::check(err);
ocl::check(clBuildProgram(h_.cl(), static_cast<cl_uint>(devices.size()), devices.data(), build_opt.c_str(), NULL, NULL));
ocl::check(dispatch::clBuildProgram(h_.cl(), static_cast<cl_uint>(devices.size()), devices.data(), build_opt.c_str(), NULL, NULL));
return;
}
}
std::size_t srclen = source.size();
const char * csrc = source.c_str();
h_.cl() = clCreateProgramWithSource(context_.h_.cl(), 1, &csrc, &srclen, &err);
h_.cl() = dispatch::clCreateProgramWithSource(context_.h_.cl(), 1, &csrc, &srclen, &err);
try{
ocl::check(clBuildProgram(h_.cl(), static_cast<cl_uint>(devices.size()), devices.data(), build_opt.c_str(), NULL, NULL));
ocl::check(dispatch::clBuildProgram(h_.cl(), static_cast<cl_uint>(devices.size()), devices.data(), build_opt.c_str(), NULL, NULL));
}catch(ocl::exception::build_program_failure const &){
for(std::vector<cl_device_id>::const_iterator it = devices.begin(); it != devices.end(); ++it)
{

View File

@@ -12,10 +12,10 @@ std::string const & keyword::get() const
{
switch(backend_)
{
case driver::OPENCL: return opencl_;
#ifdef ISAAC_WITH_CUDA
case driver::CUDA: return cuda_;
#endif
case driver::OPENCL:
return opencl_;
case driver::CUDA:
return cuda_;
default: throw;
}
}

View File

@@ -42,10 +42,10 @@ std::string axpy::generate_impl(std::string const & suffix, expressions_tuple co
switch(backend)
{
#ifdef ISAAC_WITH_CUDA
case driver::CUDA: stream << "#include \"helper_math.h\"" << std::endl; break;
#endif
case driver::OPENCL: stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break;
case driver::CUDA:
stream << "#include \"helper_math.h\"" << std::endl; break;
case driver::OPENCL:
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break;
}
stream << KernelPrefix(backend) << " void " << "axpy" << suffix << "(" << _size_t << " N," << generate_arguments(dtype, device, mappings, expressions) << ")" << std::endl;

View File

@@ -98,10 +98,10 @@ std::string dot::generate_impl(std::string const & suffix, expressions_tuple con
* -----------------------*/
switch(backend)
{
#ifdef ISAAC_WITH_CUDA
case driver::CUDA: stream << "#include \"helper_math.h\"" << std::endl; break;
#endif
case driver::OPENCL: stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl; break;
case driver::CUDA:
stream << "#include \"helper_math.h\"" << std::endl; break;
case driver::OPENCL:
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl; break;
}
stream << KernelPrefix(backend) << " void " << name[0] << "(" << arguments << generate_arguments("#scalartype", device, mappings, expressions) << ")" << std::endl;

View File

@@ -143,10 +143,10 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
switch(backend)
{
#ifdef ISAAC_WITH_CUDA
case driver::CUDA: stream << "#include \"helper_math.h\"" << std::endl; break;
#endif
case driver::OPENCL: stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break;
case driver::CUDA:
stream << "#include \"helper_math.h\"" << std::endl; break;
case driver::OPENCL:
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break;
}
stream << KernelPrefix(backend) << " void " << gemm_name << "(" << _size_t << " M, " << _size_t << " N, " << _size_t << " K, "

View File

@@ -74,10 +74,10 @@ std::string gemv::generate_impl(std::string const & suffix, expressions_tuple co
int col_simd_width = (dot_type_ == REDUCE_COLUMNS) ? 1 : p_.simd_width;
switch(backend)
{
#ifdef ISAAC_WITH_CUDA
case driver::CUDA: stream << "#include \"helper_math.h\"" << std::endl; break;
#endif
case driver::OPENCL: stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break;
case driver::CUDA:
stream << "#include \"helper_math.h\"" << std::endl; break;
case driver::OPENCL:
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break;
}
stream << KernelPrefix(backend) << " void " << name[0] << "(" << arguments << generate_arguments("#scalartype", device, mappings, expressions) << ")" << std::endl;

View File

@@ -39,10 +39,10 @@ std::string ger::generate_impl(std::string const & suffix, expressions_tuple con
switch(backend)
{
#ifdef ISAAC_WITH_CUDA
case driver::CUDA: stream << "#include \"helper_math.h\"" << std::endl; break;
#endif
case driver::OPENCL: stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break;
case driver::CUDA:
stream << "#include \"helper_math.h\"" << std::endl; break;
case driver::OPENCL:
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break;
}
stream << KernelPrefix(backend) << " void axpy" << suffix << "(" << _size_t << " M, " << _size_t << " N, " << generate_arguments("#scalartype", device, mappings, expressions) << ")" << std::endl;

View File

@@ -33,9 +33,9 @@ extern "C"
if(events)
{
events[i] = levents.front().handle().cl();
clRetainEvent(events[i]);
is::driver::dispatch::clRetainEvent(events[i]);
}
clFlush(commandQueues[i]);
is::driver::dispatch::clFlush(commandQueues[i]);
}
}

View File

@@ -60,7 +60,7 @@ def main():
def find_opencl():
cvars = sysconfig.get_config_vars()
lib = find_library('OpenCL', '/opt/adreno-driver*/lib' if for_android else '/opt/AMDAPPSDK*/lib/x86_64')
lib = find_library('OpenCL', '' if for_android else '')
return {'include': '', 'lib': dirname(lib)} if lib else None
def find_in_path(name, path):
@@ -124,7 +124,7 @@ def main():
libraries += ['gnustl_shared']
#Source files
src = 'src/lib/symbolic/preset.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/io.cpp src/lib/symbolic/expression.cpp src/lib/array.cpp src/lib/value_scalar.cpp src/lib/driver/backend.cpp src/lib/driver/device.cpp src/lib/driver/kernel.cpp src/lib/driver/buffer.cpp src/lib/driver/platform.cpp src/lib/driver/check.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/program_cache.cpp src/lib/driver/context.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/handle.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/profiles/presets.cpp src/lib/profiles/profiles.cpp src/lib/profiles/predictors/random_forest.cpp src/lib/kernels/templates/gemv.cpp src/lib/kernels/templates/axpy.cpp src/lib/kernels/templates/gemm.cpp src/lib/kernels/templates/ger.cpp src/lib/kernels/templates/dot.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/stream.cpp src/lib/kernels/parse.cpp src/lib/kernels/keywords.cpp src/lib/kernels/binder.cpp src/lib/wrap/clBLAS.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
src = 'src/lib/symbolic/preset.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/io.cpp src/lib/symbolic/expression.cpp src/lib/array.cpp src/lib/value_scalar.cpp src/lib/driver/backend.cpp src/lib/driver/device.cpp src/lib/driver/kernel.cpp src/lib/driver/buffer.cpp src/lib/driver/platform.cpp src/lib/driver/check.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/dispatch.cpp src/lib/driver/program_cache.cpp src/lib/driver/context.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/handle.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/profiles/presets.cpp src/lib/profiles/profiles.cpp src/lib/profiles/predictors/random_forest.cpp src/lib/kernels/templates/gemv.cpp src/lib/kernels/templates/axpy.cpp src/lib/kernels/templates/gemm.cpp src/lib/kernels/templates/ger.cpp src/lib/kernels/templates/dot.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/stream.cpp src/lib/kernels/parse.cpp src/lib/kernels/keywords.cpp src/lib/kernels/binder.cpp src/lib/wrap/clBLAS.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
boostsrc = 'external/boost/libs/'
for s in ['numpy','python','smart_ptr','system','thread']:
src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]

View File

@@ -107,9 +107,7 @@ void export_driver()
bp::enum_<sc::driver::backend_type>
("backend_type")
.value("OPENCL", sc::driver::OPENCL)
#ifdef ISAAC_WITH_CUDA
.value("CUDA", sc::driver::CUDA)
#endif
;
bp::enum_<sc::driver::Device::Type>

View File

@@ -7,5 +7,5 @@ get_property(ISAAC_PATH TARGET isaac PROPERTY LOCATION)
foreach(PROG axpy dot ger gemv gemm)
add_executable(${PROG}-test ${PROG}.cpp)
add_test(${PROG} ${PROG}-test)
target_link_libraries(${PROG}-test isaac ${OPENCL_LIBRARIES})
target_link_libraries(${PROG}-test isaac)
endforeach(PROG)