Driver: now loading the backend dynamically on Linux
This commit is contained in:
@@ -8,14 +8,6 @@ add_custom_target( MAKE_HEADERS_VISIBLE SOURCES ${MAKE_HEADERS_VISIBLE_SRC} )
|
||||
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}/lib/external/)
|
||||
|
||||
#CUDA and OpenCL
|
||||
find_package(CUDA QUIET)
|
||||
find_package(OpenCL QUIET REQUIRED)
|
||||
if(CUDA_FOUND)
|
||||
set(BACKEND_DEFINES "-DISAAC_WITH_CUDA")
|
||||
include_directories(${CUDA_INCLUDE_DIRS})
|
||||
endif()
|
||||
|
||||
#Compiler flags
|
||||
add_definitions(${BACKEND_DEFINES})
|
||||
if(WIN32)
|
||||
|
@@ -51,5 +51,5 @@ foreach(PROG blas)
|
||||
add_executable(${PROG}-bench ${PROG}.cpp)
|
||||
set_target_properties(${PROG}-bench PROPERTIES COMPILE_FLAGS "${BLAS_DEF_STR}")
|
||||
endif()
|
||||
target_link_libraries(${PROG}-bench ${BLAS_LIBS} isaac ${OPENCL_LIBRARIES} )
|
||||
target_link_libraries(${PROG}-bench ${BLAS_LIBS} isaac)
|
||||
endforeach(PROG)
|
||||
|
11758
include/CL/cl.hpp
11758
include/CL/cl.hpp
File diff suppressed because it is too large
Load Diff
@@ -1,126 +0,0 @@
|
||||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2012 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
|
||||
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
|
||||
|
||||
#ifndef __OPENCL_CL_D3D10_H
|
||||
#define __OPENCL_CL_D3D10_H
|
||||
|
||||
#include <d3d10.h>
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/******************************************************************************
|
||||
* cl_khr_d3d10_sharing */
|
||||
#define cl_khr_d3d10_sharing 1
|
||||
|
||||
typedef cl_uint cl_d3d10_device_source_khr;
|
||||
typedef cl_uint cl_d3d10_device_set_khr;
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
// Error Codes
|
||||
#define CL_INVALID_D3D10_DEVICE_KHR -1002
|
||||
#define CL_INVALID_D3D10_RESOURCE_KHR -1003
|
||||
#define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004
|
||||
#define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005
|
||||
|
||||
// cl_d3d10_device_source_khr
|
||||
#define CL_D3D10_DEVICE_KHR 0x4010
|
||||
#define CL_D3D10_DXGI_ADAPTER_KHR 0x4011
|
||||
|
||||
// cl_d3d10_device_set_khr
|
||||
#define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012
|
||||
#define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013
|
||||
|
||||
// cl_context_info
|
||||
#define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014
|
||||
#define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C
|
||||
|
||||
// cl_mem_info
|
||||
#define CL_MEM_D3D10_RESOURCE_KHR 0x4015
|
||||
|
||||
// cl_image_info
|
||||
#define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016
|
||||
|
||||
// cl_command_type
|
||||
#define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017
|
||||
#define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)(
|
||||
cl_platform_id platform,
|
||||
cl_d3d10_device_source_khr d3d_device_source,
|
||||
void * d3d_object,
|
||||
cl_d3d10_device_set_khr d3d_device_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id * devices,
|
||||
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D10Buffer * resource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D10Texture2D * resource,
|
||||
UINT subresource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D10Texture3D * resource,
|
||||
UINT subresource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __OPENCL_CL_D3D10_H
|
||||
|
@@ -1,162 +0,0 @@
|
||||
/**********************************************************************************
|
||||
* Copyright (c) 2008 - 2012 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_GL_H
|
||||
#define __OPENCL_CL_GL_H
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/cl.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef cl_uint cl_gl_object_type;
|
||||
typedef cl_uint cl_gl_texture_info;
|
||||
typedef cl_uint cl_gl_platform_info;
|
||||
typedef struct __GLsync *cl_GLsync;
|
||||
|
||||
/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */
|
||||
#define CL_GL_OBJECT_BUFFER 0x2000
|
||||
#define CL_GL_OBJECT_TEXTURE2D 0x2001
|
||||
#define CL_GL_OBJECT_TEXTURE3D 0x2002
|
||||
#define CL_GL_OBJECT_RENDERBUFFER 0x2003
|
||||
#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E
|
||||
#define CL_GL_OBJECT_TEXTURE1D 0x200F
|
||||
#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010
|
||||
#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011
|
||||
|
||||
/* cl_gl_texture_info */
|
||||
#define CL_GL_TEXTURE_TARGET 0x2004
|
||||
#define CL_GL_MIPMAP_LEVEL 0x2005
|
||||
#define CL_GL_NUM_SAMPLES 0x2012
|
||||
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLBuffer(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
cl_GLuint /* bufobj */,
|
||||
int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
cl_GLenum /* target */,
|
||||
cl_GLint /* miplevel */,
|
||||
cl_GLuint /* texture */,
|
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLRenderbuffer(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
cl_GLuint /* renderbuffer */,
|
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetGLObjectInfo(cl_mem /* memobj */,
|
||||
cl_gl_object_type * /* gl_object_type */,
|
||||
cl_GLuint * /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetGLTextureInfo(cl_mem /* memobj */,
|
||||
cl_gl_texture_info /* param_name */,
|
||||
size_t /* param_value_size */,
|
||||
void * /* param_value */,
|
||||
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireGLObjects(cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_objects */,
|
||||
const cl_mem * /* mem_objects */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseGLObjects(cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_objects */,
|
||||
const cl_mem * /* mem_objects */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
|
||||
// Deprecated OpenCL 1.1 APIs
|
||||
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture2D(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
cl_GLenum /* target */,
|
||||
cl_GLint /* miplevel */,
|
||||
cl_GLuint /* texture */,
|
||||
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||
|
||||
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture3D(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
cl_GLenum /* target */,
|
||||
cl_GLint /* miplevel */,
|
||||
cl_GLuint /* texture */,
|
||||
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||
|
||||
/* cl_khr_gl_sharing extension */
|
||||
|
||||
#define cl_khr_gl_sharing 1
|
||||
|
||||
typedef cl_uint cl_gl_context_info;
|
||||
|
||||
/* Additional Error Codes */
|
||||
#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000
|
||||
|
||||
/* cl_gl_context_info */
|
||||
#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006
|
||||
#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007
|
||||
|
||||
/* Additional cl_context_properties */
|
||||
#define CL_GL_CONTEXT_KHR 0x2008
|
||||
#define CL_EGL_DISPLAY_KHR 0x2009
|
||||
#define CL_GLX_DISPLAY_KHR 0x200A
|
||||
#define CL_WGL_HDC_KHR 0x200B
|
||||
#define CL_CGL_SHAREGROUP_KHR 0x200C
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetGLContextInfoKHR(const cl_context_properties * /* properties */,
|
||||
cl_gl_context_info /* param_name */,
|
||||
size_t /* param_value_size */,
|
||||
void * /* param_value */,
|
||||
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
|
||||
const cl_context_properties * properties,
|
||||
cl_gl_context_info param_name,
|
||||
size_t param_value_size,
|
||||
void * param_value,
|
||||
size_t * param_value_size_ret);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_GL_H */
|
@@ -1,69 +0,0 @@
|
||||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2012 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
|
||||
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
|
||||
|
||||
/* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have */
|
||||
/* OpenGL dependencies. */
|
||||
|
||||
#ifndef __OPENCL_CL_GL_EXT_H
|
||||
#define __OPENCL_CL_GL_EXT_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/cl_gl.h>
|
||||
#else
|
||||
#include <CL/cl_gl.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* For each extension, follow this template
|
||||
* cl_VEN_extname extension */
|
||||
/* #define cl_VEN_extname 1
|
||||
* ... define new types, if any
|
||||
* ... define new tokens, if any
|
||||
* ... define new APIs, if any
|
||||
*
|
||||
* If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header
|
||||
* This allows us to avoid having to decide whether to include GL headers or GLES here.
|
||||
*/
|
||||
|
||||
/*
|
||||
* cl_khr_gl_event extension
|
||||
* See section 9.9 in the OpenCL 1.1 spec for more information
|
||||
*/
|
||||
#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D
|
||||
|
||||
extern CL_API_ENTRY cl_event CL_API_CALL
|
||||
clCreateEventFromGLsyncKHR(cl_context /* context */,
|
||||
cl_GLsync /* cl_GLsync */,
|
||||
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_GL_EXT_H */
|
File diff suppressed because it is too large
Load Diff
@@ -1,54 +0,0 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2012 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
******************************************************************************/
|
||||
|
||||
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
|
||||
|
||||
#ifndef __OPENCL_H
|
||||
#define __OPENCL_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef __APPLE__
|
||||
|
||||
#include <OpenCL/cl.h>
|
||||
#include <OpenCL/cl_gl.h>
|
||||
#include <OpenCL/cl_gl_ext.h>
|
||||
#include <OpenCL/cl_ext.h>
|
||||
|
||||
#else
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_gl.h>
|
||||
#include <CL/cl_gl_ext.h>
|
||||
#include <CL/cl_ext.h>
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_H */
|
||||
|
@@ -8,7 +8,7 @@
|
||||
#include "isaac/common/expression_type.h"
|
||||
#include "isaac/common/numeric_type.h"
|
||||
|
||||
#include "isaac/driver/common.h"
|
||||
#include "isaac/driver/dispatch.h"
|
||||
#include "isaac/defines.h"
|
||||
#include "isaac/types.h"
|
||||
|
||||
|
@@ -2,13 +2,7 @@
|
||||
#define ISAAC_DRIVER_COMMON_H
|
||||
#include <exception>
|
||||
|
||||
//OpenCL Backend
|
||||
#include "isaac/driver/external/CL/cl.h"
|
||||
#include "isaac/driver/external/CL/cl_ext.h"
|
||||
//CUDA Backend
|
||||
#include "isaac/driver/external/CUDA/cuda.h"
|
||||
#include "isaac/driver/external/CUDA/nvrtc.h"
|
||||
|
||||
#include "isaac/driver/dispatch.h"
|
||||
#include "isaac/defines.h"
|
||||
|
||||
DISABLE_MSVC_WARNING_C4275
|
||||
@@ -20,10 +14,8 @@ namespace driver
|
||||
|
||||
enum backend_type
|
||||
{
|
||||
OPENCL
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
,CUDA
|
||||
#endif
|
||||
OPENCL,
|
||||
CUDA
|
||||
};
|
||||
|
||||
|
||||
|
@@ -26,6 +26,7 @@ public:
|
||||
CPU = CL_DEVICE_TYPE_CPU,
|
||||
ACCELERATOR = CL_DEVICE_TYPE_ACCELERATOR
|
||||
};
|
||||
|
||||
enum class Vendor
|
||||
{
|
||||
AMD,
|
||||
@@ -42,14 +43,11 @@ public:
|
||||
};
|
||||
|
||||
private:
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
template<CUdevice_attribute attr>
|
||||
int cuGetInfo() const;
|
||||
#endif
|
||||
|
||||
public:
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
Device(int ordinal);
|
||||
#endif
|
||||
Device(cl_device_id const & device, bool take_ownership = true);
|
||||
|
||||
bool operator==(Device const &) const;
|
||||
|
203
include/isaac/driver/dispatch.h
Normal file
203
include/isaac/driver/dispatch.h
Normal file
@@ -0,0 +1,203 @@
|
||||
#ifndef ISAAC_DRIVER_DISPATCHER_H
|
||||
#define ISAAC_DRIVER_DISPATCHER_H
|
||||
|
||||
#include <stdexcept>
#include <string>
#include <type_traits>
#include <dlfcn.h>
|
||||
|
||||
//OpenCL Backend
|
||||
#include "isaac/driver/external/CL/cl.h"
|
||||
#include "isaac/driver/external/CL/cl_ext.h"
|
||||
//CUDA Backend
|
||||
#include "isaac/driver/external/CUDA/cuda.h"
|
||||
#include "isaac/driver/external/CUDA/nvrtc.h"
|
||||
|
||||
#include <iostream>
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
namespace driver
|
||||
{
|
||||
|
||||
|
||||
class dispatch
|
||||
{
|
||||
private:
|
||||
// Compile-time trait exposing, as `type`, the return type of a plain
// function-pointer type Ret (*)(Params...). The primary template is only
// declared here, so instantiating it with any other kind of type is a
// compile error at the point of use.
template <typename FunPtrT>
struct return_type;

template <typename Ret, typename... Params>
struct return_type<Ret (*)(Params...)>
{
  using type = Ret;
};
|
||||
|
||||
typedef bool (*f_init_t)();
|
||||
|
||||
template<f_init_t initializer, typename FunPtrT, typename... Args>
|
||||
static typename return_type<FunPtrT>::type f_impl(void*& lib_h, FunPtrT, void*& cache, const char * name, Args... args)
|
||||
{
|
||||
initializer();
|
||||
if(cache == nullptr)
|
||||
cache = dlsym(lib_h, name);
|
||||
return (*(FunPtrT)cache)(args...);
|
||||
}
|
||||
|
||||
public:
|
||||
// Backend initialisers. Each has the f_init_t signature (bool()), i.e. the
// type f_impl takes as its `initializer` template argument.
// NOTE(review): presumably each one loads the matching shared library
// (OpenCL / CUDA driver / NVRTC) and reports whether that succeeded; the
// definitions are not in this header -- confirm there.
static bool clinit();
|
||||
static bool cuinit();
|
||||
static bool nvrtcinit();
|
||||
|
||||
// NOTE(review): presumably unloads whatever the initialisers opened --
// confirm against the definition.
static void release();
|
||||
|
||||
//OpenCL
|
||||
static cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *);
|
||||
static cl_int clEnqueueNDRangeKernel(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *);
|
||||
static cl_int clSetKernelArg(cl_kernel, cl_uint, size_t, const void *);
|
||||
static cl_int clReleaseMemObject(cl_mem);
|
||||
static cl_int clFinish(cl_command_queue);
|
||||
static cl_int clGetMemObjectInfo(cl_mem, cl_mem_info, size_t, void *, size_t *);
|
||||
static cl_int clGetCommandQueueInfo(cl_command_queue, cl_command_queue_info, size_t, void *, size_t *);
|
||||
static cl_int clReleaseContext(cl_context);
|
||||
static cl_int clReleaseEvent(cl_event);
|
||||
static cl_int clEnqueueWriteBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *);
|
||||
static cl_int clEnqueueReadBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void *, cl_uint, const cl_event *, cl_event *);
|
||||
static cl_int clGetProgramBuildInfo(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *);
|
||||
static cl_int clReleaseDevice(cl_device_id);
|
||||
static cl_context clCreateContext(const cl_context_properties *, cl_uint, const cl_device_id *, void (*)(const char *, const void *, size_t, void *), void *, cl_int *);
|
||||
static cl_int clGetDeviceIDs(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *);
|
||||
static cl_int clGetContextInfo(cl_context, cl_context_info, size_t, void *, size_t *);
|
||||
static cl_int clGetDeviceInfo(cl_device_id, cl_device_info, size_t, void *, size_t *);
|
||||
static cl_int clReleaseCommandQueue(cl_command_queue);
|
||||
static cl_int clGetPlatformIDs(cl_uint, cl_platform_id *, cl_uint *);
|
||||
static cl_int clGetPlatformInfo(cl_platform_id, cl_platform_info, size_t, void *, size_t *);
|
||||
static cl_int clGetEventProfilingInfo(cl_event, cl_profiling_info, size_t, void *, size_t *);
|
||||
static cl_program clCreateProgramWithBinary(cl_context, cl_uint, const cl_device_id *, const size_t *, const unsigned char **, cl_int *, cl_int *);
|
||||
static cl_command_queue clCreateCommandQueue(cl_context, cl_device_id, cl_command_queue_properties, cl_int *);
|
||||
static cl_int clRetainEvent(cl_event);
|
||||
static cl_int clReleaseProgram(cl_program);
|
||||
static cl_int clFlush(cl_command_queue);
|
||||
static cl_int clGetProgramInfo(cl_program, cl_program_info, size_t, void *, size_t *);
|
||||
static cl_int clGetKernelInfo(cl_kernel, cl_kernel_info, size_t, void *, size_t *);
|
||||
static cl_int clGetKernelWorkGroupInfo(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void *, size_t *);
|
||||
static cl_kernel clCreateKernel(cl_program, const char *, cl_int *);
|
||||
static cl_mem clCreateBuffer(cl_context, cl_mem_flags, size_t, void *, cl_int *);
|
||||
static cl_program clCreateProgramWithSource(cl_context, cl_uint, const char **, const size_t *, cl_int *);
|
||||
static cl_int clReleaseKernel(cl_kernel);
|
||||
|
||||
//CUDA
|
||||
static CUresult cuCtxDestroy_v2(CUcontext ctx);
|
||||
static CUresult cuEventCreate(CUevent *phEvent, unsigned int Flags);
|
||||
static CUresult cuDeviceGet(CUdevice *device, int ordinal);
|
||||
static CUresult cuMemcpyDtoH_v2(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount);
|
||||
static CUresult cuStreamCreate(CUstream *phStream, unsigned int Flags);
|
||||
static CUresult cuEventElapsedTime(float *pMilliseconds, CUevent hStart, CUevent hEnd);
|
||||
static CUresult cuMemFree_v2(CUdeviceptr dptr);
|
||||
static CUresult cuMemcpyDtoHAsync_v2(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
|
||||
static CUresult cuDriverGetVersion(int *driverVersion);
|
||||
static CUresult cuDeviceGetName(char *name, int len, CUdevice dev);
|
||||
static CUresult cuMemcpyHtoDAsync_v2(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream);
|
||||
static CUresult cuModuleLoad(CUmodule *module, const char *fname);
|
||||
static CUresult cuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra);
|
||||
static CUresult cuModuleUnload(CUmodule hmod);
|
||||
static CUresult cuModuleLoadDataEx(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
|
||||
static CUresult cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
|
||||
static CUresult cuDeviceGetCount(int *count);
|
||||
static CUresult cuMemcpyHtoD_v2(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
|
||||
static CUresult cuInit(unsigned int Flags);
|
||||
static CUresult cuEventRecord(CUevent hEvent, CUstream hStream);
|
||||
static CUresult cuCtxCreate_v2(CUcontext *pctx, unsigned int flags, CUdevice dev);
|
||||
static CUresult cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, const char *name);
|
||||
static CUresult cuStreamSynchronize(CUstream hStream);
|
||||
static CUresult cuStreamDestroy_v2(CUstream hStream);
|
||||
static CUresult cuEventDestroy_v2(CUevent hEvent);
|
||||
static CUresult cuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize);
|
||||
|
||||
static nvrtcResult nvrtcCompileProgram(nvrtcProgram prog, int numOptions, const char **options);
|
||||
static nvrtcResult nvrtcGetProgramLogSize(nvrtcProgram prog, size_t *logSizeRet);
|
||||
static nvrtcResult nvrtcGetPTX(nvrtcProgram prog, char *ptx);
|
||||
static nvrtcResult nvrtcGetPTXSize(nvrtcProgram prog, size_t *ptxSizeRet);
|
||||
static nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char **headers, const char **includeNames);
|
||||
static nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char *log);
|
||||
|
||||
private:
|
||||
// One opaque pointer per backend library.
// NOTE(review): presumably these are the library handles that the wrappers
// pass to f_impl as `lib_h` (and thus to dlsym()), set by
// clinit()/cuinit()/nvrtcinit() -- confirm in the implementation file.
static void* opencl_;
|
||||
static void* cuda_;
|
||||
static void* nvrtc_;
|
||||
|
||||
//OpenCL
|
||||
static void* clBuildProgram_;
|
||||
static void* clEnqueueNDRangeKernel_;
|
||||
static void* clSetKernelArg_;
|
||||
static void* clReleaseMemObject_;
|
||||
static void* clFinish_;
|
||||
static void* clGetMemObjectInfo_;
|
||||
static void* clGetCommandQueueInfo_;
|
||||
static void* clReleaseContext_;
|
||||
static void* clReleaseEvent_;
|
||||
static void* clEnqueueWriteBuffer_;
|
||||
static void* clEnqueueReadBuffer_;
|
||||
static void* clGetProgramBuildInfo_;
|
||||
static void* clReleaseDevice_;
|
||||
static void* clCreateContext_;
|
||||
static void* clGetDeviceIDs_;
|
||||
static void* clGetContextInfo_;
|
||||
static void* clGetDeviceInfo_;
|
||||
static void* clReleaseCommandQueue_;
|
||||
static void* clGetPlatformIDs_;
|
||||
static void* clGetPlatformInfo_;
|
||||
static void* clGetEventProfilingInfo_;
|
||||
static void* clCreateProgramWithBinary_;
|
||||
static void* clCreateCommandQueue_;
|
||||
static void* clRetainEvent_;
|
||||
static void* clReleaseProgram_;
|
||||
static void* clFlush_;
|
||||
static void* clGetProgramInfo_;
|
||||
static void* clGetKernelInfo_;
|
||||
static void* clGetKernelWorkGroupInfo_;
|
||||
static void* clCreateKernel_;
|
||||
static void* clCreateBuffer_;
|
||||
static void* clCreateProgramWithSource_;
|
||||
static void* clReleaseKernel_;
|
||||
|
||||
//CUDA
|
||||
static void* cuCtxDestroy_v2_;
|
||||
static void* cuEventCreate_;
|
||||
static void* cuDeviceGet_;
|
||||
static void* cuMemcpyDtoH_v2_;
|
||||
static void* cuStreamCreate_;
|
||||
static void* cuEventElapsedTime_;
|
||||
static void* cuMemFree_v2_;
|
||||
static void* cuMemcpyDtoHAsync_v2_;
|
||||
static void* cuDriverGetVersion_;
|
||||
static void* cuDeviceGetName_;
|
||||
static void* cuMemcpyHtoDAsync_v2_;
|
||||
static void* cuModuleLoad_;
|
||||
static void* cuLaunchKernel_;
|
||||
static void* cuModuleUnload_;
|
||||
static void* cuModuleLoadDataEx_;
|
||||
static void* cuDeviceGetAttribute_;
|
||||
static void* cuDeviceGetCount_;
|
||||
static void* cuMemcpyHtoD_v2_;
|
||||
static void* cuInit_;
|
||||
static void* cuEventRecord_;
|
||||
static void* cuCtxCreate_v2_;
|
||||
static void* cuModuleGetFunction_;
|
||||
static void* cuStreamSynchronize_;
|
||||
static void* cuStreamDestroy_v2_;
|
||||
static void* cuEventDestroy_v2_;
|
||||
static void* cuMemAlloc_v2_;
|
||||
|
||||
static void* nvrtcCompileProgram_;
|
||||
static void* nvrtcGetProgramLogSize_;
|
||||
static void* nvrtcGetPTX_;
|
||||
static void* nvrtcGetPTXSize_;
|
||||
static void* nvrtcCreateProgram_;
|
||||
static void* nvrtcGetProgramLog_;
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif
|
10796
include/isaac/driver/external/CUDA/cuda.h
vendored
Normal file
10796
include/isaac/driver/external/CUDA/cuda.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
392
include/isaac/driver/external/CUDA/nvrtc.h
vendored
Normal file
392
include/isaac/driver/external/CUDA/nvrtc.h
vendored
Normal file
@@ -0,0 +1,392 @@
|
||||
/*
|
||||
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#ifndef __NVRTC_H__
|
||||
#define __NVRTC_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
|
||||
/*****************************//**
|
||||
*
|
||||
* \defgroup error Error Handling
|
||||
*
|
||||
********************************/
|
||||
|
||||
|
||||
/**
|
||||
* \ingroup error
|
||||
* \brief CUDA Online Compiler API call result code.
|
||||
*/
|
||||
typedef enum {
|
||||
NVRTC_SUCCESS = 0,
|
||||
NVRTC_ERROR_OUT_OF_MEMORY = 1,
|
||||
NVRTC_ERROR_PROGRAM_CREATION_FAILURE = 2,
|
||||
NVRTC_ERROR_INVALID_INPUT = 3,
|
||||
NVRTC_ERROR_INVALID_PROGRAM = 4,
|
||||
NVRTC_ERROR_INVALID_OPTION = 5,
|
||||
NVRTC_ERROR_COMPILATION = 6,
|
||||
NVRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7
|
||||
} nvrtcResult;
|
||||
|
||||
|
||||
/**
|
||||
* \ingroup error
|
||||
* \brief ::nvrtcGetErrorString is a helper function that stringifies the
|
||||
* given #nvrtcResult code, e.g., \link #nvrtcResult NVRTC_SUCCESS
|
||||
* \endlink to \c "NVRTC_SUCCESS". For unrecognized enumeration
|
||||
* values, it returns \c "NVRTC_ERROR unknown".
|
||||
*
|
||||
* \param [in] result CUDA Online Compiler API result code.
|
||||
* \return Message string for the given #nvrtcResult code.
|
||||
*/
|
||||
const char *nvrtcGetErrorString(nvrtcResult result);
|
||||
|
||||
|
||||
/****************************************//**
|
||||
*
|
||||
* \defgroup query General Information Query
|
||||
*
|
||||
*******************************************/
|
||||
|
||||
|
||||
/**
|
||||
* \ingroup query
|
||||
* \brief ::nvrtcVersion sets the output parameters \p major and \p minor
|
||||
* with the CUDA Online Compiler version number.
|
||||
*
|
||||
* \param [out] major CUDA Online Compiler major version number.
|
||||
* \param [out] minor CUDA Online Compiler minor version number.
|
||||
* \return
|
||||
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
||||
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
||||
*
|
||||
*/
|
||||
nvrtcResult nvrtcVersion(int *major, int *minor);
|
||||
|
||||
|
||||
/********************************//**
|
||||
*
|
||||
* \defgroup compilation Compilation
|
||||
*
|
||||
***********************************/
|
||||
|
||||
|
||||
/**
|
||||
* \ingroup compilation
|
||||
* \brief ::nvrtcProgram is the unit of compilation, and an opaque handle for
|
||||
* a program.
|
||||
*
|
||||
* To compile a CUDA program string, an instance of nvrtcProgram must be
|
||||
* created first with ::nvrtcCreateProgram, then compiled with
|
||||
* ::nvrtcCompileProgram.
|
||||
*/
|
||||
typedef struct _nvrtcProgram *nvrtcProgram;
|
||||
|
||||
|
||||
/**
|
||||
* \ingroup compilation
|
||||
* \brief ::nvrtcCreateProgram creates an instance of ::nvrtcProgram with the
|
||||
* given input parameters, and sets the output parameter \p prog with
|
||||
* it.
|
||||
*
|
||||
* \param [out] prog CUDA Online Compiler program.
|
||||
* \param [in] src CUDA program source.
|
||||
* \param [in] name CUDA program name.\n
|
||||
* \p name can be \c NULL; \c "default_program" is
|
||||
* used when \p name is \c NULL.
|
||||
* \param [in] numHeaders Number of headers used.\n
|
||||
* \p numHeaders must be greater than or equal to 0.
|
||||
* \param [in] headers Sources of the headers.\n
|
||||
* \p headers can be \c NULL when \p numHeaders is
|
||||
* 0.
|
||||
* \param [in] includeNames Name of each header by which they can be
|
||||
* included in the CUDA program source.\n
|
||||
* \p includeNames can be \c NULL when \p numHeaders
|
||||
* is 0.
|
||||
* \return
|
||||
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
||||
* - \link #nvrtcResult NVRTC_ERROR_OUT_OF_MEMORY \endlink
|
||||
* - \link #nvrtcResult NVRTC_ERROR_PROGRAM_CREATION_FAILURE \endlink
|
||||
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
||||
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
||||
*
|
||||
* \see ::nvrtcDestroyProgram
|
||||
*/
|
||||
nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog,
|
||||
const char *src,
|
||||
const char *name,
|
||||
int numHeaders,
|
||||
const char **headers,
|
||||
const char **includeNames);
|
||||
|
||||
|
||||
/**
|
||||
* \ingroup compilation
|
||||
* \brief ::nvrtcDestroyProgram destroys the given program.
|
||||
*
|
||||
* \param [in] prog CUDA Online Compiler program.
|
||||
* \return
|
||||
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
||||
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
||||
*
|
||||
* \see ::nvrtcCreateProgram
|
||||
*/
|
||||
nvrtcResult nvrtcDestroyProgram(nvrtcProgram *prog);
|
||||
|
||||
|
||||
/**
|
||||
* \ingroup compilation
|
||||
* \brief ::nvrtcCompileProgram compiles the given program.
|
||||
*
|
||||
* The valid compiler options are:
|
||||
*
|
||||
* - Compilation targets
|
||||
* - --gpu-architecture=<em>\<GPU architecture name\></em> (-arch)\n
|
||||
* Specify the name of the class of GPU architectures for which the
|
||||
* input must be compiled.\n
|
||||
* - Valid <em>GPU architecture name</em>s:
|
||||
* - compute_20
|
||||
* - compute_30
|
||||
* - compute_35
|
||||
* - compute_50
|
||||
* - Default: compute_20
|
||||
* - Separate compilation / whole-program compilation
|
||||
* - --device-c (-dc)\n
|
||||
* Generate relocatable code that can be linked with other relocatable
|
||||
* device code. It is equivalent to --relocatable-device-code=true.
|
||||
* - --device-w (-dw)\n
|
||||
* Generate non-relocatable code. It is equivalent to
|
||||
* --relocatable-device-code=false.
|
||||
* - --relocatable-device-code=<em>[true, false]</em> (-rdc)\n
|
||||
* Enable (disable) the generation of relocatable device code.
|
||||
* - Default: false
|
||||
* - Debugging support
|
||||
* - --device-debug (-G)\n
|
||||
* Generate debug information.
|
||||
* - --generate-line-info (-lineinfo)\n
|
||||
* Generate line-number information.
|
||||
* - Code generation
|
||||
* - --maxrregcount=<em>\<N\></em> (-maxrregcount)\n
|
||||
* Specify the maximum amount of registers that GPU functions can use.
|
||||
* Until a function-specific limit, a higher value will generally
|
||||
* increase the performance of individual GPU threads that execute this
|
||||
* function. However, because thread registers are allocated from a
|
||||
* global register pool on each GPU, a higher value of this option will
|
||||
* also reduce the maximum thread block size, thereby reducing the amount
|
||||
* of thread parallelism. Hence, a good maxrregcount value is the result
|
||||
* of a trade-off. If this option is not specified, then no maximum is
|
||||
* assumed. Value less than the minimum registers required by ABI will
|
||||
* be bumped up by the compiler to ABI minimum limit.
|
||||
* - --ftz=<em>[true, false]</em> (-ftz)\n
|
||||
* When performing single-precision floating-point operations, flush
|
||||
* denormal values to zero or preserve denormal values. --use_fast_math
|
||||
* implies --ftz=true.
|
||||
* - Default: false
|
||||
* - --prec-sqrt=<em>[true, false]</em> (-prec-sqrt)\n
|
||||
* For single-precision floating-point square root, use IEEE
|
||||
* round-to-nearest mode or use a faster approximation. --use_fast_math
|
||||
* implies --prec-sqrt=false.
|
||||
* - Default: true
|
||||
* - --prec-div=<em>[true, false]</em> (-prec-div)\n
|
||||
* For single-precision floating-point division and reciprocals, use IEEE
|
||||
* round-to-nearest mode or use a faster approximation. --use_fast_math
|
||||
* implies --prec-div=false.
|
||||
* - Default: true
|
||||
* - --fmad=<em>[true, false]</em> (-fmad)\n
|
||||
* Enables (disables) the contraction of floating-point multiplies and
|
||||
* adds/subtracts into floating-point multiply-add operations (FMAD,
|
||||
* FFMA, or DFMA). --use_fast_math implies --fmad=true.
|
||||
* - Default: true
|
||||
* - --use_fast_math (-use_fast_math)\n
|
||||
* Make use of fast math operations. --use_fast_math implies --ftz=true
|
||||
* --prec-div=false --prec-sqrt=false --fmad=true.
|
||||
* - Preprocessing
|
||||
* - --define-macro=<em>\<macrodef\></em> (-D)\n
|
||||
* <em>macrodef</em> can be either <em>name</em> or
|
||||
* <em>name=definitions</em>.
|
||||
* - <em>name</em>\n
|
||||
* Predefine <em>name</em> as a macro with definition 1.
|
||||
* - <em>name=definition</em>\n
|
||||
* The contents of <em>definition</em> are tokenized and preprocessed
|
||||
* as if they appeared during translation phase three in a \c \#define
|
||||
* directive. In particular, the definition will be truncated by
|
||||
* embedded new line characters.
|
||||
* - --undefine-macro=<em>\<name\></em> (-U)\n
|
||||
* Cancel any previous definition of \em name.
|
||||
* - --include-path=<em>\<dir\></em> (-I)\n
|
||||
* Add the directory <em>dir</em> to the list of directories to be
|
||||
* searched for headers. These paths are searched after the list of
|
||||
* headers given to ::nvrtcCreateProgram.
|
||||
* - --pre-include=<em>\<header\></em> (-include)\n
|
||||
* Preinclude <em>header</em> during preprocessing.
|
||||
* - Language Dialect
|
||||
* - --std=c++11 (-std=c++11)\n
|
||||
* Set language dialect to C++11.
|
||||
* - --builtin-move-forward=<em>[true, false]</em> (-builtin-move-forward)\n
|
||||
* Provide builtin definitions of std::move and std::forward, when C++11
|
||||
* language dialect is selected.
|
||||
* - Default : true
|
||||
* - --builtin-initializer-list=<em>[true, false]</em> (-builtin-initializer-list)\n
|
||||
* Provide builtin definitions of std::initializer_list class and member
|
||||
* functions when C++11 language dialect is selected.
|
||||
* - Default : true
|
||||
* - Misc
|
||||
* - --disable-warnings (-w)\n
|
||||
* Inhibit all warning messages.
|
||||
* - --restrict (-restrict)\n
|
||||
* Programmer assertion that all kernel pointer parameters are restrict
|
||||
* pointers.
|
||||
* - --device-as-default-execution-space
|
||||
* (-default-device)\n
|
||||
* Treat entities with no execution space annotation as \c __device__
|
||||
* entities.
|
||||
*
|
||||
* \param [in] prog CUDA Online Compiler program.
|
||||
* \param [in] numOptions Number of compiler options passed.
|
||||
* \param [in] options Compiler options in the form of C string array.\n
|
||||
* \p options can be \c NULL when \p numOptions is 0.
|
||||
*
|
||||
* \return
|
||||
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
||||
* - \link #nvrtcResult NVRTC_ERROR_OUT_OF_MEMORY \endlink
|
||||
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
||||
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
||||
* - \link #nvrtcResult NVRTC_ERROR_INVALID_OPTION \endlink
|
||||
* - \link #nvrtcResult NVRTC_ERROR_COMPILATION \endlink
|
||||
* - \link #nvrtcResult NVRTC_ERROR_BUILTIN_OPERATION_FAILURE \endlink
|
||||
*/
|
||||
nvrtcResult nvrtcCompileProgram(nvrtcProgram prog,
|
||||
int numOptions, const char **options);
|
||||
|
||||
|
||||
/**
|
||||
* \ingroup compilation
|
||||
* \brief ::nvrtcGetPTXSize sets \p ptxSizeRet with the size of the PTX
|
||||
* generated by the previous compilation of \p prog (including the
|
||||
* trailing \c NULL).
|
||||
*
|
||||
* \param [in] prog CUDA Online Compiler program.
|
||||
* \param [out] ptxSizeRet Size of the generated PTX (including the trailing
|
||||
* \c NULL).
|
||||
* \return
|
||||
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
||||
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
||||
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
||||
*
|
||||
* \see ::nvrtcGetPTX
|
||||
*/
|
||||
nvrtcResult nvrtcGetPTXSize(nvrtcProgram prog, size_t *ptxSizeRet);
|
||||
|
||||
|
||||
/**
|
||||
* \ingroup compilation
|
||||
* \brief ::nvrtcGetPTX stores the PTX generated by the previous compilation
|
||||
* of \p prog in the memory pointed by \p ptx.
|
||||
*
|
||||
* \param [in] prog CUDA Online Compiler program.
|
||||
* \param [out] ptx Compiled result.
|
||||
* \return
|
||||
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
||||
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
||||
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
||||
*
|
||||
* \see ::nvrtcGetPTXSize
|
||||
*/
|
||||
nvrtcResult nvrtcGetPTX(nvrtcProgram prog, char *ptx);
|
||||
|
||||
|
||||
/**
|
||||
* \ingroup compilation
|
||||
* \brief ::nvrtcGetProgramLogSize sets \p logSizeRet with the size of the
|
||||
* log generated by the previous compilation of \p prog (including the
|
||||
* trailing \c NULL).
|
||||
*
|
||||
* Note that compilation log may be generated with warnings and informative
|
||||
* messages, even when the compilation of \p prog succeeds.
|
||||
*
|
||||
* \param [in] prog CUDA Online Compiler program.
|
||||
* \param [out] logSizeRet Size of the compilation log
|
||||
* (including the trailing \c NULL).
|
||||
* \return
|
||||
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
||||
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
||||
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
||||
*
|
||||
* \see ::nvrtcGetProgramLog
|
||||
*/
|
||||
nvrtcResult nvrtcGetProgramLogSize(nvrtcProgram prog, size_t *logSizeRet);
|
||||
|
||||
|
||||
/**
|
||||
* \ingroup compilation
|
||||
* \brief ::nvrtcGetProgramLog stores the log generated by the previous
|
||||
* compilation of \p prog in the memory pointed by \p log.
|
||||
*
|
||||
* \param [in] prog CUDA Online Compiler program.
|
||||
* \param [out] log Compilation log.
|
||||
* \return
|
||||
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
||||
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
||||
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
||||
*
|
||||
* \see ::nvrtcGetProgramLogSize
|
||||
*/
|
||||
nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char *log);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif /* __NVRTC_H__ */
|
@@ -12,26 +12,18 @@ namespace isaac
|
||||
namespace driver
|
||||
{
|
||||
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
struct cu_event_t{
|
||||
operator bool() const { return first && second; }
|
||||
CUevent first;
|
||||
CUevent second;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
#define HANDLE_TYPE(CLTYPE, CUTYPE) Handle<CLTYPE, CUTYPE>
|
||||
#else
|
||||
#define HANDLE_TYPE(CLTYPE, CUTYPE) Handle<CLTYPE, void>
|
||||
#endif
|
||||
|
||||
template<class CLType, class CUType>
|
||||
class ISAACAPI Handle
|
||||
{
|
||||
private:
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
static void _delete(CUcontext x);
|
||||
static void _delete(CUdeviceptr x);
|
||||
static void _delete(CUstream x);
|
||||
@@ -40,7 +32,6 @@ private:
|
||||
static void _delete(CUfunction);
|
||||
static void _delete(CUmodule x);
|
||||
static void _delete(cu_event_t x);
|
||||
#endif
|
||||
|
||||
static void release(cl_context x);
|
||||
static void release(cl_mem x);
|
||||
@@ -56,10 +47,8 @@ public:
|
||||
bool operator<(Handle const & other) const;
|
||||
CLType & cl();
|
||||
CLType const & cl() const;
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
CUType & cu();
|
||||
CUType const & cu() const;
|
||||
#endif
|
||||
~Handle();
|
||||
|
||||
private:
|
||||
|
@@ -30,10 +30,8 @@ public:
|
||||
private:
|
||||
backend_type backend_;
|
||||
unsigned int address_bits_;
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
std::vector<std::shared_ptr<void> > cu_params_store_;
|
||||
std::vector<void*> cu_params_;
|
||||
#endif
|
||||
HANDLE_TYPE(cl_kernel, CUfunction) h_;
|
||||
};
|
||||
|
||||
|
@@ -19,9 +19,7 @@ class ISAACAPI Platform
|
||||
{
|
||||
private:
|
||||
public:
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
Platform(backend_type);
|
||||
#endif
|
||||
Platform(cl_platform_id const &);
|
||||
std::string name() const;
|
||||
std::string version() const;
|
||||
|
@@ -21,10 +21,8 @@ static inline std::string size_type(driver::Device const & device)
|
||||
{
|
||||
switch(device.backend())
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case driver::CUDA:
|
||||
return "int";
|
||||
#endif
|
||||
case driver::OPENCL:
|
||||
return "int";
|
||||
default:
|
||||
|
@@ -18,19 +18,13 @@ else()
|
||||
endif()
|
||||
add_dependencies(isaac database)
|
||||
|
||||
#Linkage
|
||||
if(OPENCL_FOUND)
|
||||
target_link_libraries(isaac ${OPENCL_LIBRARIES})
|
||||
endif()
|
||||
if(CUDA_FOUND)
|
||||
cuda_find_library_local_first(CUDA_NVRTC_LIBRARY nvrtc "\"nvrtc\" library")
|
||||
target_link_libraries(isaac ${CUDA_CUDA_LIBRARY} ${CUDA_NVRTC_LIBRARY})
|
||||
#Cuda JIT headers to file
|
||||
set(CUDA_HELPERS_PATH ${CMAKE_CURRENT_SOURCE_DIR}/driver/helpers/cuda/)
|
||||
file(GLOB_RECURSE CUDA_HELPERS_SRC ${CUDA_HELPERS_PATH}/*.cu)
|
||||
CODE_TO_H(SOURCES ${CUDA_HELPERS_SRC} VARNAME kernel_files EXTENSION "hpp"
|
||||
OUTPUT_DIR ${CUDA_HELPERS_PATH} NAMESPACE "isaac helpers cuda" TARGET cuda_headers EOF "0")
|
||||
endif()
|
||||
# Link the dynamic-loader library through CMake's portable variable instead of a
# hard-coded "dl": CMAKE_DL_LIBS names the library that provides dlopen/dlclose
# on the current platform and is empty where no separate library is needed.
target_link_libraries(isaac ${CMAKE_DL_LIBS})
|
||||
|
||||
#Cuda JIT headers to file
|
||||
set(CUDA_HELPERS_PATH ${CMAKE_CURRENT_SOURCE_DIR}/driver/helpers/cuda/)
|
||||
file(GLOB_RECURSE CUDA_HELPERS_SRC ${CUDA_HELPERS_PATH}/*.cu)
|
||||
CODE_TO_H(SOURCES ${CUDA_HELPERS_SRC} VARNAME kernel_files EXTENSION "hpp"
|
||||
OUTPUT_DIR ${CUDA_HELPERS_PATH} NAMESPACE "isaac helpers cuda" TARGET cuda_headers EOF "0")
|
||||
|
||||
#Installation
|
||||
install(TARGETS isaac LIBRARY DESTINATION lib ARCHIVE DESTINATION lib)
|
||||
|
@@ -127,15 +127,28 @@ std::list<Context const *> backend::contexts::cache_;
|
||||
|
||||
void backend::platforms(std::vector<Platform> & platforms)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
platforms.push_back(Platform(CUDA));
|
||||
#endif
|
||||
cl_uint nplatforms;
|
||||
ocl::check(clGetPlatformIDs(0, NULL, &nplatforms));
|
||||
std::vector<cl_platform_id> clplatforms(nplatforms);
|
||||
ocl::check(clGetPlatformIDs(nplatforms, clplatforms.data(), NULL));
|
||||
for(cl_platform_id p: clplatforms)
|
||||
platforms.push_back(Platform(p));
|
||||
//if cuda is here
|
||||
if(dispatch::cuinit())
|
||||
{
|
||||
if(dispatch::nvrtcinit())
|
||||
platforms.push_back(Platform(CUDA));
|
||||
else
|
||||
throw std::runtime_error("ISAAC: Unable to find NVRTC. Make sure you are using CUDA >= 7.0");
|
||||
}
|
||||
|
||||
//if OpenCL is here
|
||||
if(dispatch::clinit())
|
||||
{
|
||||
cl_uint nplatforms;
|
||||
ocl::check(dispatch::dispatch::clGetPlatformIDs(0, NULL, &nplatforms));
|
||||
std::vector<cl_platform_id> clplatforms(nplatforms);
|
||||
ocl::check(dispatch::dispatch::clGetPlatformIDs(nplatforms, clplatforms.data(), NULL));
|
||||
for(cl_platform_id p: clplatforms)
|
||||
platforms.push_back(Platform(p));
|
||||
}
|
||||
|
||||
if(platforms.empty())
|
||||
throw std::runtime_error("ISAAC: No backend available. Make sure OpenCL and/or CUDA are available in your library path");
|
||||
}
|
||||
|
||||
void backend::synchronize(Context const & context)
|
||||
|
@@ -18,14 +18,12 @@ Buffer::Buffer(Context const & context, size_t size) : backend_(context.backend_
|
||||
{
|
||||
switch(backend_)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA:
|
||||
cuda::check(cuMemAlloc(&h_.cu(), size));
|
||||
cuda::check(dispatch::cuMemAlloc(&h_.cu(), size));
|
||||
break;
|
||||
#endif
|
||||
case OPENCL:
|
||||
cl_int err;
|
||||
h_.cl() = clCreateBuffer(context.h_.cl(), CL_MEM_READ_WRITE, size, NULL, &err);
|
||||
h_.cl() = dispatch::clCreateBuffer(context.h_.cl(), CL_MEM_READ_WRITE, size, NULL, &err);
|
||||
ocl::check(err);
|
||||
break;
|
||||
default:
|
||||
|
@@ -5,8 +5,6 @@ namespace isaac
|
||||
namespace driver
|
||||
{
|
||||
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
|
||||
namespace nvrtc
|
||||
{
|
||||
|
||||
@@ -101,8 +99,6 @@ void check(CUresult err)
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
namespace ocl
|
||||
{
|
||||
|
||||
|
@@ -27,15 +27,14 @@ CommandQueue::CommandQueue(Context const & context, Device const & device, cl_co
|
||||
{
|
||||
switch(backend_)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA:
|
||||
cuda::check(cuStreamCreate(&h_.cu(), 0));
|
||||
cuda::check(dispatch::cuStreamCreate(&h_.cu(), 0));
|
||||
break;
|
||||
#endif
|
||||
|
||||
case OPENCL:
|
||||
{
|
||||
cl_int err;
|
||||
h_.cl() = clCreateCommandQueue(context.h_.cl(), device.h_.cl(), properties, &err);
|
||||
h_.cl() = dispatch::clCreateCommandQueue(context.h_.cl(), device.h_.cl(), properties, &err);
|
||||
ocl::check(err);
|
||||
break;
|
||||
}
|
||||
@@ -57,10 +56,8 @@ void CommandQueue::synchronize()
|
||||
{
|
||||
switch(backend_)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA: cuda::check(cuStreamSynchronize(h_.cu())); break;
|
||||
#endif
|
||||
case OPENCL: ocl::check(clFinish(h_.cl())); break;
|
||||
case CUDA: cuda::check(dispatch::cuStreamSynchronize(h_.cu())); break;
|
||||
case OPENCL: ocl::check(dispatch::clFinish(h_.cl())); break;
|
||||
default: throw;
|
||||
}
|
||||
}
|
||||
@@ -70,16 +67,14 @@ Event CommandQueue::enqueue(Kernel const & kernel, NDRange global, driver::NDRan
|
||||
Event event(backend_);
|
||||
switch(backend_)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA:
|
||||
cuda::check(cuEventRecord(event.h_.cu().first, h_.cu()));
|
||||
cuda::check(cuLaunchKernel(kernel.h_.cu(), global[0]/local[0], global[1]/local[1], global[2]/local[2],
|
||||
cuda::check(dispatch::cuEventRecord(event.h_.cu().first, h_.cu()));
|
||||
cuda::check(dispatch::cuLaunchKernel(kernel.h_.cu(), global[0]/local[0], global[1]/local[1], global[2]/local[2],
|
||||
local[0], local[1], local[2], 0, h_.cu(),(void**)&kernel.cu_params_[0], NULL));
|
||||
cuda::check(cuEventRecord(event.h_.cu().second, h_.cu()));
|
||||
cuda::check(dispatch::cuEventRecord(event.h_.cu().second, h_.cu()));
|
||||
break;
|
||||
#endif
|
||||
case OPENCL:
|
||||
ocl::check(clEnqueueNDRangeKernel(h_.cl(), kernel.h_.cl(), global.dimension(), NULL, (const size_t *)global, (const size_t *) local, 0, NULL, &event.handle().cl()));
|
||||
ocl::check(dispatch::clEnqueueNDRangeKernel(h_.cl(), kernel.h_.cl(), global.dimension(), NULL, (const size_t *)global, (const size_t *) local, 0, NULL, &event.handle().cl()));
|
||||
break;
|
||||
default: throw;
|
||||
}
|
||||
@@ -90,16 +85,14 @@ void CommandQueue::write(Buffer const & buffer, bool blocking, std::size_t offse
|
||||
{
|
||||
switch(backend_)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA:
|
||||
if(blocking)
|
||||
cuda::check(cuMemcpyHtoD(buffer.h_.cu() + offset, ptr, size));
|
||||
cuda::check(dispatch::cuMemcpyHtoD(buffer.h_.cu() + offset, ptr, size));
|
||||
else
|
||||
cuda::check(cuMemcpyHtoDAsync(buffer.h_.cu() + offset, ptr, size, h_.cu()));
|
||||
cuda::check(dispatch::cuMemcpyHtoDAsync(buffer.h_.cu() + offset, ptr, size, h_.cu()));
|
||||
break;
|
||||
#endif
|
||||
case OPENCL:
|
||||
ocl::check(clEnqueueWriteBuffer(h_.cl(), buffer.h_.cl(), blocking?CL_TRUE:CL_FALSE, offset, size, ptr, 0, NULL, NULL));
|
||||
ocl::check(dispatch::clEnqueueWriteBuffer(h_.cl(), buffer.h_.cl(), blocking?CL_TRUE:CL_FALSE, offset, size, ptr, 0, NULL, NULL));
|
||||
break;
|
||||
default: throw;
|
||||
}
|
||||
@@ -109,16 +102,14 @@ void CommandQueue::read(Buffer const & buffer, bool blocking, std::size_t offset
|
||||
{
|
||||
switch(backend_)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA:
|
||||
if(blocking)
|
||||
cuda::check(cuMemcpyDtoH(ptr, buffer.h_.cu() + offset, size));
|
||||
cuda::check(dispatch::cuMemcpyDtoH(ptr, buffer.h_.cu() + offset, size));
|
||||
else
|
||||
cuda::check(cuMemcpyDtoHAsync(ptr, buffer.h_.cu() + offset, size, h_.cu()));
|
||||
cuda::check(dispatch::cuMemcpyDtoHAsync(ptr, buffer.h_.cu() + offset, size, h_.cu()));
|
||||
break;
|
||||
#endif
|
||||
case OPENCL:
|
||||
ocl::check(clEnqueueReadBuffer(h_.cl(), buffer.h_.cl(), blocking?CL_TRUE:CL_FALSE, offset, size, ptr, 0, NULL, NULL));
|
||||
ocl::check(dispatch::clEnqueueReadBuffer(h_.cl(), buffer.h_.cl(), blocking?CL_TRUE:CL_FALSE, offset, size, ptr, 0, NULL, NULL));
|
||||
break;
|
||||
default: throw;
|
||||
}
|
||||
|
@@ -21,14 +21,12 @@ Context::Context(Device const & device) : backend_(device.backend_), device_(dev
|
||||
{
|
||||
switch(backend_)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA:
|
||||
cuda::check(cuCtxCreate(&h_.cu(), CU_CTX_SCHED_AUTO, device.h_.cu()));
|
||||
cuda::check(dispatch::cuCtxCreate(&h_.cu(), CU_CTX_SCHED_AUTO, device.h_.cu()));
|
||||
break;
|
||||
#endif
|
||||
case OPENCL:
|
||||
cl_int err;
|
||||
h_.cl() = clCreateContext(NULL, 1, &device_.h_.cl(), NULL, NULL, &err);
|
||||
h_.cl() = dispatch::clCreateContext(NULL, 1, &device_.h_.cl(), NULL, NULL, &err);
|
||||
ocl::check(err);
|
||||
break;
|
||||
default:
|
||||
|
@@ -13,23 +13,19 @@ namespace driver
|
||||
{
|
||||
|
||||
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
template<CUdevice_attribute attr>
|
||||
int Device::cuGetInfo() const
|
||||
{
|
||||
int res;
|
||||
cuda::check(cuDeviceGetAttribute(&res, attr, h_.cu()));
|
||||
cuda::check(dispatch::cuDeviceGetAttribute(&res, attr, h_.cu()));
|
||||
return res;
|
||||
}
|
||||
|
||||
Device::Device(int ordinal): backend_(CUDA), h_(backend_, true)
|
||||
{
|
||||
cuda::check(cuDeviceGet(&h_.cu(), ordinal));
|
||||
cuda::check(dispatch::cuDeviceGet(&h_.cu(), ordinal));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
Device::Device(cl_device_id const & device, bool take_ownership) : backend_(OPENCL), h_(backend_, take_ownership)
|
||||
{
|
||||
h_.cl() = device;
|
||||
@@ -85,9 +81,7 @@ unsigned int Device::address_bits() const
|
||||
{
|
||||
switch(backend_)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA: return sizeof(long long)*8;
|
||||
#endif
|
||||
case CUDA: return sizeof(size_t)*8;
|
||||
case OPENCL: return ocl::info<CL_DEVICE_ADDRESS_BITS>(h_.cl());
|
||||
default: throw;
|
||||
}
|
||||
@@ -99,9 +93,7 @@ driver::Platform Device::platform() const
|
||||
{
|
||||
switch(backend_)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA: return Platform(CUDA);
|
||||
#endif
|
||||
case OPENCL: return Platform(ocl::info<CL_DEVICE_PLATFORM>(h_.cl()));
|
||||
default: throw;
|
||||
}
|
||||
@@ -111,13 +103,12 @@ std::string Device::name() const
|
||||
{
|
||||
switch(backend_)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA:
|
||||
char tmp[128];
|
||||
cuda::check(cuDeviceGetName(tmp, 128, h_.cu()));
|
||||
cuda::check(dispatch::cuDeviceGetName(tmp, 128, h_.cu()));
|
||||
return std::string(tmp);
|
||||
#endif
|
||||
case OPENCL: return ocl::info<CL_DEVICE_NAME>(h_.cl());
|
||||
case OPENCL:
|
||||
return ocl::info<CL_DEVICE_NAME>(h_.cl());
|
||||
default: throw;
|
||||
}
|
||||
}
|
||||
@@ -126,10 +117,10 @@ std::string Device::vendor_str() const
|
||||
{
|
||||
switch(backend_)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA: return "NVidia";
|
||||
#endif
|
||||
case OPENCL: return ocl::info<CL_DEVICE_VENDOR>(h_.cl());
|
||||
case CUDA:
|
||||
return "NVidia";
|
||||
case OPENCL:
|
||||
return ocl::info<CL_DEVICE_VENDOR>(h_.cl());
|
||||
default: throw;
|
||||
}
|
||||
}
|
||||
@@ -139,7 +130,6 @@ std::vector<size_t> Device::max_work_item_sizes() const
|
||||
{
|
||||
switch(backend_)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA:
|
||||
{
|
||||
std::vector<size_t> result(3);
|
||||
@@ -148,7 +138,6 @@ std::vector<size_t> Device::max_work_item_sizes() const
|
||||
result[2] = cuGetInfo<CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z>();
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
case OPENCL:
|
||||
return ocl::info<CL_DEVICE_MAX_WORK_ITEM_SIZES>(h_.cl());
|
||||
default:
|
||||
@@ -160,9 +149,7 @@ Device::Type Device::type() const
|
||||
{
|
||||
switch(backend_)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA: return Type::GPU;
|
||||
#endif
|
||||
case OPENCL: return static_cast<Type>(ocl::info<CL_DEVICE_TYPE>(h_.cl()));
|
||||
default: throw;
|
||||
}
|
||||
@@ -172,10 +159,8 @@ std::string Device::extensions() const
|
||||
{
|
||||
switch(backend_)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA:
|
||||
return "";
|
||||
#endif
|
||||
case OPENCL:
|
||||
return ocl::info<CL_DEVICE_EXTENSIONS>(h_.cl());
|
||||
default: throw;
|
||||
@@ -188,10 +173,8 @@ std::pair<unsigned int, unsigned int> Device::nv_compute_capability() const
|
||||
{
|
||||
case OPENCL:
|
||||
return std::pair<unsigned int, unsigned int>(ocl::info<CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV>(h_.cl()), ocl::info<CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV>(h_.cl()));
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA:
|
||||
return std::pair<unsigned int, unsigned int>(cuGetInfo<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR>(), cuGetInfo<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR>());
|
||||
#endif
|
||||
default:
|
||||
throw;
|
||||
}
|
||||
@@ -203,12 +186,8 @@ bool Device::fp64_support() const
|
||||
{
|
||||
case OPENCL:
|
||||
return extensions().find("cl_khr_fp64")!=std::string::npos;
|
||||
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA:
|
||||
return true;
|
||||
#endif
|
||||
|
||||
default:
|
||||
throw;
|
||||
}
|
||||
@@ -230,19 +209,12 @@ std::string Device::infos() const
|
||||
}
|
||||
|
||||
// Properties
|
||||
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
#define CUDACASE(CUNAME) case CUDA: return cuGetInfo<CUNAME>();
|
||||
#else
|
||||
#define CUDACASE(CUNAME)
|
||||
#endif\
|
||||
|
||||
#define WRAP_ATTRIBUTE(ret, fname, CUNAME, CLNAME) \
|
||||
ret Device::fname() const\
|
||||
{\
|
||||
switch(backend_)\
|
||||
{\
|
||||
CUDACASE(CUNAME)\
|
||||
case CUDA: return cuGetInfo<CUNAME>();\
|
||||
case OPENCL: return static_cast<ret>(ocl::info<CLNAME>(h_.cl()));\
|
||||
default: throw;\
|
||||
}\
|
||||
|
257
lib/driver/dispatch.cpp
Normal file
257
lib/driver/dispatch.cpp
Normal file
@@ -0,0 +1,257 @@
|
||||
#include "isaac/driver/dispatch.h"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
namespace driver
|
||||
{
|
||||
|
||||
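//Helpers for function definition: DEFINEn(init, hlib, ret, fname, t1..tn) defines
//ret dispatch::fname(t1, ..., tn), which forwards its n arguments to
//f_impl<dispatch::init>(hlib, fname, fname_, "fname", ...)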
|
||||
#define DEFINE1(init, hlib, ret, fname, t1) ret dispatch::fname(t1 a)\
|
||||
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a); }
|
||||
|
||||
// Generic per-arity definition helpers.
// DEFINE<N>(init, hlib, ret, fname, t1, ..., tN) expands to the out-of-line
// definition of dispatch::fname taking N parameters of types t1..tN. The
// generated body forwards to f_impl<dispatch::init>, handing it the library
// handle `hlib`, the function itself, its companion static `fname_`, the
// stringified `fname`, and the N arguments in declaration order.
#define DEFINE2(init, hlib, ret, fname, t1, t2) \
  ret dispatch::fname(t1 x1, t2 x2) \
  { return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, x1, x2); }

#define DEFINE3(init, hlib, ret, fname, t1, t2, t3) \
  ret dispatch::fname(t1 x1, t2 x2, t3 x3) \
  { return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, x1, x2, x3); }

#define DEFINE4(init, hlib, ret, fname, t1, t2, t3, t4) \
  ret dispatch::fname(t1 x1, t2 x2, t3 x3, t4 x4) \
  { return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, x1, x2, x3, x4); }

#define DEFINE5(init, hlib, ret, fname, t1, t2, t3, t4, t5) \
  ret dispatch::fname(t1 x1, t2 x2, t3 x3, t4 x4, t5 x5) \
  { return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, x1, x2, x3, x4, x5); }

#define DEFINE6(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6) \
  ret dispatch::fname(t1 x1, t2 x2, t3 x3, t4 x4, t5 x5, t6 x6) \
  { return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, x1, x2, x3, x4, x5, x6); }

#define DEFINE7(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7) \
  ret dispatch::fname(t1 x1, t2 x2, t3 x3, t4 x4, t5 x5, t6 x6, t7 x7) \
  { return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, x1, x2, x3, x4, x5, x6, x7); }

#define DEFINE8(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) \
  ret dispatch::fname(t1 x1, t2 x2, t3 x3, t4 x4, t5 x5, t6 x6, t7 x7, t8 x8) \
  { return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, x1, x2, x3, x4, x5, x6, x7, x8); }

#define DEFINE9(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) \
  ret dispatch::fname(t1 x1, t2 x2, t3 x3, t4 x4, t5 x5, t6 x6, t7 x7, t8 x8, t9 x9) \
  { return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, x1, x2, x3, x4, x5, x6, x7, x8, x9); }

#define DEFINE10(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) \
  ret dispatch::fname(t1 x1, t2 x2, t3 x3, t4 x4, t5 x5, t6 x6, t7 x7, t8 x8, t9 x9, t10 x10) \
  { return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10); }

#define DEFINE11(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) \
  ret dispatch::fname(t1 x1, t2 x2, t3 x3, t4 x4, t5 x5, t6 x6, t7 x7, t8 x8, t9 x9, t10 x10, t11 x11) \
  { return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11); }
|
||||
|
||||
// Specialized helpers for OpenCL.
// OCL_DEFINE<N>(ret, fname, t1..tN) is DEFINE<N> pre-bound to the
// dispatch::clinit initializer and the opencl_ library handle.
#define OCL_DEFINE1(ret, fname, t1) \
  DEFINE1(clinit, opencl_, ret, fname, t1)
#define OCL_DEFINE2(ret, fname, t1, t2) \
  DEFINE2(clinit, opencl_, ret, fname, t1, t2)
#define OCL_DEFINE3(ret, fname, t1, t2, t3) \
  DEFINE3(clinit, opencl_, ret, fname, t1, t2, t3)
#define OCL_DEFINE4(ret, fname, t1, t2, t3, t4) \
  DEFINE4(clinit, opencl_, ret, fname, t1, t2, t3, t4)
#define OCL_DEFINE5(ret, fname, t1, t2, t3, t4, t5) \
  DEFINE5(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5)
#define OCL_DEFINE6(ret, fname, t1, t2, t3, t4, t5, t6) \
  DEFINE6(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5, t6)
#define OCL_DEFINE7(ret, fname, t1, t2, t3, t4, t5, t6, t7) \
  DEFINE7(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5, t6, t7)
#define OCL_DEFINE8(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) \
  DEFINE8(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8)
#define OCL_DEFINE9(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) \
  DEFINE9(clinit, opencl_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9)
|
||||
|
||||
// Specialized helpers for CUDA.
// CUDA_DEFINE<N>(ret, fname, t1..tN) is DEFINE<N> pre-bound to the
// dispatch::cuinit initializer and the cuda_ library handle.
#define CUDA_DEFINE1(ret, fname, t1) \
  DEFINE1(cuinit, cuda_, ret, fname, t1)
#define CUDA_DEFINE2(ret, fname, t1, t2) \
  DEFINE2(cuinit, cuda_, ret, fname, t1, t2)
#define CUDA_DEFINE3(ret, fname, t1, t2, t3) \
  DEFINE3(cuinit, cuda_, ret, fname, t1, t2, t3)
#define CUDA_DEFINE4(ret, fname, t1, t2, t3, t4) \
  DEFINE4(cuinit, cuda_, ret, fname, t1, t2, t3, t4)
#define CUDA_DEFINE5(ret, fname, t1, t2, t3, t4, t5) \
  DEFINE5(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5)
#define CUDA_DEFINE6(ret, fname, t1, t2, t3, t4, t5, t6) \
  DEFINE6(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6)
#define CUDA_DEFINE7(ret, fname, t1, t2, t3, t4, t5, t6, t7) \
  DEFINE7(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7)
#define CUDA_DEFINE8(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) \
  DEFINE8(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8)
#define CUDA_DEFINE9(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) \
  DEFINE9(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9)
#define CUDA_DEFINE10(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) \
  DEFINE10(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10)
#define CUDA_DEFINE11(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) \
  DEFINE11(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11)
|
||||
|
||||
// Specialized helpers for NVRTC.
// NVRTC_DEFINE<N>(ret, fname, t1..tN) is DEFINE<N> pre-bound to the
// dispatch::nvrtcinit initializer and the nvrtc_ library handle.
#define NVRTC_DEFINE1(ret, fname, t1) \
  DEFINE1(nvrtcinit, nvrtc_, ret, fname, t1)
#define NVRTC_DEFINE2(ret, fname, t1, t2) \
  DEFINE2(nvrtcinit, nvrtc_, ret, fname, t1, t2)
#define NVRTC_DEFINE3(ret, fname, t1, t2, t3) \
  DEFINE3(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3)
#define NVRTC_DEFINE4(ret, fname, t1, t2, t3, t4) \
  DEFINE4(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4)
#define NVRTC_DEFINE5(ret, fname, t1, t2, t3, t4, t5) \
  DEFINE5(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5)
#define NVRTC_DEFINE6(ret, fname, t1, t2, t3, t4, t5, t6) \
  DEFINE6(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6)
#define NVRTC_DEFINE7(ret, fname, t1, t2, t3, t4, t5, t6, t7) \
  DEFINE7(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7)
#define NVRTC_DEFINE8(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) \
  DEFINE8(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8)
#define NVRTC_DEFINE9(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) \
  DEFINE9(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9)
#define NVRTC_DEFINE10(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) \
  DEFINE10(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10)
#define NVRTC_DEFINE11(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) \
  DEFINE11(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11)
|
||||
|
||||
|
||||
bool dispatch::clinit()
|
||||
{
|
||||
if(opencl_==nullptr)
|
||||
opencl_ = dlopen("libOpenCL.so", RTLD_LAZY);
|
||||
return opencl_ != nullptr;
|
||||
}
|
||||
|
||||
bool dispatch::cuinit()
|
||||
{
|
||||
if(cuda_==nullptr)
|
||||
cuda_ = dlopen("libcuda.so", RTLD_LAZY);
|
||||
return cuda_ != nullptr;
|
||||
}
|
||||
|
||||
bool dispatch::nvrtcinit()
|
||||
{
|
||||
if(nvrtc_==nullptr)
|
||||
nvrtc_ = dlopen("libnvrtc.so", RTLD_LAZY);
|
||||
return nvrtc_ != nullptr;
|
||||
}
|
||||
|
||||
|
||||
//OpenCL
|
||||
|
||||
// Forwards clBuildProgram to f_impl with the OpenCL initializer and handle.
// Written out by hand instead of through OCL_DEFINE6 -- presumably because the
// callback's function-pointer type cannot be declared as `type name` by the
// DEFINE<N> helpers.
cl_int dispatch::clBuildProgram(cl_program program, cl_uint num_devices, const cl_device_id * device_list, const char * options, void (*pfn_notify)(cl_program, void *), void * user_data)
{
  return f_impl<dispatch::clinit>(opencl_, clBuildProgram, clBuildProgram_, "clBuildProgram",
                                  program, num_devices, device_list, options, pfn_notify, user_data);
}
|
||||
|
||||
// Forwards clCreateContext to f_impl with the OpenCL initializer and handle.
// Written out by hand instead of through OCL_DEFINE6 -- presumably because the
// callback's function-pointer type cannot be declared as `type name` by the
// DEFINE<N> helpers.
cl_context dispatch::clCreateContext(const cl_context_properties * properties, cl_uint num_devices, const cl_device_id * device_list, void (*pfn_notify)(const char *, const void *, size_t, void *), void * user_data, cl_int * errcode_ret)
{
  return f_impl<dispatch::clinit>(opencl_, dispatch::clCreateContext, dispatch::clCreateContext_, "clCreateContext",
                                  properties, num_devices, device_list, pfn_notify, user_data, errcode_ret);
}
|
||||
|
||||
// Definitions of the OpenCL entry points exposed by dispatch, generated with
// the OCL_DEFINE<N> helpers and grouped by the kind of object they act on.

// Platforms and devices
OCL_DEFINE3(cl_int, clGetPlatformIDs, cl_uint, cl_platform_id *, cl_uint *)
OCL_DEFINE5(cl_int, clGetPlatformInfo, cl_platform_id, cl_platform_info, size_t, void *, size_t *)
OCL_DEFINE5(cl_int, clGetDeviceIDs, cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *)
OCL_DEFINE5(cl_int, clGetDeviceInfo, cl_device_id, cl_device_info, size_t, void *, size_t *)
OCL_DEFINE1(cl_int, clReleaseDevice, cl_device_id)

// Contexts
OCL_DEFINE5(cl_int, clGetContextInfo, cl_context, cl_context_info, size_t, void *, size_t *)
OCL_DEFINE1(cl_int, clReleaseContext, cl_context)

// Command queues
OCL_DEFINE4(cl_command_queue, clCreateCommandQueue, cl_context, cl_device_id, cl_command_queue_properties, cl_int *)
OCL_DEFINE5(cl_int, clGetCommandQueueInfo, cl_command_queue, cl_command_queue_info, size_t, void *, size_t *)
OCL_DEFINE1(cl_int, clFlush, cl_command_queue)
OCL_DEFINE1(cl_int, clFinish, cl_command_queue)
OCL_DEFINE1(cl_int, clReleaseCommandQueue, cl_command_queue)

// Memory objects
OCL_DEFINE5(cl_mem, clCreateBuffer, cl_context, cl_mem_flags, size_t, void *, cl_int *)
OCL_DEFINE5(cl_int, clGetMemObjectInfo, cl_mem, cl_mem_info, size_t, void *, size_t *)
OCL_DEFINE9(cl_int, clEnqueueWriteBuffer, cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *)
OCL_DEFINE9(cl_int, clEnqueueReadBuffer, cl_command_queue, cl_mem, cl_bool, size_t, size_t, void *, cl_uint, const cl_event *, cl_event *)
OCL_DEFINE1(cl_int, clReleaseMemObject, cl_mem)

// Programs
OCL_DEFINE5(cl_program, clCreateProgramWithSource, cl_context, cl_uint, const char **, const size_t *, cl_int *)
OCL_DEFINE7(cl_program, clCreateProgramWithBinary, cl_context, cl_uint, const cl_device_id *, const size_t *, const unsigned char **, cl_int *, cl_int *)
OCL_DEFINE5(cl_int, clGetProgramInfo, cl_program, cl_program_info, size_t, void *, size_t *)
OCL_DEFINE6(cl_int, clGetProgramBuildInfo, cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *)
OCL_DEFINE1(cl_int, clReleaseProgram, cl_program)

// Kernels
OCL_DEFINE3(cl_kernel, clCreateKernel, cl_program, const char *, cl_int *)
OCL_DEFINE4(cl_int, clSetKernelArg, cl_kernel, cl_uint, size_t, const void *)
OCL_DEFINE5(cl_int, clGetKernelInfo, cl_kernel, cl_kernel_info, size_t, void *, size_t *)
OCL_DEFINE6(cl_int, clGetKernelWorkGroupInfo, cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void *, size_t *)
OCL_DEFINE9(cl_int, clEnqueueNDRangeKernel, cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*)
OCL_DEFINE1(cl_int, clReleaseKernel, cl_kernel)

// Events
OCL_DEFINE1(cl_int, clRetainEvent, cl_event)
OCL_DEFINE5(cl_int, clGetEventProfilingInfo, cl_event, cl_profiling_info, size_t, void *, size_t *)
OCL_DEFINE1(cl_int, clReleaseEvent, cl_event)
|
||||
|
||||
//CUDA
|
||||
CUDA_DEFINE1(CUresult, cuCtxDestroy_v2, CUcontext)
|
||||
CUDA_DEFINE2(CUresult, cuEventCreate, CUevent *, unsigned int)
|
||||
CUDA_DEFINE2(CUresult, cuDeviceGet, CUdevice *, int)
|
||||
CUDA_DEFINE3(CUresult, dispatch::cuMemcpyDtoH_v2, void *, CUdeviceptr, size_t)
|
||||
CUDA_DEFINE2(CUresult, cuStreamCreate, CUstream *, unsigned int)
|
||||
CUDA_DEFINE3(CUresult, cuEventElapsedTime, float *, CUevent, CUevent)
|
||||
CUDA_DEFINE1(CUresult, dispatch::cuMemFree_v2, CUdeviceptr)
|
||||
CUDA_DEFINE4(CUresult, dispatch::cuMemcpyDtoHAsync_v2, void *, CUdeviceptr, size_t, CUstream)
|
||||
CUDA_DEFINE1(CUresult, cuDriverGetVersion, int *)
|
||||
CUDA_DEFINE3(CUresult, cuDeviceGetName, char *, int, CUdevice)
|
||||
CUDA_DEFINE4(CUresult, dispatch::cuMemcpyHtoDAsync_v2, CUdeviceptr, const void *, size_t, CUstream)
|
||||
CUDA_DEFINE2(CUresult, cuModuleLoad, CUmodule *, const char *)
|
||||
CUDA_DEFINE11(CUresult, cuLaunchKernel, CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, CUstream, void **, void **)
|
||||
CUDA_DEFINE1(CUresult, cuModuleUnload, CUmodule)
|
||||
CUDA_DEFINE5(CUresult, cuModuleLoadDataEx, CUmodule *, const void *, unsigned int, CUjit_option *, void **)
|
||||
CUDA_DEFINE3(CUresult, cuDeviceGetAttribute, int *, CUdevice_attribute, CUdevice)
|
||||
CUDA_DEFINE1(CUresult, cuDeviceGetCount, int *)
|
||||
CUDA_DEFINE3(CUresult, dispatch::cuMemcpyHtoD_v2, CUdeviceptr, const void *, size_t )
|
||||
CUDA_DEFINE1(CUresult, cuInit, unsigned int)
|
||||
CUDA_DEFINE2(CUresult, cuEventRecord, CUevent, CUstream)
|
||||
CUDA_DEFINE3(CUresult, cuCtxCreate_v2, CUcontext *, unsigned int, CUdevice)
|
||||
CUDA_DEFINE3(CUresult, cuModuleGetFunction, CUfunction *, CUmodule, const char *)
|
||||
CUDA_DEFINE1(CUresult, cuStreamSynchronize, CUstream)
|
||||
CUDA_DEFINE1(CUresult, cuStreamDestroy_v2, CUstream)
|
||||
CUDA_DEFINE1(CUresult, cuEventDestroy_v2, CUevent)
|
||||
CUDA_DEFINE2(CUresult, cuMemAlloc_v2, CUdeviceptr*, size_t)
|
||||
|
||||
//NVRTC
// Definitions of the NVRTC entry points exposed by dispatch, generated with
// the NVRTC_DEFINE<N> helpers.
NVRTC_DEFINE6(nvrtcResult, nvrtcCreateProgram, nvrtcProgram *, const char *, const char *, int, const char **, const char **)
NVRTC_DEFINE3(nvrtcResult, nvrtcCompileProgram, nvrtcProgram, int, const char **)
NVRTC_DEFINE2(nvrtcResult, nvrtcGetProgramLogSize, nvrtcProgram, size_t *)
NVRTC_DEFINE2(nvrtcResult, nvrtcGetProgramLog, nvrtcProgram, char *)
NVRTC_DEFINE2(nvrtcResult, nvrtcGetPTXSize, nvrtcProgram, size_t *)
NVRTC_DEFINE2(nvrtcResult, nvrtcGetPTX, nvrtcProgram, char *)
|
||||
|
||||
void dispatch::release()
|
||||
{
|
||||
if(opencl_)
|
||||
dlclose(opencl_);
|
||||
}
|
||||
|
||||
// Handles of the dynamically loaded backend libraries; null until the
// matching clinit()/cuinit()/nvrtcinit() has opened the library.
void * dispatch::opencl_ = nullptr;
void * dispatch::cuda_ = nullptr;
void * dispatch::nvrtc_ = nullptr;
|
||||
|
||||
//OpenCL
|
||||
void* dispatch::clBuildProgram_;
|
||||
void* dispatch::clEnqueueNDRangeKernel_;
|
||||
void* dispatch::clSetKernelArg_;
|
||||
void* dispatch::clReleaseMemObject_;
|
||||
void* dispatch::clFinish_;
|
||||
void* dispatch::clGetMemObjectInfo_;
|
||||
void* dispatch::clGetCommandQueueInfo_;
|
||||
void* dispatch::clReleaseContext_;
|
||||
void* dispatch::clReleaseEvent_;
|
||||
void* dispatch::clEnqueueWriteBuffer_;
|
||||
void* dispatch::clEnqueueReadBuffer_;
|
||||
void* dispatch::clGetProgramBuildInfo_;
|
||||
void* dispatch::clReleaseDevice_;
|
||||
void* dispatch::clCreateContext_;
|
||||
void* dispatch::clGetDeviceIDs_;
|
||||
void* dispatch::clGetContextInfo_;
|
||||
void* dispatch::clGetDeviceInfo_;
|
||||
void* dispatch::clReleaseCommandQueue_;
|
||||
void* dispatch::clGetPlatformIDs_;
|
||||
void* dispatch::clGetPlatformInfo_;
|
||||
void* dispatch::clGetEventProfilingInfo_;
|
||||
void* dispatch::clCreateProgramWithBinary_;
|
||||
void* dispatch::clCreateCommandQueue_;
|
||||
void* dispatch::clRetainEvent_;
|
||||
void* dispatch::clReleaseProgram_;
|
||||
void* dispatch::clFlush_;
|
||||
void* dispatch::clGetProgramInfo_;
|
||||
void* dispatch::clGetKernelInfo_;
|
||||
void* dispatch::clGetKernelWorkGroupInfo_;
|
||||
void* dispatch::clCreateKernel_;
|
||||
void* dispatch::clCreateBuffer_;
|
||||
void* dispatch::clCreateProgramWithSource_;
|
||||
void* dispatch::clReleaseKernel_;
|
||||
|
||||
//CUDA
|
||||
void* dispatch::cuCtxDestroy_v2_;
|
||||
void* dispatch::cuEventCreate_;
|
||||
void* dispatch::cuDeviceGet_;
|
||||
void* dispatch::cuMemcpyDtoH_v2_;
|
||||
void* dispatch::cuStreamCreate_;
|
||||
void* dispatch::cuEventElapsedTime_;
|
||||
void* dispatch::cuMemFree_v2_;
|
||||
void* dispatch::cuMemcpyDtoHAsync_v2_;
|
||||
void* dispatch::cuDriverGetVersion_;
|
||||
void* dispatch::cuDeviceGetName_;
|
||||
void* dispatch::cuMemcpyHtoDAsync_v2_;
|
||||
void* dispatch::cuModuleLoad_;
|
||||
void* dispatch::cuLaunchKernel_;
|
||||
void* dispatch::cuModuleUnload_;
|
||||
void* dispatch::cuModuleLoadDataEx_;
|
||||
void* dispatch::cuDeviceGetAttribute_;
|
||||
void* dispatch::cuDeviceGetCount_;
|
||||
void* dispatch::cuMemcpyHtoD_v2_;
|
||||
void* dispatch::cuInit_;
|
||||
void* dispatch::cuEventRecord_;
|
||||
void* dispatch::cuCtxCreate_v2_;
|
||||
void* dispatch::cuModuleGetFunction_;
|
||||
void* dispatch::cuStreamSynchronize_;
|
||||
void* dispatch::cuStreamDestroy_v2_;
|
||||
void* dispatch::cuEventDestroy_v2_;
|
||||
void* dispatch::cuMemAlloc_v2_;
|
||||
|
||||
//NVRTC
// One pointer slot per NVRTC entry point; each is passed to f_impl by the
// definition of the function of the same name (without the trailing '_').
void* dispatch::nvrtcCompileProgram_ = nullptr;
void* dispatch::nvrtcGetProgramLogSize_ = nullptr;
void* dispatch::nvrtcGetPTX_ = nullptr;
void* dispatch::nvrtcGetPTXSize_ = nullptr;
void* dispatch::nvrtcCreateProgram_ = nullptr;
void* dispatch::nvrtcGetProgramLog_ = nullptr;
|
||||
|
||||
}
|
||||
}
|
@@ -11,14 +11,14 @@ Event::Event(backend_type backend) : backend_(backend), h_(backend_, true)
|
||||
{
|
||||
switch(backend_)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA:
|
||||
cuda::check(cuEventCreate(&h_.cu().first, CU_EVENT_DEFAULT));
|
||||
cuda::check(cuEventCreate(&h_.cu().second, CU_EVENT_DEFAULT));
|
||||
cuda::check(dispatch::dispatch::cuEventCreate(&h_.cu().first, CU_EVENT_DEFAULT));
|
||||
cuda::check(dispatch::dispatch::cuEventCreate(&h_.cu().second, CU_EVENT_DEFAULT));
|
||||
break;
|
||||
#endif
|
||||
case OPENCL: break;
|
||||
default: throw;
|
||||
case OPENCL:
|
||||
break;
|
||||
default:
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -31,12 +31,10 @@ long Event::elapsed_time() const
|
||||
{
|
||||
switch(backend_)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA:
|
||||
float time;
|
||||
cuda::check(cuEventElapsedTime(&time, h_.cu().first, h_.cu().second));
|
||||
cuda::check(dispatch::cuEventElapsedTime(&time, h_.cu().first, h_.cu().second));
|
||||
return 1e6*time;
|
||||
#endif
|
||||
case OPENCL:
|
||||
return static_cast<long>(ocl::info<CL_PROFILING_COMMAND_END>(h_.cl()) - ocl::info<CL_PROFILING_COMMAND_START>(h_.cl()));
|
||||
default:
|
||||
|
@@ -7,63 +7,59 @@ namespace isaac
|
||||
namespace driver
|
||||
{
|
||||
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
//CUDA
|
||||
template<class CLType, class CUType>
|
||||
void Handle<CLType, CUType>::_delete(CUcontext x) { cuda::check(dispatch::cuCtxDestroy(x)); }
|
||||
|
||||
template<class CLType, class CUType>
|
||||
void Handle<CLType, CUType>::_delete(CUcontext x) { cuCtxDestroy(x); }
|
||||
void Handle<CLType, CUType>::_delete(CUdeviceptr x) { cuda::check(dispatch::dispatch::cuMemFree(x)); }
|
||||
|
||||
template<class CLType, class CUType>
|
||||
void Handle<CLType, CUType>::_delete(CUdeviceptr x) { cuMemFree(x); }
|
||||
void Handle<CLType, CUType>::_delete(CUstream x) { cuda::check(dispatch::cuStreamDestroy(x)); }
|
||||
|
||||
template<class CLType, class CUType>
|
||||
void Handle<CLType, CUType>::_delete(CUstream x) { cuStreamDestroy(x); }
|
||||
void Handle<CLType, CUType>::_delete(CUdevice) { }
|
||||
|
||||
template<class CLType, class CUType>
|
||||
void Handle<CLType, CUType>::_delete(CUdevice) { std::cout << "CUdevice" << std::endl;}
|
||||
|
||||
template<class CLType, class CUType>
|
||||
void Handle<CLType, CUType>::_delete(CUevent x) { cuEventDestroy(x); }
|
||||
void Handle<CLType, CUType>::_delete(CUevent x) { cuda::check(dispatch::dispatch::cuEventDestroy(x)); }
|
||||
|
||||
template<class CLType, class CUType>
|
||||
void Handle<CLType, CUType>::_delete(CUfunction) { }
|
||||
|
||||
template<class CLType, class CUType>
|
||||
void Handle<CLType, CUType>::_delete(CUmodule x) { cuModuleUnload(x); }
|
||||
void Handle<CLType, CUType>::_delete(CUmodule x) { cuda::check(dispatch::dispatch::cuModuleUnload(x)); }
|
||||
|
||||
template<class CLType, class CUType>
|
||||
void Handle<CLType, CUType>::_delete(cu_event_t x) { _delete(x.first); _delete(x.second); }
|
||||
|
||||
#endif
|
||||
//OpenCL
|
||||
template<class CLType, class CUType>
|
||||
void Handle<CLType, CUType>::release(cl_context x) { ocl::check(dispatch::clReleaseContext(x)); }
|
||||
|
||||
template<class CLType, class CUType>
|
||||
void Handle<CLType, CUType>::release(cl_context x) { ocl::check(clReleaseContext(x)); }
|
||||
void Handle<CLType, CUType>::release(cl_mem x) { ocl::check(dispatch::clReleaseMemObject(x)); }
|
||||
|
||||
template<class CLType, class CUType>
|
||||
void Handle<CLType, CUType>::release(cl_mem x) { ocl::check(clReleaseMemObject(x)); }
|
||||
void Handle<CLType, CUType>::release(cl_command_queue x) { ocl::check(dispatch::clReleaseCommandQueue(x)); }
|
||||
|
||||
template<class CLType, class CUType>
|
||||
void Handle<CLType, CUType>::release(cl_command_queue x) { ocl::check(clReleaseCommandQueue(x)); }
|
||||
void Handle<CLType, CUType>::release(cl_device_id x) { ocl::check(dispatch::clReleaseDevice(x)); }
|
||||
|
||||
template<class CLType, class CUType>
|
||||
void Handle<CLType, CUType>::release(cl_device_id x) { ocl::check(clReleaseDevice(x)); }
|
||||
void Handle<CLType, CUType>::release(cl_event x) { ocl::check(dispatch::clReleaseEvent(x)); }
|
||||
|
||||
template<class CLType, class CUType>
|
||||
void Handle<CLType, CUType>::release(cl_event x) { ocl::check(clReleaseEvent(x)); }
|
||||
void Handle<CLType, CUType>::release(cl_kernel x) { ocl::check(dispatch::clReleaseKernel(x)); }
|
||||
|
||||
template<class CLType, class CUType>
|
||||
void Handle<CLType, CUType>::release(cl_kernel x) { ocl::check(clReleaseKernel(x)); }
|
||||
|
||||
template<class CLType, class CUType>
|
||||
void Handle<CLType, CUType>::release(cl_program x) { ocl::check(clReleaseProgram(x)); }
|
||||
void Handle<CLType, CUType>::release(cl_program x) { ocl::check(dispatch::clReleaseProgram(x)); }
|
||||
|
||||
template<class CLType, class CUType>
|
||||
Handle<CLType, CUType>::Handle(backend_type backend, bool take_ownership): backend_(backend), has_ownership_(take_ownership)
|
||||
{
|
||||
switch(backend_)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA: cu_.reset(new CUType());
|
||||
#endif
|
||||
case OPENCL: cl_.reset(new CLType());
|
||||
}
|
||||
}
|
||||
@@ -71,10 +67,8 @@ Handle<CLType, CUType>::Handle(backend_type backend, bool take_ownership): backe
|
||||
template<class CLType, class CUType>
|
||||
bool Handle<CLType, CUType>::operator==(Handle const & other) const
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
if(backend_==CUDA && other.backend_==CUDA)
|
||||
return cu()==other.cu();
|
||||
#endif
|
||||
if(backend_==OPENCL && other.backend_==OPENCL)
|
||||
return cl()==other.cl();
|
||||
return false;
|
||||
@@ -83,28 +77,22 @@ bool Handle<CLType, CUType>::operator==(Handle const & other) const
|
||||
template<class CLType, class CUType>
|
||||
bool Handle<CLType, CUType>::operator<(Handle const & other) const
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
if(backend_==CUDA && other.backend_==CUDA)
|
||||
return (*cu_)<(*other.cu_);
|
||||
#endif
|
||||
if(backend_==OPENCL && other.backend_==OPENCL)
|
||||
return (*cl_)<(*other.cl_);
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
if(backend_==CUDA && other.backend_==OPENCL)
|
||||
return true;
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
||||
template<class CLType, class CUType>
|
||||
Handle<CLType, CUType>::~Handle()
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
if(has_ownership_ && cu_ && cu_.unique() && *cu_){
|
||||
if(backend_==CUDA && has_ownership_ && cu_ && cu_.unique() && *cu_){
|
||||
_delete(*cu_);
|
||||
}
|
||||
#endif
|
||||
if(has_ownership_ && cl_ && cl_.unique() && *cl_)
|
||||
if(backend_==OPENCL && has_ownership_ && cl_ && cl_.unique() && *cl_)
|
||||
release(*cl_);
|
||||
}
|
||||
|
||||
@@ -116,7 +104,6 @@ template<class CLType, class CUType>
|
||||
CLType const & Handle<CLType, CUType>::cl() const
|
||||
{ return *cl_; }
|
||||
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
template<class CLType, class CUType>
|
||||
CUType & Handle<CLType, CUType>::cu()
|
||||
{
|
||||
@@ -136,16 +123,6 @@ template class Handle<cl_device_id, CUdevice>;
|
||||
template class Handle<cl_event, cu_event_t>;
|
||||
template class Handle<cl_kernel, CUfunction>;
|
||||
template class Handle<cl_program, CUmodule>;
|
||||
#else
|
||||
template class Handle<cl_mem, void>;
|
||||
template class Handle<cl_command_queue, void>;
|
||||
template class Handle<cl_context, void>;
|
||||
template class Handle<cl_device_id, void>;
|
||||
template class Handle<cl_event, void>;
|
||||
template class Handle<cl_kernel, void>;
|
||||
template class Handle<cl_program, void>;
|
||||
#endif
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
@@ -44,7 +44,7 @@ struct info<cl_mem>
|
||||
|
||||
static void get(cl_mem handle, cl_mem_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret)
|
||||
{
|
||||
cl_int err = clGetMemObjectInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
cl_int err = dispatch::clGetMemObjectInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
};
|
||||
@@ -56,7 +56,7 @@ struct info<cl_device_id>
|
||||
|
||||
static void get(cl_device_id handle, cl_device_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret)
|
||||
{
|
||||
cl_int err = clGetDeviceInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
cl_int err = dispatch::clGetDeviceInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
};
|
||||
@@ -67,12 +67,12 @@ struct info<cl_kernel>
|
||||
typedef cl_kernel_info type;
|
||||
|
||||
static void get(cl_kernel handle, cl_kernel_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
|
||||
cl_int err = clGetKernelInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
cl_int err = dispatch::clGetKernelInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
|
||||
static void get(cl_kernel handle, cl_device_id dev_id, cl_kernel_work_group_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
|
||||
cl_int err = clGetKernelWorkGroupInfo(handle, dev_id, param_name,param_value_size,param_value,param_value_size_ret);
|
||||
cl_int err = dispatch::clGetKernelWorkGroupInfo(handle, dev_id, param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
};
|
||||
@@ -83,7 +83,7 @@ struct info<cl_context>
|
||||
typedef cl_context_info type;
|
||||
|
||||
static void get(cl_context handle, cl_context_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
|
||||
cl_int err = clGetContextInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
cl_int err = dispatch::clGetContextInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
};
|
||||
@@ -94,12 +94,12 @@ struct info<cl_program>
|
||||
typedef cl_program_info type;
|
||||
|
||||
static void get(cl_program handle, cl_program_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
|
||||
cl_int err = clGetProgramInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
cl_int err = dispatch::clGetProgramInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
|
||||
static void get(cl_program handle, cl_device_id device, cl_program_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
|
||||
cl_int err = clGetProgramBuildInfo(handle,device,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
cl_int err = dispatch::clGetProgramBuildInfo(handle,device,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
};
|
||||
@@ -110,7 +110,7 @@ struct info<cl_event>
|
||||
{
|
||||
typedef cl_profiling_info type;
|
||||
static void get(cl_event handle, cl_profiling_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
|
||||
cl_int err = clGetEventProfilingInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
cl_int err = dispatch::clGetEventProfilingInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
};
|
||||
@@ -120,7 +120,7 @@ struct info<cl_command_queue>
|
||||
{
|
||||
typedef cl_command_queue_info type;
|
||||
static void get(cl_command_queue handle, cl_profiling_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
|
||||
cl_int err = clGetCommandQueueInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
cl_int err = dispatch::clGetCommandQueueInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
};
|
||||
@@ -130,7 +130,7 @@ struct info<cl_platform_id>
|
||||
{
|
||||
typedef cl_command_queue_info type;
|
||||
static void get(cl_platform_id handle, cl_profiling_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
|
||||
cl_int err = clGetPlatformInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
cl_int err = dispatch::clGetPlatformInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
};
|
||||
@@ -343,7 +343,7 @@ inline typename detail::return_type<cl_program, CL_PROGRAM_BINARIES>::Result inf
|
||||
std::vector<size_t> sizes = info<CL_PROGRAM_BINARY_SIZES>(handle);
|
||||
for(size_t s: sizes)
|
||||
res.push_back(new unsigned char[s]);
|
||||
clGetProgramInfo(handle, CL_PROGRAM_BINARIES, sizeof(unsigned char**), (void*)res.data(), NULL);
|
||||
dispatch::clGetProgramInfo(handle, CL_PROGRAM_BINARIES, sizeof(unsigned char**), (void*)res.data(), NULL);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@@ -13,16 +13,14 @@ Kernel::Kernel(Program const & program, const char * name) : backend_(program.ba
|
||||
{
|
||||
switch(backend_)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA:
|
||||
cu_params_store_.reserve(32);
|
||||
cu_params_.reserve(32);
|
||||
cuda::check(cuModuleGetFunction(&h_.cu(), program.h_.cu(), name));\
|
||||
cu_params_store_.reserve(64);
|
||||
cu_params_.reserve(64);
|
||||
cuda::check(dispatch::cuModuleGetFunction(&h_.cu(), program.h_.cu(), name));\
|
||||
break;
|
||||
#endif
|
||||
case OPENCL:
|
||||
cl_int err;
|
||||
h_.cl() = clCreateKernel(program.h_.cl(), name, &err);
|
||||
h_.cl() = dispatch::clCreateKernel(program.h_.cl(), name, &err);
|
||||
ocl::check(err);
|
||||
break;
|
||||
default:
|
||||
@@ -34,7 +32,6 @@ void Kernel::setArg(unsigned int index, std::size_t size, void* ptr)
|
||||
{
|
||||
switch(backend_)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA:
|
||||
if(index + 1> cu_params_store_.size())
|
||||
{
|
||||
@@ -45,9 +42,8 @@ void Kernel::setArg(unsigned int index, std::size_t size, void* ptr)
|
||||
memcpy(cu_params_store_[index].get(), ptr, size);
|
||||
cu_params_[index] = cu_params_store_[index].get();
|
||||
break;
|
||||
#endif
|
||||
case OPENCL:
|
||||
ocl::check(clSetKernelArg(h_.cl(), index, size, ptr));
|
||||
ocl::check(dispatch::clSetKernelArg(h_.cl(), index, size, ptr));
|
||||
break;
|
||||
default:
|
||||
throw;
|
||||
@@ -58,14 +54,12 @@ void Kernel::setArg(unsigned int index, Buffer const & data)
|
||||
{
|
||||
switch(backend_)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA:
|
||||
{
|
||||
setArg(index, sizeof(CUdeviceptr), (void*)&data.h_.cu()); break;
|
||||
}
|
||||
#endif
|
||||
case OPENCL:
|
||||
ocl::check(clSetKernelArg(h_.cl(), index, sizeof(cl_mem), (void*)&data.h_.cl()));
|
||||
ocl::check(dispatch::clSetKernelArg(h_.cl(), index, sizeof(cl_mem), (void*)&data.h_.cl()));
|
||||
break;
|
||||
default: throw;
|
||||
}
|
||||
@@ -75,14 +69,12 @@ void Kernel::setSizeArg(unsigned int index, size_t N)
|
||||
{
|
||||
switch(backend_)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA:
|
||||
{
|
||||
int NN = static_cast<cl_int>(N);
|
||||
setArg(index, sizeof(int), &NN);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
case OPENCL:
|
||||
{
|
||||
cl_int NN = static_cast<cl_int>(N);
|
||||
|
@@ -11,13 +11,11 @@ namespace isaac
|
||||
namespace driver
|
||||
{
|
||||
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
Platform::Platform(backend_type backend): backend_(backend)
|
||||
{
|
||||
if(backend==CUDA)
|
||||
cuInit(0);
|
||||
dispatch::cuInit(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
Platform::Platform(cl_platform_id const & platform) : backend_(OPENCL)
|
||||
{
|
||||
@@ -28,13 +26,12 @@ std::string Platform::version() const
|
||||
{
|
||||
switch(backend_)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA:
|
||||
int version;
|
||||
cuDriverGetVersion(&version);
|
||||
dispatch::cuDriverGetVersion(&version);
|
||||
return tools::to_string(version);
|
||||
#endif
|
||||
case OPENCL: return ocl::info<CL_PLATFORM_VERSION>(cl_platform_);
|
||||
case OPENCL:
|
||||
return ocl::info<CL_PLATFORM_VERSION>(cl_platform_);
|
||||
default: throw;
|
||||
}
|
||||
}
|
||||
@@ -42,10 +39,7 @@ std::string Platform::name() const
|
||||
{
|
||||
switch(backend_)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA: return "CUDA";
|
||||
#endif
|
||||
|
||||
case OPENCL: return ocl::info<CL_PLATFORM_NAME>(cl_platform_);
|
||||
default: throw;
|
||||
}
|
||||
@@ -60,22 +54,20 @@ void Platform::devices(std::vector<Device> & devices) const
|
||||
{
|
||||
switch(backend_)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA:
|
||||
{
|
||||
int N;
|
||||
cuda::check(cuDeviceGetCount(&N));
|
||||
cuda::check(dispatch::cuDeviceGetCount(&N));
|
||||
for(int i = 0 ; i < N ; ++i)
|
||||
devices.push_back(Device(i));
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
case OPENCL:
|
||||
{
|
||||
cl_uint ndevices;
|
||||
ocl::check(clGetDeviceIDs(cl_platform_, CL_DEVICE_TYPE_ALL, 0, NULL, &ndevices));
|
||||
ocl::check(dispatch::dispatch::clGetDeviceIDs(cl_platform_, CL_DEVICE_TYPE_ALL, 0, NULL, &ndevices));
|
||||
std::vector<cl_device_id> device_ids(ndevices);
|
||||
ocl::check(clGetDeviceIDs(cl_platform_, CL_DEVICE_TYPE_ALL, ndevices, device_ids.data(), NULL));
|
||||
ocl::check(dispatch::dispatch::clGetDeviceIDs(cl_platform_, CL_DEVICE_TYPE_ALL, ndevices, device_ids.data(), NULL));
|
||||
for(cl_device_id d : device_ids)
|
||||
devices.push_back(Device(d));
|
||||
break;
|
||||
|
@@ -4,9 +4,7 @@
|
||||
#include "isaac/driver/program.h"
|
||||
#include "isaac/driver/context.h"
|
||||
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
#include "helpers/cuda/vector.hpp"
|
||||
#endif
|
||||
#include "helpers/ocl/infos.hpp"
|
||||
|
||||
#include "sha1.hpp"
|
||||
@@ -23,7 +21,6 @@ Program::Program(Context const & context, std::string const & source) : backend_
|
||||
std::string cache_path = context.cache_path_;
|
||||
switch(backend_)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case CUDA:
|
||||
{
|
||||
|
||||
@@ -34,7 +31,7 @@ Program::Program(Context const & context, std::string const & source) : backend_
|
||||
//Load cached program
|
||||
if(cache_path.size() && std::ifstream(fname, std::ios::binary))
|
||||
{
|
||||
cuda::check(cuModuleLoad(&h_.cu(), fname.c_str()));
|
||||
cuda::check(dispatch::cuModuleLoad(&h_.cu(), fname.c_str()));
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -43,25 +40,25 @@ Program::Program(Context const & context, std::string const & source) : backend_
|
||||
const char * includes[] = {"helper_math.h"};
|
||||
const char * src[] = {helpers::cuda::vector};
|
||||
|
||||
nvrtc::check(nvrtcCreateProgram(&prog, source.c_str(), NULL, 1, src, includes));
|
||||
nvrtc::check(dispatch::nvrtcCreateProgram(&prog, source.c_str(), NULL, 1, src, includes));
|
||||
try{
|
||||
const char * options[] = {"--gpu-architecture=compute_52", "--restrict"};
|
||||
nvrtc::check(nvrtcCompileProgram(prog, 2, options));
|
||||
nvrtc::check(dispatch::nvrtcCompileProgram(prog, 2, options));
|
||||
}catch(nvrtc::exception::compilation const &)
|
||||
{
|
||||
size_t logsize;
|
||||
nvrtc::check(nvrtcGetProgramLogSize(prog, &logsize));
|
||||
nvrtc::check(dispatch::nvrtcGetProgramLogSize(prog, &logsize));
|
||||
std::string log(logsize, 0);
|
||||
nvrtc::check(nvrtcGetProgramLog(prog, (char*)log.data()));
|
||||
nvrtc::check(dispatch::nvrtcGetProgramLog(prog, (char*)log.data()));
|
||||
std::cout << "Compilation failed:" << std::endl;
|
||||
std::cout << log << std::endl;
|
||||
}
|
||||
|
||||
size_t ptx_size;
|
||||
nvrtc::check(nvrtcGetPTXSize(prog, &ptx_size));
|
||||
nvrtc::check(dispatch::nvrtcGetPTXSize(prog, &ptx_size));
|
||||
std::vector<char> ptx(ptx_size);
|
||||
nvrtc::check(nvrtcGetPTX(prog, ptx.data()));
|
||||
cuda::check(cuModuleLoadDataEx(&h_.cu(), ptx.data(), 0, NULL, NULL));
|
||||
nvrtc::check(dispatch::nvrtcGetPTX(prog, ptx.data()));
|
||||
cuda::check(dispatch::cuModuleLoadDataEx(&h_.cu(), ptx.data(), 0, NULL, NULL));
|
||||
|
||||
//Save cached program
|
||||
if (cache_path.size())
|
||||
@@ -77,8 +74,8 @@ Program::Program(Context const & context, std::string const & source) : backend_
|
||||
// oss.close();
|
||||
|
||||
// system(("/usr/local/cuda-7.0/bin/nvcc " + sha1 + ".cu -gencode arch=compute_50,code=sm_50 -cubin").c_str());
|
||||
// system(("perl /home/philippe/Development/maxas/maxas.pl -e " + sha1 + ".cubin > " + sha1 + ".sass").c_str());
|
||||
// system(("perl /home/philippe/Development/maxas/maxas.pl -i --noreuse" + sha1 + ".sass " + sha1 + ".cubin").c_str());
|
||||
// system(("perl /maxas.pl -e " + sha1 + ".cubin > " + sha1 + ".sass").c_str());
|
||||
// system(("perl /maxas.pl -i --noreuse" + sha1 + ".sass " + sha1 + ".cubin").c_str());
|
||||
|
||||
// std::ifstream ifs(sha1 + ".cubin");
|
||||
// std::cout << sha1 << std::endl;
|
||||
@@ -90,11 +87,10 @@ Program::Program(Context const & context, std::string const & source) : backend_
|
||||
|
||||
// str.assign((std::istreambuf_iterator<char>(ifs)),
|
||||
// std::istreambuf_iterator<char>());
|
||||
// cuda::check(cuModuleLoadDataEx(&h_.cu(), str.c_str(), 0, NULL, NULL));
|
||||
// cuda::check(dispatch::cuModuleLoadDataEx(&h_.cu(), str.c_str(), 0, NULL, NULL));
|
||||
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
case OPENCL:
|
||||
{
|
||||
cl_int err;
|
||||
@@ -118,18 +114,18 @@ Program::Program(Context const & context, std::string const & source) : backend_
|
||||
buffer.resize(len);
|
||||
cached.read((char*)buffer.data(), std::streamsize(len));
|
||||
char* cbuffer = buffer.data();
|
||||
h_.cl() = clCreateProgramWithBinary(context_.h_.cl(), static_cast<cl_uint>(devices.size()), devices.data(), &len, (const unsigned char **)&cbuffer, NULL, &err);
|
||||
h_.cl() = dispatch::clCreateProgramWithBinary(context_.h_.cl(), static_cast<cl_uint>(devices.size()), devices.data(), &len, (const unsigned char **)&cbuffer, NULL, &err);
|
||||
ocl::check(err);
|
||||
ocl::check(clBuildProgram(h_.cl(), static_cast<cl_uint>(devices.size()), devices.data(), build_opt.c_str(), NULL, NULL));
|
||||
ocl::check(dispatch::clBuildProgram(h_.cl(), static_cast<cl_uint>(devices.size()), devices.data(), build_opt.c_str(), NULL, NULL));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
std::size_t srclen = source.size();
|
||||
const char * csrc = source.c_str();
|
||||
h_.cl() = clCreateProgramWithSource(context_.h_.cl(), 1, &csrc, &srclen, &err);
|
||||
h_.cl() = dispatch::clCreateProgramWithSource(context_.h_.cl(), 1, &csrc, &srclen, &err);
|
||||
try{
|
||||
ocl::check(clBuildProgram(h_.cl(), static_cast<cl_uint>(devices.size()), devices.data(), build_opt.c_str(), NULL, NULL));
|
||||
ocl::check(dispatch::clBuildProgram(h_.cl(), static_cast<cl_uint>(devices.size()), devices.data(), build_opt.c_str(), NULL, NULL));
|
||||
}catch(ocl::exception::build_program_failure const &){
|
||||
for(std::vector<cl_device_id>::const_iterator it = devices.begin(); it != devices.end(); ++it)
|
||||
{
|
||||
|
@@ -12,10 +12,10 @@ std::string const & keyword::get() const
|
||||
{
|
||||
switch(backend_)
|
||||
{
|
||||
case driver::OPENCL: return opencl_;
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case driver::CUDA: return cuda_;
|
||||
#endif
|
||||
case driver::OPENCL:
|
||||
return opencl_;
|
||||
case driver::CUDA:
|
||||
return cuda_;
|
||||
default: throw;
|
||||
}
|
||||
}
|
||||
|
@@ -42,10 +42,10 @@ std::string axpy::generate_impl(std::string const & suffix, expressions_tuple co
|
||||
|
||||
switch(backend)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case driver::CUDA: stream << "#include \"helper_math.h\"" << std::endl; break;
|
||||
#endif
|
||||
case driver::OPENCL: stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break;
|
||||
case driver::CUDA:
|
||||
stream << "#include \"helper_math.h\"" << std::endl; break;
|
||||
case driver::OPENCL:
|
||||
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break;
|
||||
}
|
||||
|
||||
stream << KernelPrefix(backend) << " void " << "axpy" << suffix << "(" << _size_t << " N," << generate_arguments(dtype, device, mappings, expressions) << ")" << std::endl;
|
||||
|
@@ -98,10 +98,10 @@ std::string dot::generate_impl(std::string const & suffix, expressions_tuple con
|
||||
* -----------------------*/
|
||||
switch(backend)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case driver::CUDA: stream << "#include \"helper_math.h\"" << std::endl; break;
|
||||
#endif
|
||||
case driver::OPENCL: stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl; break;
|
||||
case driver::CUDA:
|
||||
stream << "#include \"helper_math.h\"" << std::endl; break;
|
||||
case driver::OPENCL:
|
||||
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl; break;
|
||||
}
|
||||
|
||||
stream << KernelPrefix(backend) << " void " << name[0] << "(" << arguments << generate_arguments("#scalartype", device, mappings, expressions) << ")" << std::endl;
|
||||
|
@@ -143,10 +143,10 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
|
||||
|
||||
switch(backend)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case driver::CUDA: stream << "#include \"helper_math.h\"" << std::endl; break;
|
||||
#endif
|
||||
case driver::OPENCL: stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break;
|
||||
case driver::CUDA:
|
||||
stream << "#include \"helper_math.h\"" << std::endl; break;
|
||||
case driver::OPENCL:
|
||||
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break;
|
||||
}
|
||||
|
||||
stream << KernelPrefix(backend) << " void " << gemm_name << "(" << _size_t << " M, " << _size_t << " N, " << _size_t << " K, "
|
||||
|
@@ -74,10 +74,10 @@ std::string gemv::generate_impl(std::string const & suffix, expressions_tuple co
|
||||
int col_simd_width = (dot_type_ == REDUCE_COLUMNS) ? 1 : p_.simd_width;
|
||||
switch(backend)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case driver::CUDA: stream << "#include \"helper_math.h\"" << std::endl; break;
|
||||
#endif
|
||||
case driver::OPENCL: stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break;
|
||||
case driver::CUDA:
|
||||
stream << "#include \"helper_math.h\"" << std::endl; break;
|
||||
case driver::OPENCL:
|
||||
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break;
|
||||
}
|
||||
|
||||
stream << KernelPrefix(backend) << " void " << name[0] << "(" << arguments << generate_arguments("#scalartype", device, mappings, expressions) << ")" << std::endl;
|
||||
|
@@ -39,10 +39,10 @@ std::string ger::generate_impl(std::string const & suffix, expressions_tuple con
|
||||
|
||||
switch(backend)
|
||||
{
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
case driver::CUDA: stream << "#include \"helper_math.h\"" << std::endl; break;
|
||||
#endif
|
||||
case driver::OPENCL: stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break;
|
||||
case driver::CUDA:
|
||||
stream << "#include \"helper_math.h\"" << std::endl; break;
|
||||
case driver::OPENCL:
|
||||
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break;
|
||||
}
|
||||
|
||||
stream << KernelPrefix(backend) << " void axpy" << suffix << "(" << _size_t << " M, " << _size_t << " N, " << generate_arguments("#scalartype", device, mappings, expressions) << ")" << std::endl;
|
||||
|
@@ -33,9 +33,9 @@ extern "C"
|
||||
if(events)
|
||||
{
|
||||
events[i] = levents.front().handle().cl();
|
||||
clRetainEvent(events[i]);
|
||||
is::driver::dispatch::clRetainEvent(events[i]);
|
||||
}
|
||||
clFlush(commandQueues[i]);
|
||||
is::driver::dispatch::clFlush(commandQueues[i]);
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -60,7 +60,7 @@ def main():
|
||||
|
||||
def find_opencl():
|
||||
cvars = sysconfig.get_config_vars()
|
||||
lib = find_library('OpenCL', '/opt/adreno-driver*/lib' if for_android else '/opt/AMDAPPSDK*/lib/x86_64')
|
||||
lib = find_library('OpenCL', '' if for_android else '')
|
||||
return {'include': '', 'lib': dirname(lib)} if lib else None
|
||||
|
||||
def find_in_path(name, path):
|
||||
@@ -124,7 +124,7 @@ def main():
|
||||
libraries += ['gnustl_shared']
|
||||
|
||||
#Source files
|
||||
src = 'src/lib/symbolic/preset.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/io.cpp src/lib/symbolic/expression.cpp src/lib/array.cpp src/lib/value_scalar.cpp src/lib/driver/backend.cpp src/lib/driver/device.cpp src/lib/driver/kernel.cpp src/lib/driver/buffer.cpp src/lib/driver/platform.cpp src/lib/driver/check.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/program_cache.cpp src/lib/driver/context.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/handle.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/profiles/presets.cpp src/lib/profiles/profiles.cpp src/lib/profiles/predictors/random_forest.cpp src/lib/kernels/templates/gemv.cpp src/lib/kernels/templates/axpy.cpp src/lib/kernels/templates/gemm.cpp src/lib/kernels/templates/ger.cpp src/lib/kernels/templates/dot.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/stream.cpp src/lib/kernels/parse.cpp src/lib/kernels/keywords.cpp src/lib/kernels/binder.cpp src/lib/wrap/clBLAS.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
|
||||
src = 'src/lib/symbolic/preset.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/io.cpp src/lib/symbolic/expression.cpp src/lib/array.cpp src/lib/value_scalar.cpp src/lib/driver/backend.cpp src/lib/driver/device.cpp src/lib/driver/kernel.cpp src/lib/driver/buffer.cpp src/lib/driver/platform.cpp src/lib/driver/check.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/dispatch.cpp src/lib/driver/program_cache.cpp src/lib/driver/context.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/handle.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/profiles/presets.cpp src/lib/profiles/profiles.cpp src/lib/profiles/predictors/random_forest.cpp src/lib/kernels/templates/gemv.cpp src/lib/kernels/templates/axpy.cpp src/lib/kernels/templates/gemm.cpp src/lib/kernels/templates/ger.cpp src/lib/kernels/templates/dot.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/stream.cpp src/lib/kernels/parse.cpp src/lib/kernels/keywords.cpp src/lib/kernels/binder.cpp src/lib/wrap/clBLAS.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
|
||||
boostsrc = 'external/boost/libs/'
|
||||
for s in ['numpy','python','smart_ptr','system','thread']:
|
||||
src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]
|
||||
|
@@ -107,9 +107,7 @@ void export_driver()
|
||||
bp::enum_<sc::driver::backend_type>
|
||||
("backend_type")
|
||||
.value("OPENCL", sc::driver::OPENCL)
|
||||
#ifdef ISAAC_WITH_CUDA
|
||||
.value("CUDA", sc::driver::CUDA)
|
||||
#endif
|
||||
;
|
||||
|
||||
bp::enum_<sc::driver::Device::Type>
|
||||
|
@@ -7,5 +7,5 @@ get_property(ISAAC_PATH TARGET isaac PROPERTY LOCATION)
|
||||
foreach(PROG axpy dot ger gemv gemm)
|
||||
add_executable(${PROG}-test ${PROG}.cpp)
|
||||
add_test(${PROG} ${PROG}-test)
|
||||
target_link_libraries(${PROG}-test isaac ${OPENCL_LIBRARIES})
|
||||
target_link_libraries(${PROG}-test isaac)
|
||||
endforeach(PROG)
|
||||
|
Reference in New Issue
Block a user