From c7665021d1c32babc0f36d444ae59f316e608949 Mon Sep 17 00:00:00 2001 From: Philippe Tillet Date: Wed, 28 Jan 2015 22:07:09 -0500 Subject: [PATCH] reducing overhead; reverted custom CL/ header because CL/cl.hpp was buggy --- bench/blas.cpp | 162 +- include/CL/cl.h | 364 +- include/CL/cl.hpp | 10845 ++++++++++++++++++--- include/CL/cl_d3d10.h | 4 +- include/CL/cl_ext.h | 145 +- include/CL/cl_gl.h | 91 +- include/CL/cl_gl_ext.h | 2 +- include/CL/cl_platform.h | 80 +- include/CL/opencl.h | 2 +- include/atidlas/backend/templates/base.h | 4 +- include/atidlas/cl/queues.h | 2 +- lib/CMakeLists.txt | 2 +- lib/backend/templates/base.cpp | 25 +- lib/backend/templates/mreduction.cpp | 2 +- lib/backend/templates/reduction.cpp | 6 +- lib/backend/templates/vaxpy.cpp | 12 +- lib/cl/program_map.cpp | 41 +- lib/model/model.cpp | 1 + python/autotune/pysrc/genetic.py | 1 - python/autotune/pysrc/genetic.pyc | Bin 8270 -> 8182 bytes python/autotune/pysrc/misc_tools.pyc | Bin 8910 -> 8910 bytes 21 files changed, 10317 insertions(+), 1474 deletions(-) diff --git a/bench/blas.cpp b/bench/blas.cpp index c08ee6798..9e4169c15 100644 --- a/bench/blas.cpp +++ b/bench/blas.cpp @@ -40,7 +40,7 @@ void bench(ad::numeric_type dtype) total_time += times.back();\ }\ float tres = median(times);\ - std::cout << " " << PERF << std::flush;\ + std::cout << " " << tres << std::flush;\ } #define CL_BENCHMARK(OP, PERF) BENCHMARK(OP, PERF, ad::cl_ext::synchronize(ad::cl_ext::default_context())) @@ -86,89 +86,89 @@ void bench(ad::numeric_type dtype) } std::cout << "\n\n" << std::flush; - std::cout << "#DOT" << std::endl; - for(std::vector::const_iterator it = BLAS1_N.begin() ; it != BLAS1_N.end() ; ++it) - { - int_t N = *it; - std::cout << N; - /* ATIDLAS */ - ad::array x(N, dtype), y(N, dtype); - ad::array scratch(N, dtype); - ad::scalar s(dtype); - CL_BENCHMARK(s = dot(x,y), bandwidth(2*N, tres, dtsize)); - /* clAmdBlas */ -#ifdef BENCH_CLAMDBLAS - CL_BENCHMARK(clAmdBlasSdot(N, s.data()(), 0, x.data()(), 0, 1, y.data()(), 0, 1, scratch.data()(), 1, &ad::cl_ext::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(2*N, tres, dtsize)) -#endif - /* BLAS */ -#ifdef BENCH_CBLAS - std::vector cx(N), cy(N); - ad::copy(x, cx); - ad::copy(y, cy); - CPU_BENCHMARK(cblas_sdot(N, cx.data(), 1, cy.data(), 1), bandwidth(2*N, tres, dtsize)); -#endif - std::cout << std::endl; - } - std::cout << "\n\n" << std::flush; - - /*---------*/ - /*--BLAS2--*/ - /*---------*/ - //T-layout - std::cout << "#GEMV-T" << std::endl; - for(std::vector::const_iterator Mit = BLAS2_M.begin() ; Mit != BLAS2_M.end() ; ++Mit) - for(std::vector::const_iterator Nit = BLAS2_N.begin() ; Nit != BLAS2_N.end() ; ++Nit) - { - int_t M = *Mit; - int_t N = *Nit; - std::cout << M << "," << N; - /* ATIDLAS */ - ad::array A(N, M, dtype), y(M, dtype), x(N, dtype); - CL_BENCHMARK(y = dot(trans(A),x), bandwidth(M*N + M + N, tres, dtsize)); - /* clAmdBlas */ - #ifdef BENCH_CLAMDBLAS - CL_BENCHMARK(clAmdBlasSgemv(clAmdBlasColumnMajor, clAmdBlasTrans, N, M, 1, A.data()(), A.ld(), x.data()(), 0, 1, 0, y.data()(), 0, 1, 1, &ad::cl_ext::get_queue(x.context(), 0)(),0, NULL, NULL), bandwidth(M*N + M + N, tres, dtsize)) - #endif - /* BLAS */ - #ifdef BENCH_CBLAS - std::vector cA(N*M), cx(N), cy(M); - ad::copy(x, cx); - ad::copy(y, cy); - ad::copy(A, cA); - CPU_BENCHMARK(cblas_sgemv(CblasColMajor, CblasTrans, N, M, 1, cA.data(), N, cx.data(), 1, 0, cy.data(), 1), bandwidth(M*N + M + N, tres, dtsize)); - #endif - std::cout << std::endl; - } - std::cout << "\n\n" << std::flush; +// std::cout << "#DOT" << std::endl; +// for(std::vector::const_iterator it = BLAS1_N.begin() ; it != BLAS1_N.end() ; ++it) +// { +// int_t N = *it; +// std::cout << N; +// /* ATIDLAS */ +// ad::array x(N, dtype), y(N, dtype); +// ad::array scratch(N, dtype); +// ad::scalar s(dtype); +// CL_BENCHMARK(s = dot(x,y), bandwidth(2*N, tres, dtsize)); +// /* clAmdBlas */ +//#ifdef BENCH_CLAMDBLAS +// CL_BENCHMARK(clAmdBlasSdot(N, s.data()(), 0, x.data()(), 0, 1, y.data()(), 0, 1, scratch.data()(), 1, &ad::cl_ext::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(2*N, tres, dtsize)) +//#endif +// /* BLAS */ +//#ifdef BENCH_CBLAS +// std::vector cx(N), cy(N); +// ad::copy(x, cx); +// ad::copy(y, cy); +// CPU_BENCHMARK(cblas_sdot(N, cx.data(), 1, cy.data(), 1), bandwidth(2*N, tres, dtsize)); +//#endif +// std::cout << std::endl; +// } +// std::cout << "\n\n" << std::flush; // /*---------*/ -// /*--BLAS3--*/ +// /*--BLAS2--*/ // /*---------*/ - std::cout << "#GEMM-NT" << std::endl; - for(std::vector::const_iterator Mit = BLAS3_M.begin() ; Mit != BLAS3_M.end() ; ++Mit) - for(std::vector::const_iterator Nit = BLAS3_N.begin() ; Nit != BLAS3_N.end() ; ++Nit) - for(std::vector::const_iterator Kit = BLAS3_K.begin() ; Kit != BLAS3_K.end() ; ++Kit) - { - int_t M = *Kit, N = *Kit, K = *Kit; - std::cout << M << "," << N << "," << K; - /* ATIDLAS */ - ad::array C(M, N, dtype), A(M, K, dtype), B(N, K, dtype); - CL_BENCHMARK(C = dot(A,trans(B)), gflops((double)2*M*N*K, tres)); - /* clAmdBlas */ - #ifdef BENCH_CLAMDBLAS - CL_BENCHMARK(clAmdBlasSgemm(clAmdBlasColumnMajor, clAmdBlasNoTrans, clAmdBlasTrans, M, N, K, 1, A.data()(), A.ld(), B.data()(), B.ld(), - 0, C.data()(), C.ld(), 1, &ad::cl_ext::get_queue(C.context(), 0)(),0, NULL, NULL), gflops((double)2*M*N*K, tres)) - #endif - /* BLAS */ - #ifdef BENCH_CBLAS - std::vector cC(M*N), cA(M*K), cB(N*K); - ad::copy(C, cC); - ad::copy(A, cA); - ad::copy(B, cB); - CPU_BENCHMARK(cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, cA.data(), M, cB.data(), N, 1, cC.data(), M), gflops((double)2*M*N*K, tres)); - #endif - std::cout << std::endl; - } +// //T-layout +// std::cout << "#GEMV-T" << std::endl; +// for(std::vector::const_iterator Mit = BLAS2_M.begin() ; Mit != BLAS2_M.end() ; ++Mit) +// for(std::vector::const_iterator Nit = BLAS2_N.begin() ; Nit != BLAS2_N.end() ; ++Nit) +// { +// int_t M = *Mit; +// int_t N = *Nit; +// std::cout << M << "," << N; +// /* ATIDLAS */ +// ad::array A(N, M, dtype), y(M, dtype), x(N, dtype); +// CL_BENCHMARK(y = dot(trans(A),x), bandwidth(M*N + M + N, tres, dtsize)); +// /* clAmdBlas */ +// #ifdef BENCH_CLAMDBLAS +// CL_BENCHMARK(clAmdBlasSgemv(clAmdBlasColumnMajor, clAmdBlasTrans, N, M, 1, A.data()(), A.ld(), x.data()(), 0, 1, 0, y.data()(), 0, 1, 1, &ad::cl_ext::get_queue(x.context(), 0)(),0, NULL, NULL), bandwidth(M*N + M + N, tres, dtsize)) +// #endif +// /* BLAS */ +// #ifdef BENCH_CBLAS +// std::vector cA(N*M), cx(N), cy(M); +// ad::copy(x, cx); +// ad::copy(y, cy); +// ad::copy(A, cA); +// CPU_BENCHMARK(cblas_sgemv(CblasColMajor, CblasTrans, N, M, 1, cA.data(), N, cx.data(), 1, 0, cy.data(), 1), bandwidth(M*N + M + N, tres, dtsize)); +// #endif +// std::cout << std::endl; +// } +// std::cout << "\n\n" << std::flush; + +//// /*---------*/ +//// /*--BLAS3--*/ +//// /*---------*/ +// std::cout << "#GEMM-NT" << std::endl; +// for(std::vector::const_iterator Mit = BLAS3_M.begin() ; Mit != BLAS3_M.end() ; ++Mit) +// for(std::vector::const_iterator Nit = BLAS3_N.begin() ; Nit != BLAS3_N.end() ; ++Nit) +// for(std::vector::const_iterator Kit = BLAS3_K.begin() ; Kit != BLAS3_K.end() ; ++Kit) +// { +// int_t M = *Kit, N = *Kit, K = *Kit; +// std::cout << M << "," << N << "," << K; +// /* ATIDLAS */ +// ad::array C(M, N, dtype), A(M, K, dtype), B(N, K, dtype); +// CL_BENCHMARK(C = dot(A,trans(B)), gflops((double)2*M*N*K, tres)); +// /* clAmdBlas */ +// #ifdef BENCH_CLAMDBLAS +// CL_BENCHMARK(clAmdBlasSgemm(clAmdBlasColumnMajor, clAmdBlasNoTrans, clAmdBlasTrans, M, N, K, 1, A.data()(), A.ld(), B.data()(), B.ld(), +// 0, C.data()(), C.ld(), 1, &ad::cl_ext::get_queue(C.context(), 0)(),0, NULL, NULL), gflops((double)2*M*N*K, tres)) +// #endif +// /* BLAS */ +// #ifdef BENCH_CBLAS +// std::vector cC(M*N), cA(M*K), cB(N*K); +// ad::copy(C, cC); +// ad::copy(A, cA); +// ad::copy(B, cB); +// CPU_BENCHMARK(cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, cA.data(), M, cB.data(), N, 1, cC.data(), M), gflops((double)2*M*N*K, tres)); +// #endif +// std::cout << std::endl; +// } } diff --git a/include/CL/cl.h b/include/CL/cl.h index 4f21afe55..203c65974 100644 --- a/include/CL/cl.h +++ b/include/CL/cl.h @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2008-2010 The Khronos Group Inc. + * Copyright (c) 2008 - 2012 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the @@ -21,8 +21,6 @@ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. ******************************************************************************/ -/* $Revision: 11985 $ on $Date: 2010-07-15 11:16:06 -0700 (Thu, 15 Jul 2010) $ */ - #ifndef __OPENCL_CL_H #define __OPENCL_CL_H @@ -58,8 +56,10 @@ typedef cl_uint cl_device_mem_cache_type; typedef cl_uint cl_device_local_mem_type; typedef cl_bitfield cl_device_exec_capabilities; typedef cl_bitfield cl_command_queue_properties; +typedef intptr_t cl_device_partition_property; +typedef cl_bitfield cl_device_affinity_domain; -typedef intptr_t cl_context_properties; +typedef intptr_t cl_context_properties; typedef cl_uint cl_context_info; typedef cl_uint cl_command_queue_info; typedef cl_uint cl_channel_order; @@ -67,6 +67,7 @@ typedef cl_uint cl_channel_type; typedef cl_bitfield cl_mem_flags; typedef cl_uint cl_mem_object_type; typedef cl_uint cl_mem_info; +typedef cl_bitfield cl_mem_migration_flags; typedef cl_uint cl_image_info; typedef cl_uint cl_buffer_create_type; typedef cl_uint cl_addressing_mode; @@ -75,24 +76,43 @@ typedef cl_uint cl_sampler_info; typedef cl_bitfield cl_map_flags; typedef cl_uint cl_program_info; typedef cl_uint cl_program_build_info; +typedef cl_uint cl_program_binary_type; typedef cl_int cl_build_status; typedef cl_uint cl_kernel_info; +typedef cl_uint cl_kernel_arg_info; +typedef cl_uint cl_kernel_arg_address_qualifier; +typedef cl_uint cl_kernel_arg_access_qualifier; +typedef cl_bitfield cl_kernel_arg_type_qualifier; typedef cl_uint cl_kernel_work_group_info; typedef cl_uint cl_event_info; typedef cl_uint cl_command_type; typedef cl_uint cl_profiling_info; + typedef struct _cl_image_format { cl_channel_order image_channel_order; cl_channel_type image_channel_data_type; } cl_image_format; +typedef struct _cl_image_desc { + cl_mem_object_type image_type; + size_t image_width; + size_t image_height; + size_t image_depth; + size_t image_array_size; + size_t image_row_pitch; + size_t image_slice_pitch; + cl_uint num_mip_levels; + cl_uint num_samples; + cl_mem buffer; +} cl_image_desc; typedef struct _cl_buffer_region { size_t origin; size_t size; } cl_buffer_region; + /******************************************************************************/ /* Error Codes */ @@ -111,6 +131,11 @@ typedef struct _cl_buffer_region { #define CL_MAP_FAILURE -12 #define CL_MISALIGNED_SUB_BUFFER_OFFSET -13 #define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14 +#define CL_COMPILE_PROGRAM_FAILURE -15 +#define CL_LINKER_NOT_AVAILABLE -16 +#define CL_LINK_PROGRAM_FAILURE -17 +#define CL_DEVICE_PARTITION_FAILED -18 +#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19 #define CL_INVALID_VALUE -30 #define CL_INVALID_DEVICE_TYPE -31 @@ -147,14 +172,21 @@ typedef struct _cl_buffer_region { #define CL_INVALID_MIP_LEVEL -62 #define CL_INVALID_GLOBAL_WORK_SIZE -63 #define CL_INVALID_PROPERTY -64 +#define CL_INVALID_IMAGE_DESCRIPTOR -65 +#define CL_INVALID_COMPILER_OPTIONS -66 +#define CL_INVALID_LINKER_OPTIONS -67 +#define CL_INVALID_DEVICE_PARTITION_COUNT -68 /* OpenCL Version */ #define CL_VERSION_1_0 1 #define CL_VERSION_1_1 1 +#define CL_VERSION_1_2 1 /* cl_bool */ #define CL_FALSE 0 #define CL_TRUE 1 +#define CL_BLOCKING CL_TRUE +#define CL_NON_BLOCKING CL_FALSE /* cl_platform_info */ #define CL_PLATFORM_PROFILE 0x0900 @@ -168,6 +200,7 @@ typedef struct _cl_buffer_region { #define CL_DEVICE_TYPE_CPU (1 << 1) #define CL_DEVICE_TYPE_GPU (1 << 2) #define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) +#define CL_DEVICE_TYPE_CUSTOM (1 << 4) #define CL_DEVICE_TYPE_ALL 0xFFFFFFFF /* cl_device_info */ @@ -221,7 +254,7 @@ typedef struct _cl_buffer_region { #define CL_DEVICE_VERSION 0x102F #define CL_DEVICE_EXTENSIONS 0x1030 #define CL_DEVICE_PLATFORM 0x1031 -/* 0x1032 reserved for CL_DEVICE_DOUBLE_FP_CONFIG */ +#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 /* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG */ #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034 #define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035 @@ -233,6 +266,20 @@ typedef struct _cl_buffer_region { #define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B #define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C #define CL_DEVICE_OPENCL_C_VERSION 0x103D +#define CL_DEVICE_LINKER_AVAILABLE 0x103E +#define CL_DEVICE_BUILT_IN_KERNELS 0x103F +#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040 +#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041 +#define CL_DEVICE_PARENT_DEVICE 0x1042 +#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043 +#define CL_DEVICE_PARTITION_PROPERTIES 0x1044 +#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045 +#define CL_DEVICE_PARTITION_TYPE 0x1046 +#define CL_DEVICE_REFERENCE_COUNT 0x1047 +#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048 +#define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049 +#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A +#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B /* cl_device_fp_config - bitfield */ #define CL_FP_DENORM (1 << 0) @@ -242,6 +289,7 @@ typedef struct _cl_buffer_region { #define CL_FP_ROUND_TO_INF (1 << 4) #define CL_FP_FMA (1 << 5) #define CL_FP_SOFT_FLOAT (1 << 6) +#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7) /* cl_device_mem_cache_type */ #define CL_NONE 0x0 @@ -266,8 +314,23 @@ typedef struct _cl_buffer_region { #define CL_CONTEXT_PROPERTIES 0x1082 #define CL_CONTEXT_NUM_DEVICES 0x1083 -/* cl_context_info + cl_context_properties */ +/* cl_context_properties */ #define CL_CONTEXT_PLATFORM 0x1084 +#define CL_CONTEXT_INTEROP_USER_SYNC 0x1085 + +/* cl_device_partition_property */ +#define CL_DEVICE_PARTITION_EQUALLY 0x1086 +#define CL_DEVICE_PARTITION_BY_COUNTS 0x1087 +#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0 +#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088 + +/* cl_device_affinity_domain */ +#define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0) +#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1) +#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2) +#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3) +#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4) +#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5) /* cl_command_queue_info */ #define CL_QUEUE_CONTEXT 0x1090 @@ -282,6 +345,14 @@ typedef struct _cl_buffer_region { #define CL_MEM_USE_HOST_PTR (1 << 3) #define CL_MEM_ALLOC_HOST_PTR (1 << 4) #define CL_MEM_COPY_HOST_PTR (1 << 5) +// reserved (1 << 6) +#define CL_MEM_HOST_WRITE_ONLY (1 << 7) +#define CL_MEM_HOST_READ_ONLY (1 << 8) +#define CL_MEM_HOST_NO_ACCESS (1 << 9) + +/* cl_mem_migration_flags - bitfield */ +#define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0) +#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1) /* cl_channel_order */ #define CL_R 0x10B0 @@ -297,6 +368,8 @@ typedef struct _cl_buffer_region { #define CL_Rx 0x10BA #define CL_RGx 0x10BB #define CL_RGBx 0x10BC +#define CL_DEPTH 0x10BD +#define CL_DEPTH_STENCIL 0x10BE /* cl_channel_type */ #define CL_SNORM_INT8 0x10D0 @@ -314,11 +387,16 @@ typedef struct _cl_buffer_region { #define CL_UNSIGNED_INT32 0x10DC #define CL_HALF_FLOAT 0x10DD #define CL_FLOAT 0x10DE +#define CL_UNORM_INT24 0x10DF /* cl_mem_object_type */ #define CL_MEM_OBJECT_BUFFER 0x10F0 #define CL_MEM_OBJECT_IMAGE2D 0x10F1 #define CL_MEM_OBJECT_IMAGE3D 0x10F2 +#define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3 +#define CL_MEM_OBJECT_IMAGE1D 0x10F4 +#define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5 +#define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6 /* cl_mem_info */ #define CL_MEM_TYPE 0x1100 @@ -339,6 +417,10 @@ typedef struct _cl_buffer_region { #define CL_IMAGE_WIDTH 0x1114 #define CL_IMAGE_HEIGHT 0x1115 #define CL_IMAGE_DEPTH 0x1116 +#define CL_IMAGE_ARRAY_SIZE 0x1117 +#define CL_IMAGE_BUFFER 0x1118 +#define CL_IMAGE_NUM_MIP_LEVELS 0x1119 +#define CL_IMAGE_NUM_SAMPLES 0x111A /* cl_addressing_mode */ #define CL_ADDRESS_NONE 0x1130 @@ -361,6 +443,7 @@ typedef struct _cl_buffer_region { /* cl_map_flags - bitfield */ #define CL_MAP_READ (1 << 0) #define CL_MAP_WRITE (1 << 1) +#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2) /* cl_program_info */ #define CL_PROGRAM_REFERENCE_COUNT 0x1160 @@ -370,11 +453,20 @@ typedef struct _cl_buffer_region { #define CL_PROGRAM_SOURCE 0x1164 #define CL_PROGRAM_BINARY_SIZES 0x1165 #define CL_PROGRAM_BINARIES 0x1166 +#define CL_PROGRAM_NUM_KERNELS 0x1167 +#define CL_PROGRAM_KERNEL_NAMES 0x1168 /* cl_program_build_info */ #define CL_PROGRAM_BUILD_STATUS 0x1181 #define CL_PROGRAM_BUILD_OPTIONS 0x1182 #define CL_PROGRAM_BUILD_LOG 0x1183 +#define CL_PROGRAM_BINARY_TYPE 0x1184 + +/* cl_program_binary_type */ +#define CL_PROGRAM_BINARY_TYPE_NONE 0x0 +#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1 +#define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2 +#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4 /* cl_build_status */ #define CL_BUILD_SUCCESS 0 @@ -388,6 +480,32 @@ typedef struct _cl_buffer_region { #define CL_KERNEL_REFERENCE_COUNT 0x1192 #define CL_KERNEL_CONTEXT 0x1193 #define CL_KERNEL_PROGRAM 0x1194 +#define CL_KERNEL_ATTRIBUTES 0x1195 + +/* cl_kernel_arg_info */ +#define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196 +#define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197 +#define CL_KERNEL_ARG_TYPE_NAME 0x1198 +#define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199 +#define CL_KERNEL_ARG_NAME 0x119A + +/* cl_kernel_arg_address_qualifier */ +#define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B +#define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C +#define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D +#define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E + +/* cl_kernel_arg_access_qualifier */ +#define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0 +#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1 +#define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2 +#define CL_KERNEL_ARG_ACCESS_NONE 0x11A3 + +/* cl_kernel_arg_type_qualifer */ +#define CL_KERNEL_ARG_TYPE_NONE 0 +#define CL_KERNEL_ARG_TYPE_CONST (1 << 0) +#define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1) +#define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2) /* cl_kernel_work_group_info */ #define CL_KERNEL_WORK_GROUP_SIZE 0x11B0 @@ -395,6 +513,7 @@ typedef struct _cl_buffer_region { #define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2 #define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3 #define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4 +#define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5 /* cl_event_info */ #define CL_EVENT_COMMAND_QUEUE 0x11D0 @@ -425,13 +544,17 @@ typedef struct _cl_buffer_region { #define CL_COMMAND_WRITE_BUFFER_RECT 0x1202 #define CL_COMMAND_COPY_BUFFER_RECT 0x1203 #define CL_COMMAND_USER 0x1204 +#define CL_COMMAND_BARRIER 0x1205 +#define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206 +#define CL_COMMAND_FILL_BUFFER 0x1207 +#define CL_COMMAND_FILL_IMAGE 0x1208 /* command execution status */ #define CL_COMPLETE 0x0 #define CL_RUNNING 0x1 #define CL_SUBMITTED 0x2 #define CL_QUEUED 0x3 - + /* cl_buffer_create_type */ #define CL_BUFFER_CREATE_TYPE_REGION 0x1220 @@ -470,22 +593,35 @@ clGetDeviceInfo(cl_device_id /* device */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCreateSubDevices(cl_device_id /* in_device */, + const cl_device_partition_property * /* properties */, + cl_uint /* num_devices */, + cl_device_id * /* out_devices */, + cl_uint * /* num_devices_ret */) CL_API_SUFFIX__VERSION_1_2; +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2; + /* Context APIs */ extern CL_API_ENTRY cl_context CL_API_CALL clCreateContext(const cl_context_properties * /* properties */, - cl_uint /* num_devices */, - const cl_device_id * /* devices */, + cl_uint /* num_devices */, + const cl_device_id * /* devices */, void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *), - void * /* user_data */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + void * /* user_data */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(const cl_context_properties * /* properties */, - cl_device_type /* device_type */, + cl_device_type /* device_type */, void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *), - void * /* user_data */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + void * /* user_data */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clRetainContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; @@ -520,25 +656,6 @@ clGetCommandQueueInfo(cl_command_queue /* command_queue */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS -#warning CL_USE_DEPRECATED_OPENCL_1_0_APIS is defined. These APIs are unsupported and untested in OpenCL 1.1! -/* - * WARNING: - * This API introduces mutable state into the OpenCL implementation. It has been REMOVED - * to better facilitate thread safety. The 1.0 API is not thread safe. It is not tested by the - * OpenCL 1.1 conformance test, and consequently may not work or may not work dependably. - * It is likely to be non-performant. Use of this API is not advised. Use at your own risk. - * - * Software developers previously relying on this API are instructed to set the command queue - * properties when creating the queue, instead. - */ -extern CL_API_ENTRY cl_int CL_API_CALL -clSetCommandQueueProperty(cl_command_queue /* command_queue */, - cl_command_queue_properties /* properties */, - cl_bool /* enable */, - cl_command_queue_properties * /* old_properties */) CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED; -#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */ - /* Memory Object APIs */ extern CL_API_ENTRY cl_mem CL_API_CALL clCreateBuffer(cl_context /* context */, @@ -555,26 +672,12 @@ clCreateSubBuffer(cl_mem /* buffer */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_mem CL_API_CALL -clCreateImage2D(cl_context /* context */, - cl_mem_flags /* flags */, - const cl_image_format * /* image_format */, - size_t /* image_width */, - size_t /* image_height */, - size_t /* image_row_pitch */, - void * /* host_ptr */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_mem CL_API_CALL -clCreateImage3D(cl_context /* context */, - cl_mem_flags /* flags */, - const cl_image_format * /* image_format */, - size_t /* image_width */, - size_t /* image_height */, - size_t /* image_depth */, - size_t /* image_row_pitch */, - size_t /* image_slice_pitch */, - void * /* host_ptr */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; +clCreateImage(cl_context /* context */, + cl_mem_flags /* flags */, + const cl_image_format * /* image_format */, + const cl_image_desc * /* image_desc */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clRetainMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; @@ -609,7 +712,7 @@ clSetMemObjectDestructorCallback( cl_mem /* memobj */, void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), void * /*user_data */ ) CL_API_SUFFIX__VERSION_1_1; -/* Sampler APIs */ +/* Sampler APIs */ extern CL_API_ENTRY cl_sampler CL_API_CALL clCreateSampler(cl_context /* context */, cl_bool /* normalized_coords */, @@ -647,6 +750,13 @@ clCreateProgramWithBinary(cl_context /* context */, cl_int * /* binary_status */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithBuiltInKernels(cl_context /* context */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* kernel_names */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; + extern CL_API_ENTRY cl_int CL_API_CALL clRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; @@ -662,7 +772,30 @@ clBuildProgram(cl_program /* program */, void * /* user_data */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL -clUnloadCompiler(void) CL_API_SUFFIX__VERSION_1_0; +clCompileProgram(cl_program /* program */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* options */, + cl_uint /* num_input_headers */, + const cl_program * /* input_headers */, + const char ** /* header_include_names */, + void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), + void * /* user_data */) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_program CL_API_CALL +clLinkProgram(cl_context /* context */, + cl_uint /* num_devices */, + const cl_device_id * /* device_list */, + const char * /* options */, + cl_uint /* num_input_programs */, + const cl_program * /* input_programs */, + void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), + void * /* user_data */, + cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_2; + + +extern CL_API_ENTRY cl_int CL_API_CALL +clUnloadPlatformCompiler(cl_platform_id /* platform */) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clGetProgramInfo(cl_program /* program */, @@ -710,6 +843,14 @@ clGetKernelInfo(cl_kernel /* kernel */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelArgInfo(cl_kernel /* kernel */, + cl_uint /* arg_indx */, + cl_kernel_arg_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_2; + extern CL_API_ENTRY cl_int CL_API_CALL clGetKernelWorkGroupInfo(cl_kernel /* kernel */, cl_device_id /* device */, @@ -718,7 +859,7 @@ clGetKernelWorkGroupInfo(cl_kernel /* kernel */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -/* Event Object APIs */ +/* Event Object APIs */ extern CL_API_ENTRY cl_int CL_API_CALL clWaitForEvents(cl_uint /* num_events */, const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; @@ -750,7 +891,7 @@ clSetEventCallback( cl_event /* event */, void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), void * /* user_data */) CL_API_SUFFIX__VERSION_1_1; -/* Profiling APIs */ +/* Profiling APIs */ extern CL_API_ENTRY cl_int CL_API_CALL clGetEventProfilingInfo(cl_event /* event */, cl_profiling_info /* param_name */, @@ -771,7 +912,7 @@ clEnqueueReadBuffer(cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */, size_t /* offset */, - size_t /* cb */, + size_t /* size */, void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, @@ -781,8 +922,8 @@ extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBufferRect(cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */, - const size_t * /* buffer_origin */, - const size_t * /* host_origin */, + const size_t * /* buffer_offset */, + const size_t * /* host_offset */, const size_t * /* region */, size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, @@ -798,7 +939,7 @@ clEnqueueWriteBuffer(cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_write */, size_t /* offset */, - size_t /* cb */, + size_t /* size */, const void * /* ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, @@ -808,8 +949,8 @@ extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBufferRect(cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_write */, - const size_t * /* buffer_origin */, - const size_t * /* host_origin */, + const size_t * /* buffer_offset */, + const size_t * /* host_offset */, const size_t * /* region */, size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, @@ -820,13 +961,24 @@ clEnqueueWriteBufferRect(cl_command_queue /* command_queue */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueFillBuffer(cl_command_queue /* command_queue */, + cl_mem /* buffer */, + const void * /* pattern */, + size_t /* pattern_size */, + size_t /* offset */, + size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; + extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyBuffer(cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_buffer */, size_t /* src_offset */, size_t /* dst_offset */, - size_t /* cb */, + size_t /* size */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; @@ -872,6 +1024,16 @@ clEnqueueWriteImage(cl_command_queue /* command_queue */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueFillImage(cl_command_queue /* command_queue */, + cl_mem /* image */, + const void * /* fill_color */, + const size_t * /* origin[3] */, + const size_t * /* region[3] */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; + extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueCopyImage(cl_command_queue /* command_queue */, cl_mem /* src_image */, @@ -911,7 +1073,7 @@ clEnqueueMapBuffer(cl_command_queue /* command_queue */, cl_bool /* blocking_map */, cl_map_flags /* map_flags */, size_t /* offset */, - size_t /* cb */, + size_t /* size */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */, @@ -939,6 +1101,15 @@ clEnqueueUnmapMemObject(cl_command_queue /* command_queue */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMigrateMemObjects(cl_command_queue /* command_queue */, + cl_uint /* num_mem_objects */, + const cl_mem * /* mem_objects */, + cl_mem_migration_flags /* flags */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; + extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue /* command_queue */, cl_kernel /* kernel */, @@ -959,7 +1130,7 @@ clEnqueueTask(cl_command_queue /* command_queue */, extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueNativeKernel(cl_command_queue /* command_queue */, - void (*user_func)(void *), + void (CL_CALLBACK * /*user_func*/)(void *), void * /* args */, size_t /* cb_args */, cl_uint /* num_mem_objects */, @@ -970,16 +1141,17 @@ clEnqueueNativeKernel(cl_command_queue /* command_queue */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueMarker(cl_command_queue /* command_queue */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; +clEnqueueMarkerWithWaitList(cl_command_queue /* command_queue */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueWaitForEvents(cl_command_queue /* command_queue */, - cl_uint /* num_events */, - const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; +clEnqueueBarrierWithWaitList(cl_command_queue /* command_queue */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueBarrier(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; /* Extension function access * @@ -988,7 +1160,51 @@ clEnqueueBarrier(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_ * check to make sure the address is not NULL, before using or * calling the returned function address. */ -extern CL_API_ENTRY void * CL_API_CALL clGetExtensionFunctionAddress(const char * /* func_name */) CL_API_SUFFIX__VERSION_1_0; +extern CL_API_ENTRY void * CL_API_CALL +clGetExtensionFunctionAddressForPlatform(cl_platform_id /* platform */, + const char * /* func_name */) CL_API_SUFFIX__VERSION_1_2; + + +// Deprecated OpenCL 1.1 APIs +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateImage2D(cl_context /* context */, + cl_mem_flags /* flags */, + const cl_image_format * /* image_format */, + size_t /* image_width */, + size_t /* image_height */, + size_t /* image_row_pitch */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateImage3D(cl_context /* context */, + cl_mem_flags /* flags */, + const cl_image_format * /* image_format */, + size_t /* image_width */, + size_t /* image_height */, + size_t /* image_depth */, + size_t /* image_row_pitch */, + size_t /* image_slice_pitch */, + void * /* host_ptr */, + cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueMarker(cl_command_queue /* command_queue */, + cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueWaitForEvents(cl_command_queue /* command_queue */, + cl_uint /* num_events */, + const cl_event * /* event_list */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueBarrier(cl_command_queue /* command_queue */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clUnloadCompiler(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL +clGetExtensionFunctionAddress(const char * /* func_name */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; #ifdef __cplusplus } diff --git a/include/CL/cl.hpp b/include/CL/cl.hpp index a0b0138c6..6b025e89e 100644 --- a/include/CL/cl.hpp +++ b/include/CL/cl.hpp @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2008-2010 The Khronos Group Inc. + * Copyright (c) 2008-2013 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the @@ -23,13 +23,18 @@ /*! \file * - * \brief C++ bindings for OpenCL 1.0 (rev 48) and OpenCL 1.1 (rev 33) - * \author Benedict R. Gaster and Laurent Morichetti + * \brief C++ bindings for OpenCL 1.0 (rev 48), OpenCL 1.1 (rev 33) and + * OpenCL 1.2 (rev 15) + * \author Benedict R. Gaster, Laurent Morichetti and Lee Howes * - * Additions and fixes from Brian Cole, March 3rd 2010. + * Additions and fixes from: + * Brian Cole, March 3rd 2010 and April 2012 + * Matt Gruenke, April 2012. + * Bruce Merry, February 2013. + * Tom Deakin and Simon McIntosh-Smith, July 2013 * - * \version 1.1 - * \date June 2010 + * \version 1.2.6 + * \date August 2013 * * Optional extension support * @@ -55,8 +60,8 @@ * * For detail documentation on the bindings see: * - * The OpenCL C++ Wrapper API 1.1 (revision 04) - * http://www.khronos.org/registry/cl/specs/opencl-cplusplus-1.1.pdf + * The OpenCL C++ Wrapper API 1.2 (revision 09) + * http://www.khronos.org/registry/cl/specs/opencl-cplusplus-1.2.pdf * * \section example Example * @@ -140,11 +145,24 @@ #ifndef CL_HPP_ #define CL_HPP_ +#define CL_USE_DEPRECATED_OPENCL_1_1_APIS + #ifdef _WIN32 + #include #include +#include +#include + +#if defined(__CL_ENABLE_EXCEPTIONS) +#include +#endif // #if defined(__CL_ENABLE_EXCEPTIONS) + +#pragma push_macro("max") +#undef max #if defined(USE_DX_INTEROP) #include +#include #endif #endif // _WIN32 @@ -156,16 +174,38 @@ #if defined(__APPLE__) || defined(__MACOSX) #include #include +#include #else #include #include #endif // !__APPLE__ +// To avoid accidentally taking ownership of core OpenCL types +// such as cl_kernel constructors are made explicit +// under OpenCL 1.2 +#undef CL_VERSION_1_2 + +#if defined(CL_VERSION_1_2) && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +#define __CL_EXPLICIT_CONSTRUCTORS explicit +#else // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +#define __CL_EXPLICIT_CONSTRUCTORS +#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + +// Define deprecated prefixes and suffixes to ensure compilation +// in case they are not pre-defined +#if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) +#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED +#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) +#if !defined(CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED) +#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED +#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) + #if !defined(CL_CALLBACK) #define CL_CALLBACK #endif //CL_CALLBACK #include +#include #if !defined(__NO_STD_VECTOR) #include @@ -176,11 +216,15 @@ #endif #if defined(linux) || defined(__APPLE__) || defined(__MACOSX) -# include +#include + +#include +#include #endif // linux #include + /*! \namespace cl * * \brief The OpenCL C++ bindings are defined within this namespace. @@ -188,6 +232,12 @@ */ namespace cl { +class Memory; + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) #define __INIT_CL_EXT_FCN_PTR(name) \ if(!pfn_##name) { \ pfn_##name = (PFN_##name) \ @@ -195,17 +245,29 @@ namespace cl { if(!pfn_##name) { \ } \ } +#endif // #if defined(CL_VERSION_1_1) + +#if defined(CL_VERSION_1_2) +#define __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, name) \ + if(!pfn_##name) { \ + pfn_##name = (PFN_##name) \ + clGetExtensionFunctionAddressForPlatform(platform, #name); \ + if(!pfn_##name) { \ + } \ + } +#endif // #if defined(CL_VERSION_1_1) class Program; class Device; class Context; class CommandQueue; class Memory; +class Buffer; #if defined(__CL_ENABLE_EXCEPTIONS) -#include -/*! \class Error - * \brief Exception class +/*! \brief Exception class + * + * This may be thrown by API functions when __CL_ENABLE_EXCEPTIONS is defined. */ class Error : public std::exception { @@ -213,8 +275,14 @@ private: cl_int err_; const char * errStr_; public: - /*! Create a new CL error exception for a given error code + /*! \brief Create a new CL error exception for a given error code * and corresponding message. + * + * \param err error code value. + * + * \param errStr a descriptive string that must remain in scope until + * handling of the exception has concluded. If set, it + * will be returned by what(). */ Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr) {} @@ -247,9 +315,33 @@ public: #define __ERR_STR(x) NULL #endif // __CL_ENABLE_EXCEPTIONS + +namespace detail +{ +#if defined(__CL_ENABLE_EXCEPTIONS) +static inline cl_int errHandler ( + cl_int err, + const char * errStr = NULL) +{ + if (err != CL_SUCCESS) { + throw Error(err, errStr); + } + return err; +} +#else +static inline cl_int errHandler (cl_int err, const char * errStr = NULL) +{ + (void) errStr; // suppress unused variable warning + return err; +} +#endif // __CL_ENABLE_EXCEPTIONS +} + + + //! \cond DOXYGEN_DETAIL #if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) -#define __GET_DEVICE_INFO_ERR __ERR_STR(clgetDeviceInfo) +#define __GET_DEVICE_INFO_ERR __ERR_STR(clGetDeviceInfo) #define __GET_PLATFORM_INFO_ERR __ERR_STR(clGetPlatformInfo) #define __GET_DEVICE_IDS_ERR __ERR_STR(clGetDeviceIDs) #define __GET_PLATFORM_IDS_ERR __ERR_STR(clGetPlatformIDs) @@ -260,20 +352,29 @@ public: #define __GET_IMAGE_INFO_ERR __ERR_STR(clGetImageInfo) #define __GET_SAMPLER_INFO_ERR __ERR_STR(clGetSamplerInfo) #define __GET_KERNEL_INFO_ERR __ERR_STR(clGetKernelInfo) +#if defined(CL_VERSION_1_2) +#define __GET_KERNEL_ARG_INFO_ERR __ERR_STR(clGetKernelArgInfo) +#endif // #if defined(CL_VERSION_1_2) #define __GET_KERNEL_WORK_GROUP_INFO_ERR __ERR_STR(clGetKernelWorkGroupInfo) #define __GET_PROGRAM_INFO_ERR __ERR_STR(clGetProgramInfo) #define __GET_PROGRAM_BUILD_INFO_ERR __ERR_STR(clGetProgramBuildInfo) #define __GET_COMMAND_QUEUE_INFO_ERR __ERR_STR(clGetCommandQueueInfo) +#define __CREATE_CONTEXT_ERR __ERR_STR(clCreateContext) #define __CREATE_CONTEXT_FROM_TYPE_ERR __ERR_STR(clCreateContextFromType) #define __GET_SUPPORTED_IMAGE_FORMATS_ERR __ERR_STR(clGetSupportedImageFormats) #define __CREATE_BUFFER_ERR __ERR_STR(clCreateBuffer) +#define __COPY_ERR __ERR_STR(cl::copy) #define __CREATE_SUBBUFFER_ERR __ERR_STR(clCreateSubBuffer) #define __CREATE_GL_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) +#define __CREATE_GL_RENDER_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) #define __GET_GL_OBJECT_INFO_ERR __ERR_STR(clGetGLObjectInfo) -#define __CREATE_IMAGE2D_ERR __ERR_STR(clCreateImage2D) -#define __CREATE_IMAGE3D_ERR __ERR_STR(clCreateImage3D) +#if defined(CL_VERSION_1_2) +#define __CREATE_IMAGE_ERR __ERR_STR(clCreateImage) +#define __CREATE_GL_TEXTURE_ERR __ERR_STR(clCreateFromGLTexture) +#define __IMAGE_DIMENSION_ERR __ERR_STR(Incorrect image dimensions) +#endif // #if defined(CL_VERSION_1_2) #define __CREATE_SAMPLER_ERR __ERR_STR(clCreateSampler) #define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR __ERR_STR(clSetMemObjectDestructorCallback) @@ -286,7 +387,14 @@ public: #define __SET_KERNEL_ARGS_ERR __ERR_STR(clSetKernelArg) #define __CREATE_PROGRAM_WITH_SOURCE_ERR __ERR_STR(clCreateProgramWithSource) #define __CREATE_PROGRAM_WITH_BINARY_ERR __ERR_STR(clCreateProgramWithBinary) +#if defined(CL_VERSION_1_2) +#define __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR __ERR_STR(clCreateProgramWithBuiltInKernels) +#endif // #if defined(CL_VERSION_1_2) #define __BUILD_PROGRAM_ERR __ERR_STR(clBuildProgram) +#if defined(CL_VERSION_1_2) +#define __COMPILE_PROGRAM_ERR __ERR_STR(clCompileProgram) + +#endif // #if defined(CL_VERSION_1_2) #define __CREATE_KERNELS_IN_PROGRAM_ERR __ERR_STR(clCreateKernelsInProgram) #define __CREATE_COMMAND_QUEUE_ERR __ERR_STR(clCreateCommandQueue) @@ -297,9 +405,11 @@ public: #define __ENQUEUE_WRITE_BUFFER_RECT_ERR __ERR_STR(clEnqueueWriteBufferRect) #define __ENQEUE_COPY_BUFFER_ERR __ERR_STR(clEnqueueCopyBuffer) #define __ENQEUE_COPY_BUFFER_RECT_ERR __ERR_STR(clEnqueueCopyBufferRect) +#define __ENQUEUE_FILL_BUFFER_ERR __ERR_STR(clEnqueueFillBuffer) #define __ENQUEUE_READ_IMAGE_ERR __ERR_STR(clEnqueueReadImage) #define __ENQUEUE_WRITE_IMAGE_ERR __ERR_STR(clEnqueueWriteImage) #define __ENQUEUE_COPY_IMAGE_ERR __ERR_STR(clEnqueueCopyImage) +#define __ENQUEUE_FILL_IMAGE_ERR __ERR_STR(clEnqueueFillImage) #define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR __ERR_STR(clEnqueueCopyImageToBuffer) #define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR __ERR_STR(clEnqueueCopyBufferToImage) #define __ENQUEUE_MAP_BUFFER_ERR __ERR_STR(clEnqueueMapBuffer) @@ -308,76 +418,193 @@ public: #define __ENQUEUE_NDRANGE_KERNEL_ERR __ERR_STR(clEnqueueNDRangeKernel) #define __ENQUEUE_TASK_ERR __ERR_STR(clEnqueueTask) #define __ENQUEUE_NATIVE_KERNEL __ERR_STR(clEnqueueNativeKernel) -#define __ENQUEUE_MARKER_ERR __ERR_STR(clEnqueueMarker) -#define __ENQUEUE_WAIT_FOR_EVENTS_ERR __ERR_STR(clEnqueueWaitForEvents) -#define __ENQUEUE_BARRIER_ERR __ERR_STR(clEnqueueBarrier) +#if defined(CL_VERSION_1_2) +#define __ENQUEUE_MIGRATE_MEM_OBJECTS_ERR __ERR_STR(clEnqueueMigrateMemObjects) +#endif // #if defined(CL_VERSION_1_2) #define __ENQUEUE_ACQUIRE_GL_ERR __ERR_STR(clEnqueueAcquireGLObjects) #define __ENQUEUE_RELEASE_GL_ERR __ERR_STR(clEnqueueReleaseGLObjects) -#define __UNLOAD_COMPILER_ERR __ERR_STR(clUnloadCompiler) +#define __RETAIN_ERR __ERR_STR(Retain Object) +#define __RELEASE_ERR __ERR_STR(Release Object) #define __FLUSH_ERR __ERR_STR(clFlush) #define __FINISH_ERR __ERR_STR(clFinish) +#define __VECTOR_CAPACITY_ERR __ERR_STR(Vector capacity error) +/** + * CL 1.2 version that uses device fission. + */ +#if defined(CL_VERSION_1_2) +#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevices) +#else #define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevicesEXT) +#endif // #if defined(CL_VERSION_1_2) + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) +#define __ENQUEUE_MARKER_ERR __ERR_STR(clEnqueueMarker) +#define __ENQUEUE_WAIT_FOR_EVENTS_ERR __ERR_STR(clEnqueueWaitForEvents) +#define __ENQUEUE_BARRIER_ERR __ERR_STR(clEnqueueBarrier) +#define __UNLOAD_COMPILER_ERR __ERR_STR(clUnloadCompiler) +#define __CREATE_GL_TEXTURE_2D_ERR __ERR_STR(clCreateFromGLTexture2D) +#define __CREATE_GL_TEXTURE_3D_ERR __ERR_STR(clCreateFromGLTexture3D) +#define __CREATE_IMAGE2D_ERR __ERR_STR(clCreateImage2D) +#define __CREATE_IMAGE3D_ERR __ERR_STR(clCreateImage3D) +#endif // #if defined(CL_VERSION_1_1) + #endif // __CL_USER_OVERRIDE_ERROR_STRINGS //! \endcond +/** + * CL 1.2 marker and barrier commands + */ +#if defined(CL_VERSION_1_2) +#define __ENQUEUE_MARKER_WAIT_LIST_ERR __ERR_STR(clEnqueueMarkerWithWaitList) +#define __ENQUEUE_BARRIER_WAIT_LIST_ERR __ERR_STR(clEnqueueBarrierWithWaitList) +#endif // #if defined(CL_VERSION_1_2) + +#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING) +typedef std::string STRING_CLASS; +#elif !defined(__USE_DEV_STRING) + /*! \class string * \brief Simple string class, that provides a limited subset of std::string * functionality but avoids many of the issues that come with that class. + + * \note Deprecated. Please use std::string as default or + * re-define the string class to match the std::string + * interface by defining STRING_CLASS */ -class string +class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED string CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED { private: ::size_t size_; char * str_; public: + //! \brief Constructs an empty string, allocating no memory. string(void) : size_(0), str_(NULL) { } - string(char * str, ::size_t size) : + /*! \brief Constructs a string populated from an arbitrary value of + * specified size. + * + * An extra '\0' is added, in case none was contained in str. + * + * \param str the initial value of the string instance. Note that '\0' + * characters receive no special treatment. If NULL, + * the string is left empty, with a size of 0. + * + * \param size the number of characters to copy from str. + */ + string(const char * str, ::size_t size) : size_(size), str_(NULL) { - str_ = new char[size_+1]; - if (str_ != NULL) { - memcpy(str_, str, size_ * sizeof(char)); - str_[size_] = '\0'; - } - else { - size_ = 0; + if( size > 0 ) { + str_ = new char[size_+1]; + if (str_ != NULL) { + memcpy(str_, str, size_ * sizeof(char)); + str_[size_] = '\0'; + } + else { + size_ = 0; + } } } - string(char * str) : + /*! \brief Constructs a string populated from a null-terminated value. + * + * \param str the null-terminated initial value of the string instance. + * If NULL, the string is left empty, with a size of 0. + */ + string(const char * str) : + size_(0), str_(NULL) { - size_= ::strlen(str); - str_ = new char[size_ + 1]; - if (str_ != NULL) { - memcpy(str_, str, (size_ + 1) * sizeof(char)); + if( str ) { + size_= ::strlen(str); } - else { - size_ = 0; + if( size_ > 0 ) { + str_ = new char[size_ + 1]; + if (str_ != NULL) { + memcpy(str_, str, (size_ + 1) * sizeof(char)); + } } } + void resize( ::size_t n ) + { + if( size_ == n ) { + return; + } + if (n == 0) { + if( str_ ) { + delete [] str_; + } + str_ = NULL; + size_ = 0; + } + else { + char *newString = new char[n + 1]; + int copySize = n; + if( size_ < n ) { + copySize = size_; + } + size_ = n; + + if(str_) { + memcpy(newString, str_, (copySize + 1) * sizeof(char)); + } + if( copySize < size_ ) { + memset(newString + copySize, 0, size_ - copySize); + } + newString[size_] = '\0'; + + delete [] str_; + str_ = newString; + } + } + + const char& operator[] ( ::size_t pos ) const + { + return str_[pos]; + } + + char& operator[] ( ::size_t pos ) + { + return str_[pos]; + } + + /*! \brief Copies the value of another string to this one. + * + * \param rhs the string to copy. + * + * \returns a reference to the modified instance. + */ string& operator=(const string& rhs) { if (this == &rhs) { return *this; } - if (rhs.size_ == 0 || rhs.str_ == NULL) { + if( str_ != NULL ) { + delete [] str_; + str_ = NULL; + size_ = 0; + } + + if (rhs.size_ == 0 || rhs.str_ == NULL) { + str_ = NULL; size_ = 0; - str_ = NULL; } else { + str_ = new char[rhs.size_ + 1]; size_ = rhs.size_; - str_ = new char[size_ + 1]; + if (str_ != NULL) { memcpy(str_, rhs.str_, (size_ + 1) * sizeof(char)); } @@ -389,37 +616,42 @@ public: return *this; } - string(const string& rhs) + /*! \brief Constructs a string by copying the value of another instance. + * + * \param rhs the string to copy. + */ + string(const string& rhs) : + size_(0), + str_(NULL) { *this = rhs; } + //! \brief Destructor - frees memory used to hold the current value. ~string() { - if (str_ != NULL) { - delete[] str_; - } + delete[] str_; + str_ = NULL; } - + + //! \brief Queries the length of the string, excluding any added '\0's. ::size_t size(void) const { return size_; } + + //! \brief Queries the length of the string, excluding any added '\0's. ::size_t length(void) const { return size(); } + /*! \brief Returns a pointer to the private copy held by this instance, + * or "" if empty/unset. + */ const char * c_str(void) const { return (str_) ? str_ : "";} }; - -#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING) -#include -typedef std::string STRING_CLASS; -#elif !defined(__USE_DEV_STRING) typedef cl::string STRING_CLASS; -#endif +#endif // #elif !defined(__USE_DEV_STRING) #if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) -#include #define VECTOR_CLASS std::vector #elif !defined(__USE_DEV_VECTOR) #define VECTOR_CLASS cl::vector -#endif #if !defined(__MAX_DEFAULT_VECTOR_SIZE) #define __MAX_DEFAULT_VECTOR_SIZE 10 @@ -427,189 +659,279 @@ typedef cl::string STRING_CLASS; /*! \class vector * \brief Fixed sized vector implementation that mirroring + * + * \note Deprecated. Please use std::vector as default or + * re-define the vector class to match the std::vector + * interface by defining VECTOR_CLASS + + * \note Not recommended for use with custom objects as + * current implementation will construct N elements + * * std::vector functionality. + * \brief Fixed sized vector compatible with std::vector. + * + * \note + * This differs from std::vector<> not just in memory allocation, + * but also in terms of when members are constructed, destroyed, + * and assigned instead of being copy constructed. + * + * \param T type of element contained in the vector. + * + * \param N maximum size of the vector. */ template -class vector +class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED vector CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED { private: T data_[N]; unsigned int size_; - bool empty_; + public: - vector() : - size_(-1), - empty_(true) + //! \brief Constructs an empty vector with no memory allocated. + vector() : + size_(static_cast(0)) {} - ~vector() {} + //! \brief Deallocates the vector's memory and destroys all of its elements. + ~vector() + { + clear(); + } + //! \brief Returns the number of elements currently contained. unsigned int size(void) const { - return size_ + 1; + return size_; } - + + /*! \brief Empties the vector of all elements. + * \note + * This does not deallocate memory but will invoke destructors + * on contained elements. + */ void clear() { - size_ = -1; - empty_ = true; - } - - void push_back (const T& x) - { - if (size() < N) { - size_++; - data_[size_] = x; - empty_ = false; + while(!empty()) { + pop_back(); } } + /*! \brief Appends an element after the last valid element. + * Calling this on a vector that has reached capacity will throw an + * exception if exceptions are enabled. + */ + void push_back (const T& x) + { + if (size() < N) { + new (&data_[size_]) T(x); + size_++; + } else { + detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR); + } + } + + /*! \brief Removes the last valid element from the vector. + * Calling this on an empty vector will throw an exception + * if exceptions are enabled. + */ void pop_back(void) { - if (!empty_) { + if (size_ != 0) { + --size_; data_[size_].~T(); - size_--; - if (size_ == -1) { - empty_ = true; - } + } else { + detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR); } } + /*! \brief Constructs with a value copied from another. + * + * \param vec the vector to copy. + */ vector(const vector& vec) : - size_(vec.size_), - empty_(vec.empty_) + size_(vec.size_) { - if (!empty_) { - memcpy(&data_[0], &vec.data_[0], size() * sizeof(T)); + if (size_ != 0) { + assign(vec.begin(), vec.end()); } } + /*! \brief Constructs with a specified number of initial elements. + * + * \param size number of initial elements. + * + * \param val value of initial elements. + */ vector(unsigned int size, const T& val = T()) : - size_(-1), - empty_(true) + size_(0) { for (unsigned int i = 0; i < size; i++) { push_back(val); } } + /*! \brief Overwrites the current content with that copied from another + * instance. + * + * \param rhs vector to copy. + * + * \returns a reference to this. + */ vector& operator=(const vector& rhs) { if (this == &rhs) { return *this; } - size_ = rhs.size_; - empty_ = rhs.empty_; - - if (!empty_) { - memcpy(&data_[0], &rhs.data_[0], size() * sizeof(T)); + if (rhs.size_ != 0) { + assign(rhs.begin(), rhs.end()); + } else { + clear(); } return *this; } + /*! \brief Tests equality against another instance. + * + * \param vec the vector against which to compare. + */ bool operator==(vector &vec) { - if (empty_ && vec.empty_) { - return true; - } - if (size() != vec.size()) { return false; } - return memcmp(&data_[0], &vec.data_[0], size() * sizeof(T)) == 0 ? true : false; + for( unsigned int i = 0; i < size(); ++i ) { + if( operator[](i) != vec[i] ) { + return false; + } + } + return true; } + //! \brief Conversion operator to T*. operator T* () { return data_; } + + //! \brief Conversion operator to const T*. operator const T* () const { return data_; } + //! \brief Tests whether this instance has any elements. bool empty (void) const { - return empty_; + return size_==0; } + //! \brief Returns the maximum number of elements this instance can hold. unsigned int max_size (void) const { return N; } + //! \brief Returns the maximum number of elements this instance can hold. unsigned int capacity () const { - return sizeof(T) * N; + return N; } + /*! \brief Returns a reference to a given element. + * + * \param index which element to access. * + * \note + * The caller is responsible for ensuring index is >= 0 and < size(). + */ T& operator[](int index) { return data_[index]; } - T operator[](int index) const + /*! \brief Returns a const reference to a given element. + * + * \param index which element to access. + * + * \note + * The caller is responsible for ensuring index is >= 0 and < size(). + */ + const T& operator[](int index) const { return data_[index]; } + /*! \brief Assigns elements of the vector based on a source iterator range. + * + * \param start Beginning iterator of source range + * \param end Enditerator of source range + * + * \note + * Will throw an exception if exceptions are enabled and size exceeded. + */ template void assign(I start, I end) { clear(); - while(start < end) { + while(start != end) { push_back(*start); start++; } } /*! \class iterator - * \brief Iterator class for vectors + * \brief Const iterator class for vectors */ class iterator { private: - vector vec_; + const vector *vec_; int index_; - bool initialized_; + + /** + * Internal iterator constructor to capture reference + * to the vector it iterates over rather than taking + * the vector by copy. + */ + iterator (const vector &vec, int index) : + vec_(&vec) + { + if( !vec.empty() ) { + index_ = index; + } else { + index_ = -1; + } + } + public: iterator(void) : index_(-1), - initialized_(false) + vec_(NULL) + { + } + + iterator(const iterator& rhs) : + vec_(rhs.vec_), + index_(rhs.index_) { - index_ = -1; - initialized_ = false; } ~iterator(void) {} - static iterator begin(vector &vec) + static iterator begin(const cl::vector &vec) { - iterator i; + iterator i(vec, 0); - if (!vec.empty()) { - i.index_ = 0; - } - - i.vec_ = vec; - i.initialized_ = true; return i; } - static iterator end(vector &vec) + static iterator end(const cl::vector &vec) { - iterator i; + iterator i(vec, vec.size()); - if (!vec.empty()) { - i.index_ = vec.size(); - } - i.vec_ = vec; - i.initialized_ = true; return i; } bool operator==(iterator i) { return ((vec_ == i.vec_) && - (index_ == i.index_) && - (initialized_ == i.initialized_)); + (index_ == i.index_)); } bool operator!=(iterator i) @@ -617,29 +939,35 @@ public: return (!(*this==i)); } - void operator++() + iterator& operator++() { - index_++; + ++index_; + return *this; } - void operator++(int x) + iterator operator++(int) { - index_ += x; + iterator retVal(*this); + ++index_; + return retVal; } - void operator--() + iterator& operator--() { - index_--; + --index_; + return *this; } - void operator--(int x) + iterator operator--(int) { - index_ -= x; + iterator retVal(*this); + --index_; + return retVal; } - T operator *() + const T& operator *() const { - return vec_[index_]; + return (*vec_)[index_]; } }; @@ -648,11 +976,21 @@ public: return iterator::begin(*this); } + iterator begin(void) const + { + return iterator::begin(*this); + } + iterator end(void) { return iterator::end(*this); } + iterator end(void) const + { + return iterator::end(*this); + } + T& front(void) { return data_[0]; @@ -670,110 +1008,230 @@ public: const T& back(void) const { - return data_[size_]; + return data_[size_-1]; } }; +#endif // #if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) + + + + + +namespace detail { +#define __DEFAULT_NOT_INITIALIZED 1 +#define __DEFAULT_BEING_INITIALIZED 2 +#define __DEFAULT_INITIALIZED 4 + + /* + * Compare and exchange primitives are needed for handling of defaults + */ + inline int compare_exchange(volatile int * dest, int exchange, int comparand) + { +#ifdef _WIN32 + return (int)(InterlockedCompareExchange( + (volatile long*)dest, + (long)exchange, + (long)comparand)); +#elif defined(__APPLE__) || defined(__MACOSX) + return OSAtomicOr32Orig((uint32_t)exchange, (volatile uint32_t*)dest); +#else // !_WIN32 || defined(__APPLE__) || defined(__MACOSX) + return (__sync_val_compare_and_swap( + dest, + comparand, + exchange)); +#endif // !_WIN32 + } + + inline void fence() { _mm_mfence(); } +} // namespace detail + -/*! - * \brief size_t class used to interface between C++ and - * OpenCL C calls that require arrays of size_t values, who's - * size is known statically. +/*! \brief class used to interface between C++ and + * OpenCL C calls that require arrays of size_t values, whose + * size is known statically. */ template -struct size_t : public cl::vector< ::size_t, N> { }; +class size_t +{ +private: + ::size_t data_[N]; + +public: + //! \brief Initialize size_t to all 0s + size_t() + { + for( int i = 0; i < N; ++i ) { + data_[i] = 0; + } + } + + ::size_t& operator[](int index) + { + return data_[index]; + } + + const ::size_t& operator[](int index) const + { + return data_[index]; + } + + //! \brief Conversion operator to T*. + operator ::size_t* () { return data_; } + + //! \brief Conversion operator to const T*. + operator const ::size_t* () const { return data_; } +}; namespace detail { -// GetInfo help struct -template -struct GetInfoHelper +// Generic getInfoHelper. The final parameter is used to guide overload +// resolution: the actual parameter passed is an int, which makes this +// a worse conversion sequence than a specialization that declares the +// parameter as an int. +template +inline cl_int getInfoHelper(Functor f, cl_uint name, T* param, long) { - static cl_int - get(Functor f, cl_uint name, T* param) - { - return f(name, sizeof(T), param, NULL); - } -}; + return f(name, sizeof(T), param, NULL); +} -// Specialized GetInfoHelper for VECTOR_CLASS params +// Specialized getInfoHelper for VECTOR_CLASS params template -struct GetInfoHelper > +inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, long) { - static cl_int get(Func f, cl_uint name, VECTOR_CLASS* param) - { - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - T* value = (T*) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - param->assign(&value[0], &value[required/sizeof(T)]); - return CL_SUCCESS; + ::size_t required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; } -}; + + T* value = (T*) alloca(required); + err = f(name, required, value, NULL); + if (err != CL_SUCCESS) { + return err; + } + + param->assign(&value[0], &value[required/sizeof(T)]); + return CL_SUCCESS; +} + +/* Specialization for reference-counted types. This depends on the + * existence of Wrapper::cl_type, and none of the other types having the + * cl_type member. Note that simplify specifying the parameter as Wrapper + * does not work, because when using a derived type (e.g. Context) the generic + * template will provide a better match. + */ +template +inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, int, typename T::cl_type = 0) +{ + ::size_t required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + + typename T::cl_type * value = (typename T::cl_type *) alloca(required); + err = f(name, required, value, NULL); + if (err != CL_SUCCESS) { + return err; + } + + ::size_t elements = required / sizeof(typename T::cl_type); + param->assign(&value[0], &value[elements]); + for (::size_t i = 0; i < elements; i++) + { + if (value[i] != NULL) + { + err = (*param)[i].retain(); + if (err != CL_SUCCESS) { + return err; + } + } + } + return CL_SUCCESS; +} // Specialized for getInfo template -struct GetInfoHelper > +inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, int) { - static cl_int - get(Func f, cl_uint name, VECTOR_CLASS* param) - { - cl_uint err = f(name, param->size() * sizeof(char *), &(*param)[0], NULL); - if (err != CL_SUCCESS) { + cl_int err = f(name, param->size() * sizeof(char *), &(*param)[0], NULL); + + if (err != CL_SUCCESS) { return err; - } - - return CL_SUCCESS; } -}; + + return CL_SUCCESS; +} // Specialized GetInfoHelper for STRING_CLASS params template -struct GetInfoHelper +inline cl_int getInfoHelper(Func f, cl_uint name, STRING_CLASS* param, long) { - static cl_int get(Func f, cl_uint name, STRING_CLASS* param) - { - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - char* value = (char*) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - *param = value; - return CL_SUCCESS; + ::size_t required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; } -}; -#define __GET_INFO_HELPER_WITH_RETAIN(CPP_TYPE) \ -namespace detail { \ -template \ -struct GetInfoHelper \ -{ \ - static cl_int get(Func f, cl_uint name, CPP_TYPE* param) \ - { \ - cl_uint err = f(name, sizeof(CPP_TYPE), param, NULL); \ - if (err != CL_SUCCESS) { \ - return err; \ - } \ - \ - return ReferenceHandler::retain((*param)()); \ - } \ -}; \ -} + char* value = (char*) alloca(required); + err = f(name, required, value, NULL); + if (err != CL_SUCCESS) { + return err; + } + *param = value; + return CL_SUCCESS; +} + +// Specialized GetInfoHelper for cl::size_t params +template +inline cl_int getInfoHelper(Func f, cl_uint name, size_t* param, long) +{ + ::size_t required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + + ::size_t* value = (::size_t*) alloca(required); + err = f(name, required, value, NULL); + if (err != CL_SUCCESS) { + return err; + } + + for(int i = 0; i < N; ++i) { + (*param)[i] = value[i]; + } + + return CL_SUCCESS; +} + +template struct ReferenceHandler; + +/* Specialization for reference-counted types. This depends on the + * existence of Wrapper::cl_type, and none of the other types having the + * cl_type member. Note that simplify specifying the parameter as Wrapper + * does not work, because when using a derived type (e.g. Context) the generic + * template will provide a better match. + */ +template +inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_type = 0) +{ + typename T::cl_type value; + cl_int err = f(name, sizeof(value), &value, NULL); + if (err != CL_SUCCESS) { + return err; + } + *param = value; + if (value != NULL) + { + err = param->retain(); + if (err != CL_SUCCESS) { + return err; + } + } + return CL_SUCCESS; +} #define __PARAM_NAME_INFO_1_0(F) \ F(cl_platform_info, CL_PLATFORM_PROFILE, STRING_CLASS) \ @@ -795,7 +1253,7 @@ struct GetInfoHelper \ F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \ F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \ F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \ - F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_bitfield) \ + F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_uint) \ F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \ F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \ F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \ @@ -804,7 +1262,7 @@ struct GetInfoHelper \ F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, ::size_t) \ F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, ::size_t) \ F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_uint) \ + F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_bool) \ F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, ::size_t) \ F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \ F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \ @@ -872,7 +1330,7 @@ struct GetInfoHelper \ F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \ F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \ F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \ - F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS) \ + F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS) \ F(cl_program_info, CL_PROGRAM_SOURCE, STRING_CLASS) \ F(cl_program_info, CL_PROGRAM_BINARY_SIZES, VECTOR_CLASS< ::size_t>) \ F(cl_program_info, CL_PROGRAM_BINARIES, VECTOR_CLASS) \ @@ -910,6 +1368,7 @@ struct GetInfoHelper \ F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \ F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \ F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) \ + F(cl_device_info, CL_DEVICE_OPENCL_C_VERSION, STRING_CLASS) \ \ F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \ F(cl_mem_info, CL_MEM_OFFSET, ::size_t) \ @@ -920,19 +1379,45 @@ struct GetInfoHelper \ F(cl_event_info, CL_EVENT_CONTEXT, cl::Context) #endif // CL_VERSION_1_1 + +#if defined(CL_VERSION_1_2) +#define __PARAM_NAME_INFO_1_2(F) \ + F(cl_image_info, CL_IMAGE_BUFFER, cl::Buffer) \ + \ + F(cl_program_info, CL_PROGRAM_NUM_KERNELS, ::size_t) \ + F(cl_program_info, CL_PROGRAM_KERNEL_NAMES, STRING_CLASS) \ + \ + F(cl_program_build_info, CL_PROGRAM_BINARY_TYPE, cl_program_binary_type) \ + \ + F(cl_kernel_info, CL_KERNEL_ATTRIBUTES, STRING_CLASS) \ + \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_ADDRESS_QUALIFIER, cl_kernel_arg_address_qualifier) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_ACCESS_QUALIFIER, cl_kernel_arg_access_qualifier) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_NAME, STRING_CLASS) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_NAME, STRING_CLASS) \ + \ + F(cl_device_info, CL_DEVICE_PARENT_DEVICE, cl_device_id) \ + F(cl_device_info, CL_DEVICE_PARTITION_PROPERTIES, VECTOR_CLASS) \ + F(cl_device_info, CL_DEVICE_PARTITION_TYPE, VECTOR_CLASS) \ + F(cl_device_info, CL_DEVICE_REFERENCE_COUNT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, ::size_t) \ + F(cl_device_info, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, cl_device_affinity_domain) \ + F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS, STRING_CLASS) +#endif // #if defined(CL_VERSION_1_2) + #if defined(USE_CL_DEVICE_FISSION) #define __PARAM_NAME_DEVICE_FISSION(F) \ F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \ - F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \ - F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS) + F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS) \ + F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS) \ + F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \ + F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS) #endif // USE_CL_DEVICE_FISSION template struct param_traits {}; -#define __DECLARE_PARAM_TRAITS(token, param_name, T) \ +#define __CL_DECLARE_PARAM_TRAITS(token, param_name, T) \ struct token; \ template<> \ struct param_traits \ @@ -941,16 +1426,78 @@ struct param_traits \ typedef T param_type; \ }; -__PARAM_NAME_INFO_1_0(__DECLARE_PARAM_TRAITS) +__PARAM_NAME_INFO_1_0(__CL_DECLARE_PARAM_TRAITS) #if defined(CL_VERSION_1_1) -__PARAM_NAME_INFO_1_1(__DECLARE_PARAM_TRAITS) +__PARAM_NAME_INFO_1_1(__CL_DECLARE_PARAM_TRAITS) +#endif // CL_VERSION_1_1 +#if defined(CL_VERSION_1_2) +__PARAM_NAME_INFO_1_2(__CL_DECLARE_PARAM_TRAITS) #endif // CL_VERSION_1_1 #if defined(USE_CL_DEVICE_FISSION) -__PARAM_NAME_DEVICE_FISSION(__DECLARE_PARAM_TRAITS); +__PARAM_NAME_DEVICE_FISSION(__CL_DECLARE_PARAM_TRAITS); #endif // USE_CL_DEVICE_FISSION -#undef __DECLARE_PARAM_TRAITS +#ifdef CL_PLATFORM_ICD_SUFFIX_KHR +__CL_DECLARE_PARAM_TRAITS(cl_platform_info, CL_PLATFORM_ICD_SUFFIX_KHR, STRING_CLASS) +#endif + +#ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, cl_ulong) +#endif + +#ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, VECTOR_CLASS< ::size_t>) +#endif +#ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_SIMD_WIDTH_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WAVEFRONT_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, cl_uint) +#endif +#ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_BANKS_AMD, cl_uint) +#endif + +#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, cl_uint) +#endif +#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, cl_uint) +#endif +#ifdef CL_DEVICE_REGISTERS_PER_BLOCK_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_REGISTERS_PER_BLOCK_NV, cl_uint) +#endif +#ifdef CL_DEVICE_WARP_SIZE_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WARP_SIZE_NV, cl_uint) +#endif +#ifdef CL_DEVICE_GPU_OVERLAP_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GPU_OVERLAP_NV, cl_bool) +#endif +#ifdef CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, cl_bool) +#endif +#ifdef CL_DEVICE_INTEGRATED_MEMORY_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_INTEGRATED_MEMORY_NV, cl_bool) +#endif // Convenience functions @@ -958,7 +1505,7 @@ template inline cl_int getInfo(Func f, cl_uint name, T* param) { - return GetInfoHelper::get(f, name, param); + return getInfoHelper(f, name, param, 0); } template @@ -984,8 +1531,7 @@ inline cl_int getInfo(Func f, const Arg0& arg0, cl_uint name, T* param) { GetInfoFunctor0 f0 = { f, arg0 }; - return GetInfoHelper, T> - ::get(f0, name, param); + return getInfoHelper(f0, name, param, 0); } template @@ -993,34 +1539,68 @@ inline cl_int getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param) { GetInfoFunctor1 f0 = { f, arg0, arg1 }; - return GetInfoHelper, T> - ::get(f0, name, param); + return getInfoHelper(f0, name, param, 0); } template struct ReferenceHandler { }; +#if defined(CL_VERSION_1_2) +/** + * OpenCL 1.2 devices do have retain/release. + */ +template <> +struct ReferenceHandler +{ + /** + * Retain the device. + * \param device A valid device created using createSubDevices + * \return + * CL_SUCCESS if the function executed successfully. + * CL_INVALID_DEVICE if device was not a valid subdevice + * CL_OUT_OF_RESOURCES + * CL_OUT_OF_HOST_MEMORY + */ + static cl_int retain(cl_device_id device) + { return ::clRetainDevice(device); } + /** + * Retain the device. + * \param device A valid device created using createSubDevices + * \return + * CL_SUCCESS if the function executed successfully. + * CL_INVALID_DEVICE if device was not a valid subdevice + * CL_OUT_OF_RESOURCES + * CL_OUT_OF_HOST_MEMORY + */ + static cl_int release(cl_device_id device) + { return ::clReleaseDevice(device); } +}; +#else // #if defined(CL_VERSION_1_2) +/** + * OpenCL 1.1 devices do not have retain/release. + */ template <> struct ReferenceHandler { // cl_device_id does not have retain(). static cl_int retain(cl_device_id) - { return CL_INVALID_DEVICE; } + { return CL_SUCCESS; } // cl_device_id does not have release(). static cl_int release(cl_device_id) - { return CL_INVALID_DEVICE; } + { return CL_SUCCESS; } }; +#endif // #if defined(CL_VERSION_1_2) template <> struct ReferenceHandler { // cl_platform_id does not have retain(). static cl_int retain(cl_platform_id) - { return CL_INVALID_PLATFORM; } + { return CL_SUCCESS; } // cl_platform_id does not have release(). static cl_int release(cl_platform_id) - { return CL_INVALID_PLATFORM; } + { return CL_SUCCESS; } }; template <> @@ -1086,6 +1666,58 @@ struct ReferenceHandler { return ::clReleaseEvent(event); } }; + +// Extracts version number with major in the upper 16 bits, minor in the lower 16 +static cl_uint getVersion(const char *versionInfo) +{ + int highVersion = 0; + int lowVersion = 0; + int index = 7; + while(versionInfo[index] != '.' ) { + highVersion *= 10; + highVersion += versionInfo[index]-'0'; + ++index; + } + ++index; + while(versionInfo[index] != ' ' ) { + lowVersion *= 10; + lowVersion += versionInfo[index]-'0'; + ++index; + } + return (highVersion << 16) | lowVersion; +} + +static cl_uint getPlatformVersion(cl_platform_id platform) +{ + ::size_t size = 0; + clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &size); + char *versionInfo = (char *) alloca(size); + clGetPlatformInfo(platform, CL_PLATFORM_VERSION, size, &versionInfo[0], &size); + return getVersion(versionInfo); +} + +static cl_uint getDevicePlatformVersion(cl_device_id device) +{ + cl_platform_id platform; + clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL); + return getPlatformVersion(platform); +} + +#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +static cl_uint getContextPlatformVersion(cl_context context) +{ + // The platform cannot be queried directly, so we first have to grab a + // device and obtain its context + ::size_t size = 0; + clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size); + if (size == 0) + return 0; + cl_device_id *devices = (cl_device_id *) alloca(size); + clGetContextInfo(context, CL_CONTEXT_DEVICES, size, devices, NULL); + return getDevicePlatformVersion(devices[0]); +} +#endif // #if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + template class Wrapper { @@ -1098,6 +1730,8 @@ protected: public: Wrapper() : object_(NULL) { } + Wrapper(const cl_type &obj) : object_(obj) { } + ~Wrapper() { if (object_ != NULL) { release(); } @@ -1106,14 +1740,21 @@ public: Wrapper(const Wrapper& rhs) { object_ = rhs.object_; - if (object_ != NULL) { retain(); } + if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } } Wrapper& operator = (const Wrapper& rhs) { - if (object_ != NULL) { release(); } + if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } object_ = rhs.object_; - if (object_ != NULL) { retain(); } + if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } + return *this; + } + + Wrapper& operator = (const cl_type &rhs) + { + if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } + object_ = rhs; return *this; } @@ -1122,6 +1763,8 @@ public: cl_type& operator ()() { return object_; } protected: + template + friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); cl_int retain() const { @@ -1134,41 +1777,120 @@ protected: } }; -#if defined(__CL_ENABLE_EXCEPTIONS) -static inline cl_int errHandler ( - cl_int err, - const char * errStr = NULL) throw(Error) +template <> +class Wrapper { - if (err != CL_SUCCESS) { - throw Error(err, errStr); - } - return err; -} -#else -static inline cl_int errHandler (cl_int err, const char * errStr = NULL); +public: + typedef cl_device_id cl_type; -static inline cl_int errHandler (cl_int err, const char *) -{ - return err; -} -#endif // __CL_ENABLE_EXCEPTIONS +protected: + cl_type object_; + bool referenceCountable_; + + static bool isReferenceCountable(cl_device_id device) + { + bool retVal = false; + if (device != NULL) { + int version = getDevicePlatformVersion(device); + if(version > ((1 << 16) + 1)) { + retVal = true; + } + } + return retVal; + } + +public: + Wrapper() : object_(NULL), referenceCountable_(false) + { + } + + Wrapper(const cl_type &obj) : object_(obj), referenceCountable_(false) + { + referenceCountable_ = isReferenceCountable(obj); + } + + ~Wrapper() + { + if (object_ != NULL) { release(); } + } + + Wrapper(const Wrapper& rhs) + { + object_ = rhs.object_; + referenceCountable_ = isReferenceCountable(object_); + if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } + } + + Wrapper& operator = (const Wrapper& rhs) + { + if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } + object_ = rhs.object_; + referenceCountable_ = rhs.referenceCountable_; + if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } + return *this; + } + + Wrapper& operator = (const cl_type &rhs) + { + if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } + object_ = rhs; + referenceCountable_ = isReferenceCountable(object_); + return *this; + } + + cl_type operator ()() const { return object_; } + + cl_type& operator ()() { return object_; } + +protected: + template + friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); + + template + friend inline cl_int getInfoHelper(Func, cl_uint, VECTOR_CLASS*, int, typename U::cl_type); + + cl_int retain() const + { + if( referenceCountable_ ) { + return ReferenceHandler::retain(object_); + } + else { + return CL_SUCCESS; + } + } + + cl_int release() const + { + if( referenceCountable_ ) { + return ReferenceHandler::release(object_); + } + else { + return CL_SUCCESS; + } + } +}; } // namespace detail //! \endcond /*! \stuct ImageFormat - * \brief ImageFormat interface fro cl_image_format. + * \brief Adds constructors and member functions for cl_image_format. + * + * \see cl_image_format */ struct ImageFormat : public cl_image_format { + //! \brief Default constructor - performs no initialization. ImageFormat(){} + //! \brief Initializing constructor. ImageFormat(cl_channel_order order, cl_channel_type type) { image_channel_order = order; image_channel_data_type = type; } + //! \brief Assignment operator. ImageFormat& operator = (const ImageFormat& rhs) { if (this != &rhs) { @@ -1179,18 +1901,41 @@ struct ImageFormat : public cl_image_format } }; -/*! \class Device - * \brief Device interface for cl_device_id. +/*! \brief Class interface for cl_device_id. + * + * \note Copies of these objects are inexpensive, since they don't 'own' + * any underlying resources or data structures. + * + * \see cl_device_id */ class Device : public detail::Wrapper { public: - Device(cl_device_id device) { object_ = device; } - + //! \brief Default constructor - initializes to NULL. Device() : detail::Wrapper() { } + /*! \brief Copy constructor. + * + * This simply copies the device ID value, which is an inexpensive operation. + */ Device(const Device& device) : detail::Wrapper(device) { } + /*! \brief Constructor from cl_device_id. + * + * This simply copies the device ID value, which is an inexpensive operation. + */ + Device(const cl_device_id &device) : detail::Wrapper(device) { } + + /*! \brief Returns the first device on the default context. + * + * \see Context::getDefault() + */ + static Device getDefault(cl_int * err = NULL); + + /*! \brief Assignment operator from Device. + * + * This simply copies the device ID value, which is an inexpensive operation. + */ Device& operator = (const Device& rhs) { if (this != &rhs) { @@ -1199,6 +1944,17 @@ public: return *this; } + /*! \brief Assignment operator from cl_device_id. + * + * This simply copies the device ID value, which is an inexpensive operation. + */ + Device& operator = (const cl_device_id& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetDeviceInfo(). template cl_int getInfo(cl_device_info name, T* param) const { @@ -1207,6 +1963,7 @@ public: __GET_DEVICE_INFO_ERR); } + //! \brief Wrapper for clGetDeviceInfo() that returns by value. template typename detail::param_traits::param_type getInfo(cl_int* err = NULL) const @@ -1220,23 +1977,53 @@ public: return param; } + /** + * CL 1.2 version + */ +#if defined(CL_VERSION_1_2) + //! \brief Wrapper for clCreateSubDevicesEXT(). + cl_int createSubDevices( + const cl_device_partition_property * properties, + VECTOR_CLASS* devices) + { + cl_uint n = 0; + cl_int err = clCreateSubDevices(object_, properties, 0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES); + } + + cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); + err = clCreateSubDevices(object_, properties, n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES); + } + + devices->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } +#endif // #if defined(CL_VERSION_1_2) + +/** + * CL 1.1 version that uses device fission. + */ +#if defined(CL_VERSION_1_1) #if defined(USE_CL_DEVICE_FISSION) - cl_int createSubDevices( - const cl_device_partition_property_ext * properties, - VECTOR_CLASS* devices) - { - typedef CL_API_ENTRY cl_int - ( CL_API_CALL * PFN_clCreateSubDevicesEXT)( - cl_device_id /*in_device*/, + cl_int createSubDevices( + const cl_device_partition_property_ext * properties, + VECTOR_CLASS* devices) + { + typedef CL_API_ENTRY cl_int + ( CL_API_CALL * PFN_clCreateSubDevicesEXT)( + cl_device_id /*in_device*/, const cl_device_partition_property_ext * /* properties */, cl_uint /*num_entries*/, cl_device_id * /*out_devices*/, cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; - static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL; - __INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT); + static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL; + __INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT); - cl_uint n = 0; + cl_uint n = 0; cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n); if (err != CL_SUCCESS) { return detail::errHandler(err, __CREATE_SUB_DEVICES); @@ -1250,24 +2037,40 @@ public: devices->assign(&ids[0], &ids[n]); return CL_SUCCESS; - } -#endif + } +#endif // #if defined(USE_CL_DEVICE_FISSION) +#endif // #if defined(CL_VERSION_1_1) }; -/*! \class Platform - * \brief Platform interface. +/*! \brief Class interface for cl_platform_id. + * + * \note Copies of these objects are inexpensive, since they don't 'own' + * any underlying resources or data structures. + * + * \see cl_platform_id */ class Platform : public detail::Wrapper { public: - static const Platform null(); - - Platform(cl_platform_id platform) { object_ = platform; } - + //! \brief Default constructor - initializes to NULL. Platform() : detail::Wrapper() { } + /*! \brief Copy constructor. + * + * This simply copies the platform ID value, which is an inexpensive operation. + */ Platform(const Platform& platform) : detail::Wrapper(platform) { } + /*! \brief Constructor from cl_platform_id. + * + * This simply copies the platform ID value, which is an inexpensive operation. + */ + Platform(const cl_platform_id &platform) : detail::Wrapper(platform) { } + + /*! \brief Assignment operator from Platform. + * + * This simply copies the platform ID value, which is an inexpensive operation. + */ Platform& operator = (const Platform& rhs) { if (this != &rhs) { @@ -1276,6 +2079,17 @@ public: return *this; } + /*! \brief Assignment operator from cl_platform_id. + * + * This simply copies the platform ID value, which is an inexpensive operation. + */ + Platform& operator = (const cl_platform_id& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetPlatformInfo(). cl_int getInfo(cl_platform_info name, STRING_CLASS* param) const { return detail::errHandler( @@ -1283,6 +2097,7 @@ public: __GET_PLATFORM_INFO_ERR); } + //! \brief Wrapper for clGetPlatformInfo() that returns by value. template typename detail::param_traits::param_type getInfo(cl_int* err = NULL) const @@ -1296,11 +2111,18 @@ public: return param; } + /*! \brief Gets a list of devices for this platform. + * + * Wraps clGetDeviceIDs(). + */ cl_int getDevices( cl_device_type type, VECTOR_CLASS* devices) const { cl_uint n = 0; + if( devices == NULL ) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); + } cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n); if (err != CL_SUCCESS) { return detail::errHandler(err, __GET_DEVICE_IDS_ERR); @@ -1355,8 +2177,12 @@ public: cl_device_id * devices, cl_uint* num_devices); + if( devices == NULL ) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); + } + static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL; - __INIT_CL_EXT_FCN_PTR(clGetDeviceIDsFromD3D10KHR); + __INIT_CL_EXT_FCN_PTR_PLATFORM(object_, clGetDeviceIDsFromD3D10KHR); cl_uint n = 0; cl_int err = pfn_clGetDeviceIDsFromD3D10KHR( @@ -1389,10 +2215,19 @@ public: } #endif + /*! \brief Gets a list of available platforms. + * + * Wraps clGetPlatformIDs(). + */ static cl_int get( VECTOR_CLASS* platforms) { cl_uint n = 0; + + if( platforms == NULL ) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR); + } + cl_int err = ::clGetPlatformIDs(0, NULL, &n); if (err != CL_SUCCESS) { return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); @@ -1408,17 +2243,128 @@ public: platforms->assign(&ids[0], &ids[n]); return CL_SUCCESS; } -}; -static inline cl_int + /*! \brief Gets the first available platform. + * + * Wraps clGetPlatformIDs(), returning the first result. + */ + static cl_int get( + Platform * platform) + { + cl_uint n = 0; + + if( platform == NULL ) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR); + } + + cl_int err = ::clGetPlatformIDs(0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + cl_platform_id* ids = (cl_platform_id*) alloca( + n * sizeof(cl_platform_id)); + err = ::clGetPlatformIDs(n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + *platform = ids[0]; + return CL_SUCCESS; + } + + /*! \brief Gets the first available platform, returning it by value. + * + * Wraps clGetPlatformIDs(), returning the first result. + */ + static Platform get( + cl_int * errResult = NULL) + { + Platform platform; + cl_uint n = 0; + cl_int err = ::clGetPlatformIDs(0, NULL, &n); + if (err != CL_SUCCESS) { + detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + if (errResult != NULL) { + *errResult = err; + } + } + + cl_platform_id* ids = (cl_platform_id*) alloca( + n * sizeof(cl_platform_id)); + err = ::clGetPlatformIDs(n, ids, NULL); + + if (err != CL_SUCCESS) { + detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + if (errResult != NULL) { + *errResult = err; + } + + return ids[0]; + } + + static Platform getDefault( + cl_int *errResult = NULL ) + { + return get(errResult); + } + + +#if defined(CL_VERSION_1_2) + //! \brief Wrapper for clUnloadCompiler(). + cl_int + unloadCompiler() + { + return ::clUnloadPlatformCompiler(object_); + } +#endif // #if defined(CL_VERSION_1_2) +}; // class Platform + +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) +/** + * Unload the OpenCL compiler. + * \note Deprecated for OpenCL 1.2. Use Platform::unloadCompiler instead. + */ +inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int +UnloadCompiler() CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +inline cl_int UnloadCompiler() { return ::clUnloadCompiler(); } +#endif // #if defined(CL_VERSION_1_1) -class Context : public detail::Wrapper +/*! \brief Class interface for cl_context. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_context as the original. For details, see + * clRetainContext() and clReleaseContext(). + * + * \see cl_context + */ +class Context + : public detail::Wrapper { +private: + static volatile int default_initialized_; + static Context default_; + static volatile cl_int default_error_; public: + /*! \brief Destructor. + * + * This calls clReleaseContext() on the value held by this instance. + */ + ~Context() { } + + /*! \brief Constructs a context including a list of specified devices. + * + * Wraps clCreateContext(). + */ Context( const VECTOR_CLASS& devices, cl_context_properties* properties = NULL, @@ -1431,17 +2377,54 @@ public: cl_int* err = NULL) { cl_int error; + + ::size_t numDevices = devices.size(); + cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); + for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + object_ = ::clCreateContext( - properties, (cl_uint) devices.size(), - (cl_device_id*) &devices.front(), + properties, (cl_uint) numDevices, + deviceIDs, notifyFptr, data, &error); - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + detail::errHandler(error, __CREATE_CONTEXT_ERR); if (err != NULL) { *err = error; } } + Context( + const Device& device, + cl_context_properties* properties = NULL, + void (CL_CALLBACK * notifyFptr)( + const char *, + const void *, + ::size_t, + void *) = NULL, + void* data = NULL, + cl_int* err = NULL) + { + cl_int error; + + cl_device_id deviceID = device(); + + object_ = ::clCreateContext( + properties, 1, + &deviceID, + notifyFptr, data, &error); + + detail::errHandler(error, __CREATE_CONTEXT_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Constructs a context including all or a subset of devices of a specified type. + * + * Wraps clCreateContextFromType(). + */ Context( cl_device_type type, cl_context_properties* properties = NULL, @@ -1454,6 +2437,66 @@ public: cl_int* err = NULL) { cl_int error; + +#if !defined(__APPLE__) || !defined(__MACOS) + cl_context_properties prop[4] = {CL_CONTEXT_PLATFORM, 0, 0, 0 }; + + if (properties == NULL) { + // Get a valid platform ID as we cannot send in a blank one + VECTOR_CLASS platforms; + error = Platform::get(&platforms); + if (error != CL_SUCCESS) { + detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = error; + } + return; + } + + // Check the platforms we found for a device of our specified type + cl_context_properties platform_id = 0; + for (unsigned int i = 0; i < platforms.size(); i++) { + + VECTOR_CLASS devices; + +#if defined(__CL_ENABLE_EXCEPTIONS) + try { +#endif + + error = platforms[i].getDevices(type, &devices); + +#if defined(__CL_ENABLE_EXCEPTIONS) + } catch (Error) {} + // Catch if exceptions are enabled as we don't want to exit if first platform has no devices of type + // We do error checking next anyway, and can throw there if needed +#endif + + // Only squash CL_SUCCESS and CL_DEVICE_NOT_FOUND + if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND) { + detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = error; + } + } + + if (devices.size() > 0) { + platform_id = (cl_context_properties)platforms[i](); + break; + } + } + + if (platform_id == 0) { + detail::errHandler(CL_DEVICE_NOT_FOUND, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = CL_DEVICE_NOT_FOUND; + } + return; + } + + prop[1] = platform_id; + properties = &prop[0]; + } +#endif object_ = ::clCreateContextFromType( properties, type, notifyFptr, data, &error); @@ -1463,10 +2506,79 @@ public: } } + /*! \brief Returns a singleton context including all devices of CL_DEVICE_TYPE_DEFAULT. + * + * \note All calls to this function return the same cl_context as the first. + */ + static Context getDefault(cl_int * err = NULL) + { + int state = detail::compare_exchange( + &default_initialized_, + __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED); + + if (state & __DEFAULT_INITIALIZED) { + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + if (state & __DEFAULT_BEING_INITIALIZED) { + // Assume writes will propagate eventually... + while(default_initialized_ != __DEFAULT_INITIALIZED) { + detail::fence(); + } + + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + cl_int error; + default_ = Context( + CL_DEVICE_TYPE_DEFAULT, + NULL, + NULL, + NULL, + &error); + + detail::fence(); + + default_error_ = error; + // Assume writes will propagate eventually... + default_initialized_ = __DEFAULT_INITIALIZED; + + detail::fence(); + + if (err != NULL) { + *err = default_error_; + } + return default_; + + } + + //! \brief Default constructor - initializes to NULL. Context() : detail::Wrapper() { } + /*! \brief Copy constructor. + * + * This calls clRetainContext() on the parameter's cl_context. + */ Context(const Context& context) : detail::Wrapper(context) { } + /*! \brief Constructor from cl_context - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_context + * into the new Context object. + */ + __CL_EXPLICIT_CONSTRUCTORS Context(const cl_context& context) : detail::Wrapper(context) { } + + /*! \brief Assignment operator from Context. + * + * This calls clRetainContext() on the parameter and clReleaseContext() on + * the previous value held by this instance. + */ Context& operator = (const Context& rhs) { if (this != &rhs) { @@ -1475,6 +2587,18 @@ public: return *this; } + /*! \brief Assignment operator from cl_context - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseContext() on the value previously held by this instance. + */ + Context& operator = (const cl_context& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetContextInfo(). template cl_int getInfo(cl_context_info name, T* param) const { @@ -1483,6 +2607,7 @@ public: __GET_CONTEXT_INFO_ERR); } + //! \brief Wrapper for clGetContextInfo() that returns by value. template typename detail::param_traits::param_type getInfo(cl_int* err = NULL) const @@ -1496,6 +2621,10 @@ public: return param; } + /*! \brief Gets a list of supported image formats. + * + * Wraps clGetSupportedImageFormats(). + */ cl_int getSupportedImageFormats( cl_mem_flags flags, cl_mem_object_type type, @@ -1531,18 +2660,78 @@ public: } }; -__GET_INFO_HELPER_WITH_RETAIN(cl::Context) +inline Device Device::getDefault(cl_int * err) +{ + cl_int error; + Device device; -/*! \class Event - * \brief Event interface for cl_event. + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + + if (error != CL_SUCCESS) { + if (err != NULL) { + *err = error; + } + } + else { + device = context.getInfo()[0]; + if (err != NULL) { + *err = CL_SUCCESS; + } + } + + return device; +} + + +#ifdef _WIN32 +__declspec(selectany) volatile int Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED; +__declspec(selectany) Context Context::default_; +__declspec(selectany) volatile cl_int Context::default_error_ = CL_SUCCESS; +#else +__attribute__((weak)) volatile int Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED; +__attribute__((weak)) Context Context::default_; +__attribute__((weak)) volatile cl_int Context::default_error_ = CL_SUCCESS; +#endif + +/*! \brief Class interface for cl_event. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_event as the original. For details, see + * clRetainEvent() and clReleaseEvent(). + * + * \see cl_event */ class Event : public detail::Wrapper { public: + /*! \brief Destructor. + * + * This calls clReleaseEvent() on the value held by this instance. + */ + ~Event() { } + + //! \brief Default constructor - initializes to NULL. Event() : detail::Wrapper() { } + /*! \brief Copy constructor. + * + * This calls clRetainEvent() on the parameter's cl_event. + */ Event(const Event& event) : detail::Wrapper(event) { } + /*! \brief Constructor from cl_event - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_event + * into the new Event object. + */ + Event(const cl_event& event) : detail::Wrapper(event) { } + + /*! \brief Assignment operator from cl_event - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseEvent() on the value previously held by this instance. + */ Event& operator = (const Event& rhs) { if (this != &rhs) { @@ -1551,6 +2740,18 @@ public: return *this; } + /*! \brief Assignment operator from cl_event. + * + * This calls clRetainEvent() on the parameter and clReleaseEvent() on + * the previous value held by this instance. + */ + Event& operator = (const cl_event& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetEventInfo(). template cl_int getInfo(cl_event_info name, T* param) const { @@ -1559,6 +2760,7 @@ public: __GET_EVENT_INFO_ERR); } + //! \brief Wrapper for clGetEventInfo() that returns by value. template typename detail::param_traits::param_type getInfo(cl_int* err = NULL) const @@ -1572,6 +2774,7 @@ public: return param; } + //! \brief Wrapper for clGetEventProfilingInfo(). template cl_int getProfilingInfo(cl_profiling_info name, T* param) const { @@ -1580,6 +2783,7 @@ public: __GET_EVENT_PROFILE_INFO_ERR); } + //! \brief Wrapper for clGetEventProfilingInfo() that returns by value. template typename detail::param_traits::param_type getProfilingInfo(cl_int* err = NULL) const @@ -1593,6 +2797,10 @@ public: return param; } + /*! \brief Blocks the calling thread until this event completes. + * + * Wraps clWaitForEvents(). + */ cl_int wait() const { return detail::errHandler( @@ -1601,6 +2809,10 @@ public: } #if defined(CL_VERSION_1_1) + /*! \brief Registers a user callback function for a specific command execution status. + * + * Wraps clSetEventCallback(). + */ cl_int setCallback( cl_int type, void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *), @@ -1616,6 +2828,10 @@ public: } #endif + /*! \brief Blocks the calling thread until every event specified is complete. + * + * Wraps clWaitForEvents(). + */ static cl_int waitForEvents(const VECTOR_CLASS& events) { @@ -1626,15 +2842,18 @@ public: } }; -__GET_INFO_HELPER_WITH_RETAIN(cl::Event) - #if defined(CL_VERSION_1_1) -/*! \class UserEvent - * \brief User event interface for cl_event. +/*! \brief Class interface for user events (a subset of cl_event's). + * + * See Event for details about copy semantics, etc. */ class UserEvent : public Event { public: + /*! \brief Constructs a user event on a given context. + * + * Wraps clCreateUserEvent(). + */ UserEvent( const Context& context, cl_int * err = NULL) @@ -1650,10 +2869,13 @@ public: } } + //! \brief Default constructor - initializes to NULL. UserEvent() : Event() { } + //! \brief Copy constructor - performs shallow copy. UserEvent(const UserEvent& event) : Event(event) { } + //! \brief Assignment Operator - performs shallow copy. UserEvent& operator = (const UserEvent& rhs) { if (this != &rhs) { @@ -1662,6 +2884,10 @@ public: return *this; } + /*! \brief Sets the execution status of a user event object. + * + * Wraps clSetUserEventStatus(). + */ cl_int setStatus(cl_int status) { return detail::errHandler( @@ -1671,6 +2897,10 @@ public: }; #endif +/*! \brief Blocks the calling thread until every event specified is complete. + * + * Wraps clWaitForEvents(). + */ inline static cl_int WaitForEvents(const VECTOR_CLASS& events) { @@ -1680,16 +2910,45 @@ WaitForEvents(const VECTOR_CLASS& events) __WAIT_FOR_EVENTS_ERR); } -/*! \class Memory - * \brief Memory interface for cl_mem. +/*! \brief Class interface for cl_mem. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_mem as the original. For details, see + * clRetainMemObject() and clReleaseMemObject(). + * + * \see cl_mem */ class Memory : public detail::Wrapper { public: + + /*! \brief Destructor. + * + * This calls clReleaseMemObject() on the value held by this instance. + */ + ~Memory() {} + + //! \brief Default constructor - initializes to NULL. Memory() : detail::Wrapper() { } + /*! \brief Copy constructor - performs shallow copy. + * + * This calls clRetainMemObject() on the parameter's cl_mem. + */ Memory(const Memory& memory) : detail::Wrapper(memory) { } + /*! \brief Constructor from cl_mem - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_mem + * into the new Memory object. + */ + __CL_EXPLICIT_CONSTRUCTORS Memory(const cl_mem& memory) : detail::Wrapper(memory) { } + + /*! \brief Assignment operator from Memory. + * + * This calls clRetainMemObject() on the parameter and clReleaseMemObject() + * on the previous value held by this instance. + */ Memory& operator = (const Memory& rhs) { if (this != &rhs) { @@ -1698,6 +2957,18 @@ public: return *this; } + /*! \brief Assignment operator from cl_mem - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseMemObject() on the value previously held by this instance. + */ + Memory& operator = (const cl_mem& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetMemObjectInfo(). template cl_int getInfo(cl_mem_info name, T* param) const { @@ -1706,6 +2977,7 @@ public: __GET_MEM_OBJECT_INFO_ERR); } + //! \brief Wrapper for clGetMemObjectInfo() that returns by value. template typename detail::param_traits::param_type getInfo(cl_int* err = NULL) const @@ -1720,6 +2992,19 @@ public: } #if defined(CL_VERSION_1_1) + /*! \brief Registers a callback function to be called when the memory object + * is no longer needed. + * + * Wraps clSetMemObjectDestructorCallback(). + * + * Repeated calls to this function, for a given cl_mem value, will append + * to the list of functions called (in reverse order) when memory object's + * resources are freed and the memory object is deleted. + * + * \note + * The registered callbacks are associated with the underlying cl_mem + * value - not the Memory class instance. + */ cl_int setDestructorCallback( void (CL_CALLBACK * pfn_notify)(cl_mem, void *), void * user_data = NULL) @@ -1735,14 +3020,35 @@ public: }; -__GET_INFO_HELPER_WITH_RETAIN(cl::Memory) +// Pre-declare copy functions +class Buffer; +template< typename IteratorType > +cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ); +template< typename IteratorType > +cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ); +template< typename IteratorType > +cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ); +template< typename IteratorType > +cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ); -/*! \class Buffer - * \brief Memory buffer interface. + +/*! \brief Class interface for Buffer Memory Objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory */ class Buffer : public Memory { public: + + /*! \brief Constructs a Buffer in a specified context. + * + * Wraps clCreateBuffer(). + * + * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was + * specified. Note alignment & exclusivity requirements. + */ Buffer( const Context& context, cl_mem_flags flags, @@ -1759,10 +3065,112 @@ public: } } + /*! \brief Constructs a Buffer in the default context. + * + * Wraps clCreateBuffer(). + * + * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was + * specified. Note alignment & exclusivity requirements. + * + * \see Context::getDefault() + */ + Buffer( + cl_mem_flags flags, + ::size_t size, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + + Context context = Context::getDefault(err); + + object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! + * \brief Construct a Buffer from a host container via iterators. + * IteratorType must be random access. + * If useHostPtr is specified iterators must represent contiguous data. + */ + template< typename IteratorType > + Buffer( + IteratorType startIterator, + IteratorType endIterator, + bool readOnly, + bool useHostPtr = false, + cl_int* err = NULL) + { + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + cl_mem_flags flags = 0; + if( readOnly ) { + flags |= CL_MEM_READ_ONLY; + } + else { + flags |= CL_MEM_READ_WRITE; + } + if( useHostPtr ) { + flags |= CL_MEM_USE_HOST_PTR; + } + + ::size_t size = sizeof(DataType)*(endIterator - startIterator); + + Context context = Context::getDefault(err); + + if( useHostPtr ) { + object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); + } else { + object_ = ::clCreateBuffer(context(), flags, size, 0, &error); + } + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + if( !useHostPtr ) { + error = cl::copy(startIterator, endIterator, *this); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + } + + /*! + * \brief Construct a Buffer from a host container via iterators using a specified context. + * IteratorType must be random access. + * If useHostPtr is specified iterators must represent contiguous data. + */ + template< typename IteratorType > + Buffer(const Context &context, IteratorType startIterator, IteratorType endIterator, + bool readOnly, bool useHostPtr = false, cl_int* err = NULL); + + //! \brief Default constructor - initializes to NULL. Buffer() : Memory() { } + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ Buffer(const Buffer& buffer) : Memory(buffer) { } + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Buffer(const cl_mem& buffer) : Memory(buffer) { } + + /*! \brief Assignment from Buffer - performs shallow copy. + * + * See Memory for further details. + */ Buffer& operator = (const Buffer& rhs) { if (this != &rhs) { @@ -1771,7 +3179,21 @@ public: return *this; } + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Buffer& operator = (const cl_mem& rhs) + { + Memory::operator=(rhs); + return *this; + } + #if defined(CL_VERSION_1_1) + /*! \brief Creates a new buffer object from this. + * + * Wraps clCreateSubBuffer(). + */ Buffer createSubBuffer( cl_mem_flags flags, cl_buffer_create_type buffer_create_type, @@ -1793,11 +3215,19 @@ public: } return result; - } + } #endif }; #if defined (USE_DX_INTEROP) +/*! \brief Class interface for creating OpenCL buffers from ID3D10Buffer's. + * + * This is provided to facilitate interoperability with Direct3D. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ class BufferD3D10 : public Buffer { public: @@ -1805,6 +3235,11 @@ public: cl_context context, cl_mem_flags flags, ID3D10Buffer* buffer, cl_int* errcode_ret); + /*! \brief Constructs a BufferD3D10, in a specified context, from a + * given ID3D10Buffer. + * + * Wraps clCreateFromD3D10BufferKHR(). + */ BufferD3D10( const Context& context, cl_mem_flags flags, @@ -1812,7 +3247,20 @@ public: cl_int * err = NULL) { static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL; + +#if defined(CL_VERSION_1_2) + vector props = context.getInfo(); + cl_platform platform = -1; + for( int i = 0; i < props.size(); ++i ) { + if( props[i] == CL_CONTEXT_PLATFORM ) { + platform = props[i+1]; + } + } + __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clCreateFromD3D10BufferKHR); +#endif +#if defined(CL_VERSION_1_1) __INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR); +#endif cl_int error; object_ = pfn_clCreateFromD3D10BufferKHR( @@ -1827,10 +3275,25 @@ public: } } + //! \brief Default constructor - initializes to NULL. BufferD3D10() : Buffer() { } + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ BufferD3D10(const BufferD3D10& buffer) : Buffer(buffer) { } + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS BufferD3D10(const cl_mem& buffer) : Buffer(buffer) { } + + /*! \brief Assignment from BufferD3D10 - performs shallow copy. + * + * See Memory for further details. + */ BufferD3D10& operator = (const BufferD3D10& rhs) { if (this != &rhs) { @@ -1838,15 +3301,35 @@ public: } return *this; } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + BufferD3D10& operator = (const cl_mem& rhs) + { + Buffer::operator=(rhs); + return *this; + } }; #endif -/*! \class BufferGL - * \brief Memory buffer interface for GL interop. +/*! \brief Class interface for GL Buffer Memory Objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory */ class BufferGL : public Buffer { public: + /*! \brief Constructs a BufferGL in a specified context, from a given + * GL buffer. + * + * Wraps clCreateFromGLBuffer(). + */ BufferGL( const Context& context, cl_mem_flags flags, @@ -1866,10 +3349,25 @@ public: } } + //! \brief Default constructor - initializes to NULL. BufferGL() : Buffer() { } + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ BufferGL(const BufferGL& buffer) : Buffer(buffer) { } + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS BufferGL(const cl_mem& buffer) : Buffer(buffer) { } + + /*! \brief Assignment from BufferGL - performs shallow copy. + * + * See Memory for further details. + */ BufferGL& operator = (const BufferGL& rhs) { if (this != &rhs) { @@ -1878,6 +3376,17 @@ public: return *this; } + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + BufferGL& operator = (const cl_mem& rhs) + { + Buffer::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetGLObjectInfo(). cl_int getObjectInfo( cl_gl_object_type *type, GLuint * gl_object_name) @@ -1888,12 +3397,22 @@ public: } }; -/*! \class BufferRenderGL - * \brief Memory buffer interface for GL interop with renderbuffer. +/*! \brief Class interface for GL Render Buffer Memory Objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory */ class BufferRenderGL : public Buffer { public: + /*! \brief Constructs a BufferRenderGL in a specified context, from a given + * GL Renderbuffer. + * + * Wraps clCreateFromGLRenderbuffer(). + */ BufferRenderGL( const Context& context, cl_mem_flags flags, @@ -1907,16 +3426,31 @@ public: bufobj, &error); - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); + detail::errHandler(error, __CREATE_GL_RENDER_BUFFER_ERR); if (err != NULL) { *err = error; } } + //! \brief Default constructor - initializes to NULL. BufferRenderGL() : Buffer() { } + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ BufferRenderGL(const BufferGL& buffer) : Buffer(buffer) { } + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS BufferRenderGL(const cl_mem& buffer) : Buffer(buffer) { } + + /*! \brief Assignment from BufferGL - performs shallow copy. + * + * See Memory for further details. + */ BufferRenderGL& operator = (const BufferRenderGL& rhs) { if (this != &rhs) { @@ -1925,6 +3459,17 @@ public: return *this; } + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + BufferRenderGL& operator = (const cl_mem& rhs) + { + Buffer::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetGLObjectInfo(). cl_int getObjectInfo( cl_gl_object_type *type, GLuint * gl_object_name) @@ -1935,16 +3480,34 @@ public: } }; -/*! \class Image - * \brief Base class interface for all images. +/*! \brief C++ base class for Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory */ class Image : public Memory { protected: + //! \brief Default constructor - initializes to NULL. Image() : Memory() { } + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ Image(const Image& image) : Memory(image) { } + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image(const cl_mem& image) : Memory(image) { } + + /*! \brief Assignment from Image - performs shallow copy. + * + * See Memory for further details. + */ Image& operator = (const Image& rhs) { if (this != &rhs) { @@ -1952,7 +3515,19 @@ protected: } return *this; } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image& operator = (const cl_mem& rhs) + { + Memory::operator=(rhs); + return *this; + } + public: + //! \brief Wrapper for clGetImageInfo(). template cl_int getImageInfo(cl_image_info name, T* param) const { @@ -1960,7 +3535,8 @@ public: detail::getInfo(&::clGetImageInfo, object_, name, param), __GET_IMAGE_INFO_ERR); } - + + //! \brief Wrapper for clGetImageInfo() that returns by value. template typename detail::param_traits::param_type getImageInfo(cl_int* err = NULL) const @@ -1975,12 +3551,220 @@ public: } }; -/*! \class Image2D - * \brief Image interface for 2D images. +#if defined(CL_VERSION_1_2) +/*! \brief Class interface for 1D Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image1D : public Image +{ +public: + /*! \brief Constructs a 1D Image in a specified context. + * + * Wraps clCreateImage(). + */ + Image1D( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t width, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE1D, + width, + 0, 0, 0, 0, 0, 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + Image1D() { } + + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ + Image1D(const Image1D& image1D) : Image(image1D) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image1D(const cl_mem& image1D) : Image(image1D) { } + + /*! \brief Assignment from Image1D - performs shallow copy. + * + * See Memory for further details. + */ + Image1D& operator = (const Image1D& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image1D& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; + +/*! \class Image1DBuffer + * \brief Image interface for 1D buffer images. + */ +class Image1DBuffer : public Image +{ +public: + Image1DBuffer( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t width, + const Buffer &buffer, + cl_int* err = NULL) + { + cl_int error; + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE1D_BUFFER, + width, + 0, 0, 0, 0, 0, 0, 0, + buffer() + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + NULL, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + Image1DBuffer() { } + + Image1DBuffer(const Image1DBuffer& image1D) : Image(image1D) { } + + __CL_EXPLICIT_CONSTRUCTORS Image1DBuffer(const cl_mem& image1D) : Image(image1D) { } + + Image1DBuffer& operator = (const Image1DBuffer& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + Image1DBuffer& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; + +/*! \class Image1DArray + * \brief Image interface for arrays of 1D images. + */ +class Image1DArray : public Image +{ +public: + Image1DArray( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t arraySize, + ::size_t width, + ::size_t rowPitch, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE1D_ARRAY, + width, + 0, 0, // height, depth (unused) + arraySize, + rowPitch, + 0, 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + Image1DArray() { } + + Image1DArray(const Image1DArray& imageArray) : Image(imageArray) { } + + __CL_EXPLICIT_CONSTRUCTORS Image1DArray(const cl_mem& imageArray) : Image(imageArray) { } + + Image1DArray& operator = (const Image1DArray& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + Image1DArray& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; +#endif // #if defined(CL_VERSION_1_2) + + +/*! \brief Class interface for 2D Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory */ class Image2D : public Image { public: + /*! \brief Constructs a 1D Image in a specified context. + * + * Wraps clCreateImage(). + */ Image2D( const Context& context, cl_mem_flags flags, @@ -1992,19 +3776,79 @@ public: cl_int* err = NULL) { cl_int error; - object_ = ::clCreateImage2D( - context(), flags,&format, width, height, row_pitch, host_ptr, &error); + bool useCreateImage; - detail::errHandler(error, __CREATE_IMAGE2D_ERR); - if (err != NULL) { - *err = error; +#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above } +#elif defined(CL_VERSION_1_2) + useCreateImage = true; +#else + useCreateImage = false; +#endif + +#if defined(CL_VERSION_1_2) + if (useCreateImage) + { + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE2D, + width, + height, + 0, 0, // depth, array size (unused) + row_pitch, + 0, 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // #if defined(CL_VERSION_1_2) +#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + if (!useCreateImage) + { + object_ = ::clCreateImage2D( + context(), flags,&format, width, height, row_pitch, host_ptr, &error); + + detail::errHandler(error, __CREATE_IMAGE2D_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // #if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) } + //! \brief Default constructor - initializes to NULL. Image2D() { } + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ Image2D(const Image2D& image2D) : Image(image2D) { } + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image2D(const cl_mem& image2D) : Image(image2D) { } + + /*! \brief Assignment from Image2D - performs shallow copy. + * + * See Memory for further details. + */ Image2D& operator = (const Image2D& rhs) { if (this != &rhs) { @@ -2012,14 +3856,37 @@ public: } return *this; } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image2D& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } }; -/*! \class Image2DGL - * \brief 2D image interface for GL interop. + +#if !defined(CL_VERSION_1_2) +/*! \brief Class interface for GL 2D Image Memory objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + * \note Deprecated for OpenCL 1.2. Please use ImageGL instead. */ -class Image2DGL : public Image2D +class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED Image2DGL CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED : public Image2D { public: + /*! \brief Constructs an Image2DGL in a specified context, from a given + * GL Texture. + * + * Wraps clCreateFromGLTexture2D(). + */ Image2DGL( const Context& context, cl_mem_flags flags, @@ -2037,16 +3904,32 @@ public: texobj, &error); - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); + detail::errHandler(error, __CREATE_GL_TEXTURE_2D_ERR); if (err != NULL) { *err = error; } - } + } + + //! \brief Default constructor - initializes to NULL. Image2DGL() : Image2D() { } + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ Image2DGL(const Image2DGL& image) : Image2D(image) { } + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image2DGL(const cl_mem& image) : Image2D(image) { } + + /*! \brief Assignment from Image2DGL - performs shallow copy. + * + * See Memory for further details. + */ Image2DGL& operator = (const Image2DGL& rhs) { if (this != &rhs) { @@ -2054,14 +3937,99 @@ public: } return *this; } -}; -/*! \class Image3D - * \brief Image interface for 3D images. + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image2DGL& operator = (const cl_mem& rhs) + { + Image2D::operator=(rhs); + return *this; + } +}; +#endif // #if !defined(CL_VERSION_1_2) + +#if defined(CL_VERSION_1_2) +/*! \class Image2DArray + * \brief Image interface for arrays of 2D images. + */ +class Image2DArray : public Image +{ +public: + Image2DArray( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t arraySize, + ::size_t width, + ::size_t height, + ::size_t rowPitch, + ::size_t slicePitch, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE2D_ARRAY, + width, + height, + 0, // depth (unused) + arraySize, + rowPitch, + slicePitch, + 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } + + Image2DArray() { } + + Image2DArray(const Image2DArray& imageArray) : Image(imageArray) { } + + __CL_EXPLICIT_CONSTRUCTORS Image2DArray(const cl_mem& imageArray) : Image(imageArray) { } + + Image2DArray& operator = (const Image2DArray& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + Image2DArray& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; +#endif // #if defined(CL_VERSION_1_2) + +/*! \brief Class interface for 3D Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory */ class Image3D : public Image { public: + /*! \brief Constructs a 3D Image in a specified context. + * + * Wraps clCreateImage(). + */ Image3D( const Context& context, cl_mem_flags flags, @@ -2075,20 +4043,82 @@ public: cl_int* err = NULL) { cl_int error; - object_ = ::clCreateImage3D( - context(), flags, &format, width, height, depth, row_pitch, - slice_pitch, host_ptr, &error); + bool useCreateImage; - detail::errHandler(error, __CREATE_IMAGE3D_ERR); - if (err != NULL) { - *err = error; +#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above } +#elif defined(CL_VERSION_1_2) + useCreateImage = true; +#else + useCreateImage = false; +#endif + +#if defined(CL_VERSION_1_2) + if (useCreateImage) + { + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE3D, + width, + height, + depth, + 0, // array size (unused) + row_pitch, + slice_pitch, + 0, 0, 0 + }; + object_ = ::clCreateImage( + context(), + flags, + &format, + &desc, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // #if defined(CL_VERSION_1_2) +#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + if (!useCreateImage) + { + object_ = ::clCreateImage3D( + context(), flags, &format, width, height, depth, row_pitch, + slice_pitch, host_ptr, &error); + + detail::errHandler(error, __CREATE_IMAGE3D_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // #if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) } + //! \brief Default constructor - initializes to NULL. Image3D() { } + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ Image3D(const Image3D& image3D) : Image(image3D) { } + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image3D(const cl_mem& image3D) : Image(image3D) { } + + /*! \brief Assignment from Image3D - performs shallow copy. + * + * See Memory for further details. + */ Image3D& operator = (const Image3D& rhs) { if (this != &rhs) { @@ -2096,14 +4126,35 @@ public: } return *this; } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image3D& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } }; -/*! \class Image2DGL - * \brief 2D image interface for GL interop. +#if !defined(CL_VERSION_1_2) +/*! \brief Class interface for GL 3D Image Memory objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory */ class Image3DGL : public Image3D { public: + /*! \brief Constructs an Image3DGL in a specified context, from a given + * GL Texture. + * + * Wraps clCreateFromGLTexture3D(). + */ Image3DGL( const Context& context, cl_mem_flags flags, @@ -2121,16 +4172,31 @@ public: texobj, &error); - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); + detail::errHandler(error, __CREATE_GL_TEXTURE_3D_ERR); if (err != NULL) { *err = error; } } + //! \brief Default constructor - initializes to NULL. Image3DGL() : Image3D() { } + /*! \brief Copy constructor - performs shallow copy. + * + * See Memory for further details. + */ Image3DGL(const Image3DGL& image) : Image3D(image) { } + /*! \brief Constructor from cl_mem - takes ownership. + * + * See Memory for further details. + */ + __CL_EXPLICIT_CONSTRUCTORS Image3DGL(const cl_mem& image) : Image3D(image) { } + + /*! \brief Assignment from Image3DGL - performs shallow copy. + * + * See Memory for further details. + */ Image3DGL& operator = (const Image3DGL& rhs) { if (this != &rhs) { @@ -2138,16 +4204,98 @@ public: } return *this; } -}; -/*! \class Sampler - * \brief Sampler interface for cl_sampler. + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image3DGL& operator = (const cl_mem& rhs) + { + Image3D::operator=(rhs); + return *this; + } +}; +#endif // #if !defined(CL_VERSION_1_2) + +#if defined(CL_VERSION_1_2) +/*! \class ImageGL + * \brief general image interface for GL interop. + * We abstract the 2D and 3D GL images into a single instance here + * that wraps all GL sourced images on the grounds that setup information + * was performed by OpenCL anyway. + */ +class ImageGL : public Image +{ +public: + ImageGL( + const Context& context, + cl_mem_flags flags, + GLenum target, + GLint miplevel, + GLuint texobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLTexture( + context(), + flags, + target, + miplevel, + texobj, + &error); + + detail::errHandler(error, __CREATE_GL_TEXTURE_ERR); + if (err != NULL) { + *err = error; + } + } + + ImageGL() : Image() { } + + ImageGL(const ImageGL& image) : Image(image) { } + + __CL_EXPLICIT_CONSTRUCTORS ImageGL(const cl_mem& image) : Image(image) { } + + ImageGL& operator = (const ImageGL& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + ImageGL& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; +#endif // #if defined(CL_VERSION_1_2) + +/*! \brief Class interface for cl_sampler. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_sampler as the original. For details, see + * clRetainSampler() and clReleaseSampler(). + * + * \see cl_sampler */ class Sampler : public detail::Wrapper { public: + /*! \brief Destructor. + * + * This calls clReleaseSampler() on the value held by this instance. + */ + ~Sampler() { } + + //! \brief Default constructor - initializes to NULL. Sampler() { } + /*! \brief Constructs a Sampler in a specified context. + * + * Wraps clCreateSampler(). + */ Sampler( const Context& context, cl_bool normalized_coords, @@ -2169,8 +4317,24 @@ public: } } + /*! \brief Copy constructor - performs shallow copy. + * + * This calls clRetainSampler() on the parameter's cl_sampler. + */ Sampler(const Sampler& sampler) : detail::Wrapper(sampler) { } + /*! \brief Constructor from cl_sampler - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_sampler + * into the new Sampler object. + */ + Sampler(const cl_sampler& sampler) : detail::Wrapper(sampler) { } + + /*! \brief Assignment operator from Sampler. + * + * This calls clRetainSampler() on the parameter and clReleaseSampler() + * on the previous value held by this instance. + */ Sampler& operator = (const Sampler& rhs) { if (this != &rhs) { @@ -2179,6 +4343,18 @@ public: return *this; } + /*! \brief Assignment operator from cl_sampler - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseSampler() on the value previously held by this instance. + */ + Sampler& operator = (const cl_sampler& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetSamplerInfo(). template cl_int getInfo(cl_sampler_info name, T* param) const { @@ -2187,6 +4363,7 @@ public: __GET_SAMPLER_INFO_ERR); } + //! \brief Wrapper for clGetSamplerInfo() that returns by value. template typename detail::param_traits::param_type getInfo(cl_int* err = NULL) const @@ -2201,15 +4378,11 @@ public: } }; -__GET_INFO_HELPER_WITH_RETAIN(cl::Sampler) - class Program; class CommandQueue; class Kernel; -/*! \class NDRange - * \brief NDRange interface - */ +//! \brief Class interface for specifying NDRange values. class NDRange { private: @@ -2217,41 +4390,51 @@ private: cl_uint dimensions_; public: + //! \brief Default constructor - resulting range has zero dimensions. NDRange() : dimensions_(0) { } + //! \brief Constructs one-dimensional range. NDRange(::size_t size0) : dimensions_(1) { - sizes_.push_back(size0); + sizes_[0] = size0; } + //! \brief Constructs two-dimensional range. NDRange(::size_t size0, ::size_t size1) : dimensions_(2) { - sizes_.push_back(size0); - sizes_.push_back(size1); + sizes_[0] = size0; + sizes_[1] = size1; } + //! \brief Constructs three-dimensional range. NDRange(::size_t size0, ::size_t size1, ::size_t size2) : dimensions_(3) { - sizes_.push_back(size0); - sizes_.push_back(size1); - sizes_.push_back(size2); + sizes_[0] = size0; + sizes_[1] = size1; + sizes_[2] = size2; } - operator const ::size_t*() const { return (const ::size_t*) sizes_; } + /*! \brief Conversion operator to const ::size_t *. + * + * \returns a pointer to the size of the first dimension. + */ + operator const ::size_t*() const { + return (const ::size_t*) sizes_; + } + + //! \brief Queries the number of dimensions in the range. ::size_t dimensions() const { return dimensions_; } }; +//! \brief A zero-dimensional range. static const NDRange NullRange; -/*! - * \struct LocalSpaceArg - * \brief Local address raper for use with Kernel::setArg - */ +//! \brief Local address wrapper for use with Kernel::setArg struct LocalSpaceArg { ::size_t size_; @@ -2276,6 +4459,12 @@ struct KernelArgumentHandler } //! \endcond +/*! __local + * \brief Helper function for generating LocalSpaceArg objects. + * Deprecated. Replaced with Local. + */ +inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED LocalSpaceArg +__local(::size_t size) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; inline LocalSpaceArg __local(::size_t size) { @@ -2283,20 +4472,58 @@ __local(::size_t size) return ret; } -class KernelFunctor; +/*! Local + * \brief Helper function for generating LocalSpaceArg objects. + */ +inline LocalSpaceArg +Local(::size_t size) +{ + LocalSpaceArg ret = { size }; + return ret; +} -/*! \class Kernel - * \brief Kernel interface that implements cl_kernel +//class KernelFunctor; + +/*! \brief Class interface for cl_kernel. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_kernel as the original. For details, see + * clRetainKernel() and clReleaseKernel(). + * + * \see cl_kernel */ class Kernel : public detail::Wrapper { public: inline Kernel(const Program& program, const char* name, cl_int* err = NULL); + /*! \brief Destructor. + * + * This calls clReleaseKernel() on the value held by this instance. + */ + ~Kernel() { } + + //! \brief Default constructor - initializes to NULL. Kernel() { } + /*! \brief Copy constructor - performs shallow copy. + * + * This calls clRetainKernel() on the parameter's cl_kernel. + */ Kernel(const Kernel& kernel) : detail::Wrapper(kernel) { } + /*! \brief Constructor from cl_kernel - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_kernel + * into the new Kernel object. + */ + __CL_EXPLICIT_CONSTRUCTORS Kernel(const cl_kernel& kernel) : detail::Wrapper(kernel) { } + + /*! \brief Assignment operator from Kernel. + * + * This calls clRetainKernel() on the parameter and clReleaseKernel() + * on the previous value held by this instance. + */ Kernel& operator = (const Kernel& rhs) { if (this != &rhs) { @@ -2305,6 +4532,17 @@ public: return *this; } + /*! \brief Assignment operator from cl_kernel - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseKernel() on the value previously held by this instance. + */ + Kernel& operator = (const cl_kernel& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + template cl_int getInfo(cl_kernel_info name, T* param) const { @@ -2326,6 +4564,29 @@ public: return param; } +#if defined(CL_VERSION_1_2) + template + cl_int getArgInfo(cl_uint argIndex, cl_kernel_arg_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetKernelArgInfo, object_, argIndex, name, param), + __GET_KERNEL_ARG_INFO_ERR); + } + + template typename + detail::param_traits::param_type + getArgInfo(cl_uint argIndex, cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_kernel_arg_info, name>::param_type param; + cl_int result = getArgInfo(argIndex, name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +#endif // #if defined(CL_VERSION_1_2) + template cl_int getWorkGroupInfo( const Device& device, cl_kernel_work_group_info name, T* param) const @@ -2367,21 +4628,8 @@ public: ::clSetKernelArg(object_, index, size, argPtr), __SET_KERNEL_ARGS_ERR); } - - KernelFunctor bind( - const CommandQueue& queue, - const NDRange& offset, - const NDRange& global, - const NDRange& local); - - KernelFunctor bind( - const CommandQueue& queue, - const NDRange& global, - const NDRange& local); }; -__GET_INFO_HELPER_WITH_RETAIN(cl::Kernel) - /*! \class Program * \brief Program interface that implements cl_program. */ @@ -2391,6 +4639,75 @@ public: typedef VECTOR_CLASS > Binaries; typedef VECTOR_CLASS > Sources; + Program( + const STRING_CLASS& source, + bool build = false, + cl_int* err = NULL) + { + cl_int error; + + const char * strings = source.c_str(); + const ::size_t length = source.size(); + + Context context = Context::getDefault(err); + + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)1, &strings, &length, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + + if (error == CL_SUCCESS && build) { + + error = ::clBuildProgram( + object_, + 0, + NULL, + "", + NULL, + NULL); + + detail::errHandler(error, __BUILD_PROGRAM_ERR); + } + + if (err != NULL) { + *err = error; + } + } + + Program( + const Context& context, + const STRING_CLASS& source, + bool build = false, + cl_int* err = NULL) + { + cl_int error; + + const char * strings = source.c_str(); + const ::size_t length = source.size(); + + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)1, &strings, &length, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + + if (error == CL_SUCCESS && build) { + + error = ::clBuildProgram( + object_, + 0, + NULL, + "", + NULL, + NULL); + + detail::errHandler(error, __BUILD_PROGRAM_ERR); + } + + if (err != NULL) { + *err = error; + } + } + Program( const Context& context, const Sources& sources, @@ -2416,6 +4733,25 @@ public: } } + /** + * Construct a program object from a list of devices and a per-device list of binaries. + * \param context A valid OpenCL context in which to construct the program. + * \param devices A vector of OpenCL device objects for which the program will be created. + * \param binaries A vector of pairs of a pointer to a binary object and its length. + * \param binaryStatus An optional vector that on completion will be resized to + * match the size of binaries and filled with values to specify if each binary + * was successfully loaded. + * Set to CL_SUCCESS if the binary was successfully loaded. + * Set to CL_INVALID_VALUE if the length is 0 or the binary pointer is NULL. + * Set to CL_INVALID_BINARY if the binary provided is not valid for the matching device. + * \param err if non-NULL will be set to CL_SUCCESS on successful operation or one of the following errors: + * CL_INVALID_CONTEXT if context is not a valid context. + * CL_INVALID_VALUE if the length of devices is zero; or if the length of binaries does not match the length of devices; + * or if any entry in binaries is NULL or has length 0. + * CL_INVALID_DEVICE if OpenCL devices listed in devices are not in the list of devices associated with context. + * CL_INVALID_BINARY if an invalid program binary was encountered for any device. binaryStatus will return specific status for each device. + * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL implementation on the host. + */ Program( const Context& context, const VECTOR_CLASS& devices, @@ -2424,20 +4760,41 @@ public: cl_int* err = NULL) { cl_int error; - const ::size_t n = binaries.size(); - ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); - const unsigned char** images = (const unsigned char**) alloca(n * sizeof(const void*)); + + const ::size_t numDevices = devices.size(); + + // Catch size mismatch early and return + if(binaries.size() != numDevices) { + error = CL_INVALID_VALUE; + detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); + if (err != NULL) { + *err = error; + } + return; + } - for (::size_t i = 0; i < n; ++i) { - images[i] = (const unsigned char*)binaries[(int)i].first; + ::size_t* lengths = (::size_t*) alloca(numDevices * sizeof(::size_t)); + const unsigned char** images = (const unsigned char**) alloca(numDevices * sizeof(const unsigned char**)); + + for (::size_t i = 0; i < numDevices; ++i) { + images[i] = (const unsigned char*)binaries[i].first; lengths[i] = binaries[(int)i].second; } + cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); + for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + if(binaryStatus) { + binaryStatus->resize(numDevices); + } + object_ = ::clCreateProgramWithBinary( context(), (cl_uint) devices.size(), - (cl_device_id*)&devices.front(), + deviceIDs, lengths, images, binaryStatus != NULL - ? (cl_int*) &binaryStatus->front() + ? &binaryStatus->front() : NULL, &error); detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); @@ -2446,10 +4803,47 @@ public: } } + +#if defined(CL_VERSION_1_2) + /** + * Create program using builtin kernels. + * \param kernelNames Semi-colon separated list of builtin kernel names + */ + Program( + const Context& context, + const VECTOR_CLASS& devices, + const STRING_CLASS& kernelNames, + cl_int* err = NULL) + { + cl_int error; + + + ::size_t numDevices = devices.size(); + cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); + for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + object_ = ::clCreateProgramWithBuiltInKernels( + context(), + (cl_uint) devices.size(), + deviceIDs, + kernelNames.c_str(), + &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR); + if (err != NULL) { + *err = error; + } + } +#endif // #if defined(CL_VERSION_1_2) + Program() { } Program(const Program& program) : detail::Wrapper(program) { } + __CL_EXPLICIT_CONSTRUCTORS Program(const cl_program& program) : detail::Wrapper(program) { } + Program& operator = (const Program& rhs) { if (this != &rhs) { @@ -2458,24 +4852,73 @@ public: return *this; } + Program& operator = (const cl_program& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + cl_int build( const VECTOR_CLASS& devices, const char* options = NULL, void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, void* data = NULL) const + { + ::size_t numDevices = devices.size(); + cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); + for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + return detail::errHandler( + ::clBuildProgram( + object_, + (cl_uint) + devices.size(), + deviceIDs, + options, + notifyFptr, + data), + __BUILD_PROGRAM_ERR); + } + + cl_int build( + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL) const { return detail::errHandler( ::clBuildProgram( object_, - (cl_uint) - devices.size(), - (cl_device_id*)&devices.front(), + 0, + NULL, options, notifyFptr, data), __BUILD_PROGRAM_ERR); } +#if defined(CL_VERSION_1_2) + cl_int compile( + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL) const + { + return detail::errHandler( + ::clCompileProgram( + object_, + 0, + NULL, + options, + 0, + NULL, + NULL, + notifyFptr, + data), + __COMPILE_PROGRAM_ERR); + } +#endif + template cl_int getInfo(cl_program_info name, T* param) const { @@ -2540,7 +4983,96 @@ public: } }; -__GET_INFO_HELPER_WITH_RETAIN(cl::Program) +#if defined(CL_VERSION_1_2) +inline Program linkProgram( + Program input1, + Program input2, + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL, + cl_int* err = NULL) +{ + cl_int err_local = CL_SUCCESS; + + cl_program programs[2] = { input1(), input2() }; + + Context ctx = input1.getInfo(); + + cl_program prog = ::clLinkProgram( + ctx(), + 0, + NULL, + options, + 2, + programs, + notifyFptr, + data, + &err_local); + + detail::errHandler(err_local,__COMPILE_PROGRAM_ERR); + if (err != NULL) { + *err = err_local; + } + + return Program(prog); +} + +inline Program linkProgram( + VECTOR_CLASS inputPrograms, + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL, + cl_int* err = NULL) +{ + cl_int err_local = CL_SUCCESS; + + cl_program * programs = (cl_program*) alloca(inputPrograms.size() * sizeof(cl_program)); + + if (programs != NULL) { + for (unsigned int i = 0; i < inputPrograms.size(); i++) { + programs[i] = inputPrograms[i](); + } + } + + cl_program prog = ::clLinkProgram( + Context::getDefault()(), + 0, + NULL, + options, + (cl_uint)inputPrograms.size(), + programs, + notifyFptr, + data, + &err_local); + + detail::errHandler(err_local,__COMPILE_PROGRAM_ERR); + if (err != NULL) { + *err = err_local; + } + + return Program(prog); +} +#endif + +template<> +inline VECTOR_CLASS cl::Program::getInfo(cl_int* err) const +{ + VECTOR_CLASS< ::size_t> sizes = getInfo(); + VECTOR_CLASS binaries; + for (VECTOR_CLASS< ::size_t>::iterator s = sizes.begin(); s != sizes.end(); ++s) + { + char *ptr = NULL; + if (*s != 0) + ptr = new char[*s]; + binaries.push_back(ptr); + } + + cl_int result = getInfo(CL_PROGRAM_BINARIES, &binaries); + if (err != NULL) { + *err = result; + } + return binaries; +} inline Kernel::Kernel(const Program& program, const char* name, cl_int* err) { @@ -2560,7 +5092,69 @@ inline Kernel::Kernel(const Program& program, const char* name, cl_int* err) */ class CommandQueue : public detail::Wrapper { +private: + static volatile int default_initialized_; + static CommandQueue default_; + static volatile cl_int default_error_; public: + CommandQueue( + cl_command_queue_properties properties, + cl_int* err = NULL) + { + cl_int error; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + + if (error != CL_SUCCESS) { + if (err != NULL) { + *err = error; + } + } + else { + Device device = context.getInfo()[0]; + + object_ = ::clCreateCommandQueue( + context(), device(), properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } + } + /*! + * \brief Constructs a CommandQueue for an implementation defined device in the given context + */ + explicit CommandQueue( + const Context& context, + cl_command_queue_properties properties = 0, + cl_int* err = NULL) + { + cl_int error; + VECTOR_CLASS devices; + error = context.getInfo(CL_CONTEXT_DEVICES, &devices); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + + if (error != CL_SUCCESS) + { + if (err != NULL) { + *err = error; + } + return; + } + + object_ = ::clCreateCommandQueue(context(), devices[0](), properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + + if (err != NULL) { + *err = error; + } + + } + CommandQueue( const Context& context, const Device& device, @@ -2577,10 +5171,73 @@ public: } } + static CommandQueue getDefault(cl_int * err = NULL) + { + int state = detail::compare_exchange( + &default_initialized_, + __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED); + + if (state & __DEFAULT_INITIALIZED) { + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + if (state & __DEFAULT_BEING_INITIALIZED) { + // Assume writes will propagate eventually... + while(default_initialized_ != __DEFAULT_INITIALIZED) { + detail::fence(); + } + + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + cl_int error; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + + if (error != CL_SUCCESS) { + if (err != NULL) { + *err = error; + } + } + else { + Device device = context.getInfo()[0]; + + default_ = CommandQueue(context, device, 0, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } + + detail::fence(); + + default_error_ = error; + // Assume writes will propagate eventually... + default_initialized_ = __DEFAULT_INITIALIZED; + + detail::fence(); + + if (err != NULL) { + *err = default_error_; + } + return default_; + + } + CommandQueue() { } CommandQueue(const CommandQueue& commandQueue) : detail::Wrapper(commandQueue) { } + CommandQueue(const cl_command_queue& commandQueue) : detail::Wrapper(commandQueue) { } + CommandQueue& operator = (const CommandQueue& rhs) { if (this != &rhs) { @@ -2589,6 +5246,12 @@ public: return *this; } + CommandQueue& operator = (const cl_command_queue& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; + } + template cl_int getInfo(cl_command_queue_info name, T* param) const { @@ -2620,14 +5283,20 @@ public: const VECTOR_CLASS* events = NULL, Event* event = NULL) const { - return detail::errHandler( + cl_event tmp; + cl_int err = detail::errHandler( ::clEnqueueReadBuffer( object_, buffer(), blocking, offset, size, ptr, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), + (event != NULL) ? &tmp : NULL), __ENQUEUE_READ_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; } cl_int enqueueWriteBuffer( @@ -2639,14 +5308,20 @@ public: const VECTOR_CLASS* events = NULL, Event* event = NULL) const { - return detail::errHandler( + cl_event tmp; + cl_int err = detail::errHandler( ::clEnqueueWriteBuffer( object_, buffer(), blocking, offset, size, ptr, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), + (event != NULL) ? &tmp : NULL), __ENQUEUE_WRITE_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; } cl_int enqueueCopyBuffer( @@ -2658,16 +5333,21 @@ public: const VECTOR_CLASS* events = NULL, Event* event = NULL) const { - return detail::errHandler( + cl_event tmp; + cl_int err = detail::errHandler( ::clEnqueueCopyBuffer( object_, src(), dst(), src_offset, dst_offset, size, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), + (event != NULL) ? &tmp : NULL), __ENQEUE_COPY_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; } -#if defined(CL_VERSION_1_1) cl_int enqueueReadBufferRect( const Buffer& buffer, cl_bool blocking, @@ -2682,7 +5362,8 @@ public: const VECTOR_CLASS* events = NULL, Event* event = NULL) const { - return detail::errHandler( + cl_event tmp; + cl_int err = detail::errHandler( ::clEnqueueReadBufferRect( object_, buffer(), @@ -2697,10 +5378,14 @@ public: ptr, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), + (event != NULL) ? &tmp : NULL), __ENQUEUE_READ_BUFFER_RECT_ERR); - } + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } cl_int enqueueWriteBufferRect( const Buffer& buffer, @@ -2716,7 +5401,8 @@ public: const VECTOR_CLASS* events = NULL, Event* event = NULL) const { - return detail::errHandler( + cl_event tmp; + cl_int err = detail::errHandler( ::clEnqueueWriteBufferRect( object_, buffer(), @@ -2731,8 +5417,13 @@ public: ptr, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), + (event != NULL) ? &tmp : NULL), __ENQUEUE_WRITE_BUFFER_RECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; } cl_int enqueueCopyBufferRect( @@ -2748,7 +5439,8 @@ public: const VECTOR_CLASS* events = NULL, Event* event = NULL) const { - return detail::errHandler( + cl_event tmp; + cl_int err = detail::errHandler( ::clEnqueueCopyBufferRect( object_, src(), @@ -2762,10 +5454,51 @@ public: dst_slice_pitch, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), + (event != NULL) ? &tmp : NULL), __ENQEUE_COPY_BUFFER_RECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; } -#endif + +#if defined(CL_VERSION_1_2) + /** + * Enqueue a command to fill a buffer object with a pattern + * of a given size. The pattern is specified a as vector. + * \tparam PatternType The datatype of the pattern field. + * The pattern type must be an accepted OpenCL data type. + */ + template + cl_int enqueueFillBuffer( + const Buffer& buffer, + PatternType pattern, + ::size_t offset, + ::size_t size, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillBuffer( + object_, + buffer(), + static_cast(&pattern), + sizeof(PatternType), + offset, + size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_FILL_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // #if defined(CL_VERSION_1_2) cl_int enqueueReadImage( const Image& image, @@ -2778,14 +5511,20 @@ public: const VECTOR_CLASS* events = NULL, Event* event = NULL) const { - return detail::errHandler( + cl_event tmp; + cl_int err = detail::errHandler( ::clEnqueueReadImage( object_, image(), blocking, (const ::size_t *) origin, (const ::size_t *) region, row_pitch, slice_pitch, ptr, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), + (event != NULL) ? &tmp : NULL), __ENQUEUE_READ_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; } cl_int enqueueWriteImage( @@ -2799,14 +5538,20 @@ public: const VECTOR_CLASS* events = NULL, Event* event = NULL) const { - return detail::errHandler( + cl_event tmp; + cl_int err = detail::errHandler( ::clEnqueueWriteImage( object_, image(), blocking, (const ::size_t *) origin, (const ::size_t *) region, row_pitch, slice_pitch, ptr, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), + (event != NULL) ? &tmp : NULL), __ENQUEUE_WRITE_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; } cl_int enqueueCopyImage( @@ -2818,16 +5563,126 @@ public: const VECTOR_CLASS* events = NULL, Event* event = NULL) const { - return detail::errHandler( + cl_event tmp; + cl_int err = detail::errHandler( ::clEnqueueCopyImage( object_, src(), dst(), (const ::size_t *) src_origin, (const ::size_t *)dst_origin, (const ::size_t *) region, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), + (event != NULL) ? &tmp : NULL), __ENQUEUE_COPY_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; } +#if defined(CL_VERSION_1_2) + /** + * Enqueue a command to fill an image object with a specified color. + * \param fillColor is the color to use to fill the image. + * This is a four component RGBA floating-point color value if + * the image channel data type is not an unnormalized signed or + * unsigned data type. + */ + cl_int enqueueFillImage( + const Image& image, + cl_float4 fillColor, + const size_t<3>& origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillImage( + object_, + image(), + static_cast(&fillColor), + (const ::size_t *) origin, + (const ::size_t *) region, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_FILL_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueue a command to fill an image object with a specified color. + * \param fillColor is the color to use to fill the image. + * This is a four component RGBA signed integer color value if + * the image channel data type is an unnormalized signed integer + * type. + */ + cl_int enqueueFillImage( + const Image& image, + cl_int4 fillColor, + const size_t<3>& origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillImage( + object_, + image(), + static_cast(&fillColor), + (const ::size_t *) origin, + (const ::size_t *) region, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_FILL_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueue a command to fill an image object with a specified color. + * \param fillColor is the color to use to fill the image. + * This is a four component RGBA unsigned integer color value if + * the image channel data type is an unnormalized unsigned integer + * type. + */ + cl_int enqueueFillImage( + const Image& image, + cl_uint4 fillColor, + const size_t<3>& origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillImage( + object_, + image(), + static_cast(&fillColor), + (const ::size_t *) origin, + (const ::size_t *) region, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_FILL_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // #if defined(CL_VERSION_1_2) + cl_int enqueueCopyImageToBuffer( const Image& src, const Buffer& dst, @@ -2837,14 +5692,20 @@ public: const VECTOR_CLASS* events = NULL, Event* event = NULL) const { - return detail::errHandler( + cl_event tmp; + cl_int err = detail::errHandler( ::clEnqueueCopyImageToBuffer( object_, src(), dst(), (const ::size_t *) src_origin, (const ::size_t *) region, dst_offset, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), + (event != NULL) ? &tmp : NULL), __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; } cl_int enqueueCopyBufferToImage( @@ -2856,14 +5717,20 @@ public: const VECTOR_CLASS* events = NULL, Event* event = NULL) const { - return detail::errHandler( + cl_event tmp; + cl_int err = detail::errHandler( ::clEnqueueCopyBufferToImage( object_, src(), dst(), src_offset, (const ::size_t *) dst_origin, (const ::size_t *) region, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), + (event != NULL) ? &tmp : NULL), __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; } void* enqueueMapBuffer( @@ -2926,24 +5793,129 @@ public: const VECTOR_CLASS* events = NULL, Event* event = NULL) const { - return detail::errHandler( + cl_event tmp; + cl_int err = detail::errHandler( ::clEnqueueUnmapMemObject( object_, memory(), mapped_ptr, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), + (event != NULL) ? &tmp : NULL), __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; } +#if defined(CL_VERSION_1_2) + /** + * Enqueues a marker command which waits for either a list of events to complete, + * or all previously enqueued commands to complete. + * + * Enqueues a marker command which waits for either a list of events to complete, + * or if the list is empty it waits for all commands previously enqueued in command_queue + * to complete before it completes. This command returns an event which can be waited on, + * i.e. this event can be waited on to insure that all events either in the event_wait_list + * or all previously enqueued commands, queued before this command to command_queue, + * have completed. + */ + cl_int enqueueMarkerWithWaitList( + const VECTOR_CLASS *events = 0, + Event *event = 0) + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueMarkerWithWaitList( + object_, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_MARKER_WAIT_LIST_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * A synchronization point that enqueues a barrier operation. + * + * Enqueues a barrier command which waits for either a list of events to complete, + * or if the list is empty it waits for all commands previously enqueued in command_queue + * to complete before it completes. This command blocks command execution, that is, any + * following commands enqueued after it do not execute until it completes. This command + * returns an event which can be waited on, i.e. this event can be waited on to insure that + * all events either in the event_wait_list or all previously enqueued commands, queued + * before this command to command_queue, have completed. + */ + cl_int enqueueBarrierWithWaitList( + const VECTOR_CLASS *events = 0, + Event *event = 0) + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueBarrierWithWaitList( + object_, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_BARRIER_WAIT_LIST_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueues a command to indicate with which device a set of memory objects + * should be associated. + */ + cl_int enqueueMigrateMemObjects( + const VECTOR_CLASS &memObjects, + cl_mem_migration_flags flags, + const VECTOR_CLASS* events = NULL, + Event* event = NULL + ) + { + cl_event tmp; + + cl_mem* localMemObjects = static_cast(alloca(memObjects.size() * sizeof(cl_mem))); + for( int i = 0; i < (int)memObjects.size(); ++i ) { + localMemObjects[i] = memObjects[i](); + } + + + cl_int err = detail::errHandler( + ::clEnqueueMigrateMemObjects( + object_, + (cl_uint)memObjects.size(), + static_cast(localMemObjects), + flags, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // #if defined(CL_VERSION_1_2) + cl_int enqueueNDRangeKernel( const Kernel& kernel, const NDRange& offset, const NDRange& global, - const NDRange& local, + const NDRange& local = NullRange, const VECTOR_CLASS* events = NULL, Event* event = NULL) const { - return detail::errHandler( + cl_event tmp; + cl_int err = detail::errHandler( ::clEnqueueNDRangeKernel( object_, kernel(), (cl_uint) global.dimensions(), offset.dimensions() != 0 ? (const ::size_t*) offset : NULL, @@ -2951,8 +5923,13 @@ public: local.dimensions() != 0 ? (const ::size_t*) local : NULL, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), + (event != NULL) ? &tmp : NULL), __ENQUEUE_NDRANGE_KERNEL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; } cl_int enqueueTask( @@ -2960,17 +5937,23 @@ public: const VECTOR_CLASS* events = NULL, Event* event = NULL) const { - return detail::errHandler( + cl_event tmp; + cl_int err = detail::errHandler( ::clEnqueueTask( object_, kernel(), (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), + (event != NULL) ? &tmp : NULL), __ENQUEUE_TASK_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; } cl_int enqueueNativeKernel( - void (*userFptr)(void *), + void (CL_CALLBACK *userFptr)(void *), std::pair args, const VECTOR_CLASS* mem_objects = NULL, const VECTOR_CLASS* mem_locs = NULL, @@ -2987,7 +5970,8 @@ public: } } - return detail::errHandler( + cl_event tmp; + cl_int err = detail::errHandler( ::clEnqueueNativeKernel( object_, userFptr, args.first, args.second, (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, @@ -2995,18 +5979,29 @@ public: (mem_locs != NULL) ? (const void **) &mem_locs->front() : NULL, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), + (event != NULL) ? &tmp : NULL), __ENQUEUE_NATIVE_KERNEL); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; } - cl_int enqueueMarker(Event* event = NULL) const +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) + CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueMarker(Event* event = NULL) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED { return detail::errHandler( ::clEnqueueMarker(object_, (cl_event*) event), __ENQUEUE_MARKER_ERR); } - cl_int enqueueWaitForEvents(const VECTOR_CLASS& events) const + CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueWaitForEvents(const VECTOR_CLASS& events) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED { return detail::errHandler( ::clEnqueueWaitForEvents( @@ -3015,21 +6010,28 @@ public: (const cl_event*) &events.front()), __ENQUEUE_WAIT_FOR_EVENTS_ERR); } +#endif // #if defined(CL_VERSION_1_1) cl_int enqueueAcquireGLObjects( const VECTOR_CLASS* mem_objects = NULL, const VECTOR_CLASS* events = NULL, Event* event = NULL) const { - return detail::errHandler( + cl_event tmp; + cl_int err = detail::errHandler( ::clEnqueueAcquireGLObjects( object_, (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), + (event != NULL) ? &tmp : NULL), __ENQUEUE_ACQUIRE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; } cl_int enqueueReleaseGLObjects( @@ -3037,15 +6039,21 @@ public: const VECTOR_CLASS* events = NULL, Event* event = NULL) const { - return detail::errHandler( + cl_event tmp; + cl_int err = detail::errHandler( ::clEnqueueReleaseGLObjects( object_, (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), + (event != NULL) ? &tmp : NULL), __ENQUEUE_RELEASE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; } #if defined (USE_DX_INTEROP) @@ -3062,19 +6070,33 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( const VECTOR_CLASS* mem_objects = NULL, const VECTOR_CLASS* events = NULL, Event* event = NULL) const - { - static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL; - __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR); - - return detail::errHandler( + { + static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL; +#if defined(CL_VERSION_1_2) + cl_context context = getInfo(); + cl::Device device(getInfo()); + cl_platform_id platform = device.getInfo(); + __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueAcquireD3D10ObjectsKHR); +#endif +#if defined(CL_VERSION_1_1) + __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR); +#endif + + cl_event tmp; + cl_int err = detail::errHandler( pfn_clEnqueueAcquireD3D10ObjectsKHR( object_, (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), + (event != NULL) ? &tmp : NULL), __ENQUEUE_ACQUIRE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; } cl_int enqueueReleaseD3D10Objects( @@ -3083,26 +6105,46 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( Event* event = NULL) const { static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL; +#if defined(CL_VERSION_1_2) + cl_context context = getInfo(); + cl::Device device(getInfo()); + cl_platform_id platform = device.getInfo(); + __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueReleaseD3D10ObjectsKHR); +#endif // #if defined(CL_VERSION_1_2) +#if defined(CL_VERSION_1_1) __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR); +#endif // #if defined(CL_VERSION_1_1) - return detail::errHandler( + cl_event tmp; + cl_int err = detail::errHandler( pfn_clEnqueueReleaseD3D10ObjectsKHR( object_, (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL) ? (cl_event*) &events->front() : NULL, - (cl_event*) event), + (event != NULL) ? &tmp : NULL), __ENQUEUE_RELEASE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; } #endif - cl_int enqueueBarrier() const +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) + CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueBarrier() const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED { return detail::errHandler( ::clEnqueueBarrier(object_), __ENQUEUE_BARRIER_ERR); } +#endif // #if defined(CL_VERSION_1_1) cl_int flush() const { @@ -3115,825 +6157,6207 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( } }; -__GET_INFO_HELPER_WITH_RETAIN(cl::CommandQueue) +#ifdef _WIN32 +__declspec(selectany) volatile int CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED; +__declspec(selectany) CommandQueue CommandQueue::default_; +__declspec(selectany) volatile cl_int CommandQueue::default_error_ = CL_SUCCESS; +#else +__attribute__((weak)) volatile int CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED; +__attribute__((weak)) CommandQueue CommandQueue::default_; +__attribute__((weak)) volatile cl_int CommandQueue::default_error_ = CL_SUCCESS; +#endif -/*! \class KernelFunctor - * \brief Kernel functor interface - * - * \note Currently only functors of zero to ten arguments are supported. It - * is straightforward to add more and a more general solution, similar to - * Boost.Lambda could be followed if required in the future. +template< typename IteratorType > +Buffer::Buffer( + const Context &context, + IteratorType startIterator, + IteratorType endIterator, + bool readOnly, + bool useHostPtr, + cl_int* err) +{ + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + cl_mem_flags flags = 0; + if( readOnly ) { + flags |= CL_MEM_READ_ONLY; + } + else { + flags |= CL_MEM_READ_WRITE; + } + if( useHostPtr ) { + flags |= CL_MEM_USE_HOST_PTR; + } + + ::size_t size = sizeof(DataType)*(endIterator - startIterator); + + if( useHostPtr ) { + object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); + } else { + object_ = ::clCreateBuffer(context(), flags, size, 0, &error); + } + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + if( !useHostPtr ) { + CommandQueue queue(context, 0, &error); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + error = cl::copy(queue, startIterator, endIterator, *this); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } +} + +inline cl_int enqueueReadBuffer( + const Buffer& buffer, + cl_bool blocking, + ::size_t offset, + ::size_t size, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueReadBuffer(buffer, blocking, offset, size, ptr, events, event); +} + +inline cl_int enqueueWriteBuffer( + const Buffer& buffer, + cl_bool blocking, + ::size_t offset, + ::size_t size, + const void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueWriteBuffer(buffer, blocking, offset, size, ptr, events, event); +} + +inline void* enqueueMapBuffer( + const Buffer& buffer, + cl_bool blocking, + cl_map_flags flags, + ::size_t offset, + ::size_t size, + const VECTOR_CLASS* events = NULL, + Event* event = NULL, + cl_int* err = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + + void * result = ::clEnqueueMapBuffer( + queue(), buffer(), blocking, flags, offset, size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (cl_event*) event, + &error); + + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + return result; +} + +inline cl_int enqueueUnmapMemObject( + const Memory& memory, + void* mapped_ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (error != CL_SUCCESS) { + return error; + } + + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueUnmapMemObject( + queue(), memory(), mapped_ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; +} + +inline cl_int enqueueCopyBuffer( + const Buffer& src, + const Buffer& dst, + ::size_t src_offset, + ::size_t dst_offset, + ::size_t size, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyBuffer(src, dst, src_offset, dst_offset, size, events, event); +} + +/** + * Blocking copy operation between iterators and a buffer. + * Host to Device. + * Uses default command queue. */ -class KernelFunctor +template< typename IteratorType > +inline cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) + return error; + + return cl::copy(queue, startIterator, endIterator, buffer); +} + +/** + * Blocking copy operation between iterators and a buffer. + * Device to Host. + * Uses default command queue. + */ +template< typename IteratorType > +inline cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) + return error; + + return cl::copy(queue, buffer, startIterator, endIterator); +} + +/** + * Blocking copy operation between iterators and a buffer. + * Host to Device. + * Uses specified queue. + */ +template< typename IteratorType > +inline cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) +{ + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + ::size_t length = endIterator-startIterator; + ::size_t byteLength = length*sizeof(DataType); + + DataType *pointer = + static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_WRITE, 0, byteLength, 0, 0, &error)); + // if exceptions enabled, enqueueMapBuffer will throw + if( error != CL_SUCCESS ) { + return error; + } +#if defined(_MSC_VER) + std::copy( + startIterator, + endIterator, + stdext::checked_array_iterator( + pointer, length)); +#else + std::copy(startIterator, endIterator, pointer); +#endif + Event endEvent; + error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); + // if exceptions enabled, enqueueUnmapMemObject will throw + if( error != CL_SUCCESS ) { + return error; + } + endEvent.wait(); + return CL_SUCCESS; +} + +/** + * Blocking copy operation between iterators and a buffer. + * Device to Host. + * Uses specified queue. + */ +template< typename IteratorType > +inline cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) +{ + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + ::size_t length = endIterator-startIterator; + ::size_t byteLength = length*sizeof(DataType); + + DataType *pointer = + static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, byteLength, 0, 0, &error)); + // if exceptions enabled, enqueueMapBuffer will throw + if( error != CL_SUCCESS ) { + return error; + } + std::copy(pointer, pointer + length, startIterator); + Event endEvent; + error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); + // if exceptions enabled, enqueueUnmapMemObject will throw + if( error != CL_SUCCESS ) { + return error; + } + endEvent.wait(); + return CL_SUCCESS; +} + +#if defined(CL_VERSION_1_1) +inline cl_int enqueueReadBufferRect( + const Buffer& buffer, + cl_bool blocking, + const size_t<3>& buffer_offset, + const size_t<3>& host_offset, + const size_t<3>& region, + ::size_t buffer_row_pitch, + ::size_t buffer_slice_pitch, + ::size_t host_row_pitch, + ::size_t host_slice_pitch, + void *ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueReadBufferRect( + buffer, + blocking, + buffer_offset, + host_offset, + region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueWriteBufferRect( + const Buffer& buffer, + cl_bool blocking, + const size_t<3>& buffer_offset, + const size_t<3>& host_offset, + const size_t<3>& region, + ::size_t buffer_row_pitch, + ::size_t buffer_slice_pitch, + ::size_t host_row_pitch, + ::size_t host_slice_pitch, + void *ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueWriteBufferRect( + buffer, + blocking, + buffer_offset, + host_offset, + region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueCopyBufferRect( + const Buffer& src, + const Buffer& dst, + const size_t<3>& src_origin, + const size_t<3>& dst_origin, + const size_t<3>& region, + ::size_t src_row_pitch, + ::size_t src_slice_pitch, + ::size_t dst_row_pitch, + ::size_t dst_slice_pitch, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyBufferRect( + src, + dst, + src_origin, + dst_origin, + region, + src_row_pitch, + src_slice_pitch, + dst_row_pitch, + dst_slice_pitch, + events, + event); +} +#endif + +inline cl_int enqueueReadImage( + const Image& image, + cl_bool blocking, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t row_pitch, + ::size_t slice_pitch, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueReadImage( + image, + blocking, + origin, + region, + row_pitch, + slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueWriteImage( + const Image& image, + cl_bool blocking, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t row_pitch, + ::size_t slice_pitch, + void* ptr, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueWriteImage( + image, + blocking, + origin, + region, + row_pitch, + slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueCopyImage( + const Image& src, + const Image& dst, + const size_t<3>& src_origin, + const size_t<3>& dst_origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyImage( + src, + dst, + src_origin, + dst_origin, + region, + events, + event); +} + +inline cl_int enqueueCopyImageToBuffer( + const Image& src, + const Buffer& dst, + const size_t<3>& src_origin, + const size_t<3>& region, + ::size_t dst_offset, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyImageToBuffer( + src, + dst, + src_origin, + region, + dst_offset, + events, + event); +} + +inline cl_int enqueueCopyBufferToImage( + const Buffer& src, + const Image& dst, + ::size_t src_offset, + const size_t<3>& dst_origin, + const size_t<3>& region, + const VECTOR_CLASS* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyBufferToImage( + src, + dst, + src_offset, + dst_origin, + region, + events, + event); +} + + +inline cl_int flush(void) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.flush(); +} + +inline cl_int finish(void) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + + return queue.finish(); +} + +// Kernel Functor support +// New interface as of September 2011 +// Requires the C++11 std::tr1::function (note do not support TR1) +// Visual Studio 2010 and GCC 4.2 + +struct EnqueueArgs +{ + CommandQueue queue_; + const NDRange offset_; + const NDRange global_; + const NDRange local_; + VECTOR_CLASS events_; + + EnqueueArgs(NDRange global) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(NullRange) + { + + } + + EnqueueArgs(NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(local) + { + + } + + EnqueueArgs(NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(offset), + global_(global), + local_(local) + { + + } + + EnqueueArgs(Event e, NDRange global) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(NullRange) + { + events_.push_back(e); + } + + EnqueueArgs(Event e, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(Event e, NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(offset), + global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(const VECTOR_CLASS &events, NDRange global) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(NullRange), + events_(events) + { + + } + + EnqueueArgs(const VECTOR_CLASS &events, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(local), + events_(events) + { + + } + + EnqueueArgs(const VECTOR_CLASS &events, NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(offset), + global_(global), + local_(local), + events_(events) + { + + } + + EnqueueArgs(CommandQueue &queue, NDRange global) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(NullRange) + { + + } + + EnqueueArgs(CommandQueue &queue, NDRange global, NDRange local) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(local) + { + + } + + EnqueueArgs(CommandQueue &queue, NDRange offset, NDRange global, NDRange local) : + queue_(queue), + offset_(offset), + global_(global), + local_(local) + { + + } + + EnqueueArgs(CommandQueue &queue, Event e, NDRange global) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(NullRange) + { + events_.push_back(e); + } + + EnqueueArgs(CommandQueue &queue, Event e, NDRange global, NDRange local) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(CommandQueue &queue, Event e, NDRange offset, NDRange global, NDRange local) : + queue_(queue), + offset_(offset), + global_(global), + local_(local) + { + events_.push_back(e); + } + + EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange global) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(NullRange), + events_(events) + { + + } + + EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange global, NDRange local) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(local), + events_(events) + { + + } + + EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange offset, NDRange global, NDRange local) : + queue_(queue), + offset_(offset), + global_(global), + local_(local), + events_(events) + { + + } +}; + +namespace detail { + +class NullType {}; + +template +struct SetArg +{ + static void set (Kernel kernel, T0 arg) + { + kernel.setArg(index, arg); + } +}; + +template +struct SetArg +{ + static void set (Kernel, NullType) + { + } +}; + +template < + typename T0, typename T1, typename T2, typename T3, + typename T4, typename T5, typename T6, typename T7, + typename T8, typename T9, typename T10, typename T11, + typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, + typename T20, typename T21, typename T22, typename T23, + typename T24, typename T25, typename T26, typename T27, + typename T28, typename T29, typename T30, typename T31 +> +class KernelFunctorGlobal { private: Kernel kernel_; - CommandQueue queue_; - NDRange offset_; - NDRange global_; - NDRange local_; - cl_int err_; public: - KernelFunctor() { } - - KernelFunctor( - const Kernel& kernel, - const CommandQueue& queue, - const NDRange& offset, - const NDRange& global, - const NDRange& local) : - kernel_(kernel), - queue_(queue), - offset_(offset), - global_(global), - local_(local), - err_(CL_SUCCESS) + KernelFunctorGlobal( + Kernel kernel) : + kernel_(kernel) {} - KernelFunctor& operator=(const KernelFunctor& rhs); + KernelFunctorGlobal( + const Program& program, + const STRING_CLASS name, + cl_int * err = NULL) : + kernel_(program, name.c_str(), err) + {} - KernelFunctor(const KernelFunctor& rhs); + Event operator() ( + const EnqueueArgs& args, + T0 t0, + T1 t1 = NullType(), + T2 t2 = NullType(), + T3 t3 = NullType(), + T4 t4 = NullType(), + T5 t5 = NullType(), + T6 t6 = NullType(), + T7 t7 = NullType(), + T8 t8 = NullType(), + T9 t9 = NullType(), + T10 t10 = NullType(), + T11 t11 = NullType(), + T12 t12 = NullType(), + T13 t13 = NullType(), + T14 t14 = NullType(), + T15 t15 = NullType(), + T16 t16 = NullType(), + T17 t17 = NullType(), + T18 t18 = NullType(), + T19 t19 = NullType(), + T20 t20 = NullType(), + T21 t21 = NullType(), + T22 t22 = NullType(), + T23 t23 = NullType(), + T24 t24 = NullType(), + T25 t25 = NullType(), + T26 t26 = NullType(), + T27 t27 = NullType(), + T28 t28 = NullType(), + T29 t29 = NullType(), + T30 t30 = NullType(), + T31 t31 = NullType() + ) + { + Event event; + SetArg<0, T0>::set(kernel_, t0); + SetArg<1, T1>::set(kernel_, t1); + SetArg<2, T2>::set(kernel_, t2); + SetArg<3, T3>::set(kernel_, t3); + SetArg<4, T4>::set(kernel_, t4); + SetArg<5, T5>::set(kernel_, t5); + SetArg<6, T6>::set(kernel_, t6); + SetArg<7, T7>::set(kernel_, t7); + SetArg<8, T8>::set(kernel_, t8); + SetArg<9, T9>::set(kernel_, t9); + SetArg<10, T10>::set(kernel_, t10); + SetArg<11, T11>::set(kernel_, t11); + SetArg<12, T12>::set(kernel_, t12); + SetArg<13, T13>::set(kernel_, t13); + SetArg<14, T14>::set(kernel_, t14); + SetArg<15, T15>::set(kernel_, t15); + SetArg<16, T16>::set(kernel_, t16); + SetArg<17, T17>::set(kernel_, t17); + SetArg<18, T18>::set(kernel_, t18); + SetArg<19, T19>::set(kernel_, t19); + SetArg<20, T20>::set(kernel_, t20); + SetArg<21, T21>::set(kernel_, t21); + SetArg<22, T22>::set(kernel_, t22); + SetArg<23, T23>::set(kernel_, t23); + SetArg<24, T24>::set(kernel_, t24); + SetArg<25, T25>::set(kernel_, t25); + SetArg<26, T26>::set(kernel_, t26); + SetArg<27, T27>::set(kernel_, t27); + SetArg<28, T28>::set(kernel_, t28); + SetArg<29, T29>::set(kernel_, t29); + SetArg<30, T30>::set(kernel_, t30); + SetArg<31, T31>::set(kernel_, t31); + + args.queue_.enqueueNDRangeKernel( + kernel_, + args.offset_, + args.global_, + args.local_, + &args.events_, + &event); + + return event; + } - cl_int getError() { return err_; } - - inline Event operator()(const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const VECTOR_CLASS* events = NULL); - - template - inline Event operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const A15& a15, - const VECTOR_CLASS* events = NULL); }; -inline KernelFunctor Kernel::bind( - const CommandQueue& queue, - const NDRange& offset, - const NDRange& global, - const NDRange& local) -{ - return KernelFunctor(*this,queue,offset,global,local); -} +//------------------------------------------------------------------------------------------------------ -inline KernelFunctor Kernel::bind( - const CommandQueue& queue, - const NDRange& global, - const NDRange& local) -{ - return KernelFunctor(*this,queue,NullRange,global,local); -} -inline KernelFunctor& KernelFunctor::operator=(const KernelFunctor& rhs) +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31> +struct functionImplementation_ { - if (this == &rhs) { - return *this; + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 32)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { - kernel_ = rhs.kernel_; - queue_ = rhs.queue_; - offset_ = rhs.offset_; - global_ = rhs.global_; - local_ = rhs.local_; + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 31)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { - return *this; -} + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 30)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } -inline KernelFunctor::KernelFunctor(const KernelFunctor& rhs) : - kernel_(rhs.kernel_), - queue_(rhs.queue_), - offset_(rhs.offset_), - global_(rhs.global_), - local_(rhs.local_) + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + NullType, + NullType, + NullType> { -} + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + NullType, + NullType, + NullType> FunctorType; -Event KernelFunctor::operator()(const VECTOR_CLASS* /*events*/) -{ - Event event; + FunctorType functor_; - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const VECTOR_CLASS* events) -{ - Event event; - - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); - - return event; -} - -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const VECTOR_CLASS* events) -{ - Event event; + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - kernel_.setArg(12,a13); + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 29)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); + //! \brief Return type of the functor + typedef Event result_type; - return event; -} + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28); -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const VECTOR_CLASS* events) + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + NullType, + NullType, + NullType, + NullType> { - Event event; + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - kernel_.setArg(12,a13); - kernel_.setArg(13,a14); + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 28)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); + //! \brief Return type of the functor + typedef Event result_type; - return event; -} + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27); -template -Event KernelFunctor::operator()( - const A1& a1, - const A2& a2, - const A3& a3, - const A4& a4, - const A5& a5, - const A6& a6, - const A7& a7, - const A8& a8, - const A9& a9, - const A10& a10, - const A11& a11, - const A12& a12, - const A13& a13, - const A14& a14, - const A15& a15, - const VECTOR_CLASS* events) + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + NullType, + NullType, + NullType, + NullType, + NullType> { - Event event; + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { - kernel_.setArg(0,a1); - kernel_.setArg(1,a2); - kernel_.setArg(2,a3); - kernel_.setArg(3,a4); - kernel_.setArg(4,a5); - kernel_.setArg(5,a6); - kernel_.setArg(6,a7); - kernel_.setArg(7,a8); - kernel_.setArg(8,a9); - kernel_.setArg(9,a10); - kernel_.setArg(10,a11); - kernel_.setArg(11,a12); - kernel_.setArg(12,a13); - kernel_.setArg(13,a14); - kernel_.setArg(14,a15); + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 27)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } - err_ = queue_.enqueueNDRangeKernel( - kernel_, - offset_, - global_, - local_, - NULL, // bgaster_fixme - do we want to allow wait event lists? - &event); + //! \brief Return type of the functor + typedef Event result_type; - return event; -} + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 26)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 25)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 24)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 23)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 22)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 21)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 20)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 19)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 18)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 17)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 16)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 15)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 14)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 13)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 12)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 11)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 10)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 9)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 8)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + T6, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 7)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + T5, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 6)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + T4, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 5)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4); + } + + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3> +struct functionImplementation_ +< T0, + T1, + T2, + T3, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 4)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2, + T3); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3); + } + + +}; + +template< + typename T0, + typename T1, + typename T2> +struct functionImplementation_ +< T0, + T1, + T2, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 3)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1, + T2); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2); + } + + +}; + +template< + typename T0, + typename T1> +struct functionImplementation_ +< T0, + T1, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 2)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0, + T1); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1) + { + return functor_( + enqueueArgs, + arg0, + arg1); + } + + +}; + +template< + typename T0> +struct functionImplementation_ +< T0, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + + #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 1)) + // Fail variadic expansion for dev11 + static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); + #endif + + } + + //! \brief Return type of the functor + typedef Event result_type; + + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + T0); + + Event operator()( + const EnqueueArgs& enqueueArgs, + T0 arg0) + { + return functor_( + enqueueArgs, + arg0); + } + + +}; + + + + + +} // namespace detail + +//---------------------------------------------------------------------------------------------- + +template < + typename T0, typename T1 = detail::NullType, typename T2 = detail::NullType, + typename T3 = detail::NullType, typename T4 = detail::NullType, + typename T5 = detail::NullType, typename T6 = detail::NullType, + typename T7 = detail::NullType, typename T8 = detail::NullType, + typename T9 = detail::NullType, typename T10 = detail::NullType, + typename T11 = detail::NullType, typename T12 = detail::NullType, + typename T13 = detail::NullType, typename T14 = detail::NullType, + typename T15 = detail::NullType, typename T16 = detail::NullType, + typename T17 = detail::NullType, typename T18 = detail::NullType, + typename T19 = detail::NullType, typename T20 = detail::NullType, + typename T21 = detail::NullType, typename T22 = detail::NullType, + typename T23 = detail::NullType, typename T24 = detail::NullType, + typename T25 = detail::NullType, typename T26 = detail::NullType, + typename T27 = detail::NullType, typename T28 = detail::NullType, + typename T29 = detail::NullType, typename T30 = detail::NullType, + typename T31 = detail::NullType +> +struct make_kernel : + public detail::functionImplementation_< + T0, T1, T2, T3, + T4, T5, T6, T7, + T8, T9, T10, T11, + T12, T13, T14, T15, + T16, T17, T18, T19, + T20, T21, T22, T23, + T24, T25, T26, T27, + T28, T29, T30, T31 + > +{ +public: + typedef detail::KernelFunctorGlobal< + T0, T1, T2, T3, + T4, T5, T6, T7, + T8, T9, T10, T11, + T12, T13, T14, T15, + T16, T17, T18, T19, + T20, T21, T22, T23, + T24, T25, T26, T27, + T28, T29, T30, T31 + > FunctorType; + + make_kernel( + const Program& program, + const STRING_CLASS name, + cl_int * err = NULL) : + detail::functionImplementation_< + T0, T1, T2, T3, + T4, T5, T6, T7, + T8, T9, T10, T11, + T12, T13, T14, T15, + T16, T17, T18, T19, + T20, T21, T22, T23, + T24, T25, T26, T27, + T28, T29, T30, T31 + >( + FunctorType(program, name, err)) + {} + + make_kernel( + const Kernel kernel) : + detail::functionImplementation_< + T0, T1, T2, T3, + T4, T5, T6, T7, + T8, T9, T10, T11, + T12, T13, T14, T15, + T16, T17, T18, T19, + T20, T21, T22, T23, + T24, T25, T26, T27, + T28, T29, T30, T31 + >( + FunctorType(kernel)) + {} +}; + + +//---------------------------------------------------------------------------------------------------------------------- #undef __ERR_STR #if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) @@ -3947,11 +12371,13 @@ Event KernelFunctor::operator()( #undef __GET_IMAGE_INFO_ERR #undef __GET_SAMPLER_INFO_ERR #undef __GET_KERNEL_INFO_ERR +#undef __GET_KERNEL_ARG_INFO_ERR #undef __GET_KERNEL_WORK_GROUP_INFO_ERR #undef __GET_PROGRAM_INFO_ERR #undef __GET_PROGRAM_BUILD_INFO_ERR #undef __GET_COMMAND_QUEUE_INFO_ERR +#undef __CREATE_CONTEXT_ERR #undef __CREATE_CONTEXT_FROM_TYPE_ERR #undef __GET_SUPPORTED_IMAGE_FORMATS_ERR @@ -3965,6 +12391,7 @@ Event KernelFunctor::operator()( #undef __CREATE_USER_EVENT_ERR #undef __SET_USER_EVENT_STATUS_ERR #undef __SET_EVENT_CALLBACK_ERR +#undef __SET_PRINTF_CALLBACK_ERR #undef __WAIT_FOR_EVENTS_ERR @@ -3972,6 +12399,7 @@ Event KernelFunctor::operator()( #undef __SET_KERNEL_ARGS_ERR #undef __CREATE_PROGRAM_WITH_SOURCE_ERR #undef __CREATE_PROGRAM_WITH_BINARY_ERR +#undef __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR #undef __BUILD_PROGRAM_ERR #undef __CREATE_KERNELS_IN_PROGRAM_ERR @@ -3995,19 +12423,34 @@ Event KernelFunctor::operator()( #undef __ENQUEUE_TASK_ERR #undef __ENQUEUE_NATIVE_KERNEL +#undef __CL_EXPLICIT_CONSTRUCTORS + #undef __UNLOAD_COMPILER_ERR #endif //__CL_USER_OVERRIDE_ERROR_STRINGS -#undef __GET_INFO_HELPER_WITH_RETAIN +#undef __CL_FUNCTION_TYPE // Extensions +/** + * Deprecated APIs for 1.2 + */ +#if defined(CL_VERSION_1_1) #undef __INIT_CL_EXT_FCN_PTR +#endif // #if defined(CL_VERSION_1_1) #undef __CREATE_SUB_DEVICES #if defined(USE_CL_DEVICE_FISSION) #undef __PARAM_NAME_DEVICE_FISSION #endif // USE_CL_DEVICE_FISSION +#undef __DEFAULT_NOT_INITIALIZED +#undef __DEFAULT_BEING_INITIALIZED +#undef __DEFAULT_INITIALIZED + } // namespace cl +#ifdef _WIN32 +#pragma pop_macro("max") +#endif // _WIN32 + #endif // CL_HPP_ diff --git a/include/CL/cl_d3d10.h b/include/CL/cl_d3d10.h index ea9ab99c9..81b0d3721 100644 --- a/include/CL/cl_d3d10.h +++ b/include/CL/cl_d3d10.h @@ -1,5 +1,5 @@ /********************************************************************************** - * Copyright (c) 2008-2010 The Khronos Group Inc. + * Copyright (c) 2008-2012 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the @@ -113,7 +113,7 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)( typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, - cl_mem * mem_objects, + const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; diff --git a/include/CL/cl_ext.h b/include/CL/cl_ext.h index 4e92c7e63..0fc4e16eb 100644 --- a/include/CL/cl_ext.h +++ b/include/CL/cl_ext.h @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2008-2010 The Khronos Group Inc. + * Copyright (c) 2008-2013 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the @@ -34,10 +34,10 @@ extern "C" { #endif #ifdef __APPLE__ - #include + #include #include #else - #include + #include #endif /* cl_khr_fp64 extension - no extension #define since it has no functions */ @@ -64,7 +64,7 @@ extern "C" { * before using. */ #define cl_APPLE_SetMemObjectDestructor 1 -cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem /* memobj */, +cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem /* memobj */, void (* /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), void * /*user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; @@ -118,6 +118,48 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)( cl_uint * /* num_platforms */); +/* Extension: cl_khr_image2D_buffer + * + * This extension allows a 2D image to be created from a cl_mem buffer without a copy. + * The type associated with a 2D image created from a buffer in an OpenCL program is image2d_t. + * Both the sampler and sampler-less read_image built-in functions are supported for 2D images + * and 2D images created from a buffer. Similarly, the write_image built-ins are also supported + * for 2D images created from a buffer. + * + * When the 2D image from buffer is created, the client must specify the width, + * height, image format (i.e. channel order and channel data type) and optionally the row pitch + * + * The pitch specified must be a multiple of CL_DEVICE_IMAGE_PITCH_ALIGNMENT pixels. + * The base address of the buffer must be aligned to CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT pixels. + */ + +/************************************* + * cl_khr_initalize_memory extension * + *************************************/ + +#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x200E + + +/************************************** + * cl_khr_terminate_context extension * + **************************************/ + +#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x200F +#define CL_CONTEXT_TERMINATE_KHR 0x2010 + +#define cl_khr_terminate_context 1 +extern CL_API_ENTRY cl_int CL_API_CALL clTerminateContextKHR(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clTerminateContextKHR_fn)(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2; + + +/* + * Extension: cl_khr_spir + * + * This extension adds support to create an OpenCL program object from a + * Standard Portable Intermediate Representation (SPIR) instance + */ + /****************************************** * cl_nv_device_attribute_query extension * ******************************************/ @@ -130,12 +172,46 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)( #define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 #define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 +/********************************* +* cl_amd_device_memory_flags * +*********************************/ +#define cl_amd_device_memory_flags 1 + +#define CL_MEM_USE_PERSISTENT_MEM_AMD (1 << 6) // Alloc from GPU's CPU visible heap + +/* cl_device_info */ +#define CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT 0x4032 /********************************* * cl_amd_device_attribute_query * *********************************/ #define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036 +#define CL_DEVICE_TOPOLOGY_AMD 0x4037 +#define CL_DEVICE_BOARD_NAME_AMD 0x4038 +#define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD 0x4039 +#define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD 0x4040 +#define CL_DEVICE_SIMD_WIDTH_AMD 0x4041 +#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042 +#define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043 +#define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD 0x4044 +#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD 0x4045 +#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD 0x4046 +#define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD 0x4047 +#define CL_DEVICE_LOCAL_MEM_BANKS_AMD 0x4048 +typedef union +{ + struct { cl_uint type; cl_uint data[5]; } raw; + struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie; +} cl_device_topology_amd; + +#define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD 1 + + +/************************** +* cl_amd_offline_devices * +**************************/ +#define CL_CONTEXT_OFFLINE_DEVICES_AMD 0x403F #ifdef CL_VERSION_1_1 /*********************************** @@ -201,7 +277,68 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)( #define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0) #define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1) + /* cl_ext_atomic_counters_32 and cl_ext_atomic_counters_64 extensions + * no extension #define since they have no functions + */ + #define CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT 0x4032 +/********************************* +* cl_qcom_ext_host_ptr extension +*********************************/ + +#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29) + +#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0 +#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1 +#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2 +#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3 +#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4 +#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5 +#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6 +#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7 + +typedef cl_uint cl_image_pitch_info_qcom; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceImageInfoQCOM(cl_device_id device, + size_t image_width, + size_t image_height, + const cl_image_format *image_format, + cl_image_pitch_info_qcom param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret); + +typedef struct _cl_mem_ext_host_ptr +{ + // Type of external memory allocation. + // Legal values will be defined in layered extensions. + cl_uint allocation_type; + + // Host cache policy for this external memory allocation. + cl_uint host_cache_policy; + +} cl_mem_ext_host_ptr; + +/********************************* +* cl_qcom_ion_host_ptr extension +*********************************/ + +#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8 + +typedef struct _cl_mem_ion_host_ptr +{ + // Type of external memory allocation. + // Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. + cl_mem_ext_host_ptr ext_host_ptr; + + // ION file descriptor + int ion_filedesc; + + // Host pointer to the ION allocated memory + void* ion_hostptr; + +} cl_mem_ion_host_ptr; #endif /* CL_VERSION_1_1 */ diff --git a/include/CL/cl_gl.h b/include/CL/cl_gl.h index 3b4fe0690..af2036cc9 100644 --- a/include/CL/cl_gl.h +++ b/include/CL/cl_gl.h @@ -1,5 +1,5 @@ /********************************************************************************** - * Copyright (c) 2008-2010 The Khronos Group Inc. + * Copyright (c) 2008 - 2012 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the @@ -21,20 +21,11 @@ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. **********************************************************************************/ -/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ - -/* - * cl_gl.h contains Khronos-approved (KHR) OpenCL extensions which have - * OpenGL dependencies. The application is responsible for #including - * OpenGL or OpenGL ES headers before #including cl_gl.h. - */ - #ifndef __OPENCL_CL_GL_H #define __OPENCL_CL_GL_H #ifdef __APPLE__ #include -#include #else #include #endif @@ -48,15 +39,21 @@ typedef cl_uint cl_gl_texture_info; typedef cl_uint cl_gl_platform_info; typedef struct __GLsync *cl_GLsync; -/* cl_gl_object_type */ -#define CL_GL_OBJECT_BUFFER 0x2000 -#define CL_GL_OBJECT_TEXTURE2D 0x2001 -#define CL_GL_OBJECT_TEXTURE3D 0x2002 -#define CL_GL_OBJECT_RENDERBUFFER 0x2003 +/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */ +#define CL_GL_OBJECT_BUFFER 0x2000 +#define CL_GL_OBJECT_TEXTURE2D 0x2001 +#define CL_GL_OBJECT_TEXTURE3D 0x2002 +#define CL_GL_OBJECT_RENDERBUFFER 0x2003 +#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E +#define CL_GL_OBJECT_TEXTURE1D 0x200F +#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010 +#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011 + +/* cl_gl_texture_info */ +#define CL_GL_TEXTURE_TARGET 0x2004 +#define CL_GL_MIPMAP_LEVEL 0x2005 +#define CL_GL_NUM_SAMPLES 0x2012 -/* cl_gl_texture_info */ -#define CL_GL_TEXTURE_TARGET 0x2004 -#define CL_GL_MIPMAP_LEVEL 0x2005 extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLBuffer(cl_context /* context */, @@ -65,21 +62,13 @@ clCreateFromGLBuffer(cl_context /* context */, int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_mem CL_API_CALL -clCreateFromGLTexture2D(cl_context /* context */, - cl_mem_flags /* flags */, - cl_GLenum /* target */, - cl_GLint /* miplevel */, - cl_GLuint /* texture */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_mem CL_API_CALL -clCreateFromGLTexture3D(cl_context /* context */, - cl_mem_flags /* flags */, - cl_GLenum /* target */, - cl_GLint /* miplevel */, - cl_GLuint /* texture */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - +clCreateFromGLTexture(cl_context /* context */, + cl_mem_flags /* flags */, + cl_GLenum /* target */, + cl_GLint /* miplevel */, + cl_GLuint /* texture */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; + extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromGLRenderbuffer(cl_context /* context */, cl_mem_flags /* flags */, @@ -89,7 +78,7 @@ clCreateFromGLRenderbuffer(cl_context /* context */, extern CL_API_ENTRY cl_int CL_API_CALL clGetGLObjectInfo(cl_mem /* memobj */, cl_gl_object_type * /* gl_object_type */, - cl_GLuint * /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0; + cl_GLuint * /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0; extern CL_API_ENTRY cl_int CL_API_CALL clGetGLTextureInfo(cl_mem /* memobj */, @@ -114,33 +103,51 @@ clEnqueueReleaseGLObjects(cl_command_queue /* command_queue */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; + +// Deprecated OpenCL 1.1 APIs +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateFromGLTexture2D(cl_context /* context */, + cl_mem_flags /* flags */, + cl_GLenum /* target */, + cl_GLint /* miplevel */, + cl_GLuint /* texture */, + cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateFromGLTexture3D(cl_context /* context */, + cl_mem_flags /* flags */, + cl_GLenum /* target */, + cl_GLint /* miplevel */, + cl_GLuint /* texture */, + cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + /* cl_khr_gl_sharing extension */ - + #define cl_khr_gl_sharing 1 - + typedef cl_uint cl_gl_context_info; - + /* Additional Error Codes */ #define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000 - + /* cl_gl_context_info */ #define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006 #define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007 - + /* Additional cl_context_properties */ #define CL_GL_CONTEXT_KHR 0x2008 #define CL_EGL_DISPLAY_KHR 0x2009 #define CL_GLX_DISPLAY_KHR 0x200A #define CL_WGL_HDC_KHR 0x200B #define CL_CGL_SHAREGROUP_KHR 0x200C - + extern CL_API_ENTRY cl_int CL_API_CALL clGetGLContextInfoKHR(const cl_context_properties * /* properties */, cl_gl_context_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - + typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)( const cl_context_properties * properties, cl_gl_context_info param_name, @@ -152,4 +159,4 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)( } #endif -#endif /* __OPENCL_CL_GL_H */ +#endif /* __OPENCL_CL_GL_H */ diff --git a/include/CL/cl_gl_ext.h b/include/CL/cl_gl_ext.h index 7c9b64cfb..77d53536f 100644 --- a/include/CL/cl_gl_ext.h +++ b/include/CL/cl_gl_ext.h @@ -1,5 +1,5 @@ /********************************************************************************** - * Copyright (c) 2008-2010 The Khronos Group Inc. + * Copyright (c) 2008-2012 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the diff --git a/include/CL/cl_platform.h b/include/CL/cl_platform.h index 043b0489d..cf2b7210a 100644 --- a/include/CL/cl_platform.h +++ b/include/CL/cl_platform.h @@ -1,5 +1,5 @@ /********************************************************************************** - * Copyright (c) 2008-2010 The Khronos Group Inc. + * Copyright (c) 2008-2012 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the @@ -46,19 +46,75 @@ extern "C" { #endif #ifdef __APPLE__ - #define CL_EXTENSION_WEAK_LINK __attribute__((weak_import)) - #define CL_API_SUFFIX__VERSION_1_0 AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER - #define CL_EXT_SUFFIX__VERSION_1_0 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER - #define CL_API_SUFFIX__VERSION_1_1 CL_EXTENSION_WEAK_LINK - #define CL_EXT_SUFFIX__VERSION_1_1 CL_EXTENSION_WEAK_LINK - #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER + #define CL_EXTENSION_WEAK_LINK __attribute__((weak_import)) + #define CL_API_SUFFIX__VERSION_1_0 AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER + #define CL_EXT_SUFFIX__VERSION_1_0 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER + #define CL_API_SUFFIX__VERSION_1_1 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #define GCL_API_SUFFIX__VERSION_1_1 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #define CL_EXT_SUFFIX__VERSION_1_1 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 + + #ifdef AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER + #define CL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER + #define GCL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER + #define CL_EXT_SUFFIX__VERSION_1_2 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 + #else + #warning This path should never happen outside of internal operating system development. AvailabilityMacros do not function correctly here! + #define CL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #define GCL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #define CL_EXT_SUFFIX__VERSION_1_2 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER + #endif #else - #define CL_EXTENSION_WEAK_LINK + #define CL_EXTENSION_WEAK_LINK #define CL_API_SUFFIX__VERSION_1_0 #define CL_EXT_SUFFIX__VERSION_1_0 #define CL_API_SUFFIX__VERSION_1_1 #define CL_EXT_SUFFIX__VERSION_1_1 - #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED + #define CL_API_SUFFIX__VERSION_1_2 + #define CL_EXT_SUFFIX__VERSION_1_2 + + #ifdef __GNUC__ + #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED + #else + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED __attribute__((deprecated)) + #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED + #endif + + #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + #else + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED __attribute__((deprecated)) + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + #endif + #elif _WIN32 + #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED + #else + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED __declspec(deprecated) + #endif + + #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + #else + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED __declspec(deprecated) + #endif + #else + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED + + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + #endif #endif #if (defined (_WIN32) && defined(_MSC_VER)) @@ -252,7 +308,7 @@ typedef double cl_double __attribute__((aligned(8))); #include -/* Mirror types to GL types. Mirror types allow us to avoid deciding which headers to load based on whether we are using GL or GLES here. */ +/* Mirror types to GL types. Mirror types allow us to avoid deciding which 87s to load based on whether we are using GL or GLES here. */ typedef unsigned int cl_GLuint; typedef int cl_GLint; typedef unsigned int cl_GLenum; @@ -1170,13 +1226,13 @@ typedef union /* Macro to facilitate debugging * Usage: * Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source. - * The first line ends with: CL_PROGRAM_STRING_BEGIN \" + * The first line ends with: CL_PROGRAM_STRING_DEBUG_INFO \" * Each line thereafter of OpenCL C source must end with: \n\ * The last line ends in "; * * Example: * - * const char *my_program = CL_PROGRAM_STRING_BEGIN "\ + * const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\ * kernel void foo( int a, float * b ) \n\ * { \n\ * // my comment \n\ diff --git a/include/CL/opencl.h b/include/CL/opencl.h index 26a638997..3f0052471 100644 --- a/include/CL/opencl.h +++ b/include/CL/opencl.h @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2008-2010 The Khronos Group Inc. + * Copyright (c) 2008-2012 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the diff --git a/include/atidlas/backend/templates/base.h b/include/atidlas/backend/templates/base.h index d75c000ce..760d18b2e 100644 --- a/include/atidlas/backend/templates/base.h +++ b/include/atidlas/backend/templates/base.h @@ -147,7 +147,7 @@ protected: static std::string vstore(unsigned int simd_width, std::string const & value, std::string const & offset, std::string const & ptr); static std::string vload(unsigned int simd_width, std::string const & offset, std::string const & ptr); static std::string append_width(std::string const & str, unsigned int width); - bool has_strided_access(symbolic_expressions_container const & symbolic_expressions) const; + static bool requires_fallback(symbolic_expressions_container const & symbolic_expressions); void set_arguments(symbolic_expressions_container const & symbolic_expressions, cl::Kernel & kernel, unsigned int & current_arg); @@ -176,8 +176,6 @@ class base_impl : public base { private: virtual int check_invalid_impl(cl::Device const &, symbolic_expressions_container const &) const; -protected: - bool has_misaligned_offset(symbolic_expressions_container const & symbolic_expressions); public: typedef ParametersType parameters_type; base_impl(parameters_type const & parameters, binding_policy_t binding_policy); diff --git a/include/atidlas/cl/queues.h b/include/atidlas/cl/queues.h index 738d9923c..769955af7 100644 --- a/include/atidlas/cl/queues.h +++ b/include/atidlas/cl/queues.h @@ -11,7 +11,7 @@ namespace atidlas namespace cl_ext { -typedef std::map kernels_t; +typedef std::map, cl::Kernel> kernels_t; typedef std::vector > > queues_t; queues_t init_queues(); diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 55fc1dbb4..7156ed10f 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -5,7 +5,7 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}) add_library(atidlas SHARED ${LIBATIDLAS_SRC}) set_target_properties(atidlas PROPERTIES - COMPILE_FLAGS "-Wno-sign-compare -DCL_USE_DEPRECATED_OPENCL_1_1_APIS -D__CL_ENABLE_EXCEPTIONS -Wall -Wextra -pedantic") + COMPILE_FLAGS "-Wno-sign-compare -D__CL_ENABLE_EXCEPTIONS -Wall -Wextra -pedantic") #install(TARGETS atidlas LIBRARY DESTINATION lib) diff --git a/lib/backend/templates/base.cpp b/lib/backend/templates/base.cpp index f29cadeb1..23306a7f1 100644 --- a/lib/backend/templates/base.cpp +++ b/lib/backend/templates/base.cpp @@ -366,17 +366,13 @@ bool base::is_strided(symbolic_expression_node const & node) || node.op.type==OPERATOR_OUTER_PROD_TYPE; } -bool base::has_strided_access(symbolic_expressions_container const & symbolic_expressions) const +bool base::requires_fallback(symbolic_expressions_container const & symbolic_expressions) { for (symbolic_expressions_container::data_type::const_iterator it = symbolic_expressions.data().begin(); it != symbolic_expressions.data().end(); ++it) - { - std::vector arrays = filter_elements(DENSE_ARRAY_TYPE, **it); - for (std::vector::iterator itt = arrays.begin(); itt != arrays.end(); ++itt) - if(std::max(itt->array.stride1, itt->array.stride2)>1) + for(symbolic_expression::container_type::const_iterator itt = (*it)->tree().begin(); itt != (*it)->tree().end() ; ++itt) + if( (itt->lhs.subtype==DENSE_ARRAY_TYPE && (std::max(itt->lhs.array.stride1, itt->lhs.array.stride2)>1 || std::max(itt->lhs.array.start1,itt->lhs.array.start2)>0)) + || (itt->rhs.subtype==DENSE_ARRAY_TYPE && (std::max(itt->rhs.array.stride1, itt->rhs.array.stride2)>1 || std::max(itt->rhs.array.start1,itt->rhs.array.start2)>0))) return true; - if(filter_nodes(&is_strided, **it, true).empty()==false) - return true; - } return false; } @@ -520,19 +516,6 @@ template int base_impl::check_invalid_impl(cl::Device const &, symbolic_expressions_container const &) const { return TEMPLATE_VALID; } -template -bool base_impl::has_misaligned_offset(symbolic_expressions_container const & symbolic_expressions) -{ - for (symbolic_expressions_container::data_type::const_iterator it = symbolic_expressions.data().begin(); it != symbolic_expressions.data().end(); ++it) - { - std::vector arrays = filter_elements(DENSE_ARRAY_TYPE, **it); - for (std::vector::iterator itt = arrays.begin(); itt != arrays.end(); ++itt) - if (itt->array.start1>0 || itt->array.start2>0) - return true; - } - return false; -} - template base_impl::base_impl(parameters_type const & parameters, binding_policy_t binding_policy) : base(binding_policy), p_(parameters) { } diff --git a/lib/backend/templates/mreduction.cpp b/lib/backend/templates/mreduction.cpp index a7e78703a..a40f80bef 100644 --- a/lib/backend/templates/mreduction.cpp +++ b/lib/backend/templates/mreduction.cpp @@ -225,7 +225,7 @@ void mreduction::enqueue(cl::CommandQueue & queue, //Kernel int idx = 0; - if(reduction_type_==REDUCE_COLUMNS && p_.simd_width>1 && has_strided_access(symbolic_expressions)) + if(reduction_type_==REDUCE_COLUMNS && p_.simd_width>1 && requires_fallback(symbolic_expressions)) idx = 1; cl::Program & program = programs[idx].program(); cl::Kernel kernel(program, kname); diff --git a/lib/backend/templates/reduction.cpp b/lib/backend/templates/reduction.cpp index d5bb65000..1cc5db3d3 100644 --- a/lib/backend/templates/reduction.cpp +++ b/lib/backend/templates/reduction.cpp @@ -286,7 +286,7 @@ void reduction::enqueue(cl::CommandQueue & queue, symbolic_expressions_container const & symbolic_expressions) { //Preprocessing - std::vector size = input_sizes(symbolic_expressions); + int_t size = input_sizes(symbolic_expressions)[0]; std::vector reductions; for (symbolic_expressions_container::data_type::const_iterator it = symbolic_expressions.data().begin(); it != symbolic_expressions.data().end(); ++it) { @@ -303,7 +303,7 @@ void reduction::enqueue(cl::CommandQueue & queue, fill_kernel_name(kopt[0], label, "o0"); fill_kernel_name(kopt[1], label, "o1"); - bool fallback = has_strided_access(symbolic_expressions) && p_.simd_width > 1; + bool fallback = p_.simd_width > 1 && (requires_fallback(symbolic_expressions) || (size%p_.simd_width>0)); cl::Program & program = programs[fallback?0:1].program(); cl::Kernel kernels[2] = { cl::Kernel(program, fallback?kfallback[0]:kopt[0]), cl::Kernel(program, fallback?kfallback[1]:kopt[1]) }; @@ -319,7 +319,7 @@ void reduction::enqueue(cl::CommandQueue & queue, for (unsigned int k = 0; k < 2; k++) { unsigned int n_arg = 0; - kernels[k].setArg(n_arg++, cl_uint(size[0])); + kernels[k].setArg(n_arg++, cl_uint(size)); //Temporary buffers unsigned int i = 0; diff --git a/lib/backend/templates/vaxpy.cpp b/lib/backend/templates/vaxpy.cpp index 30d5bc2e2..38890a03f 100644 --- a/lib/backend/templates/vaxpy.cpp +++ b/lib/backend/templates/vaxpy.cpp @@ -117,11 +117,15 @@ void vaxpy::enqueue(cl::CommandQueue & queue, char kopt[10]; fill_kernel_name(kfb, label, "f"); fill_kernel_name(kopt, label, "o"); - bool strided = has_strided_access(symbolic_expressions); - bool misaligned = has_misaligned_offset(symbolic_expressions); - bool fallback = p_.simd_width > 1 && (strided || (size%p_.simd_width>0) || misaligned); + bool fallback = p_.simd_width > 1 && (requires_fallback(symbolic_expressions) || (size%p_.simd_width>0)); + cl::Program const & program = programs[fallback?0:1].program(); - cl::Kernel kernel(program, fallback?kfb:kopt); + cl_ext::kernels_t::key_type key(program(), label); + cl_ext::kernels_t::iterator it = cl_ext::kernels.find(key); + if(it==cl_ext::kernels.end()) + it = cl_ext::kernels.insert(std::make_pair(key, cl::Kernel(program, fallback?kfb:kopt))).first; + cl::Kernel & kernel = it->second; + //NDRange cl::NDRange grange(p_.local_size_0*p_.num_groups); cl::NDRange lrange(p_.local_size_0); diff --git a/lib/cl/program_map.cpp b/lib/cl/program_map.cpp index 99910e9a3..2e0be6710 100644 --- a/lib/cl/program_map.cpp +++ b/lib/cl/program_map.cpp @@ -36,27 +36,27 @@ cl::Program program_map::add(cl::Context & context, std::string const & pname, s // Retrieves the program in the cache bool compile = true; - if (cache_path_.size()) - { - std::string prefix; - for(std::vector< cl::Device >::const_iterator it = devices.begin(); it != devices.end(); ++it) - prefix += it->getInfo() + it->getInfo() + it->getInfo(); - std::string sha1 = tools::sha1(prefix + source); +// if (cache_path_.size()) +// { +// std::string prefix; +// for(std::vector< cl::Device >::const_iterator it = devices.begin(); it != devices.end(); ++it) +// prefix += it->getInfo() + it->getInfo() + it->getInfo(); +// std::string sha1 = tools::sha1(prefix + source); - std::ifstream cached((cache_path_+sha1).c_str(),std::ios::binary); - if (cached) - { - std::size_t len; - std::vector buffer; - cached.read((char*)&len, sizeof(std::size_t)); - buffer.resize(len); - cached.read((char*)buffer.data(), std::streamsize(len)); - char* cbuffer = buffer.data(); - res = cl::Program(context, devices, cl::Program::Binaries(1, std::make_pair(cbuffer, len)), NULL, &err); - compile = false; - } - } - //Gets from source +// std::ifstream cached((cache_path_+sha1).c_str(),std::ios::binary); +// if (cached) +// { +// std::size_t len; +// std::vector buffer; +// cached.read((char*)&len, sizeof(std::size_t)); +// buffer.resize(len); +// cached.read((char*)buffer.data(), std::streamsize(len)); +// char* cbuffer = buffer.data(); +// res = cl::Program(context, devices, cl::Program::Binaries(1, std::make_pair(cbuffer, len)), NULL, &err); +// compile = false; +// } +// } +// //Gets from source if(compile) { const char * csrc = source.c_str(); @@ -80,7 +80,6 @@ cl::Program program_map::add(cl::Context & context, std::string const & pname, s { std::vector sizes = res.getInfo(); std::vector binaries = res.getInfo(); - std::string prefix; for(std::vector< cl::Device >::const_iterator it = devices.begin(); it != devices.end(); ++it) prefix += it->getInfo() + it->getInfo() + it->getInfo(); diff --git a/lib/model/model.cpp b/lib/model/model.cpp index 96c8865d1..510e2877b 100644 --- a/lib/model/model.cpp +++ b/lib/model/model.cpp @@ -195,6 +195,7 @@ void import(std::string const & fname, cl::CommandQueue & queue, model_map_t& re //Parse the JSON document js::Document document; std::ifstream t(fname.c_str()); + if(!t) return; std::string str; t.seekg(0, std::ios::end); str.reserve(t.tellg()); diff --git a/python/autotune/pysrc/genetic.py b/python/autotune/pysrc/genetic.py index 893d51dce..c47ee677c 100644 --- a/python/autotune/pysrc/genetic.py +++ b/python/autotune/pysrc/genetic.py @@ -129,7 +129,6 @@ class GeneticOperators(object): def evaluate(self, individual): if tuple(individual) not in self.cache: parameters = self.decode(individual) - parameters = [2,16,16,16,6,1,6,atd.fetching_policy_type.FETCH_FROM_LOCAL,atd.fetching_policy_type.FETCH_FROM_LOCAL,16,16] template = self.Template(*parameters) try: tt = misc_tools.benchmark(template, self.symbolic) diff --git a/python/autotune/pysrc/genetic.pyc b/python/autotune/pysrc/genetic.pyc index 7f6c9027bc7e6e62ee376caaf7fa986d086d734e..7e3b0aa3a029eabb8a2930bc91332a17cb4cb045 100644 GIT binary patch delta 304 zcmX@-@Xel$`7%aF~*P;`)i zk&!V4Vjs{rn4@h9k4{z;lhR=U`oyn<5lHG3gGjDSE+DDF2P8^ZK}2amPHK=AkQEd> kxl8OKrwEYAz$nVdzd22OBO7DyW(he_Mn;~^dh!a406kSVL;wH) delta 351 zcmYL^u}T9$5QhJ`z3j!iToQ}Bh(SqVVR2Sk3W`=X7WxVc@ivvoHo_eiX~pIqM8ds5 zo~4 zthBdzFFhE7VXjNT_%I4i!TPYRf3Y@W*W z!bUJH*bZEb!k6ES39c(wVNVvZpKHK9qpO!>c+kLieM~}3zd=MM*hKhCbb8-ZAIWak z=B(Gb$ISZzE#T;If@~%X^N@MCU5(P6C}VkZB)F