reducing overhead; reverted custom CL/ header because CL/cl.hpp was buggy
This commit is contained in:
162
bench/blas.cpp
162
bench/blas.cpp
@@ -40,7 +40,7 @@ void bench(ad::numeric_type dtype)
|
||||
total_time += times.back();\
|
||||
}\
|
||||
float tres = median(times);\
|
||||
std::cout << " " << PERF << std::flush;\
|
||||
std::cout << " " << tres << std::flush;\
|
||||
}
|
||||
|
||||
#define CL_BENCHMARK(OP, PERF) BENCHMARK(OP, PERF, ad::cl_ext::synchronize(ad::cl_ext::default_context()))
|
||||
@@ -86,89 +86,89 @@ void bench(ad::numeric_type dtype)
|
||||
}
|
||||
std::cout << "\n\n" << std::flush;
|
||||
|
||||
std::cout << "#DOT" << std::endl;
|
||||
for(std::vector<int_t>::const_iterator it = BLAS1_N.begin() ; it != BLAS1_N.end() ; ++it)
|
||||
{
|
||||
int_t N = *it;
|
||||
std::cout << N;
|
||||
/* ATIDLAS */
|
||||
ad::array x(N, dtype), y(N, dtype);
|
||||
ad::array scratch(N, dtype);
|
||||
ad::scalar s(dtype);
|
||||
CL_BENCHMARK(s = dot(x,y), bandwidth(2*N, tres, dtsize));
|
||||
/* clAmdBlas */
|
||||
#ifdef BENCH_CLAMDBLAS
|
||||
CL_BENCHMARK(clAmdBlasSdot(N, s.data()(), 0, x.data()(), 0, 1, y.data()(), 0, 1, scratch.data()(), 1, &ad::cl_ext::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(2*N, tres, dtsize))
|
||||
#endif
|
||||
/* BLAS */
|
||||
#ifdef BENCH_CBLAS
|
||||
std::vector<float> cx(N), cy(N);
|
||||
ad::copy(x, cx);
|
||||
ad::copy(y, cy);
|
||||
CPU_BENCHMARK(cblas_sdot(N, cx.data(), 1, cy.data(), 1), bandwidth(2*N, tres, dtsize));
|
||||
#endif
|
||||
std::cout << std::endl;
|
||||
}
|
||||
std::cout << "\n\n" << std::flush;
|
||||
|
||||
/*---------*/
|
||||
/*--BLAS2--*/
|
||||
/*---------*/
|
||||
//T-layout
|
||||
std::cout << "#GEMV-T" << std::endl;
|
||||
for(std::vector<int>::const_iterator Mit = BLAS2_M.begin() ; Mit != BLAS2_M.end() ; ++Mit)
|
||||
for(std::vector<int_t>::const_iterator Nit = BLAS2_N.begin() ; Nit != BLAS2_N.end() ; ++Nit)
|
||||
{
|
||||
int_t M = *Mit;
|
||||
int_t N = *Nit;
|
||||
std::cout << M << "," << N;
|
||||
/* ATIDLAS */
|
||||
ad::array A(N, M, dtype), y(M, dtype), x(N, dtype);
|
||||
CL_BENCHMARK(y = dot(trans(A),x), bandwidth(M*N + M + N, tres, dtsize));
|
||||
/* clAmdBlas */
|
||||
#ifdef BENCH_CLAMDBLAS
|
||||
CL_BENCHMARK(clAmdBlasSgemv(clAmdBlasColumnMajor, clAmdBlasTrans, N, M, 1, A.data()(), A.ld(), x.data()(), 0, 1, 0, y.data()(), 0, 1, 1, &ad::cl_ext::get_queue(x.context(), 0)(),0, NULL, NULL), bandwidth(M*N + M + N, tres, dtsize))
|
||||
#endif
|
||||
/* BLAS */
|
||||
#ifdef BENCH_CBLAS
|
||||
std::vector<float> cA(N*M), cx(N), cy(M);
|
||||
ad::copy(x, cx);
|
||||
ad::copy(y, cy);
|
||||
ad::copy(A, cA);
|
||||
CPU_BENCHMARK(cblas_sgemv(CblasColMajor, CblasTrans, N, M, 1, cA.data(), N, cx.data(), 1, 0, cy.data(), 1), bandwidth(M*N + M + N, tres, dtsize));
|
||||
#endif
|
||||
std::cout << std::endl;
|
||||
}
|
||||
std::cout << "\n\n" << std::flush;
|
||||
// std::cout << "#DOT" << std::endl;
|
||||
// for(std::vector<int_t>::const_iterator it = BLAS1_N.begin() ; it != BLAS1_N.end() ; ++it)
|
||||
// {
|
||||
// int_t N = *it;
|
||||
// std::cout << N;
|
||||
// /* ATIDLAS */
|
||||
// ad::array x(N, dtype), y(N, dtype);
|
||||
// ad::array scratch(N, dtype);
|
||||
// ad::scalar s(dtype);
|
||||
// CL_BENCHMARK(s = dot(x,y), bandwidth(2*N, tres, dtsize));
|
||||
// /* clAmdBlas */
|
||||
//#ifdef BENCH_CLAMDBLAS
|
||||
// CL_BENCHMARK(clAmdBlasSdot(N, s.data()(), 0, x.data()(), 0, 1, y.data()(), 0, 1, scratch.data()(), 1, &ad::cl_ext::get_queue(x.context(), 0)(), 0, NULL, NULL), bandwidth(2*N, tres, dtsize))
|
||||
//#endif
|
||||
// /* BLAS */
|
||||
//#ifdef BENCH_CBLAS
|
||||
// std::vector<float> cx(N), cy(N);
|
||||
// ad::copy(x, cx);
|
||||
// ad::copy(y, cy);
|
||||
// CPU_BENCHMARK(cblas_sdot(N, cx.data(), 1, cy.data(), 1), bandwidth(2*N, tres, dtsize));
|
||||
//#endif
|
||||
// std::cout << std::endl;
|
||||
// }
|
||||
// std::cout << "\n\n" << std::flush;
|
||||
|
||||
// /*---------*/
|
||||
// /*--BLAS3--*/
|
||||
// /*--BLAS2--*/
|
||||
// /*---------*/
|
||||
std::cout << "#GEMM-NT" << std::endl;
|
||||
for(std::vector<int_t>::const_iterator Mit = BLAS3_M.begin() ; Mit != BLAS3_M.end() ; ++Mit)
|
||||
for(std::vector<int_t>::const_iterator Nit = BLAS3_N.begin() ; Nit != BLAS3_N.end() ; ++Nit)
|
||||
for(std::vector<int_t>::const_iterator Kit = BLAS3_K.begin() ; Kit != BLAS3_K.end() ; ++Kit)
|
||||
{
|
||||
int_t M = *Kit, N = *Kit, K = *Kit;
|
||||
std::cout << M << "," << N << "," << K;
|
||||
/* ATIDLAS */
|
||||
ad::array C(M, N, dtype), A(M, K, dtype), B(N, K, dtype);
|
||||
CL_BENCHMARK(C = dot(A,trans(B)), gflops((double)2*M*N*K, tres));
|
||||
/* clAmdBlas */
|
||||
#ifdef BENCH_CLAMDBLAS
|
||||
CL_BENCHMARK(clAmdBlasSgemm(clAmdBlasColumnMajor, clAmdBlasNoTrans, clAmdBlasTrans, M, N, K, 1, A.data()(), A.ld(), B.data()(), B.ld(),
|
||||
0, C.data()(), C.ld(), 1, &ad::cl_ext::get_queue(C.context(), 0)(),0, NULL, NULL), gflops((double)2*M*N*K, tres))
|
||||
#endif
|
||||
/* BLAS */
|
||||
#ifdef BENCH_CBLAS
|
||||
std::vector<float> cC(M*N), cA(M*K), cB(N*K);
|
||||
ad::copy(C, cC);
|
||||
ad::copy(A, cA);
|
||||
ad::copy(B, cB);
|
||||
CPU_BENCHMARK(cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, cA.data(), M, cB.data(), N, 1, cC.data(), M), gflops((double)2*M*N*K, tres));
|
||||
#endif
|
||||
std::cout << std::endl;
|
||||
}
|
||||
// //T-layout
|
||||
// std::cout << "#GEMV-T" << std::endl;
|
||||
// for(std::vector<int>::const_iterator Mit = BLAS2_M.begin() ; Mit != BLAS2_M.end() ; ++Mit)
|
||||
// for(std::vector<int_t>::const_iterator Nit = BLAS2_N.begin() ; Nit != BLAS2_N.end() ; ++Nit)
|
||||
// {
|
||||
// int_t M = *Mit;
|
||||
// int_t N = *Nit;
|
||||
// std::cout << M << "," << N;
|
||||
// /* ATIDLAS */
|
||||
// ad::array A(N, M, dtype), y(M, dtype), x(N, dtype);
|
||||
// CL_BENCHMARK(y = dot(trans(A),x), bandwidth(M*N + M + N, tres, dtsize));
|
||||
// /* clAmdBlas */
|
||||
// #ifdef BENCH_CLAMDBLAS
|
||||
// CL_BENCHMARK(clAmdBlasSgemv(clAmdBlasColumnMajor, clAmdBlasTrans, N, M, 1, A.data()(), A.ld(), x.data()(), 0, 1, 0, y.data()(), 0, 1, 1, &ad::cl_ext::get_queue(x.context(), 0)(),0, NULL, NULL), bandwidth(M*N + M + N, tres, dtsize))
|
||||
// #endif
|
||||
// /* BLAS */
|
||||
// #ifdef BENCH_CBLAS
|
||||
// std::vector<float> cA(N*M), cx(N), cy(M);
|
||||
// ad::copy(x, cx);
|
||||
// ad::copy(y, cy);
|
||||
// ad::copy(A, cA);
|
||||
// CPU_BENCHMARK(cblas_sgemv(CblasColMajor, CblasTrans, N, M, 1, cA.data(), N, cx.data(), 1, 0, cy.data(), 1), bandwidth(M*N + M + N, tres, dtsize));
|
||||
// #endif
|
||||
// std::cout << std::endl;
|
||||
// }
|
||||
// std::cout << "\n\n" << std::flush;
|
||||
|
||||
//// /*---------*/
|
||||
//// /*--BLAS3--*/
|
||||
//// /*---------*/
|
||||
// std::cout << "#GEMM-NT" << std::endl;
|
||||
// for(std::vector<int_t>::const_iterator Mit = BLAS3_M.begin() ; Mit != BLAS3_M.end() ; ++Mit)
|
||||
// for(std::vector<int_t>::const_iterator Nit = BLAS3_N.begin() ; Nit != BLAS3_N.end() ; ++Nit)
|
||||
// for(std::vector<int_t>::const_iterator Kit = BLAS3_K.begin() ; Kit != BLAS3_K.end() ; ++Kit)
|
||||
// {
|
||||
// int_t M = *Kit, N = *Kit, K = *Kit;
|
||||
// std::cout << M << "," << N << "," << K;
|
||||
// /* ATIDLAS */
|
||||
// ad::array C(M, N, dtype), A(M, K, dtype), B(N, K, dtype);
|
||||
// CL_BENCHMARK(C = dot(A,trans(B)), gflops((double)2*M*N*K, tres));
|
||||
// /* clAmdBlas */
|
||||
// #ifdef BENCH_CLAMDBLAS
|
||||
// CL_BENCHMARK(clAmdBlasSgemm(clAmdBlasColumnMajor, clAmdBlasNoTrans, clAmdBlasTrans, M, N, K, 1, A.data()(), A.ld(), B.data()(), B.ld(),
|
||||
// 0, C.data()(), C.ld(), 1, &ad::cl_ext::get_queue(C.context(), 0)(),0, NULL, NULL), gflops((double)2*M*N*K, tres))
|
||||
// #endif
|
||||
// /* BLAS */
|
||||
// #ifdef BENCH_CBLAS
|
||||
// std::vector<float> cC(M*N), cA(M*K), cB(N*K);
|
||||
// ad::copy(C, cC);
|
||||
// ad::copy(A, cA);
|
||||
// ad::copy(B, cB);
|
||||
// CPU_BENCHMARK(cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, cA.data(), M, cB.data(), N, 1, cC.data(), M), gflops((double)2*M*N*K, tres));
|
||||
// #endif
|
||||
// std::cout << std::endl;
|
||||
// }
|
||||
|
||||
}
|
||||
|
||||
|
334
include/CL/cl.h
334
include/CL/cl.h
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2010 The Khronos Group Inc.
|
||||
* Copyright (c) 2008 - 2012 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
@@ -21,8 +21,6 @@
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
******************************************************************************/
|
||||
|
||||
/* $Revision: 11985 $ on $Date: 2010-07-15 11:16:06 -0700 (Thu, 15 Jul 2010) $ */
|
||||
|
||||
#ifndef __OPENCL_CL_H
|
||||
#define __OPENCL_CL_H
|
||||
|
||||
@@ -58,6 +56,8 @@ typedef cl_uint cl_device_mem_cache_type;
|
||||
typedef cl_uint cl_device_local_mem_type;
|
||||
typedef cl_bitfield cl_device_exec_capabilities;
|
||||
typedef cl_bitfield cl_command_queue_properties;
|
||||
typedef intptr_t cl_device_partition_property;
|
||||
typedef cl_bitfield cl_device_affinity_domain;
|
||||
|
||||
typedef intptr_t cl_context_properties;
|
||||
typedef cl_uint cl_context_info;
|
||||
@@ -67,6 +67,7 @@ typedef cl_uint cl_channel_type;
|
||||
typedef cl_bitfield cl_mem_flags;
|
||||
typedef cl_uint cl_mem_object_type;
|
||||
typedef cl_uint cl_mem_info;
|
||||
typedef cl_bitfield cl_mem_migration_flags;
|
||||
typedef cl_uint cl_image_info;
|
||||
typedef cl_uint cl_buffer_create_type;
|
||||
typedef cl_uint cl_addressing_mode;
|
||||
@@ -75,24 +76,43 @@ typedef cl_uint cl_sampler_info;
|
||||
typedef cl_bitfield cl_map_flags;
|
||||
typedef cl_uint cl_program_info;
|
||||
typedef cl_uint cl_program_build_info;
|
||||
typedef cl_uint cl_program_binary_type;
|
||||
typedef cl_int cl_build_status;
|
||||
typedef cl_uint cl_kernel_info;
|
||||
typedef cl_uint cl_kernel_arg_info;
|
||||
typedef cl_uint cl_kernel_arg_address_qualifier;
|
||||
typedef cl_uint cl_kernel_arg_access_qualifier;
|
||||
typedef cl_bitfield cl_kernel_arg_type_qualifier;
|
||||
typedef cl_uint cl_kernel_work_group_info;
|
||||
typedef cl_uint cl_event_info;
|
||||
typedef cl_uint cl_command_type;
|
||||
typedef cl_uint cl_profiling_info;
|
||||
|
||||
|
||||
typedef struct _cl_image_format {
|
||||
cl_channel_order image_channel_order;
|
||||
cl_channel_type image_channel_data_type;
|
||||
} cl_image_format;
|
||||
|
||||
typedef struct _cl_image_desc {
|
||||
cl_mem_object_type image_type;
|
||||
size_t image_width;
|
||||
size_t image_height;
|
||||
size_t image_depth;
|
||||
size_t image_array_size;
|
||||
size_t image_row_pitch;
|
||||
size_t image_slice_pitch;
|
||||
cl_uint num_mip_levels;
|
||||
cl_uint num_samples;
|
||||
cl_mem buffer;
|
||||
} cl_image_desc;
|
||||
|
||||
typedef struct _cl_buffer_region {
|
||||
size_t origin;
|
||||
size_t size;
|
||||
} cl_buffer_region;
|
||||
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/* Error Codes */
|
||||
@@ -111,6 +131,11 @@ typedef struct _cl_buffer_region {
|
||||
#define CL_MAP_FAILURE -12
|
||||
#define CL_MISALIGNED_SUB_BUFFER_OFFSET -13
|
||||
#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14
|
||||
#define CL_COMPILE_PROGRAM_FAILURE -15
|
||||
#define CL_LINKER_NOT_AVAILABLE -16
|
||||
#define CL_LINK_PROGRAM_FAILURE -17
|
||||
#define CL_DEVICE_PARTITION_FAILED -18
|
||||
#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19
|
||||
|
||||
#define CL_INVALID_VALUE -30
|
||||
#define CL_INVALID_DEVICE_TYPE -31
|
||||
@@ -147,14 +172,21 @@ typedef struct _cl_buffer_region {
|
||||
#define CL_INVALID_MIP_LEVEL -62
|
||||
#define CL_INVALID_GLOBAL_WORK_SIZE -63
|
||||
#define CL_INVALID_PROPERTY -64
|
||||
#define CL_INVALID_IMAGE_DESCRIPTOR -65
|
||||
#define CL_INVALID_COMPILER_OPTIONS -66
|
||||
#define CL_INVALID_LINKER_OPTIONS -67
|
||||
#define CL_INVALID_DEVICE_PARTITION_COUNT -68
|
||||
|
||||
/* OpenCL Version */
|
||||
#define CL_VERSION_1_0 1
|
||||
#define CL_VERSION_1_1 1
|
||||
#define CL_VERSION_1_2 1
|
||||
|
||||
/* cl_bool */
|
||||
#define CL_FALSE 0
|
||||
#define CL_TRUE 1
|
||||
#define CL_BLOCKING CL_TRUE
|
||||
#define CL_NON_BLOCKING CL_FALSE
|
||||
|
||||
/* cl_platform_info */
|
||||
#define CL_PLATFORM_PROFILE 0x0900
|
||||
@@ -168,6 +200,7 @@ typedef struct _cl_buffer_region {
|
||||
#define CL_DEVICE_TYPE_CPU (1 << 1)
|
||||
#define CL_DEVICE_TYPE_GPU (1 << 2)
|
||||
#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3)
|
||||
#define CL_DEVICE_TYPE_CUSTOM (1 << 4)
|
||||
#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF
|
||||
|
||||
/* cl_device_info */
|
||||
@@ -221,7 +254,7 @@ typedef struct _cl_buffer_region {
|
||||
#define CL_DEVICE_VERSION 0x102F
|
||||
#define CL_DEVICE_EXTENSIONS 0x1030
|
||||
#define CL_DEVICE_PLATFORM 0x1031
|
||||
/* 0x1032 reserved for CL_DEVICE_DOUBLE_FP_CONFIG */
|
||||
#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
|
||||
/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG */
|
||||
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034
|
||||
#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035
|
||||
@@ -233,6 +266,20 @@ typedef struct _cl_buffer_region {
|
||||
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B
|
||||
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C
|
||||
#define CL_DEVICE_OPENCL_C_VERSION 0x103D
|
||||
#define CL_DEVICE_LINKER_AVAILABLE 0x103E
|
||||
#define CL_DEVICE_BUILT_IN_KERNELS 0x103F
|
||||
#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040
|
||||
#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041
|
||||
#define CL_DEVICE_PARENT_DEVICE 0x1042
|
||||
#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043
|
||||
#define CL_DEVICE_PARTITION_PROPERTIES 0x1044
|
||||
#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045
|
||||
#define CL_DEVICE_PARTITION_TYPE 0x1046
|
||||
#define CL_DEVICE_REFERENCE_COUNT 0x1047
|
||||
#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048
|
||||
#define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049
|
||||
#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A
|
||||
#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B
|
||||
|
||||
/* cl_device_fp_config - bitfield */
|
||||
#define CL_FP_DENORM (1 << 0)
|
||||
@@ -242,6 +289,7 @@ typedef struct _cl_buffer_region {
|
||||
#define CL_FP_ROUND_TO_INF (1 << 4)
|
||||
#define CL_FP_FMA (1 << 5)
|
||||
#define CL_FP_SOFT_FLOAT (1 << 6)
|
||||
#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7)
|
||||
|
||||
/* cl_device_mem_cache_type */
|
||||
#define CL_NONE 0x0
|
||||
@@ -266,8 +314,23 @@ typedef struct _cl_buffer_region {
|
||||
#define CL_CONTEXT_PROPERTIES 0x1082
|
||||
#define CL_CONTEXT_NUM_DEVICES 0x1083
|
||||
|
||||
/* cl_context_info + cl_context_properties */
|
||||
/* cl_context_properties */
|
||||
#define CL_CONTEXT_PLATFORM 0x1084
|
||||
#define CL_CONTEXT_INTEROP_USER_SYNC 0x1085
|
||||
|
||||
/* cl_device_partition_property */
|
||||
#define CL_DEVICE_PARTITION_EQUALLY 0x1086
|
||||
#define CL_DEVICE_PARTITION_BY_COUNTS 0x1087
|
||||
#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0
|
||||
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088
|
||||
|
||||
/* cl_device_affinity_domain */
|
||||
#define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0)
|
||||
#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1)
|
||||
#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2)
|
||||
#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3)
|
||||
#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4)
|
||||
#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5)
|
||||
|
||||
/* cl_command_queue_info */
|
||||
#define CL_QUEUE_CONTEXT 0x1090
|
||||
@@ -282,6 +345,14 @@ typedef struct _cl_buffer_region {
|
||||
#define CL_MEM_USE_HOST_PTR (1 << 3)
|
||||
#define CL_MEM_ALLOC_HOST_PTR (1 << 4)
|
||||
#define CL_MEM_COPY_HOST_PTR (1 << 5)
|
||||
// reserved (1 << 6)
|
||||
#define CL_MEM_HOST_WRITE_ONLY (1 << 7)
|
||||
#define CL_MEM_HOST_READ_ONLY (1 << 8)
|
||||
#define CL_MEM_HOST_NO_ACCESS (1 << 9)
|
||||
|
||||
/* cl_mem_migration_flags - bitfield */
|
||||
#define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0)
|
||||
#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1)
|
||||
|
||||
/* cl_channel_order */
|
||||
#define CL_R 0x10B0
|
||||
@@ -297,6 +368,8 @@ typedef struct _cl_buffer_region {
|
||||
#define CL_Rx 0x10BA
|
||||
#define CL_RGx 0x10BB
|
||||
#define CL_RGBx 0x10BC
|
||||
#define CL_DEPTH 0x10BD
|
||||
#define CL_DEPTH_STENCIL 0x10BE
|
||||
|
||||
/* cl_channel_type */
|
||||
#define CL_SNORM_INT8 0x10D0
|
||||
@@ -314,11 +387,16 @@ typedef struct _cl_buffer_region {
|
||||
#define CL_UNSIGNED_INT32 0x10DC
|
||||
#define CL_HALF_FLOAT 0x10DD
|
||||
#define CL_FLOAT 0x10DE
|
||||
#define CL_UNORM_INT24 0x10DF
|
||||
|
||||
/* cl_mem_object_type */
|
||||
#define CL_MEM_OBJECT_BUFFER 0x10F0
|
||||
#define CL_MEM_OBJECT_IMAGE2D 0x10F1
|
||||
#define CL_MEM_OBJECT_IMAGE3D 0x10F2
|
||||
#define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3
|
||||
#define CL_MEM_OBJECT_IMAGE1D 0x10F4
|
||||
#define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5
|
||||
#define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_TYPE 0x1100
|
||||
@@ -339,6 +417,10 @@ typedef struct _cl_buffer_region {
|
||||
#define CL_IMAGE_WIDTH 0x1114
|
||||
#define CL_IMAGE_HEIGHT 0x1115
|
||||
#define CL_IMAGE_DEPTH 0x1116
|
||||
#define CL_IMAGE_ARRAY_SIZE 0x1117
|
||||
#define CL_IMAGE_BUFFER 0x1118
|
||||
#define CL_IMAGE_NUM_MIP_LEVELS 0x1119
|
||||
#define CL_IMAGE_NUM_SAMPLES 0x111A
|
||||
|
||||
/* cl_addressing_mode */
|
||||
#define CL_ADDRESS_NONE 0x1130
|
||||
@@ -361,6 +443,7 @@ typedef struct _cl_buffer_region {
|
||||
/* cl_map_flags - bitfield */
|
||||
#define CL_MAP_READ (1 << 0)
|
||||
#define CL_MAP_WRITE (1 << 1)
|
||||
#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2)
|
||||
|
||||
/* cl_program_info */
|
||||
#define CL_PROGRAM_REFERENCE_COUNT 0x1160
|
||||
@@ -370,11 +453,20 @@ typedef struct _cl_buffer_region {
|
||||
#define CL_PROGRAM_SOURCE 0x1164
|
||||
#define CL_PROGRAM_BINARY_SIZES 0x1165
|
||||
#define CL_PROGRAM_BINARIES 0x1166
|
||||
#define CL_PROGRAM_NUM_KERNELS 0x1167
|
||||
#define CL_PROGRAM_KERNEL_NAMES 0x1168
|
||||
|
||||
/* cl_program_build_info */
|
||||
#define CL_PROGRAM_BUILD_STATUS 0x1181
|
||||
#define CL_PROGRAM_BUILD_OPTIONS 0x1182
|
||||
#define CL_PROGRAM_BUILD_LOG 0x1183
|
||||
#define CL_PROGRAM_BINARY_TYPE 0x1184
|
||||
|
||||
/* cl_program_binary_type */
|
||||
#define CL_PROGRAM_BINARY_TYPE_NONE 0x0
|
||||
#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1
|
||||
#define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2
|
||||
#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4
|
||||
|
||||
/* cl_build_status */
|
||||
#define CL_BUILD_SUCCESS 0
|
||||
@@ -388,6 +480,32 @@ typedef struct _cl_buffer_region {
|
||||
#define CL_KERNEL_REFERENCE_COUNT 0x1192
|
||||
#define CL_KERNEL_CONTEXT 0x1193
|
||||
#define CL_KERNEL_PROGRAM 0x1194
|
||||
#define CL_KERNEL_ATTRIBUTES 0x1195
|
||||
|
||||
/* cl_kernel_arg_info */
|
||||
#define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196
|
||||
#define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197
|
||||
#define CL_KERNEL_ARG_TYPE_NAME 0x1198
|
||||
#define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199
|
||||
#define CL_KERNEL_ARG_NAME 0x119A
|
||||
|
||||
/* cl_kernel_arg_address_qualifier */
|
||||
#define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B
|
||||
#define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C
|
||||
#define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D
|
||||
#define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E
|
||||
|
||||
/* cl_kernel_arg_access_qualifier */
|
||||
#define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0
|
||||
#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1
|
||||
#define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2
|
||||
#define CL_KERNEL_ARG_ACCESS_NONE 0x11A3
|
||||
|
||||
/* cl_kernel_arg_type_qualifer */
|
||||
#define CL_KERNEL_ARG_TYPE_NONE 0
|
||||
#define CL_KERNEL_ARG_TYPE_CONST (1 << 0)
|
||||
#define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1)
|
||||
#define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2)
|
||||
|
||||
/* cl_kernel_work_group_info */
|
||||
#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0
|
||||
@@ -395,6 +513,7 @@ typedef struct _cl_buffer_region {
|
||||
#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2
|
||||
#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3
|
||||
#define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4
|
||||
#define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5
|
||||
|
||||
/* cl_event_info */
|
||||
#define CL_EVENT_COMMAND_QUEUE 0x11D0
|
||||
@@ -425,6 +544,10 @@ typedef struct _cl_buffer_region {
|
||||
#define CL_COMMAND_WRITE_BUFFER_RECT 0x1202
|
||||
#define CL_COMMAND_COPY_BUFFER_RECT 0x1203
|
||||
#define CL_COMMAND_USER 0x1204
|
||||
#define CL_COMMAND_BARRIER 0x1205
|
||||
#define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206
|
||||
#define CL_COMMAND_FILL_BUFFER 0x1207
|
||||
#define CL_COMMAND_FILL_IMAGE 0x1208
|
||||
|
||||
/* command execution status */
|
||||
#define CL_COMPLETE 0x0
|
||||
@@ -471,6 +594,19 @@ clGetDeviceInfo(cl_device_id /* device */,
|
||||
void * /* param_value */,
|
||||
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clCreateSubDevices(cl_device_id /* in_device */,
|
||||
const cl_device_partition_property * /* properties */,
|
||||
cl_uint /* num_devices */,
|
||||
cl_device_id * /* out_devices */,
|
||||
cl_uint * /* num_devices_ret */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clRetainDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clReleaseDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
/* Context APIs */
|
||||
extern CL_API_ENTRY cl_context CL_API_CALL
|
||||
clCreateContext(const cl_context_properties * /* properties */,
|
||||
@@ -520,25 +656,6 @@ clGetCommandQueueInfo(cl_command_queue /* command_queue */,
|
||||
void * /* param_value */,
|
||||
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
|
||||
#warning CL_USE_DEPRECATED_OPENCL_1_0_APIS is defined. These APIs are unsupported and untested in OpenCL 1.1!
|
||||
/*
|
||||
* WARNING:
|
||||
* This API introduces mutable state into the OpenCL implementation. It has been REMOVED
|
||||
* to better facilitate thread safety. The 1.0 API is not thread safe. It is not tested by the
|
||||
* OpenCL 1.1 conformance test, and consequently may not work or may not work dependably.
|
||||
* It is likely to be non-performant. Use of this API is not advised. Use at your own risk.
|
||||
*
|
||||
* Software developers previously relying on this API are instructed to set the command queue
|
||||
* properties when creating the queue, instead.
|
||||
*/
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clSetCommandQueueProperty(cl_command_queue /* command_queue */,
|
||||
cl_command_queue_properties /* properties */,
|
||||
cl_bool /* enable */,
|
||||
cl_command_queue_properties * /* old_properties */) CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED;
|
||||
#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */
|
||||
|
||||
/* Memory Object APIs */
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateBuffer(cl_context /* context */,
|
||||
@@ -555,26 +672,12 @@ clCreateSubBuffer(cl_mem /* buffer */,
|
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateImage2D(cl_context /* context */,
|
||||
clCreateImage(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
const cl_image_format * /* image_format */,
|
||||
size_t /* image_width */,
|
||||
size_t /* image_height */,
|
||||
size_t /* image_row_pitch */,
|
||||
const cl_image_desc * /* image_desc */,
|
||||
void * /* host_ptr */,
|
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateImage3D(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
const cl_image_format * /* image_format */,
|
||||
size_t /* image_width */,
|
||||
size_t /* image_height */,
|
||||
size_t /* image_depth */,
|
||||
size_t /* image_row_pitch */,
|
||||
size_t /* image_slice_pitch */,
|
||||
void * /* host_ptr */,
|
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clRetainMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
|
||||
@@ -647,6 +750,13 @@ clCreateProgramWithBinary(cl_context /* context */,
|
||||
cl_int * /* binary_status */,
|
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_program CL_API_CALL
|
||||
clCreateProgramWithBuiltInKernels(cl_context /* context */,
|
||||
cl_uint /* num_devices */,
|
||||
const cl_device_id * /* device_list */,
|
||||
const char * /* kernel_names */,
|
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
@@ -662,7 +772,30 @@ clBuildProgram(cl_program /* program */,
|
||||
void * /* user_data */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clUnloadCompiler(void) CL_API_SUFFIX__VERSION_1_0;
|
||||
clCompileProgram(cl_program /* program */,
|
||||
cl_uint /* num_devices */,
|
||||
const cl_device_id * /* device_list */,
|
||||
const char * /* options */,
|
||||
cl_uint /* num_input_headers */,
|
||||
const cl_program * /* input_headers */,
|
||||
const char ** /* header_include_names */,
|
||||
void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
|
||||
void * /* user_data */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_program CL_API_CALL
|
||||
clLinkProgram(cl_context /* context */,
|
||||
cl_uint /* num_devices */,
|
||||
const cl_device_id * /* device_list */,
|
||||
const char * /* options */,
|
||||
cl_uint /* num_input_programs */,
|
||||
const cl_program * /* input_programs */,
|
||||
void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
|
||||
void * /* user_data */,
|
||||
cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clUnloadPlatformCompiler(cl_platform_id /* platform */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetProgramInfo(cl_program /* program */,
|
||||
@@ -710,6 +843,14 @@ clGetKernelInfo(cl_kernel /* kernel */,
|
||||
void * /* param_value */,
|
||||
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetKernelArgInfo(cl_kernel /* kernel */,
|
||||
cl_uint /* arg_indx */,
|
||||
cl_kernel_arg_info /* param_name */,
|
||||
size_t /* param_value_size */,
|
||||
void * /* param_value */,
|
||||
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetKernelWorkGroupInfo(cl_kernel /* kernel */,
|
||||
cl_device_id /* device */,
|
||||
@@ -771,7 +912,7 @@ clEnqueueReadBuffer(cl_command_queue /* command_queue */,
|
||||
cl_mem /* buffer */,
|
||||
cl_bool /* blocking_read */,
|
||||
size_t /* offset */,
|
||||
size_t /* cb */,
|
||||
size_t /* size */,
|
||||
void * /* ptr */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
@@ -781,8 +922,8 @@ extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReadBufferRect(cl_command_queue /* command_queue */,
|
||||
cl_mem /* buffer */,
|
||||
cl_bool /* blocking_read */,
|
||||
const size_t * /* buffer_origin */,
|
||||
const size_t * /* host_origin */,
|
||||
const size_t * /* buffer_offset */,
|
||||
const size_t * /* host_offset */,
|
||||
const size_t * /* region */,
|
||||
size_t /* buffer_row_pitch */,
|
||||
size_t /* buffer_slice_pitch */,
|
||||
@@ -798,7 +939,7 @@ clEnqueueWriteBuffer(cl_command_queue /* command_queue */,
|
||||
cl_mem /* buffer */,
|
||||
cl_bool /* blocking_write */,
|
||||
size_t /* offset */,
|
||||
size_t /* cb */,
|
||||
size_t /* size */,
|
||||
const void * /* ptr */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
@@ -808,8 +949,8 @@ extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueWriteBufferRect(cl_command_queue /* command_queue */,
|
||||
cl_mem /* buffer */,
|
||||
cl_bool /* blocking_write */,
|
||||
const size_t * /* buffer_origin */,
|
||||
const size_t * /* host_origin */,
|
||||
const size_t * /* buffer_offset */,
|
||||
const size_t * /* host_offset */,
|
||||
const size_t * /* region */,
|
||||
size_t /* buffer_row_pitch */,
|
||||
size_t /* buffer_slice_pitch */,
|
||||
@@ -820,13 +961,24 @@ clEnqueueWriteBufferRect(cl_command_queue /* command_queue */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueFillBuffer(cl_command_queue /* command_queue */,
|
||||
cl_mem /* buffer */,
|
||||
const void * /* pattern */,
|
||||
size_t /* pattern_size */,
|
||||
size_t /* offset */,
|
||||
size_t /* size */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueCopyBuffer(cl_command_queue /* command_queue */,
|
||||
cl_mem /* src_buffer */,
|
||||
cl_mem /* dst_buffer */,
|
||||
size_t /* src_offset */,
|
||||
size_t /* dst_offset */,
|
||||
size_t /* cb */,
|
||||
size_t /* size */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
@@ -872,6 +1024,16 @@ clEnqueueWriteImage(cl_command_queue /* command_queue */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueFillImage(cl_command_queue /* command_queue */,
|
||||
cl_mem /* image */,
|
||||
const void * /* fill_color */,
|
||||
const size_t * /* origin[3] */,
|
||||
const size_t * /* region[3] */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueCopyImage(cl_command_queue /* command_queue */,
|
||||
cl_mem /* src_image */,
|
||||
@@ -911,7 +1073,7 @@ clEnqueueMapBuffer(cl_command_queue /* command_queue */,
|
||||
cl_bool /* blocking_map */,
|
||||
cl_map_flags /* map_flags */,
|
||||
size_t /* offset */,
|
||||
size_t /* cb */,
|
||||
size_t /* size */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */,
|
||||
@@ -939,6 +1101,15 @@ clEnqueueUnmapMemObject(cl_command_queue /* command_queue */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueMigrateMemObjects(cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_mem_objects */,
|
||||
const cl_mem * /* mem_objects */,
|
||||
cl_mem_migration_flags /* flags */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
|
||||
cl_kernel /* kernel */,
|
||||
@@ -959,7 +1130,7 @@ clEnqueueTask(cl_command_queue /* command_queue */,
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueNativeKernel(cl_command_queue /* command_queue */,
|
||||
void (*user_func)(void *),
|
||||
void (CL_CALLBACK * /*user_func*/)(void *),
|
||||
void * /* args */,
|
||||
size_t /* cb_args */,
|
||||
cl_uint /* num_mem_objects */,
|
||||
@@ -970,16 +1141,17 @@ clEnqueueNativeKernel(cl_command_queue /* command_queue */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueMarker(cl_command_queue /* command_queue */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
clEnqueueMarkerWithWaitList(cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueWaitForEvents(cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_events */,
|
||||
const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0;
|
||||
clEnqueueBarrierWithWaitList(cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueBarrier(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/* Extension function access
|
||||
*
|
||||
@@ -988,7 +1160,51 @@ clEnqueueBarrier(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_
|
||||
* check to make sure the address is not NULL, before using or
|
||||
* calling the returned function address.
|
||||
*/
|
||||
extern CL_API_ENTRY void * CL_API_CALL clGetExtensionFunctionAddress(const char * /* func_name */) CL_API_SUFFIX__VERSION_1_0;
|
||||
extern CL_API_ENTRY void * CL_API_CALL
|
||||
clGetExtensionFunctionAddressForPlatform(cl_platform_id /* platform */,
|
||||
const char * /* func_name */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
|
||||
// Deprecated OpenCL 1.1 APIs
|
||||
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
|
||||
clCreateImage2D(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
const cl_image_format * /* image_format */,
|
||||
size_t /* image_width */,
|
||||
size_t /* image_height */,
|
||||
size_t /* image_row_pitch */,
|
||||
void * /* host_ptr */,
|
||||
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||
|
||||
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
|
||||
clCreateImage3D(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
const cl_image_format * /* image_format */,
|
||||
size_t /* image_width */,
|
||||
size_t /* image_height */,
|
||||
size_t /* image_depth */,
|
||||
size_t /* image_row_pitch */,
|
||||
size_t /* image_slice_pitch */,
|
||||
void * /* host_ptr */,
|
||||
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||
|
||||
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
|
||||
clEnqueueMarker(cl_command_queue /* command_queue */,
|
||||
cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||
|
||||
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
|
||||
clEnqueueWaitForEvents(cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_events */,
|
||||
const cl_event * /* event_list */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||
|
||||
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
|
||||
clEnqueueBarrier(cl_command_queue /* command_queue */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||
|
||||
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
|
||||
clUnloadCompiler(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||
|
||||
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL
|
||||
clGetExtensionFunctionAddress(const char * /* func_name */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
10547
include/CL/cl.hpp
10547
include/CL/cl.hpp
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
||||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2010 The Khronos Group Inc.
|
||||
* Copyright (c) 2008-2012 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
@@ -113,7 +113,7 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)(
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
cl_mem * mem_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2010 The Khronos Group Inc.
|
||||
* Copyright (c) 2008-2013 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
@@ -118,6 +118,48 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(
|
||||
cl_uint * /* num_platforms */);
|
||||
|
||||
|
||||
/* Extension: cl_khr_image2D_buffer
|
||||
*
|
||||
* This extension allows a 2D image to be created from a cl_mem buffer without a copy.
|
||||
* The type associated with a 2D image created from a buffer in an OpenCL program is image2d_t.
|
||||
* Both the sampler and sampler-less read_image built-in functions are supported for 2D images
|
||||
* and 2D images created from a buffer. Similarly, the write_image built-ins are also supported
|
||||
* for 2D images created from a buffer.
|
||||
*
|
||||
* When the 2D image from buffer is created, the client must specify the width,
|
||||
* height, image format (i.e. channel order and channel data type) and optionally the row pitch.
|
||||
*
|
||||
* The pitch specified must be a multiple of CL_DEVICE_IMAGE_PITCH_ALIGNMENT pixels.
|
||||
* The base address of the buffer must be aligned to CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT pixels.
|
||||
*/
|
||||
|
||||
/*************************************
|
||||
* cl_khr_initialize_memory extension *
|
||||
*************************************/
|
||||
|
||||
#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x200E
|
||||
|
||||
|
||||
/**************************************
|
||||
* cl_khr_terminate_context extension *
|
||||
**************************************/
|
||||
|
||||
#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x200F
|
||||
#define CL_CONTEXT_TERMINATE_KHR 0x2010
|
||||
|
||||
#define cl_khr_terminate_context 1
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clTerminateContextKHR(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clTerminateContextKHR_fn)(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
|
||||
/*
|
||||
* Extension: cl_khr_spir
|
||||
*
|
||||
* This extension adds support to create an OpenCL program object from a
|
||||
* Standard Portable Intermediate Representation (SPIR) instance
|
||||
*/
|
||||
|
||||
/******************************************
|
||||
* cl_nv_device_attribute_query extension *
|
||||
******************************************/
|
||||
@@ -130,12 +172,46 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(
|
||||
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005
|
||||
#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006
|
||||
|
||||
/*********************************
|
||||
* cl_amd_device_memory_flags *
|
||||
*********************************/
|
||||
#define cl_amd_device_memory_flags 1
|
||||
|
||||
#define CL_MEM_USE_PERSISTENT_MEM_AMD (1 << 6) // Alloc from GPU's CPU visible heap
|
||||
|
||||
/* cl_device_info */
|
||||
#define CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT 0x4032
|
||||
|
||||
/*********************************
|
||||
* cl_amd_device_attribute_query *
|
||||
*********************************/
|
||||
#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036
|
||||
#define CL_DEVICE_TOPOLOGY_AMD 0x4037
|
||||
#define CL_DEVICE_BOARD_NAME_AMD 0x4038
|
||||
#define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD 0x4039
|
||||
#define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD 0x4040
|
||||
#define CL_DEVICE_SIMD_WIDTH_AMD 0x4041
|
||||
#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042
|
||||
#define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043
|
||||
#define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD 0x4044
|
||||
#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD 0x4045
|
||||
#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD 0x4046
|
||||
#define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD 0x4047
|
||||
#define CL_DEVICE_LOCAL_MEM_BANKS_AMD 0x4048
|
||||
|
||||
typedef union
|
||||
{
|
||||
struct { cl_uint type; cl_uint data[5]; } raw;
|
||||
struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie;
|
||||
} cl_device_topology_amd;
|
||||
|
||||
#define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD 1
|
||||
|
||||
|
||||
/**************************
|
||||
* cl_amd_offline_devices *
|
||||
**************************/
|
||||
#define CL_CONTEXT_OFFLINE_DEVICES_AMD 0x403F
|
||||
|
||||
#ifdef CL_VERSION_1_1
|
||||
/***********************************
|
||||
@@ -201,7 +277,68 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(
|
||||
#define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0)
|
||||
#define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1)
|
||||
|
||||
/* cl_ext_atomic_counters_32 and cl_ext_atomic_counters_64 extensions
|
||||
* no extension #define since they have no functions
|
||||
*/
|
||||
#define CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT 0x4032
|
||||
|
||||
/*********************************
|
||||
* cl_qcom_ext_host_ptr extension
|
||||
*********************************/
|
||||
|
||||
#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29)
|
||||
|
||||
#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0
|
||||
#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1
|
||||
#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2
|
||||
#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3
|
||||
#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4
|
||||
#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5
|
||||
#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6
|
||||
#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7
|
||||
|
||||
typedef cl_uint cl_image_pitch_info_qcom;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetDeviceImageInfoQCOM(cl_device_id device,
|
||||
size_t image_width,
|
||||
size_t image_height,
|
||||
const cl_image_format *image_format,
|
||||
cl_image_pitch_info_qcom param_name,
|
||||
size_t param_value_size,
|
||||
void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
typedef struct _cl_mem_ext_host_ptr
|
||||
{
|
||||
// Type of external memory allocation.
|
||||
// Legal values will be defined in layered extensions.
|
||||
cl_uint allocation_type;
|
||||
|
||||
// Host cache policy for this external memory allocation.
|
||||
cl_uint host_cache_policy;
|
||||
|
||||
} cl_mem_ext_host_ptr;
|
||||
|
||||
/*********************************
|
||||
* cl_qcom_ion_host_ptr extension
|
||||
*********************************/
|
||||
|
||||
#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8
|
||||
|
||||
typedef struct _cl_mem_ion_host_ptr
|
||||
{
|
||||
// Type of external memory allocation.
|
||||
// Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations.
|
||||
cl_mem_ext_host_ptr ext_host_ptr;
|
||||
|
||||
// ION file descriptor
|
||||
int ion_filedesc;
|
||||
|
||||
// Host pointer to the ION allocated memory
|
||||
void* ion_hostptr;
|
||||
|
||||
} cl_mem_ion_host_ptr;
|
||||
|
||||
#endif /* CL_VERSION_1_1 */
|
||||
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2010 The Khronos Group Inc.
|
||||
* Copyright (c) 2008 - 2012 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
@@ -21,20 +21,11 @@
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
|
||||
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
|
||||
|
||||
/*
|
||||
* cl_gl.h contains Khronos-approved (KHR) OpenCL extensions which have
|
||||
* OpenGL dependencies. The application is responsible for #including
|
||||
* OpenGL or OpenGL ES headers before #including cl_gl.h.
|
||||
*/
|
||||
|
||||
#ifndef __OPENCL_CL_GL_H
|
||||
#define __OPENCL_CL_GL_H
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/cl.h>
|
||||
#include <OpenGL/CGLDevice.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
@@ -48,15 +39,21 @@ typedef cl_uint cl_gl_texture_info;
|
||||
typedef cl_uint cl_gl_platform_info;
|
||||
typedef struct __GLsync *cl_GLsync;
|
||||
|
||||
/* cl_gl_object_type */
|
||||
/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */
|
||||
#define CL_GL_OBJECT_BUFFER 0x2000
|
||||
#define CL_GL_OBJECT_TEXTURE2D 0x2001
|
||||
#define CL_GL_OBJECT_TEXTURE3D 0x2002
|
||||
#define CL_GL_OBJECT_RENDERBUFFER 0x2003
|
||||
#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E
|
||||
#define CL_GL_OBJECT_TEXTURE1D 0x200F
|
||||
#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010
|
||||
#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011
|
||||
|
||||
/* cl_gl_texture_info */
|
||||
#define CL_GL_TEXTURE_TARGET 0x2004
|
||||
#define CL_GL_MIPMAP_LEVEL 0x2005
|
||||
#define CL_GL_NUM_SAMPLES 0x2012
|
||||
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLBuffer(cl_context /* context */,
|
||||
@@ -65,20 +62,12 @@ clCreateFromGLBuffer(cl_context /* context */,
|
||||
int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture2D(cl_context /* context */,
|
||||
clCreateFromGLTexture(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
cl_GLenum /* target */,
|
||||
cl_GLint /* miplevel */,
|
||||
cl_GLuint /* texture */,
|
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture3D(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
cl_GLenum /* target */,
|
||||
cl_GLint /* miplevel */,
|
||||
cl_GLuint /* texture */,
|
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLRenderbuffer(cl_context /* context */,
|
||||
@@ -114,6 +103,24 @@ clEnqueueReleaseGLObjects(cl_command_queue /* command_queue */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
|
||||
// Deprecated OpenCL 1.1 APIs
|
||||
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture2D(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
cl_GLenum /* target */,
|
||||
cl_GLint /* miplevel */,
|
||||
cl_GLuint /* texture */,
|
||||
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||
|
||||
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture3D(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
cl_GLenum /* target */,
|
||||
cl_GLint /* miplevel */,
|
||||
cl_GLuint /* texture */,
|
||||
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||
|
||||
/* cl_khr_gl_sharing extension */
|
||||
|
||||
#define cl_khr_gl_sharing 1
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2010 The Khronos Group Inc.
|
||||
* Copyright (c) 2008-2012 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2010 The Khronos Group Inc.
|
||||
* Copyright (c) 2008-2012 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
@@ -49,16 +49,72 @@ extern "C" {
|
||||
#define CL_EXTENSION_WEAK_LINK __attribute__((weak_import))
|
||||
#define CL_API_SUFFIX__VERSION_1_0 AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
|
||||
#define CL_EXT_SUFFIX__VERSION_1_0 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
|
||||
#define CL_API_SUFFIX__VERSION_1_1 CL_EXTENSION_WEAK_LINK
|
||||
#define CL_EXT_SUFFIX__VERSION_1_1 CL_EXTENSION_WEAK_LINK
|
||||
#define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
|
||||
#define CL_API_SUFFIX__VERSION_1_1 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
|
||||
#define GCL_API_SUFFIX__VERSION_1_1 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
|
||||
#define CL_EXT_SUFFIX__VERSION_1_1 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
|
||||
#define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7
|
||||
|
||||
#ifdef AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
|
||||
#define CL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
|
||||
#define GCL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
|
||||
#define CL_EXT_SUFFIX__VERSION_1_2 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
|
||||
#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
|
||||
#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8
|
||||
#else
|
||||
#warning This path should never happen outside of internal operating system development. AvailabilityMacros do not function correctly here!
|
||||
#define CL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
|
||||
#define GCL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
|
||||
#define CL_EXT_SUFFIX__VERSION_1_2 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
|
||||
#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
|
||||
#endif
|
||||
#else
|
||||
#define CL_EXTENSION_WEAK_LINK
|
||||
#define CL_API_SUFFIX__VERSION_1_0
|
||||
#define CL_EXT_SUFFIX__VERSION_1_0
|
||||
#define CL_API_SUFFIX__VERSION_1_1
|
||||
#define CL_EXT_SUFFIX__VERSION_1_1
|
||||
#define CL_API_SUFFIX__VERSION_1_2
|
||||
#define CL_EXT_SUFFIX__VERSION_1_2
|
||||
|
||||
#ifdef __GNUC__
|
||||
#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
|
||||
#define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
|
||||
#define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
|
||||
#else
|
||||
#define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED __attribute__((deprecated))
|
||||
#define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
|
||||
#endif
|
||||
|
||||
#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
|
||||
#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
|
||||
#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
|
||||
#else
|
||||
#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED __attribute__((deprecated))
|
||||
#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
|
||||
#endif
|
||||
#elif _WIN32
|
||||
#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
|
||||
#define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
|
||||
#define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
|
||||
#else
|
||||
#define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
|
||||
#define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED __declspec(deprecated)
|
||||
#endif
|
||||
|
||||
#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
|
||||
#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
|
||||
#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
|
||||
#else
|
||||
#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
|
||||
#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED __declspec(deprecated)
|
||||
#endif
|
||||
#else
|
||||
#define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
|
||||
#define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
|
||||
|
||||
#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
|
||||
#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if (defined (_WIN32) && defined(_MSC_VER))
|
||||
@@ -252,7 +308,7 @@ typedef double cl_double __attribute__((aligned(8)));
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
/* Mirror types to GL types. Mirror types allow us to avoid deciding which headers to load based on whether we are using GL or GLES here. */
|
||||
/* Mirror types to GL types. Mirror types allow us to avoid deciding which headers to load based on whether we are using GL or GLES here. */
|
||||
typedef unsigned int cl_GLuint;
|
||||
typedef int cl_GLint;
|
||||
typedef unsigned int cl_GLenum;
|
||||
@@ -1170,13 +1226,13 @@ typedef union
|
||||
/* Macro to facilitate debugging
|
||||
* Usage:
|
||||
* Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source.
|
||||
* The first line ends with: CL_PROGRAM_STRING_BEGIN \"
|
||||
* The first line ends with: CL_PROGRAM_STRING_DEBUG_INFO \"
|
||||
* Each line thereafter of OpenCL C source must end with: \n\
|
||||
* The last line ends in ";
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* const char *my_program = CL_PROGRAM_STRING_BEGIN "\
|
||||
* const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\
|
||||
* kernel void foo( int a, float * b ) \n\
|
||||
* { \n\
|
||||
* // my comment \n\
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2010 The Khronos Group Inc.
|
||||
* Copyright (c) 2008-2012 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
|
@@ -147,7 +147,7 @@ protected:
|
||||
static std::string vstore(unsigned int simd_width, std::string const & value, std::string const & offset, std::string const & ptr);
|
||||
static std::string vload(unsigned int simd_width, std::string const & offset, std::string const & ptr);
|
||||
static std::string append_width(std::string const & str, unsigned int width);
|
||||
bool has_strided_access(symbolic_expressions_container const & symbolic_expressions) const;
|
||||
static bool requires_fallback(symbolic_expressions_container const & symbolic_expressions);
|
||||
void set_arguments(symbolic_expressions_container const & symbolic_expressions, cl::Kernel & kernel, unsigned int & current_arg);
|
||||
|
||||
|
||||
@@ -176,8 +176,6 @@ class base_impl : public base
|
||||
{
|
||||
private:
|
||||
virtual int check_invalid_impl(cl::Device const &, symbolic_expressions_container const &) const;
|
||||
protected:
|
||||
bool has_misaligned_offset(symbolic_expressions_container const & symbolic_expressions);
|
||||
public:
|
||||
typedef ParametersType parameters_type;
|
||||
base_impl(parameters_type const & parameters, binding_policy_t binding_policy);
|
||||
|
@@ -11,7 +11,7 @@ namespace atidlas
|
||||
namespace cl_ext
|
||||
{
|
||||
|
||||
typedef std::map<cl::Program, cl::Kernel, cl_ext::compare> kernels_t;
|
||||
typedef std::map<std::pair<cl_program, unsigned int>, cl::Kernel> kernels_t;
|
||||
typedef std::vector<std::pair<cl::Context, std::vector<cl::CommandQueue> > > queues_t;
|
||||
|
||||
queues_t init_queues();
|
||||
|
@@ -5,7 +5,7 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
add_library(atidlas SHARED ${LIBATIDLAS_SRC})
|
||||
set_target_properties(atidlas PROPERTIES
|
||||
COMPILE_FLAGS "-Wno-sign-compare -DCL_USE_DEPRECATED_OPENCL_1_1_APIS -D__CL_ENABLE_EXCEPTIONS -Wall -Wextra -pedantic")
|
||||
COMPILE_FLAGS "-Wno-sign-compare -D__CL_ENABLE_EXCEPTIONS -Wall -Wextra -pedantic")
|
||||
|
||||
|
||||
#install(TARGETS atidlas LIBRARY DESTINATION lib)
|
||||
|
@@ -366,17 +366,13 @@ bool base::is_strided(symbolic_expression_node const & node)
|
||||
|| node.op.type==OPERATOR_OUTER_PROD_TYPE;
|
||||
}
|
||||
|
||||
bool base::has_strided_access(symbolic_expressions_container const & symbolic_expressions) const
|
||||
bool base::requires_fallback(symbolic_expressions_container const & symbolic_expressions)
|
||||
{
|
||||
for (symbolic_expressions_container::data_type::const_iterator it = symbolic_expressions.data().begin(); it != symbolic_expressions.data().end(); ++it)
|
||||
{
|
||||
std::vector<lhs_rhs_element> arrays = filter_elements(DENSE_ARRAY_TYPE, **it);
|
||||
for (std::vector<lhs_rhs_element>::iterator itt = arrays.begin(); itt != arrays.end(); ++itt)
|
||||
if(std::max(itt->array.stride1, itt->array.stride2)>1)
|
||||
for(symbolic_expression::container_type::const_iterator itt = (*it)->tree().begin(); itt != (*it)->tree().end() ; ++itt)
|
||||
if( (itt->lhs.subtype==DENSE_ARRAY_TYPE && (std::max(itt->lhs.array.stride1, itt->lhs.array.stride2)>1 || std::max(itt->lhs.array.start1,itt->lhs.array.start2)>0))
|
||||
|| (itt->rhs.subtype==DENSE_ARRAY_TYPE && (std::max(itt->rhs.array.stride1, itt->rhs.array.stride2)>1 || std::max(itt->rhs.array.start1,itt->rhs.array.start2)>0)))
|
||||
return true;
|
||||
if(filter_nodes(&is_strided, **it, true).empty()==false)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -520,19 +516,6 @@ template<class TType, class PType>
|
||||
int base_impl<TType, PType>::check_invalid_impl(cl::Device const &, symbolic_expressions_container const &) const
|
||||
{ return TEMPLATE_VALID; }
|
||||
|
||||
template<class TType, class PType>
|
||||
bool base_impl<TType, PType>::has_misaligned_offset(symbolic_expressions_container const & symbolic_expressions)
|
||||
{
|
||||
for (symbolic_expressions_container::data_type::const_iterator it = symbolic_expressions.data().begin(); it != symbolic_expressions.data().end(); ++it)
|
||||
{
|
||||
std::vector<lhs_rhs_element> arrays = filter_elements(DENSE_ARRAY_TYPE, **it);
|
||||
for (std::vector<lhs_rhs_element>::iterator itt = arrays.begin(); itt != arrays.end(); ++itt)
|
||||
if (itt->array.start1>0 || itt->array.start2>0)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template<class TType, class PType>
|
||||
base_impl<TType, PType>::base_impl(parameters_type const & parameters, binding_policy_t binding_policy) : base(binding_policy), p_(parameters)
|
||||
{ }
|
||||
|
@@ -225,7 +225,7 @@ void mreduction::enqueue(cl::CommandQueue & queue,
|
||||
|
||||
//Kernel
|
||||
int idx = 0;
|
||||
if(reduction_type_==REDUCE_COLUMNS && p_.simd_width>1 && has_strided_access(symbolic_expressions))
|
||||
if(reduction_type_==REDUCE_COLUMNS && p_.simd_width>1 && requires_fallback(symbolic_expressions))
|
||||
idx = 1;
|
||||
cl::Program & program = programs[idx].program();
|
||||
cl::Kernel kernel(program, kname);
|
||||
|
@@ -286,7 +286,7 @@ void reduction::enqueue(cl::CommandQueue & queue,
|
||||
symbolic_expressions_container const & symbolic_expressions)
|
||||
{
|
||||
//Preprocessing
|
||||
std::vector<int_t> size = input_sizes(symbolic_expressions);
|
||||
int_t size = input_sizes(symbolic_expressions)[0];
|
||||
std::vector<symbolic_expression_node const *> reductions;
|
||||
for (symbolic_expressions_container::data_type::const_iterator it = symbolic_expressions.data().begin(); it != symbolic_expressions.data().end(); ++it)
|
||||
{
|
||||
@@ -303,7 +303,7 @@ void reduction::enqueue(cl::CommandQueue & queue,
|
||||
fill_kernel_name(kopt[0], label, "o0");
|
||||
fill_kernel_name(kopt[1], label, "o1");
|
||||
|
||||
bool fallback = has_strided_access(symbolic_expressions) && p_.simd_width > 1;
|
||||
bool fallback = p_.simd_width > 1 && (requires_fallback(symbolic_expressions) || (size%p_.simd_width>0));
|
||||
cl::Program & program = programs[fallback?0:1].program();
|
||||
cl::Kernel kernels[2] = { cl::Kernel(program, fallback?kfallback[0]:kopt[0]),
|
||||
cl::Kernel(program, fallback?kfallback[1]:kopt[1]) };
|
||||
@@ -319,7 +319,7 @@ void reduction::enqueue(cl::CommandQueue & queue,
|
||||
for (unsigned int k = 0; k < 2; k++)
|
||||
{
|
||||
unsigned int n_arg = 0;
|
||||
kernels[k].setArg(n_arg++, cl_uint(size[0]));
|
||||
kernels[k].setArg(n_arg++, cl_uint(size));
|
||||
|
||||
//Temporary buffers
|
||||
unsigned int i = 0;
|
||||
|
@@ -117,11 +117,15 @@ void vaxpy::enqueue(cl::CommandQueue & queue,
|
||||
char kopt[10];
|
||||
fill_kernel_name(kfb, label, "f");
|
||||
fill_kernel_name(kopt, label, "o");
|
||||
bool strided = has_strided_access(symbolic_expressions);
|
||||
bool misaligned = has_misaligned_offset(symbolic_expressions);
|
||||
bool fallback = p_.simd_width > 1 && (strided || (size%p_.simd_width>0) || misaligned);
|
||||
bool fallback = p_.simd_width > 1 && (requires_fallback(symbolic_expressions) || (size%p_.simd_width>0));
|
||||
|
||||
cl::Program const & program = programs[fallback?0:1].program();
|
||||
cl::Kernel kernel(program, fallback?kfb:kopt);
|
||||
cl_ext::kernels_t::key_type key(program(), label);
|
||||
cl_ext::kernels_t::iterator it = cl_ext::kernels.find(key);
|
||||
if(it==cl_ext::kernels.end())
|
||||
it = cl_ext::kernels.insert(std::make_pair(key, cl::Kernel(program, fallback?kfb:kopt))).first;
|
||||
cl::Kernel & kernel = it->second;
|
||||
|
||||
//NDRange
|
||||
cl::NDRange grange(p_.local_size_0*p_.num_groups);
|
||||
cl::NDRange lrange(p_.local_size_0);
|
||||
|
@@ -36,27 +36,27 @@ cl::Program program_map::add(cl::Context & context, std::string const & pname, s
|
||||
|
||||
// Retrieves the program in the cache
|
||||
bool compile = true;
|
||||
if (cache_path_.size())
|
||||
{
|
||||
std::string prefix;
|
||||
for(std::vector< cl::Device >::const_iterator it = devices.begin(); it != devices.end(); ++it)
|
||||
prefix += it->getInfo<CL_DEVICE_NAME>() + it->getInfo<CL_DEVICE_VENDOR>() + it->getInfo<CL_DEVICE_VERSION>();
|
||||
std::string sha1 = tools::sha1(prefix + source);
|
||||
// if (cache_path_.size())
|
||||
// {
|
||||
// std::string prefix;
|
||||
// for(std::vector< cl::Device >::const_iterator it = devices.begin(); it != devices.end(); ++it)
|
||||
// prefix += it->getInfo<CL_DEVICE_NAME>() + it->getInfo<CL_DEVICE_VENDOR>() + it->getInfo<CL_DEVICE_VERSION>();
|
||||
// std::string sha1 = tools::sha1(prefix + source);
|
||||
|
||||
std::ifstream cached((cache_path_+sha1).c_str(),std::ios::binary);
|
||||
if (cached)
|
||||
{
|
||||
std::size_t len;
|
||||
std::vector<char> buffer;
|
||||
cached.read((char*)&len, sizeof(std::size_t));
|
||||
buffer.resize(len);
|
||||
cached.read((char*)buffer.data(), std::streamsize(len));
|
||||
char* cbuffer = buffer.data();
|
||||
res = cl::Program(context, devices, cl::Program::Binaries(1, std::make_pair(cbuffer, len)), NULL, &err);
|
||||
compile = false;
|
||||
}
|
||||
}
|
||||
//Gets from source
|
||||
// std::ifstream cached((cache_path_+sha1).c_str(),std::ios::binary);
|
||||
// if (cached)
|
||||
// {
|
||||
// std::size_t len;
|
||||
// std::vector<char> buffer;
|
||||
// cached.read((char*)&len, sizeof(std::size_t));
|
||||
// buffer.resize(len);
|
||||
// cached.read((char*)buffer.data(), std::streamsize(len));
|
||||
// char* cbuffer = buffer.data();
|
||||
// res = cl::Program(context, devices, cl::Program::Binaries(1, std::make_pair(cbuffer, len)), NULL, &err);
|
||||
// compile = false;
|
||||
// }
|
||||
// }
|
||||
// //Gets from source
|
||||
if(compile)
|
||||
{
|
||||
const char * csrc = source.c_str();
|
||||
@@ -80,7 +80,6 @@ cl::Program program_map::add(cl::Context & context, std::string const & pname, s
|
||||
{
|
||||
std::vector<std::size_t> sizes = res.getInfo<CL_PROGRAM_BINARY_SIZES>();
|
||||
std::vector<char*> binaries = res.getInfo<CL_PROGRAM_BINARIES>();
|
||||
|
||||
std::string prefix;
|
||||
for(std::vector< cl::Device >::const_iterator it = devices.begin(); it != devices.end(); ++it)
|
||||
prefix += it->getInfo<CL_DEVICE_NAME>() + it->getInfo<CL_DEVICE_VENDOR>() + it->getInfo<CL_DEVICE_VERSION>();
|
||||
|
@@ -195,6 +195,7 @@ void import(std::string const & fname, cl::CommandQueue & queue, model_map_t& re
|
||||
//Parse the JSON document
|
||||
js::Document document;
|
||||
std::ifstream t(fname.c_str());
|
||||
if(!t) return;
|
||||
std::string str;
|
||||
t.seekg(0, std::ios::end);
|
||||
str.reserve(t.tellg());
|
||||
|
@@ -129,7 +129,6 @@ class GeneticOperators(object):
|
||||
def evaluate(self, individual):
|
||||
if tuple(individual) not in self.cache:
|
||||
parameters = self.decode(individual)
|
||||
parameters = [2,16,16,16,6,1,6,atd.fetching_policy_type.FETCH_FROM_LOCAL,atd.fetching_policy_type.FETCH_FROM_LOCAL,16,16]
|
||||
template = self.Template(*parameters)
|
||||
try:
|
||||
tt = misc_tools.benchmark(template, self.symbolic)
|
||||
|
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user