diff --git a/CMakeLists.txt b/CMakeLists.txt index 34c8500b5..5530aeba8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,7 +16,6 @@ endif() add_definitions(-Wno-sign-compare ${BACKEND_DEFINES} -Wall -Wextra -pedantic -std=c++11) add_executable(bin2cpp ${CMAKE_MODULE_PATH}/helpers/bin2cpp.cpp) - file(GLOB_RECURSE LIBISAAC_SRC lib/*.cpp) #Python wrapper diff --git a/bench/blas.cpp b/bench/blas.cpp index 6a5e7c7ba..925fd6c0e 100644 --- a/bench/blas.cpp +++ b/bench/blas.cpp @@ -17,8 +17,8 @@ #include #define HAS_A_BLAS defined(BENCH_CBLAS) or defined(BENCH_CLBLAS) or defined(BENCH_CUBLAS) -namespace ad = isaac; -typedef ad::int_t int_t; +namespace isc = isaac; +typedef isc::int_t int_t; template struct int_{}; @@ -85,11 +85,11 @@ T mean(std::vector x) return res/N; } -static double time_event(unsigned long sum, ad::driver::Event const & e) +static double time_event(unsigned long sum, isc::driver::Event const & e) { return sum + e.elapsed_time();} template -void bench(ad::numeric_type dtype, std::string operation) +void bench(isc::numeric_type dtype, std::string operation) { // @@ -101,8 +101,8 @@ void bench(ad::numeric_type dtype, std::string operation) std::vector times;\ double total_time = 0;\ while(total_time*1e-9 < 1e-3){\ - std::list events;\ - flush = ad::zeros(1e6, 1, dtype);\ + std::list events;\ + flush = isc::zeros(1e6, 1, dtype);\ OP;\ queue.synchronize();\ times.push_back(std::accumulate(events.begin(), events.end(), 0, &time_event));\ @@ -118,7 +118,7 @@ void bench(ad::numeric_type dtype, std::string operation) double total_time = 0;\ while(total_time*1e-9 < 1e-3){\ cl::Event event;\ - flush = ad::zeros(1e6, 1, dtype);\ + flush = isc::zeros(1e6, 1, dtype);\ OP;\ queue.synchronize();\ times.push_back(event.getProfilingInfo() - event.getProfilingInfo());\ @@ -130,7 +130,7 @@ void bench(ad::numeric_type dtype, std::string operation) #define BENCHMARK_HOST(OP, PERF) \ {\ - ad::tools::timer tmr;\ + isc::tools::timer tmr;\ double total_time = 0;\ std::vector times;\ while(total_time < 1e-2){\ @@ -156,7 +156,7 @@ void bench(ad::numeric_type dtype, std::string operation) OP;\ cudaThreadSynchronize();\ while(total_time*1e-3 < 1e-3){\ - flush = ad::zeros(1e6, 1, dtype);\ + flush = isc::zeros(1e6, 1, dtype);\ cudaEventRecord(start,0);\ OP;\ cudaEventRecord(stop,0);\ @@ -169,10 +169,10 @@ void bench(ad::numeric_type dtype, std::string operation) std::cout << "\t" << PERF << std::flush;\ } - unsigned int dtsize = ad::size_of(dtype); - ad::driver::CommandQueue & queue = ad::driver::queues.default_queues()[0]; + unsigned int dtsize = isc::size_of(dtype); + isc::driver::CommandQueue & queue = isc::driver::queues.default_queues()[0]; std::map metric{ {"axpy", "GB/s"}, {"dot", "GB/s"}, {"gemv", "GB/s"}, {"gemm", "GFLOPS"}}; - ad::array flush(1e6, dtype); + isc::array flush(1e6, dtype); std::cout << "#" << operation << " (" << metric[operation] << ")" << std::endl; std::cout << "N"; std::cout << "\tISAAC"; @@ -200,10 +200,10 @@ void bench(ad::numeric_type dtype, std::string operation) for(int_t N: create_log_range(1e3, 2e7, 50, 64)) { std::cout << N; - ad::array x(N, dtype), y(N, dtype); + isc::array x(N, dtype), y(N, dtype); /* ISAAC */ - std::list events;\ - BENCHMARK_ISAAC(y = ad::control(x + alpha*y, ad::execution_options_type(0, &events)), 3*N*dtsize/t) + std::list events;\ + BENCHMARK_ISAAC(y = isc::control(x + alpha*y, isc::execution_options_type(0, &events)), 3*N*dtsize/t) /* clblas */ #ifdef BENCH_CLBLAS BENCHMARK_CLBLAS(clblasSaxpy(N, alpha, CL_HANDLE(x.data()), 0, 1, CL_HANDLE(y.data()), 0, 1, 1, &CL_HANDLE(queue), 0, NULL, &event()), 3*N*dtsize/t) @@ -211,8 +211,8 @@ void bench(ad::numeric_type dtype, std::string operation) /* BLAS */ #ifdef BENCH_CBLAS std::vector cx(N), cy(N); - ad::copy(x, cx); - ad::copy(y, cy); + isc::copy(x, cx); + isc::copy(y, cy); BENCHMARK_HOST(cblas_saxpy(N, alpha, cx.data(), 1, cy.data(), 1), 3*N*dtsize/t); #endif /* CuBLAS */ @@ -234,11 +234,11 @@ void bench(ad::numeric_type dtype, std::string operation) { std::cout << N; /* ISAAC */ - ad::array x(N, dtype), y(N, dtype); - ad::array scratch(N, dtype); - ad::scalar s(dtype); + isc::array x(N, dtype), y(N, dtype); + isc::array scratch(N, dtype); + isc::scalar s(dtype); s = dot(x,y); queue.synchronize(); - BENCHMARK_ISAAC(s = ad::control(dot(x,y), ad::execution_options_type(0, &events)), 2*N*dtsize/t) + BENCHMARK_ISAAC(s = isc::control(dot(x,y), isc::execution_options_type(0, &events)), 2*N*dtsize/t) /* clblas */ #ifdef BENCH_CLBLAS BENCHMARK_CLBLAS(clblasSdot(N, CL_HANDLE(s.data()), 0, CL_HANDLE(x.data()), 0, 1, CL_HANDLE(y.data()), 0, 1, CL_HANDLE(scratch.data()), 1, &CL_HANDLE(queue), 0, NULL, &event()), 2*N*dtsize/t) @@ -246,8 +246,8 @@ void bench(ad::numeric_type dtype, std::string operation) /* BLAS */ #ifdef BENCH_CBLAS std::vector cx(N), cy(N); - ad::copy(x, cx); - ad::copy(y, cy); + isc::copy(x, cx); + isc::copy(y, cy); BENCHMARK_HOST(cblas_sdot(N, cx.data(), 1, cy.data(), 1), 2*N*dtsize/t); #endif #ifdef BENCH_CUBLAS @@ -284,20 +284,20 @@ void bench(ad::numeric_type dtype, std::string operation) int_t N = std::get<1>(MN); std::cout << M << "," << N; /* ISAAC */ - ad::array A(N, M, dtype), y(M, dtype), x(N, dtype); + isc::array A(N, M, dtype), y(M, dtype), x(N, dtype); #if HAS_A_BLAS int_t lda = A.ld(); #endif y = dot(trans(A),x); queue.synchronize(); - BENCHMARK_ISAAC(y = ad::control(dot(trans(A),x), ad::execution_options_type(0, &events)),(M*N + M + N)*dtsize/t); + BENCHMARK_ISAAC(y = isc::control(dot(trans(A),x), isc::execution_options_type(0, &events)),(M*N + M + N)*dtsize/t); #ifdef BENCH_CLBLAS BENCHMARK_CLBLAS(clblasSgemv(clblasColumnMajor, clblasTrans, N, M, 1, CL_HANDLE(A.data()), 0, lda, CL_HANDLE(x.data()), 0, 1, 0, CL_HANDLE(y.data()), 0, 1, 1, &CL_HANDLE(queue),0, NULL, &event()), (M*N + M + N)*dtsize/t) #endif #ifdef BENCH_CBLAS std::vector cA(N*M), cx(N), cy(M); - ad::copy(x, cx); - ad::copy(y, cy); - ad::copy(A, cA); + isc::copy(x, cx); + isc::copy(y, cy); + isc::copy(A, cA); BENCHMARK_HOST(cblas_sgemv(CblasColMajor, CblasTrans, N, M, 1, cA.data(), lda, cx.data(), 1, 0, cy.data(), 1), (M*N + M + N)*dtsize/t); #endif #ifdef BENCH_CUBLAS @@ -359,11 +359,11 @@ void bench(ad::numeric_type dtype, std::string operation) int_t Bs1 = K, Bs2 = N; if(BT) std::swap(Bs1, Bs2); - ad::array C(M, N, dtype), A(As1, As2, dtype), B(Bs1, Bs2, dtype); + isc::array C(M, N, dtype), A(As1, As2, dtype), B(Bs1, Bs2, dtype); #if HAS_A_BLAS int_t lda = A.ld(), ldb = B.ld(), ldc = C.ld(); #endif - BENCHMARK_ISAAC(C = ad::control(AT?(BT?dot(A.T(),B.T()):dot(A.T(),B)):(BT?dot(A,B.T()):dot(A,B)), ad::execution_options_type(0, &events)), (double)2*M*N*K/t); + BENCHMARK_ISAAC(C = isc::control(AT?(BT?dot(A.T(),B.T()):dot(A.T(),B)):(BT?dot(A,B.T()):dot(A,B)), isc::execution_options_type(0, &events)), (double)2*M*N*K/t); /* clblas */ #ifdef BENCH_CLBLAS BENCHMARK_CLBLAS(clblasSgemm(clblasColumnMajor, AT?clblasTrans:clblasNoTrans, BT?clblasTrans:clblasNoTrans, M, N, K, 1, CL_HANDLE(A.data()), 0, lda, CL_HANDLE(B.data()), 0, ldb, @@ -372,9 +372,9 @@ void bench(ad::numeric_type dtype, std::string operation) /* BLAS */ #ifdef BENCH_CBLAS std::vector cC(M*N), cA(M*K), cB(N*K); - ad::copy(C, cC); - ad::copy(A, cA); - ad::copy(B, cB); + isc::copy(C, cC); + isc::copy(A, cA); + isc::copy(B, cB); BENCHMARK_HOST(cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, cA.data(), lda, cB.data(), ldb, 1, cC.data(), ldc), (double)2*M*N*K/t); #endif #ifdef BENCH_CUBLAS @@ -399,10 +399,10 @@ int main(int argc, char* argv[]) #ifdef BENCH_CLBLAS clblasSetup(); #endif - ad::driver::queues.queue_properties = CL_QUEUE_PROFILING_ENABLE; + isc::driver::queues.queue_properties = CL_QUEUE_PROFILING_ENABLE; int device_idx = 0; - ad::driver::queues_type::container_type queues = ad::driver::queues.contexts(); + isc::driver::queues_type::container_type queues = isc::driver::queues.contexts(); std::string operation; if(queues.size() > 1) @@ -412,9 +412,9 @@ int main(int argc, char* argv[]) std::cerr << "usage : blas-bench DEVICE_IDX OPERATION" << std::endl; std::cout << "Devices available: " << std::endl; unsigned int current=0; - for(ad::driver::queues_type::container_type::const_iterator it = queues.begin() ; it != queues.end() ; ++it) + for(isc::driver::queues_type::container_type::const_iterator it = queues.begin() ; it != queues.end() ; ++it) { - ad::driver::Device device = it->first.device(); + isc::driver::Device device = it->first.device(); std::cout << current++ << ": " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl; } exit(EXIT_FAILURE); @@ -432,10 +432,10 @@ int main(int argc, char* argv[]) operation = args[1]; } - ad::driver::queues.default_device = device_idx; + isc::driver::queues.default_device = device_idx; std::cout << "#Benchmark : BLAS" << std::endl; std::cout << "#----------------" << std::endl; - bench(ad::FLOAT_TYPE, operation); + bench(isc::FLOAT_TYPE, operation); #ifdef BENCH_CLBLAS clblasTeardown(); diff --git a/bench/overhead.cpp b/bench/overhead.cpp index 7198ca6af..a0cdb6d6d 100644 --- a/bench/overhead.cpp +++ b/bench/overhead.cpp @@ -3,7 +3,7 @@ #include -namespace ad = isaac; +namespace isc = isaac; #ifdef BENCH_CUBLAS __global__ void dummy(){} @@ -12,7 +12,7 @@ __global__ void dummy(){} int main() { - for(ad::driver::queues_type::data_type::const_iterator it = ad::driver::queues.data().begin() ; it != ad::driver::queues.data().end() ; ++it) + for(isc::driver::queues_type::data_type::const_iterator it = isc::driver::queues.data().begin() ; it != isc::driver::queues.data().end() ; ++it) { cl::CommandQueue queue = it->second[0]; cl::Context context = it->first; diff --git a/cmake/FindOpenCL.cmake b/cmake/FindOpenCL.cmake index e707ff3e8..c0874b0c8 100644 --- a/cmake/FindOpenCL.cmake +++ b/cmake/FindOpenCL.cmake @@ -17,7 +17,7 @@ else() set(L_HINTS ${L_HINTS} ${CUDA_TOOLKIT_ROOT_DIR}/targets/x86_64-linux/lib/) endif() -find_library(OPENCL_LIBRARIES NAMES OpenCL NO_CMAKE_FIND_ROOT_PATH HINTS ${L_HINTS} ) +find_library(OPENCL_LIBRARIES NAMES OpenCL HINTS ${L_HINTS} ) include(FindPackageHandleStandardArgs) find_package_handle_standard_args(OpenCL DEFAULT_MSG OPENCL_LIBRARIES) mark_as_advanced(OpenCL) diff --git a/cmake/android/toolchain.cmake b/cmake/toolchain/android.cmake similarity index 100% rename from cmake/android/toolchain.cmake rename to cmake/toolchain/android.cmake diff --git a/cmake/toolchain/cross-win32-mingw32.cmake b/cmake/toolchain/cross-win32-mingw32.cmake new file mode 100644 index 000000000..4f0a7324f --- /dev/null +++ b/cmake/toolchain/cross-win32-mingw32.cmake @@ -0,0 +1,12 @@ +#System +SET(CMAKE_SYSTEM_NAME Windows) +#Compilers +SET(CMAKE_C_COMPILER /usr/bin/i686-w64-mingw32-gcc) +SET(CMAKE_CXX_COMPILER /usr/bin/i686-w64-mingw32-g++) +SET(CMAKE_RC_COMPILER /usr/bin/i686-w64-mingw32-windres) +# search headers and libraries in the target environment, search +# programs in the host environment +SET(CMAKE_FIND_ROOT_PATH "${CMAKE_CURRENT_LIST_DIR}/crossdeps" /usr/i686-w64-mingw32) +SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) diff --git a/cmake/toolchain/cross-win64-mingw32.cmake b/cmake/toolchain/cross-win64-mingw32.cmake new file mode 100644 index 000000000..1b67b5a52 --- /dev/null +++ b/cmake/toolchain/cross-win64-mingw32.cmake @@ -0,0 +1,12 @@ +#System +SET(CMAKE_SYSTEM_NAME Windows) +#Compilers +SET(CMAKE_C_COMPILER /usr/bin/x86_64-w64-mingw32-gcc) +SET(CMAKE_CXX_COMPILER /usr/bin/x86_64-w64-mingw32-g++) +SET(CMAKE_RC_COMPILER /usr/bin/x86_64-w64-mingw32-windres) +# search headers and libraries in the target environment, search +# programs in the host environment +SET(CMAKE_FIND_ROOT_PATH "${CMAKE_CURRENT_LIST_DIR}/crossdeps" /usr/x86_64-w64-mingw32) +SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) diff --git a/include/isaac/value_scalar.h b/include/isaac/value_scalar.h index c6cfc59f8..4ad90cfa0 100644 --- a/include/isaac/value_scalar.h +++ b/include/isaac/value_scalar.h @@ -1,8 +1,8 @@ #ifndef ISAAC_VALUE_SCALAR_H #define ISAAC_VALUE_SCALAR_H +#include #include "isaac/types.h" -#include "stdint.h" namespace isaac { @@ -13,13 +13,13 @@ class array_expression; union values_holder { int8_t int8; - u_int8_t uint8; + uint8_t uint8; int16_t int16; - u_int16_t uint16; + uint16_t uint16; int32_t int32; - u_int32_t uint32; + uint32_t uint32; int64_t int64; - u_int64_t uint64; + uint64_t uint64; float float32; double float64; }; @@ -72,13 +72,13 @@ private: }; value_scalar int8(int8_t v); -value_scalar uint8(u_int8_t v); +value_scalar uint8(uint8_t v); value_scalar int16(int16_t v); -value_scalar uint16(u_int16_t v); +value_scalar uint16(uint16_t v); value_scalar int32(int32_t v); -value_scalar uint32(u_int32_t v); +value_scalar uint32(uint32_t v); value_scalar int64(int64_t v); -value_scalar uint64(u_int64_t v); +value_scalar uint64(uint64_t v); value_scalar float32(float v); value_scalar float64(double v); diff --git a/lib/value_scalar.cpp b/lib/value_scalar.cpp index 3ae5a44ba..62642cd0d 100644 --- a/lib/value_scalar.cpp +++ b/lib/value_scalar.cpp @@ -91,13 +91,13 @@ T value_scalar::cast() const #undef INSTANTIATE value_scalar int8(int8_t v) { return value_scalar(v); } -value_scalar uint8(u_int8_t v) { return value_scalar(v); } +value_scalar uint8(uint8_t v) { return value_scalar(v); } value_scalar int16(int16_t v) { return value_scalar(v); } -value_scalar uint16(u_int16_t v) { return value_scalar(v); } +value_scalar uint16(uint16_t v) { return value_scalar(v); } value_scalar int32(int32_t v) { return value_scalar(v); } -value_scalar uint32(u_int32_t v) { return value_scalar(v); } +value_scalar uint32(uint32_t v) { return value_scalar(v); } value_scalar int64(int64_t v) { return value_scalar(v); } -value_scalar uint64(u_int64_t v) { return value_scalar(v); } +value_scalar uint64(uint64_t v) { return value_scalar(v); } value_scalar float32(float v) { return value_scalar(v); } value_scalar float64(double v) { return value_scalar(v); } diff --git a/python/setup.py b/python/setup.py index 2448a4f66..da08ef153 100644 --- a/python/setup.py +++ b/python/setup.py @@ -115,7 +115,7 @@ def main(): include =' src/include'.split() + ['external/boost/include', os.path.join(find_module("numpy")[1], "core", "include")] #Source files - src = 'src/lib/array.cpp src/lib/value_scalar.cpp src/lib/wrap/clBLAS.cpp src/lib/symbolic/preset.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/io.cpp src/lib/symbolic/expression.cpp src/lib/model/model.cpp src/lib/model/predictors/random_forest.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/driver/kernel.cpp src/lib/driver/event.cpp src/lib/driver/command_queue.cpp src/lib/driver/program.cpp src/lib/driver/platform.cpp src/lib/driver/ndrange.cpp src/lib/driver/handle.cpp src/lib/driver/device.cpp src/lib/driver/context.cpp src/lib/driver/check.cpp src/lib/driver/buffer.cpp src/lib/driver/backend.cpp src/lib/backend/parse.cpp src/lib/backend/mapped_object.cpp src/lib/backend/stream.cpp src/lib/backend/keywords.cpp src/lib/backend/binder.cpp src/lib/backend/templates/gemm.cpp src/lib/backend/templates/ger.cpp src/lib/backend/templates/gemv.cpp src/lib/backend/templates/dot.cpp src/lib/backend/templates/base.cpp src/lib/backend/templates/axpy.cpp '.split() + [os.path.join('src', 'wrap', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'model.cpp', 'exceptions.cpp']] + src = 'src/lib/value_scalar.cpp src/lib/wrap/clBLAS.cpp src/lib/symbolic/preset.cpp src/lib/symbolic/io.cpp src/lib/symbolic/expression.cpp src/lib/symbolic/execute.cpp src/lib/model/predictors/random_forest.cpp src/lib/model/model.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/driver/program.cpp src/lib/driver/platform.cpp src/lib/driver/ndrange.cpp src/lib/driver/kernel.cpp src/lib/driver/handle.cpp src/lib/driver/event.cpp src/lib/driver/device.cpp src/lib/driver/context.cpp src/lib/driver/command_queue.cpp src/lib/driver/check.cpp src/lib/driver/buffer.cpp src/lib/driver/backend.cpp src/lib/backend/templates/ger.cpp src/lib/backend/templates/gemv.cpp src/lib/backend/templates/gemm.cpp src/lib/backend/templates/dot.cpp src/lib/backend/templates/base.cpp src/lib/backend/templates/axpy.cpp src/lib/backend/stream.cpp src/lib/backend/parse.cpp src/lib/backend/mapped_object.cpp src/lib/backend/keywords.cpp src/lib/backend/binder.cpp src/lib/array.cpp '.split() + [os.path.join('src', 'wrap', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'model.cpp', 'exceptions.cpp']] boostsrc = 'external/boost/libs/' for s in ['numpy','python','smart_ptr','system','thread']: src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x] diff --git a/tests/linalg/axpy.cpp b/tests/linalg/axpy.cpp index c90c1cae0..1a3d9b8ae 100644 --- a/tests/linalg/axpy.cpp +++ b/tests/linalg/axpy.cpp @@ -4,19 +4,19 @@ #include "isaac/array.h" #include "isaac/wrap/clBLAS.h" -namespace ad = isaac; +namespace isc = isaac; typedef isaac::int_t int_t; template void test_element_wise_vector(T epsilon, simple_vector_base & cx, simple_vector_base& cy, simple_vector_base& cz, - ad::array& x, ad::array& y, ad::array& z) + isc::array& x, isc::array& y, isc::array& z) { using namespace std; int failure_count = 0; - ad::numeric_type dtype = x.dtype(); - ad::driver::Context const & ctx = x.context(); - ad::driver::CommandQueue queue = ad::driver::queues[ctx][0]; + isc::numeric_type dtype = x.dtype(); + isc::driver::Context const & ctx = x.context(); + isc::driver::CommandQueue queue = isc::driver::queues[ctx][0]; cl_command_queue clqueue = (*queue.handle().cl)(); int_t N = cz.size(); @@ -114,7 +114,7 @@ void test_element_wise_vector(T epsilon, simple_vector_base & cx, simple_vect } template -void test_impl(T epsilon, ad::driver::Context const & ctx) +void test_impl(T epsilon, isc::driver::Context const & ctx) { using isaac::_; @@ -140,10 +140,10 @@ void test_impl(T epsilon, ad::driver::Context const & ctx) int main() { clblasSetup(); - auto data = ad::driver::queues.contexts(); + auto data = isc::driver::queues.contexts(); for(const auto & elem : data) { - ad::driver::Device device = elem.second[0].device(); + isc::driver::Device device = elem.second[0].device(); std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl; std::cout << "---" << std::endl; std::cout << ">> float" << std::endl; diff --git a/tests/linalg/common.hpp b/tests/linalg/common.hpp index 083d14250..def9aed6e 100644 --- a/tests/linalg/common.hpp +++ b/tests/linalg/common.hpp @@ -10,7 +10,11 @@ template struct BLAS; template<> struct BLAS { template static FT F(FT SAXPY, DT ) { return SAXPY; } }; template<> struct BLAS { template static DT F(FT , DT DAXPY) { return DAXPY; } }; -enum interface_t{clBLAS, CPP}; +enum interface_t +{ + clBLAS, + CPP +}; #define CHANDLE(X) (*X.data().handle().cl)() #define OFF(X) X.start()[0] + X.start()[1]*X.ld() diff --git a/tests/linalg/dot.cpp b/tests/linalg/dot.cpp index 4c6d00694..8368c095d 100644 --- a/tests/linalg/dot.cpp +++ b/tests/linalg/dot.cpp @@ -5,23 +5,23 @@ #include "isaac/array.h" #include "isaac/wrap/clBLAS.h" -namespace ad = isaac; -typedef ad::int_t int_t; +namespace isc = isaac; +typedef isc::int_t int_t; template void test_reduction(T epsilon, simple_vector_base & cx, simple_vector_base & cy, - ad::array & x, ad::array & y) + isc::array & x, isc::array & y) { using namespace std; - ad::driver::Context const & ctx = x.context(); + isc::driver::Context const & ctx = x.context(); int_t N = cx.size(); - ad::driver::CommandQueue queue = ad::driver::queues[ctx][0]; + isc::driver::CommandQueue queue = isc::driver::queues[ctx][0]; cl_command_queue clqueue = (*queue.handle().cl)(); - ad::array scratch(N, x.dtype()); + isc::array scratch(N, x.dtype()); unsigned int failure_count = 0; - isaac::numeric_type dtype = ad::to_numeric_type::value; + isaac::numeric_type dtype = isc::to_numeric_type::value; T cs = 0; T tmp = 0; @@ -67,7 +67,7 @@ void test_reduction(T epsilon, simple_vector_base & cx, simple_vector_base -void test_impl(T epsilon, ad::driver::Context const & ctx) +void test_impl(T epsilon, isc::driver::Context const & ctx) { using isaac::_; @@ -90,10 +90,10 @@ void test_impl(T epsilon, ad::driver::Context const & ctx) int main() { clblasSetup(); - auto data = ad::driver::queues.contexts(); + auto data = isc::driver::queues.contexts(); for(const auto & elem : data) { - ad::driver::Device device = elem.second[0].device(); + isc::driver::Device device = elem.second[0].device(); std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl; std::cout << "---" << std::endl; std::cout << ">> float" << std::endl; diff --git a/tests/linalg/gemm.cpp b/tests/linalg/gemm.cpp index e01770ee8..69818a3aa 100644 --- a/tests/linalg/gemm.cpp +++ b/tests/linalg/gemm.cpp @@ -4,23 +4,23 @@ #include "isaac/model/model.h" #include "isaac/wrap/clBLAS.h" -namespace ad = isaac; +namespace isc = isaac; template void test_impl(T epsilon, simple_matrix_base & cC, simple_matrix_base const & cA, simple_matrix_base const & cB, - ad::array & C, ad::array const & A, ad::array const & AT, ad::array const & B, ad::array const & BT, - interface_t interface, const char * prefix) + isc::array & C, isc::array const & A, isc::array const & AT, isc::array const & B, isc::array const & BT, + interface_t interf, const char * prefix) { int failure_count = 0; - ad::int_t M = C.shape()[0]; - ad::int_t N = C.shape()[1]; - ad::int_t K = A.shape()[1]; + isc::int_t M = C.shape()[0]; + isc::int_t N = C.shape()[1]; + isc::int_t K = A.shape()[1]; T alpha = 1; T beta = 0; - ad::driver::CommandQueue queue = ad::driver::queues[C.context()][0]; + isc::driver::CommandQueue queue = isc::driver::queues[C.context()][0]; for(int i = 0 ; i < M ; ++i) { @@ -44,7 +44,7 @@ void test_impl(T epsilon, simple_matrix_base & cC, simple_matrix_base cons std::cout << "[" << prefix << "] \t" << NAME << "..." << std::flush;\ GPU_OP;\ queue.synchronize();\ - ad::copy(C, buffer);\ + isc::copy(C, buffer);\ if(diff(buffer, cCbuffer, epsilon))\ {\ failure_count++;\ @@ -53,7 +53,7 @@ void test_impl(T epsilon, simple_matrix_base & cC, simple_matrix_base cons else\ std::cout << std::endl; - if(interface==clBLAS) + if(interf==clBLAS) { cl_command_queue clqueue = (*queue.handle().cl)(); @@ -95,7 +95,7 @@ void test_impl(T epsilon, simple_matrix_base & cC, simple_matrix_base cons } template -void test_impl(T epsilon, ad::driver::Context const & ctx) +void test_impl(T epsilon, isc::driver::Context const & ctx) { int_t M = 173; int_t N = 256; @@ -126,10 +126,10 @@ void test_impl(T epsilon, ad::driver::Context const & ctx) int main() { clblasSetup(); - auto data = ad::driver::queues.contexts(); + auto data = isc::driver::queues.contexts(); for(const auto & elem : data) { - ad::driver::Device device = elem.second[0].device(); + isc::driver::Device device = elem.second[0].device(); std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl; std::cout << "---" << std::endl; std::cout << ">> float" << std::endl; diff --git a/tests/linalg/gemv.cpp b/tests/linalg/gemv.cpp index 972d28358..014f43de8 100644 --- a/tests/linalg/gemv.cpp +++ b/tests/linalg/gemv.cpp @@ -3,24 +3,24 @@ #include "isaac/array.h" #include "isaac/wrap/clBLAS.h" -namespace ad = isaac; +namespace isc = isaac; template void test_row_wise_reduction(T epsilon, simple_vector_base & cy, simple_matrix_base const & cA, simple_vector_base & cx, - ad::array & y, ad::array const & A, ad::array & x, interface_t interface, const char * prefix) + isc::array & y, isc::array const & A, isc::array & x, interface_t interf, const char * prefix) { int failure_count = 0; - ad::int_t M = A.shape()[0]; - ad::int_t N = A.shape()[1]; + isc::int_t M = A.shape()[0]; + isc::int_t N = A.shape()[1]; simple_vector bufy(M); simple_vector bufx(N); T alpha = 4.2, beta = 5.6; - ad::driver::CommandQueue queue = ad::driver::queues[y.context()][0]; + isc::driver::CommandQueue queue = isc::driver::queues[y.context()][0]; T yi = 0, xi = 0; #define TEST_OPERATION(NAME, SIZE1, SIZE2, REDUCTION, ASSIGNMENT, GPU_REDUCTION, RES, BUF, CRES)\ @@ -35,7 +35,7 @@ void test_row_wise_reduction(T epsilon, simple_vector_base & cy, simple_matri }\ GPU_REDUCTION;\ queue.synchronize();\ - ad::copy(RES, BUF.data());\ + isc::copy(RES, BUF.data());\ if(diff(CRES, BUF, epsilon))\ {\ failure_count++;\ @@ -45,7 +45,7 @@ void test_row_wise_reduction(T epsilon, simple_vector_base & cy, simple_matri std::cout << std::endl; - if(interface==clBLAS) + if(interf==clBLAS) { cl_command_queue clqueue = (*queue.handle().cl)(); @@ -81,7 +81,7 @@ void test_row_wise_reduction(T epsilon, simple_vector_base & cy, simple_matri } template -void test_impl(T epsilon, ad::driver::Context const & ctx) +void test_impl(T epsilon, isc::driver::Context const & ctx) { int_t M = 1324; int_t N = 1143; @@ -106,10 +106,10 @@ void test_impl(T epsilon, ad::driver::Context const & ctx) int main() { clblasSetup(); - auto data = ad::driver::queues.contexts(); + auto data = isc::driver::queues.contexts(); for(const auto & elem : data) { - ad::driver::Device device = elem.second[0].device(); + isc::driver::Device device = elem.second[0].device(); std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl; std::cout << "---" << std::endl; std::cout << ">> float" << std::endl; diff --git a/tests/linalg/ger.cpp b/tests/linalg/ger.cpp index 139df070f..cdf5eb699 100644 --- a/tests/linalg/ger.cpp +++ b/tests/linalg/ger.cpp @@ -1,18 +1,18 @@ #include "common.hpp" #include "isaac/array.h" -namespace ad = isaac; +namespace isc = isaac; typedef isaac::int_t int_t; template void test(T epsilon, simple_matrix_base & cA, simple_matrix_base& cB, simple_matrix_base& cC, simple_vector_base& cx, simple_vector_base& cy, - ad::array& A, ad::array& B, ad::array& C, ad::array& x, ad::array& y) + isc::array& A, isc::array& B, isc::array& C, isc::array& x, isc::array& y) { using namespace std; int failure_count = 0; - ad::numeric_type dtype = C.dtype(); - ad::driver::Context const & ctx = C.context(); + isc::numeric_type dtype = C.dtype(); + isc::driver::Context const & ctx = C.context(); int_t M = cC.size1(); int_t N = cC.size2(); @@ -98,7 +98,7 @@ void test(T epsilon, simple_matrix_base & cA, simple_matrix_base& cB, simp } template -void test_impl(T epsilon, ad::driver::Context const & ctx) +void test_impl(T epsilon, isc::driver::Context const & ctx) { using isaac::_; @@ -123,10 +123,10 @@ void test_impl(T epsilon, ad::driver::Context const & ctx) int main() { - auto data = ad::driver::queues.contexts(); + auto data = isc::driver::queues.contexts(); for(const auto & elem : data) { - ad::driver::Device device = elem.second[0].device(); + isc::driver::Device device = elem.second[0].device(); std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl; std::cout << "---" << std::endl; std::cout << ">> float" << std::endl; diff --git a/tests/linalg/model.cpp b/tests/linalg/model.cpp index 32e999854..f6649e5f6 100644 --- a/tests/linalg/model.cpp +++ b/tests/linalg/model.cpp @@ -5,12 +5,12 @@ #include #include "isaac/model/import.hpp" -namespace ad = isaac; +namespace isc = isaac; int main() { viennacl::vector x(10000), y(10000), z(10000); - std::map > models = ad::import("geforce_gt_540m.json"); + std::map > models = isc::import("geforce_gt_540m.json"); models["vector-axpy-float32"]->tune(viennacl::symbolic_expression(z, viennacl::op_assign(), x)); models["vector-axpy-float32"]->execute(viennacl::symbolic_expression(z, viennacl::op_assign(), x)); return EXIT_SUCCESS;