Code quality: Cleaned up directory structure, variable names and MinGW compliance

This commit is contained in:
Philippe Tillet
2015-07-21 13:27:48 -04:00
parent 7fdb8c0457
commit cbe930398e
17 changed files with 134 additions and 107 deletions

View File

@@ -16,7 +16,6 @@ endif()
add_definitions(-Wno-sign-compare ${BACKEND_DEFINES} -Wall -Wextra -pedantic -std=c++11) add_definitions(-Wno-sign-compare ${BACKEND_DEFINES} -Wall -Wextra -pedantic -std=c++11)
add_executable(bin2cpp ${CMAKE_MODULE_PATH}/helpers/bin2cpp.cpp) add_executable(bin2cpp ${CMAKE_MODULE_PATH}/helpers/bin2cpp.cpp)
file(GLOB_RECURSE LIBISAAC_SRC lib/*.cpp) file(GLOB_RECURSE LIBISAAC_SRC lib/*.cpp)
#Python wrapper #Python wrapper

View File

@@ -17,8 +17,8 @@
#include <regex> #include <regex>
#define HAS_A_BLAS defined(BENCH_CBLAS) or defined(BENCH_CLBLAS) or defined(BENCH_CUBLAS) #define HAS_A_BLAS defined(BENCH_CBLAS) or defined(BENCH_CLBLAS) or defined(BENCH_CUBLAS)
namespace ad = isaac; namespace isc = isaac;
typedef ad::int_t int_t; typedef isc::int_t int_t;
template<std::size_t> struct int_{}; template<std::size_t> struct int_{};
@@ -85,11 +85,11 @@ T mean(std::vector<T> x)
return res/N; return res/N;
} }
static double time_event(unsigned long sum, ad::driver::Event const & e) static double time_event(unsigned long sum, isc::driver::Event const & e)
{ return sum + e.elapsed_time();} { return sum + e.elapsed_time();}
template<class T> template<class T>
void bench(ad::numeric_type dtype, std::string operation) void bench(isc::numeric_type dtype, std::string operation)
{ {
// //
@@ -101,8 +101,8 @@ void bench(ad::numeric_type dtype, std::string operation)
std::vector<long> times;\ std::vector<long> times;\
double total_time = 0;\ double total_time = 0;\
while(total_time*1e-9 < 1e-3){\ while(total_time*1e-9 < 1e-3){\
std::list<ad::driver::Event> events;\ std::list<isc::driver::Event> events;\
flush = ad::zeros(1e6, 1, dtype);\ flush = isc::zeros(1e6, 1, dtype);\
OP;\ OP;\
queue.synchronize();\ queue.synchronize();\
times.push_back(std::accumulate(events.begin(), events.end(), 0, &time_event));\ times.push_back(std::accumulate(events.begin(), events.end(), 0, &time_event));\
@@ -118,7 +118,7 @@ void bench(ad::numeric_type dtype, std::string operation)
double total_time = 0;\ double total_time = 0;\
while(total_time*1e-9 < 1e-3){\ while(total_time*1e-9 < 1e-3){\
cl::Event event;\ cl::Event event;\
flush = ad::zeros(1e6, 1, dtype);\ flush = isc::zeros(1e6, 1, dtype);\
OP;\ OP;\
queue.synchronize();\ queue.synchronize();\
times.push_back(event.getProfilingInfo<CL_PROFILING_COMMAND_END>() - event.getProfilingInfo<CL_PROFILING_COMMAND_START>());\ times.push_back(event.getProfilingInfo<CL_PROFILING_COMMAND_END>() - event.getProfilingInfo<CL_PROFILING_COMMAND_START>());\
@@ -130,7 +130,7 @@ void bench(ad::numeric_type dtype, std::string operation)
#define BENCHMARK_HOST(OP, PERF) \ #define BENCHMARK_HOST(OP, PERF) \
{\ {\
ad::tools::timer tmr;\ isc::tools::timer tmr;\
double total_time = 0;\ double total_time = 0;\
std::vector<double> times;\ std::vector<double> times;\
while(total_time < 1e-2){\ while(total_time < 1e-2){\
@@ -156,7 +156,7 @@ void bench(ad::numeric_type dtype, std::string operation)
OP;\ OP;\
cudaThreadSynchronize();\ cudaThreadSynchronize();\
while(total_time*1e-3 < 1e-3){\ while(total_time*1e-3 < 1e-3){\
flush = ad::zeros(1e6, 1, dtype);\ flush = isc::zeros(1e6, 1, dtype);\
cudaEventRecord(start,0);\ cudaEventRecord(start,0);\
OP;\ OP;\
cudaEventRecord(stop,0);\ cudaEventRecord(stop,0);\
@@ -169,10 +169,10 @@ void bench(ad::numeric_type dtype, std::string operation)
std::cout << "\t" << PERF << std::flush;\ std::cout << "\t" << PERF << std::flush;\
} }
unsigned int dtsize = ad::size_of(dtype); unsigned int dtsize = isc::size_of(dtype);
ad::driver::CommandQueue & queue = ad::driver::queues.default_queues()[0]; isc::driver::CommandQueue & queue = isc::driver::queues.default_queues()[0];
std::map<std::string, std::string> metric{ {"axpy", "GB/s"}, {"dot", "GB/s"}, {"gemv", "GB/s"}, {"gemm", "GFLOPS"}}; std::map<std::string, std::string> metric{ {"axpy", "GB/s"}, {"dot", "GB/s"}, {"gemv", "GB/s"}, {"gemm", "GFLOPS"}};
ad::array flush(1e6, dtype); isc::array flush(1e6, dtype);
std::cout << "#" << operation << " (" << metric[operation] << ")" << std::endl; std::cout << "#" << operation << " (" << metric[operation] << ")" << std::endl;
std::cout << "N"; std::cout << "N";
std::cout << "\tISAAC"; std::cout << "\tISAAC";
@@ -200,10 +200,10 @@ void bench(ad::numeric_type dtype, std::string operation)
for(int_t N: create_log_range(1e3, 2e7, 50, 64)) for(int_t N: create_log_range(1e3, 2e7, 50, 64))
{ {
std::cout << N; std::cout << N;
ad::array x(N, dtype), y(N, dtype); isc::array x(N, dtype), y(N, dtype);
/* ISAAC */ /* ISAAC */
std::list<ad::driver::Event> events;\ std::list<isc::driver::Event> events;\
BENCHMARK_ISAAC(y = ad::control(x + alpha*y, ad::execution_options_type(0, &events)), 3*N*dtsize/t) BENCHMARK_ISAAC(y = isc::control(x + alpha*y, isc::execution_options_type(0, &events)), 3*N*dtsize/t)
/* clblas */ /* clblas */
#ifdef BENCH_CLBLAS #ifdef BENCH_CLBLAS
BENCHMARK_CLBLAS(clblasSaxpy(N, alpha, CL_HANDLE(x.data()), 0, 1, CL_HANDLE(y.data()), 0, 1, 1, &CL_HANDLE(queue), 0, NULL, &event()), 3*N*dtsize/t) BENCHMARK_CLBLAS(clblasSaxpy(N, alpha, CL_HANDLE(x.data()), 0, 1, CL_HANDLE(y.data()), 0, 1, 1, &CL_HANDLE(queue), 0, NULL, &event()), 3*N*dtsize/t)
@@ -211,8 +211,8 @@ void bench(ad::numeric_type dtype, std::string operation)
/* BLAS */ /* BLAS */
#ifdef BENCH_CBLAS #ifdef BENCH_CBLAS
std::vector<float> cx(N), cy(N); std::vector<float> cx(N), cy(N);
ad::copy(x, cx); isc::copy(x, cx);
ad::copy(y, cy); isc::copy(y, cy);
BENCHMARK_HOST(cblas_saxpy(N, alpha, cx.data(), 1, cy.data(), 1), 3*N*dtsize/t); BENCHMARK_HOST(cblas_saxpy(N, alpha, cx.data(), 1, cy.data(), 1), 3*N*dtsize/t);
#endif #endif
/* CuBLAS */ /* CuBLAS */
@@ -234,11 +234,11 @@ void bench(ad::numeric_type dtype, std::string operation)
{ {
std::cout << N; std::cout << N;
/* ISAAC */ /* ISAAC */
ad::array x(N, dtype), y(N, dtype); isc::array x(N, dtype), y(N, dtype);
ad::array scratch(N, dtype); isc::array scratch(N, dtype);
ad::scalar s(dtype); isc::scalar s(dtype);
s = dot(x,y); queue.synchronize(); s = dot(x,y); queue.synchronize();
BENCHMARK_ISAAC(s = ad::control(dot(x,y), ad::execution_options_type(0, &events)), 2*N*dtsize/t) BENCHMARK_ISAAC(s = isc::control(dot(x,y), isc::execution_options_type(0, &events)), 2*N*dtsize/t)
/* clblas */ /* clblas */
#ifdef BENCH_CLBLAS #ifdef BENCH_CLBLAS
BENCHMARK_CLBLAS(clblasSdot(N, CL_HANDLE(s.data()), 0, CL_HANDLE(x.data()), 0, 1, CL_HANDLE(y.data()), 0, 1, CL_HANDLE(scratch.data()), 1, &CL_HANDLE(queue), 0, NULL, &event()), 2*N*dtsize/t) BENCHMARK_CLBLAS(clblasSdot(N, CL_HANDLE(s.data()), 0, CL_HANDLE(x.data()), 0, 1, CL_HANDLE(y.data()), 0, 1, CL_HANDLE(scratch.data()), 1, &CL_HANDLE(queue), 0, NULL, &event()), 2*N*dtsize/t)
@@ -246,8 +246,8 @@ void bench(ad::numeric_type dtype, std::string operation)
/* BLAS */ /* BLAS */
#ifdef BENCH_CBLAS #ifdef BENCH_CBLAS
std::vector<float> cx(N), cy(N); std::vector<float> cx(N), cy(N);
ad::copy(x, cx); isc::copy(x, cx);
ad::copy(y, cy); isc::copy(y, cy);
BENCHMARK_HOST(cblas_sdot(N, cx.data(), 1, cy.data(), 1), 2*N*dtsize/t); BENCHMARK_HOST(cblas_sdot(N, cx.data(), 1, cy.data(), 1), 2*N*dtsize/t);
#endif #endif
#ifdef BENCH_CUBLAS #ifdef BENCH_CUBLAS
@@ -284,20 +284,20 @@ void bench(ad::numeric_type dtype, std::string operation)
int_t N = std::get<1>(MN); int_t N = std::get<1>(MN);
std::cout << M << "," << N; std::cout << M << "," << N;
/* ISAAC */ /* ISAAC */
ad::array A(N, M, dtype), y(M, dtype), x(N, dtype); isc::array A(N, M, dtype), y(M, dtype), x(N, dtype);
#if HAS_A_BLAS #if HAS_A_BLAS
int_t lda = A.ld(); int_t lda = A.ld();
#endif #endif
y = dot(trans(A),x); queue.synchronize(); y = dot(trans(A),x); queue.synchronize();
BENCHMARK_ISAAC(y = ad::control(dot(trans(A),x), ad::execution_options_type(0, &events)),(M*N + M + N)*dtsize/t); BENCHMARK_ISAAC(y = isc::control(dot(trans(A),x), isc::execution_options_type(0, &events)),(M*N + M + N)*dtsize/t);
#ifdef BENCH_CLBLAS #ifdef BENCH_CLBLAS
BENCHMARK_CLBLAS(clblasSgemv(clblasColumnMajor, clblasTrans, N, M, 1, CL_HANDLE(A.data()), 0, lda, CL_HANDLE(x.data()), 0, 1, 0, CL_HANDLE(y.data()), 0, 1, 1, &CL_HANDLE(queue),0, NULL, &event()), (M*N + M + N)*dtsize/t) BENCHMARK_CLBLAS(clblasSgemv(clblasColumnMajor, clblasTrans, N, M, 1, CL_HANDLE(A.data()), 0, lda, CL_HANDLE(x.data()), 0, 1, 0, CL_HANDLE(y.data()), 0, 1, 1, &CL_HANDLE(queue),0, NULL, &event()), (M*N + M + N)*dtsize/t)
#endif #endif
#ifdef BENCH_CBLAS #ifdef BENCH_CBLAS
std::vector<float> cA(N*M), cx(N), cy(M); std::vector<float> cA(N*M), cx(N), cy(M);
ad::copy(x, cx); isc::copy(x, cx);
ad::copy(y, cy); isc::copy(y, cy);
ad::copy(A, cA); isc::copy(A, cA);
BENCHMARK_HOST(cblas_sgemv(CblasColMajor, CblasTrans, N, M, 1, cA.data(), lda, cx.data(), 1, 0, cy.data(), 1), (M*N + M + N)*dtsize/t); BENCHMARK_HOST(cblas_sgemv(CblasColMajor, CblasTrans, N, M, 1, cA.data(), lda, cx.data(), 1, 0, cy.data(), 1), (M*N + M + N)*dtsize/t);
#endif #endif
#ifdef BENCH_CUBLAS #ifdef BENCH_CUBLAS
@@ -359,11 +359,11 @@ void bench(ad::numeric_type dtype, std::string operation)
int_t Bs1 = K, Bs2 = N; int_t Bs1 = K, Bs2 = N;
if(BT) std::swap(Bs1, Bs2); if(BT) std::swap(Bs1, Bs2);
ad::array C(M, N, dtype), A(As1, As2, dtype), B(Bs1, Bs2, dtype); isc::array C(M, N, dtype), A(As1, As2, dtype), B(Bs1, Bs2, dtype);
#if HAS_A_BLAS #if HAS_A_BLAS
int_t lda = A.ld(), ldb = B.ld(), ldc = C.ld(); int_t lda = A.ld(), ldb = B.ld(), ldc = C.ld();
#endif #endif
BENCHMARK_ISAAC(C = ad::control(AT?(BT?dot(A.T(),B.T()):dot(A.T(),B)):(BT?dot(A,B.T()):dot(A,B)), ad::execution_options_type(0, &events)), (double)2*M*N*K/t); BENCHMARK_ISAAC(C = isc::control(AT?(BT?dot(A.T(),B.T()):dot(A.T(),B)):(BT?dot(A,B.T()):dot(A,B)), isc::execution_options_type(0, &events)), (double)2*M*N*K/t);
/* clblas */ /* clblas */
#ifdef BENCH_CLBLAS #ifdef BENCH_CLBLAS
BENCHMARK_CLBLAS(clblasSgemm(clblasColumnMajor, AT?clblasTrans:clblasNoTrans, BT?clblasTrans:clblasNoTrans, M, N, K, 1, CL_HANDLE(A.data()), 0, lda, CL_HANDLE(B.data()), 0, ldb, BENCHMARK_CLBLAS(clblasSgemm(clblasColumnMajor, AT?clblasTrans:clblasNoTrans, BT?clblasTrans:clblasNoTrans, M, N, K, 1, CL_HANDLE(A.data()), 0, lda, CL_HANDLE(B.data()), 0, ldb,
@@ -372,9 +372,9 @@ void bench(ad::numeric_type dtype, std::string operation)
/* BLAS */ /* BLAS */
#ifdef BENCH_CBLAS #ifdef BENCH_CBLAS
std::vector<float> cC(M*N), cA(M*K), cB(N*K); std::vector<float> cC(M*N), cA(M*K), cB(N*K);
ad::copy(C, cC); isc::copy(C, cC);
ad::copy(A, cA); isc::copy(A, cA);
ad::copy(B, cB); isc::copy(B, cB);
BENCHMARK_HOST(cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, cA.data(), lda, cB.data(), ldb, 1, cC.data(), ldc), (double)2*M*N*K/t); BENCHMARK_HOST(cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, cA.data(), lda, cB.data(), ldb, 1, cC.data(), ldc), (double)2*M*N*K/t);
#endif #endif
#ifdef BENCH_CUBLAS #ifdef BENCH_CUBLAS
@@ -399,10 +399,10 @@ int main(int argc, char* argv[])
#ifdef BENCH_CLBLAS #ifdef BENCH_CLBLAS
clblasSetup(); clblasSetup();
#endif #endif
ad::driver::queues.queue_properties = CL_QUEUE_PROFILING_ENABLE; isc::driver::queues.queue_properties = CL_QUEUE_PROFILING_ENABLE;
int device_idx = 0; int device_idx = 0;
ad::driver::queues_type::container_type queues = ad::driver::queues.contexts(); isc::driver::queues_type::container_type queues = isc::driver::queues.contexts();
std::string operation; std::string operation;
if(queues.size() > 1) if(queues.size() > 1)
@@ -412,9 +412,9 @@ int main(int argc, char* argv[])
std::cerr << "usage : blas-bench DEVICE_IDX OPERATION" << std::endl; std::cerr << "usage : blas-bench DEVICE_IDX OPERATION" << std::endl;
std::cout << "Devices available: " << std::endl; std::cout << "Devices available: " << std::endl;
unsigned int current=0; unsigned int current=0;
for(ad::driver::queues_type::container_type::const_iterator it = queues.begin() ; it != queues.end() ; ++it) for(isc::driver::queues_type::container_type::const_iterator it = queues.begin() ; it != queues.end() ; ++it)
{ {
ad::driver::Device device = it->first.device(); isc::driver::Device device = it->first.device();
std::cout << current++ << ": " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl; std::cout << current++ << ": " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
} }
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
@@ -432,10 +432,10 @@ int main(int argc, char* argv[])
operation = args[1]; operation = args[1];
} }
ad::driver::queues.default_device = device_idx; isc::driver::queues.default_device = device_idx;
std::cout << "#Benchmark : BLAS" << std::endl; std::cout << "#Benchmark : BLAS" << std::endl;
std::cout << "#----------------" << std::endl; std::cout << "#----------------" << std::endl;
bench<float>(ad::FLOAT_TYPE, operation); bench<float>(isc::FLOAT_TYPE, operation);
#ifdef BENCH_CLBLAS #ifdef BENCH_CLBLAS
clblasTeardown(); clblasTeardown();

View File

@@ -3,7 +3,7 @@
#include <vector> #include <vector>
namespace ad = isaac; namespace isc = isaac;
#ifdef BENCH_CUBLAS #ifdef BENCH_CUBLAS
__global__ void dummy(){} __global__ void dummy(){}
@@ -12,7 +12,7 @@ __global__ void dummy(){}
int main() int main()
{ {
for(ad::driver::queues_type::data_type::const_iterator it = ad::driver::queues.data().begin() ; it != ad::driver::queues.data().end() ; ++it) for(isc::driver::queues_type::data_type::const_iterator it = isc::driver::queues.data().begin() ; it != isc::driver::queues.data().end() ; ++it)
{ {
cl::CommandQueue queue = it->second[0]; cl::CommandQueue queue = it->second[0];
cl::Context context = it->first; cl::Context context = it->first;

View File

@@ -17,7 +17,7 @@ else()
set(L_HINTS ${L_HINTS} ${CUDA_TOOLKIT_ROOT_DIR}/targets/x86_64-linux/lib/) set(L_HINTS ${L_HINTS} ${CUDA_TOOLKIT_ROOT_DIR}/targets/x86_64-linux/lib/)
endif() endif()
find_library(OPENCL_LIBRARIES NAMES OpenCL NO_CMAKE_FIND_ROOT_PATH HINTS ${L_HINTS} ) find_library(OPENCL_LIBRARIES NAMES OpenCL HINTS ${L_HINTS} )
include(FindPackageHandleStandardArgs) include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(OpenCL DEFAULT_MSG OPENCL_LIBRARIES) find_package_handle_standard_args(OpenCL DEFAULT_MSG OPENCL_LIBRARIES)
mark_as_advanced(OpenCL) mark_as_advanced(OpenCL)

View File

@@ -0,0 +1,12 @@
# CMake toolchain file: cross-compile for Windows with the i686-w64-mingw32 tools.
set(CMAKE_SYSTEM_NAME Windows)

# Cross toolchain executables: C compiler, C++ compiler and resource compiler.
set(CMAKE_C_COMPILER   "/usr/bin/i686-w64-mingw32-gcc")
set(CMAKE_CXX_COMPILER "/usr/bin/i686-w64-mingw32-g++")
set(CMAKE_RC_COMPILER  "/usr/bin/i686-w64-mingw32-windres")

# Roots of the target environment: the "crossdeps" directory sitting next to
# this file, followed by the MinGW sysroot.
set(CMAKE_FIND_ROOT_PATH
    "${CMAKE_CURRENT_LIST_DIR}/crossdeps"
    "/usr/i686-w64-mingw32")

# Programs are looked up in the host environment, whereas libraries and
# headers are looked up in the target environment only.
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)

View File

@@ -0,0 +1,12 @@
# CMake toolchain file: cross-compile for Windows with the x86_64-w64-mingw32 tools.
set(CMAKE_SYSTEM_NAME Windows)

# Cross toolchain executables: C compiler, C++ compiler and resource compiler.
set(CMAKE_C_COMPILER   "/usr/bin/x86_64-w64-mingw32-gcc")
set(CMAKE_CXX_COMPILER "/usr/bin/x86_64-w64-mingw32-g++")
set(CMAKE_RC_COMPILER  "/usr/bin/x86_64-w64-mingw32-windres")

# Roots of the target environment: the "crossdeps" directory sitting next to
# this file, followed by the MinGW sysroot.
set(CMAKE_FIND_ROOT_PATH
    "${CMAKE_CURRENT_LIST_DIR}/crossdeps"
    "/usr/x86_64-w64-mingw32")

# Programs are looked up in the host environment, whereas libraries and
# headers are looked up in the target environment only.
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)

View File

@@ -1,8 +1,8 @@
#ifndef ISAAC_VALUE_SCALAR_H #ifndef ISAAC_VALUE_SCALAR_H
#define ISAAC_VALUE_SCALAR_H #define ISAAC_VALUE_SCALAR_H
#include <inttypes.h>
#include "isaac/types.h" #include "isaac/types.h"
#include "stdint.h"
namespace isaac namespace isaac
{ {
@@ -13,13 +13,13 @@ class array_expression;
union values_holder union values_holder
{ {
int8_t int8; int8_t int8;
u_int8_t uint8; uint8_t uint8;
int16_t int16; int16_t int16;
u_int16_t uint16; uint16_t uint16;
int32_t int32; int32_t int32;
u_int32_t uint32; uint32_t uint32;
int64_t int64; int64_t int64;
u_int64_t uint64; uint64_t uint64;
float float32; float float32;
double float64; double float64;
}; };
@@ -72,13 +72,13 @@ private:
}; };
value_scalar int8(int8_t v); value_scalar int8(int8_t v);
value_scalar uint8(u_int8_t v); value_scalar uint8(uint8_t v);
value_scalar int16(int16_t v); value_scalar int16(int16_t v);
value_scalar uint16(u_int16_t v); value_scalar uint16(uint16_t v);
value_scalar int32(int32_t v); value_scalar int32(int32_t v);
value_scalar uint32(u_int32_t v); value_scalar uint32(uint32_t v);
value_scalar int64(int64_t v); value_scalar int64(int64_t v);
value_scalar uint64(u_int64_t v); value_scalar uint64(uint64_t v);
value_scalar float32(float v); value_scalar float32(float v);
value_scalar float64(double v); value_scalar float64(double v);

View File

@@ -91,13 +91,13 @@ T value_scalar::cast() const
#undef INSTANTIATE #undef INSTANTIATE
value_scalar int8(int8_t v) { return value_scalar(v); } value_scalar int8(int8_t v) { return value_scalar(v); }
value_scalar uint8(u_int8_t v) { return value_scalar(v); } value_scalar uint8(uint8_t v) { return value_scalar(v); }
value_scalar int16(int16_t v) { return value_scalar(v); } value_scalar int16(int16_t v) { return value_scalar(v); }
value_scalar uint16(u_int16_t v) { return value_scalar(v); } value_scalar uint16(uint16_t v) { return value_scalar(v); }
value_scalar int32(int32_t v) { return value_scalar(v); } value_scalar int32(int32_t v) { return value_scalar(v); }
value_scalar uint32(u_int32_t v) { return value_scalar(v); } value_scalar uint32(uint32_t v) { return value_scalar(v); }
value_scalar int64(int64_t v) { return value_scalar(v); } value_scalar int64(int64_t v) { return value_scalar(v); }
value_scalar uint64(u_int64_t v) { return value_scalar(v); } value_scalar uint64(uint64_t v) { return value_scalar(v); }
value_scalar float32(float v) { return value_scalar(v); } value_scalar float32(float v) { return value_scalar(v); }
value_scalar float64(double v) { return value_scalar(v); } value_scalar float64(double v) { return value_scalar(v); }

View File

@@ -115,7 +115,7 @@ def main():
include =' src/include'.split() + ['external/boost/include', os.path.join(find_module("numpy")[1], "core", "include")] include =' src/include'.split() + ['external/boost/include', os.path.join(find_module("numpy")[1], "core", "include")]
#Source files #Source files
src = 'src/lib/array.cpp src/lib/value_scalar.cpp src/lib/wrap/clBLAS.cpp src/lib/symbolic/preset.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/io.cpp src/lib/symbolic/expression.cpp src/lib/model/model.cpp src/lib/model/predictors/random_forest.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/driver/kernel.cpp src/lib/driver/event.cpp src/lib/driver/command_queue.cpp src/lib/driver/program.cpp src/lib/driver/platform.cpp src/lib/driver/ndrange.cpp src/lib/driver/handle.cpp src/lib/driver/device.cpp src/lib/driver/context.cpp src/lib/driver/check.cpp src/lib/driver/buffer.cpp src/lib/driver/backend.cpp src/lib/backend/parse.cpp src/lib/backend/mapped_object.cpp src/lib/backend/stream.cpp src/lib/backend/keywords.cpp src/lib/backend/binder.cpp src/lib/backend/templates/gemm.cpp src/lib/backend/templates/ger.cpp src/lib/backend/templates/gemv.cpp src/lib/backend/templates/dot.cpp src/lib/backend/templates/base.cpp src/lib/backend/templates/axpy.cpp '.split() + [os.path.join('src', 'wrap', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'model.cpp', 'exceptions.cpp']] src = 'src/lib/value_scalar.cpp src/lib/wrap/clBLAS.cpp src/lib/symbolic/preset.cpp src/lib/symbolic/io.cpp src/lib/symbolic/expression.cpp src/lib/symbolic/execute.cpp src/lib/model/predictors/random_forest.cpp src/lib/model/model.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/driver/program.cpp src/lib/driver/platform.cpp src/lib/driver/ndrange.cpp src/lib/driver/kernel.cpp src/lib/driver/handle.cpp src/lib/driver/event.cpp src/lib/driver/device.cpp src/lib/driver/context.cpp src/lib/driver/command_queue.cpp src/lib/driver/check.cpp src/lib/driver/buffer.cpp src/lib/driver/backend.cpp src/lib/backend/templates/ger.cpp src/lib/backend/templates/gemv.cpp src/lib/backend/templates/gemm.cpp src/lib/backend/templates/dot.cpp src/lib/backend/templates/base.cpp src/lib/backend/templates/axpy.cpp 
src/lib/backend/stream.cpp src/lib/backend/parse.cpp src/lib/backend/mapped_object.cpp src/lib/backend/keywords.cpp src/lib/backend/binder.cpp src/lib/array.cpp '.split() + [os.path.join('src', 'wrap', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'model.cpp', 'exceptions.cpp']]
boostsrc = 'external/boost/libs/' boostsrc = 'external/boost/libs/'
for s in ['numpy','python','smart_ptr','system','thread']: for s in ['numpy','python','smart_ptr','system','thread']:
src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x] src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]

View File

@@ -4,19 +4,19 @@
#include "isaac/array.h" #include "isaac/array.h"
#include "isaac/wrap/clBLAS.h" #include "isaac/wrap/clBLAS.h"
namespace ad = isaac; namespace isc = isaac;
typedef isaac::int_t int_t; typedef isaac::int_t int_t;
template<typename T> template<typename T>
void test_element_wise_vector(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T>& cy, simple_vector_base<T>& cz, void test_element_wise_vector(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T>& cy, simple_vector_base<T>& cz,
ad::array& x, ad::array& y, ad::array& z) isc::array& x, isc::array& y, isc::array& z)
{ {
using namespace std; using namespace std;
int failure_count = 0; int failure_count = 0;
ad::numeric_type dtype = x.dtype(); isc::numeric_type dtype = x.dtype();
ad::driver::Context const & ctx = x.context(); isc::driver::Context const & ctx = x.context();
ad::driver::CommandQueue queue = ad::driver::queues[ctx][0]; isc::driver::CommandQueue queue = isc::driver::queues[ctx][0];
cl_command_queue clqueue = (*queue.handle().cl)(); cl_command_queue clqueue = (*queue.handle().cl)();
int_t N = cz.size(); int_t N = cz.size();
@@ -114,7 +114,7 @@ void test_element_wise_vector(T epsilon, simple_vector_base<T> & cx, simple_vect
} }
template<typename T> template<typename T>
void test_impl(T epsilon, ad::driver::Context const & ctx) void test_impl(T epsilon, isc::driver::Context const & ctx)
{ {
using isaac::_; using isaac::_;
@@ -140,10 +140,10 @@ void test_impl(T epsilon, ad::driver::Context const & ctx)
int main() int main()
{ {
clblasSetup(); clblasSetup();
auto data = ad::driver::queues.contexts(); auto data = isc::driver::queues.contexts();
for(const auto & elem : data) for(const auto & elem : data)
{ {
ad::driver::Device device = elem.second[0].device(); isc::driver::Device device = elem.second[0].device();
std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl; std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
std::cout << "---" << std::endl; std::cout << "---" << std::endl;
std::cout << ">> float" << std::endl; std::cout << ">> float" << std::endl;

View File

@@ -10,7 +10,11 @@ template<class T> struct BLAS;
template<> struct BLAS<float> { template<class FT, class DT> static FT F(FT SAXPY, DT ) { return SAXPY; } }; template<> struct BLAS<float> { template<class FT, class DT> static FT F(FT SAXPY, DT ) { return SAXPY; } };
template<> struct BLAS<double> { template<class FT, class DT> static DT F(FT , DT DAXPY) { return DAXPY; } }; template<> struct BLAS<double> { template<class FT, class DT> static DT F(FT , DT DAXPY) { return DAXPY; } };
enum interface_t{clBLAS, CPP}; enum interface_t
{
clBLAS,
CPP
};
#define CHANDLE(X) (*X.data().handle().cl)() #define CHANDLE(X) (*X.data().handle().cl)()
#define OFF(X) X.start()[0] + X.start()[1]*X.ld() #define OFF(X) X.start()[0] + X.start()[1]*X.ld()

View File

@@ -5,23 +5,23 @@
#include "isaac/array.h" #include "isaac/array.h"
#include "isaac/wrap/clBLAS.h" #include "isaac/wrap/clBLAS.h"
namespace ad = isaac; namespace isc = isaac;
typedef ad::int_t int_t; typedef isc::int_t int_t;
template<typename T> template<typename T>
void test_reduction(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T> & cy, void test_reduction(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T> & cy,
ad::array & x, ad::array & y) isc::array & x, isc::array & y)
{ {
using namespace std; using namespace std;
ad::driver::Context const & ctx = x.context(); isc::driver::Context const & ctx = x.context();
int_t N = cx.size(); int_t N = cx.size();
ad::driver::CommandQueue queue = ad::driver::queues[ctx][0]; isc::driver::CommandQueue queue = isc::driver::queues[ctx][0];
cl_command_queue clqueue = (*queue.handle().cl)(); cl_command_queue clqueue = (*queue.handle().cl)();
ad::array scratch(N, x.dtype()); isc::array scratch(N, x.dtype());
unsigned int failure_count = 0; unsigned int failure_count = 0;
isaac::numeric_type dtype = ad::to_numeric_type<T>::value; isaac::numeric_type dtype = isc::to_numeric_type<T>::value;
T cs = 0; T cs = 0;
T tmp = 0; T tmp = 0;
@@ -67,7 +67,7 @@ void test_reduction(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T
} }
template<typename T> template<typename T>
void test_impl(T epsilon, ad::driver::Context const & ctx) void test_impl(T epsilon, isc::driver::Context const & ctx)
{ {
using isaac::_; using isaac::_;
@@ -90,10 +90,10 @@ void test_impl(T epsilon, ad::driver::Context const & ctx)
int main() int main()
{ {
clblasSetup(); clblasSetup();
auto data = ad::driver::queues.contexts(); auto data = isc::driver::queues.contexts();
for(const auto & elem : data) for(const auto & elem : data)
{ {
ad::driver::Device device = elem.second[0].device(); isc::driver::Device device = elem.second[0].device();
std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl; std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
std::cout << "---" << std::endl; std::cout << "---" << std::endl;
std::cout << ">> float" << std::endl; std::cout << ">> float" << std::endl;

View File

@@ -4,23 +4,23 @@
#include "isaac/model/model.h" #include "isaac/model/model.h"
#include "isaac/wrap/clBLAS.h" #include "isaac/wrap/clBLAS.h"
namespace ad = isaac; namespace isc = isaac;
template<typename T> template<typename T>
void test_impl(T epsilon, simple_matrix_base<T> & cC, simple_matrix_base<T> const & cA, simple_matrix_base<T> const & cB, void test_impl(T epsilon, simple_matrix_base<T> & cC, simple_matrix_base<T> const & cA, simple_matrix_base<T> const & cB,
ad::array & C, ad::array const & A, ad::array const & AT, ad::array const & B, ad::array const & BT, isc::array & C, isc::array const & A, isc::array const & AT, isc::array const & B, isc::array const & BT,
interface_t interface, const char * prefix) interface_t interf, const char * prefix)
{ {
int failure_count = 0; int failure_count = 0;
ad::int_t M = C.shape()[0]; isc::int_t M = C.shape()[0];
ad::int_t N = C.shape()[1]; isc::int_t N = C.shape()[1];
ad::int_t K = A.shape()[1]; isc::int_t K = A.shape()[1];
T alpha = 1; T alpha = 1;
T beta = 0; T beta = 0;
ad::driver::CommandQueue queue = ad::driver::queues[C.context()][0]; isc::driver::CommandQueue queue = isc::driver::queues[C.context()][0];
for(int i = 0 ; i < M ; ++i) for(int i = 0 ; i < M ; ++i)
{ {
@@ -44,7 +44,7 @@ void test_impl(T epsilon, simple_matrix_base<T> & cC, simple_matrix_base<T> cons
std::cout << "[" << prefix << "] \t" << NAME << "..." << std::flush;\ std::cout << "[" << prefix << "] \t" << NAME << "..." << std::flush;\
GPU_OP;\ GPU_OP;\
queue.synchronize();\ queue.synchronize();\
ad::copy(C, buffer);\ isc::copy(C, buffer);\
if(diff(buffer, cCbuffer, epsilon))\ if(diff(buffer, cCbuffer, epsilon))\
{\ {\
failure_count++;\ failure_count++;\
@@ -53,7 +53,7 @@ void test_impl(T epsilon, simple_matrix_base<T> & cC, simple_matrix_base<T> cons
else\ else\
std::cout << std::endl; std::cout << std::endl;
if(interface==clBLAS) if(interf==clBLAS)
{ {
cl_command_queue clqueue = (*queue.handle().cl)(); cl_command_queue clqueue = (*queue.handle().cl)();
@@ -95,7 +95,7 @@ void test_impl(T epsilon, simple_matrix_base<T> & cC, simple_matrix_base<T> cons
} }
template<typename T> template<typename T>
void test_impl(T epsilon, ad::driver::Context const & ctx) void test_impl(T epsilon, isc::driver::Context const & ctx)
{ {
int_t M = 173; int_t M = 173;
int_t N = 256; int_t N = 256;
@@ -126,10 +126,10 @@ void test_impl(T epsilon, ad::driver::Context const & ctx)
int main() int main()
{ {
clblasSetup(); clblasSetup();
auto data = ad::driver::queues.contexts(); auto data = isc::driver::queues.contexts();
for(const auto & elem : data) for(const auto & elem : data)
{ {
ad::driver::Device device = elem.second[0].device(); isc::driver::Device device = elem.second[0].device();
std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl; std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
std::cout << "---" << std::endl; std::cout << "---" << std::endl;
std::cout << ">> float" << std::endl; std::cout << ">> float" << std::endl;

View File

@@ -3,24 +3,24 @@
#include "isaac/array.h" #include "isaac/array.h"
#include "isaac/wrap/clBLAS.h" #include "isaac/wrap/clBLAS.h"
namespace ad = isaac; namespace isc = isaac;
template<typename T> template<typename T>
void test_row_wise_reduction(T epsilon, simple_vector_base<T> & cy, simple_matrix_base<T> const & cA, simple_vector_base<T> & cx, void test_row_wise_reduction(T epsilon, simple_vector_base<T> & cy, simple_matrix_base<T> const & cA, simple_vector_base<T> & cx,
ad::array & y, ad::array const & A, ad::array & x, interface_t interface, const char * prefix) isc::array & y, isc::array const & A, isc::array & x, interface_t interf, const char * prefix)
{ {
int failure_count = 0; int failure_count = 0;
ad::int_t M = A.shape()[0]; isc::int_t M = A.shape()[0];
ad::int_t N = A.shape()[1]; isc::int_t N = A.shape()[1];
simple_vector<T> bufy(M); simple_vector<T> bufy(M);
simple_vector<T> bufx(N); simple_vector<T> bufx(N);
T alpha = 4.2, beta = 5.6; T alpha = 4.2, beta = 5.6;
ad::driver::CommandQueue queue = ad::driver::queues[y.context()][0]; isc::driver::CommandQueue queue = isc::driver::queues[y.context()][0];
T yi = 0, xi = 0; T yi = 0, xi = 0;
#define TEST_OPERATION(NAME, SIZE1, SIZE2, REDUCTION, ASSIGNMENT, GPU_REDUCTION, RES, BUF, CRES)\ #define TEST_OPERATION(NAME, SIZE1, SIZE2, REDUCTION, ASSIGNMENT, GPU_REDUCTION, RES, BUF, CRES)\
@@ -35,7 +35,7 @@ void test_row_wise_reduction(T epsilon, simple_vector_base<T> & cy, simple_matri
}\ }\
GPU_REDUCTION;\ GPU_REDUCTION;\
queue.synchronize();\ queue.synchronize();\
ad::copy(RES, BUF.data());\ isc::copy(RES, BUF.data());\
if(diff(CRES, BUF, epsilon))\ if(diff(CRES, BUF, epsilon))\
{\ {\
failure_count++;\ failure_count++;\
@@ -45,7 +45,7 @@ void test_row_wise_reduction(T epsilon, simple_vector_base<T> & cy, simple_matri
std::cout << std::endl; std::cout << std::endl;
if(interface==clBLAS) if(interf==clBLAS)
{ {
cl_command_queue clqueue = (*queue.handle().cl)(); cl_command_queue clqueue = (*queue.handle().cl)();
@@ -81,7 +81,7 @@ void test_row_wise_reduction(T epsilon, simple_vector_base<T> & cy, simple_matri
} }
template<typename T> template<typename T>
void test_impl(T epsilon, ad::driver::Context const & ctx) void test_impl(T epsilon, isc::driver::Context const & ctx)
{ {
int_t M = 1324; int_t M = 1324;
int_t N = 1143; int_t N = 1143;
@@ -106,10 +106,10 @@ void test_impl(T epsilon, ad::driver::Context const & ctx)
int main() int main()
{ {
clblasSetup(); clblasSetup();
auto data = ad::driver::queues.contexts(); auto data = isc::driver::queues.contexts();
for(const auto & elem : data) for(const auto & elem : data)
{ {
ad::driver::Device device = elem.second[0].device(); isc::driver::Device device = elem.second[0].device();
std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl; std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
std::cout << "---" << std::endl; std::cout << "---" << std::endl;
std::cout << ">> float" << std::endl; std::cout << ">> float" << std::endl;

View File

@@ -1,18 +1,18 @@
#include "common.hpp" #include "common.hpp"
#include "isaac/array.h" #include "isaac/array.h"
namespace ad = isaac; namespace isc = isaac;
typedef isaac::int_t int_t; typedef isaac::int_t int_t;
template<typename T> template<typename T>
void test(T epsilon, simple_matrix_base<T> & cA, simple_matrix_base<T>& cB, simple_matrix_base<T>& cC, simple_vector_base<T>& cx, simple_vector_base<T>& cy, void test(T epsilon, simple_matrix_base<T> & cA, simple_matrix_base<T>& cB, simple_matrix_base<T>& cC, simple_vector_base<T>& cx, simple_vector_base<T>& cy,
ad::array& A, ad::array& B, ad::array& C, ad::array& x, ad::array& y) isc::array& A, isc::array& B, isc::array& C, isc::array& x, isc::array& y)
{ {
using namespace std; using namespace std;
int failure_count = 0; int failure_count = 0;
ad::numeric_type dtype = C.dtype(); isc::numeric_type dtype = C.dtype();
ad::driver::Context const & ctx = C.context(); isc::driver::Context const & ctx = C.context();
int_t M = cC.size1(); int_t M = cC.size1();
int_t N = cC.size2(); int_t N = cC.size2();
@@ -98,7 +98,7 @@ void test(T epsilon, simple_matrix_base<T> & cA, simple_matrix_base<T>& cB, simp
} }
template<typename T> template<typename T>
void test_impl(T epsilon, ad::driver::Context const & ctx) void test_impl(T epsilon, isc::driver::Context const & ctx)
{ {
using isaac::_; using isaac::_;
@@ -123,10 +123,10 @@ void test_impl(T epsilon, ad::driver::Context const & ctx)
int main() int main()
{ {
auto data = ad::driver::queues.contexts(); auto data = isc::driver::queues.contexts();
for(const auto & elem : data) for(const auto & elem : data)
{ {
ad::driver::Device device = elem.second[0].device(); isc::driver::Device device = elem.second[0].device();
std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl; std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
std::cout << "---" << std::endl; std::cout << "---" << std::endl;
std::cout << ">> float" << std::endl; std::cout << ">> float" << std::endl;

View File

@@ -5,12 +5,12 @@
#include <memory> #include <memory>
#include "isaac/model/import.hpp" #include "isaac/model/import.hpp"
namespace ad = isaac; namespace isc = isaac;
int main() int main()
{ {
viennacl::vector<float> x(10000), y(10000), z(10000); viennacl::vector<float> x(10000), y(10000), z(10000);
std::map<std::string, ad::tools::shared_ptr<ad::model> > models = ad::import("geforce_gt_540m.json"); std::map<std::string, isc::tools::shared_ptr<isc::model> > models = isc::import("geforce_gt_540m.json");
models["vector-axpy-float32"]->tune(viennacl::symbolic_expression(z, viennacl::op_assign(), x)); models["vector-axpy-float32"]->tune(viennacl::symbolic_expression(z, viennacl::op_assign(), x));
models["vector-axpy-float32"]->execute(viennacl::symbolic_expression(z, viennacl::op_assign(), x)); models["vector-axpy-float32"]->execute(viennacl::symbolic_expression(z, viennacl::op_assign(), x));
return EXIT_SUCCESS; return EXIT_SUCCESS;