Code quality: renamed isaac shortcut from isc to sc

This commit is contained in:
Philippe Tillet
2015-08-12 19:38:53 -07:00
parent b397d5306e
commit 71224a1507
13 changed files with 355 additions and 355 deletions

View File

@@ -18,8 +18,8 @@
#include "timer.hpp" #include "timer.hpp"
namespace isc = isaac; namespace sc = isaac;
typedef isc::int_t int_t; typedef sc::int_t int_t;
template<std::size_t> struct int_{}; template<std::size_t> struct int_{};
@@ -86,11 +86,11 @@ T mean(std::vector<T> x)
return res/N; return res/N;
} }
static long time_event(long sum, isc::driver::Event const & e) static long time_event(long sum, sc::driver::Event const & e)
{ return sum + e.elapsed_time();} { return sum + e.elapsed_time();}
template<class T> template<class T>
void bench(isc::numeric_type dtype, std::string operation) void bench(sc::numeric_type dtype, std::string operation)
{ {
// //
@@ -103,8 +103,8 @@ void bench(isc::numeric_type dtype, std::string operation)
std::vector<double> times;\ std::vector<double> times;\
double total_time = 0;\ double total_time = 0;\
while(total_time*1e-9 < 1e-3){\ while(total_time*1e-9 < 1e-3){\
std::list<isc::driver::Event> events;\ std::list<sc::driver::Event> events;\
flush = isc::zeros((isaac::int_t)1e6, 1, dtype);\ flush = sc::zeros((isaac::int_t)1e6, 1, dtype);\
queue.synchronize();\ queue.synchronize();\
OP;\ OP;\
queue.synchronize();\ queue.synchronize();\
@@ -121,11 +121,11 @@ void bench(isc::numeric_type dtype, std::string operation)
double total_time = 0;\ double total_time = 0;\
while(total_time*1e-9 < 1e-3){\ while(total_time*1e-9 < 1e-3){\
cl_event event;\ cl_event event;\
flush = isc::zeros(1e6, 1, dtype);\ flush = sc::zeros(1e6, 1, dtype);\
queue.synchronize();\ queue.synchronize();\
OP;\ OP;\
queue.synchronize();\ queue.synchronize();\
times.push_back(isc::driver::Event(event).elapsed_time());\ times.push_back(sc::driver::Event(event).elapsed_time());\
total_time+=times.back();\ total_time+=times.back();\
}\ }\
double t = median(times);\ double t = median(times);\
@@ -134,7 +134,7 @@ void bench(isc::numeric_type dtype, std::string operation)
#define BENCHMARK_HOST(OP, PERF) \ #define BENCHMARK_HOST(OP, PERF) \
{\ {\
isc::tools::timer tmr;\ sc::tools::timer tmr;\
double total_time = 0;\ double total_time = 0;\
std::vector<double> times;\ std::vector<double> times;\
while(total_time < 1e-2){\ while(total_time < 1e-2){\
@@ -160,7 +160,7 @@ void bench(isc::numeric_type dtype, std::string operation)
OP;\ OP;\
cudaThreadSynchronize();\ cudaThreadSynchronize();\
while(total_time*1e-3 < 1e-3){\ while(total_time*1e-3 < 1e-3){\
flush = isc::zeros(1e6, 1, dtype);\ flush = sc::zeros(1e6, 1, dtype);\
cudaEventRecord(start,0);\ cudaEventRecord(start,0);\
OP;\ OP;\
cudaEventRecord(stop,0);\ cudaEventRecord(stop,0);\
@@ -173,10 +173,10 @@ void bench(isc::numeric_type dtype, std::string operation)
std::cout << "\t" << PERF << std::flush;\ std::cout << "\t" << PERF << std::flush;\
} }
unsigned int dtsize = isc::size_of(dtype); unsigned int dtsize = sc::size_of(dtype);
isc::driver::CommandQueue & queue = isc::driver::backend::queues::get(isc::driver::backend::contexts::get_default(),0); sc::driver::CommandQueue & queue = sc::driver::backend::queues::get(sc::driver::backend::contexts::get_default(),0);
std::map<std::string, std::string> metric{ {"axpy", "GB/s"}, {"dot", "GB/s"}, {"gemv", "GB/s"}, {"gemm", "GFLOPS"}}; std::map<std::string, std::string> metric{ {"axpy", "GB/s"}, {"dot", "GB/s"}, {"gemv", "GB/s"}, {"gemm", "GFLOPS"}};
isc::array flush((int)1e6, isc::FLOAT_TYPE); sc::array flush((int)1e6, sc::FLOAT_TYPE);
std::cout << "#" << operation << " (" << metric[operation] << ")" << std::endl; std::cout << "#" << operation << " (" << metric[operation] << ")" << std::endl;
std::cout << "N"; std::cout << "N";
std::cout << "\tISAAC"; std::cout << "\tISAAC";
@@ -204,10 +204,10 @@ void bench(isc::numeric_type dtype, std::string operation)
for(int_t N: create_log_range((int)1e3, (int)2e7, 50, 64)) for(int_t N: create_log_range((int)1e3, (int)2e7, 50, 64))
{ {
std::cout << N; std::cout << N;
isc::array x(N, dtype), y(N, dtype); sc::array x(N, dtype), y(N, dtype);
/* ISAAC */ /* ISAAC */
std::list<isc::driver::Event> events; std::list<sc::driver::Event> events;
BENCHMARK_ISAAC(y = isc::control(x + alpha*y, isc::execution_options_type(0, &events)), 3*N*dtsize/t) BENCHMARK_ISAAC(y = sc::control(x + alpha*y, sc::execution_options_type(0, &events)), 3*N*dtsize/t)
/* clblas */ /* clblas */
#ifdef BENCH_CLBLAS #ifdef BENCH_CLBLAS
BENCHMARK_CLBLAS(clblasSaxpy(N, alpha, CL_HANDLE(x.data()), 0, 1, CL_HANDLE(y.data()), 0, 1, 1, &CL_HANDLE(queue), 0, NULL, &event), 3*N*dtsize/t); BENCHMARK_CLBLAS(clblasSaxpy(N, alpha, CL_HANDLE(x.data()), 0, 1, CL_HANDLE(y.data()), 0, 1, 1, &CL_HANDLE(queue), 0, NULL, &event), 3*N*dtsize/t);
@@ -215,8 +215,8 @@ void bench(isc::numeric_type dtype, std::string operation)
/* BLAS */ /* BLAS */
#ifdef BENCH_CBLAS #ifdef BENCH_CBLAS
std::vector<float> cx(N), cy(N); std::vector<float> cx(N), cy(N);
isc::copy(x, cx); sc::copy(x, cx);
isc::copy(y, cy); sc::copy(y, cy);
BENCHMARK_HOST(cblas_saxpy(N, alpha, cx.data(), 1, cy.data(), 1), 3*N*dtsize/t); BENCHMARK_HOST(cblas_saxpy(N, alpha, cx.data(), 1, cy.data(), 1), 3*N*dtsize/t);
#endif #endif
/* CuBLAS */ /* CuBLAS */
@@ -238,11 +238,11 @@ void bench(isc::numeric_type dtype, std::string operation)
{ {
std::cout << N; std::cout << N;
/* ISAAC */ /* ISAAC */
isc::array x(N, dtype), y(N, dtype); sc::array x(N, dtype), y(N, dtype);
isc::array scratch(N, dtype); sc::array scratch(N, dtype);
isc::scalar s(dtype); sc::scalar s(dtype);
s = dot(x,y); queue.synchronize(); s = dot(x,y); queue.synchronize();
BENCHMARK_ISAAC(s = isc::control(dot(x,y), isc::execution_options_type(0, &events)), 2*N*dtsize/t) BENCHMARK_ISAAC(s = sc::control(dot(x,y), sc::execution_options_type(0, &events)), 2*N*dtsize/t)
/* clblas */ /* clblas */
#ifdef BENCH_CLBLAS #ifdef BENCH_CLBLAS
BENCHMARK_CLBLAS(clblasSdot(N, CL_HANDLE(s.data()), 0, CL_HANDLE(x.data()), 0, 1, CL_HANDLE(y.data()), 0, 1, CL_HANDLE(scratch.data()), 1, &CL_HANDLE(queue), 0, NULL, &event), 2*N*dtsize/t) BENCHMARK_CLBLAS(clblasSdot(N, CL_HANDLE(s.data()), 0, CL_HANDLE(x.data()), 0, 1, CL_HANDLE(y.data()), 0, 1, CL_HANDLE(scratch.data()), 1, &CL_HANDLE(queue), 0, NULL, &event), 2*N*dtsize/t)
@@ -250,8 +250,8 @@ void bench(isc::numeric_type dtype, std::string operation)
/* BLAS */ /* BLAS */
#ifdef BENCH_CBLAS #ifdef BENCH_CBLAS
std::vector<float> cx(N), cy(N); std::vector<float> cx(N), cy(N);
isc::copy(x, cx); sc::copy(x, cx);
isc::copy(y, cy); sc::copy(y, cy);
BENCHMARK_HOST(cblas_sdot(N, cx.data(), 1, cy.data(), 1), 2*N*dtsize/t); BENCHMARK_HOST(cblas_sdot(N, cx.data(), 1, cy.data(), 1), 2*N*dtsize/t);
#endif #endif
#ifdef BENCH_CUBLAS #ifdef BENCH_CUBLAS
@@ -294,19 +294,19 @@ void bench(isc::numeric_type dtype, std::string operation)
if(AT) std::swap(As1, As2); if(AT) std::swap(As1, As2);
/* ISAAC */ /* ISAAC */
isc::array A(As1, As2, dtype), y(M, dtype), x(N, dtype); sc::array A(As1, As2, dtype), y(M, dtype), x(N, dtype);
#ifdef HAS_A_BLAS #ifdef HAS_A_BLAS
int_t lda = A.ld(); int_t lda = A.ld();
#endif #endif
BENCHMARK_ISAAC(y = isc::control(AT?dot(A.T(),x):dot(A,x), isc::execution_options_type(0, &events)),(M*N + M + N)*dtsize/t); BENCHMARK_ISAAC(y = sc::control(AT?dot(A.T(),x):dot(A,x), sc::execution_options_type(0, &events)),(M*N + M + N)*dtsize/t);
#ifdef BENCH_CLBLAS #ifdef BENCH_CLBLAS
BENCHMARK_CLBLAS(clblasSgemv(clblasColumnMajor, AT?clblasTrans:clblasNoTrans, As1, As2, 1, CL_HANDLE(A.data()), 0, lda, CL_HANDLE(x.data()), 0, 1, 0, CL_HANDLE(y.data()), 0, 1, 1, &CL_HANDLE(queue),0, NULL, &event), (M*N + M + N)*dtsize/t) BENCHMARK_CLBLAS(clblasSgemv(clblasColumnMajor, AT?clblasTrans:clblasNoTrans, As1, As2, 1, CL_HANDLE(A.data()), 0, lda, CL_HANDLE(x.data()), 0, 1, 0, CL_HANDLE(y.data()), 0, 1, 1, &CL_HANDLE(queue),0, NULL, &event), (M*N + M + N)*dtsize/t)
#endif #endif
#ifdef BENCH_CBLAS #ifdef BENCH_CBLAS
std::vector<float> cA(M*N), cx(N), cy(M); std::vector<float> cA(M*N), cx(N), cy(M);
isc::copy(x, cx); sc::copy(x, cx);
isc::copy(y, cy); sc::copy(y, cy);
isc::copy(A, cA); sc::copy(A, cA);
BENCHMARK_HOST(cblas_sgemv(CblasColMajor, AT?CblasTrans:CblasNoTrans, As1, As2, 1, cA.data(), lda, cx.data(), 1, 0, cy.data(), 1), (M*N + M + N)*dtsize/t); BENCHMARK_HOST(cblas_sgemv(CblasColMajor, AT?CblasTrans:CblasNoTrans, As1, As2, 1, cA.data(), lda, cx.data(), 1, 0, cy.data(), 1), (M*N + M + N)*dtsize/t);
#endif #endif
#ifdef BENCH_CUBLAS #ifdef BENCH_CUBLAS
@@ -369,11 +369,11 @@ void bench(isc::numeric_type dtype, std::string operation)
int_t Bs1 = K, Bs2 = N; int_t Bs1 = K, Bs2 = N;
if(BT) std::swap(Bs1, Bs2); if(BT) std::swap(Bs1, Bs2);
isc::array C(M, N, dtype), A(As1, As2, dtype), B(Bs1, Bs2, dtype); sc::array C(M, N, dtype), A(As1, As2, dtype), B(Bs1, Bs2, dtype);
#ifdef HAS_A_BLAS #ifdef HAS_A_BLAS
int_t lda = A.ld(), ldb = B.ld(), ldc = C.ld(); int_t lda = A.ld(), ldb = B.ld(), ldc = C.ld();
#endif #endif
BENCHMARK_ISAAC(C = isc::control(AT?(BT?dot(A.T(),B.T()):dot(A.T(),B)):(BT?dot(A,B.T()):dot(A,B)), isc::execution_options_type(0, &events)), (double)2*M*N*K/t); BENCHMARK_ISAAC(C = sc::control(AT?(BT?dot(A.T(),B.T()):dot(A.T(),B)):(BT?dot(A,B.T()):dot(A,B)), sc::execution_options_type(0, &events)), (double)2*M*N*K/t);
/* clblas */ /* clblas */
#ifdef BENCH_CLBLAS #ifdef BENCH_CLBLAS
BENCHMARK_CLBLAS(clblasSgemm(clblasColumnMajor, AT?clblasTrans:clblasNoTrans, BT?clblasTrans:clblasNoTrans, M, N, K, 1, CL_HANDLE(A.data()), 0, lda, CL_HANDLE(B.data()), 0, ldb, BENCHMARK_CLBLAS(clblasSgemm(clblasColumnMajor, AT?clblasTrans:clblasNoTrans, BT?clblasTrans:clblasNoTrans, M, N, K, 1, CL_HANDLE(A.data()), 0, lda, CL_HANDLE(B.data()), 0, ldb,
@@ -382,9 +382,9 @@ void bench(isc::numeric_type dtype, std::string operation)
/* BLAS */ /* BLAS */
#ifdef BENCH_CBLAS #ifdef BENCH_CBLAS
std::vector<float> cC(M*N), cA(M*K), cB(N*K); std::vector<float> cC(M*N), cA(M*K), cB(N*K);
isc::copy(C, cC); sc::copy(C, cC);
isc::copy(A, cA); sc::copy(A, cA);
isc::copy(B, cB); sc::copy(B, cB);
BENCHMARK_HOST(cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, cA.data(), lda, cB.data(), ldb, 1, cC.data(), ldc), (double)2*M*N*K/t); BENCHMARK_HOST(cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, cA.data(), lda, cB.data(), ldb, 1, cC.data(), ldc), (double)2*M*N*K/t);
#endif #endif
#ifdef BENCH_CUBLAS #ifdef BENCH_CUBLAS
@@ -409,11 +409,11 @@ int main(int argc, char* argv[])
#ifdef BENCH_CLBLAS #ifdef BENCH_CLBLAS
clblasSetup(); clblasSetup();
#endif #endif
isc::driver::backend::default_queue_properties = CL_QUEUE_PROFILING_ENABLE; sc::driver::backend::default_queue_properties = CL_QUEUE_PROFILING_ENABLE;
int device_idx = 0; int device_idx = 0;
std::list<isc::driver::Context const *> contexts; std::list<sc::driver::Context const *> contexts;
isc::driver::backend::contexts::get(contexts); sc::driver::backend::contexts::get(contexts);
std::string operation; std::string operation;
if(contexts.size() > 1) if(contexts.size() > 1)
@@ -423,9 +423,9 @@ int main(int argc, char* argv[])
std::cerr << "usage : blas-bench DEVICE_IDX OPERATION" << std::endl; std::cerr << "usage : blas-bench DEVICE_IDX OPERATION" << std::endl;
std::cout << "Devices available: " << std::endl; std::cout << "Devices available: " << std::endl;
unsigned int current=0; unsigned int current=0;
for(isc::driver::Context const * context: contexts) for(sc::driver::Context const * context: contexts)
{ {
isc::driver::Device device = isc::driver::backend::queues::get(*context,0).device(); sc::driver::Device device = sc::driver::backend::queues::get(*context,0).device();
std::cout << current++ << ": " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl; std::cout << current++ << ": " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
} }
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
@@ -443,10 +443,10 @@ int main(int argc, char* argv[])
operation = args[1]; operation = args[1];
} }
isc::driver::backend::default_device = device_idx; sc::driver::backend::default_device = device_idx;
std::cout << "#Benchmark : BLAS" << std::endl; std::cout << "#Benchmark : BLAS" << std::endl;
std::cout << "#----------------" << std::endl; std::cout << "#----------------" << std::endl;
bench<float>(isc::FLOAT_TYPE, operation); bench<float>(sc::FLOAT_TYPE, operation);
#ifdef BENCH_CLBLAS #ifdef BENCH_CLBLAS
clblasTeardown(); clblasTeardown();

View File

@@ -10,7 +10,7 @@
#define MAP_ENUM(v, ns) .value(#v, ns::v) #define MAP_ENUM(v, ns) .value(#v, ns::v)
namespace bp = boost::python; namespace bp = boost::python;
namespace isc = isaac; namespace sc = isaac;
namespace np = boost::numpy; namespace np = boost::numpy;
namespace tools namespace tools
@@ -36,7 +36,7 @@ namespace tools
} }
inline isc::numeric_type extract_dtype(bp::object const & odtype) inline sc::numeric_type extract_dtype(bp::object const & odtype)
{ {
std::string name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))(); std::string name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))();
if(name=="class") if(name=="class")
@@ -44,16 +44,16 @@ namespace tools
else else
name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))(); name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))();
if(name=="int8") return isc::CHAR_TYPE; if(name=="int8") return sc::CHAR_TYPE;
else if(name=="uint8") return isc::UCHAR_TYPE; else if(name=="uint8") return sc::UCHAR_TYPE;
else if(name=="int16") return isc::SHORT_TYPE; else if(name=="int16") return sc::SHORT_TYPE;
else if(name=="uint16") return isc::USHORT_TYPE; else if(name=="uint16") return sc::USHORT_TYPE;
else if(name=="int32") return isc::INT_TYPE; else if(name=="int32") return sc::INT_TYPE;
else if(name=="uint32") return isc::UINT_TYPE; else if(name=="uint32") return sc::UINT_TYPE;
else if(name=="int64") return isc::LONG_TYPE; else if(name=="int64") return sc::LONG_TYPE;
else if(name=="uint64") return isc::ULONG_TYPE; else if(name=="uint64") return sc::ULONG_TYPE;
else if(name=="float32") return isc::FLOAT_TYPE; else if(name=="float32") return sc::FLOAT_TYPE;
else if(name=="float64") return isc::DOUBLE_TYPE; else if(name=="float64") return sc::DOUBLE_TYPE;
else else
{ {
PyErr_SetString(PyExc_TypeError, "Data type not understood"); PyErr_SetString(PyExc_TypeError, "Data type not understood");
@@ -62,7 +62,7 @@ namespace tools
} }
} }
inline isc::expression_type extract_template_type(bp::object const & odtype) inline sc::expression_type extract_template_type(bp::object const & odtype)
{ {
std::string name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))(); std::string name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))();
if(name=="class") if(name=="class")
@@ -70,15 +70,15 @@ namespace tools
else else
name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))(); name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))();
if(name=="axpy") return isc::AXPY_TYPE; if(name=="axpy") return sc::AXPY_TYPE;
else if(name=="ger") return isc::GER_TYPE; else if(name=="ger") return sc::GER_TYPE;
else if(name=="dot") return isc::DOT_TYPE; else if(name=="dot") return sc::DOT_TYPE;
else if(name=="gemv_n") return isc::GEMV_N_TYPE; else if(name=="gemv_n") return sc::GEMV_N_TYPE;
else if(name=="gemv_t") return isc::GEMV_T_TYPE; else if(name=="gemv_t") return sc::GEMV_T_TYPE;
else if(name=="gemm_nn") return isc::GEMM_NN_TYPE; else if(name=="gemm_nn") return sc::GEMM_NN_TYPE;
else if(name=="gemm_tn") return isc::GEMM_TN_TYPE; else if(name=="gemm_tn") return sc::GEMM_TN_TYPE;
else if(name=="gemm_nt") return isc::GEMM_NT_TYPE; else if(name=="gemm_nt") return sc::GEMM_NT_TYPE;
else if(name=="gemm_tt") return isc::GEMM_TT_TYPE; else if(name=="gemm_tt") return sc::GEMM_TT_TYPE;
else else
{ {
PyErr_SetString(PyExc_TypeError, "Template type not understood"); PyErr_SetString(PyExc_TypeError, "Template type not understood");

View File

@@ -6,19 +6,19 @@ namespace detail
{ {
isc::numeric_type to_isc_dtype(np::dtype const & T) sc::numeric_type to_sc_dtype(np::dtype const & T)
{ {
if(T==np::detail::get_int_dtype<8, false>()) return isc::CHAR_TYPE; if(T==np::detail::get_int_dtype<8, false>()) return sc::CHAR_TYPE;
else if(T==np::detail::get_int_dtype<8, true>()) return isc::UCHAR_TYPE; else if(T==np::detail::get_int_dtype<8, true>()) return sc::UCHAR_TYPE;
else if(T==np::detail::get_int_dtype<16, false>()) return isc::SHORT_TYPE; else if(T==np::detail::get_int_dtype<16, false>()) return sc::SHORT_TYPE;
else if(T==np::detail::get_int_dtype<16, true>()) return isc::USHORT_TYPE; else if(T==np::detail::get_int_dtype<16, true>()) return sc::USHORT_TYPE;
else if(T==np::detail::get_int_dtype<32, false>()) return isc::INT_TYPE; else if(T==np::detail::get_int_dtype<32, false>()) return sc::INT_TYPE;
else if(T==np::detail::get_int_dtype<32, true>()) return isc::UINT_TYPE; else if(T==np::detail::get_int_dtype<32, true>()) return sc::UINT_TYPE;
else if(T==np::detail::get_int_dtype<64, false>()) return isc::LONG_TYPE; else if(T==np::detail::get_int_dtype<64, false>()) return sc::LONG_TYPE;
else if(T==np::detail::get_int_dtype<64, true>()) return isc::ULONG_TYPE; else if(T==np::detail::get_int_dtype<64, true>()) return sc::ULONG_TYPE;
// else if(T==np::detail::get_float_dtype<16>()) return isc::HALF_TYPE; // else if(T==np::detail::get_float_dtype<16>()) return sc::HALF_TYPE;
else if(T==np::detail::get_float_dtype<32>()) return isc::FLOAT_TYPE; else if(T==np::detail::get_float_dtype<32>()) return sc::FLOAT_TYPE;
else if(T==np::detail::get_float_dtype<64>()) return isc::DOUBLE_TYPE; else if(T==np::detail::get_float_dtype<64>()) return sc::DOUBLE_TYPE;
else{ else{
PyErr_SetString(PyExc_TypeError, "Unrecognized datatype"); PyErr_SetString(PyExc_TypeError, "Unrecognized datatype");
bp::throw_error_already_set(); bp::throw_error_already_set();
@@ -26,19 +26,19 @@ isc::numeric_type to_isc_dtype(np::dtype const & T)
} }
} }
np::dtype to_np_dtype(isc::numeric_type const & T) throw() np::dtype to_np_dtype(sc::numeric_type const & T) throw()
{ {
if(T==isc::CHAR_TYPE) return np::detail::get_int_dtype<8, false>(); if(T==sc::CHAR_TYPE) return np::detail::get_int_dtype<8, false>();
else if(T==isc::UCHAR_TYPE) return np::detail::get_int_dtype<8, true>(); else if(T==sc::UCHAR_TYPE) return np::detail::get_int_dtype<8, true>();
else if(T==isc::SHORT_TYPE) return np::detail::get_int_dtype<16, false>(); else if(T==sc::SHORT_TYPE) return np::detail::get_int_dtype<16, false>();
else if(T==isc::USHORT_TYPE) return np::detail::get_int_dtype<16, true>(); else if(T==sc::USHORT_TYPE) return np::detail::get_int_dtype<16, true>();
else if(T==isc::INT_TYPE) return np::detail::get_int_dtype<32, false>(); else if(T==sc::INT_TYPE) return np::detail::get_int_dtype<32, false>();
else if(T==isc::UINT_TYPE) return np::detail::get_int_dtype<32, true>(); else if(T==sc::UINT_TYPE) return np::detail::get_int_dtype<32, true>();
else if(T==isc::LONG_TYPE) return np::detail::get_int_dtype<64, false>(); else if(T==sc::LONG_TYPE) return np::detail::get_int_dtype<64, false>();
else if(T==isc::ULONG_TYPE) return np::detail::get_int_dtype<64, true>(); else if(T==sc::ULONG_TYPE) return np::detail::get_int_dtype<64, true>();
// else if(T==isc::HALF_TYPE) return np::detail::get_float_dtype<16>(); // else if(T==sc::HALF_TYPE) return np::detail::get_float_dtype<16>();
else if(T==isc::FLOAT_TYPE) return np::detail::get_float_dtype<32>(); else if(T==sc::FLOAT_TYPE) return np::detail::get_float_dtype<32>();
else if(T==isc::DOUBLE_TYPE) return np::detail::get_float_dtype<64>(); else if(T==sc::DOUBLE_TYPE) return np::detail::get_float_dtype<64>();
else{ else{
PyErr_SetString(PyExc_TypeError, "Unrecognized datatype"); PyErr_SetString(PyExc_TypeError, "Unrecognized datatype");
bp::throw_error_already_set(); bp::throw_error_already_set();
@@ -46,21 +46,21 @@ np::dtype to_np_dtype(isc::numeric_type const & T) throw()
} }
} }
bp::tuple get_shape(isc::array const & x) bp::tuple get_shape(sc::array const & x)
{ {
return bp::make_tuple(x.shape()[0], x.shape()[1]); return bp::make_tuple(x.shape()[0], x.shape()[1]);
} }
template<class T> template<class T>
struct datatype : public isc::value_scalar struct datatype : public sc::value_scalar
{ {
datatype(T t) : isc::value_scalar(t){ } datatype(T t) : sc::value_scalar(t){ }
}; };
template<class T> template<class T>
unsigned int size(datatype<T> const & dt) unsigned int size(datatype<T> const & dt)
{ return isc::size_of(dt.dtype()) ; } { return sc::size_of(dt.dtype()) ; }
#define INSTANTIATE(name, clname) \ #define INSTANTIATE(name, clname) \
struct name : public detail::datatype<clname> { name(clname value) : detail::datatype<clname>(value){} }; struct name : public detail::datatype<clname> { name(clname value) : detail::datatype<clname>(value){} };
@@ -80,13 +80,13 @@ unsigned int size(datatype<T> const & dt)
namespace detail namespace detail
{ {
std::shared_ptr<isc::profiles::value_type> construct_model(bp::object const & tp, bp::object dtype, isc::driver::CommandQueue & queue) std::shared_ptr<sc::profiles::value_type> construct_model(bp::object const & tp, bp::object dtype, sc::driver::CommandQueue & queue)
{ {
return std::shared_ptr<isc::profiles::value_type>(new isc::profiles::value_type(tools::extract_template_type(tp), tools::extract_dtype(dtype), (isaac::templates::base const &)bp::extract<isaac::templates::base>(tp), queue)); return std::shared_ptr<sc::profiles::value_type>(new sc::profiles::value_type(tools::extract_template_type(tp), tools::extract_dtype(dtype), (isaac::templates::base const &)bp::extract<isaac::templates::base>(tp), queue));
} }
std::shared_ptr<isc::array> std::shared_ptr<sc::array>
ndarray_to_iscarray(const np::ndarray& array, isc::driver::Context const & ctx) ndarray_to_scarray(const np::ndarray& array, sc::driver::Context const & ctx)
{ {
int d = array.get_nd(); int d = array.get_nd();
@@ -95,14 +95,14 @@ namespace detail
bp::throw_error_already_set(); bp::throw_error_already_set();
} }
isc::numeric_type dtype = to_isc_dtype(array.get_dtype()); sc::numeric_type dtype = to_sc_dtype(array.get_dtype());
isc::int_t size = (isc::int_t)array.shape(0); sc::int_t size = (sc::int_t)array.shape(0);
isc::array* v = new isc::array(size, dtype, ctx); sc::array* v = new sc::array(size, dtype, ctx);
void* data = (void*)array.get_data(); void* data = (void*)array.get_data();
isc::copy(data, *v); sc::copy(data, *v);
return std::shared_ptr<isc::array>(v); return std::shared_ptr<sc::array>(v);
} }
isaac::driver::Context const & extract_context(bp::object context) isaac::driver::Context const & extract_context(bp::object context)
@@ -118,19 +118,19 @@ namespace detail
} }
std::shared_ptr<isc::array> create_array(bp::object const & obj, bp::object odtype, bp::object pycontext) std::shared_ptr<sc::array> create_array(bp::object const & obj, bp::object odtype, bp::object pycontext)
{ {
return ndarray_to_iscarray(np::from_object(obj, to_np_dtype(tools::extract_dtype(odtype))), extract_context(pycontext)); return ndarray_to_scarray(np::from_object(obj, to_np_dtype(tools::extract_dtype(odtype))), extract_context(pycontext));
} }
std::shared_ptr<isc::array> create_zeros_array(isc::int_t M, isc::int_t N, bp::object odtype, bp::object pycontext) std::shared_ptr<sc::array> create_zeros_array(sc::int_t M, sc::int_t N, bp::object odtype, bp::object pycontext)
{ {
return std::shared_ptr<isc::array>(new isc::array(isc::zeros(M, N, tools::extract_dtype(odtype), extract_context(pycontext)))); return std::shared_ptr<sc::array>(new sc::array(sc::zeros(M, N, tools::extract_dtype(odtype), extract_context(pycontext))));
} }
std::shared_ptr<isc::array> create_empty_array(bp::object sizes, bp::object odtype, bp::object pycontext) std::shared_ptr<sc::array> create_empty_array(bp::object sizes, bp::object odtype, bp::object pycontext)
{ {
typedef std::shared_ptr<isc::array> result_type; typedef std::shared_ptr<sc::array> result_type;
std::size_t len; std::size_t len;
int size1; int size1;
@@ -145,17 +145,17 @@ namespace detail
size1 = bp::extract<int>(sizes)(); size1 = bp::extract<int>(sizes)();
} }
isc::numeric_type dtype = tools::extract_dtype(odtype); sc::numeric_type dtype = tools::extract_dtype(odtype);
if(len < 1 || len > 2) if(len < 1 || len > 2)
{ {
PyErr_SetString(PyExc_TypeError, "Only 1-D and 2-D arrays are supported!"); PyErr_SetString(PyExc_TypeError, "Only 1-D and 2-D arrays are supported!");
bp::throw_error_already_set(); bp::throw_error_already_set();
} }
isc::driver::Context const & context = extract_context(pycontext); sc::driver::Context const & context = extract_context(pycontext);
if(len==1) if(len==1)
return result_type(new isc::array(size1, dtype, context)); return result_type(new sc::array(size1, dtype, context));
return result_type(new isc::array(size1, size2, dtype, context)); return result_type(new sc::array(size1, size2, dtype, context));
} }
std::string type_name(bp::object const & obj) std::string type_name(bp::object const & obj)
@@ -167,26 +167,26 @@ namespace detail
return bp::extract<std::string>(obj.attr("__class__").attr("__name__"))(); return bp::extract<std::string>(obj.attr("__class__").attr("__name__"))();
} }
std::shared_ptr<isc::scalar> construct_scalar(bp::object obj, bp::object pycontext) std::shared_ptr<sc::scalar> construct_scalar(bp::object obj, bp::object pycontext)
{ {
typedef std::shared_ptr<isc::scalar> result_type; typedef std::shared_ptr<sc::scalar> result_type;
isc::driver::Context const & context = extract_context(pycontext); sc::driver::Context const & context = extract_context(pycontext);
std::string name = type_name(obj); std::string name = type_name(obj);
if(name=="int") return result_type(new isc::scalar(bp::extract<int>(obj)(), context)); if(name=="int") return result_type(new sc::scalar(bp::extract<int>(obj)(), context));
else if(name=="float") return result_type(new isc::scalar(bp::extract<double>(obj)(), context)); else if(name=="float") return result_type(new sc::scalar(bp::extract<double>(obj)(), context));
else if(name=="long") return result_type(new isc::scalar(bp::extract<long>(obj)(), context)); else if(name=="long") return result_type(new sc::scalar(bp::extract<long>(obj)(), context));
else if(name=="int") return result_type(new isc::scalar(bp::extract<int>(obj)(), context)); else if(name=="int") return result_type(new sc::scalar(bp::extract<int>(obj)(), context));
else if(name=="int8") return result_type(new isc::scalar(isc::CHAR_TYPE, context)); else if(name=="int8") return result_type(new sc::scalar(sc::CHAR_TYPE, context));
else if(name=="uint8") return result_type(new isc::scalar(isc::UCHAR_TYPE, context)); else if(name=="uint8") return result_type(new sc::scalar(sc::UCHAR_TYPE, context));
else if(name=="int16") return result_type(new isc::scalar(isc::SHORT_TYPE, context)); else if(name=="int16") return result_type(new sc::scalar(sc::SHORT_TYPE, context));
else if(name=="uint16") return result_type(new isc::scalar(isc::USHORT_TYPE, context)); else if(name=="uint16") return result_type(new sc::scalar(sc::USHORT_TYPE, context));
else if(name=="int32") return result_type(new isc::scalar(isc::INT_TYPE, context)); else if(name=="int32") return result_type(new sc::scalar(sc::INT_TYPE, context));
else if(name=="uint32") return result_type(new isc::scalar(isc::UINT_TYPE, context)); else if(name=="uint32") return result_type(new sc::scalar(sc::UINT_TYPE, context));
else if(name=="int64") return result_type(new isc::scalar(isc::LONG_TYPE, context)); else if(name=="int64") return result_type(new sc::scalar(sc::LONG_TYPE, context));
else if(name=="uint64") return result_type(new isc::scalar(isc::ULONG_TYPE, context)); else if(name=="uint64") return result_type(new sc::scalar(sc::ULONG_TYPE, context));
else if(name=="float32") return result_type(new isc::scalar(isc::FLOAT_TYPE, context)); else if(name=="float32") return result_type(new sc::scalar(sc::FLOAT_TYPE, context));
else if(name=="float64") return result_type(new isc::scalar(isc::DOUBLE_TYPE, context)); else if(name=="float64") return result_type(new sc::scalar(sc::DOUBLE_TYPE, context));
else{ else{
PyErr_SetString(PyExc_TypeError, "Data type not understood"); PyErr_SetString(PyExc_TypeError, "Data type not understood");
bp::throw_error_already_set(); bp::throw_error_already_set();
@@ -196,11 +196,11 @@ namespace detail
struct model_map_indexing struct model_map_indexing
{ {
static isc::profiles::value_type& get_item(isc::profiles::map_type& container, bp::tuple i_) static sc::profiles::value_type& get_item(sc::profiles::map_type& container, bp::tuple i_)
{ {
isc::expression_type expression = tools::extract_template_type(i_[0]); sc::expression_type expression = tools::extract_template_type(i_[0]);
isc::numeric_type dtype = tools::extract_dtype(i_[1]); sc::numeric_type dtype = tools::extract_dtype(i_[1]);
isc::profiles::map_type::iterator i = container.find(std::make_pair(expression, dtype)); sc::profiles::map_type::iterator i = container.find(std::make_pair(expression, dtype));
if (i == container.end()) if (i == container.end())
{ {
PyErr_SetString(PyExc_KeyError, "Invalid key"); PyErr_SetString(PyExc_KeyError, "Invalid key");
@@ -209,11 +209,11 @@ namespace detail
return *i->second; return *i->second;
} }
static void set_item(isc::profiles::map_type& container, bp::tuple i_, isc::profiles::value_type const & v) static void set_item(sc::profiles::map_type& container, bp::tuple i_, sc::profiles::value_type const & v)
{ {
isc::expression_type expression = tools::extract_template_type(i_[0]); sc::expression_type expression = tools::extract_template_type(i_[0]);
isc::numeric_type dtype = tools::extract_dtype(i_[1]); sc::numeric_type dtype = tools::extract_dtype(i_[1]);
container[std::make_pair(expression, dtype)].reset(new isc::profiles::value_type(v)); container[std::make_pair(expression, dtype)].reset(new sc::profiles::value_type(v));
} }
}; };
} }
@@ -227,13 +227,13 @@ void export_core()
bp::class_<isaac::profiles::value_type>("profile", bp::no_init) bp::class_<isaac::profiles::value_type>("profile", bp::no_init)
.def("__init__", bp::make_constructor(detail::construct_model)) .def("__init__", bp::make_constructor(detail::construct_model))
.def("execute", &isc::profiles::value_type::execute); .def("execute", &sc::profiles::value_type::execute);
bp::class_<isc::value_scalar>("value_scalar", bp::no_init) bp::class_<sc::value_scalar>("value_scalar", bp::no_init)
.add_property("dtype", &isc::value_scalar::dtype); .add_property("dtype", &sc::value_scalar::dtype);
#define INSTANTIATE(name, clname) \ #define INSTANTIATE(name, clname) \
bp::class_<detail::datatype<clname>, bp::bases<isc::value_scalar> >(#name, bp::init<clname>());\ bp::class_<detail::datatype<clname>, bp::bases<sc::value_scalar> >(#name, bp::init<clname>());\
bp::class_<detail::name, bp::bases<detail::datatype<clname> > >(#name, bp::init<clname>())\ bp::class_<detail::name, bp::bases<detail::datatype<clname> > >(#name, bp::init<clname>())\
.add_property("size", &detail::size<clname>)\ .add_property("size", &detail::size<clname>)\
; ;
@@ -251,36 +251,36 @@ void export_core()
INSTANTIATE(float64, cl_double) INSTANTIATE(float64, cl_double)
#undef INSTANTIATE #undef INSTANTIATE
bp::enum_<isc::expression_type>("operations") bp::enum_<sc::expression_type>("operations")
MAP_ENUM(AXPY_TYPE, isc) MAP_ENUM(AXPY_TYPE, sc)
MAP_ENUM(GER_TYPE, isc) MAP_ENUM(GER_TYPE, sc)
MAP_ENUM(DOT_TYPE, isc) MAP_ENUM(DOT_TYPE, sc)
MAP_ENUM(GEMV_N_TYPE, isc) MAP_ENUM(GEMV_N_TYPE, sc)
MAP_ENUM(GEMV_T_TYPE, isc) MAP_ENUM(GEMV_T_TYPE, sc)
MAP_ENUM(GEMM_NN_TYPE, isc) MAP_ENUM(GEMM_NN_TYPE, sc)
MAP_ENUM(GEMM_TN_TYPE, isc) MAP_ENUM(GEMM_TN_TYPE, sc)
MAP_ENUM(GEMM_NT_TYPE, isc) MAP_ENUM(GEMM_NT_TYPE, sc)
MAP_ENUM(GEMM_TT_TYPE, isc); MAP_ENUM(GEMM_TT_TYPE, sc);
#define ADD_SCALAR_HANDLING(OP)\ #define ADD_SCALAR_HANDLING(OP)\
.def(bp::self OP int())\ .def(bp::self OP int())\
.def(bp::self OP long())\ .def(bp::self OP long())\
.def(bp::self OP double())\ .def(bp::self OP double())\
.def(bp::self OP bp::other<isc::value_scalar>())\ .def(bp::self OP bp::other<sc::value_scalar>())\
.def(int() OP bp::self)\ .def(int() OP bp::self)\
.def(long() OP bp::self)\ .def(long() OP bp::self)\
.def(double() OP bp::self)\ .def(double() OP bp::self)\
.def(bp::other<isc::value_scalar>() OP bp::self) .def(bp::other<sc::value_scalar>() OP bp::self)
#define ADD_ARRAY_OPERATOR(OP)\ #define ADD_ARRAY_OPERATOR(OP)\
.def(bp::self OP bp::self)\ .def(bp::self OP bp::self)\
ADD_SCALAR_HANDLING(OP) ADD_SCALAR_HANDLING(OP)
bp::class_<isc::expressions_tuple> bp::class_<sc::expressions_tuple>
("array_expression_container", bp::init<isc::array_expression const &>()) ("array_expression_container", bp::init<sc::array_expression const &>())
; ;
bp::class_<isc::array_expression >("array_expression", bp::no_init) bp::class_<sc::array_expression >("array_expression", bp::no_init)
ADD_ARRAY_OPERATOR(+) ADD_ARRAY_OPERATOR(+)
ADD_ARRAY_OPERATOR(-) ADD_ARRAY_OPERATOR(-)
ADD_ARRAY_OPERATOR(*) ADD_ARRAY_OPERATOR(*)
@@ -291,7 +291,7 @@ void export_core()
ADD_ARRAY_OPERATOR(<=) ADD_ARRAY_OPERATOR(<=)
ADD_ARRAY_OPERATOR(==) ADD_ARRAY_OPERATOR(==)
ADD_ARRAY_OPERATOR(!=) ADD_ARRAY_OPERATOR(!=)
.add_property("context", bp::make_function(&isc::array_expression::context, bp::return_internal_reference<>())) .add_property("context", bp::make_function(&sc::array_expression::context, bp::return_internal_reference<>()))
.def(bp::self_ns::abs(bp::self)) .def(bp::self_ns::abs(bp::self))
// .def(bp::self_ns::pow(bp::self)) // .def(bp::self_ns::pow(bp::self))
; ;
@@ -299,18 +299,18 @@ void export_core()
#define ADD_ARRAY_OPERATOR(OP) \ #define ADD_ARRAY_OPERATOR(OP) \
.def(bp::self OP bp::self)\ .def(bp::self OP bp::self)\
.def(bp::self OP bp::other<isc::array_expression>())\ .def(bp::self OP bp::other<sc::array_expression>())\
.def(bp::other<isc::array_expression>() OP bp::self) \ .def(bp::other<sc::array_expression>() OP bp::self) \
ADD_SCALAR_HANDLING(OP) ADD_SCALAR_HANDLING(OP)
bp::class_<isc::array, bp::class_<sc::array,
std::shared_ptr<isc::array> > std::shared_ptr<sc::array> >
( "array", bp::no_init) ( "array", bp::no_init)
.def("__init__", bp::make_constructor(detail::create_array, bp::default_call_policies(), (bp::arg("obj"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")= bp::object()))) .def("__init__", bp::make_constructor(detail::create_array, bp::default_call_policies(), (bp::arg("obj"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")= bp::object())))
.def(bp::init<isc::array_expression>()) .def(bp::init<sc::array_expression>())
.add_property("dtype", &isc::array::dtype) .add_property("dtype", &sc::array::dtype)
.add_property("context", bp::make_function(&isc::array::context, bp::return_internal_reference<>())) .add_property("context", bp::make_function(&sc::array::context, bp::return_internal_reference<>()))
.add_property("T", &isc::array::T) .add_property("T", &sc::array::T)
.add_property("shape", &detail::get_shape) .add_property("shape", &detail::get_shape)
ADD_ARRAY_OPERATOR(+) ADD_ARRAY_OPERATOR(+)
ADD_ARRAY_OPERATOR(-) ADD_ARRAY_OPERATOR(-)
@@ -327,7 +327,7 @@ void export_core()
.def(bp::self_ns::str(bp::self_ns::self)) .def(bp::self_ns::str(bp::self_ns::self))
; ;
bp::class_<isc::scalar, bp::bases<isc::array> > bp::class_<sc::scalar, bp::bases<sc::array> >
("scalar", bp::no_init) ("scalar", bp::no_init)
.def("__init__", bp::make_constructor(detail::construct_scalar, bp::default_call_policies(), (bp::arg(""), bp::arg("context")=bp::object()))) .def("__init__", bp::make_constructor(detail::construct_scalar, bp::default_call_policies(), (bp::arg(""), bp::arg("context")=bp::object())))
; ;
@@ -336,15 +336,15 @@ void export_core()
bp::def("empty", &detail::create_empty_array, (bp::arg("shape"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")=bp::object())); bp::def("empty", &detail::create_empty_array, (bp::arg("shape"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")=bp::object()));
//Assign //Assign
bp::def("assign", static_cast<isc::array_expression (*)(isc::array const &, isc::array const &)>(&isc::assign));\ bp::def("assign", static_cast<sc::array_expression (*)(sc::array const &, sc::array const &)>(&sc::assign));\
bp::def("assign", static_cast<isc::array_expression (*)(isc::array const &, isc::array_expression const &)>(&isc::assign));\ bp::def("assign", static_cast<sc::array_expression (*)(sc::array const &, sc::array_expression const &)>(&sc::assign));\
//Binary //Binary
#define MAP_FUNCTION(name) \ #define MAP_FUNCTION(name) \
bp::def(#name, static_cast<isc::array_expression (*)(isc::array const &, isc::array const &)>(&isc::name));\ bp::def(#name, static_cast<sc::array_expression (*)(sc::array const &, sc::array const &)>(&sc::name));\
bp::def(#name, static_cast<isc::array_expression (*)(isc::array_expression const &, isc::array const &)>(&isc::name));\ bp::def(#name, static_cast<sc::array_expression (*)(sc::array_expression const &, sc::array const &)>(&sc::name));\
bp::def(#name, static_cast<isc::array_expression (*)(isc::array const &, isc::array_expression const &)>(&isc::name));\ bp::def(#name, static_cast<sc::array_expression (*)(sc::array const &, sc::array_expression const &)>(&sc::name));\
bp::def(#name, static_cast<isc::array_expression (*)(isc::array_expression const &, isc::array_expression const &)>(&isc::name)); bp::def(#name, static_cast<sc::array_expression (*)(sc::array_expression const &, sc::array_expression const &)>(&sc::name));
MAP_FUNCTION(maximum) MAP_FUNCTION(maximum)
MAP_FUNCTION(minimum) MAP_FUNCTION(minimum)
@@ -354,8 +354,8 @@ void export_core()
//Unary //Unary
#define MAP_FUNCTION(name) \ #define MAP_FUNCTION(name) \
bp::def(#name, static_cast<isc::array_expression (*)(isc::array const &)>(&isc::name));\ bp::def(#name, static_cast<sc::array_expression (*)(sc::array const &)>(&sc::name));\
bp::def(#name, static_cast<isc::array_expression (*)(isc::array_expression const &)>(&isc::name)); bp::def(#name, static_cast<sc::array_expression (*)(sc::array_expression const &)>(&sc::name));
bp::def("zeros", &detail::create_zeros_array, (bp::arg("shape"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")=bp::object())); bp::def("zeros", &detail::create_zeros_array, (bp::arg("shape"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")=bp::object()));
@@ -380,8 +380,8 @@ void export_core()
/*--- Reduction operators----*/ /*--- Reduction operators----*/
//--------------------------------------- //---------------------------------------
#define MAP_FUNCTION(name) \ #define MAP_FUNCTION(name) \
bp::def(#name, static_cast<isc::array_expression (*)(isc::array const &, isc::int_t)>(&isc::name));\ bp::def(#name, static_cast<sc::array_expression (*)(sc::array const &, sc::int_t)>(&sc::name));\
bp::def(#name, static_cast<isc::array_expression (*)(isc::array_expression const &, isc::int_t)>(&isc::name)); bp::def(#name, static_cast<sc::array_expression (*)(sc::array_expression const &, sc::int_t)>(&sc::name));
MAP_FUNCTION(sum) MAP_FUNCTION(sum)
MAP_FUNCTION(max) MAP_FUNCTION(max)
@@ -392,7 +392,7 @@ void export_core()
/*--- Profiles----*/ /*--- Profiles----*/
//--------------------------------------- //---------------------------------------
bp::class_<isc::profiles::map_type>("profiles") bp::class_<sc::profiles::map_type>("profiles")
.def("__getitem__", &detail::model_map_indexing::get_item, bp::return_internal_reference<>()) .def("__getitem__", &detail::model_map_indexing::get_item, bp::return_internal_reference<>())
.def("__setitem__", &detail::model_map_indexing::set_item, bp::with_custodian_and_ward<1,2>()) .def("__setitem__", &detail::model_map_indexing::set_item, bp::with_custodian_and_ward<1,2>())
; ;

View File

@@ -10,7 +10,7 @@
namespace detail namespace detail
{ {
bp::list nv_compute_capability(isc::driver::Device const & device) bp::list nv_compute_capability(sc::driver::Device const & device)
{ {
bp::list res; bp::list res;
std::pair<unsigned int, unsigned int> cc = device.nv_compute_capability(); std::pair<unsigned int, unsigned int> cc = device.nv_compute_capability();
@@ -21,63 +21,63 @@ namespace detail
bp::list get_platforms() bp::list get_platforms()
{ {
std::vector<isc::driver::Platform> platforms; std::vector<sc::driver::Platform> platforms;
isc::driver::backend::platforms(platforms); sc::driver::backend::platforms(platforms);
return tools::to_list(platforms.begin(), platforms.end()); return tools::to_list(platforms.begin(), platforms.end());
} }
bp::list get_devices(isc::driver::Platform const & platform) bp::list get_devices(sc::driver::Platform const & platform)
{ {
std::vector<isc::driver::Device> devices; std::vector<sc::driver::Device> devices;
platform.devices(devices); platform.devices(devices);
return tools::to_list(devices.begin(), devices.end()); return tools::to_list(devices.begin(), devices.end());
} }
bp::list get_queues(isc::driver::Context const & context) bp::list get_queues(sc::driver::Context const & context)
{ {
std::vector<isc::driver::CommandQueue*> queues; std::vector<sc::driver::CommandQueue*> queues;
isc::driver::backend::queues::get(context, queues); sc::driver::backend::queues::get(context, queues);
bp::list res; bp::list res;
for(isc::driver::CommandQueue* queue:queues) for(sc::driver::CommandQueue* queue:queues)
res.append(*queue); res.append(*queue);
return res; return res;
} }
std::shared_ptr< isc::driver::CommandQueue> create_queue(isc::driver::Context const & context, isc::driver::Device const & device) std::shared_ptr< sc::driver::CommandQueue> create_queue(sc::driver::Context const & context, sc::driver::Device const & device)
{ {
return std::shared_ptr<isc::driver::CommandQueue>(new isc::driver::CommandQueue(context, device)); return std::shared_ptr<sc::driver::CommandQueue>(new sc::driver::CommandQueue(context, device));
} }
std::string to_string(isc::driver::device_type type) std::string to_string(sc::driver::device_type type)
{ {
if(type==isc::driver::DEVICE_TYPE_CPU) return "CPU"; if(type==sc::driver::DEVICE_TYPE_CPU) return "CPU";
if(type==isc::driver::DEVICE_TYPE_GPU) return "GPU"; if(type==sc::driver::DEVICE_TYPE_GPU) return "GPU";
if(type==isc::driver::DEVICE_TYPE_ACCELERATOR) return "ACCELERATOR"; if(type==sc::driver::DEVICE_TYPE_ACCELERATOR) return "ACCELERATOR";
throw; throw;
} }
std::shared_ptr<isc::driver::Context> make_context(isc::driver::Device const & dev) std::shared_ptr<sc::driver::Context> make_context(sc::driver::Device const & dev)
{ return std::shared_ptr<isc::driver::Context>(new isc::driver::Context(dev)); } { return std::shared_ptr<sc::driver::Context>(new sc::driver::Context(dev)); }
bp::object enqueue(isc::array_expression const & expression, unsigned int queue_id, bp::list dependencies, bool tune, int label, std::string const & program_name, bool force_recompile) bp::object enqueue(sc::array_expression const & expression, unsigned int queue_id, bp::list dependencies, bool tune, int label, std::string const & program_name, bool force_recompile)
{ {
std::list<isc::driver::Event> events; std::list<sc::driver::Event> events;
std::vector<isc::driver::Event> cdependencies = tools::to_vector<isc::driver::Event>(dependencies); std::vector<sc::driver::Event> cdependencies = tools::to_vector<sc::driver::Event>(dependencies);
isc::execution_options_type execution_options(queue_id, &events, &cdependencies); sc::execution_options_type execution_options(queue_id, &events, &cdependencies);
isc::dispatcher_options_type dispatcher_options(tune, label); sc::dispatcher_options_type dispatcher_options(tune, label);
isc::compilation_options_type compilation_options(program_name, force_recompile); sc::compilation_options_type compilation_options(program_name, force_recompile);
isc::array_expression::container_type::value_type root = expression.tree()[expression.root()]; sc::array_expression::container_type::value_type root = expression.tree()[expression.root()];
if(isc::detail::is_assignment(root.op)) if(sc::detail::is_assignment(root.op))
{ {
isc::execute(isc::control(expression, execution_options, dispatcher_options, compilation_options), isaac::profiles::get(execution_options.queue(expression.context()))); sc::execute(sc::control(expression, execution_options, dispatcher_options, compilation_options), isaac::profiles::get(execution_options.queue(expression.context())));
return bp::make_tuple(bp::ptr(root.lhs.array), tools::to_list(events.begin(), events.end())); return bp::make_tuple(bp::ptr(root.lhs.array), tools::to_list(events.begin(), events.end()));
} }
else else
{ {
std::shared_ptr<isc::array> parray(new isc::array(isc::control(expression, execution_options, dispatcher_options, compilation_options))); std::shared_ptr<sc::array> parray(new sc::array(sc::control(expression, execution_options, dispatcher_options, compilation_options)));
return bp::make_tuple(parray, tools::to_list(events.begin(), events.end())); return bp::make_tuple(parray, tools::to_list(events.begin(), events.end()));
} }
} }
@@ -88,7 +88,7 @@ default_driver_values_type default_driver_parameters;
void export_driver() void export_driver()
{ {
typedef std::vector<isc::driver::CommandQueue> queues_t; typedef std::vector<sc::driver::CommandQueue> queues_t;
bp::object driver_module(bp::handle<>(bp::borrowed(PyImport_AddModule("isaac.driver")))); bp::object driver_module(bp::handle<>(bp::borrowed(PyImport_AddModule("isaac.driver"))));
bp::scope().attr("driver") = driver_module; bp::scope().attr("driver") = driver_module;
@@ -103,58 +103,58 @@ void export_driver()
bp::enum_<isc::driver::backend_type> bp::enum_<sc::driver::backend_type>
("backend_type") ("backend_type")
.value("OPENCL", isc::driver::OPENCL) .value("OPENCL", sc::driver::OPENCL)
#ifdef ISAAC_WITH_CUDA #ifdef ISAAC_WITH_CUDA
.value("CUDA", isc::driver::CUDA) .value("CUDA", sc::driver::CUDA)
#endif #endif
; ;
bp::enum_<isc::driver::device_type> bp::enum_<sc::driver::device_type>
("device_type") ("device_type")
.value("DEVICE_TYPE_GPU", isc::driver::DEVICE_TYPE_GPU) .value("DEVICE_TYPE_GPU", sc::driver::DEVICE_TYPE_GPU)
.value("DEVICE_TYPE_CPU", isc::driver::DEVICE_TYPE_CPU) .value("DEVICE_TYPE_CPU", sc::driver::DEVICE_TYPE_CPU)
; ;
bp::class_<isc::driver::Platform>("platform", bp::no_init) bp::class_<sc::driver::Platform>("platform", bp::no_init)
.def("get_devices", &detail::get_devices) .def("get_devices", &detail::get_devices)
.add_property("name",&isc::driver::Platform::name) .add_property("name",&sc::driver::Platform::name)
; ;
bp::enum_<isaac::driver::Device::Vendor> bp::enum_<isaac::driver::Device::Vendor>
("vendor") ("vendor")
.value("AMD", isc::driver::Device::Vendor::AMD) .value("AMD", sc::driver::Device::Vendor::AMD)
.value("INTEL", isc::driver::Device::Vendor::INTEL) .value("INTEL", sc::driver::Device::Vendor::INTEL)
.value("NVIDIA", isc::driver::Device::Vendor::NVIDIA) .value("NVIDIA", sc::driver::Device::Vendor::NVIDIA)
.value("UNKNOWN", isc::driver::Device::Vendor::UNKNOWN) .value("UNKNOWN", sc::driver::Device::Vendor::UNKNOWN)
; ;
bp::class_<isc::driver::Device>("device", bp::no_init) bp::class_<sc::driver::Device>("device", bp::no_init)
.add_property("clock_rate", &isc::driver::Device::clock_rate) .add_property("clock_rate", &sc::driver::Device::clock_rate)
.add_property("name", &isc::driver::Device::name) .add_property("name", &sc::driver::Device::name)
.add_property("type", &isc::driver::Device::type) .add_property("type", &sc::driver::Device::type)
.add_property("platform", &isc::driver::Device::platform) .add_property("platform", &sc::driver::Device::platform)
.add_property("vendor", &isc::driver::Device::vendor) .add_property("vendor", &sc::driver::Device::vendor)
.add_property("nv_compute_capability", &detail::nv_compute_capability) .add_property("nv_compute_capability", &detail::nv_compute_capability)
; ;
bp::class_<isc::driver::Context, boost::noncopyable>("context", bp::no_init) bp::class_<sc::driver::Context, boost::noncopyable>("context", bp::no_init)
.def("__init__", bp::make_constructor(&detail::make_context)) .def("__init__", bp::make_constructor(&detail::make_context))
.def("synchronize", &isc::driver::backend::synchronize) .def("synchronize", &sc::driver::backend::synchronize)
.add_property("queues", &detail::get_queues) .add_property("queues", &detail::get_queues)
.add_property("backend", &isc::driver::Context::backend) .add_property("backend", &sc::driver::Context::backend)
; ;
bp::class_<isc::driver::CommandQueue>("command_queue", bp::init<isc::driver::Context const &, isc::driver::Device const &>()) bp::class_<sc::driver::CommandQueue>("command_queue", bp::init<sc::driver::Context const &, sc::driver::Device const &>())
.def("synchronize", &isc::driver::CommandQueue::synchronize) .def("synchronize", &sc::driver::CommandQueue::synchronize)
.add_property("profiles", bp::make_function(&isc::profiles::get, bp::return_internal_reference<>())) .add_property("profiles", bp::make_function(&sc::profiles::get, bp::return_internal_reference<>()))
.add_property("device", bp::make_function(&isc::driver::CommandQueue::device, bp::return_internal_reference<>())) .add_property("device", bp::make_function(&sc::driver::CommandQueue::device, bp::return_internal_reference<>()))
; ;
bp::class_<isc::driver::Event>("event", bp::init<isc::driver::backend_type>()) bp::class_<sc::driver::Event>("event", bp::init<sc::driver::backend_type>())
.add_property("elapsed_time", &isc::driver::Event::elapsed_time) .add_property("elapsed_time", &sc::driver::Event::elapsed_time)
; ;
bp::def("device_type_to_string", &detail::to_string); bp::def("device_type_to_string", &detail::to_string);
@@ -164,8 +164,8 @@ void export_driver()
bp::def("enqueue", &detail::enqueue, (bp::arg("expression"), bp::arg("queue_id") = 0, bp::arg("dependencies")=bp::list(), bp::arg("tune") = false, bp::arg("label")=-1, bp::arg("program_name")="", bp::arg("recompile") = false)); bp::def("enqueue", &detail::enqueue, (bp::arg("expression"), bp::arg("queue_id") = 0, bp::arg("dependencies")=bp::list(), bp::arg("tune") = false, bp::arg("label")=-1, bp::arg("program_name")="", bp::arg("recompile") = false));
bp::class_<default_driver_values_type>("default_type") bp::class_<default_driver_values_type>("default_type")
.def_readwrite("queue_properties",&isc::driver::backend::default_queue_properties) .def_readwrite("queue_properties",&sc::driver::backend::default_queue_properties)
.def_readwrite("device", &isc::driver::backend::default_device) .def_readwrite("device", &sc::driver::backend::default_device)
; ;
bp::scope().attr("default") = bp::object(bp::ptr(&default_driver_parameters)); bp::scope().attr("default") = bp::object(bp::ptr(&default_driver_parameters));

View File

@@ -13,7 +13,7 @@ namespace tpt = isaac::templates;
namespace detail namespace detail
{ {
bp::list input_sizes(tpt::base & temp, isc::expressions_tuple const & tree) bp::list input_sizes(tpt::base & temp, sc::expressions_tuple const & tree)
{ {
std::vector<int> tmp = temp.input_sizes(tree); std::vector<int> tmp = temp.input_sizes(tree);
return tools::to_list(tmp.begin(), tmp.end()); return tools::to_list(tmp.begin(), tmp.end());

View File

@@ -4,19 +4,19 @@
#include "isaac/array.h" #include "isaac/array.h"
#include "isaac/wrap/clBLAS.h" #include "isaac/wrap/clBLAS.h"
namespace isc = isaac; namespace sc = isaac;
typedef isaac::int_t int_t; typedef isaac::int_t int_t;
template<typename T> template<typename T>
void test_element_wise_vector(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T>& cy, simple_vector_base<T>& cz, void test_element_wise_vector(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T>& cy, simple_vector_base<T>& cz,
isc::array& x, isc::array& y, isc::array& z) sc::array& x, sc::array& y, sc::array& z)
{ {
using namespace std; using namespace std;
int failure_count = 0; int failure_count = 0;
isc::numeric_type dtype = x.dtype(); sc::numeric_type dtype = x.dtype();
isc::driver::Context const & context = x.context(); sc::driver::Context const & context = x.context();
isc::driver::CommandQueue queue = isc::driver::backend::queues::get(context,0); sc::driver::CommandQueue queue = sc::driver::backend::queues::get(context,0);
cl_command_queue clqueue = queue.handle().cl(); cl_command_queue clqueue = queue.handle().cl();
int_t N = cz.size(); int_t N = cz.size();
@@ -113,7 +113,7 @@ void test_element_wise_vector(T epsilon, simple_vector_base<T> & cx, simple_vect
} }
template<typename T> template<typename T>
void test_impl(T epsilon, isc::driver::Context const & ctx) void test_impl(T epsilon, sc::driver::Context const & ctx)
{ {
using isaac::_; using isaac::_;
@@ -140,11 +140,11 @@ int main()
{ {
clblasSetup(); clblasSetup();
std::list<isaac::driver::Context const *> data; std::list<isaac::driver::Context const *> data;
isc::driver::backend::contexts::get(data); sc::driver::backend::contexts::get(data);
for(isaac::driver::Context const * context : data) for(isaac::driver::Context const * context : data)
{ {
isc::driver::Device device = isc::driver::backend::queues::get(*context,0).device(); sc::driver::Device device = sc::driver::backend::queues::get(*context,0).device();
if(device.type() != isc::driver::DEVICE_TYPE_GPU) if(device.type() != sc::driver::DEVICE_TYPE_GPU)
continue; continue;
std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl; std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
std::cout << "---" << std::endl; std::cout << "---" << std::endl;

View File

@@ -5,23 +5,23 @@
#include "isaac/array.h" #include "isaac/array.h"
#include "isaac/wrap/clBLAS.h" #include "isaac/wrap/clBLAS.h"
namespace isc = isaac; namespace sc = isaac;
typedef isc::int_t int_t; typedef sc::int_t int_t;
template<typename T> template<typename T>
void test_reduction(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T> & cy, void test_reduction(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T> & cy,
isc::array & x, isc::array & y) sc::array & x, sc::array & y)
{ {
using namespace std; using namespace std;
isc::driver::Context const & ctx = x.context(); sc::driver::Context const & ctx = x.context();
int_t N = cx.size(); int_t N = cx.size();
isc::driver::CommandQueue queue = isc::driver::backend::queues::get(ctx,0); sc::driver::CommandQueue queue = sc::driver::backend::queues::get(ctx,0);
cl_command_queue clqueue = queue.handle().cl(); cl_command_queue clqueue = queue.handle().cl();
isc::array scratch(N, x.dtype()); sc::array scratch(N, x.dtype());
unsigned int failure_count = 0; unsigned int failure_count = 0;
isaac::numeric_type dtype = isc::to_numeric_type<T>::value; isaac::numeric_type dtype = sc::to_numeric_type<T>::value;
T cs = 0; T cs = 0;
T tmp = 0; T tmp = 0;
@@ -67,7 +67,7 @@ void test_reduction(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T
} }
template<typename T> template<typename T>
void test_impl(T epsilon, isc::driver::Context const & ctx) void test_impl(T epsilon, sc::driver::Context const & ctx)
{ {
using isaac::_; using isaac::_;
@@ -92,10 +92,10 @@ int main()
{ {
clblasSetup(); clblasSetup();
std::list<isaac::driver::Context const *> data; std::list<isaac::driver::Context const *> data;
isc::driver::backend::contexts::get(data); sc::driver::backend::contexts::get(data);
for(isaac::driver::Context const * context : data) for(isaac::driver::Context const * context : data)
{ {
isc::driver::Device device = isc::driver::backend::queues::get(*context,0).device(); sc::driver::Device device = sc::driver::backend::queues::get(*context,0).device();
std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl; std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
std::cout << "---" << std::endl; std::cout << "---" << std::endl;
std::cout << ">> float" << std::endl; std::cout << ">> float" << std::endl;

View File

@@ -3,23 +3,23 @@
#include "isaac/array.h" #include "isaac/array.h"
#include "isaac/wrap/clBLAS.h" #include "isaac/wrap/clBLAS.h"
namespace isc = isaac; namespace sc = isaac;
template<typename T> template<typename T>
void test_impl(T epsilon, simple_matrix_base<T> & cC, simple_matrix_base<T> const & cA, simple_matrix_base<T> const & cB, void test_impl(T epsilon, simple_matrix_base<T> & cC, simple_matrix_base<T> const & cA, simple_matrix_base<T> const & cB,
isc::array & C, isc::array const & A, isc::array const & AT, isc::array const & B, isc::array const & BT, sc::array & C, sc::array const & A, sc::array const & AT, sc::array const & B, sc::array const & BT,
interface_t interf, const char * prefix) interface_t interf, const char * prefix)
{ {
int failure_count = 0; int failure_count = 0;
isc::int_t M = C.shape()[0]; sc::int_t M = C.shape()[0];
isc::int_t N = C.shape()[1]; sc::int_t N = C.shape()[1];
isc::int_t K = A.shape()[1]; sc::int_t K = A.shape()[1];
T alpha = 1; T alpha = 1;
T beta = 0; T beta = 0;
isc::driver::CommandQueue queue = isc::driver::backend::queues::get(C.context(),0); sc::driver::CommandQueue queue = sc::driver::backend::queues::get(C.context(),0);
for(int i = 0 ; i < M ; ++i) for(int i = 0 ; i < M ; ++i)
{ {
@@ -43,7 +43,7 @@ void test_impl(T epsilon, simple_matrix_base<T> & cC, simple_matrix_base<T> cons
std::cout << "[" << prefix << "] \t" << NAME << "..." << std::flush;\ std::cout << "[" << prefix << "] \t" << NAME << "..." << std::flush;\
GPU_OP;\ GPU_OP;\
queue.synchronize();\ queue.synchronize();\
isc::copy(C, buffer);\ sc::copy(C, buffer);\
if(diff(buffer, cCbuffer, epsilon))\ if(diff(buffer, cCbuffer, epsilon))\
{\ {\
failure_count++;\ failure_count++;\
@@ -94,7 +94,7 @@ void test_impl(T epsilon, simple_matrix_base<T> & cC, simple_matrix_base<T> cons
} }
template<typename T> template<typename T>
void test_impl(T epsilon, isc::driver::Context const & ctx) void test_impl(T epsilon, sc::driver::Context const & ctx)
{ {
int_t M = 173; int_t M = 173;
int_t N = 241; int_t N = 241;
@@ -126,11 +126,11 @@ int main()
{ {
clblasSetup(); clblasSetup();
std::list<isaac::driver::Context const *> data; std::list<isaac::driver::Context const *> data;
isc::driver::backend::contexts::get(data); sc::driver::backend::contexts::get(data);
for(isaac::driver::Context const * context : data) for(isaac::driver::Context const * context : data)
{ {
isc::driver::Device device = isc::driver::backend::queues::get(*context,0).device(); sc::driver::Device device = sc::driver::backend::queues::get(*context,0).device();
if(device.type() != isc::driver::DEVICE_TYPE_GPU) if(device.type() != sc::driver::DEVICE_TYPE_GPU)
continue; continue;
std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl; std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
std::cout << "---" << std::endl; std::cout << "---" << std::endl;

View File

@@ -5,16 +5,16 @@
#include "isaac/array.h" #include "isaac/array.h"
#include "isaac/wrap/clBLAS.h" #include "isaac/wrap/clBLAS.h"
namespace isc = isaac; namespace sc = isaac;
template<typename T> template<typename T>
void test_row_wise_reduction(T epsilon, simple_vector_base<T> & cy, simple_matrix_base<T> const & cA, simple_vector_base<T> & cx, void test_row_wise_reduction(T epsilon, simple_vector_base<T> & cy, simple_matrix_base<T> const & cA, simple_vector_base<T> & cx,
isc::array & y, isc::array const & A, isc::array & x, interface_t interf, const char * prefix) sc::array & y, sc::array const & A, sc::array & x, interface_t interf, const char * prefix)
{ {
int failure_count = 0; int failure_count = 0;
isc::int_t M = A.shape()[0]; sc::int_t M = A.shape()[0];
isc::int_t N = A.shape()[1]; sc::int_t N = A.shape()[1];
simple_vector<T> bufy(M); simple_vector<T> bufy(M);
simple_vector<T> bufx(N); simple_vector<T> bufx(N);
@@ -22,7 +22,7 @@ void test_row_wise_reduction(T epsilon, simple_vector_base<T> & cy, simple_matri
T alpha = static_cast<T>(4.2); T alpha = static_cast<T>(4.2);
T beta = static_cast<T>(5.6); T beta = static_cast<T>(5.6);
isc::driver::CommandQueue queue = isc::driver::backend::queues::get(y.context(),0); sc::driver::CommandQueue queue = sc::driver::backend::queues::get(y.context(),0);
T yi = 0, xi = 0; T yi = 0, xi = 0;
#define TEST_OPERATION(NAME, SIZE1, SIZE2, NEUTRAL, REDUCTION, ASSIGNMENT, GPU_REDUCTION, RES, BUF, CRES)\ #define TEST_OPERATION(NAME, SIZE1, SIZE2, NEUTRAL, REDUCTION, ASSIGNMENT, GPU_REDUCTION, RES, BUF, CRES)\
@@ -37,7 +37,7 @@ void test_row_wise_reduction(T epsilon, simple_vector_base<T> & cy, simple_matri
}\ }\
GPU_REDUCTION;\ GPU_REDUCTION;\
queue.synchronize();\ queue.synchronize();\
isc::copy(RES, BUF.data());\ sc::copy(RES, BUF.data());\
if(diff(CRES, BUF, epsilon))\ if(diff(CRES, BUF, epsilon))\
{\ {\
failure_count++;\ failure_count++;\
@@ -90,7 +90,7 @@ void test_row_wise_reduction(T epsilon, simple_vector_base<T> & cy, simple_matri
} }
template<typename T> template<typename T>
void test_impl(T epsilon, isc::driver::Context const & ctx) void test_impl(T epsilon, sc::driver::Context const & ctx)
{ {
int_t M = 173; int_t M = 173;
int_t N = 241; int_t N = 241;
@@ -116,10 +116,10 @@ int main()
{ {
clblasSetup(); clblasSetup();
std::list<isaac::driver::Context const *> data; std::list<isaac::driver::Context const *> data;
isc::driver::backend::contexts::get(data); sc::driver::backend::contexts::get(data);
for(isaac::driver::Context const * context : data) for(isaac::driver::Context const * context : data)
{ {
isc::driver::Device device = isc::driver::backend::queues::get(*context,0).device(); sc::driver::Device device = sc::driver::backend::queues::get(*context,0).device();
std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl; std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
std::cout << "---" << std::endl; std::cout << "---" << std::endl;
std::cout << ">> float" << std::endl; std::cout << ">> float" << std::endl;

View File

@@ -2,18 +2,18 @@
#include "common.hpp" #include "common.hpp"
#include "isaac/array.h" #include "isaac/array.h"
namespace isc = isaac; namespace sc = isaac;
typedef isaac::int_t int_t; typedef isaac::int_t int_t;
template<typename T> template<typename T>
void test(T epsilon, simple_matrix_base<T> & cA, simple_matrix_base<T>& cB, simple_matrix_base<T>& cC, simple_vector_base<T>& cx, simple_vector_base<T>& cy, void test(T epsilon, simple_matrix_base<T> & cA, simple_matrix_base<T>& cB, simple_matrix_base<T>& cC, simple_vector_base<T>& cx, simple_vector_base<T>& cy,
isc::array& A, isc::array& B, isc::array& C, isc::array& x, isc::array& y) sc::array& A, sc::array& B, sc::array& C, sc::array& x, sc::array& y)
{ {
using namespace std; using namespace std;
int failure_count = 0; int failure_count = 0;
isc::numeric_type dtype = C.dtype(); sc::numeric_type dtype = C.dtype();
isc::driver::Context const & ctx = C.context(); sc::driver::Context const & ctx = C.context();
int_t M = cC.size1(); int_t M = cC.size1();
int_t N = cC.size2(); int_t N = cC.size2();
@@ -100,7 +100,7 @@ void test(T epsilon, simple_matrix_base<T> & cA, simple_matrix_base<T>& cB, simp
} }
template<typename T> template<typename T>
void test_impl(T epsilon, isc::driver::Context const & ctx) void test_impl(T epsilon, sc::driver::Context const & ctx)
{ {
using isaac::_; using isaac::_;
@@ -126,10 +126,10 @@ void test_impl(T epsilon, isc::driver::Context const & ctx)
int main() int main()
{ {
std::list<isaac::driver::Context const *> data; std::list<isaac::driver::Context const *> data;
isc::driver::backend::contexts::get(data); sc::driver::backend::contexts::get(data);
for(isaac::driver::Context const * context : data) for(isaac::driver::Context const * context : data)
{ {
isc::driver::Device device = isc::driver::backend::queues::get(*context,0).device(); sc::driver::Device device = sc::driver::backend::queues::get(*context,0).device();
std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl; std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
std::cout << "---" << std::endl; std::cout << "---" << std::endl;
std::cout << ">> float" << std::endl; std::cout << ">> float" << std::endl;

View File

@@ -1,4 +1,4 @@
import isaac as isc import isaac as sc
import random import random
from copy import deepcopy from copy import deepcopy
@@ -14,10 +14,10 @@ from numpy import cumsum
import tools import tools
fetch_types = [isc.templates.FETCH_FROM_GLOBAL_CONTIGUOUS, fetch_types = [sc.templates.FETCH_FROM_GLOBAL_CONTIGUOUS,
isc.templates.FETCH_FROM_GLOBAL_STRIDED, sc.templates.FETCH_FROM_GLOBAL_STRIDED,
isc.templates.FETCH_FROM_LOCAL, sc.templates.FETCH_FROM_LOCAL,
isc.templates.FETCH_FROM_LOCAL] sc.templates.FETCH_FROM_LOCAL]
def exhaustive(template, sizes, context): def exhaustive(template, sizes, context):
tree, _ = tools.tree_of(template, sizes, context) tree, _ = tools.tree_of(template, sizes, context)
@@ -34,7 +34,7 @@ def exhaustive(template, sizes, context):
time = tools.benchmark(template, parameters, tree) time = tools.benchmark(template, parameters, tree)
if not best or time < best[1]: if not best or time < best[1]:
best = parameters, time best = parameters, time
except (isc.OperationNotSupported, isc.LaunchOutOfResources, isc.MemObjectAllocationFailure): except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
pass pass
if best: if best:
stdout.write('%.2f %% | Best %.2f [ for %s ]\r'%(float(idx*100)/len(ranges),metric(sizes, best[1]), best[0])) stdout.write('%.2f %% | Best %.2f [ for %s ]\r'%(float(idx*100)/len(ranges),metric(sizes, best[1]), best[0]))
@@ -100,7 +100,7 @@ def genetic(template, sizes, context, naccept=200, niter = 1000, cxpb=0.4, mutpb
try: try:
individual.fitness.values = toolbox.evaluate(genome) individual.fitness.values = toolbox.evaluate(genome)
population += [individual] population += [individual]
except (isc.OperationNotSupported, isc.LaunchOutOfResources, isc.MemObjectAllocationFailure ): except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure ):
pass pass
genome = encode(list(initializer.next())) genome = encode(list(initializer.next()))
hof.update(population) hof.update(population)
@@ -134,7 +134,7 @@ def genetic(template, sizes, context, naccept=200, niter = 1000, cxpb=0.4, mutpb
#Reproduction #Reproduction
else: else:
offspring += [random.choice(population)] offspring += [random.choice(population)]
except (isc.OperationNotSupported, isc.LaunchOutOfResources, isc.MemObjectAllocationFailure): except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
pass pass
@@ -159,21 +159,21 @@ def is_local_optimum(parameters, template, sizes, context):
tree, _ = tools.tree_of(template, sizes, context) tree, _ = tools.tree_of(template, sizes, context)
genetic_infos = tools.genetic_infos_of(template) genetic_infos = tools.genetic_infos_of(template)
if issubclass(template, isc.templates.axpy): if issubclass(template, sc.templates.axpy):
sweep_over = [0,1,2] sweep_over = [0,1,2]
elif issubclass(template, isc.templates.dot): elif issubclass(template, sc.templates.dot):
sweep_over = [0,1,2] sweep_over = [0,1,2]
elif issubclass(template, isc.templates.ger): elif issubclass(template, sc.templates.ger):
sweep_over = [0,1,2,3,4] sweep_over = [0,1,2,3,4]
elif issubclass(template, isc.templates.gemv): elif issubclass(template, sc.templates.gemv):
sweep_over = [0,1,2,3,4] sweep_over = [0,1,2,3,4]
elif issubclass(template, isc.templates.gemm): elif issubclass(template, sc.templates.gemm):
sweep_over = [1,3,5,7] sweep_over = [1,3,5,7]
#Evaluate the provided parameters guess #Evaluate the provided parameters guess
try: try:
reference = tools.benchmark(template, parameters, tree) reference = tools.benchmark(template, parameters, tree)
except (isc.OperationNotSupported, isc.LaunchOutOfResources, isc.MemObjectAllocationFailure): except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
return False return False
#Latency bound -- ignore #Latency bound -- ignore
@@ -190,7 +190,7 @@ def is_local_optimum(parameters, template, sizes, context):
time = tools.benchmark(template, x, tree) time = tools.benchmark(template, x, tree)
if time/reference < .97: if time/reference < .97:
return False return False
except (isc.OperationNotSupported, isc.LaunchOutOfResources, isc.MemObjectAllocationFailure): except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
pass pass
return True return True

View File

@@ -1,4 +1,4 @@
import isaac as isc import isaac as sc
from numpy import mean, median from numpy import mean, median
from math import ceil, exp, log, sqrt from math import ceil, exp, log, sqrt
@@ -21,13 +21,13 @@ def expspace(a,b,N,r=128):
def benchmark(template, setting, tree): def benchmark(template, setting, tree):
queue = tree.context.queues[0] queue = tree.context.queues[0]
queue.profiles[template, isc.float32] = isc.profile(template(*setting), isc.float32, queue) queue.profiles[template, sc.float32] = sc.profile(template(*setting), sc.float32, queue)
times = [] times = []
total = 0 total = 0
i = 0 i = 0
while total < 1e-2: while total < 1e-2:
#z = isc.zeros(1, 10000000, isc.float32, tree.context) #z = sc.zeros(1, 10000000, sc.float32, tree.context)
z, events = isc.driver.enqueue(tree) z, events = sc.driver.enqueue(tree)
tree.context.queues[0].synchronize() tree.context.queues[0].synchronize()
times.append(1e-9*sum([e.elapsed_time for e in events])) times.append(1e-9*sum([e.elapsed_time for e in events]))
total += times[-1] total += times[-1]
@@ -36,67 +36,67 @@ def benchmark(template, setting, tree):
def tree_of(template, sizes, context): def tree_of(template, sizes, context):
if issubclass(template, isc.templates.axpy): if issubclass(template, sc.templates.axpy):
N, = sizes N, = sizes
x = isc.empty(N, dtype=isc.float32, context=context) x = sc.empty(N, dtype=sc.float32, context=context)
y = isc.empty(N, dtype=isc.float32, context=context) y = sc.empty(N, dtype=sc.float32, context=context)
return x + y, (x, y) return x + y, (x, y)
elif issubclass(template, isc.templates.dot): elif issubclass(template, sc.templates.dot):
N, = sizes N, = sizes
x = isc.empty(N, context=context) x = sc.empty(N, context=context)
y = isc.empty(N, context=context) y = sc.empty(N, context=context)
return isc.dot(x, y), (x, y) return sc.dot(x, y), (x, y)
elif issubclass(template, isc.templates.ger): elif issubclass(template, sc.templates.ger):
M, N = sizes M, N = sizes
A = isc.empty((M,N), context=context) A = sc.empty((M,N), context=context)
B = isc.empty((M,N), context=context) B = sc.empty((M,N), context=context)
return A + B, (A, B) return A + B, (A, B)
elif issubclass(template, isc.templates.gemv): elif issubclass(template, sc.templates.gemv):
T = template is isc.templates.gemv_t T = template is sc.templates.gemv_t
M, N = sizes[::-1] if T else sizes M, N = sizes[::-1] if T else sizes
A = isc.empty((M,N), context=context) A = sc.empty((M,N), context=context)
x = isc.empty(N, context=context) x = sc.empty(N, context=context)
return isc.dot(A.T, x) if T else isc.dot(A, x), (A, x) return sc.dot(A.T, x) if T else sc.dot(A, x), (A, x)
elif issubclass(template, isc.templates.gemm): elif issubclass(template, sc.templates.gemm):
AT = template is isc.templates.gemm_tn or template is isc.templates.gemm_tt AT = template is sc.templates.gemm_tn or template is sc.templates.gemm_tt
BT = template is isc.templates.gemm_nt or template is isc.templates.gemm_tt BT = template is sc.templates.gemm_nt or template is sc.templates.gemm_tt
M, N, K = sizes M, N, K = sizes
A = isc.empty((K, M) if AT else (M, K), context=context) A = sc.empty((K, M) if AT else (M, K), context=context)
B = isc.empty((N, K) if BT else (K, N), context=context) B = sc.empty((N, K) if BT else (K, N), context=context)
AA = A.T if AT else A AA = A.T if AT else A
BB = B.T if BT else B BB = B.T if BT else B
return isc.dot(AA, BB), (A, B) return sc.dot(AA, BB), (A, B)
def memory_footprint(template, sizes): def memory_footprint(template, sizes):
if issubclass(template, isc.templates.axpy): if issubclass(template, sc.templates.axpy):
return 4*3*sizes[0]*1e-9 return 4*3*sizes[0]*1e-9
elif issubclass(template, isc.templates.dot): elif issubclass(template, sc.templates.dot):
return 4*2*sizes[0]*1e-9 return 4*2*sizes[0]*1e-9
elif issubclass(template, isc.templates.ger): elif issubclass(template, sc.templates.ger):
return 4*3*sizes[0]*sizes[1]*1e-9 return 4*3*sizes[0]*sizes[1]*1e-9
elif issubclass(template, isc.templates.gemv): elif issubclass(template, sc.templates.gemv):
return 4*sizes[0]*sizes[1]*1e-9 return 4*sizes[0]*sizes[1]*1e-9
elif issubclass(template, isc.templates.gemm): elif issubclass(template, sc.templates.gemm):
return 4*(sizes[0]*sizes[1] + sizes[0]*sizes[2] + sizes[1]*sizes[2])*1e-9 return 4*(sizes[0]*sizes[1] + sizes[0]*sizes[2] + sizes[1]*sizes[2])*1e-9
def metric_of(template): def metric_of(template):
memory_bound = [isc.templates.axpy, isc.templates.dot, isc.templates.ger, isc.templates.gemv] memory_bound = [sc.templates.axpy, sc.templates.dot, sc.templates.ger, sc.templates.gemv]
compute_bound = [isc.templates.gemm] compute_bound = [sc.templates.gemm]
if any([issubclass(template, x) for x in memory_bound]): if any([issubclass(template, x) for x in memory_bound]):
return lambda sizes, t: memory_footprint(template, sizes)/t return lambda sizes, t: memory_footprint(template, sizes)/t
elif any([issubclass(template, x) for x in compute_bound]): elif any([issubclass(template, x) for x in compute_bound]):
return lambda sizes, t: 2*sizes[0]*sizes[1]*sizes[2]*1e-9/t return lambda sizes, t: 2*sizes[0]*sizes[1]*sizes[2]*1e-9/t
def genetic_infos_of(template): def genetic_infos_of(template):
if issubclass(template, isc.templates.axpy): if issubclass(template, sc.templates.axpy):
return {'categorical': [3], 'nbits': [3,4,4,2] } return {'categorical': [3], 'nbits': [3,4,4,2] }
elif issubclass(template, isc.templates.dot): elif issubclass(template, sc.templates.dot):
return {'categorical': [3], 'nbits':[3,4,4,2]} return {'categorical': [3], 'nbits':[3,4,4,2]}
elif issubclass(template, isc.templates.ger): elif issubclass(template, sc.templates.ger):
return {'categorical': [5], 'nbits': [3,3,3,3,4,2]} return {'categorical': [5], 'nbits': [3,3,3,3,4,2]}
elif issubclass(template, isc.templates.gemv): elif issubclass(template, sc.templates.gemv):
return {'categorical': [5], 'nbits': [3,3,3,3,4,2]} return {'categorical': [5], 'nbits': [3,3,3,3,4,2]}
elif issubclass(template, isc.templates.gemm): elif issubclass(template, sc.templates.gemm):
return {'categorical': [8,9], 'nbits': [3,3,3,3,3,2,2,2,2,2,3,3]} return {'categorical': [8,9], 'nbits': [3,3,3,3,3,2,2,2,2,2,3,3]}

View File

@@ -4,7 +4,7 @@ from itertools import chain, product
from numpy import argsort, argmax from numpy import argsort, argmax
from operator import mul from operator import mul
from sklearn import ensemble from sklearn import ensemble
import isaac as isc import isaac as sc
import optimize, tools, model import optimize, tools, model
from json import encoder from json import encoder
@@ -22,40 +22,40 @@ def pow2range(a, b):
def tune(device, operation, json_path): def tune(device, operation, json_path):
#List devices #List devices
platforms = isc.driver.get_platforms() platforms = sc.driver.get_platforms()
context = isc.driver.context(device) context = sc.driver.context(device)
#List of size tuples to use #List of size tuples to use
sizes = {} sizes = {}
sizes[isc.templates.axpy] = [(x,) for x in tools.expspace(1e3, 1e8, 4)] sizes[sc.templates.axpy] = [(x,) for x in tools.expspace(1e3, 1e8, 4)]
sizes[isc.templates.gemv_n] = product(pow2range(4,17), pow2range(4,17)) sizes[sc.templates.gemv_n] = product(pow2range(4,17), pow2range(4,17))
sizes[isc.templates.gemv_t] = sizes[isc.templates.gemv_n] sizes[sc.templates.gemv_t] = sizes[sc.templates.gemv_n]
sizes[isc.templates.gemm_nn] = product(pow2range(6, 12), pow2range(6, 12), pow2range(6, 12)) sizes[sc.templates.gemm_nn] = product(pow2range(6, 12), pow2range(6, 12), pow2range(6, 12))
sizes[isc.templates.gemm_tn] = sizes[isc.templates.gemm_nn] sizes[sc.templates.gemm_tn] = sizes[sc.templates.gemm_nn]
sizes[isc.templates.gemm_nt] = sizes[isc.templates.gemm_nn] sizes[sc.templates.gemm_nt] = sizes[sc.templates.gemm_nn]
sizes[isc.templates.gemm_tt] = sizes[isc.templates.gemm_nn] sizes[sc.templates.gemm_tt] = sizes[sc.templates.gemm_nn]
#Quick tuning - AlexNet sizes + Intuition #Quick tuning - AlexNet sizes + Intuition
sizes[isc.templates.ger] = [(1536,1536)] sizes[sc.templates.ger] = [(1536,1536)]
sizes[isc.templates.gemv_n] = [(1000,256), sizes[sc.templates.gemv_n] = [(1000,256),
(4096,256)] (4096,256)]
sizes[isc.templates.gemv_t] = [(169,256), sizes[sc.templates.gemv_t] = [(169,256),
(169,384), (169,384),
(729,256), (729,256),
(3025,96)] (3025,96)]
sizes[isc.templates.gemm_nn] = [(3025,96,363), sizes[sc.templates.gemm_nn] = [(3025,96,363),
(729,128,1200), (729,128,1200),
(169,384,2304), (169,384,2304),
(169,192,1728), (169,192,1728),
(169,128,1728)] (169,128,1728)]
sizes[isc.templates.gemm_nt] = [(169,1728,128), sizes[sc.templates.gemm_nt] = [(169,1728,128),
(169,1728,192), (169,1728,192),
(169,2304,384), (169,2304,384),
(729,1200,128)] (729,1200,128)]
sizes[isc.templates.gemm_tn] = [(1728,128,169), sizes[sc.templates.gemm_tn] = [(1728,128,169),
(1728,192,169), (1728,192,169),
(2304,384,169), (2304,384,169),
(1200,128,729), (1200,128,729),
@@ -102,7 +102,7 @@ def tune(device, operation, json_path):
try: try:
time = tools.benchmark(operation, new, _tree) time = tools.benchmark(operation, new, _tree)
perf = performance(xx, time) perf = performance(xx, time)
except (isc.OperationNotSupported, isc.LaunchOutOfResources, isc.MemObjectAllocationFailure): except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
perf = 0 perf = 0
yy.append(0 if isinf(perf) else perf) yy.append(0 if isinf(perf) else perf)
#Update dataset #Update dataset
@@ -111,7 +111,7 @@ def tune(device, operation, json_path):
for ip, p in enumerate(profiles): for ip, p in enumerate(profiles):
try: try:
perf = 0 if fastest and ip < nparams and predperf[ip]/fastest < .1 else performance(x,tools.benchmark(operation, p, tree)) perf = 0 if fastest and ip < nparams and predperf[ip]/fastest < .1 else performance(x,tools.benchmark(operation, p, tree))
except (isc.OperationNotSupported, isc.LaunchOutOfResources, isc.MemObjectAllocationFailure): except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
perf = 0 perf = 0
y.append(0 if isinf(perf) else perf) y.append(0 if isinf(perf) else perf)
X.append(x) X.append(x)
@@ -141,7 +141,7 @@ def tune(device, operation, json_path):
def parse_arguments(): def parse_arguments():
platforms = isc.driver.get_platforms() platforms = sc.driver.get_platforms()
devices = [d for platform in platforms for d in platform.get_devices()] devices = [d for platform in platforms for d in platform.get_devices()]
#Command line arguments #Command line arguments
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
@@ -156,20 +156,20 @@ def parse_arguments():
print("----------------") print("----------------")
for (i, d) in enumerate(devices): for (i, d) in enumerate(devices):
selected = '[' + ('x' if device==d else ' ') + ']' selected = '[' + ('x' if device==d else ' ') + ']'
print selected , '-', isc.driver.device_type_to_string(d.type), '-', d.name, 'on', d.platform.name print selected , '-', sc.driver.device_type_to_string(d.type), '-', d.name, 'on', d.platform.name
print("----------------") print("----------------")
operation = {'axpy': isc.templates.axpy, 'dot': isc.templates.dot, operation = {'axpy': sc.templates.axpy, 'dot': sc.templates.dot,
'ger': isc.templates.ger, 'gemv_n': isc.templates.gemv_n, 'gemv_t': isc.templates.gemv_t, 'ger': sc.templates.ger, 'gemv_n': sc.templates.gemv_n, 'gemv_t': sc.templates.gemv_t,
'gemm_nn': isc.templates.gemm_nn, 'gemm_tn': isc.templates.gemm_tn, 'gemm_nt': isc.templates.gemm_nt, 'gemm_tt':isc.templates.gemm_tt}[args.operation] 'gemm_nn': sc.templates.gemm_nn, 'gemm_tn': sc.templates.gemm_tn, 'gemm_nt': sc.templates.gemm_nt, 'gemm_tt':sc.templates.gemm_tt}[args.operation]
json = tools.sanitize(device.name) + '.json' if not args.json else args.json json = tools.sanitize(device.name) + '.json' if not args.json else args.json
return (device, operation, json) return (device, operation, json)
if __name__ == "__main__": if __name__ == "__main__":
isc.driver.default.queue_properties = isc.driver.PROFILING_ENABLE sc.driver.default.queue_properties = sc.driver.PROFILING_ENABLE
args = parse_arguments() args = parse_arguments()
tune(*args) tune(*args)