Code quality: renamed isaac shortcut from isc to sc

This commit is contained in:
Philippe Tillet
2015-08-12 19:38:53 -07:00
parent b397d5306e
commit 71224a1507
13 changed files with 355 additions and 355 deletions

View File

@@ -18,8 +18,8 @@
#include "timer.hpp"
// Short alias for the isaac namespace, plus a file-local name for its integer type.
namespace isc = isaac;
typedef isc::int_t int_t;
namespace sc = isaac;
typedef sc::int_t int_t;
template<std::size_t> struct int_{};
@@ -86,11 +86,11 @@ T mean(std::vector<T> x)
return res/N;
}
// Returns `sum` plus the elapsed time reported by event `e`.
// NOTE(review): the (running total, element) parameter order looks like an
// accumulate-style step function -- confirm at the call site.
static long time_event(long sum, isc::driver::Event const & e)
static long time_event(long sum, sc::driver::Event const & e)
{ return sum + e.elapsed_time();}
template<class T>
void bench(isc::numeric_type dtype, std::string operation)
void bench(sc::numeric_type dtype, std::string operation)
{
//
@@ -103,8 +103,8 @@ void bench(isc::numeric_type dtype, std::string operation)
std::vector<double> times;\
double total_time = 0;\
while(total_time*1e-9 < 1e-3){\
std::list<isc::driver::Event> events;\
flush = isc::zeros((isaac::int_t)1e6, 1, dtype);\
std::list<sc::driver::Event> events;\
flush = sc::zeros((isaac::int_t)1e6, 1, dtype);\
queue.synchronize();\
OP;\
queue.synchronize();\
@@ -121,11 +121,11 @@ void bench(isc::numeric_type dtype, std::string operation)
double total_time = 0;\
while(total_time*1e-9 < 1e-3){\
cl_event event;\
flush = isc::zeros(1e6, 1, dtype);\
flush = sc::zeros(1e6, 1, dtype);\
queue.synchronize();\
OP;\
queue.synchronize();\
times.push_back(isc::driver::Event(event).elapsed_time());\
times.push_back(sc::driver::Event(event).elapsed_time());\
total_time+=times.back();\
}\
double t = median(times);\
@@ -134,7 +134,7 @@ void bench(isc::numeric_type dtype, std::string operation)
#define BENCHMARK_HOST(OP, PERF) \
{\
isc::tools::timer tmr;\
sc::tools::timer tmr;\
double total_time = 0;\
std::vector<double> times;\
while(total_time < 1e-2){\
@@ -160,7 +160,7 @@ void bench(isc::numeric_type dtype, std::string operation)
OP;\
cudaThreadSynchronize();\
while(total_time*1e-3 < 1e-3){\
flush = isc::zeros(1e6, 1, dtype);\
flush = sc::zeros(1e6, 1, dtype);\
cudaEventRecord(start,0);\
OP;\
cudaEventRecord(stop,0);\
@@ -173,10 +173,10 @@ void bench(isc::numeric_type dtype, std::string operation)
std::cout << "\t" << PERF << std::flush;\
}
unsigned int dtsize = isc::size_of(dtype);
isc::driver::CommandQueue & queue = isc::driver::backend::queues::get(isc::driver::backend::contexts::get_default(),0);
unsigned int dtsize = sc::size_of(dtype);
sc::driver::CommandQueue & queue = sc::driver::backend::queues::get(sc::driver::backend::contexts::get_default(),0);
std::map<std::string, std::string> metric{ {"axpy", "GB/s"}, {"dot", "GB/s"}, {"gemv", "GB/s"}, {"gemm", "GFLOPS"}};
isc::array flush((int)1e6, isc::FLOAT_TYPE);
sc::array flush((int)1e6, sc::FLOAT_TYPE);
std::cout << "#" << operation << " (" << metric[operation] << ")" << std::endl;
std::cout << "N";
std::cout << "\tISAAC";
@@ -204,10 +204,10 @@ void bench(isc::numeric_type dtype, std::string operation)
for(int_t N: create_log_range((int)1e3, (int)2e7, 50, 64))
{
std::cout << N;
isc::array x(N, dtype), y(N, dtype);
sc::array x(N, dtype), y(N, dtype);
/* ISAAC */
std::list<isc::driver::Event> events;
BENCHMARK_ISAAC(y = isc::control(x + alpha*y, isc::execution_options_type(0, &events)), 3*N*dtsize/t)
std::list<sc::driver::Event> events;
BENCHMARK_ISAAC(y = sc::control(x + alpha*y, sc::execution_options_type(0, &events)), 3*N*dtsize/t)
/* clblas */
#ifdef BENCH_CLBLAS
BENCHMARK_CLBLAS(clblasSaxpy(N, alpha, CL_HANDLE(x.data()), 0, 1, CL_HANDLE(y.data()), 0, 1, 1, &CL_HANDLE(queue), 0, NULL, &event), 3*N*dtsize/t);
@@ -215,8 +215,8 @@ void bench(isc::numeric_type dtype, std::string operation)
/* BLAS */
#ifdef BENCH_CBLAS
std::vector<float> cx(N), cy(N);
isc::copy(x, cx);
isc::copy(y, cy);
sc::copy(x, cx);
sc::copy(y, cy);
BENCHMARK_HOST(cblas_saxpy(N, alpha, cx.data(), 1, cy.data(), 1), 3*N*dtsize/t);
#endif
/* CuBLAS */
@@ -238,11 +238,11 @@ void bench(isc::numeric_type dtype, std::string operation)
{
std::cout << N;
/* ISAAC */
isc::array x(N, dtype), y(N, dtype);
isc::array scratch(N, dtype);
isc::scalar s(dtype);
sc::array x(N, dtype), y(N, dtype);
sc::array scratch(N, dtype);
sc::scalar s(dtype);
s = dot(x,y); queue.synchronize();
BENCHMARK_ISAAC(s = isc::control(dot(x,y), isc::execution_options_type(0, &events)), 2*N*dtsize/t)
BENCHMARK_ISAAC(s = sc::control(dot(x,y), sc::execution_options_type(0, &events)), 2*N*dtsize/t)
/* clblas */
#ifdef BENCH_CLBLAS
BENCHMARK_CLBLAS(clblasSdot(N, CL_HANDLE(s.data()), 0, CL_HANDLE(x.data()), 0, 1, CL_HANDLE(y.data()), 0, 1, CL_HANDLE(scratch.data()), 1, &CL_HANDLE(queue), 0, NULL, &event), 2*N*dtsize/t)
@@ -250,8 +250,8 @@ void bench(isc::numeric_type dtype, std::string operation)
/* BLAS */
#ifdef BENCH_CBLAS
std::vector<float> cx(N), cy(N);
isc::copy(x, cx);
isc::copy(y, cy);
sc::copy(x, cx);
sc::copy(y, cy);
BENCHMARK_HOST(cblas_sdot(N, cx.data(), 1, cy.data(), 1), 2*N*dtsize/t);
#endif
#ifdef BENCH_CUBLAS
@@ -294,19 +294,19 @@ void bench(isc::numeric_type dtype, std::string operation)
if(AT) std::swap(As1, As2);
/* ISAAC */
isc::array A(As1, As2, dtype), y(M, dtype), x(N, dtype);
sc::array A(As1, As2, dtype), y(M, dtype), x(N, dtype);
#ifdef HAS_A_BLAS
int_t lda = A.ld();
#endif
BENCHMARK_ISAAC(y = isc::control(AT?dot(A.T(),x):dot(A,x), isc::execution_options_type(0, &events)),(M*N + M + N)*dtsize/t);
BENCHMARK_ISAAC(y = sc::control(AT?dot(A.T(),x):dot(A,x), sc::execution_options_type(0, &events)),(M*N + M + N)*dtsize/t);
#ifdef BENCH_CLBLAS
BENCHMARK_CLBLAS(clblasSgemv(clblasColumnMajor, AT?clblasTrans:clblasNoTrans, As1, As2, 1, CL_HANDLE(A.data()), 0, lda, CL_HANDLE(x.data()), 0, 1, 0, CL_HANDLE(y.data()), 0, 1, 1, &CL_HANDLE(queue),0, NULL, &event), (M*N + M + N)*dtsize/t)
#endif
#ifdef BENCH_CBLAS
std::vector<float> cA(M*N), cx(N), cy(M);
isc::copy(x, cx);
isc::copy(y, cy);
isc::copy(A, cA);
sc::copy(x, cx);
sc::copy(y, cy);
sc::copy(A, cA);
BENCHMARK_HOST(cblas_sgemv(CblasColMajor, AT?CblasTrans:CblasNoTrans, As1, As2, 1, cA.data(), lda, cx.data(), 1, 0, cy.data(), 1), (M*N + M + N)*dtsize/t);
#endif
#ifdef BENCH_CUBLAS
@@ -369,11 +369,11 @@ void bench(isc::numeric_type dtype, std::string operation)
int_t Bs1 = K, Bs2 = N;
if(BT) std::swap(Bs1, Bs2);
isc::array C(M, N, dtype), A(As1, As2, dtype), B(Bs1, Bs2, dtype);
sc::array C(M, N, dtype), A(As1, As2, dtype), B(Bs1, Bs2, dtype);
#ifdef HAS_A_BLAS
int_t lda = A.ld(), ldb = B.ld(), ldc = C.ld();
#endif
BENCHMARK_ISAAC(C = isc::control(AT?(BT?dot(A.T(),B.T()):dot(A.T(),B)):(BT?dot(A,B.T()):dot(A,B)), isc::execution_options_type(0, &events)), (double)2*M*N*K/t);
BENCHMARK_ISAAC(C = sc::control(AT?(BT?dot(A.T(),B.T()):dot(A.T(),B)):(BT?dot(A,B.T()):dot(A,B)), sc::execution_options_type(0, &events)), (double)2*M*N*K/t);
/* clblas */
#ifdef BENCH_CLBLAS
BENCHMARK_CLBLAS(clblasSgemm(clblasColumnMajor, AT?clblasTrans:clblasNoTrans, BT?clblasTrans:clblasNoTrans, M, N, K, 1, CL_HANDLE(A.data()), 0, lda, CL_HANDLE(B.data()), 0, ldb,
@@ -382,9 +382,9 @@ void bench(isc::numeric_type dtype, std::string operation)
/* BLAS */
#ifdef BENCH_CBLAS
std::vector<float> cC(M*N), cA(M*K), cB(N*K);
isc::copy(C, cC);
isc::copy(A, cA);
isc::copy(B, cB);
sc::copy(C, cC);
sc::copy(A, cA);
sc::copy(B, cB);
BENCHMARK_HOST(cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, cA.data(), lda, cB.data(), ldb, 1, cC.data(), ldc), (double)2*M*N*K/t);
#endif
#ifdef BENCH_CUBLAS
@@ -409,11 +409,11 @@ int main(int argc, char* argv[])
#ifdef BENCH_CLBLAS
clblasSetup();
#endif
isc::driver::backend::default_queue_properties = CL_QUEUE_PROFILING_ENABLE;
sc::driver::backend::default_queue_properties = CL_QUEUE_PROFILING_ENABLE;
int device_idx = 0;
std::list<isc::driver::Context const *> contexts;
isc::driver::backend::contexts::get(contexts);
std::list<sc::driver::Context const *> contexts;
sc::driver::backend::contexts::get(contexts);
std::string operation;
if(contexts.size() > 1)
@@ -423,9 +423,9 @@ int main(int argc, char* argv[])
std::cerr << "usage : blas-bench DEVICE_IDX OPERATION" << std::endl;
std::cout << "Devices available: " << std::endl;
unsigned int current=0;
for(isc::driver::Context const * context: contexts)
for(sc::driver::Context const * context: contexts)
{
isc::driver::Device device = isc::driver::backend::queues::get(*context,0).device();
sc::driver::Device device = sc::driver::backend::queues::get(*context,0).device();
std::cout << current++ << ": " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
}
exit(EXIT_FAILURE);
@@ -443,10 +443,10 @@ int main(int argc, char* argv[])
operation = args[1];
}
isc::driver::backend::default_device = device_idx;
sc::driver::backend::default_device = device_idx;
std::cout << "#Benchmark : BLAS" << std::endl;
std::cout << "#----------------" << std::endl;
bench<float>(isc::FLOAT_TYPE, operation);
bench<float>(sc::FLOAT_TYPE, operation);
#ifdef BENCH_CLBLAS
clblasTeardown();

View File

@@ -10,7 +10,7 @@
// MAP_ENUM(v, ns) expands to `.value("v", ns::v)`: the enumerator name is
// stringized and paired with the enumerator itself, so invocations can be
// chained directly after a bp::enum_<...>(...) expression.
#define MAP_ENUM(v, ns) .value(#v, ns::v)
// Short namespace aliases for Boost.Python, isaac and Boost.NumPy.
namespace bp = boost::python;
namespace isc = isaac;
namespace sc = isaac;
namespace np = boost::numpy;
namespace tools
@@ -36,7 +36,7 @@ namespace tools
}
inline isc::numeric_type extract_dtype(bp::object const & odtype)
inline sc::numeric_type extract_dtype(bp::object const & odtype)
{
std::string name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))();
if(name=="class")
@@ -44,16 +44,16 @@ namespace tools
else
name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))();
if(name=="int8") return isc::CHAR_TYPE;
else if(name=="uint8") return isc::UCHAR_TYPE;
else if(name=="int16") return isc::SHORT_TYPE;
else if(name=="uint16") return isc::USHORT_TYPE;
else if(name=="int32") return isc::INT_TYPE;
else if(name=="uint32") return isc::UINT_TYPE;
else if(name=="int64") return isc::LONG_TYPE;
else if(name=="uint64") return isc::ULONG_TYPE;
else if(name=="float32") return isc::FLOAT_TYPE;
else if(name=="float64") return isc::DOUBLE_TYPE;
if(name=="int8") return sc::CHAR_TYPE;
else if(name=="uint8") return sc::UCHAR_TYPE;
else if(name=="int16") return sc::SHORT_TYPE;
else if(name=="uint16") return sc::USHORT_TYPE;
else if(name=="int32") return sc::INT_TYPE;
else if(name=="uint32") return sc::UINT_TYPE;
else if(name=="int64") return sc::LONG_TYPE;
else if(name=="uint64") return sc::ULONG_TYPE;
else if(name=="float32") return sc::FLOAT_TYPE;
else if(name=="float64") return sc::DOUBLE_TYPE;
else
{
PyErr_SetString(PyExc_TypeError, "Data type not understood");
@@ -62,7 +62,7 @@ namespace tools
}
}
inline isc::expression_type extract_template_type(bp::object const & odtype)
inline sc::expression_type extract_template_type(bp::object const & odtype)
{
std::string name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))();
if(name=="class")
@@ -70,15 +70,15 @@ namespace tools
else
name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))();
if(name=="axpy") return isc::AXPY_TYPE;
else if(name=="ger") return isc::GER_TYPE;
else if(name=="dot") return isc::DOT_TYPE;
else if(name=="gemv_n") return isc::GEMV_N_TYPE;
else if(name=="gemv_t") return isc::GEMV_T_TYPE;
else if(name=="gemm_nn") return isc::GEMM_NN_TYPE;
else if(name=="gemm_tn") return isc::GEMM_TN_TYPE;
else if(name=="gemm_nt") return isc::GEMM_NT_TYPE;
else if(name=="gemm_tt") return isc::GEMM_TT_TYPE;
if(name=="axpy") return sc::AXPY_TYPE;
else if(name=="ger") return sc::GER_TYPE;
else if(name=="dot") return sc::DOT_TYPE;
else if(name=="gemv_n") return sc::GEMV_N_TYPE;
else if(name=="gemv_t") return sc::GEMV_T_TYPE;
else if(name=="gemm_nn") return sc::GEMM_NN_TYPE;
else if(name=="gemm_tn") return sc::GEMM_TN_TYPE;
else if(name=="gemm_nt") return sc::GEMM_NT_TYPE;
else if(name=="gemm_tt") return sc::GEMM_TT_TYPE;
else
{
PyErr_SetString(PyExc_TypeError, "Template type not understood");

View File

@@ -6,19 +6,19 @@ namespace detail
{
isc::numeric_type to_isc_dtype(np::dtype const & T)
sc::numeric_type to_sc_dtype(np::dtype const & T)
{
if(T==np::detail::get_int_dtype<8, false>()) return isc::CHAR_TYPE;
else if(T==np::detail::get_int_dtype<8, true>()) return isc::UCHAR_TYPE;
else if(T==np::detail::get_int_dtype<16, false>()) return isc::SHORT_TYPE;
else if(T==np::detail::get_int_dtype<16, true>()) return isc::USHORT_TYPE;
else if(T==np::detail::get_int_dtype<32, false>()) return isc::INT_TYPE;
else if(T==np::detail::get_int_dtype<32, true>()) return isc::UINT_TYPE;
else if(T==np::detail::get_int_dtype<64, false>()) return isc::LONG_TYPE;
else if(T==np::detail::get_int_dtype<64, true>()) return isc::ULONG_TYPE;
// else if(T==np::detail::get_float_dtype<16>()) return isc::HALF_TYPE;
else if(T==np::detail::get_float_dtype<32>()) return isc::FLOAT_TYPE;
else if(T==np::detail::get_float_dtype<64>()) return isc::DOUBLE_TYPE;
if(T==np::detail::get_int_dtype<8, false>()) return sc::CHAR_TYPE;
else if(T==np::detail::get_int_dtype<8, true>()) return sc::UCHAR_TYPE;
else if(T==np::detail::get_int_dtype<16, false>()) return sc::SHORT_TYPE;
else if(T==np::detail::get_int_dtype<16, true>()) return sc::USHORT_TYPE;
else if(T==np::detail::get_int_dtype<32, false>()) return sc::INT_TYPE;
else if(T==np::detail::get_int_dtype<32, true>()) return sc::UINT_TYPE;
else if(T==np::detail::get_int_dtype<64, false>()) return sc::LONG_TYPE;
else if(T==np::detail::get_int_dtype<64, true>()) return sc::ULONG_TYPE;
// else if(T==np::detail::get_float_dtype<16>()) return sc::HALF_TYPE;
else if(T==np::detail::get_float_dtype<32>()) return sc::FLOAT_TYPE;
else if(T==np::detail::get_float_dtype<64>()) return sc::DOUBLE_TYPE;
else{
PyErr_SetString(PyExc_TypeError, "Unrecognized datatype");
bp::throw_error_already_set();
@@ -26,19 +26,19 @@ isc::numeric_type to_isc_dtype(np::dtype const & T)
}
}
np::dtype to_np_dtype(isc::numeric_type const & T) throw()
np::dtype to_np_dtype(sc::numeric_type const & T) throw()
{
if(T==isc::CHAR_TYPE) return np::detail::get_int_dtype<8, false>();
else if(T==isc::UCHAR_TYPE) return np::detail::get_int_dtype<8, true>();
else if(T==isc::SHORT_TYPE) return np::detail::get_int_dtype<16, false>();
else if(T==isc::USHORT_TYPE) return np::detail::get_int_dtype<16, true>();
else if(T==isc::INT_TYPE) return np::detail::get_int_dtype<32, false>();
else if(T==isc::UINT_TYPE) return np::detail::get_int_dtype<32, true>();
else if(T==isc::LONG_TYPE) return np::detail::get_int_dtype<64, false>();
else if(T==isc::ULONG_TYPE) return np::detail::get_int_dtype<64, true>();
// else if(T==isc::HALF_TYPE) return np::detail::get_float_dtype<16>();
else if(T==isc::FLOAT_TYPE) return np::detail::get_float_dtype<32>();
else if(T==isc::DOUBLE_TYPE) return np::detail::get_float_dtype<64>();
if(T==sc::CHAR_TYPE) return np::detail::get_int_dtype<8, false>();
else if(T==sc::UCHAR_TYPE) return np::detail::get_int_dtype<8, true>();
else if(T==sc::SHORT_TYPE) return np::detail::get_int_dtype<16, false>();
else if(T==sc::USHORT_TYPE) return np::detail::get_int_dtype<16, true>();
else if(T==sc::INT_TYPE) return np::detail::get_int_dtype<32, false>();
else if(T==sc::UINT_TYPE) return np::detail::get_int_dtype<32, true>();
else if(T==sc::LONG_TYPE) return np::detail::get_int_dtype<64, false>();
else if(T==sc::ULONG_TYPE) return np::detail::get_int_dtype<64, true>();
// else if(T==sc::HALF_TYPE) return np::detail::get_float_dtype<16>();
else if(T==sc::FLOAT_TYPE) return np::detail::get_float_dtype<32>();
else if(T==sc::DOUBLE_TYPE) return np::detail::get_float_dtype<64>();
else{
PyErr_SetString(PyExc_TypeError, "Unrecognized datatype");
bp::throw_error_already_set();
@@ -46,21 +46,21 @@ np::dtype to_np_dtype(isc::numeric_type const & T) throw()
}
}
// Packs the first two entries of x.shape() into a two-element Python tuple.
bp::tuple get_shape(isc::array const & x)
bp::tuple get_shape(sc::array const & x)
{
return bp::make_tuple(x.shape()[0], x.shape()[1]);
}
// value_scalar subclass parameterised on the wrapped C++ type T.
// It adds no state or behaviour of its own: the constructor only forwards
// `t` to the value_scalar base.
template<class T>
struct datatype : public isc::value_scalar
struct datatype : public sc::value_scalar
{
datatype(T t) : isc::value_scalar(t){ }
datatype(T t) : sc::value_scalar(t){ }
};
// Returns size_of() applied to the dtype of the given datatype<T> wrapper.
// NOTE(review): presumably a size in bytes -- confirm against size_of().
template<class T>
unsigned int size(datatype<T> const & dt)
{ return isc::size_of(dt.dtype()) ; }
{ return sc::size_of(dt.dtype()) ; }
#define INSTANTIATE(name, clname) \
struct name : public detail::datatype<clname> { name(clname value) : detail::datatype<clname>(value){} };
@@ -80,13 +80,13 @@ unsigned int size(datatype<T> const & dt)
namespace detail
{
// Factory for profiles::value_type, returned in a std::shared_ptr.
//   tp    - Python template object; its expression type is obtained through
//           tools::extract_template_type, and it is also extracted as an
//           isaac::templates::base to pass to the value_type constructor.
//   dtype - Python dtype object, converted with tools::extract_dtype.
//   queue - command queue forwarded to the value_type constructor.
std::shared_ptr<isc::profiles::value_type> construct_model(bp::object const & tp, bp::object dtype, isc::driver::CommandQueue & queue)
std::shared_ptr<sc::profiles::value_type> construct_model(bp::object const & tp, bp::object dtype, sc::driver::CommandQueue & queue)
{
return std::shared_ptr<isc::profiles::value_type>(new isc::profiles::value_type(tools::extract_template_type(tp), tools::extract_dtype(dtype), (isaac::templates::base const &)bp::extract<isaac::templates::base>(tp), queue));
return std::shared_ptr<sc::profiles::value_type>(new sc::profiles::value_type(tools::extract_template_type(tp), tools::extract_dtype(dtype), (isaac::templates::base const &)bp::extract<isaac::templates::base>(tp), queue));
}
std::shared_ptr<isc::array>
ndarray_to_iscarray(const np::ndarray& array, isc::driver::Context const & ctx)
std::shared_ptr<sc::array>
ndarray_to_scarray(const np::ndarray& array, sc::driver::Context const & ctx)
{
int d = array.get_nd();
@@ -95,14 +95,14 @@ namespace detail
bp::throw_error_already_set();
}
isc::numeric_type dtype = to_isc_dtype(array.get_dtype());
isc::int_t size = (isc::int_t)array.shape(0);
isc::array* v = new isc::array(size, dtype, ctx);
sc::numeric_type dtype = to_sc_dtype(array.get_dtype());
sc::int_t size = (sc::int_t)array.shape(0);
sc::array* v = new sc::array(size, dtype, ctx);
void* data = (void*)array.get_data();
isc::copy(data, *v);
sc::copy(data, *v);
return std::shared_ptr<isc::array>(v);
return std::shared_ptr<sc::array>(v);
}
isaac::driver::Context const & extract_context(bp::object context)
@@ -118,19 +118,19 @@ namespace detail
}
// Builds an isaac array from a Python object.
// `obj` is first converted to a numpy ndarray of the requested dtype
// (np::from_object with the dtype mapped through tools::extract_dtype and
// to_np_dtype); the ndarray is then passed, together with the context
// extracted from `pycontext`, to the ndarray-to-array conversion helper.
std::shared_ptr<isc::array> create_array(bp::object const & obj, bp::object odtype, bp::object pycontext)
std::shared_ptr<sc::array> create_array(bp::object const & obj, bp::object odtype, bp::object pycontext)
{
return ndarray_to_iscarray(np::from_object(obj, to_np_dtype(tools::extract_dtype(odtype))), extract_context(pycontext));
return ndarray_to_scarray(np::from_object(obj, to_np_dtype(tools::extract_dtype(odtype))), extract_context(pycontext));
}
// Returns a shared_ptr to a new array constructed from the result of
// zeros(M, N, dtype, context), where the dtype comes from
// tools::extract_dtype(odtype) and the context from extract_context(pycontext).
std::shared_ptr<isc::array> create_zeros_array(isc::int_t M, isc::int_t N, bp::object odtype, bp::object pycontext)
std::shared_ptr<sc::array> create_zeros_array(sc::int_t M, sc::int_t N, bp::object odtype, bp::object pycontext)
{
return std::shared_ptr<isc::array>(new isc::array(isc::zeros(M, N, tools::extract_dtype(odtype), extract_context(pycontext))));
return std::shared_ptr<sc::array>(new sc::array(sc::zeros(M, N, tools::extract_dtype(odtype), extract_context(pycontext))));
}
std::shared_ptr<isc::array> create_empty_array(bp::object sizes, bp::object odtype, bp::object pycontext)
std::shared_ptr<sc::array> create_empty_array(bp::object sizes, bp::object odtype, bp::object pycontext)
{
typedef std::shared_ptr<isc::array> result_type;
typedef std::shared_ptr<sc::array> result_type;
std::size_t len;
int size1;
@@ -145,17 +145,17 @@ namespace detail
size1 = bp::extract<int>(sizes)();
}
isc::numeric_type dtype = tools::extract_dtype(odtype);
sc::numeric_type dtype = tools::extract_dtype(odtype);
if(len < 1 || len > 2)
{
PyErr_SetString(PyExc_TypeError, "Only 1-D and 2-D arrays are supported!");
bp::throw_error_already_set();
}
isc::driver::Context const & context = extract_context(pycontext);
sc::driver::Context const & context = extract_context(pycontext);
if(len==1)
return result_type(new isc::array(size1, dtype, context));
return result_type(new isc::array(size1, size2, dtype, context));
return result_type(new sc::array(size1, dtype, context));
return result_type(new sc::array(size1, size2, dtype, context));
}
std::string type_name(bp::object const & obj)
@@ -167,26 +167,26 @@ namespace detail
return bp::extract<std::string>(obj.attr("__class__").attr("__name__"))();
}
std::shared_ptr<isc::scalar> construct_scalar(bp::object obj, bp::object pycontext)
std::shared_ptr<sc::scalar> construct_scalar(bp::object obj, bp::object pycontext)
{
typedef std::shared_ptr<isc::scalar> result_type;
isc::driver::Context const & context = extract_context(pycontext);
typedef std::shared_ptr<sc::scalar> result_type;
sc::driver::Context const & context = extract_context(pycontext);
std::string name = type_name(obj);
if(name=="int") return result_type(new isc::scalar(bp::extract<int>(obj)(), context));
else if(name=="float") return result_type(new isc::scalar(bp::extract<double>(obj)(), context));
else if(name=="long") return result_type(new isc::scalar(bp::extract<long>(obj)(), context));
else if(name=="int") return result_type(new isc::scalar(bp::extract<int>(obj)(), context));
if(name=="int") return result_type(new sc::scalar(bp::extract<int>(obj)(), context));
else if(name=="float") return result_type(new sc::scalar(bp::extract<double>(obj)(), context));
else if(name=="long") return result_type(new sc::scalar(bp::extract<long>(obj)(), context));
else if(name=="int") return result_type(new sc::scalar(bp::extract<int>(obj)(), context));
else if(name=="int8") return result_type(new isc::scalar(isc::CHAR_TYPE, context));
else if(name=="uint8") return result_type(new isc::scalar(isc::UCHAR_TYPE, context));
else if(name=="int16") return result_type(new isc::scalar(isc::SHORT_TYPE, context));
else if(name=="uint16") return result_type(new isc::scalar(isc::USHORT_TYPE, context));
else if(name=="int32") return result_type(new isc::scalar(isc::INT_TYPE, context));
else if(name=="uint32") return result_type(new isc::scalar(isc::UINT_TYPE, context));
else if(name=="int64") return result_type(new isc::scalar(isc::LONG_TYPE, context));
else if(name=="uint64") return result_type(new isc::scalar(isc::ULONG_TYPE, context));
else if(name=="float32") return result_type(new isc::scalar(isc::FLOAT_TYPE, context));
else if(name=="float64") return result_type(new isc::scalar(isc::DOUBLE_TYPE, context));
else if(name=="int8") return result_type(new sc::scalar(sc::CHAR_TYPE, context));
else if(name=="uint8") return result_type(new sc::scalar(sc::UCHAR_TYPE, context));
else if(name=="int16") return result_type(new sc::scalar(sc::SHORT_TYPE, context));
else if(name=="uint16") return result_type(new sc::scalar(sc::USHORT_TYPE, context));
else if(name=="int32") return result_type(new sc::scalar(sc::INT_TYPE, context));
else if(name=="uint32") return result_type(new sc::scalar(sc::UINT_TYPE, context));
else if(name=="int64") return result_type(new sc::scalar(sc::LONG_TYPE, context));
else if(name=="uint64") return result_type(new sc::scalar(sc::ULONG_TYPE, context));
else if(name=="float32") return result_type(new sc::scalar(sc::FLOAT_TYPE, context));
else if(name=="float64") return result_type(new sc::scalar(sc::DOUBLE_TYPE, context));
else{
PyErr_SetString(PyExc_TypeError, "Data type not understood");
bp::throw_error_already_set();
@@ -196,11 +196,11 @@ namespace detail
struct model_map_indexing
{
static isc::profiles::value_type& get_item(isc::profiles::map_type& container, bp::tuple i_)
static sc::profiles::value_type& get_item(sc::profiles::map_type& container, bp::tuple i_)
{
isc::expression_type expression = tools::extract_template_type(i_[0]);
isc::numeric_type dtype = tools::extract_dtype(i_[1]);
isc::profiles::map_type::iterator i = container.find(std::make_pair(expression, dtype));
sc::expression_type expression = tools::extract_template_type(i_[0]);
sc::numeric_type dtype = tools::extract_dtype(i_[1]);
sc::profiles::map_type::iterator i = container.find(std::make_pair(expression, dtype));
if (i == container.end())
{
PyErr_SetString(PyExc_KeyError, "Invalid key");
@@ -209,11 +209,11 @@ namespace detail
return *i->second;
}
// Stores a newly allocated copy of `v` in `container`.
// The key is the pair (expression type, numeric type) decoded from the
// first two elements of the tuple `i_`. operator[] is used for the lookup,
// and the pointer held at that key is reset to the new copy.
static void set_item(isc::profiles::map_type& container, bp::tuple i_, isc::profiles::value_type const & v)
static void set_item(sc::profiles::map_type& container, bp::tuple i_, sc::profiles::value_type const & v)
{
isc::expression_type expression = tools::extract_template_type(i_[0]);
isc::numeric_type dtype = tools::extract_dtype(i_[1]);
container[std::make_pair(expression, dtype)].reset(new isc::profiles::value_type(v));
sc::expression_type expression = tools::extract_template_type(i_[0]);
sc::numeric_type dtype = tools::extract_dtype(i_[1]);
container[std::make_pair(expression, dtype)].reset(new sc::profiles::value_type(v));
}
};
}
@@ -227,13 +227,13 @@ void export_core()
bp::class_<isaac::profiles::value_type>("profile", bp::no_init)
.def("__init__", bp::make_constructor(detail::construct_model))
.def("execute", &isc::profiles::value_type::execute);
.def("execute", &sc::profiles::value_type::execute);
bp::class_<isc::value_scalar>("value_scalar", bp::no_init)
.add_property("dtype", &isc::value_scalar::dtype);
bp::class_<sc::value_scalar>("value_scalar", bp::no_init)
.add_property("dtype", &sc::value_scalar::dtype);
#define INSTANTIATE(name, clname) \
bp::class_<detail::datatype<clname>, bp::bases<isc::value_scalar> >(#name, bp::init<clname>());\
bp::class_<detail::datatype<clname>, bp::bases<sc::value_scalar> >(#name, bp::init<clname>());\
bp::class_<detail::name, bp::bases<detail::datatype<clname> > >(#name, bp::init<clname>())\
.add_property("size", &detail::size<clname>)\
;
@@ -251,36 +251,36 @@ void export_core()
INSTANTIATE(float64, cl_double)
#undef INSTANTIATE
bp::enum_<isc::expression_type>("operations")
MAP_ENUM(AXPY_TYPE, isc)
MAP_ENUM(GER_TYPE, isc)
MAP_ENUM(DOT_TYPE, isc)
MAP_ENUM(GEMV_N_TYPE, isc)
MAP_ENUM(GEMV_T_TYPE, isc)
MAP_ENUM(GEMM_NN_TYPE, isc)
MAP_ENUM(GEMM_TN_TYPE, isc)
MAP_ENUM(GEMM_NT_TYPE, isc)
MAP_ENUM(GEMM_TT_TYPE, isc);
bp::enum_<sc::expression_type>("operations")
MAP_ENUM(AXPY_TYPE, sc)
MAP_ENUM(GER_TYPE, sc)
MAP_ENUM(DOT_TYPE, sc)
MAP_ENUM(GEMV_N_TYPE, sc)
MAP_ENUM(GEMV_T_TYPE, sc)
MAP_ENUM(GEMM_NN_TYPE, sc)
MAP_ENUM(GEMM_TN_TYPE, sc)
MAP_ENUM(GEMM_NT_TYPE, sc)
MAP_ENUM(GEMM_TT_TYPE, sc);
#define ADD_SCALAR_HANDLING(OP)\
.def(bp::self OP int())\
.def(bp::self OP long())\
.def(bp::self OP double())\
.def(bp::self OP bp::other<isc::value_scalar>())\
.def(bp::self OP bp::other<sc::value_scalar>())\
.def(int() OP bp::self)\
.def(long() OP bp::self)\
.def(double() OP bp::self)\
.def(bp::other<isc::value_scalar>() OP bp::self)
.def(bp::other<sc::value_scalar>() OP bp::self)
#define ADD_ARRAY_OPERATOR(OP)\
.def(bp::self OP bp::self)\
ADD_SCALAR_HANDLING(OP)
bp::class_<isc::expressions_tuple>
("array_expression_container", bp::init<isc::array_expression const &>())
bp::class_<sc::expressions_tuple>
("array_expression_container", bp::init<sc::array_expression const &>())
;
bp::class_<isc::array_expression >("array_expression", bp::no_init)
bp::class_<sc::array_expression >("array_expression", bp::no_init)
ADD_ARRAY_OPERATOR(+)
ADD_ARRAY_OPERATOR(-)
ADD_ARRAY_OPERATOR(*)
@@ -291,7 +291,7 @@ void export_core()
ADD_ARRAY_OPERATOR(<=)
ADD_ARRAY_OPERATOR(==)
ADD_ARRAY_OPERATOR(!=)
.add_property("context", bp::make_function(&isc::array_expression::context, bp::return_internal_reference<>()))
.add_property("context", bp::make_function(&sc::array_expression::context, bp::return_internal_reference<>()))
.def(bp::self_ns::abs(bp::self))
// .def(bp::self_ns::pow(bp::self))
;
@@ -299,18 +299,18 @@ void export_core()
#define ADD_ARRAY_OPERATOR(OP) \
.def(bp::self OP bp::self)\
.def(bp::self OP bp::other<isc::array_expression>())\
.def(bp::other<isc::array_expression>() OP bp::self) \
.def(bp::self OP bp::other<sc::array_expression>())\
.def(bp::other<sc::array_expression>() OP bp::self) \
ADD_SCALAR_HANDLING(OP)
bp::class_<isc::array,
std::shared_ptr<isc::array> >
bp::class_<sc::array,
std::shared_ptr<sc::array> >
( "array", bp::no_init)
.def("__init__", bp::make_constructor(detail::create_array, bp::default_call_policies(), (bp::arg("obj"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")= bp::object())))
.def(bp::init<isc::array_expression>())
.add_property("dtype", &isc::array::dtype)
.add_property("context", bp::make_function(&isc::array::context, bp::return_internal_reference<>()))
.add_property("T", &isc::array::T)
.def(bp::init<sc::array_expression>())
.add_property("dtype", &sc::array::dtype)
.add_property("context", bp::make_function(&sc::array::context, bp::return_internal_reference<>()))
.add_property("T", &sc::array::T)
.add_property("shape", &detail::get_shape)
ADD_ARRAY_OPERATOR(+)
ADD_ARRAY_OPERATOR(-)
@@ -327,7 +327,7 @@ void export_core()
.def(bp::self_ns::str(bp::self_ns::self))
;
bp::class_<isc::scalar, bp::bases<isc::array> >
bp::class_<sc::scalar, bp::bases<sc::array> >
("scalar", bp::no_init)
.def("__init__", bp::make_constructor(detail::construct_scalar, bp::default_call_policies(), (bp::arg(""), bp::arg("context")=bp::object())))
;
@@ -336,15 +336,15 @@ void export_core()
bp::def("empty", &detail::create_empty_array, (bp::arg("shape"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")=bp::object()));
//Assign
bp::def("assign", static_cast<isc::array_expression (*)(isc::array const &, isc::array const &)>(&isc::assign));\
bp::def("assign", static_cast<isc::array_expression (*)(isc::array const &, isc::array_expression const &)>(&isc::assign));\
bp::def("assign", static_cast<sc::array_expression (*)(sc::array const &, sc::array const &)>(&sc::assign));\
bp::def("assign", static_cast<sc::array_expression (*)(sc::array const &, sc::array_expression const &)>(&sc::assign));\
//Binary
#define MAP_FUNCTION(name) \
bp::def(#name, static_cast<isc::array_expression (*)(isc::array const &, isc::array const &)>(&isc::name));\
bp::def(#name, static_cast<isc::array_expression (*)(isc::array_expression const &, isc::array const &)>(&isc::name));\
bp::def(#name, static_cast<isc::array_expression (*)(isc::array const &, isc::array_expression const &)>(&isc::name));\
bp::def(#name, static_cast<isc::array_expression (*)(isc::array_expression const &, isc::array_expression const &)>(&isc::name));
bp::def(#name, static_cast<sc::array_expression (*)(sc::array const &, sc::array const &)>(&sc::name));\
bp::def(#name, static_cast<sc::array_expression (*)(sc::array_expression const &, sc::array const &)>(&sc::name));\
bp::def(#name, static_cast<sc::array_expression (*)(sc::array const &, sc::array_expression const &)>(&sc::name));\
bp::def(#name, static_cast<sc::array_expression (*)(sc::array_expression const &, sc::array_expression const &)>(&sc::name));
MAP_FUNCTION(maximum)
MAP_FUNCTION(minimum)
@@ -354,8 +354,8 @@ void export_core()
//Unary
#define MAP_FUNCTION(name) \
bp::def(#name, static_cast<isc::array_expression (*)(isc::array const &)>(&isc::name));\
bp::def(#name, static_cast<isc::array_expression (*)(isc::array_expression const &)>(&isc::name));
bp::def(#name, static_cast<sc::array_expression (*)(sc::array const &)>(&sc::name));\
bp::def(#name, static_cast<sc::array_expression (*)(sc::array_expression const &)>(&sc::name));
bp::def("zeros", &detail::create_zeros_array, (bp::arg("shape"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")=bp::object()));
@@ -380,8 +380,8 @@ void export_core()
/*--- Reduction operators----*/
//---------------------------------------
#define MAP_FUNCTION(name) \
bp::def(#name, static_cast<isc::array_expression (*)(isc::array const &, isc::int_t)>(&isc::name));\
bp::def(#name, static_cast<isc::array_expression (*)(isc::array_expression const &, isc::int_t)>(&isc::name));
bp::def(#name, static_cast<sc::array_expression (*)(sc::array const &, sc::int_t)>(&sc::name));\
bp::def(#name, static_cast<sc::array_expression (*)(sc::array_expression const &, sc::int_t)>(&sc::name));
MAP_FUNCTION(sum)
MAP_FUNCTION(max)
@@ -392,7 +392,7 @@ void export_core()
/*--- Profiles----*/
//---------------------------------------
bp::class_<isc::profiles::map_type>("profiles")
bp::class_<sc::profiles::map_type>("profiles")
.def("__getitem__", &detail::model_map_indexing::get_item, bp::return_internal_reference<>())
.def("__setitem__", &detail::model_map_indexing::set_item, bp::with_custodian_and_ward<1,2>())
;

View File

@@ -10,7 +10,7 @@
namespace detail
{
bp::list nv_compute_capability(isc::driver::Device const & device)
bp::list nv_compute_capability(sc::driver::Device const & device)
{
bp::list res;
std::pair<unsigned int, unsigned int> cc = device.nv_compute_capability();
@@ -21,63 +21,63 @@ namespace detail
// Returns the platforms reported by the driver backend as a Python list.
// A local vector is filled by driver::backend::platforms() and its iterator
// range is handed to tools::to_list to build the bp::list.
bp::list get_platforms()
{
std::vector<isc::driver::Platform> platforms;
isc::driver::backend::platforms(platforms);
std::vector<sc::driver::Platform> platforms;
sc::driver::backend::platforms(platforms);
return tools::to_list(platforms.begin(), platforms.end());
}
// Returns the devices of `platform` as a Python list.
// Platform::devices() fills the local vector; tools::to_list converts the
// resulting iterator range into a bp::list.
bp::list get_devices(isc::driver::Platform const & platform)
bp::list get_devices(sc::driver::Platform const & platform)
{
std::vector<isc::driver::Device> devices;
std::vector<sc::driver::Device> devices;
platform.devices(devices);
return tools::to_list(devices.begin(), devices.end());
}
// Returns the command queues associated with `context` as a Python list.
// backend::queues::get() fills a vector of CommandQueue pointers; each pointer
// is dereferenced before being appended, so the list receives the queue object
// itself rather than the raw pointer (presumably converted by value through the
// registered CommandQueue class -- TODO confirm).
bp::list get_queues(isc::driver::Context const & context)
bp::list get_queues(sc::driver::Context const & context)
{
std::vector<isc::driver::CommandQueue*> queues;
isc::driver::backend::queues::get(context, queues);
std::vector<sc::driver::CommandQueue*> queues;
sc::driver::backend::queues::get(context, queues);
bp::list res;
for(isc::driver::CommandQueue* queue:queues)
for(sc::driver::CommandQueue* queue:queues)
res.append(*queue);
return res;
}
// Heap-allocates a CommandQueue built from (context, device) and returns it
// wrapped in a std::shared_ptr, which owns the new object.
std::shared_ptr< isc::driver::CommandQueue> create_queue(isc::driver::Context const & context, isc::driver::Device const & device)
std::shared_ptr< sc::driver::CommandQueue> create_queue(sc::driver::Context const & context, sc::driver::Device const & device)
{
return std::shared_ptr<isc::driver::CommandQueue>(new isc::driver::CommandQueue(context, device));
return std::shared_ptr<sc::driver::CommandQueue>(new sc::driver::CommandQueue(context, device));
}
// Maps a driver device_type to its display name: "CPU", "GPU" or "ACCELERATOR".
// NOTE(review): the trailing `throw;` is a rethrow expression. When no exception
// is currently being handled it calls std::terminate instead of raising an error
// the caller could catch, so any device_type other than the three handled here
// aborts the process. Consider throwing a concrete exception type instead.
std::string to_string(isc::driver::device_type type)
std::string to_string(sc::driver::device_type type)
{
if(type==isc::driver::DEVICE_TYPE_CPU) return "CPU";
if(type==isc::driver::DEVICE_TYPE_GPU) return "GPU";
if(type==isc::driver::DEVICE_TYPE_ACCELERATOR) return "ACCELERATOR";
if(type==sc::driver::DEVICE_TYPE_CPU) return "CPU";
if(type==sc::driver::DEVICE_TYPE_GPU) return "GPU";
if(type==sc::driver::DEVICE_TYPE_ACCELERATOR) return "ACCELERATOR";
throw;
}
// Heap-allocates a driver Context for device `dev` and returns it wrapped in a
// std::shared_ptr, which owns the new object.
std::shared_ptr<isc::driver::Context> make_context(isc::driver::Device const & dev)
{ return std::shared_ptr<isc::driver::Context>(new isc::driver::Context(dev)); }
std::shared_ptr<sc::driver::Context> make_context(sc::driver::Device const & dev)
{ return std::shared_ptr<sc::driver::Context>(new sc::driver::Context(dev)); }
// Evaluates `expression` and returns a Python tuple (result array, list of events).
//
// Parameters:
//   expression       expression tree to evaluate
//   queue_id         forwarded to execution_options_type
//   dependencies     Python list converted to a vector of driver Events and
//                    passed (by pointer) to execution_options_type
//   tune, label      forwarded to dispatcher_options_type
//   program_name,
//   force_recompile  forwarded to compilation_options_type
//
// The local `events` list is passed by pointer to the execution options and is
// converted to a Python list for the return value; presumably the execution
// fills it with the events it enqueues -- TODO confirm.
bp::object enqueue(isc::array_expression const & expression, unsigned int queue_id, bp::list dependencies, bool tune, int label, std::string const & program_name, bool force_recompile)
bp::object enqueue(sc::array_expression const & expression, unsigned int queue_id, bp::list dependencies, bool tune, int label, std::string const & program_name, bool force_recompile)
{
std::list<isc::driver::Event> events;
std::vector<isc::driver::Event> cdependencies = tools::to_vector<isc::driver::Event>(dependencies);
std::list<sc::driver::Event> events;
std::vector<sc::driver::Event> cdependencies = tools::to_vector<sc::driver::Event>(dependencies);
isc::execution_options_type execution_options(queue_id, &events, &cdependencies);
isc::dispatcher_options_type dispatcher_options(tune, label);
isc::compilation_options_type compilation_options(program_name, force_recompile);
isc::array_expression::container_type::value_type root = expression.tree()[expression.root()];
if(isc::detail::is_assignment(root.op))
sc::execution_options_type execution_options(queue_id, &events, &cdependencies);
sc::dispatcher_options_type dispatcher_options(tune, label);
sc::compilation_options_type compilation_options(program_name, force_recompile);
// Inspect the root node of the tree to decide how the result is produced.
sc::array_expression::container_type::value_type root = expression.tree()[expression.root()];
if(sc::detail::is_assignment(root.op))
{
// Root is an assignment: execute it and return the existing left-hand-side array.
isc::execute(isc::control(expression, execution_options, dispatcher_options, compilation_options), isaac::profiles::get(execution_options.queue(expression.context())));
sc::execute(sc::control(expression, execution_options, dispatcher_options, compilation_options), isaac::profiles::get(execution_options.queue(expression.context())));
return bp::make_tuple(bp::ptr(root.lhs.array), tools::to_list(events.begin(), events.end()));
}
else
{
// Root is not an assignment: construct a new array from the controlled expression and return it.
std::shared_ptr<isc::array> parray(new isc::array(isc::control(expression, execution_options, dispatcher_options, compilation_options)));
std::shared_ptr<sc::array> parray(new sc::array(sc::control(expression, execution_options, dispatcher_options, compilation_options)));
return bp::make_tuple(parray, tools::to_list(events.begin(), events.end()));
}
}
@@ -88,7 +88,7 @@ default_driver_values_type default_driver_parameters;
void export_driver()
{
typedef std::vector<isc::driver::CommandQueue> queues_t;
typedef std::vector<sc::driver::CommandQueue> queues_t;
bp::object driver_module(bp::handle<>(bp::borrowed(PyImport_AddModule("isaac.driver"))));
bp::scope().attr("driver") = driver_module;
@@ -103,58 +103,58 @@ void export_driver()
bp::enum_<isc::driver::backend_type>
bp::enum_<sc::driver::backend_type>
("backend_type")
.value("OPENCL", isc::driver::OPENCL)
.value("OPENCL", sc::driver::OPENCL)
#ifdef ISAAC_WITH_CUDA
.value("CUDA", isc::driver::CUDA)
.value("CUDA", sc::driver::CUDA)
#endif
;
bp::enum_<isc::driver::device_type>
bp::enum_<sc::driver::device_type>
("device_type")
.value("DEVICE_TYPE_GPU", isc::driver::DEVICE_TYPE_GPU)
.value("DEVICE_TYPE_CPU", isc::driver::DEVICE_TYPE_CPU)
.value("DEVICE_TYPE_GPU", sc::driver::DEVICE_TYPE_GPU)
.value("DEVICE_TYPE_CPU", sc::driver::DEVICE_TYPE_CPU)
;
bp::class_<isc::driver::Platform>("platform", bp::no_init)
bp::class_<sc::driver::Platform>("platform", bp::no_init)
.def("get_devices", &detail::get_devices)
.add_property("name",&isc::driver::Platform::name)
.add_property("name",&sc::driver::Platform::name)
;
bp::enum_<isaac::driver::Device::Vendor>
("vendor")
.value("AMD", isc::driver::Device::Vendor::AMD)
.value("INTEL", isc::driver::Device::Vendor::INTEL)
.value("NVIDIA", isc::driver::Device::Vendor::NVIDIA)
.value("UNKNOWN", isc::driver::Device::Vendor::UNKNOWN)
.value("AMD", sc::driver::Device::Vendor::AMD)
.value("INTEL", sc::driver::Device::Vendor::INTEL)
.value("NVIDIA", sc::driver::Device::Vendor::NVIDIA)
.value("UNKNOWN", sc::driver::Device::Vendor::UNKNOWN)
;
bp::class_<isc::driver::Device>("device", bp::no_init)
.add_property("clock_rate", &isc::driver::Device::clock_rate)
.add_property("name", &isc::driver::Device::name)
.add_property("type", &isc::driver::Device::type)
.add_property("platform", &isc::driver::Device::platform)
.add_property("vendor", &isc::driver::Device::vendor)
bp::class_<sc::driver::Device>("device", bp::no_init)
.add_property("clock_rate", &sc::driver::Device::clock_rate)
.add_property("name", &sc::driver::Device::name)
.add_property("type", &sc::driver::Device::type)
.add_property("platform", &sc::driver::Device::platform)
.add_property("vendor", &sc::driver::Device::vendor)
.add_property("nv_compute_capability", &detail::nv_compute_capability)
;
bp::class_<isc::driver::Context, boost::noncopyable>("context", bp::no_init)
bp::class_<sc::driver::Context, boost::noncopyable>("context", bp::no_init)
.def("__init__", bp::make_constructor(&detail::make_context))
.def("synchronize", &isc::driver::backend::synchronize)
.def("synchronize", &sc::driver::backend::synchronize)
.add_property("queues", &detail::get_queues)
.add_property("backend", &isc::driver::Context::backend)
.add_property("backend", &sc::driver::Context::backend)
;
bp::class_<isc::driver::CommandQueue>("command_queue", bp::init<isc::driver::Context const &, isc::driver::Device const &>())
.def("synchronize", &isc::driver::CommandQueue::synchronize)
.add_property("profiles", bp::make_function(&isc::profiles::get, bp::return_internal_reference<>()))
.add_property("device", bp::make_function(&isc::driver::CommandQueue::device, bp::return_internal_reference<>()))
bp::class_<sc::driver::CommandQueue>("command_queue", bp::init<sc::driver::Context const &, sc::driver::Device const &>())
.def("synchronize", &sc::driver::CommandQueue::synchronize)
.add_property("profiles", bp::make_function(&sc::profiles::get, bp::return_internal_reference<>()))
.add_property("device", bp::make_function(&sc::driver::CommandQueue::device, bp::return_internal_reference<>()))
;
bp::class_<isc::driver::Event>("event", bp::init<isc::driver::backend_type>())
.add_property("elapsed_time", &isc::driver::Event::elapsed_time)
bp::class_<sc::driver::Event>("event", bp::init<sc::driver::backend_type>())
.add_property("elapsed_time", &sc::driver::Event::elapsed_time)
;
bp::def("device_type_to_string", &detail::to_string);
@@ -164,8 +164,8 @@ void export_driver()
bp::def("enqueue", &detail::enqueue, (bp::arg("expression"), bp::arg("queue_id") = 0, bp::arg("dependencies")=bp::list(), bp::arg("tune") = false, bp::arg("label")=-1, bp::arg("program_name")="", bp::arg("recompile") = false));
bp::class_<default_driver_values_type>("default_type")
.def_readwrite("queue_properties",&isc::driver::backend::default_queue_properties)
.def_readwrite("device", &isc::driver::backend::default_device)
.def_readwrite("queue_properties",&sc::driver::backend::default_queue_properties)
.def_readwrite("device", &sc::driver::backend::default_device)
;
bp::scope().attr("default") = bp::object(bp::ptr(&default_driver_parameters));

View File

@@ -13,7 +13,7 @@ namespace tpt = isaac::templates;
namespace detail
{
bp::list input_sizes(tpt::base & temp, isc::expressions_tuple const & tree)
bp::list input_sizes(tpt::base & temp, sc::expressions_tuple const & tree)
{
std::vector<int> tmp = temp.input_sizes(tree);
return tools::to_list(tmp.begin(), tmp.end());

View File

@@ -4,19 +4,19 @@
#include "isaac/array.h"
#include "isaac/wrap/clBLAS.h"
namespace isc = isaac;
namespace sc = isaac;
typedef isaac::int_t int_t;
template<typename T>
void test_element_wise_vector(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T>& cy, simple_vector_base<T>& cz,
isc::array& x, isc::array& y, isc::array& z)
sc::array& x, sc::array& y, sc::array& z)
{
using namespace std;
int failure_count = 0;
isc::numeric_type dtype = x.dtype();
isc::driver::Context const & context = x.context();
isc::driver::CommandQueue queue = isc::driver::backend::queues::get(context,0);
sc::numeric_type dtype = x.dtype();
sc::driver::Context const & context = x.context();
sc::driver::CommandQueue queue = sc::driver::backend::queues::get(context,0);
cl_command_queue clqueue = queue.handle().cl();
int_t N = cz.size();
@@ -113,7 +113,7 @@ void test_element_wise_vector(T epsilon, simple_vector_base<T> & cx, simple_vect
}
template<typename T>
void test_impl(T epsilon, isc::driver::Context const & ctx)
void test_impl(T epsilon, sc::driver::Context const & ctx)
{
using isaac::_;
@@ -140,11 +140,11 @@ int main()
{
clblasSetup();
std::list<isaac::driver::Context const *> data;
isc::driver::backend::contexts::get(data);
sc::driver::backend::contexts::get(data);
for(isaac::driver::Context const * context : data)
{
isc::driver::Device device = isc::driver::backend::queues::get(*context,0).device();
if(device.type() != isc::driver::DEVICE_TYPE_GPU)
sc::driver::Device device = sc::driver::backend::queues::get(*context,0).device();
if(device.type() != sc::driver::DEVICE_TYPE_GPU)
continue;
std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
std::cout << "---" << std::endl;

View File

@@ -5,23 +5,23 @@
#include "isaac/array.h"
#include "isaac/wrap/clBLAS.h"
namespace isc = isaac;
typedef isc::int_t int_t;
namespace sc = isaac;
typedef sc::int_t int_t;
template<typename T>
void test_reduction(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T> & cy,
isc::array & x, isc::array & y)
sc::array & x, sc::array & y)
{
using namespace std;
isc::driver::Context const & ctx = x.context();
sc::driver::Context const & ctx = x.context();
int_t N = cx.size();
isc::driver::CommandQueue queue = isc::driver::backend::queues::get(ctx,0);
sc::driver::CommandQueue queue = sc::driver::backend::queues::get(ctx,0);
cl_command_queue clqueue = queue.handle().cl();
isc::array scratch(N, x.dtype());
sc::array scratch(N, x.dtype());
unsigned int failure_count = 0;
isaac::numeric_type dtype = isc::to_numeric_type<T>::value;
isaac::numeric_type dtype = sc::to_numeric_type<T>::value;
T cs = 0;
T tmp = 0;
@@ -67,7 +67,7 @@ void test_reduction(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T
}
template<typename T>
void test_impl(T epsilon, isc::driver::Context const & ctx)
void test_impl(T epsilon, sc::driver::Context const & ctx)
{
using isaac::_;
@@ -92,10 +92,10 @@ int main()
{
clblasSetup();
std::list<isaac::driver::Context const *> data;
isc::driver::backend::contexts::get(data);
sc::driver::backend::contexts::get(data);
for(isaac::driver::Context const * context : data)
{
isc::driver::Device device = isc::driver::backend::queues::get(*context,0).device();
sc::driver::Device device = sc::driver::backend::queues::get(*context,0).device();
std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
std::cout << "---" << std::endl;
std::cout << ">> float" << std::endl;

View File

@@ -3,23 +3,23 @@
#include "isaac/array.h"
#include "isaac/wrap/clBLAS.h"
namespace isc = isaac;
namespace sc = isaac;
template<typename T>
void test_impl(T epsilon, simple_matrix_base<T> & cC, simple_matrix_base<T> const & cA, simple_matrix_base<T> const & cB,
isc::array & C, isc::array const & A, isc::array const & AT, isc::array const & B, isc::array const & BT,
sc::array & C, sc::array const & A, sc::array const & AT, sc::array const & B, sc::array const & BT,
interface_t interf, const char * prefix)
{
int failure_count = 0;
isc::int_t M = C.shape()[0];
isc::int_t N = C.shape()[1];
isc::int_t K = A.shape()[1];
sc::int_t M = C.shape()[0];
sc::int_t N = C.shape()[1];
sc::int_t K = A.shape()[1];
T alpha = 1;
T beta = 0;
isc::driver::CommandQueue queue = isc::driver::backend::queues::get(C.context(),0);
sc::driver::CommandQueue queue = sc::driver::backend::queues::get(C.context(),0);
for(int i = 0 ; i < M ; ++i)
{
@@ -43,7 +43,7 @@ void test_impl(T epsilon, simple_matrix_base<T> & cC, simple_matrix_base<T> cons
std::cout << "[" << prefix << "] \t" << NAME << "..." << std::flush;\
GPU_OP;\
queue.synchronize();\
isc::copy(C, buffer);\
sc::copy(C, buffer);\
if(diff(buffer, cCbuffer, epsilon))\
{\
failure_count++;\
@@ -94,7 +94,7 @@ void test_impl(T epsilon, simple_matrix_base<T> & cC, simple_matrix_base<T> cons
}
template<typename T>
void test_impl(T epsilon, isc::driver::Context const & ctx)
void test_impl(T epsilon, sc::driver::Context const & ctx)
{
int_t M = 173;
int_t N = 241;
@@ -126,11 +126,11 @@ int main()
{
clblasSetup();
std::list<isaac::driver::Context const *> data;
isc::driver::backend::contexts::get(data);
sc::driver::backend::contexts::get(data);
for(isaac::driver::Context const * context : data)
{
isc::driver::Device device = isc::driver::backend::queues::get(*context,0).device();
if(device.type() != isc::driver::DEVICE_TYPE_GPU)
sc::driver::Device device = sc::driver::backend::queues::get(*context,0).device();
if(device.type() != sc::driver::DEVICE_TYPE_GPU)
continue;
std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
std::cout << "---" << std::endl;

View File

@@ -5,16 +5,16 @@
#include "isaac/array.h"
#include "isaac/wrap/clBLAS.h"
namespace isc = isaac;
namespace sc = isaac;
template<typename T>
void test_row_wise_reduction(T epsilon, simple_vector_base<T> & cy, simple_matrix_base<T> const & cA, simple_vector_base<T> & cx,
isc::array & y, isc::array const & A, isc::array & x, interface_t interf, const char * prefix)
sc::array & y, sc::array const & A, sc::array & x, interface_t interf, const char * prefix)
{
int failure_count = 0;
isc::int_t M = A.shape()[0];
isc::int_t N = A.shape()[1];
sc::int_t M = A.shape()[0];
sc::int_t N = A.shape()[1];
simple_vector<T> bufy(M);
simple_vector<T> bufx(N);
@@ -22,7 +22,7 @@ void test_row_wise_reduction(T epsilon, simple_vector_base<T> & cy, simple_matri
T alpha = static_cast<T>(4.2);
T beta = static_cast<T>(5.6);
isc::driver::CommandQueue queue = isc::driver::backend::queues::get(y.context(),0);
sc::driver::CommandQueue queue = sc::driver::backend::queues::get(y.context(),0);
T yi = 0, xi = 0;
#define TEST_OPERATION(NAME, SIZE1, SIZE2, NEUTRAL, REDUCTION, ASSIGNMENT, GPU_REDUCTION, RES, BUF, CRES)\
@@ -37,7 +37,7 @@ void test_row_wise_reduction(T epsilon, simple_vector_base<T> & cy, simple_matri
}\
GPU_REDUCTION;\
queue.synchronize();\
isc::copy(RES, BUF.data());\
sc::copy(RES, BUF.data());\
if(diff(CRES, BUF, epsilon))\
{\
failure_count++;\
@@ -90,7 +90,7 @@ void test_row_wise_reduction(T epsilon, simple_vector_base<T> & cy, simple_matri
}
template<typename T>
void test_impl(T epsilon, isc::driver::Context const & ctx)
void test_impl(T epsilon, sc::driver::Context const & ctx)
{
int_t M = 173;
int_t N = 241;
@@ -116,10 +116,10 @@ int main()
{
clblasSetup();
std::list<isaac::driver::Context const *> data;
isc::driver::backend::contexts::get(data);
sc::driver::backend::contexts::get(data);
for(isaac::driver::Context const * context : data)
{
isc::driver::Device device = isc::driver::backend::queues::get(*context,0).device();
sc::driver::Device device = sc::driver::backend::queues::get(*context,0).device();
std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
std::cout << "---" << std::endl;
std::cout << ">> float" << std::endl;

View File

@@ -2,18 +2,18 @@
#include "common.hpp"
#include "isaac/array.h"
namespace isc = isaac;
namespace sc = isaac;
typedef isaac::int_t int_t;
template<typename T>
void test(T epsilon, simple_matrix_base<T> & cA, simple_matrix_base<T>& cB, simple_matrix_base<T>& cC, simple_vector_base<T>& cx, simple_vector_base<T>& cy,
isc::array& A, isc::array& B, isc::array& C, isc::array& x, isc::array& y)
sc::array& A, sc::array& B, sc::array& C, sc::array& x, sc::array& y)
{
using namespace std;
int failure_count = 0;
isc::numeric_type dtype = C.dtype();
isc::driver::Context const & ctx = C.context();
sc::numeric_type dtype = C.dtype();
sc::driver::Context const & ctx = C.context();
int_t M = cC.size1();
int_t N = cC.size2();
@@ -100,7 +100,7 @@ void test(T epsilon, simple_matrix_base<T> & cA, simple_matrix_base<T>& cB, simp
}
template<typename T>
void test_impl(T epsilon, isc::driver::Context const & ctx)
void test_impl(T epsilon, sc::driver::Context const & ctx)
{
using isaac::_;
@@ -126,10 +126,10 @@ void test_impl(T epsilon, isc::driver::Context const & ctx)
int main()
{
std::list<isaac::driver::Context const *> data;
isc::driver::backend::contexts::get(data);
sc::driver::backend::contexts::get(data);
for(isaac::driver::Context const * context : data)
{
isc::driver::Device device = isc::driver::backend::queues::get(*context,0).device();
sc::driver::Device device = sc::driver::backend::queues::get(*context,0).device();
std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
std::cout << "---" << std::endl;
std::cout << ">> float" << std::endl;

View File

@@ -1,4 +1,4 @@
import isaac as isc
import isaac as sc
import random
from copy import deepcopy
@@ -14,10 +14,10 @@ from numpy import cumsum
import tools
fetch_types = [isc.templates.FETCH_FROM_GLOBAL_CONTIGUOUS,
isc.templates.FETCH_FROM_GLOBAL_STRIDED,
isc.templates.FETCH_FROM_LOCAL,
isc.templates.FETCH_FROM_LOCAL]
fetch_types = [sc.templates.FETCH_FROM_GLOBAL_CONTIGUOUS,
sc.templates.FETCH_FROM_GLOBAL_STRIDED,
sc.templates.FETCH_FROM_LOCAL,
sc.templates.FETCH_FROM_LOCAL]
def exhaustive(template, sizes, context):
tree, _ = tools.tree_of(template, sizes, context)
@@ -34,7 +34,7 @@ def exhaustive(template, sizes, context):
time = tools.benchmark(template, parameters, tree)
if not best or time < best[1]:
best = parameters, time
except (isc.OperationNotSupported, isc.LaunchOutOfResources, isc.MemObjectAllocationFailure):
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
pass
if best:
stdout.write('%.2f %% | Best %.2f [ for %s ]\r'%(float(idx*100)/len(ranges),metric(sizes, best[1]), best[0]))
@@ -100,7 +100,7 @@ def genetic(template, sizes, context, naccept=200, niter = 1000, cxpb=0.4, mutpb
try:
individual.fitness.values = toolbox.evaluate(genome)
population += [individual]
except (isc.OperationNotSupported, isc.LaunchOutOfResources, isc.MemObjectAllocationFailure ):
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure ):
pass
genome = encode(list(initializer.next()))
hof.update(population)
@@ -134,7 +134,7 @@ def genetic(template, sizes, context, naccept=200, niter = 1000, cxpb=0.4, mutpb
#Reproduction
else:
offspring += [random.choice(population)]
except (isc.OperationNotSupported, isc.LaunchOutOfResources, isc.MemObjectAllocationFailure):
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
pass
@@ -159,21 +159,21 @@ def is_local_optimum(parameters, template, sizes, context):
tree, _ = tools.tree_of(template, sizes, context)
genetic_infos = tools.genetic_infos_of(template)
if issubclass(template, isc.templates.axpy):
if issubclass(template, sc.templates.axpy):
sweep_over = [0,1,2]
elif issubclass(template, isc.templates.dot):
elif issubclass(template, sc.templates.dot):
sweep_over = [0,1,2]
elif issubclass(template, isc.templates.ger):
elif issubclass(template, sc.templates.ger):
sweep_over = [0,1,2,3,4]
elif issubclass(template, isc.templates.gemv):
elif issubclass(template, sc.templates.gemv):
sweep_over = [0,1,2,3,4]
elif issubclass(template, isc.templates.gemm):
elif issubclass(template, sc.templates.gemm):
sweep_over = [1,3,5,7]
#Evaluate the provided parameters guess
try:
reference = tools.benchmark(template, parameters, tree)
except (isc.OperationNotSupported, isc.LaunchOutOfResources, isc.MemObjectAllocationFailure):
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
return False
#Latency bound -- ignore
@@ -190,7 +190,7 @@ def is_local_optimum(parameters, template, sizes, context):
time = tools.benchmark(template, x, tree)
if time/reference < .97:
return False
except (isc.OperationNotSupported, isc.LaunchOutOfResources, isc.MemObjectAllocationFailure):
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
pass
return True

View File

@@ -1,4 +1,4 @@
import isaac as isc
import isaac as sc
from numpy import mean, median
from math import ceil, exp, log, sqrt
@@ -21,13 +21,13 @@ def expspace(a,b,N,r=128):
def benchmark(template, setting, tree):
queue = tree.context.queues[0]
queue.profiles[template, isc.float32] = isc.profile(template(*setting), isc.float32, queue)
queue.profiles[template, sc.float32] = sc.profile(template(*setting), sc.float32, queue)
times = []
total = 0
i = 0
while total < 1e-2:
#z = isc.zeros(1, 10000000, isc.float32, tree.context)
z, events = isc.driver.enqueue(tree)
#z = sc.zeros(1, 10000000, sc.float32, tree.context)
z, events = sc.driver.enqueue(tree)
tree.context.queues[0].synchronize()
times.append(1e-9*sum([e.elapsed_time for e in events]))
total += times[-1]
@@ -36,67 +36,67 @@ def benchmark(template, setting, tree):
# Builds a representative expression for `template` with operands of the given `sizes`.
# Operands are allocated with empty() in `context`.
# Returns a pair: (expression, tuple of the operand arrays it was built from).
# Falls through (returning None) when `template` matches none of the branches.
def tree_of(template, sizes, context):
# axpy: element-wise sum of two float32 vectors of length N.
if issubclass(template, isc.templates.axpy):
if issubclass(template, sc.templates.axpy):
N, = sizes
x = isc.empty(N, dtype=isc.float32, context=context)
y = isc.empty(N, dtype=isc.float32, context=context)
x = sc.empty(N, dtype=sc.float32, context=context)
y = sc.empty(N, dtype=sc.float32, context=context)
return x + y, (x, y)
# dot: dot() of two vectors of length N.
elif issubclass(template, isc.templates.dot):
elif issubclass(template, sc.templates.dot):
N, = sizes
x = isc.empty(N, context=context)
y = isc.empty(N, context=context)
return isc.dot(x, y), (x, y)
elif issubclass(template, isc.templates.ger):
x = sc.empty(N, context=context)
y = sc.empty(N, context=context)
return sc.dot(x, y), (x, y)
# ger: the expression used here is the element-wise sum of two (M, N) matrices.
elif issubclass(template, sc.templates.ger):
M, N = sizes
A = isc.empty((M,N), context=context)
B = isc.empty((M,N), context=context)
A = sc.empty((M,N), context=context)
B = sc.empty((M,N), context=context)
return A + B, (A, B)
# gemv: sizes are swapped for the transposed variant (gemv_t).
# NOTE(review): in the transposed case A.T has shape (N, M) while x has length N --
# confirm these dimensions are what dot() expects.
elif issubclass(template, isc.templates.gemv):
T = template is isc.templates.gemv_t
elif issubclass(template, sc.templates.gemv):
T = template is sc.templates.gemv_t
M, N = sizes[::-1] if T else sizes
A = isc.empty((M,N), context=context)
x = isc.empty(N, context=context)
return isc.dot(A.T, x) if T else isc.dot(A, x), (A, x)
elif issubclass(template, isc.templates.gemm):
AT = template is isc.templates.gemm_tn or template is isc.templates.gemm_tt
BT = template is isc.templates.gemm_nt or template is isc.templates.gemm_tt
A = sc.empty((M,N), context=context)
x = sc.empty(N, context=context)
return sc.dot(A.T, x) if T else sc.dot(A, x), (A, x)
# gemm: AT/BT record whether the first/second operand is transposed; a transposed
# operand is allocated with swapped dimensions and passed to dot() as its .T view.
elif issubclass(template, sc.templates.gemm):
AT = template is sc.templates.gemm_tn or template is sc.templates.gemm_tt
BT = template is sc.templates.gemm_nt or template is sc.templates.gemm_tt
M, N, K = sizes
A = isc.empty((K, M) if AT else (M, K), context=context)
B = isc.empty((N, K) if BT else (K, N), context=context)
A = sc.empty((K, M) if AT else (M, K), context=context)
B = sc.empty((N, K) if BT else (K, N), context=context)
AA = A.T if AT else A
BB = B.T if BT else B
return isc.dot(AA, BB), (A, B)
return sc.dot(AA, BB), (A, B)
# Returns a memory-traffic estimate for `template` at the given `sizes`:
# 4 * (number of elements counted) * 1e-9 -- presumably 4 bytes per float32 element
# expressed in GB; TODO confirm the intended unit.
# Elements counted per template: axpy 3*N, dot 2*N, ger 3*M*N, gemv M*N (matrix only),
# gemm the three pairwise products of the sizes.
# Returns None when `template` matches none of the branches.
def memory_footprint(template, sizes):
if issubclass(template, isc.templates.axpy):
if issubclass(template, sc.templates.axpy):
return 4*3*sizes[0]*1e-9
elif issubclass(template, isc.templates.dot):
elif issubclass(template, sc.templates.dot):
return 4*2*sizes[0]*1e-9
elif issubclass(template, isc.templates.ger):
elif issubclass(template, sc.templates.ger):
return 4*3*sizes[0]*sizes[1]*1e-9
elif issubclass(template, isc.templates.gemv):
elif issubclass(template, sc.templates.gemv):
return 4*sizes[0]*sizes[1]*1e-9
elif issubclass(template, isc.templates.gemm):
elif issubclass(template, sc.templates.gemm):
return 4*(sizes[0]*sizes[1] + sizes[0]*sizes[2] + sizes[1]*sizes[2])*1e-9
# Returns a function (sizes, t) -> performance figure for `template`.
# Templates listed in memory_bound are scored as memory_footprint(template, sizes)/t;
# templates listed in compute_bound (gemm) as 2*sizes[0]*sizes[1]*sizes[2]*1e-9/t.
# `t` is presumably the measured time in seconds -- TODO confirm against callers.
# Returns None when `template` is in neither list.
def metric_of(template):
memory_bound = [isc.templates.axpy, isc.templates.dot, isc.templates.ger, isc.templates.gemv]
compute_bound = [isc.templates.gemm]
memory_bound = [sc.templates.axpy, sc.templates.dot, sc.templates.ger, sc.templates.gemv]
compute_bound = [sc.templates.gemm]
if any([issubclass(template, x) for x in memory_bound]):
return lambda sizes, t: memory_footprint(template, sizes)/t
elif any([issubclass(template, x) for x in compute_bound]):
return lambda sizes, t: 2*sizes[0]*sizes[1]*sizes[2]*1e-9/t
# Returns the per-template settings dictionary used by the genetic search, with two
# keys: 'categorical' (a list of indices) and 'nbits' (a list of integers).
# Presumably 'categorical' marks which parameter positions are categorical and
# 'nbits' gives the number of bits used to encode each parameter -- TODO confirm
# against the encoder in the optimizer.
# Returns None when `template` matches none of the branches.
def genetic_infos_of(template):
if issubclass(template, isc.templates.axpy):
if issubclass(template, sc.templates.axpy):
return {'categorical': [3], 'nbits': [3,4,4,2] }
elif issubclass(template, isc.templates.dot):
elif issubclass(template, sc.templates.dot):
return {'categorical': [3], 'nbits':[3,4,4,2]}
elif issubclass(template, isc.templates.ger):
elif issubclass(template, sc.templates.ger):
return {'categorical': [5], 'nbits': [3,3,3,3,4,2]}
elif issubclass(template, isc.templates.gemv):
elif issubclass(template, sc.templates.gemv):
return {'categorical': [5], 'nbits': [3,3,3,3,4,2]}
elif issubclass(template, isc.templates.gemm):
elif issubclass(template, sc.templates.gemm):
return {'categorical': [8,9], 'nbits': [3,3,3,3,3,2,2,2,2,2,3,3]}

View File

@@ -4,7 +4,7 @@ from itertools import chain, product
from numpy import argsort, argmax
from operator import mul
from sklearn import ensemble
import isaac as isc
import isaac as sc
import optimize, tools, model
from json import encoder
@@ -22,40 +22,40 @@ def pow2range(a, b):
def tune(device, operation, json_path):
#List devices
platforms = isc.driver.get_platforms()
context = isc.driver.context(device)
platforms = sc.driver.get_platforms()
context = sc.driver.context(device)
#List of size tuples to use
sizes = {}
sizes[isc.templates.axpy] = [(x,) for x in tools.expspace(1e3, 1e8, 4)]
sizes[isc.templates.gemv_n] = product(pow2range(4,17), pow2range(4,17))
sizes[isc.templates.gemv_t] = sizes[isc.templates.gemv_n]
sizes[isc.templates.gemm_nn] = product(pow2range(6, 12), pow2range(6, 12), pow2range(6, 12))
sizes[isc.templates.gemm_tn] = sizes[isc.templates.gemm_nn]
sizes[isc.templates.gemm_nt] = sizes[isc.templates.gemm_nn]
sizes[isc.templates.gemm_tt] = sizes[isc.templates.gemm_nn]
sizes[sc.templates.axpy] = [(x,) for x in tools.expspace(1e3, 1e8, 4)]
sizes[sc.templates.gemv_n] = product(pow2range(4,17), pow2range(4,17))
sizes[sc.templates.gemv_t] = sizes[sc.templates.gemv_n]
sizes[sc.templates.gemm_nn] = product(pow2range(6, 12), pow2range(6, 12), pow2range(6, 12))
sizes[sc.templates.gemm_tn] = sizes[sc.templates.gemm_nn]
sizes[sc.templates.gemm_nt] = sizes[sc.templates.gemm_nn]
sizes[sc.templates.gemm_tt] = sizes[sc.templates.gemm_nn]
#Quick tuning - AlexNet sizes + Intuition
sizes[isc.templates.ger] = [(1536,1536)]
sizes[sc.templates.ger] = [(1536,1536)]
sizes[isc.templates.gemv_n] = [(1000,256),
sizes[sc.templates.gemv_n] = [(1000,256),
(4096,256)]
sizes[isc.templates.gemv_t] = [(169,256),
sizes[sc.templates.gemv_t] = [(169,256),
(169,384),
(729,256),
(3025,96)]
sizes[isc.templates.gemm_nn] = [(3025,96,363),
sizes[sc.templates.gemm_nn] = [(3025,96,363),
(729,128,1200),
(169,384,2304),
(169,192,1728),
(169,128,1728)]
sizes[isc.templates.gemm_nt] = [(169,1728,128),
sizes[sc.templates.gemm_nt] = [(169,1728,128),
(169,1728,192),
(169,2304,384),
(729,1200,128)]
sizes[isc.templates.gemm_tn] = [(1728,128,169),
sizes[sc.templates.gemm_tn] = [(1728,128,169),
(1728,192,169),
(2304,384,169),
(1200,128,729),
@@ -102,7 +102,7 @@ def tune(device, operation, json_path):
try:
time = tools.benchmark(operation, new, _tree)
perf = performance(xx, time)
except (isc.OperationNotSupported, isc.LaunchOutOfResources, isc.MemObjectAllocationFailure):
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
perf = 0
yy.append(0 if isinf(perf) else perf)
#Update dataset
@@ -111,7 +111,7 @@ def tune(device, operation, json_path):
for ip, p in enumerate(profiles):
try:
perf = 0 if fastest and ip < nparams and predperf[ip]/fastest < .1 else performance(x,tools.benchmark(operation, p, tree))
except (isc.OperationNotSupported, isc.LaunchOutOfResources, isc.MemObjectAllocationFailure):
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
perf = 0
y.append(0 if isinf(perf) else perf)
X.append(x)
@@ -141,7 +141,7 @@ def tune(device, operation, json_path):
def parse_arguments():
platforms = isc.driver.get_platforms()
platforms = sc.driver.get_platforms()
devices = [d for platform in platforms for d in platform.get_devices()]
#Command line arguments
parser = argparse.ArgumentParser()
@@ -156,20 +156,20 @@ def parse_arguments():
print("----------------")
for (i, d) in enumerate(devices):
selected = '[' + ('x' if device==d else ' ') + ']'
print selected , '-', isc.driver.device_type_to_string(d.type), '-', d.name, 'on', d.platform.name
print selected , '-', sc.driver.device_type_to_string(d.type), '-', d.name, 'on', d.platform.name
print("----------------")
operation = {'axpy': isc.templates.axpy, 'dot': isc.templates.dot,
'ger': isc.templates.ger, 'gemv_n': isc.templates.gemv_n, 'gemv_t': isc.templates.gemv_t,
'gemm_nn': isc.templates.gemm_nn, 'gemm_tn': isc.templates.gemm_tn, 'gemm_nt': isc.templates.gemm_nt, 'gemm_tt':isc.templates.gemm_tt}[args.operation]
operation = {'axpy': sc.templates.axpy, 'dot': sc.templates.dot,
'ger': sc.templates.ger, 'gemv_n': sc.templates.gemv_n, 'gemv_t': sc.templates.gemv_t,
'gemm_nn': sc.templates.gemm_nn, 'gemm_tn': sc.templates.gemm_tn, 'gemm_nt': sc.templates.gemm_nt, 'gemm_tt':sc.templates.gemm_tt}[args.operation]
json = tools.sanitize(device.name) + '.json' if not args.json else args.json
return (device, operation, json)
# Script entry point: set the driver's default queue properties to PROFILING_ENABLE
# before anything else runs (benchmarking reads elapsed_time from events, which
# presumably requires profiling-enabled queues -- TODO confirm), then parse the
# command line and run the tuner with the resulting (device, operation, json) tuple.
if __name__ == "__main__":
isc.driver.default.queue_properties = isc.driver.PROFILING_ENABLE
sc.driver.default.queue_properties = sc.driver.PROFILING_ENABLE
args = parse_arguments()
tune(*args)