Code quality: renamed isaac shortcut from isc to sc
This commit is contained in:
@@ -18,8 +18,8 @@
|
|||||||
#include "timer.hpp"
|
#include "timer.hpp"
|
||||||
|
|
||||||
|
|
||||||
namespace isc = isaac;
|
namespace sc = isaac;
|
||||||
typedef isc::int_t int_t;
|
typedef sc::int_t int_t;
|
||||||
|
|
||||||
template<std::size_t> struct int_{};
|
template<std::size_t> struct int_{};
|
||||||
|
|
||||||
@@ -86,11 +86,11 @@ T mean(std::vector<T> x)
|
|||||||
return res/N;
|
return res/N;
|
||||||
}
|
}
|
||||||
|
|
||||||
static long time_event(long sum, isc::driver::Event const & e)
|
static long time_event(long sum, sc::driver::Event const & e)
|
||||||
{ return sum + e.elapsed_time();}
|
{ return sum + e.elapsed_time();}
|
||||||
|
|
||||||
template<class T>
|
template<class T>
|
||||||
void bench(isc::numeric_type dtype, std::string operation)
|
void bench(sc::numeric_type dtype, std::string operation)
|
||||||
{
|
{
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -103,8 +103,8 @@ void bench(isc::numeric_type dtype, std::string operation)
|
|||||||
std::vector<double> times;\
|
std::vector<double> times;\
|
||||||
double total_time = 0;\
|
double total_time = 0;\
|
||||||
while(total_time*1e-9 < 1e-3){\
|
while(total_time*1e-9 < 1e-3){\
|
||||||
std::list<isc::driver::Event> events;\
|
std::list<sc::driver::Event> events;\
|
||||||
flush = isc::zeros((isaac::int_t)1e6, 1, dtype);\
|
flush = sc::zeros((isaac::int_t)1e6, 1, dtype);\
|
||||||
queue.synchronize();\
|
queue.synchronize();\
|
||||||
OP;\
|
OP;\
|
||||||
queue.synchronize();\
|
queue.synchronize();\
|
||||||
@@ -121,11 +121,11 @@ void bench(isc::numeric_type dtype, std::string operation)
|
|||||||
double total_time = 0;\
|
double total_time = 0;\
|
||||||
while(total_time*1e-9 < 1e-3){\
|
while(total_time*1e-9 < 1e-3){\
|
||||||
cl_event event;\
|
cl_event event;\
|
||||||
flush = isc::zeros(1e6, 1, dtype);\
|
flush = sc::zeros(1e6, 1, dtype);\
|
||||||
queue.synchronize();\
|
queue.synchronize();\
|
||||||
OP;\
|
OP;\
|
||||||
queue.synchronize();\
|
queue.synchronize();\
|
||||||
times.push_back(isc::driver::Event(event).elapsed_time());\
|
times.push_back(sc::driver::Event(event).elapsed_time());\
|
||||||
total_time+=times.back();\
|
total_time+=times.back();\
|
||||||
}\
|
}\
|
||||||
double t = median(times);\
|
double t = median(times);\
|
||||||
@@ -134,7 +134,7 @@ void bench(isc::numeric_type dtype, std::string operation)
|
|||||||
|
|
||||||
#define BENCHMARK_HOST(OP, PERF) \
|
#define BENCHMARK_HOST(OP, PERF) \
|
||||||
{\
|
{\
|
||||||
isc::tools::timer tmr;\
|
sc::tools::timer tmr;\
|
||||||
double total_time = 0;\
|
double total_time = 0;\
|
||||||
std::vector<double> times;\
|
std::vector<double> times;\
|
||||||
while(total_time < 1e-2){\
|
while(total_time < 1e-2){\
|
||||||
@@ -160,7 +160,7 @@ void bench(isc::numeric_type dtype, std::string operation)
|
|||||||
OP;\
|
OP;\
|
||||||
cudaThreadSynchronize();\
|
cudaThreadSynchronize();\
|
||||||
while(total_time*1e-3 < 1e-3){\
|
while(total_time*1e-3 < 1e-3){\
|
||||||
flush = isc::zeros(1e6, 1, dtype);\
|
flush = sc::zeros(1e6, 1, dtype);\
|
||||||
cudaEventRecord(start,0);\
|
cudaEventRecord(start,0);\
|
||||||
OP;\
|
OP;\
|
||||||
cudaEventRecord(stop,0);\
|
cudaEventRecord(stop,0);\
|
||||||
@@ -173,10 +173,10 @@ void bench(isc::numeric_type dtype, std::string operation)
|
|||||||
std::cout << "\t" << PERF << std::flush;\
|
std::cout << "\t" << PERF << std::flush;\
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int dtsize = isc::size_of(dtype);
|
unsigned int dtsize = sc::size_of(dtype);
|
||||||
isc::driver::CommandQueue & queue = isc::driver::backend::queues::get(isc::driver::backend::contexts::get_default(),0);
|
sc::driver::CommandQueue & queue = sc::driver::backend::queues::get(sc::driver::backend::contexts::get_default(),0);
|
||||||
std::map<std::string, std::string> metric{ {"axpy", "GB/s"}, {"dot", "GB/s"}, {"gemv", "GB/s"}, {"gemm", "GFLOPS"}};
|
std::map<std::string, std::string> metric{ {"axpy", "GB/s"}, {"dot", "GB/s"}, {"gemv", "GB/s"}, {"gemm", "GFLOPS"}};
|
||||||
isc::array flush((int)1e6, isc::FLOAT_TYPE);
|
sc::array flush((int)1e6, sc::FLOAT_TYPE);
|
||||||
std::cout << "#" << operation << " (" << metric[operation] << ")" << std::endl;
|
std::cout << "#" << operation << " (" << metric[operation] << ")" << std::endl;
|
||||||
std::cout << "N";
|
std::cout << "N";
|
||||||
std::cout << "\tISAAC";
|
std::cout << "\tISAAC";
|
||||||
@@ -204,10 +204,10 @@ void bench(isc::numeric_type dtype, std::string operation)
|
|||||||
for(int_t N: create_log_range((int)1e3, (int)2e7, 50, 64))
|
for(int_t N: create_log_range((int)1e3, (int)2e7, 50, 64))
|
||||||
{
|
{
|
||||||
std::cout << N;
|
std::cout << N;
|
||||||
isc::array x(N, dtype), y(N, dtype);
|
sc::array x(N, dtype), y(N, dtype);
|
||||||
/* ISAAC */
|
/* ISAAC */
|
||||||
std::list<isc::driver::Event> events;
|
std::list<sc::driver::Event> events;
|
||||||
BENCHMARK_ISAAC(y = isc::control(x + alpha*y, isc::execution_options_type(0, &events)), 3*N*dtsize/t)
|
BENCHMARK_ISAAC(y = sc::control(x + alpha*y, sc::execution_options_type(0, &events)), 3*N*dtsize/t)
|
||||||
/* clblas */
|
/* clblas */
|
||||||
#ifdef BENCH_CLBLAS
|
#ifdef BENCH_CLBLAS
|
||||||
BENCHMARK_CLBLAS(clblasSaxpy(N, alpha, CL_HANDLE(x.data()), 0, 1, CL_HANDLE(y.data()), 0, 1, 1, &CL_HANDLE(queue), 0, NULL, &event), 3*N*dtsize/t);
|
BENCHMARK_CLBLAS(clblasSaxpy(N, alpha, CL_HANDLE(x.data()), 0, 1, CL_HANDLE(y.data()), 0, 1, 1, &CL_HANDLE(queue), 0, NULL, &event), 3*N*dtsize/t);
|
||||||
@@ -215,8 +215,8 @@ void bench(isc::numeric_type dtype, std::string operation)
|
|||||||
/* BLAS */
|
/* BLAS */
|
||||||
#ifdef BENCH_CBLAS
|
#ifdef BENCH_CBLAS
|
||||||
std::vector<float> cx(N), cy(N);
|
std::vector<float> cx(N), cy(N);
|
||||||
isc::copy(x, cx);
|
sc::copy(x, cx);
|
||||||
isc::copy(y, cy);
|
sc::copy(y, cy);
|
||||||
BENCHMARK_HOST(cblas_saxpy(N, alpha, cx.data(), 1, cy.data(), 1), 3*N*dtsize/t);
|
BENCHMARK_HOST(cblas_saxpy(N, alpha, cx.data(), 1, cy.data(), 1), 3*N*dtsize/t);
|
||||||
#endif
|
#endif
|
||||||
/* CuBLAS */
|
/* CuBLAS */
|
||||||
@@ -238,11 +238,11 @@ void bench(isc::numeric_type dtype, std::string operation)
|
|||||||
{
|
{
|
||||||
std::cout << N;
|
std::cout << N;
|
||||||
/* ISAAC */
|
/* ISAAC */
|
||||||
isc::array x(N, dtype), y(N, dtype);
|
sc::array x(N, dtype), y(N, dtype);
|
||||||
isc::array scratch(N, dtype);
|
sc::array scratch(N, dtype);
|
||||||
isc::scalar s(dtype);
|
sc::scalar s(dtype);
|
||||||
s = dot(x,y); queue.synchronize();
|
s = dot(x,y); queue.synchronize();
|
||||||
BENCHMARK_ISAAC(s = isc::control(dot(x,y), isc::execution_options_type(0, &events)), 2*N*dtsize/t)
|
BENCHMARK_ISAAC(s = sc::control(dot(x,y), sc::execution_options_type(0, &events)), 2*N*dtsize/t)
|
||||||
/* clblas */
|
/* clblas */
|
||||||
#ifdef BENCH_CLBLAS
|
#ifdef BENCH_CLBLAS
|
||||||
BENCHMARK_CLBLAS(clblasSdot(N, CL_HANDLE(s.data()), 0, CL_HANDLE(x.data()), 0, 1, CL_HANDLE(y.data()), 0, 1, CL_HANDLE(scratch.data()), 1, &CL_HANDLE(queue), 0, NULL, &event), 2*N*dtsize/t)
|
BENCHMARK_CLBLAS(clblasSdot(N, CL_HANDLE(s.data()), 0, CL_HANDLE(x.data()), 0, 1, CL_HANDLE(y.data()), 0, 1, CL_HANDLE(scratch.data()), 1, &CL_HANDLE(queue), 0, NULL, &event), 2*N*dtsize/t)
|
||||||
@@ -250,8 +250,8 @@ void bench(isc::numeric_type dtype, std::string operation)
|
|||||||
/* BLAS */
|
/* BLAS */
|
||||||
#ifdef BENCH_CBLAS
|
#ifdef BENCH_CBLAS
|
||||||
std::vector<float> cx(N), cy(N);
|
std::vector<float> cx(N), cy(N);
|
||||||
isc::copy(x, cx);
|
sc::copy(x, cx);
|
||||||
isc::copy(y, cy);
|
sc::copy(y, cy);
|
||||||
BENCHMARK_HOST(cblas_sdot(N, cx.data(), 1, cy.data(), 1), 2*N*dtsize/t);
|
BENCHMARK_HOST(cblas_sdot(N, cx.data(), 1, cy.data(), 1), 2*N*dtsize/t);
|
||||||
#endif
|
#endif
|
||||||
#ifdef BENCH_CUBLAS
|
#ifdef BENCH_CUBLAS
|
||||||
@@ -294,19 +294,19 @@ void bench(isc::numeric_type dtype, std::string operation)
|
|||||||
if(AT) std::swap(As1, As2);
|
if(AT) std::swap(As1, As2);
|
||||||
|
|
||||||
/* ISAAC */
|
/* ISAAC */
|
||||||
isc::array A(As1, As2, dtype), y(M, dtype), x(N, dtype);
|
sc::array A(As1, As2, dtype), y(M, dtype), x(N, dtype);
|
||||||
#ifdef HAS_A_BLAS
|
#ifdef HAS_A_BLAS
|
||||||
int_t lda = A.ld();
|
int_t lda = A.ld();
|
||||||
#endif
|
#endif
|
||||||
BENCHMARK_ISAAC(y = isc::control(AT?dot(A.T(),x):dot(A,x), isc::execution_options_type(0, &events)),(M*N + M + N)*dtsize/t);
|
BENCHMARK_ISAAC(y = sc::control(AT?dot(A.T(),x):dot(A,x), sc::execution_options_type(0, &events)),(M*N + M + N)*dtsize/t);
|
||||||
#ifdef BENCH_CLBLAS
|
#ifdef BENCH_CLBLAS
|
||||||
BENCHMARK_CLBLAS(clblasSgemv(clblasColumnMajor, AT?clblasTrans:clblasNoTrans, As1, As2, 1, CL_HANDLE(A.data()), 0, lda, CL_HANDLE(x.data()), 0, 1, 0, CL_HANDLE(y.data()), 0, 1, 1, &CL_HANDLE(queue),0, NULL, &event), (M*N + M + N)*dtsize/t)
|
BENCHMARK_CLBLAS(clblasSgemv(clblasColumnMajor, AT?clblasTrans:clblasNoTrans, As1, As2, 1, CL_HANDLE(A.data()), 0, lda, CL_HANDLE(x.data()), 0, 1, 0, CL_HANDLE(y.data()), 0, 1, 1, &CL_HANDLE(queue),0, NULL, &event), (M*N + M + N)*dtsize/t)
|
||||||
#endif
|
#endif
|
||||||
#ifdef BENCH_CBLAS
|
#ifdef BENCH_CBLAS
|
||||||
std::vector<float> cA(M*N), cx(N), cy(M);
|
std::vector<float> cA(M*N), cx(N), cy(M);
|
||||||
isc::copy(x, cx);
|
sc::copy(x, cx);
|
||||||
isc::copy(y, cy);
|
sc::copy(y, cy);
|
||||||
isc::copy(A, cA);
|
sc::copy(A, cA);
|
||||||
BENCHMARK_HOST(cblas_sgemv(CblasColMajor, AT?CblasTrans:CblasNoTrans, As1, As2, 1, cA.data(), lda, cx.data(), 1, 0, cy.data(), 1), (M*N + M + N)*dtsize/t);
|
BENCHMARK_HOST(cblas_sgemv(CblasColMajor, AT?CblasTrans:CblasNoTrans, As1, As2, 1, cA.data(), lda, cx.data(), 1, 0, cy.data(), 1), (M*N + M + N)*dtsize/t);
|
||||||
#endif
|
#endif
|
||||||
#ifdef BENCH_CUBLAS
|
#ifdef BENCH_CUBLAS
|
||||||
@@ -369,11 +369,11 @@ void bench(isc::numeric_type dtype, std::string operation)
|
|||||||
int_t Bs1 = K, Bs2 = N;
|
int_t Bs1 = K, Bs2 = N;
|
||||||
if(BT) std::swap(Bs1, Bs2);
|
if(BT) std::swap(Bs1, Bs2);
|
||||||
|
|
||||||
isc::array C(M, N, dtype), A(As1, As2, dtype), B(Bs1, Bs2, dtype);
|
sc::array C(M, N, dtype), A(As1, As2, dtype), B(Bs1, Bs2, dtype);
|
||||||
#ifdef HAS_A_BLAS
|
#ifdef HAS_A_BLAS
|
||||||
int_t lda = A.ld(), ldb = B.ld(), ldc = C.ld();
|
int_t lda = A.ld(), ldb = B.ld(), ldc = C.ld();
|
||||||
#endif
|
#endif
|
||||||
BENCHMARK_ISAAC(C = isc::control(AT?(BT?dot(A.T(),B.T()):dot(A.T(),B)):(BT?dot(A,B.T()):dot(A,B)), isc::execution_options_type(0, &events)), (double)2*M*N*K/t);
|
BENCHMARK_ISAAC(C = sc::control(AT?(BT?dot(A.T(),B.T()):dot(A.T(),B)):(BT?dot(A,B.T()):dot(A,B)), sc::execution_options_type(0, &events)), (double)2*M*N*K/t);
|
||||||
/* clblas */
|
/* clblas */
|
||||||
#ifdef BENCH_CLBLAS
|
#ifdef BENCH_CLBLAS
|
||||||
BENCHMARK_CLBLAS(clblasSgemm(clblasColumnMajor, AT?clblasTrans:clblasNoTrans, BT?clblasTrans:clblasNoTrans, M, N, K, 1, CL_HANDLE(A.data()), 0, lda, CL_HANDLE(B.data()), 0, ldb,
|
BENCHMARK_CLBLAS(clblasSgemm(clblasColumnMajor, AT?clblasTrans:clblasNoTrans, BT?clblasTrans:clblasNoTrans, M, N, K, 1, CL_HANDLE(A.data()), 0, lda, CL_HANDLE(B.data()), 0, ldb,
|
||||||
@@ -382,9 +382,9 @@ void bench(isc::numeric_type dtype, std::string operation)
|
|||||||
/* BLAS */
|
/* BLAS */
|
||||||
#ifdef BENCH_CBLAS
|
#ifdef BENCH_CBLAS
|
||||||
std::vector<float> cC(M*N), cA(M*K), cB(N*K);
|
std::vector<float> cC(M*N), cA(M*K), cB(N*K);
|
||||||
isc::copy(C, cC);
|
sc::copy(C, cC);
|
||||||
isc::copy(A, cA);
|
sc::copy(A, cA);
|
||||||
isc::copy(B, cB);
|
sc::copy(B, cB);
|
||||||
BENCHMARK_HOST(cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, cA.data(), lda, cB.data(), ldb, 1, cC.data(), ldc), (double)2*M*N*K/t);
|
BENCHMARK_HOST(cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, cA.data(), lda, cB.data(), ldb, 1, cC.data(), ldc), (double)2*M*N*K/t);
|
||||||
#endif
|
#endif
|
||||||
#ifdef BENCH_CUBLAS
|
#ifdef BENCH_CUBLAS
|
||||||
@@ -409,11 +409,11 @@ int main(int argc, char* argv[])
|
|||||||
#ifdef BENCH_CLBLAS
|
#ifdef BENCH_CLBLAS
|
||||||
clblasSetup();
|
clblasSetup();
|
||||||
#endif
|
#endif
|
||||||
isc::driver::backend::default_queue_properties = CL_QUEUE_PROFILING_ENABLE;
|
sc::driver::backend::default_queue_properties = CL_QUEUE_PROFILING_ENABLE;
|
||||||
|
|
||||||
int device_idx = 0;
|
int device_idx = 0;
|
||||||
std::list<isc::driver::Context const *> contexts;
|
std::list<sc::driver::Context const *> contexts;
|
||||||
isc::driver::backend::contexts::get(contexts);
|
sc::driver::backend::contexts::get(contexts);
|
||||||
|
|
||||||
std::string operation;
|
std::string operation;
|
||||||
if(contexts.size() > 1)
|
if(contexts.size() > 1)
|
||||||
@@ -423,9 +423,9 @@ int main(int argc, char* argv[])
|
|||||||
std::cerr << "usage : blas-bench DEVICE_IDX OPERATION" << std::endl;
|
std::cerr << "usage : blas-bench DEVICE_IDX OPERATION" << std::endl;
|
||||||
std::cout << "Devices available: " << std::endl;
|
std::cout << "Devices available: " << std::endl;
|
||||||
unsigned int current=0;
|
unsigned int current=0;
|
||||||
for(isc::driver::Context const * context: contexts)
|
for(sc::driver::Context const * context: contexts)
|
||||||
{
|
{
|
||||||
isc::driver::Device device = isc::driver::backend::queues::get(*context,0).device();
|
sc::driver::Device device = sc::driver::backend::queues::get(*context,0).device();
|
||||||
std::cout << current++ << ": " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
|
std::cout << current++ << ": " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
|
||||||
}
|
}
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
@@ -443,10 +443,10 @@ int main(int argc, char* argv[])
|
|||||||
operation = args[1];
|
operation = args[1];
|
||||||
}
|
}
|
||||||
|
|
||||||
isc::driver::backend::default_device = device_idx;
|
sc::driver::backend::default_device = device_idx;
|
||||||
std::cout << "#Benchmark : BLAS" << std::endl;
|
std::cout << "#Benchmark : BLAS" << std::endl;
|
||||||
std::cout << "#----------------" << std::endl;
|
std::cout << "#----------------" << std::endl;
|
||||||
bench<float>(isc::FLOAT_TYPE, operation);
|
bench<float>(sc::FLOAT_TYPE, operation);
|
||||||
|
|
||||||
#ifdef BENCH_CLBLAS
|
#ifdef BENCH_CLBLAS
|
||||||
clblasTeardown();
|
clblasTeardown();
|
||||||
|
@@ -10,7 +10,7 @@
|
|||||||
#define MAP_ENUM(v, ns) .value(#v, ns::v)
|
#define MAP_ENUM(v, ns) .value(#v, ns::v)
|
||||||
|
|
||||||
namespace bp = boost::python;
|
namespace bp = boost::python;
|
||||||
namespace isc = isaac;
|
namespace sc = isaac;
|
||||||
namespace np = boost::numpy;
|
namespace np = boost::numpy;
|
||||||
|
|
||||||
namespace tools
|
namespace tools
|
||||||
@@ -36,7 +36,7 @@ namespace tools
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
inline isc::numeric_type extract_dtype(bp::object const & odtype)
|
inline sc::numeric_type extract_dtype(bp::object const & odtype)
|
||||||
{
|
{
|
||||||
std::string name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))();
|
std::string name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))();
|
||||||
if(name=="class")
|
if(name=="class")
|
||||||
@@ -44,16 +44,16 @@ namespace tools
|
|||||||
else
|
else
|
||||||
name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))();
|
name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))();
|
||||||
|
|
||||||
if(name=="int8") return isc::CHAR_TYPE;
|
if(name=="int8") return sc::CHAR_TYPE;
|
||||||
else if(name=="uint8") return isc::UCHAR_TYPE;
|
else if(name=="uint8") return sc::UCHAR_TYPE;
|
||||||
else if(name=="int16") return isc::SHORT_TYPE;
|
else if(name=="int16") return sc::SHORT_TYPE;
|
||||||
else if(name=="uint16") return isc::USHORT_TYPE;
|
else if(name=="uint16") return sc::USHORT_TYPE;
|
||||||
else if(name=="int32") return isc::INT_TYPE;
|
else if(name=="int32") return sc::INT_TYPE;
|
||||||
else if(name=="uint32") return isc::UINT_TYPE;
|
else if(name=="uint32") return sc::UINT_TYPE;
|
||||||
else if(name=="int64") return isc::LONG_TYPE;
|
else if(name=="int64") return sc::LONG_TYPE;
|
||||||
else if(name=="uint64") return isc::ULONG_TYPE;
|
else if(name=="uint64") return sc::ULONG_TYPE;
|
||||||
else if(name=="float32") return isc::FLOAT_TYPE;
|
else if(name=="float32") return sc::FLOAT_TYPE;
|
||||||
else if(name=="float64") return isc::DOUBLE_TYPE;
|
else if(name=="float64") return sc::DOUBLE_TYPE;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
PyErr_SetString(PyExc_TypeError, "Data type not understood");
|
PyErr_SetString(PyExc_TypeError, "Data type not understood");
|
||||||
@@ -62,7 +62,7 @@ namespace tools
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline isc::expression_type extract_template_type(bp::object const & odtype)
|
inline sc::expression_type extract_template_type(bp::object const & odtype)
|
||||||
{
|
{
|
||||||
std::string name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))();
|
std::string name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))();
|
||||||
if(name=="class")
|
if(name=="class")
|
||||||
@@ -70,15 +70,15 @@ namespace tools
|
|||||||
else
|
else
|
||||||
name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))();
|
name = bp::extract<std::string>(odtype.attr("__class__").attr("__name__"))();
|
||||||
|
|
||||||
if(name=="axpy") return isc::AXPY_TYPE;
|
if(name=="axpy") return sc::AXPY_TYPE;
|
||||||
else if(name=="ger") return isc::GER_TYPE;
|
else if(name=="ger") return sc::GER_TYPE;
|
||||||
else if(name=="dot") return isc::DOT_TYPE;
|
else if(name=="dot") return sc::DOT_TYPE;
|
||||||
else if(name=="gemv_n") return isc::GEMV_N_TYPE;
|
else if(name=="gemv_n") return sc::GEMV_N_TYPE;
|
||||||
else if(name=="gemv_t") return isc::GEMV_T_TYPE;
|
else if(name=="gemv_t") return sc::GEMV_T_TYPE;
|
||||||
else if(name=="gemm_nn") return isc::GEMM_NN_TYPE;
|
else if(name=="gemm_nn") return sc::GEMM_NN_TYPE;
|
||||||
else if(name=="gemm_tn") return isc::GEMM_TN_TYPE;
|
else if(name=="gemm_tn") return sc::GEMM_TN_TYPE;
|
||||||
else if(name=="gemm_nt") return isc::GEMM_NT_TYPE;
|
else if(name=="gemm_nt") return sc::GEMM_NT_TYPE;
|
||||||
else if(name=="gemm_tt") return isc::GEMM_TT_TYPE;
|
else if(name=="gemm_tt") return sc::GEMM_TT_TYPE;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
PyErr_SetString(PyExc_TypeError, "Template type not understood");
|
PyErr_SetString(PyExc_TypeError, "Template type not understood");
|
||||||
|
@@ -6,19 +6,19 @@ namespace detail
|
|||||||
{
|
{
|
||||||
|
|
||||||
|
|
||||||
isc::numeric_type to_isc_dtype(np::dtype const & T)
|
sc::numeric_type to_sc_dtype(np::dtype const & T)
|
||||||
{
|
{
|
||||||
if(T==np::detail::get_int_dtype<8, false>()) return isc::CHAR_TYPE;
|
if(T==np::detail::get_int_dtype<8, false>()) return sc::CHAR_TYPE;
|
||||||
else if(T==np::detail::get_int_dtype<8, true>()) return isc::UCHAR_TYPE;
|
else if(T==np::detail::get_int_dtype<8, true>()) return sc::UCHAR_TYPE;
|
||||||
else if(T==np::detail::get_int_dtype<16, false>()) return isc::SHORT_TYPE;
|
else if(T==np::detail::get_int_dtype<16, false>()) return sc::SHORT_TYPE;
|
||||||
else if(T==np::detail::get_int_dtype<16, true>()) return isc::USHORT_TYPE;
|
else if(T==np::detail::get_int_dtype<16, true>()) return sc::USHORT_TYPE;
|
||||||
else if(T==np::detail::get_int_dtype<32, false>()) return isc::INT_TYPE;
|
else if(T==np::detail::get_int_dtype<32, false>()) return sc::INT_TYPE;
|
||||||
else if(T==np::detail::get_int_dtype<32, true>()) return isc::UINT_TYPE;
|
else if(T==np::detail::get_int_dtype<32, true>()) return sc::UINT_TYPE;
|
||||||
else if(T==np::detail::get_int_dtype<64, false>()) return isc::LONG_TYPE;
|
else if(T==np::detail::get_int_dtype<64, false>()) return sc::LONG_TYPE;
|
||||||
else if(T==np::detail::get_int_dtype<64, true>()) return isc::ULONG_TYPE;
|
else if(T==np::detail::get_int_dtype<64, true>()) return sc::ULONG_TYPE;
|
||||||
// else if(T==np::detail::get_float_dtype<16>()) return isc::HALF_TYPE;
|
// else if(T==np::detail::get_float_dtype<16>()) return sc::HALF_TYPE;
|
||||||
else if(T==np::detail::get_float_dtype<32>()) return isc::FLOAT_TYPE;
|
else if(T==np::detail::get_float_dtype<32>()) return sc::FLOAT_TYPE;
|
||||||
else if(T==np::detail::get_float_dtype<64>()) return isc::DOUBLE_TYPE;
|
else if(T==np::detail::get_float_dtype<64>()) return sc::DOUBLE_TYPE;
|
||||||
else{
|
else{
|
||||||
PyErr_SetString(PyExc_TypeError, "Unrecognized datatype");
|
PyErr_SetString(PyExc_TypeError, "Unrecognized datatype");
|
||||||
bp::throw_error_already_set();
|
bp::throw_error_already_set();
|
||||||
@@ -26,19 +26,19 @@ isc::numeric_type to_isc_dtype(np::dtype const & T)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
np::dtype to_np_dtype(isc::numeric_type const & T) throw()
|
np::dtype to_np_dtype(sc::numeric_type const & T) throw()
|
||||||
{
|
{
|
||||||
if(T==isc::CHAR_TYPE) return np::detail::get_int_dtype<8, false>();
|
if(T==sc::CHAR_TYPE) return np::detail::get_int_dtype<8, false>();
|
||||||
else if(T==isc::UCHAR_TYPE) return np::detail::get_int_dtype<8, true>();
|
else if(T==sc::UCHAR_TYPE) return np::detail::get_int_dtype<8, true>();
|
||||||
else if(T==isc::SHORT_TYPE) return np::detail::get_int_dtype<16, false>();
|
else if(T==sc::SHORT_TYPE) return np::detail::get_int_dtype<16, false>();
|
||||||
else if(T==isc::USHORT_TYPE) return np::detail::get_int_dtype<16, true>();
|
else if(T==sc::USHORT_TYPE) return np::detail::get_int_dtype<16, true>();
|
||||||
else if(T==isc::INT_TYPE) return np::detail::get_int_dtype<32, false>();
|
else if(T==sc::INT_TYPE) return np::detail::get_int_dtype<32, false>();
|
||||||
else if(T==isc::UINT_TYPE) return np::detail::get_int_dtype<32, true>();
|
else if(T==sc::UINT_TYPE) return np::detail::get_int_dtype<32, true>();
|
||||||
else if(T==isc::LONG_TYPE) return np::detail::get_int_dtype<64, false>();
|
else if(T==sc::LONG_TYPE) return np::detail::get_int_dtype<64, false>();
|
||||||
else if(T==isc::ULONG_TYPE) return np::detail::get_int_dtype<64, true>();
|
else if(T==sc::ULONG_TYPE) return np::detail::get_int_dtype<64, true>();
|
||||||
// else if(T==isc::HALF_TYPE) return np::detail::get_float_dtype<16>();
|
// else if(T==sc::HALF_TYPE) return np::detail::get_float_dtype<16>();
|
||||||
else if(T==isc::FLOAT_TYPE) return np::detail::get_float_dtype<32>();
|
else if(T==sc::FLOAT_TYPE) return np::detail::get_float_dtype<32>();
|
||||||
else if(T==isc::DOUBLE_TYPE) return np::detail::get_float_dtype<64>();
|
else if(T==sc::DOUBLE_TYPE) return np::detail::get_float_dtype<64>();
|
||||||
else{
|
else{
|
||||||
PyErr_SetString(PyExc_TypeError, "Unrecognized datatype");
|
PyErr_SetString(PyExc_TypeError, "Unrecognized datatype");
|
||||||
bp::throw_error_already_set();
|
bp::throw_error_already_set();
|
||||||
@@ -46,21 +46,21 @@ np::dtype to_np_dtype(isc::numeric_type const & T) throw()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bp::tuple get_shape(isc::array const & x)
|
bp::tuple get_shape(sc::array const & x)
|
||||||
{
|
{
|
||||||
return bp::make_tuple(x.shape()[0], x.shape()[1]);
|
return bp::make_tuple(x.shape()[0], x.shape()[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class T>
|
template<class T>
|
||||||
struct datatype : public isc::value_scalar
|
struct datatype : public sc::value_scalar
|
||||||
{
|
{
|
||||||
datatype(T t) : isc::value_scalar(t){ }
|
datatype(T t) : sc::value_scalar(t){ }
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template<class T>
|
template<class T>
|
||||||
unsigned int size(datatype<T> const & dt)
|
unsigned int size(datatype<T> const & dt)
|
||||||
{ return isc::size_of(dt.dtype()) ; }
|
{ return sc::size_of(dt.dtype()) ; }
|
||||||
|
|
||||||
#define INSTANTIATE(name, clname) \
|
#define INSTANTIATE(name, clname) \
|
||||||
struct name : public detail::datatype<clname> { name(clname value) : detail::datatype<clname>(value){} };
|
struct name : public detail::datatype<clname> { name(clname value) : detail::datatype<clname>(value){} };
|
||||||
@@ -80,13 +80,13 @@ unsigned int size(datatype<T> const & dt)
|
|||||||
|
|
||||||
namespace detail
|
namespace detail
|
||||||
{
|
{
|
||||||
std::shared_ptr<isc::profiles::value_type> construct_model(bp::object const & tp, bp::object dtype, isc::driver::CommandQueue & queue)
|
std::shared_ptr<sc::profiles::value_type> construct_model(bp::object const & tp, bp::object dtype, sc::driver::CommandQueue & queue)
|
||||||
{
|
{
|
||||||
return std::shared_ptr<isc::profiles::value_type>(new isc::profiles::value_type(tools::extract_template_type(tp), tools::extract_dtype(dtype), (isaac::templates::base const &)bp::extract<isaac::templates::base>(tp), queue));
|
return std::shared_ptr<sc::profiles::value_type>(new sc::profiles::value_type(tools::extract_template_type(tp), tools::extract_dtype(dtype), (isaac::templates::base const &)bp::extract<isaac::templates::base>(tp), queue));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<isc::array>
|
std::shared_ptr<sc::array>
|
||||||
ndarray_to_iscarray(const np::ndarray& array, isc::driver::Context const & ctx)
|
ndarray_to_scarray(const np::ndarray& array, sc::driver::Context const & ctx)
|
||||||
{
|
{
|
||||||
|
|
||||||
int d = array.get_nd();
|
int d = array.get_nd();
|
||||||
@@ -95,14 +95,14 @@ namespace detail
|
|||||||
bp::throw_error_already_set();
|
bp::throw_error_already_set();
|
||||||
}
|
}
|
||||||
|
|
||||||
isc::numeric_type dtype = to_isc_dtype(array.get_dtype());
|
sc::numeric_type dtype = to_sc_dtype(array.get_dtype());
|
||||||
isc::int_t size = (isc::int_t)array.shape(0);
|
sc::int_t size = (sc::int_t)array.shape(0);
|
||||||
isc::array* v = new isc::array(size, dtype, ctx);
|
sc::array* v = new sc::array(size, dtype, ctx);
|
||||||
|
|
||||||
void* data = (void*)array.get_data();
|
void* data = (void*)array.get_data();
|
||||||
isc::copy(data, *v);
|
sc::copy(data, *v);
|
||||||
|
|
||||||
return std::shared_ptr<isc::array>(v);
|
return std::shared_ptr<sc::array>(v);
|
||||||
}
|
}
|
||||||
|
|
||||||
isaac::driver::Context const & extract_context(bp::object context)
|
isaac::driver::Context const & extract_context(bp::object context)
|
||||||
@@ -118,19 +118,19 @@ namespace detail
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::shared_ptr<isc::array> create_array(bp::object const & obj, bp::object odtype, bp::object pycontext)
|
std::shared_ptr<sc::array> create_array(bp::object const & obj, bp::object odtype, bp::object pycontext)
|
||||||
{
|
{
|
||||||
return ndarray_to_iscarray(np::from_object(obj, to_np_dtype(tools::extract_dtype(odtype))), extract_context(pycontext));
|
return ndarray_to_scarray(np::from_object(obj, to_np_dtype(tools::extract_dtype(odtype))), extract_context(pycontext));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<isc::array> create_zeros_array(isc::int_t M, isc::int_t N, bp::object odtype, bp::object pycontext)
|
std::shared_ptr<sc::array> create_zeros_array(sc::int_t M, sc::int_t N, bp::object odtype, bp::object pycontext)
|
||||||
{
|
{
|
||||||
return std::shared_ptr<isc::array>(new isc::array(isc::zeros(M, N, tools::extract_dtype(odtype), extract_context(pycontext))));
|
return std::shared_ptr<sc::array>(new sc::array(sc::zeros(M, N, tools::extract_dtype(odtype), extract_context(pycontext))));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<isc::array> create_empty_array(bp::object sizes, bp::object odtype, bp::object pycontext)
|
std::shared_ptr<sc::array> create_empty_array(bp::object sizes, bp::object odtype, bp::object pycontext)
|
||||||
{
|
{
|
||||||
typedef std::shared_ptr<isc::array> result_type;
|
typedef std::shared_ptr<sc::array> result_type;
|
||||||
|
|
||||||
std::size_t len;
|
std::size_t len;
|
||||||
int size1;
|
int size1;
|
||||||
@@ -145,17 +145,17 @@ namespace detail
|
|||||||
size1 = bp::extract<int>(sizes)();
|
size1 = bp::extract<int>(sizes)();
|
||||||
}
|
}
|
||||||
|
|
||||||
isc::numeric_type dtype = tools::extract_dtype(odtype);
|
sc::numeric_type dtype = tools::extract_dtype(odtype);
|
||||||
if(len < 1 || len > 2)
|
if(len < 1 || len > 2)
|
||||||
{
|
{
|
||||||
PyErr_SetString(PyExc_TypeError, "Only 1-D and 2-D arrays are supported!");
|
PyErr_SetString(PyExc_TypeError, "Only 1-D and 2-D arrays are supported!");
|
||||||
bp::throw_error_already_set();
|
bp::throw_error_already_set();
|
||||||
}
|
}
|
||||||
|
|
||||||
isc::driver::Context const & context = extract_context(pycontext);
|
sc::driver::Context const & context = extract_context(pycontext);
|
||||||
if(len==1)
|
if(len==1)
|
||||||
return result_type(new isc::array(size1, dtype, context));
|
return result_type(new sc::array(size1, dtype, context));
|
||||||
return result_type(new isc::array(size1, size2, dtype, context));
|
return result_type(new sc::array(size1, size2, dtype, context));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string type_name(bp::object const & obj)
|
std::string type_name(bp::object const & obj)
|
||||||
@@ -167,26 +167,26 @@ namespace detail
|
|||||||
return bp::extract<std::string>(obj.attr("__class__").attr("__name__"))();
|
return bp::extract<std::string>(obj.attr("__class__").attr("__name__"))();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<isc::scalar> construct_scalar(bp::object obj, bp::object pycontext)
|
std::shared_ptr<sc::scalar> construct_scalar(bp::object obj, bp::object pycontext)
|
||||||
{
|
{
|
||||||
typedef std::shared_ptr<isc::scalar> result_type;
|
typedef std::shared_ptr<sc::scalar> result_type;
|
||||||
isc::driver::Context const & context = extract_context(pycontext);
|
sc::driver::Context const & context = extract_context(pycontext);
|
||||||
std::string name = type_name(obj);
|
std::string name = type_name(obj);
|
||||||
if(name=="int") return result_type(new isc::scalar(bp::extract<int>(obj)(), context));
|
if(name=="int") return result_type(new sc::scalar(bp::extract<int>(obj)(), context));
|
||||||
else if(name=="float") return result_type(new isc::scalar(bp::extract<double>(obj)(), context));
|
else if(name=="float") return result_type(new sc::scalar(bp::extract<double>(obj)(), context));
|
||||||
else if(name=="long") return result_type(new isc::scalar(bp::extract<long>(obj)(), context));
|
else if(name=="long") return result_type(new sc::scalar(bp::extract<long>(obj)(), context));
|
||||||
else if(name=="int") return result_type(new isc::scalar(bp::extract<int>(obj)(), context));
|
else if(name=="int") return result_type(new sc::scalar(bp::extract<int>(obj)(), context));
|
||||||
|
|
||||||
else if(name=="int8") return result_type(new isc::scalar(isc::CHAR_TYPE, context));
|
else if(name=="int8") return result_type(new sc::scalar(sc::CHAR_TYPE, context));
|
||||||
else if(name=="uint8") return result_type(new isc::scalar(isc::UCHAR_TYPE, context));
|
else if(name=="uint8") return result_type(new sc::scalar(sc::UCHAR_TYPE, context));
|
||||||
else if(name=="int16") return result_type(new isc::scalar(isc::SHORT_TYPE, context));
|
else if(name=="int16") return result_type(new sc::scalar(sc::SHORT_TYPE, context));
|
||||||
else if(name=="uint16") return result_type(new isc::scalar(isc::USHORT_TYPE, context));
|
else if(name=="uint16") return result_type(new sc::scalar(sc::USHORT_TYPE, context));
|
||||||
else if(name=="int32") return result_type(new isc::scalar(isc::INT_TYPE, context));
|
else if(name=="int32") return result_type(new sc::scalar(sc::INT_TYPE, context));
|
||||||
else if(name=="uint32") return result_type(new isc::scalar(isc::UINT_TYPE, context));
|
else if(name=="uint32") return result_type(new sc::scalar(sc::UINT_TYPE, context));
|
||||||
else if(name=="int64") return result_type(new isc::scalar(isc::LONG_TYPE, context));
|
else if(name=="int64") return result_type(new sc::scalar(sc::LONG_TYPE, context));
|
||||||
else if(name=="uint64") return result_type(new isc::scalar(isc::ULONG_TYPE, context));
|
else if(name=="uint64") return result_type(new sc::scalar(sc::ULONG_TYPE, context));
|
||||||
else if(name=="float32") return result_type(new isc::scalar(isc::FLOAT_TYPE, context));
|
else if(name=="float32") return result_type(new sc::scalar(sc::FLOAT_TYPE, context));
|
||||||
else if(name=="float64") return result_type(new isc::scalar(isc::DOUBLE_TYPE, context));
|
else if(name=="float64") return result_type(new sc::scalar(sc::DOUBLE_TYPE, context));
|
||||||
else{
|
else{
|
||||||
PyErr_SetString(PyExc_TypeError, "Data type not understood");
|
PyErr_SetString(PyExc_TypeError, "Data type not understood");
|
||||||
bp::throw_error_already_set();
|
bp::throw_error_already_set();
|
||||||
@@ -196,11 +196,11 @@ namespace detail
|
|||||||
|
|
||||||
struct model_map_indexing
|
struct model_map_indexing
|
||||||
{
|
{
|
||||||
static isc::profiles::value_type& get_item(isc::profiles::map_type& container, bp::tuple i_)
|
static sc::profiles::value_type& get_item(sc::profiles::map_type& container, bp::tuple i_)
|
||||||
{
|
{
|
||||||
isc::expression_type expression = tools::extract_template_type(i_[0]);
|
sc::expression_type expression = tools::extract_template_type(i_[0]);
|
||||||
isc::numeric_type dtype = tools::extract_dtype(i_[1]);
|
sc::numeric_type dtype = tools::extract_dtype(i_[1]);
|
||||||
isc::profiles::map_type::iterator i = container.find(std::make_pair(expression, dtype));
|
sc::profiles::map_type::iterator i = container.find(std::make_pair(expression, dtype));
|
||||||
if (i == container.end())
|
if (i == container.end())
|
||||||
{
|
{
|
||||||
PyErr_SetString(PyExc_KeyError, "Invalid key");
|
PyErr_SetString(PyExc_KeyError, "Invalid key");
|
||||||
@@ -209,11 +209,11 @@ namespace detail
|
|||||||
return *i->second;
|
return *i->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void set_item(isc::profiles::map_type& container, bp::tuple i_, isc::profiles::value_type const & v)
|
static void set_item(sc::profiles::map_type& container, bp::tuple i_, sc::profiles::value_type const & v)
|
||||||
{
|
{
|
||||||
isc::expression_type expression = tools::extract_template_type(i_[0]);
|
sc::expression_type expression = tools::extract_template_type(i_[0]);
|
||||||
isc::numeric_type dtype = tools::extract_dtype(i_[1]);
|
sc::numeric_type dtype = tools::extract_dtype(i_[1]);
|
||||||
container[std::make_pair(expression, dtype)].reset(new isc::profiles::value_type(v));
|
container[std::make_pair(expression, dtype)].reset(new sc::profiles::value_type(v));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@@ -227,13 +227,13 @@ void export_core()
|
|||||||
|
|
||||||
bp::class_<isaac::profiles::value_type>("profile", bp::no_init)
|
bp::class_<isaac::profiles::value_type>("profile", bp::no_init)
|
||||||
.def("__init__", bp::make_constructor(detail::construct_model))
|
.def("__init__", bp::make_constructor(detail::construct_model))
|
||||||
.def("execute", &isc::profiles::value_type::execute);
|
.def("execute", &sc::profiles::value_type::execute);
|
||||||
|
|
||||||
bp::class_<isc::value_scalar>("value_scalar", bp::no_init)
|
bp::class_<sc::value_scalar>("value_scalar", bp::no_init)
|
||||||
.add_property("dtype", &isc::value_scalar::dtype);
|
.add_property("dtype", &sc::value_scalar::dtype);
|
||||||
|
|
||||||
#define INSTANTIATE(name, clname) \
|
#define INSTANTIATE(name, clname) \
|
||||||
bp::class_<detail::datatype<clname>, bp::bases<isc::value_scalar> >(#name, bp::init<clname>());\
|
bp::class_<detail::datatype<clname>, bp::bases<sc::value_scalar> >(#name, bp::init<clname>());\
|
||||||
bp::class_<detail::name, bp::bases<detail::datatype<clname> > >(#name, bp::init<clname>())\
|
bp::class_<detail::name, bp::bases<detail::datatype<clname> > >(#name, bp::init<clname>())\
|
||||||
.add_property("size", &detail::size<clname>)\
|
.add_property("size", &detail::size<clname>)\
|
||||||
;
|
;
|
||||||
@@ -251,36 +251,36 @@ void export_core()
|
|||||||
INSTANTIATE(float64, cl_double)
|
INSTANTIATE(float64, cl_double)
|
||||||
#undef INSTANTIATE
|
#undef INSTANTIATE
|
||||||
|
|
||||||
bp::enum_<isc::expression_type>("operations")
|
bp::enum_<sc::expression_type>("operations")
|
||||||
MAP_ENUM(AXPY_TYPE, isc)
|
MAP_ENUM(AXPY_TYPE, sc)
|
||||||
MAP_ENUM(GER_TYPE, isc)
|
MAP_ENUM(GER_TYPE, sc)
|
||||||
MAP_ENUM(DOT_TYPE, isc)
|
MAP_ENUM(DOT_TYPE, sc)
|
||||||
MAP_ENUM(GEMV_N_TYPE, isc)
|
MAP_ENUM(GEMV_N_TYPE, sc)
|
||||||
MAP_ENUM(GEMV_T_TYPE, isc)
|
MAP_ENUM(GEMV_T_TYPE, sc)
|
||||||
MAP_ENUM(GEMM_NN_TYPE, isc)
|
MAP_ENUM(GEMM_NN_TYPE, sc)
|
||||||
MAP_ENUM(GEMM_TN_TYPE, isc)
|
MAP_ENUM(GEMM_TN_TYPE, sc)
|
||||||
MAP_ENUM(GEMM_NT_TYPE, isc)
|
MAP_ENUM(GEMM_NT_TYPE, sc)
|
||||||
MAP_ENUM(GEMM_TT_TYPE, isc);
|
MAP_ENUM(GEMM_TT_TYPE, sc);
|
||||||
|
|
||||||
#define ADD_SCALAR_HANDLING(OP)\
|
#define ADD_SCALAR_HANDLING(OP)\
|
||||||
.def(bp::self OP int())\
|
.def(bp::self OP int())\
|
||||||
.def(bp::self OP long())\
|
.def(bp::self OP long())\
|
||||||
.def(bp::self OP double())\
|
.def(bp::self OP double())\
|
||||||
.def(bp::self OP bp::other<isc::value_scalar>())\
|
.def(bp::self OP bp::other<sc::value_scalar>())\
|
||||||
.def(int() OP bp::self)\
|
.def(int() OP bp::self)\
|
||||||
.def(long() OP bp::self)\
|
.def(long() OP bp::self)\
|
||||||
.def(double() OP bp::self)\
|
.def(double() OP bp::self)\
|
||||||
.def(bp::other<isc::value_scalar>() OP bp::self)
|
.def(bp::other<sc::value_scalar>() OP bp::self)
|
||||||
|
|
||||||
#define ADD_ARRAY_OPERATOR(OP)\
|
#define ADD_ARRAY_OPERATOR(OP)\
|
||||||
.def(bp::self OP bp::self)\
|
.def(bp::self OP bp::self)\
|
||||||
ADD_SCALAR_HANDLING(OP)
|
ADD_SCALAR_HANDLING(OP)
|
||||||
|
|
||||||
bp::class_<isc::expressions_tuple>
|
bp::class_<sc::expressions_tuple>
|
||||||
("array_expression_container", bp::init<isc::array_expression const &>())
|
("array_expression_container", bp::init<sc::array_expression const &>())
|
||||||
;
|
;
|
||||||
|
|
||||||
bp::class_<isc::array_expression >("array_expression", bp::no_init)
|
bp::class_<sc::array_expression >("array_expression", bp::no_init)
|
||||||
ADD_ARRAY_OPERATOR(+)
|
ADD_ARRAY_OPERATOR(+)
|
||||||
ADD_ARRAY_OPERATOR(-)
|
ADD_ARRAY_OPERATOR(-)
|
||||||
ADD_ARRAY_OPERATOR(*)
|
ADD_ARRAY_OPERATOR(*)
|
||||||
@@ -291,7 +291,7 @@ void export_core()
|
|||||||
ADD_ARRAY_OPERATOR(<=)
|
ADD_ARRAY_OPERATOR(<=)
|
||||||
ADD_ARRAY_OPERATOR(==)
|
ADD_ARRAY_OPERATOR(==)
|
||||||
ADD_ARRAY_OPERATOR(!=)
|
ADD_ARRAY_OPERATOR(!=)
|
||||||
.add_property("context", bp::make_function(&isc::array_expression::context, bp::return_internal_reference<>()))
|
.add_property("context", bp::make_function(&sc::array_expression::context, bp::return_internal_reference<>()))
|
||||||
.def(bp::self_ns::abs(bp::self))
|
.def(bp::self_ns::abs(bp::self))
|
||||||
// .def(bp::self_ns::pow(bp::self))
|
// .def(bp::self_ns::pow(bp::self))
|
||||||
;
|
;
|
||||||
@@ -299,18 +299,18 @@ void export_core()
|
|||||||
|
|
||||||
#define ADD_ARRAY_OPERATOR(OP) \
|
#define ADD_ARRAY_OPERATOR(OP) \
|
||||||
.def(bp::self OP bp::self)\
|
.def(bp::self OP bp::self)\
|
||||||
.def(bp::self OP bp::other<isc::array_expression>())\
|
.def(bp::self OP bp::other<sc::array_expression>())\
|
||||||
.def(bp::other<isc::array_expression>() OP bp::self) \
|
.def(bp::other<sc::array_expression>() OP bp::self) \
|
||||||
ADD_SCALAR_HANDLING(OP)
|
ADD_SCALAR_HANDLING(OP)
|
||||||
|
|
||||||
bp::class_<isc::array,
|
bp::class_<sc::array,
|
||||||
std::shared_ptr<isc::array> >
|
std::shared_ptr<sc::array> >
|
||||||
( "array", bp::no_init)
|
( "array", bp::no_init)
|
||||||
.def("__init__", bp::make_constructor(detail::create_array, bp::default_call_policies(), (bp::arg("obj"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")= bp::object())))
|
.def("__init__", bp::make_constructor(detail::create_array, bp::default_call_policies(), (bp::arg("obj"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")= bp::object())))
|
||||||
.def(bp::init<isc::array_expression>())
|
.def(bp::init<sc::array_expression>())
|
||||||
.add_property("dtype", &isc::array::dtype)
|
.add_property("dtype", &sc::array::dtype)
|
||||||
.add_property("context", bp::make_function(&isc::array::context, bp::return_internal_reference<>()))
|
.add_property("context", bp::make_function(&sc::array::context, bp::return_internal_reference<>()))
|
||||||
.add_property("T", &isc::array::T)
|
.add_property("T", &sc::array::T)
|
||||||
.add_property("shape", &detail::get_shape)
|
.add_property("shape", &detail::get_shape)
|
||||||
ADD_ARRAY_OPERATOR(+)
|
ADD_ARRAY_OPERATOR(+)
|
||||||
ADD_ARRAY_OPERATOR(-)
|
ADD_ARRAY_OPERATOR(-)
|
||||||
@@ -327,7 +327,7 @@ void export_core()
|
|||||||
.def(bp::self_ns::str(bp::self_ns::self))
|
.def(bp::self_ns::str(bp::self_ns::self))
|
||||||
;
|
;
|
||||||
|
|
||||||
bp::class_<isc::scalar, bp::bases<isc::array> >
|
bp::class_<sc::scalar, bp::bases<sc::array> >
|
||||||
("scalar", bp::no_init)
|
("scalar", bp::no_init)
|
||||||
.def("__init__", bp::make_constructor(detail::construct_scalar, bp::default_call_policies(), (bp::arg(""), bp::arg("context")=bp::object())))
|
.def("__init__", bp::make_constructor(detail::construct_scalar, bp::default_call_policies(), (bp::arg(""), bp::arg("context")=bp::object())))
|
||||||
;
|
;
|
||||||
@@ -336,15 +336,15 @@ void export_core()
|
|||||||
bp::def("empty", &detail::create_empty_array, (bp::arg("shape"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")=bp::object()));
|
bp::def("empty", &detail::create_empty_array, (bp::arg("shape"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")=bp::object()));
|
||||||
|
|
||||||
//Assign
|
//Assign
|
||||||
bp::def("assign", static_cast<isc::array_expression (*)(isc::array const &, isc::array const &)>(&isc::assign));\
|
bp::def("assign", static_cast<sc::array_expression (*)(sc::array const &, sc::array const &)>(&sc::assign));\
|
||||||
bp::def("assign", static_cast<isc::array_expression (*)(isc::array const &, isc::array_expression const &)>(&isc::assign));\
|
bp::def("assign", static_cast<sc::array_expression (*)(sc::array const &, sc::array_expression const &)>(&sc::assign));\
|
||||||
|
|
||||||
//Binary
|
//Binary
|
||||||
#define MAP_FUNCTION(name) \
|
#define MAP_FUNCTION(name) \
|
||||||
bp::def(#name, static_cast<isc::array_expression (*)(isc::array const &, isc::array const &)>(&isc::name));\
|
bp::def(#name, static_cast<sc::array_expression (*)(sc::array const &, sc::array const &)>(&sc::name));\
|
||||||
bp::def(#name, static_cast<isc::array_expression (*)(isc::array_expression const &, isc::array const &)>(&isc::name));\
|
bp::def(#name, static_cast<sc::array_expression (*)(sc::array_expression const &, sc::array const &)>(&sc::name));\
|
||||||
bp::def(#name, static_cast<isc::array_expression (*)(isc::array const &, isc::array_expression const &)>(&isc::name));\
|
bp::def(#name, static_cast<sc::array_expression (*)(sc::array const &, sc::array_expression const &)>(&sc::name));\
|
||||||
bp::def(#name, static_cast<isc::array_expression (*)(isc::array_expression const &, isc::array_expression const &)>(&isc::name));
|
bp::def(#name, static_cast<sc::array_expression (*)(sc::array_expression const &, sc::array_expression const &)>(&sc::name));
|
||||||
|
|
||||||
MAP_FUNCTION(maximum)
|
MAP_FUNCTION(maximum)
|
||||||
MAP_FUNCTION(minimum)
|
MAP_FUNCTION(minimum)
|
||||||
@@ -354,8 +354,8 @@ void export_core()
|
|||||||
|
|
||||||
//Unary
|
//Unary
|
||||||
#define MAP_FUNCTION(name) \
|
#define MAP_FUNCTION(name) \
|
||||||
bp::def(#name, static_cast<isc::array_expression (*)(isc::array const &)>(&isc::name));\
|
bp::def(#name, static_cast<sc::array_expression (*)(sc::array const &)>(&sc::name));\
|
||||||
bp::def(#name, static_cast<isc::array_expression (*)(isc::array_expression const &)>(&isc::name));
|
bp::def(#name, static_cast<sc::array_expression (*)(sc::array_expression const &)>(&sc::name));
|
||||||
|
|
||||||
bp::def("zeros", &detail::create_zeros_array, (bp::arg("shape"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")=bp::object()));
|
bp::def("zeros", &detail::create_zeros_array, (bp::arg("shape"), bp::arg("dtype") = bp::scope().attr("float32"), bp::arg("context")=bp::object()));
|
||||||
|
|
||||||
@@ -380,8 +380,8 @@ void export_core()
|
|||||||
/*--- Reduction operators----*/
|
/*--- Reduction operators----*/
|
||||||
//---------------------------------------
|
//---------------------------------------
|
||||||
#define MAP_FUNCTION(name) \
|
#define MAP_FUNCTION(name) \
|
||||||
bp::def(#name, static_cast<isc::array_expression (*)(isc::array const &, isc::int_t)>(&isc::name));\
|
bp::def(#name, static_cast<sc::array_expression (*)(sc::array const &, sc::int_t)>(&sc::name));\
|
||||||
bp::def(#name, static_cast<isc::array_expression (*)(isc::array_expression const &, isc::int_t)>(&isc::name));
|
bp::def(#name, static_cast<sc::array_expression (*)(sc::array_expression const &, sc::int_t)>(&sc::name));
|
||||||
|
|
||||||
MAP_FUNCTION(sum)
|
MAP_FUNCTION(sum)
|
||||||
MAP_FUNCTION(max)
|
MAP_FUNCTION(max)
|
||||||
@@ -392,7 +392,7 @@ void export_core()
|
|||||||
|
|
||||||
/*--- Profiles----*/
|
/*--- Profiles----*/
|
||||||
//---------------------------------------
|
//---------------------------------------
|
||||||
bp::class_<isc::profiles::map_type>("profiles")
|
bp::class_<sc::profiles::map_type>("profiles")
|
||||||
.def("__getitem__", &detail::model_map_indexing::get_item, bp::return_internal_reference<>())
|
.def("__getitem__", &detail::model_map_indexing::get_item, bp::return_internal_reference<>())
|
||||||
.def("__setitem__", &detail::model_map_indexing::set_item, bp::with_custodian_and_ward<1,2>())
|
.def("__setitem__", &detail::model_map_indexing::set_item, bp::with_custodian_and_ward<1,2>())
|
||||||
;
|
;
|
||||||
|
@@ -10,7 +10,7 @@
|
|||||||
namespace detail
|
namespace detail
|
||||||
{
|
{
|
||||||
|
|
||||||
bp::list nv_compute_capability(isc::driver::Device const & device)
|
bp::list nv_compute_capability(sc::driver::Device const & device)
|
||||||
{
|
{
|
||||||
bp::list res;
|
bp::list res;
|
||||||
std::pair<unsigned int, unsigned int> cc = device.nv_compute_capability();
|
std::pair<unsigned int, unsigned int> cc = device.nv_compute_capability();
|
||||||
@@ -21,63 +21,63 @@ namespace detail
|
|||||||
|
|
||||||
bp::list get_platforms()
|
bp::list get_platforms()
|
||||||
{
|
{
|
||||||
std::vector<isc::driver::Platform> platforms;
|
std::vector<sc::driver::Platform> platforms;
|
||||||
isc::driver::backend::platforms(platforms);
|
sc::driver::backend::platforms(platforms);
|
||||||
return tools::to_list(platforms.begin(), platforms.end());
|
return tools::to_list(platforms.begin(), platforms.end());
|
||||||
}
|
}
|
||||||
|
|
||||||
bp::list get_devices(isc::driver::Platform const & platform)
|
bp::list get_devices(sc::driver::Platform const & platform)
|
||||||
{
|
{
|
||||||
std::vector<isc::driver::Device> devices;
|
std::vector<sc::driver::Device> devices;
|
||||||
platform.devices(devices);
|
platform.devices(devices);
|
||||||
return tools::to_list(devices.begin(), devices.end());
|
return tools::to_list(devices.begin(), devices.end());
|
||||||
}
|
}
|
||||||
|
|
||||||
bp::list get_queues(isc::driver::Context const & context)
|
bp::list get_queues(sc::driver::Context const & context)
|
||||||
{
|
{
|
||||||
std::vector<isc::driver::CommandQueue*> queues;
|
std::vector<sc::driver::CommandQueue*> queues;
|
||||||
isc::driver::backend::queues::get(context, queues);
|
sc::driver::backend::queues::get(context, queues);
|
||||||
bp::list res;
|
bp::list res;
|
||||||
for(isc::driver::CommandQueue* queue:queues)
|
for(sc::driver::CommandQueue* queue:queues)
|
||||||
res.append(*queue);
|
res.append(*queue);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr< isc::driver::CommandQueue> create_queue(isc::driver::Context const & context, isc::driver::Device const & device)
|
std::shared_ptr< sc::driver::CommandQueue> create_queue(sc::driver::Context const & context, sc::driver::Device const & device)
|
||||||
{
|
{
|
||||||
return std::shared_ptr<isc::driver::CommandQueue>(new isc::driver::CommandQueue(context, device));
|
return std::shared_ptr<sc::driver::CommandQueue>(new sc::driver::CommandQueue(context, device));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
std::string to_string(isc::driver::device_type type)
|
std::string to_string(sc::driver::device_type type)
|
||||||
{
|
{
|
||||||
if(type==isc::driver::DEVICE_TYPE_CPU) return "CPU";
|
if(type==sc::driver::DEVICE_TYPE_CPU) return "CPU";
|
||||||
if(type==isc::driver::DEVICE_TYPE_GPU) return "GPU";
|
if(type==sc::driver::DEVICE_TYPE_GPU) return "GPU";
|
||||||
if(type==isc::driver::DEVICE_TYPE_ACCELERATOR) return "ACCELERATOR";
|
if(type==sc::driver::DEVICE_TYPE_ACCELERATOR) return "ACCELERATOR";
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<isc::driver::Context> make_context(isc::driver::Device const & dev)
|
std::shared_ptr<sc::driver::Context> make_context(sc::driver::Device const & dev)
|
||||||
{ return std::shared_ptr<isc::driver::Context>(new isc::driver::Context(dev)); }
|
{ return std::shared_ptr<sc::driver::Context>(new sc::driver::Context(dev)); }
|
||||||
|
|
||||||
bp::object enqueue(isc::array_expression const & expression, unsigned int queue_id, bp::list dependencies, bool tune, int label, std::string const & program_name, bool force_recompile)
|
bp::object enqueue(sc::array_expression const & expression, unsigned int queue_id, bp::list dependencies, bool tune, int label, std::string const & program_name, bool force_recompile)
|
||||||
{
|
{
|
||||||
std::list<isc::driver::Event> events;
|
std::list<sc::driver::Event> events;
|
||||||
std::vector<isc::driver::Event> cdependencies = tools::to_vector<isc::driver::Event>(dependencies);
|
std::vector<sc::driver::Event> cdependencies = tools::to_vector<sc::driver::Event>(dependencies);
|
||||||
|
|
||||||
isc::execution_options_type execution_options(queue_id, &events, &cdependencies);
|
sc::execution_options_type execution_options(queue_id, &events, &cdependencies);
|
||||||
isc::dispatcher_options_type dispatcher_options(tune, label);
|
sc::dispatcher_options_type dispatcher_options(tune, label);
|
||||||
isc::compilation_options_type compilation_options(program_name, force_recompile);
|
sc::compilation_options_type compilation_options(program_name, force_recompile);
|
||||||
isc::array_expression::container_type::value_type root = expression.tree()[expression.root()];
|
sc::array_expression::container_type::value_type root = expression.tree()[expression.root()];
|
||||||
if(isc::detail::is_assignment(root.op))
|
if(sc::detail::is_assignment(root.op))
|
||||||
{
|
{
|
||||||
isc::execute(isc::control(expression, execution_options, dispatcher_options, compilation_options), isaac::profiles::get(execution_options.queue(expression.context())));
|
sc::execute(sc::control(expression, execution_options, dispatcher_options, compilation_options), isaac::profiles::get(execution_options.queue(expression.context())));
|
||||||
return bp::make_tuple(bp::ptr(root.lhs.array), tools::to_list(events.begin(), events.end()));
|
return bp::make_tuple(bp::ptr(root.lhs.array), tools::to_list(events.begin(), events.end()));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
std::shared_ptr<isc::array> parray(new isc::array(isc::control(expression, execution_options, dispatcher_options, compilation_options)));
|
std::shared_ptr<sc::array> parray(new sc::array(sc::control(expression, execution_options, dispatcher_options, compilation_options)));
|
||||||
return bp::make_tuple(parray, tools::to_list(events.begin(), events.end()));
|
return bp::make_tuple(parray, tools::to_list(events.begin(), events.end()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -88,7 +88,7 @@ default_driver_values_type default_driver_parameters;
|
|||||||
|
|
||||||
void export_driver()
|
void export_driver()
|
||||||
{
|
{
|
||||||
typedef std::vector<isc::driver::CommandQueue> queues_t;
|
typedef std::vector<sc::driver::CommandQueue> queues_t;
|
||||||
|
|
||||||
bp::object driver_module(bp::handle<>(bp::borrowed(PyImport_AddModule("isaac.driver"))));
|
bp::object driver_module(bp::handle<>(bp::borrowed(PyImport_AddModule("isaac.driver"))));
|
||||||
bp::scope().attr("driver") = driver_module;
|
bp::scope().attr("driver") = driver_module;
|
||||||
@@ -103,58 +103,58 @@ void export_driver()
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
bp::enum_<isc::driver::backend_type>
|
bp::enum_<sc::driver::backend_type>
|
||||||
("backend_type")
|
("backend_type")
|
||||||
.value("OPENCL", isc::driver::OPENCL)
|
.value("OPENCL", sc::driver::OPENCL)
|
||||||
#ifdef ISAAC_WITH_CUDA
|
#ifdef ISAAC_WITH_CUDA
|
||||||
.value("CUDA", isc::driver::CUDA)
|
.value("CUDA", sc::driver::CUDA)
|
||||||
#endif
|
#endif
|
||||||
;
|
;
|
||||||
|
|
||||||
bp::enum_<isc::driver::device_type>
|
bp::enum_<sc::driver::device_type>
|
||||||
("device_type")
|
("device_type")
|
||||||
.value("DEVICE_TYPE_GPU", isc::driver::DEVICE_TYPE_GPU)
|
.value("DEVICE_TYPE_GPU", sc::driver::DEVICE_TYPE_GPU)
|
||||||
.value("DEVICE_TYPE_CPU", isc::driver::DEVICE_TYPE_CPU)
|
.value("DEVICE_TYPE_CPU", sc::driver::DEVICE_TYPE_CPU)
|
||||||
;
|
;
|
||||||
|
|
||||||
|
|
||||||
bp::class_<isc::driver::Platform>("platform", bp::no_init)
|
bp::class_<sc::driver::Platform>("platform", bp::no_init)
|
||||||
.def("get_devices", &detail::get_devices)
|
.def("get_devices", &detail::get_devices)
|
||||||
.add_property("name",&isc::driver::Platform::name)
|
.add_property("name",&sc::driver::Platform::name)
|
||||||
;
|
;
|
||||||
|
|
||||||
bp::enum_<isaac::driver::Device::Vendor>
|
bp::enum_<isaac::driver::Device::Vendor>
|
||||||
("vendor")
|
("vendor")
|
||||||
.value("AMD", isc::driver::Device::Vendor::AMD)
|
.value("AMD", sc::driver::Device::Vendor::AMD)
|
||||||
.value("INTEL", isc::driver::Device::Vendor::INTEL)
|
.value("INTEL", sc::driver::Device::Vendor::INTEL)
|
||||||
.value("NVIDIA", isc::driver::Device::Vendor::NVIDIA)
|
.value("NVIDIA", sc::driver::Device::Vendor::NVIDIA)
|
||||||
.value("UNKNOWN", isc::driver::Device::Vendor::UNKNOWN)
|
.value("UNKNOWN", sc::driver::Device::Vendor::UNKNOWN)
|
||||||
;
|
;
|
||||||
|
|
||||||
bp::class_<isc::driver::Device>("device", bp::no_init)
|
bp::class_<sc::driver::Device>("device", bp::no_init)
|
||||||
.add_property("clock_rate", &isc::driver::Device::clock_rate)
|
.add_property("clock_rate", &sc::driver::Device::clock_rate)
|
||||||
.add_property("name", &isc::driver::Device::name)
|
.add_property("name", &sc::driver::Device::name)
|
||||||
.add_property("type", &isc::driver::Device::type)
|
.add_property("type", &sc::driver::Device::type)
|
||||||
.add_property("platform", &isc::driver::Device::platform)
|
.add_property("platform", &sc::driver::Device::platform)
|
||||||
.add_property("vendor", &isc::driver::Device::vendor)
|
.add_property("vendor", &sc::driver::Device::vendor)
|
||||||
.add_property("nv_compute_capability", &detail::nv_compute_capability)
|
.add_property("nv_compute_capability", &detail::nv_compute_capability)
|
||||||
;
|
;
|
||||||
|
|
||||||
bp::class_<isc::driver::Context, boost::noncopyable>("context", bp::no_init)
|
bp::class_<sc::driver::Context, boost::noncopyable>("context", bp::no_init)
|
||||||
.def("__init__", bp::make_constructor(&detail::make_context))
|
.def("__init__", bp::make_constructor(&detail::make_context))
|
||||||
.def("synchronize", &isc::driver::backend::synchronize)
|
.def("synchronize", &sc::driver::backend::synchronize)
|
||||||
.add_property("queues", &detail::get_queues)
|
.add_property("queues", &detail::get_queues)
|
||||||
.add_property("backend", &isc::driver::Context::backend)
|
.add_property("backend", &sc::driver::Context::backend)
|
||||||
;
|
;
|
||||||
|
|
||||||
bp::class_<isc::driver::CommandQueue>("command_queue", bp::init<isc::driver::Context const &, isc::driver::Device const &>())
|
bp::class_<sc::driver::CommandQueue>("command_queue", bp::init<sc::driver::Context const &, sc::driver::Device const &>())
|
||||||
.def("synchronize", &isc::driver::CommandQueue::synchronize)
|
.def("synchronize", &sc::driver::CommandQueue::synchronize)
|
||||||
.add_property("profiles", bp::make_function(&isc::profiles::get, bp::return_internal_reference<>()))
|
.add_property("profiles", bp::make_function(&sc::profiles::get, bp::return_internal_reference<>()))
|
||||||
.add_property("device", bp::make_function(&isc::driver::CommandQueue::device, bp::return_internal_reference<>()))
|
.add_property("device", bp::make_function(&sc::driver::CommandQueue::device, bp::return_internal_reference<>()))
|
||||||
;
|
;
|
||||||
|
|
||||||
bp::class_<isc::driver::Event>("event", bp::init<isc::driver::backend_type>())
|
bp::class_<sc::driver::Event>("event", bp::init<sc::driver::backend_type>())
|
||||||
.add_property("elapsed_time", &isc::driver::Event::elapsed_time)
|
.add_property("elapsed_time", &sc::driver::Event::elapsed_time)
|
||||||
;
|
;
|
||||||
|
|
||||||
bp::def("device_type_to_string", &detail::to_string);
|
bp::def("device_type_to_string", &detail::to_string);
|
||||||
@@ -164,8 +164,8 @@ void export_driver()
|
|||||||
bp::def("enqueue", &detail::enqueue, (bp::arg("expression"), bp::arg("queue_id") = 0, bp::arg("dependencies")=bp::list(), bp::arg("tune") = false, bp::arg("label")=-1, bp::arg("program_name")="", bp::arg("recompile") = false));
|
bp::def("enqueue", &detail::enqueue, (bp::arg("expression"), bp::arg("queue_id") = 0, bp::arg("dependencies")=bp::list(), bp::arg("tune") = false, bp::arg("label")=-1, bp::arg("program_name")="", bp::arg("recompile") = false));
|
||||||
|
|
||||||
bp::class_<default_driver_values_type>("default_type")
|
bp::class_<default_driver_values_type>("default_type")
|
||||||
.def_readwrite("queue_properties",&isc::driver::backend::default_queue_properties)
|
.def_readwrite("queue_properties",&sc::driver::backend::default_queue_properties)
|
||||||
.def_readwrite("device", &isc::driver::backend::default_device)
|
.def_readwrite("device", &sc::driver::backend::default_device)
|
||||||
;
|
;
|
||||||
|
|
||||||
bp::scope().attr("default") = bp::object(bp::ptr(&default_driver_parameters));
|
bp::scope().attr("default") = bp::object(bp::ptr(&default_driver_parameters));
|
||||||
|
@@ -13,7 +13,7 @@ namespace tpt = isaac::templates;
|
|||||||
|
|
||||||
namespace detail
|
namespace detail
|
||||||
{
|
{
|
||||||
bp::list input_sizes(tpt::base & temp, isc::expressions_tuple const & tree)
|
bp::list input_sizes(tpt::base & temp, sc::expressions_tuple const & tree)
|
||||||
{
|
{
|
||||||
std::vector<int> tmp = temp.input_sizes(tree);
|
std::vector<int> tmp = temp.input_sizes(tree);
|
||||||
return tools::to_list(tmp.begin(), tmp.end());
|
return tools::to_list(tmp.begin(), tmp.end());
|
||||||
|
@@ -4,19 +4,19 @@
|
|||||||
#include "isaac/array.h"
|
#include "isaac/array.h"
|
||||||
#include "isaac/wrap/clBLAS.h"
|
#include "isaac/wrap/clBLAS.h"
|
||||||
|
|
||||||
namespace isc = isaac;
|
namespace sc = isaac;
|
||||||
typedef isaac::int_t int_t;
|
typedef isaac::int_t int_t;
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void test_element_wise_vector(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T>& cy, simple_vector_base<T>& cz,
|
void test_element_wise_vector(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T>& cy, simple_vector_base<T>& cz,
|
||||||
isc::array& x, isc::array& y, isc::array& z)
|
sc::array& x, sc::array& y, sc::array& z)
|
||||||
{
|
{
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
int failure_count = 0;
|
int failure_count = 0;
|
||||||
isc::numeric_type dtype = x.dtype();
|
sc::numeric_type dtype = x.dtype();
|
||||||
isc::driver::Context const & context = x.context();
|
sc::driver::Context const & context = x.context();
|
||||||
isc::driver::CommandQueue queue = isc::driver::backend::queues::get(context,0);
|
sc::driver::CommandQueue queue = sc::driver::backend::queues::get(context,0);
|
||||||
cl_command_queue clqueue = queue.handle().cl();
|
cl_command_queue clqueue = queue.handle().cl();
|
||||||
int_t N = cz.size();
|
int_t N = cz.size();
|
||||||
|
|
||||||
@@ -113,7 +113,7 @@ void test_element_wise_vector(T epsilon, simple_vector_base<T> & cx, simple_vect
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void test_impl(T epsilon, isc::driver::Context const & ctx)
|
void test_impl(T epsilon, sc::driver::Context const & ctx)
|
||||||
{
|
{
|
||||||
using isaac::_;
|
using isaac::_;
|
||||||
|
|
||||||
@@ -140,11 +140,11 @@ int main()
|
|||||||
{
|
{
|
||||||
clblasSetup();
|
clblasSetup();
|
||||||
std::list<isaac::driver::Context const *> data;
|
std::list<isaac::driver::Context const *> data;
|
||||||
isc::driver::backend::contexts::get(data);
|
sc::driver::backend::contexts::get(data);
|
||||||
for(isaac::driver::Context const * context : data)
|
for(isaac::driver::Context const * context : data)
|
||||||
{
|
{
|
||||||
isc::driver::Device device = isc::driver::backend::queues::get(*context,0).device();
|
sc::driver::Device device = sc::driver::backend::queues::get(*context,0).device();
|
||||||
if(device.type() != isc::driver::DEVICE_TYPE_GPU)
|
if(device.type() != sc::driver::DEVICE_TYPE_GPU)
|
||||||
continue;
|
continue;
|
||||||
std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
|
std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
|
||||||
std::cout << "---" << std::endl;
|
std::cout << "---" << std::endl;
|
||||||
|
@@ -5,23 +5,23 @@
|
|||||||
#include "isaac/array.h"
|
#include "isaac/array.h"
|
||||||
#include "isaac/wrap/clBLAS.h"
|
#include "isaac/wrap/clBLAS.h"
|
||||||
|
|
||||||
namespace isc = isaac;
|
namespace sc = isaac;
|
||||||
typedef isc::int_t int_t;
|
typedef sc::int_t int_t;
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void test_reduction(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T> & cy,
|
void test_reduction(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T> & cy,
|
||||||
isc::array & x, isc::array & y)
|
sc::array & x, sc::array & y)
|
||||||
{
|
{
|
||||||
using namespace std;
|
using namespace std;
|
||||||
isc::driver::Context const & ctx = x.context();
|
sc::driver::Context const & ctx = x.context();
|
||||||
int_t N = cx.size();
|
int_t N = cx.size();
|
||||||
isc::driver::CommandQueue queue = isc::driver::backend::queues::get(ctx,0);
|
sc::driver::CommandQueue queue = sc::driver::backend::queues::get(ctx,0);
|
||||||
cl_command_queue clqueue = queue.handle().cl();
|
cl_command_queue clqueue = queue.handle().cl();
|
||||||
isc::array scratch(N, x.dtype());
|
sc::array scratch(N, x.dtype());
|
||||||
|
|
||||||
unsigned int failure_count = 0;
|
unsigned int failure_count = 0;
|
||||||
|
|
||||||
isaac::numeric_type dtype = isc::to_numeric_type<T>::value;
|
isaac::numeric_type dtype = sc::to_numeric_type<T>::value;
|
||||||
|
|
||||||
T cs = 0;
|
T cs = 0;
|
||||||
T tmp = 0;
|
T tmp = 0;
|
||||||
@@ -67,7 +67,7 @@ void test_reduction(T epsilon, simple_vector_base<T> & cx, simple_vector_base<T
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void test_impl(T epsilon, isc::driver::Context const & ctx)
|
void test_impl(T epsilon, sc::driver::Context const & ctx)
|
||||||
{
|
{
|
||||||
using isaac::_;
|
using isaac::_;
|
||||||
|
|
||||||
@@ -92,10 +92,10 @@ int main()
|
|||||||
{
|
{
|
||||||
clblasSetup();
|
clblasSetup();
|
||||||
std::list<isaac::driver::Context const *> data;
|
std::list<isaac::driver::Context const *> data;
|
||||||
isc::driver::backend::contexts::get(data);
|
sc::driver::backend::contexts::get(data);
|
||||||
for(isaac::driver::Context const * context : data)
|
for(isaac::driver::Context const * context : data)
|
||||||
{
|
{
|
||||||
isc::driver::Device device = isc::driver::backend::queues::get(*context,0).device();
|
sc::driver::Device device = sc::driver::backend::queues::get(*context,0).device();
|
||||||
std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
|
std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
|
||||||
std::cout << "---" << std::endl;
|
std::cout << "---" << std::endl;
|
||||||
std::cout << ">> float" << std::endl;
|
std::cout << ">> float" << std::endl;
|
||||||
|
@@ -3,23 +3,23 @@
|
|||||||
#include "isaac/array.h"
|
#include "isaac/array.h"
|
||||||
#include "isaac/wrap/clBLAS.h"
|
#include "isaac/wrap/clBLAS.h"
|
||||||
|
|
||||||
namespace isc = isaac;
|
namespace sc = isaac;
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void test_impl(T epsilon, simple_matrix_base<T> & cC, simple_matrix_base<T> const & cA, simple_matrix_base<T> const & cB,
|
void test_impl(T epsilon, simple_matrix_base<T> & cC, simple_matrix_base<T> const & cA, simple_matrix_base<T> const & cB,
|
||||||
isc::array & C, isc::array const & A, isc::array const & AT, isc::array const & B, isc::array const & BT,
|
sc::array & C, sc::array const & A, sc::array const & AT, sc::array const & B, sc::array const & BT,
|
||||||
interface_t interf, const char * prefix)
|
interface_t interf, const char * prefix)
|
||||||
{
|
{
|
||||||
int failure_count = 0;
|
int failure_count = 0;
|
||||||
|
|
||||||
isc::int_t M = C.shape()[0];
|
sc::int_t M = C.shape()[0];
|
||||||
isc::int_t N = C.shape()[1];
|
sc::int_t N = C.shape()[1];
|
||||||
isc::int_t K = A.shape()[1];
|
sc::int_t K = A.shape()[1];
|
||||||
|
|
||||||
T alpha = 1;
|
T alpha = 1;
|
||||||
T beta = 0;
|
T beta = 0;
|
||||||
|
|
||||||
isc::driver::CommandQueue queue = isc::driver::backend::queues::get(C.context(),0);
|
sc::driver::CommandQueue queue = sc::driver::backend::queues::get(C.context(),0);
|
||||||
|
|
||||||
for(int i = 0 ; i < M ; ++i)
|
for(int i = 0 ; i < M ; ++i)
|
||||||
{
|
{
|
||||||
@@ -43,7 +43,7 @@ void test_impl(T epsilon, simple_matrix_base<T> & cC, simple_matrix_base<T> cons
|
|||||||
std::cout << "[" << prefix << "] \t" << NAME << "..." << std::flush;\
|
std::cout << "[" << prefix << "] \t" << NAME << "..." << std::flush;\
|
||||||
GPU_OP;\
|
GPU_OP;\
|
||||||
queue.synchronize();\
|
queue.synchronize();\
|
||||||
isc::copy(C, buffer);\
|
sc::copy(C, buffer);\
|
||||||
if(diff(buffer, cCbuffer, epsilon))\
|
if(diff(buffer, cCbuffer, epsilon))\
|
||||||
{\
|
{\
|
||||||
failure_count++;\
|
failure_count++;\
|
||||||
@@ -94,7 +94,7 @@ void test_impl(T epsilon, simple_matrix_base<T> & cC, simple_matrix_base<T> cons
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void test_impl(T epsilon, isc::driver::Context const & ctx)
|
void test_impl(T epsilon, sc::driver::Context const & ctx)
|
||||||
{
|
{
|
||||||
int_t M = 173;
|
int_t M = 173;
|
||||||
int_t N = 241;
|
int_t N = 241;
|
||||||
@@ -126,11 +126,11 @@ int main()
|
|||||||
{
|
{
|
||||||
clblasSetup();
|
clblasSetup();
|
||||||
std::list<isaac::driver::Context const *> data;
|
std::list<isaac::driver::Context const *> data;
|
||||||
isc::driver::backend::contexts::get(data);
|
sc::driver::backend::contexts::get(data);
|
||||||
for(isaac::driver::Context const * context : data)
|
for(isaac::driver::Context const * context : data)
|
||||||
{
|
{
|
||||||
isc::driver::Device device = isc::driver::backend::queues::get(*context,0).device();
|
sc::driver::Device device = sc::driver::backend::queues::get(*context,0).device();
|
||||||
if(device.type() != isc::driver::DEVICE_TYPE_GPU)
|
if(device.type() != sc::driver::DEVICE_TYPE_GPU)
|
||||||
continue;
|
continue;
|
||||||
std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
|
std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
|
||||||
std::cout << "---" << std::endl;
|
std::cout << "---" << std::endl;
|
||||||
|
@@ -5,16 +5,16 @@
|
|||||||
#include "isaac/array.h"
|
#include "isaac/array.h"
|
||||||
#include "isaac/wrap/clBLAS.h"
|
#include "isaac/wrap/clBLAS.h"
|
||||||
|
|
||||||
namespace isc = isaac;
|
namespace sc = isaac;
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void test_row_wise_reduction(T epsilon, simple_vector_base<T> & cy, simple_matrix_base<T> const & cA, simple_vector_base<T> & cx,
|
void test_row_wise_reduction(T epsilon, simple_vector_base<T> & cy, simple_matrix_base<T> const & cA, simple_vector_base<T> & cx,
|
||||||
isc::array & y, isc::array const & A, isc::array & x, interface_t interf, const char * prefix)
|
sc::array & y, sc::array const & A, sc::array & x, interface_t interf, const char * prefix)
|
||||||
{
|
{
|
||||||
int failure_count = 0;
|
int failure_count = 0;
|
||||||
|
|
||||||
isc::int_t M = A.shape()[0];
|
sc::int_t M = A.shape()[0];
|
||||||
isc::int_t N = A.shape()[1];
|
sc::int_t N = A.shape()[1];
|
||||||
|
|
||||||
simple_vector<T> bufy(M);
|
simple_vector<T> bufy(M);
|
||||||
simple_vector<T> bufx(N);
|
simple_vector<T> bufx(N);
|
||||||
@@ -22,7 +22,7 @@ void test_row_wise_reduction(T epsilon, simple_vector_base<T> & cy, simple_matri
|
|||||||
T alpha = static_cast<T>(4.2);
|
T alpha = static_cast<T>(4.2);
|
||||||
T beta = static_cast<T>(5.6);
|
T beta = static_cast<T>(5.6);
|
||||||
|
|
||||||
isc::driver::CommandQueue queue = isc::driver::backend::queues::get(y.context(),0);
|
sc::driver::CommandQueue queue = sc::driver::backend::queues::get(y.context(),0);
|
||||||
|
|
||||||
T yi = 0, xi = 0;
|
T yi = 0, xi = 0;
|
||||||
#define TEST_OPERATION(NAME, SIZE1, SIZE2, NEUTRAL, REDUCTION, ASSIGNMENT, GPU_REDUCTION, RES, BUF, CRES)\
|
#define TEST_OPERATION(NAME, SIZE1, SIZE2, NEUTRAL, REDUCTION, ASSIGNMENT, GPU_REDUCTION, RES, BUF, CRES)\
|
||||||
@@ -37,7 +37,7 @@ void test_row_wise_reduction(T epsilon, simple_vector_base<T> & cy, simple_matri
|
|||||||
}\
|
}\
|
||||||
GPU_REDUCTION;\
|
GPU_REDUCTION;\
|
||||||
queue.synchronize();\
|
queue.synchronize();\
|
||||||
isc::copy(RES, BUF.data());\
|
sc::copy(RES, BUF.data());\
|
||||||
if(diff(CRES, BUF, epsilon))\
|
if(diff(CRES, BUF, epsilon))\
|
||||||
{\
|
{\
|
||||||
failure_count++;\
|
failure_count++;\
|
||||||
@@ -90,7 +90,7 @@ void test_row_wise_reduction(T epsilon, simple_vector_base<T> & cy, simple_matri
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void test_impl(T epsilon, isc::driver::Context const & ctx)
|
void test_impl(T epsilon, sc::driver::Context const & ctx)
|
||||||
{
|
{
|
||||||
int_t M = 173;
|
int_t M = 173;
|
||||||
int_t N = 241;
|
int_t N = 241;
|
||||||
@@ -116,10 +116,10 @@ int main()
|
|||||||
{
|
{
|
||||||
clblasSetup();
|
clblasSetup();
|
||||||
std::list<isaac::driver::Context const *> data;
|
std::list<isaac::driver::Context const *> data;
|
||||||
isc::driver::backend::contexts::get(data);
|
sc::driver::backend::contexts::get(data);
|
||||||
for(isaac::driver::Context const * context : data)
|
for(isaac::driver::Context const * context : data)
|
||||||
{
|
{
|
||||||
isc::driver::Device device = isc::driver::backend::queues::get(*context,0).device();
|
sc::driver::Device device = sc::driver::backend::queues::get(*context,0).device();
|
||||||
std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
|
std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
|
||||||
std::cout << "---" << std::endl;
|
std::cout << "---" << std::endl;
|
||||||
std::cout << ">> float" << std::endl;
|
std::cout << ">> float" << std::endl;
|
||||||
|
@@ -2,18 +2,18 @@
|
|||||||
#include "common.hpp"
|
#include "common.hpp"
|
||||||
#include "isaac/array.h"
|
#include "isaac/array.h"
|
||||||
|
|
||||||
namespace isc = isaac;
|
namespace sc = isaac;
|
||||||
typedef isaac::int_t int_t;
|
typedef isaac::int_t int_t;
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void test(T epsilon, simple_matrix_base<T> & cA, simple_matrix_base<T>& cB, simple_matrix_base<T>& cC, simple_vector_base<T>& cx, simple_vector_base<T>& cy,
|
void test(T epsilon, simple_matrix_base<T> & cA, simple_matrix_base<T>& cB, simple_matrix_base<T>& cC, simple_vector_base<T>& cx, simple_vector_base<T>& cy,
|
||||||
isc::array& A, isc::array& B, isc::array& C, isc::array& x, isc::array& y)
|
sc::array& A, sc::array& B, sc::array& C, sc::array& x, sc::array& y)
|
||||||
{
|
{
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
int failure_count = 0;
|
int failure_count = 0;
|
||||||
isc::numeric_type dtype = C.dtype();
|
sc::numeric_type dtype = C.dtype();
|
||||||
isc::driver::Context const & ctx = C.context();
|
sc::driver::Context const & ctx = C.context();
|
||||||
|
|
||||||
int_t M = cC.size1();
|
int_t M = cC.size1();
|
||||||
int_t N = cC.size2();
|
int_t N = cC.size2();
|
||||||
@@ -100,7 +100,7 @@ void test(T epsilon, simple_matrix_base<T> & cA, simple_matrix_base<T>& cB, simp
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void test_impl(T epsilon, isc::driver::Context const & ctx)
|
void test_impl(T epsilon, sc::driver::Context const & ctx)
|
||||||
{
|
{
|
||||||
using isaac::_;
|
using isaac::_;
|
||||||
|
|
||||||
@@ -126,10 +126,10 @@ void test_impl(T epsilon, isc::driver::Context const & ctx)
|
|||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
std::list<isaac::driver::Context const *> data;
|
std::list<isaac::driver::Context const *> data;
|
||||||
isc::driver::backend::contexts::get(data);
|
sc::driver::backend::contexts::get(data);
|
||||||
for(isaac::driver::Context const * context : data)
|
for(isaac::driver::Context const * context : data)
|
||||||
{
|
{
|
||||||
isc::driver::Device device = isc::driver::backend::queues::get(*context,0).device();
|
sc::driver::Device device = sc::driver::backend::queues::get(*context,0).device();
|
||||||
std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
|
std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl;
|
||||||
std::cout << "---" << std::endl;
|
std::cout << "---" << std::endl;
|
||||||
std::cout << ">> float" << std::endl;
|
std::cout << ">> float" << std::endl;
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
import isaac as isc
|
import isaac as sc
|
||||||
import random
|
import random
|
||||||
|
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
@@ -14,10 +14,10 @@ from numpy import cumsum
|
|||||||
|
|
||||||
import tools
|
import tools
|
||||||
|
|
||||||
fetch_types = [isc.templates.FETCH_FROM_GLOBAL_CONTIGUOUS,
|
fetch_types = [sc.templates.FETCH_FROM_GLOBAL_CONTIGUOUS,
|
||||||
isc.templates.FETCH_FROM_GLOBAL_STRIDED,
|
sc.templates.FETCH_FROM_GLOBAL_STRIDED,
|
||||||
isc.templates.FETCH_FROM_LOCAL,
|
sc.templates.FETCH_FROM_LOCAL,
|
||||||
isc.templates.FETCH_FROM_LOCAL]
|
sc.templates.FETCH_FROM_LOCAL]
|
||||||
|
|
||||||
def exhaustive(template, sizes, context):
|
def exhaustive(template, sizes, context):
|
||||||
tree, _ = tools.tree_of(template, sizes, context)
|
tree, _ = tools.tree_of(template, sizes, context)
|
||||||
@@ -34,7 +34,7 @@ def exhaustive(template, sizes, context):
|
|||||||
time = tools.benchmark(template, parameters, tree)
|
time = tools.benchmark(template, parameters, tree)
|
||||||
if not best or time < best[1]:
|
if not best or time < best[1]:
|
||||||
best = parameters, time
|
best = parameters, time
|
||||||
except (isc.OperationNotSupported, isc.LaunchOutOfResources, isc.MemObjectAllocationFailure):
|
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
|
||||||
pass
|
pass
|
||||||
if best:
|
if best:
|
||||||
stdout.write('%.2f %% | Best %.2f [ for %s ]\r'%(float(idx*100)/len(ranges),metric(sizes, best[1]), best[0]))
|
stdout.write('%.2f %% | Best %.2f [ for %s ]\r'%(float(idx*100)/len(ranges),metric(sizes, best[1]), best[0]))
|
||||||
@@ -100,7 +100,7 @@ def genetic(template, sizes, context, naccept=200, niter = 1000, cxpb=0.4, mutpb
|
|||||||
try:
|
try:
|
||||||
individual.fitness.values = toolbox.evaluate(genome)
|
individual.fitness.values = toolbox.evaluate(genome)
|
||||||
population += [individual]
|
population += [individual]
|
||||||
except (isc.OperationNotSupported, isc.LaunchOutOfResources, isc.MemObjectAllocationFailure ):
|
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure ):
|
||||||
pass
|
pass
|
||||||
genome = encode(list(initializer.next()))
|
genome = encode(list(initializer.next()))
|
||||||
hof.update(population)
|
hof.update(population)
|
||||||
@@ -134,7 +134,7 @@ def genetic(template, sizes, context, naccept=200, niter = 1000, cxpb=0.4, mutpb
|
|||||||
#Reproduction
|
#Reproduction
|
||||||
else:
|
else:
|
||||||
offspring += [random.choice(population)]
|
offspring += [random.choice(population)]
|
||||||
except (isc.OperationNotSupported, isc.LaunchOutOfResources, isc.MemObjectAllocationFailure):
|
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
@@ -159,21 +159,21 @@ def is_local_optimum(parameters, template, sizes, context):
|
|||||||
tree, _ = tools.tree_of(template, sizes, context)
|
tree, _ = tools.tree_of(template, sizes, context)
|
||||||
genetic_infos = tools.genetic_infos_of(template)
|
genetic_infos = tools.genetic_infos_of(template)
|
||||||
|
|
||||||
if issubclass(template, isc.templates.axpy):
|
if issubclass(template, sc.templates.axpy):
|
||||||
sweep_over = [0,1,2]
|
sweep_over = [0,1,2]
|
||||||
elif issubclass(template, isc.templates.dot):
|
elif issubclass(template, sc.templates.dot):
|
||||||
sweep_over = [0,1,2]
|
sweep_over = [0,1,2]
|
||||||
elif issubclass(template, isc.templates.ger):
|
elif issubclass(template, sc.templates.ger):
|
||||||
sweep_over = [0,1,2,3,4]
|
sweep_over = [0,1,2,3,4]
|
||||||
elif issubclass(template, isc.templates.gemv):
|
elif issubclass(template, sc.templates.gemv):
|
||||||
sweep_over = [0,1,2,3,4]
|
sweep_over = [0,1,2,3,4]
|
||||||
elif issubclass(template, isc.templates.gemm):
|
elif issubclass(template, sc.templates.gemm):
|
||||||
sweep_over = [1,3,5,7]
|
sweep_over = [1,3,5,7]
|
||||||
|
|
||||||
#Evaluate the provided parameters guess
|
#Evaluate the provided parameters guess
|
||||||
try:
|
try:
|
||||||
reference = tools.benchmark(template, parameters, tree)
|
reference = tools.benchmark(template, parameters, tree)
|
||||||
except (isc.OperationNotSupported, isc.LaunchOutOfResources, isc.MemObjectAllocationFailure):
|
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
#Latency bound -- ignore
|
#Latency bound -- ignore
|
||||||
@@ -190,7 +190,7 @@ def is_local_optimum(parameters, template, sizes, context):
|
|||||||
time = tools.benchmark(template, x, tree)
|
time = tools.benchmark(template, x, tree)
|
||||||
if time/reference < .97:
|
if time/reference < .97:
|
||||||
return False
|
return False
|
||||||
except (isc.OperationNotSupported, isc.LaunchOutOfResources, isc.MemObjectAllocationFailure):
|
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
|
||||||
pass
|
pass
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
import isaac as isc
|
import isaac as sc
|
||||||
from numpy import mean, median
|
from numpy import mean, median
|
||||||
from math import ceil, exp, log, sqrt
|
from math import ceil, exp, log, sqrt
|
||||||
|
|
||||||
@@ -21,13 +21,13 @@ def expspace(a,b,N,r=128):
|
|||||||
|
|
||||||
def benchmark(template, setting, tree):
|
def benchmark(template, setting, tree):
|
||||||
queue = tree.context.queues[0]
|
queue = tree.context.queues[0]
|
||||||
queue.profiles[template, isc.float32] = isc.profile(template(*setting), isc.float32, queue)
|
queue.profiles[template, sc.float32] = sc.profile(template(*setting), sc.float32, queue)
|
||||||
times = []
|
times = []
|
||||||
total = 0
|
total = 0
|
||||||
i = 0
|
i = 0
|
||||||
while total < 1e-2:
|
while total < 1e-2:
|
||||||
#z = isc.zeros(1, 10000000, isc.float32, tree.context)
|
#z = sc.zeros(1, 10000000, sc.float32, tree.context)
|
||||||
z, events = isc.driver.enqueue(tree)
|
z, events = sc.driver.enqueue(tree)
|
||||||
tree.context.queues[0].synchronize()
|
tree.context.queues[0].synchronize()
|
||||||
times.append(1e-9*sum([e.elapsed_time for e in events]))
|
times.append(1e-9*sum([e.elapsed_time for e in events]))
|
||||||
total += times[-1]
|
total += times[-1]
|
||||||
@@ -36,67 +36,67 @@ def benchmark(template, setting, tree):
|
|||||||
|
|
||||||
|
|
||||||
def tree_of(template, sizes, context):
|
def tree_of(template, sizes, context):
|
||||||
if issubclass(template, isc.templates.axpy):
|
if issubclass(template, sc.templates.axpy):
|
||||||
N, = sizes
|
N, = sizes
|
||||||
x = isc.empty(N, dtype=isc.float32, context=context)
|
x = sc.empty(N, dtype=sc.float32, context=context)
|
||||||
y = isc.empty(N, dtype=isc.float32, context=context)
|
y = sc.empty(N, dtype=sc.float32, context=context)
|
||||||
return x + y, (x, y)
|
return x + y, (x, y)
|
||||||
elif issubclass(template, isc.templates.dot):
|
elif issubclass(template, sc.templates.dot):
|
||||||
N, = sizes
|
N, = sizes
|
||||||
x = isc.empty(N, context=context)
|
x = sc.empty(N, context=context)
|
||||||
y = isc.empty(N, context=context)
|
y = sc.empty(N, context=context)
|
||||||
return isc.dot(x, y), (x, y)
|
return sc.dot(x, y), (x, y)
|
||||||
elif issubclass(template, isc.templates.ger):
|
elif issubclass(template, sc.templates.ger):
|
||||||
M, N = sizes
|
M, N = sizes
|
||||||
A = isc.empty((M,N), context=context)
|
A = sc.empty((M,N), context=context)
|
||||||
B = isc.empty((M,N), context=context)
|
B = sc.empty((M,N), context=context)
|
||||||
return A + B, (A, B)
|
return A + B, (A, B)
|
||||||
elif issubclass(template, isc.templates.gemv):
|
elif issubclass(template, sc.templates.gemv):
|
||||||
T = template is isc.templates.gemv_t
|
T = template is sc.templates.gemv_t
|
||||||
M, N = sizes[::-1] if T else sizes
|
M, N = sizes[::-1] if T else sizes
|
||||||
A = isc.empty((M,N), context=context)
|
A = sc.empty((M,N), context=context)
|
||||||
x = isc.empty(N, context=context)
|
x = sc.empty(N, context=context)
|
||||||
return isc.dot(A.T, x) if T else isc.dot(A, x), (A, x)
|
return sc.dot(A.T, x) if T else sc.dot(A, x), (A, x)
|
||||||
elif issubclass(template, isc.templates.gemm):
|
elif issubclass(template, sc.templates.gemm):
|
||||||
AT = template is isc.templates.gemm_tn or template is isc.templates.gemm_tt
|
AT = template is sc.templates.gemm_tn or template is sc.templates.gemm_tt
|
||||||
BT = template is isc.templates.gemm_nt or template is isc.templates.gemm_tt
|
BT = template is sc.templates.gemm_nt or template is sc.templates.gemm_tt
|
||||||
M, N, K = sizes
|
M, N, K = sizes
|
||||||
A = isc.empty((K, M) if AT else (M, K), context=context)
|
A = sc.empty((K, M) if AT else (M, K), context=context)
|
||||||
B = isc.empty((N, K) if BT else (K, N), context=context)
|
B = sc.empty((N, K) if BT else (K, N), context=context)
|
||||||
AA = A.T if AT else A
|
AA = A.T if AT else A
|
||||||
BB = B.T if BT else B
|
BB = B.T if BT else B
|
||||||
return isc.dot(AA, BB), (A, B)
|
return sc.dot(AA, BB), (A, B)
|
||||||
|
|
||||||
def memory_footprint(template, sizes):
|
def memory_footprint(template, sizes):
|
||||||
if issubclass(template, isc.templates.axpy):
|
if issubclass(template, sc.templates.axpy):
|
||||||
return 4*3*sizes[0]*1e-9
|
return 4*3*sizes[0]*1e-9
|
||||||
elif issubclass(template, isc.templates.dot):
|
elif issubclass(template, sc.templates.dot):
|
||||||
return 4*2*sizes[0]*1e-9
|
return 4*2*sizes[0]*1e-9
|
||||||
elif issubclass(template, isc.templates.ger):
|
elif issubclass(template, sc.templates.ger):
|
||||||
return 4*3*sizes[0]*sizes[1]*1e-9
|
return 4*3*sizes[0]*sizes[1]*1e-9
|
||||||
elif issubclass(template, isc.templates.gemv):
|
elif issubclass(template, sc.templates.gemv):
|
||||||
return 4*sizes[0]*sizes[1]*1e-9
|
return 4*sizes[0]*sizes[1]*1e-9
|
||||||
elif issubclass(template, isc.templates.gemm):
|
elif issubclass(template, sc.templates.gemm):
|
||||||
return 4*(sizes[0]*sizes[1] + sizes[0]*sizes[2] + sizes[1]*sizes[2])*1e-9
|
return 4*(sizes[0]*sizes[1] + sizes[0]*sizes[2] + sizes[1]*sizes[2])*1e-9
|
||||||
|
|
||||||
def metric_of(template):
|
def metric_of(template):
|
||||||
memory_bound = [isc.templates.axpy, isc.templates.dot, isc.templates.ger, isc.templates.gemv]
|
memory_bound = [sc.templates.axpy, sc.templates.dot, sc.templates.ger, sc.templates.gemv]
|
||||||
compute_bound = [isc.templates.gemm]
|
compute_bound = [sc.templates.gemm]
|
||||||
if any([issubclass(template, x) for x in memory_bound]):
|
if any([issubclass(template, x) for x in memory_bound]):
|
||||||
return lambda sizes, t: memory_footprint(template, sizes)/t
|
return lambda sizes, t: memory_footprint(template, sizes)/t
|
||||||
elif any([issubclass(template, x) for x in compute_bound]):
|
elif any([issubclass(template, x) for x in compute_bound]):
|
||||||
return lambda sizes, t: 2*sizes[0]*sizes[1]*sizes[2]*1e-9/t
|
return lambda sizes, t: 2*sizes[0]*sizes[1]*sizes[2]*1e-9/t
|
||||||
|
|
||||||
def genetic_infos_of(template):
|
def genetic_infos_of(template):
|
||||||
if issubclass(template, isc.templates.axpy):
|
if issubclass(template, sc.templates.axpy):
|
||||||
return {'categorical': [3], 'nbits': [3,4,4,2] }
|
return {'categorical': [3], 'nbits': [3,4,4,2] }
|
||||||
elif issubclass(template, isc.templates.dot):
|
elif issubclass(template, sc.templates.dot):
|
||||||
return {'categorical': [3], 'nbits':[3,4,4,2]}
|
return {'categorical': [3], 'nbits':[3,4,4,2]}
|
||||||
elif issubclass(template, isc.templates.ger):
|
elif issubclass(template, sc.templates.ger):
|
||||||
return {'categorical': [5], 'nbits': [3,3,3,3,4,2]}
|
return {'categorical': [5], 'nbits': [3,3,3,3,4,2]}
|
||||||
elif issubclass(template, isc.templates.gemv):
|
elif issubclass(template, sc.templates.gemv):
|
||||||
return {'categorical': [5], 'nbits': [3,3,3,3,4,2]}
|
return {'categorical': [5], 'nbits': [3,3,3,3,4,2]}
|
||||||
elif issubclass(template, isc.templates.gemm):
|
elif issubclass(template, sc.templates.gemm):
|
||||||
return {'categorical': [8,9], 'nbits': [3,3,3,3,3,2,2,2,2,2,3,3]}
|
return {'categorical': [8,9], 'nbits': [3,3,3,3,3,2,2,2,2,2,3,3]}
|
||||||
|
|
||||||
|
|
||||||
|
48
tune/tune.py
48
tune/tune.py
@@ -4,7 +4,7 @@ from itertools import chain, product
|
|||||||
from numpy import argsort, argmax
|
from numpy import argsort, argmax
|
||||||
from operator import mul
|
from operator import mul
|
||||||
from sklearn import ensemble
|
from sklearn import ensemble
|
||||||
import isaac as isc
|
import isaac as sc
|
||||||
import optimize, tools, model
|
import optimize, tools, model
|
||||||
|
|
||||||
from json import encoder
|
from json import encoder
|
||||||
@@ -22,40 +22,40 @@ def pow2range(a, b):
|
|||||||
|
|
||||||
def tune(device, operation, json_path):
|
def tune(device, operation, json_path):
|
||||||
#List devices
|
#List devices
|
||||||
platforms = isc.driver.get_platforms()
|
platforms = sc.driver.get_platforms()
|
||||||
context = isc.driver.context(device)
|
context = sc.driver.context(device)
|
||||||
|
|
||||||
#List of size tuples to use
|
#List of size tuples to use
|
||||||
sizes = {}
|
sizes = {}
|
||||||
sizes[isc.templates.axpy] = [(x,) for x in tools.expspace(1e3, 1e8, 4)]
|
sizes[sc.templates.axpy] = [(x,) for x in tools.expspace(1e3, 1e8, 4)]
|
||||||
sizes[isc.templates.gemv_n] = product(pow2range(4,17), pow2range(4,17))
|
sizes[sc.templates.gemv_n] = product(pow2range(4,17), pow2range(4,17))
|
||||||
sizes[isc.templates.gemv_t] = sizes[isc.templates.gemv_n]
|
sizes[sc.templates.gemv_t] = sizes[sc.templates.gemv_n]
|
||||||
sizes[isc.templates.gemm_nn] = product(pow2range(6, 12), pow2range(6, 12), pow2range(6, 12))
|
sizes[sc.templates.gemm_nn] = product(pow2range(6, 12), pow2range(6, 12), pow2range(6, 12))
|
||||||
sizes[isc.templates.gemm_tn] = sizes[isc.templates.gemm_nn]
|
sizes[sc.templates.gemm_tn] = sizes[sc.templates.gemm_nn]
|
||||||
sizes[isc.templates.gemm_nt] = sizes[isc.templates.gemm_nn]
|
sizes[sc.templates.gemm_nt] = sizes[sc.templates.gemm_nn]
|
||||||
sizes[isc.templates.gemm_tt] = sizes[isc.templates.gemm_nn]
|
sizes[sc.templates.gemm_tt] = sizes[sc.templates.gemm_nn]
|
||||||
|
|
||||||
|
|
||||||
#Quick tuning - AlexNet sizes + Intuition
|
#Quick tuning - AlexNet sizes + Intuition
|
||||||
sizes[isc.templates.ger] = [(1536,1536)]
|
sizes[sc.templates.ger] = [(1536,1536)]
|
||||||
|
|
||||||
sizes[isc.templates.gemv_n] = [(1000,256),
|
sizes[sc.templates.gemv_n] = [(1000,256),
|
||||||
(4096,256)]
|
(4096,256)]
|
||||||
sizes[isc.templates.gemv_t] = [(169,256),
|
sizes[sc.templates.gemv_t] = [(169,256),
|
||||||
(169,384),
|
(169,384),
|
||||||
(729,256),
|
(729,256),
|
||||||
(3025,96)]
|
(3025,96)]
|
||||||
|
|
||||||
sizes[isc.templates.gemm_nn] = [(3025,96,363),
|
sizes[sc.templates.gemm_nn] = [(3025,96,363),
|
||||||
(729,128,1200),
|
(729,128,1200),
|
||||||
(169,384,2304),
|
(169,384,2304),
|
||||||
(169,192,1728),
|
(169,192,1728),
|
||||||
(169,128,1728)]
|
(169,128,1728)]
|
||||||
sizes[isc.templates.gemm_nt] = [(169,1728,128),
|
sizes[sc.templates.gemm_nt] = [(169,1728,128),
|
||||||
(169,1728,192),
|
(169,1728,192),
|
||||||
(169,2304,384),
|
(169,2304,384),
|
||||||
(729,1200,128)]
|
(729,1200,128)]
|
||||||
sizes[isc.templates.gemm_tn] = [(1728,128,169),
|
sizes[sc.templates.gemm_tn] = [(1728,128,169),
|
||||||
(1728,192,169),
|
(1728,192,169),
|
||||||
(2304,384,169),
|
(2304,384,169),
|
||||||
(1200,128,729),
|
(1200,128,729),
|
||||||
@@ -102,7 +102,7 @@ def tune(device, operation, json_path):
|
|||||||
try:
|
try:
|
||||||
time = tools.benchmark(operation, new, _tree)
|
time = tools.benchmark(operation, new, _tree)
|
||||||
perf = performance(xx, time)
|
perf = performance(xx, time)
|
||||||
except (isc.OperationNotSupported, isc.LaunchOutOfResources, isc.MemObjectAllocationFailure):
|
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
|
||||||
perf = 0
|
perf = 0
|
||||||
yy.append(0 if isinf(perf) else perf)
|
yy.append(0 if isinf(perf) else perf)
|
||||||
#Update dataset
|
#Update dataset
|
||||||
@@ -111,7 +111,7 @@ def tune(device, operation, json_path):
|
|||||||
for ip, p in enumerate(profiles):
|
for ip, p in enumerate(profiles):
|
||||||
try:
|
try:
|
||||||
perf = 0 if fastest and ip < nparams and predperf[ip]/fastest < .1 else performance(x,tools.benchmark(operation, p, tree))
|
perf = 0 if fastest and ip < nparams and predperf[ip]/fastest < .1 else performance(x,tools.benchmark(operation, p, tree))
|
||||||
except (isc.OperationNotSupported, isc.LaunchOutOfResources, isc.MemObjectAllocationFailure):
|
except (sc.OperationNotSupported, sc.LaunchOutOfResources, sc.MemObjectAllocationFailure):
|
||||||
perf = 0
|
perf = 0
|
||||||
y.append(0 if isinf(perf) else perf)
|
y.append(0 if isinf(perf) else perf)
|
||||||
X.append(x)
|
X.append(x)
|
||||||
@@ -141,7 +141,7 @@ def tune(device, operation, json_path):
|
|||||||
|
|
||||||
|
|
||||||
def parse_arguments():
|
def parse_arguments():
|
||||||
platforms = isc.driver.get_platforms()
|
platforms = sc.driver.get_platforms()
|
||||||
devices = [d for platform in platforms for d in platform.get_devices()]
|
devices = [d for platform in platforms for d in platform.get_devices()]
|
||||||
#Command line arguments
|
#Command line arguments
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
@@ -156,20 +156,20 @@ def parse_arguments():
|
|||||||
print("----------------")
|
print("----------------")
|
||||||
for (i, d) in enumerate(devices):
|
for (i, d) in enumerate(devices):
|
||||||
selected = '[' + ('x' if device==d else ' ') + ']'
|
selected = '[' + ('x' if device==d else ' ') + ']'
|
||||||
print selected , '-', isc.driver.device_type_to_string(d.type), '-', d.name, 'on', d.platform.name
|
print selected , '-', sc.driver.device_type_to_string(d.type), '-', d.name, 'on', d.platform.name
|
||||||
print("----------------")
|
print("----------------")
|
||||||
|
|
||||||
|
|
||||||
operation = {'axpy': isc.templates.axpy, 'dot': isc.templates.dot,
|
operation = {'axpy': sc.templates.axpy, 'dot': sc.templates.dot,
|
||||||
'ger': isc.templates.ger, 'gemv_n': isc.templates.gemv_n, 'gemv_t': isc.templates.gemv_t,
|
'ger': sc.templates.ger, 'gemv_n': sc.templates.gemv_n, 'gemv_t': sc.templates.gemv_t,
|
||||||
'gemm_nn': isc.templates.gemm_nn, 'gemm_tn': isc.templates.gemm_tn, 'gemm_nt': isc.templates.gemm_nt, 'gemm_tt':isc.templates.gemm_tt}[args.operation]
|
'gemm_nn': sc.templates.gemm_nn, 'gemm_tn': sc.templates.gemm_tn, 'gemm_nt': sc.templates.gemm_nt, 'gemm_tt':sc.templates.gemm_tt}[args.operation]
|
||||||
json = tools.sanitize(device.name) + '.json' if not args.json else args.json
|
json = tools.sanitize(device.name) + '.json' if not args.json else args.json
|
||||||
|
|
||||||
return (device, operation, json)
|
return (device, operation, json)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
isc.driver.default.queue_properties = isc.driver.PROFILING_ENABLE
|
sc.driver.default.queue_properties = sc.driver.PROFILING_ENABLE
|
||||||
args = parse_arguments()
|
args = parse_arguments()
|
||||||
tune(*args)
|
tune(*args)
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user