diff --git a/bench/blas.cpp b/bench/blas.cpp index 2e6c55a39..781095006 100644 --- a/bench/blas.cpp +++ b/bench/blas.cpp @@ -103,7 +103,7 @@ void bench(isc::numeric_type dtype, std::string operation) double total_time = 0;\ while(total_time*1e-9 < 1e-3){\ std::list events;\ - flush = isc::zeros(1e6, 1, dtype);\ + flush = isc::zeros((isaac::int_t)1e6, 1, dtype);\ queue.synchronize();\ OP;\ queue.synchronize();\ @@ -205,7 +205,7 @@ void bench(isc::numeric_type dtype, std::string operation) std::cout << N; isc::array x(N, dtype), y(N, dtype); /* ISAAC */ - std::list events;\ + std::list events; BENCHMARK_ISAAC(y = isc::control(x + alpha*y, isc::execution_options_type(0, &events)), 3*N*dtsize/t) /* clblas */ #ifdef BENCH_CLBLAS diff --git a/python/setup.py b/python/setup.py old mode 100644 new mode 100755 index 584def5e0..3f0745a4a --- a/python/setup.py +++ b/python/setup.py @@ -1,170 +1,170 @@ -#Thanks to Andreas Knoeckler for providing stand-alone boost.python -#through PyOpenCL and PyCUDA - -import os, sys -from distutils.ccompiler import show_compilers,new_compiler -from distutils.command.build_ext import build_ext -from distutils.command.build_py import build_py -from distutils.core import setup, Extension -from distutils.sysconfig import get_python_inc -from distutils import sysconfig -from imp import find_module -from glob import glob -from os.path import dirname - -platform_cflags = {} -platform_ldflags = {} -platform_libs = {} - -class build_ext_subclass(build_ext): - def build_extensions(self): - c = self.compiler.compiler_type - if c in platform_cflags.keys(): - for e in self.extensions: - e.extra_compile_args = platform_cflags[c] - if c in platform_ldflags.keys(): - for e in self.extensions: - e.extra_link_args = platform_ldflags[c] - if c in platform_libs.keys(): - for e in self.extensions: - try: - e.libraries += platform_libs[c] - except: - e.libraries = platform_libs[c] - build_ext.build_extensions(self) - -def main(): - - def recursive_glob(rootdir='.', suffix=''): - return [os.path.join(looproot, filename) - for looproot, _, filenames in os.walk(rootdir) - for filename in filenames if filename.endswith(suffix)] - - def remove_prefixes(optlist, bad_prefixes): - for bad_prefix in bad_prefixes: - for i, flag in enumerate(optlist): - if flag.startswith(bad_prefix): - optlist.pop(i) - break - return optlist - - def find_library(name, cmake_glob_list): - cvars = sysconfig.get_config_vars() - compiler = new_compiler() - dirs = [] - for gpath in cmake_glob_list.split(';'): - path = glob(gpath) - if path: - dirs += [path[0]] - return compiler.find_library_file(cvars['LIBDIR'].split(';') + dirs, name) - - def find_opencl(): - cvars = sysconfig.get_config_vars() - is_on_android = '-mandroid' in cvars['PY_CFLAGS'] - lib = find_library('OpenCL', '' if is_on_android else '/opt/AMDAPPSDK*/lib/x86_64') - return {'include': '', 'lib': dirname(lib)} if lib else None - - def find_in_path(name, path): - "Find a file in a search path" - #adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ - for dir in path.split(os.pathsep): - binpath = os.path.join(dir, name) - if os.path.exists(binpath): - return os.path.abspath(binpath) - return None - - def find_cuda(): - if 'CUDAHOME' in os.environ: - home = os.environ['CUDAHOME'] - nvcc = os.path.join(home, 'bin', 'nvcc') - else: - nvcc = find_in_path('nvcc', os.environ['PATH']) - - if nvcc: - home = dirname(os.path.dirname(nvcc)) - return {'include': os.path.join(home, 'include'), - 'lib': os.path.join(home, 'lib64')} - else: - return None - - - #Tweaks warning, because boost-numpy and boost-python won't compile cleanly without these changes - cvars = sysconfig.get_config_vars() - cvars['OPT'] = str.join(' ', remove_prefixes(cvars['OPT'].split(), ['-g', '-Wstrict-prototypes'])) - cvars["CFLAGS"] = cvars["BASECFLAGS"] + ' ' + cvars['OPT'] - cvars["LDFLAGS"] = '-Wl,--no-as-needed ' + cvars["LDFLAGS"] - - #OpenCL - opencl_config = find_opencl() - - #CUDA - cuda_config = find_cuda() - - #Libraries - libraries = ['OpenCL'] - if cuda_config: libraries += ['cuda', 'nvrtc'] - - #Backends: - backend_defines = ['-DISAAC_WITH_OPENCL'] - if cuda_config: backend_defines += ['-DISAAC_WITH_CUDA'] - - #Library directories - library_dirs = [config['lib'] for config in [opencl_config, cuda_config] if config is not None] - - #Include directories - include =' src/include'.split() + ['external/boost/include', os.path.join(find_module("numpy")[1], "core", "include")] - - #Source files - src = 'src/lib/wrap/clBLAS.cpp src/lib/value_scalar.cpp src/lib/kernels/parse.cpp src/lib/kernels/templates/gemv.cpp src/lib/kernels/templates/ger.cpp src/lib/kernels/templates/dot.cpp src/lib/kernels/templates/axpy.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/templates/gemm.cpp src/lib/kernels/stream.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/keywords.cpp src/lib/kernels/binder.cpp src/lib/array.cpp src/lib/symbolic/preset.cpp src/lib/symbolic/expression.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/io.cpp src/lib/model/model.cpp src/lib/model/predictors/random_forest.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/driver/program.cpp src/lib/driver/event.cpp src/lib/driver/device.cpp src/lib/driver/context.cpp src/lib/driver/program_cache.cpp src/lib/driver/platform.cpp src/lib/driver/ndrange.cpp src/lib/driver/kernel.cpp src/lib/driver/handle.cpp src/lib/driver/command_queue.cpp src/lib/driver/check.cpp src/lib/driver/buffer.cpp src/lib/driver/backend.cpp '.split() + [os.path.join('src', 'wrap', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'model.cpp', 'exceptions.cpp']] - boostsrc = 'external/boost/libs/' - for s in ['numpy','python','smart_ptr','system','thread']: - src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x] - # make sure next line succeeds even on Windows - src = [f.replace("\\", "/") for f in src] - if sys.platform == "win32": - src += glob(boostsrc + "/thread/src/win32/*.cpp") - src += glob(boostsrc + "/thread/src/tss_null.cpp") - else: - src += glob(boostsrc + "/thread/src/pthread/*.cpp") - src= [f for f in src if not f.endswith("once_atomic.cpp")] - - #Setup - setup( - name='isaac', - version='1.0', - description="Input-specific architecture-aware computations", - author='Philippe Tillet', - author_email='ptillet@g.harvard.edu', - license='MPL 2.0', - packages=["isaac"], - ext_package="isaac", - ext_modules=[Extension( - '_isaac',src, - extra_compile_args= backend_defines + ['-std=c++11', '-Wno-unused-function', '-Wno-unused-local-typedefs', '-Wno-sign-compare'], - extra_link_args=['-Wl,-soname=_isaac.so'], - undef_macros=[], - include_dirs=include, - library_dirs=library_dirs, - libraries=libraries - )], - cmdclass={'build_py': build_py, 'build_ext': build_ext_subclass}, - classifiers=[ - 'Environment :: Console', - 'Development Status :: 1 - Experimental', - 'Intended Audience :: Developers', - 'Intended Audience :: Other Audience', - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: MIT License', - 'Natural Language :: English', - 'Programming Language :: C++', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3', - 'Topic :: Scientific/Engineering', - 'Topic :: Scientific/Engineering :: Mathematics', - 'Topic :: Scientific/Engineering :: Physics', - 'Topic :: Scientific/Engineering :: Machine Learning', - ] - ) - -if __name__ == "__main__": - main() +#Thanks to Andreas Knoeckler for providing stand-alone boost.python +#through PyOpenCL and PyCUDA + +import os, sys +from distutils.ccompiler import show_compilers,new_compiler +from distutils.command.build_ext import build_ext +from distutils.command.build_py import build_py +from distutils.core import setup, Extension +from distutils.sysconfig import get_python_inc +from distutils import sysconfig +from imp import find_module +from glob import glob +from os.path import dirname + +platform_cflags = {} +platform_ldflags = {} +platform_libs = {} + +class build_ext_subclass(build_ext): + def build_extensions(self): + c = self.compiler.compiler_type + if c in platform_cflags.keys(): + for e in self.extensions: + e.extra_compile_args = platform_cflags[c] + if c in platform_ldflags.keys(): + for e in self.extensions: + e.extra_link_args = platform_ldflags[c] + if c in platform_libs.keys(): + for e in self.extensions: + try: + e.libraries += platform_libs[c] + except: + e.libraries = platform_libs[c] + build_ext.build_extensions(self) + +def main(): + + def recursive_glob(rootdir='.', suffix=''): + return [os.path.join(looproot, filename) + for looproot, _, filenames in os.walk(rootdir) + for filename in filenames if filename.endswith(suffix)] + + def remove_prefixes(optlist, bad_prefixes): + for bad_prefix in bad_prefixes: + for i, flag in enumerate(optlist): + if flag.startswith(bad_prefix): + optlist.pop(i) + break + return optlist + + def find_library(name, cmake_glob_list): + cvars = sysconfig.get_config_vars() + compiler = new_compiler() + dirs = [] + for gpath in cmake_glob_list.split(';'): + path = glob(gpath) + if path: + dirs += [path[0]] + return compiler.find_library_file(cvars['LIBDIR'].split(';') + dirs, name) + + def find_opencl(): + cvars = sysconfig.get_config_vars() + is_on_android = '-mandroid' in cvars['PY_CFLAGS'] + lib = find_library('OpenCL', '' if is_on_android else '/opt/AMDAPPSDK*/lib/x86_64') + return {'include': '', 'lib': dirname(lib)} if lib else None + + def find_in_path(name, path): + "Find a file in a search path" + #adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ + for dir in path.split(os.pathsep): + binpath = os.path.join(dir, name) + if os.path.exists(binpath): + return os.path.abspath(binpath) + return None + + def find_cuda(): + if 'CUDAHOME' in os.environ: + home = os.environ['CUDAHOME'] + nvcc = os.path.join(home, 'bin', 'nvcc') + else: + nvcc = find_in_path('nvcc', os.environ['PATH']) + + if nvcc: + home = dirname(os.path.dirname(nvcc)) + return {'include': os.path.join(home, 'include'), + 'lib': os.path.join(home, 'lib64')} + else: + return None + + + #Tweaks warning, because boost-numpy and boost-python won't compile cleanly without these changes + cvars = sysconfig.get_config_vars() + cvars['OPT'] = str.join(' ', remove_prefixes(cvars['OPT'].split(), ['-g', '-Wstrict-prototypes'])) + cvars["CFLAGS"] = cvars["BASECFLAGS"] + ' ' + cvars['OPT'] + cvars["LDFLAGS"] = '-Wl,--no-as-needed ' + cvars["LDFLAGS"] + + #OpenCL + opencl_config = find_opencl() + + #CUDA + cuda_config = find_cuda() + + #Libraries + libraries = ['OpenCL'] + if cuda_config: libraries += ['cuda', 'nvrtc'] + + #Backends: + backend_defines = ['-DISAAC_WITH_OPENCL'] + if cuda_config: backend_defines += ['-DISAAC_WITH_CUDA'] + + #Library directories + library_dirs = [config['lib'] for config in [opencl_config, cuda_config] if config is not None] + + #Include directories + include =' src/include'.split() + ['external/boost/include', os.path.join(find_module("numpy")[1], "core", "include")] + + #Source files + src = 'src/lib/wrap/clBLAS.cpp src/lib/value_scalar.cpp src/lib/symbolic/preset.cpp src/lib/symbolic/io.cpp src/lib/symbolic/expression.cpp src/lib/symbolic/execute.cpp src/lib/model/predictors/random_forest.cpp src/lib/model/model.cpp src/lib/kernels/templates/ger.cpp src/lib/kernels/templates/gemv.cpp src/lib/kernels/templates/gemm.cpp src/lib/kernels/templates/dot.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/templates/axpy.cpp src/lib/kernels/stream.cpp src/lib/kernels/parse.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/keywords.cpp src/lib/kernels/binder.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/driver/program_cache.cpp src/lib/driver/program.cpp src/lib/driver/platform.cpp src/lib/driver/ndrange.cpp src/lib/driver/kernel.cpp src/lib/driver/handle.cpp src/lib/driver/event.cpp src/lib/driver/device.cpp src/lib/driver/context.cpp src/lib/driver/command_queue.cpp src/lib/driver/check.cpp src/lib/driver/buffer.cpp src/lib/driver/backend.cpp src/lib/array.cpp '.split() + [os.path.join('src', 'wrap', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'model.cpp', 'exceptions.cpp']] + boostsrc = 'external/boost/libs/' + for s in ['numpy','python','smart_ptr','system','thread']: + src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x] + # make sure next line succeeds even on Windows + src = [f.replace("\\", "/") for f in src] + if sys.platform == "win32": + src += glob(boostsrc + "/thread/src/win32/*.cpp") + src += glob(boostsrc + "/thread/src/tss_null.cpp") + else: + src += glob(boostsrc + "/thread/src/pthread/*.cpp") + src= [f for f in src if not f.endswith("once_atomic.cpp")] + + #Setup + setup( + name='isaac', + version='1.0', + description="Input-specific architecture-aware computations", + author='Philippe Tillet', + author_email='ptillet@g.harvard.edu', + license='MPL 2.0', + packages=["isaac"], + ext_package="isaac", + ext_modules=[Extension( + '_isaac',src, + extra_compile_args= backend_defines + ['-std=c++11', '-Wno-unused-function', '-Wno-unused-local-typedefs', '-Wno-sign-compare'], + extra_link_args=['-Wl,-soname=_isaac.so'], + undef_macros=[], + include_dirs=include, + library_dirs=library_dirs, + libraries=libraries + )], + cmdclass={'build_py': build_py, 'build_ext': build_ext_subclass}, + classifiers=[ + 'Environment :: Console', + 'Development Status :: 1 - Experimental', + 'Intended Audience :: Developers', + 'Intended Audience :: Other Audience', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: MIT License', + 'Natural Language :: English', + 'Programming Language :: C++', + 'Programming Language :: Python', + 'Programming Language :: Python :: 3', + 'Topic :: Scientific/Engineering', + 'Topic :: Scientific/Engineering :: Mathematics', + 'Topic :: Scientific/Engineering :: Physics', + 'Topic :: Scientific/Engineering :: Machine Learning', + ] + ) + +if __name__ == "__main__": + main() diff --git a/tests/linalg/axpy.cpp b/tests/linalg/axpy.cpp index 0e6761e1b..395d87e10 100644 --- a/tests/linalg/axpy.cpp +++ b/tests/linalg/axpy.cpp @@ -20,8 +20,8 @@ void test_element_wise_vector(T epsilon, simple_vector_base & cx, simple_vect cl_command_queue clqueue = queue.handle().cl(); int_t N = cz.size(); - T aa = -4.3; - T bb=3.5; + T aa = static_cast(-4.3); + T bb = static_cast(3.5); isaac::value_scalar a(aa), b(bb); isaac::scalar da(a, context), db(b, context); @@ -149,11 +149,11 @@ int main() std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl; std::cout << "---" << std::endl; std::cout << ">> float" << std::endl; - test_impl(1e-4, *context); + test_impl(eps_float, *context); if(device.fp64_support()) { std::cout << ">> double" << std::endl; - test_impl(1e-9, *context); + test_impl(eps_double, *context); } std::cout << "---" << std::endl; } diff --git a/tests/linalg/common.hpp b/tests/linalg/common.hpp index 5b9013d1d..f3142c118 100644 --- a/tests/linalg/common.hpp +++ b/tests/linalg/common.hpp @@ -7,6 +7,9 @@ typedef isaac::int_t int_t; +static const float eps_float = static_cast(1e-4); +static const double eps_double = static_cast(1e-8); + template struct BLAS; template<> struct BLAS { template static FT F(FT SAXPY, DT ) { return SAXPY; } }; template<> struct BLAS { template static DT F(FT , DT DAXPY) { return DAXPY; } }; diff --git a/tests/linalg/dot.cpp b/tests/linalg/dot.cpp index 4a2023dd3..9531bc22e 100644 --- a/tests/linalg/dot.cpp +++ b/tests/linalg/dot.cpp @@ -99,11 +99,11 @@ int main() std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl; std::cout << "---" << std::endl; std::cout << ">> float" << std::endl; - test_impl(1e-4, *context); + test_impl(eps_float, *context); if(device.fp64_support()) { std::cout << ">> double" << std::endl; - test_impl(1e-9, *context); + test_impl(eps_double, *context); } std::cout << "---" << std::endl; } diff --git a/tests/linalg/gemm.cpp b/tests/linalg/gemm.cpp index 6bbe0de4e..07710aff8 100644 --- a/tests/linalg/gemm.cpp +++ b/tests/linalg/gemm.cpp @@ -136,11 +136,11 @@ int main() std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl; std::cout << "---" << std::endl; std::cout << ">> float" << std::endl; - test_impl(1e-4, *context); + test_impl(eps_float, *context); if(device.fp64_support()) { std::cout << ">> double" << std::endl; - test_impl(1e-9, *context); + test_impl(eps_double, *context); } std::cout << "---" << std::endl; } diff --git a/tests/linalg/gemv.cpp b/tests/linalg/gemv.cpp index d2696ad25..6173408a4 100644 --- a/tests/linalg/gemv.cpp +++ b/tests/linalg/gemv.cpp @@ -18,7 +18,8 @@ void test_row_wise_reduction(T epsilon, simple_vector_base & cy, simple_matri simple_vector bufy(M); simple_vector bufx(N); - T alpha = 4.2, beta = 5.6; + T alpha = static_cast(4.2); + T beta = static_cast(5.6); isc::driver::CommandQueue queue = isc::driver::backend::queues::get(y.context(),0); @@ -116,11 +117,11 @@ int main() std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl; std::cout << "---" << std::endl; std::cout << ">> float" << std::endl; - test_impl(1e-4, *context); + test_impl(eps_float, *context); if(device.fp64_support()) { std::cout << ">> double" << std::endl; - test_impl(1e-9, *context); + test_impl(eps_double, *context); } std::cout << "---" << std::endl; } diff --git a/tests/linalg/ger.cpp b/tests/linalg/ger.cpp index 7961f369e..0fdec5238 100644 --- a/tests/linalg/ger.cpp +++ b/tests/linalg/ger.cpp @@ -19,7 +19,8 @@ void test(T epsilon, simple_matrix_base & cA, simple_matrix_base& cB, simp int_t N = cC.size2(); - T aa = 3.12, bb=3.5; + T aa = static_cast(3.12); + T bb = static_cast(3.5); isaac::value_scalar a(aa), b(bb); isaac::scalar da(a, ctx), db(b, ctx); @@ -132,11 +133,11 @@ int main() std::cout << "Device: " << device.name() << " on " << device.platform().name() << " " << device.platform().version() << std::endl; std::cout << "---" << std::endl; std::cout << ">> float" << std::endl; - test_impl(1e-4, *context); + test_impl(eps_float, *context); if(device.fp64_support()) { std::cout << ">> double" << std::endl; - test_impl(1e-9, *context); + test_impl(eps_double, *context); } std::cout << "---" << std::endl; }