diff --git a/CMakeLists.txt b/CMakeLists.txt index 68a2e7234..c5fa29710 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,9 +7,6 @@ add_custom_target( MAKE_HEADERS_VISIBLE SOURCES ${MAKE_HEADERS_VISIBLE_SRC} ) list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) -if(NOT CUDA_TOOLKIT_ROOT_DIR) - set(CUDA_TOOLKIT_ROOT_DIR ".") -endif() find_package(CUDA QUIET) find_package(OpenCL QUIET REQUIRED) if(CUDA_FOUND) diff --git a/cmake/helpers/CodeToH.cmake b/cmake/helpers/CodeToH.cmake index 183f09d85..0a1a7dccb 100644 --- a/cmake/helpers/CodeToH.cmake +++ b/cmake/helpers/CodeToH.cmake @@ -1,3 +1,6 @@ +#Copyright (c) 2014, ArrayFire +#All rights reserved. + # Function to turn an OpenCL source file into a C string within a source file. # xxd uses its input's filename to name the string and its length, so we # need to move them to a name that depends only on the path output, not its @@ -28,36 +31,31 @@ include(CMakeParseArguments) set(BIN2CPP_PROGRAM "bin2cpp") function(CODE_TO_H) - cmake_parse_arguments(RTCS "" "VARNAME;EXTENSION;OUTPUT_DIR;TARGETS;NAMESPACE;EOF" "SOURCES" ${ARGN}) + cmake_parse_arguments(ARGS "" "VARNAME;EXTENSION;OUTPUT_DIR;TARGET;NAMESPACE;EOF" "SOURCES" ${ARGN}) set(_output_files "") - foreach(_input_file ${RTCS_SOURCES}) + foreach(_input_file ${ARGS_SOURCES}) get_filename_component(_path "${_input_file}" PATH) get_filename_component(_name "${_input_file}" NAME) - get_filename_component(var_name "${_input_file}" NAME) get_filename_component(_name_we "${_input_file}" NAME_WE) + set(var_name ${_name_we}) - set(_namespace "${RTCS_NAMESPACE}") + set(_namespace "${ARGS_NAMESPACE}") string(REPLACE "." "_" var_name ${var_name}) - set(_output_path "${CMAKE_CURRENT_BINARY_DIR}/${RTCS_OUTPUT_DIR}") - set(_output_file "${_output_path}/${_name_we}.${RTCS_EXTENSION}") + set(_output_path "${ARGS_OUTPUT_DIR}") + set(_output_file "${_output_path}/${_name_we}.${ARGS_EXTENSION}") - ADD_CUSTOM_COMMAND( + add_custom_command( OUTPUT ${_output_file} DEPENDS ${_input_file} ${BIN2CPP_PROGRAM} COMMAND ${CMAKE_COMMAND} -E make_directory "${_output_path}" COMMAND ${CMAKE_COMMAND} -E echo "\\#include \\<${_path}/${_name_we}.hpp\\>" >>"${_output_file}" - COMMAND ${BIN2CPP_PROGRAM} --file ${_name} --namespace ${_namespace} --output ${_output_file} --name ${var_name} --eof ${RTCS_EOF} + COMMAND ${BIN2CPP_PROGRAM} --file ${_name} --namespace ${_namespace} --output ${_output_file} --name ${var_name} --eof ${ARGS_EOF} WORKING_DIRECTORY "${_path}" COMMENT "Compiling ${_input_file} to C++ source" ) - - list(APPEND _output_files ${_output_file}) endforeach() - ADD_CUSTOM_TARGET(${RTCS_NAMESPACE}_bin_target DEPENDS ${_output_files}) - - set("${RTCS_VARNAME}" ${_output_files} PARENT_SCOPE) - set("${RTCS_TARGETS}" ${RTCS_NAMESPACE}_bin_target PARENT_SCOPE) -endfunction(CL_KERNEL_TO_H) + add_custom_target(${ARGS_TARGET} ALL DEPENDS ${_output_files}) +endfunction() diff --git a/cmake/helpers/bin2cpp.cpp b/cmake/helpers/bin2cpp.cpp index 796e29ff1..f2849b3e2 100644 --- a/cmake/helpers/bin2cpp.cpp +++ b/cmake/helpers/bin2cpp.cpp @@ -1,3 +1,5 @@ +// Copyright (c) 2014, ArrayFire +// All rights reserved. // Umar Arshad // Copyright 2014 diff --git a/include/isaac/backend/keywords.h b/include/isaac/backend/keywords.h index e744b2b39..7dc8fcb7f 100644 --- a/include/isaac/backend/keywords.h +++ b/include/isaac/backend/keywords.h @@ -62,10 +62,11 @@ ADD_KEYWORD(GroupSize0, "get_num_groups(0)", "GridDim.x") ADD_KEYWORD(GroupSize1, "get_num_groups(1)", "GridDim.y") ADD_KEYWORD(GroupSize2, "get_num_groups(2)", "GridDim.z") - ADD_KEYWORD(LocalBarrier, "barrier(CLK_LOCAL_MEM_FENCE)", "__syncthreads()") struct CastPrefix: public keyword{ CastPrefix(driver::backend_type backend, std::string const & datatype): keyword(backend, "convert_" + datatype, "make_" + datatype){} }; struct InitPrefix: public keyword{ InitPrefix(driver::backend_type backend, std::string const & datatype): keyword(backend, "", "make_" + datatype){} }; + +struct Infinity: public keyword{ Infinity(driver::backend_type backend, std::string const & datatype): keyword(backend, "INFINITY", "infinity<" + datatype + ">()"){} }; #undef ADD_KEYWORD diff --git a/include/isaac/backend/templates/base.h b/include/isaac/backend/templates/base.h index 03dcb32e2..94d311867 100644 --- a/include/isaac/backend/templates/base.h +++ b/include/isaac/backend/templates/base.h @@ -151,7 +151,7 @@ protected: static void compute_index_reduction(kernel_generation_stream & os, std::string acc, std::string cur, std::string const & acc_value, std::string const & cur_value, op_element const & op); static void process_all(std::string const & type_key, std::string const & str, kernel_generation_stream & stream, std::vector const & mappings); static void process_all_at(std::string const & type_key, std::string const & str, kernel_generation_stream & stream, std::vector const & mappings, size_t root_idx, leaf_t leaf); - static std::string neutral_element(op_element const & op); + static std::string neutral_element(op_element const & op, driver::backend_type backend, std::string const & datatype); static std::string generate_arguments(std::vector const & mappings, std::map const & accessors, expressions_tuple const & expressions); static std::string generate_arguments(std::string const & data_type, driver::Device const & device, std::vector const & mappings, expressions_tuple const & expressions); static bool is_node_trans(array_expression::container_type const & array, size_t root_idx, leaf_t leaf_type); diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 8b07c9392..6761a548a 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -11,10 +11,11 @@ if(CUDA_FOUND) cuda_find_library_local_first(CUDA_NVRTC_LIBRARY nvrtc "\"nvrtc\" library") target_link_libraries(isaac ${CUDA_CUDA_LIBRARY} ${CUDA_NVRTC_LIBRARY}) #Cuda JIT headers to file - file( GLOB_RECURSE CUDA_HELPERS_SRC driver/helpers/*.u) - include("${CMAKE_MODULE_PATH}/CodeToH.cmake") - CODE_TO_H(SOURCES ${CUDA_HELPERS_SRC} VARNAME kernel_files EXTENSION "h" - OUTPUT_DIR driver/helpers TARGETS cl_kernel_targets EOF "0") + set(CUDA_HELPERS_PATH ${CMAKE_CURRENT_SOURCE_DIR}/driver/helpers/cuda/) + file(GLOB_RECURSE CUDA_HELPERS_SRC ${CUDA_HELPERS_PATH}/*.cu) + include("${CMAKE_MODULE_PATH}/helpers/CodeToH.cmake") + CODE_TO_H(SOURCES ${CUDA_HELPERS_SRC} VARNAME kernel_files EXTENSION "hpp" + OUTPUT_DIR ${CUDA_HELPERS_PATH} NAMESPACE "isaac helpers cuda" TARGET cuda_headers EOF "0") endif() diff --git a/lib/array.cpp b/lib/array.cpp index e4b449b47..df96d2e94 100644 --- a/lib/array.cpp +++ b/lib/array.cpp @@ -295,7 +295,7 @@ void scalar::inject(values_holder & v) const int_t dtsize = size_of(dtype_); #define HANDLE_CASE(DTYPE, VAL) \ case DTYPE:\ - driver::queues[context_][0].read(data_, CL_TRUE, start_[0]*dtsize, dtsize, (void*)&v.VAL);\ + driver::queues[context_][0].read(data_, CL_TRUE, start_[0]*dtsize, dtsize, (void*)&v.VAL); break;\ switch(dtype_) { diff --git a/lib/backend/templates/base.cpp b/lib/backend/templates/base.cpp index f69e97fd7..27ab36c7d 100644 --- a/lib/backend/templates/base.cpp +++ b/lib/backend/templates/base.cpp @@ -240,21 +240,24 @@ void base::base::process_all_at(std::string const & type_key, std::string const } } -std::string base::neutral_element(op_element const & op) +std::string base::neutral_element(op_element const & op, driver::backend_type backend, std::string const & dtype) { + std::string INF = Infinity(backend, dtype).get(); + std::string N_INF = "-" + INF; + switch (op.type) { case OPERATOR_ADD_TYPE : return "0"; case OPERATOR_MULT_TYPE : return "1"; case OPERATOR_DIV_TYPE : return "1"; - case OPERATOR_ELEMENT_FMAX_TYPE : return "-INFINITY"; - case OPERATOR_ELEMENT_ARGFMAX_TYPE : return "-INFINITY"; - case OPERATOR_ELEMENT_MAX_TYPE : return "-INFINITY"; - case OPERATOR_ELEMENT_ARGMAX_TYPE : return "-INFINITY"; - case OPERATOR_ELEMENT_FMIN_TYPE : return "INFINITY"; - case OPERATOR_ELEMENT_ARGFMIN_TYPE : return "INFINITY"; - case OPERATOR_ELEMENT_MIN_TYPE : return "INFINITY"; - case OPERATOR_ELEMENT_ARGMIN_TYPE : return "INFINITY"; + case OPERATOR_ELEMENT_FMAX_TYPE : return N_INF; + case OPERATOR_ELEMENT_ARGFMAX_TYPE : return N_INF; + case OPERATOR_ELEMENT_MAX_TYPE : return N_INF; + case OPERATOR_ELEMENT_ARGMAX_TYPE : return N_INF; + case OPERATOR_ELEMENT_FMIN_TYPE : return INF; + case OPERATOR_ELEMENT_ARGFMIN_TYPE : return INF; + case OPERATOR_ELEMENT_MIN_TYPE : return INF; + case OPERATOR_ELEMENT_ARGMIN_TYPE : return INF; default: throw operation_not_supported_exception("Unsupported reduction operator : no neutral element known"); } @@ -292,7 +295,7 @@ void base::set_arguments(expressions_tuple const & expressions, driver::Kernel & base::invalid_exception::invalid_exception() : message_() {} base::invalid_exception::invalid_exception(std::string message) : - message_("ViennaCL: Internal error: The generator cannot apply the given template to the given array_expression: " + message + "\n" + message_("ISAAC: Internal error: The generator cannot apply the given template to the given array_expression: " + message + "\n" "If you are using a builtin template, please report on viennacl-support@lists.sourceforge.net! We will provide a fix as soon as possible\n" "If you are using your own template, please try using other parameters") {} diff --git a/lib/backend/templates/maxpy.cpp b/lib/backend/templates/maxpy.cpp index 2b2c680b4..3bf6e4dff 100644 --- a/lib/backend/templates/maxpy.cpp +++ b/lib/backend/templates/maxpy.cpp @@ -30,9 +30,17 @@ std::string maxpy::generate_impl(const char * suffix, expressions_tuple const & std::string _size_t = size_type(device); std::string init0, upper_bound0, inc0, init1, upper_bound1, inc1; std::string data_type = append_width("#scalartype",p_.simd_width); + driver::backend_type backend = device.backend(); - stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; - stream << "__kernel void axpy" << suffix << "(" << _size_t << " M, " << _size_t << " N, " << generate_arguments("#scalartype", device, mappings, expressions) << ")" << std::endl; + switch(backend) + { +#ifdef ISAAC_WITH_CUDA + case driver::CUDA: stream << "#include \"helper_math.h\"" << std::endl; break; +#endif + case driver::OPENCL: stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break; + } + + stream << KernelPrefix(backend) << " void axpy" << suffix << "(" << _size_t << " M, " << _size_t << " N, " << generate_arguments("#scalartype", device, mappings, expressions) << ")" << std::endl; stream << "{" << std::endl; stream.inc_tab(); @@ -40,11 +48,11 @@ std::string maxpy::generate_impl(const char * suffix, expressions_tuple const & ("array1", "#pointer += #start;") ("array2", "#pointer = &$VALUE{#start1, #start2};"), expressions, mappings); - fetching_loop_info(p_.fetching_policy, "M", stream, init0, upper_bound0, inc0, "get_global_id(0)", "get_global_size(0)", device); + fetching_loop_info(p_.fetching_policy, "M", stream, init0, upper_bound0, inc0, GlobalIdx0(backend).get(), GlobalSize0(backend).get(), device); stream << "for(" << _size_t << " i = " << init0 << "; i < " << upper_bound0 << "; i += " << inc0 << ")" << std::endl; stream << "{" << std::endl; stream.inc_tab(); - fetching_loop_info(p_.fetching_policy, "N", stream, init1, upper_bound1, inc1, "get_global_id(1)", "get_global_size(1)", device); + fetching_loop_info(p_.fetching_policy, "N", stream, init1, upper_bound1, inc1, GlobalIdx1(backend).get(), GlobalSize1(backend).get(), device); stream << "for(" << _size_t << " j = " << init1 << "; j < " << upper_bound1 << "; j += " << inc1 << ")" << std::endl; stream << "{" << std::endl; stream.inc_tab(); @@ -62,7 +70,8 @@ std::string maxpy::generate_impl(const char * suffix, expressions_tuple const & ("repeat", "#namereg") ("array0", "#namereg") ("outer", "#namereg") - ("cast", "convert_"+data_type) + ("cast", CastPrefix(backend, data_type).get()) + ("host_scalar", p_.simd_width==1?"#name": InitPrefix(backend, data_type).get() + "(#name)") , expressions, mappings); process(stream, LHS_NODE_TYPE, tools::make_map >("array2", "$VALUE{i*#stride1,j*#stride2} = #namereg;") @@ -76,7 +85,6 @@ std::string maxpy::generate_impl(const char * suffix, expressions_tuple const & stream.dec_tab(); stream << "}" << std::endl; -// std::cout << stream.str() << std::endl; return stream.str(); } diff --git a/lib/backend/templates/mproduct.cpp b/lib/backend/templates/mproduct.cpp index 0d7c53f3a..17aa37c11 100644 --- a/lib/backend/templates/mproduct.cpp +++ b/lib/backend/templates/mproduct.cpp @@ -129,8 +129,14 @@ mproduct_parameters::mproduct_parameters(unsigned int simd_width strcat(gemm_name, suffix); strcat(reduce_name, suffix); - if(backend==driver::OPENCL) - stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; + switch(backend) + { + #ifdef ISAAC_WITH_CUDA + case driver::CUDA: stream << "#include \"helper_math.h\"" << std::endl; break; + #endif + case driver::OPENCL: stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break; + } + stream << KernelPrefix(backend) << " void " << gemm_name << "(" << _size_t << " M, " << _size_t << " N, " << _size_t << " K, " << Global(backend) << " " << sdtype << "* C, " << _size_t << " Cld," << _size_t << " Coff," << _size_t << " Cstride1, " << sdtype << " alpha," diff --git a/lib/backend/templates/mreduction.cpp b/lib/backend/templates/mreduction.cpp index 1e5e73f5d..fa4e87e2a 100644 --- a/lib/backend/templates/mreduction.cpp +++ b/lib/backend/templates/mreduction.cpp @@ -65,8 +65,13 @@ std::string mreduction::generate_impl(const char * suffix, expressions_tuple con arguments += e->process(Global(backend).get() + " " + to_string(numeric_type) + "* #name_temp, "); } - if(backend==driver::OPENCL) - stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; + switch(backend) + { +#ifdef ISAAC_WITH_CUDA + case driver::CUDA: stream << "#include \"helper_math.h\"" << std::endl; break; +#endif + case driver::OPENCL: stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break; + } stream << KernelPrefix(backend) << " void " << name[0] << "(" << arguments << generate_arguments("#scalartype", device, mappings, expressions) << ")" << std::endl; stream << "{" << std::endl; @@ -99,7 +104,7 @@ std::string mreduction::generate_impl(const char * suffix, expressions_tuple con stream.inc_tab(); for (const auto & e : reductions) - stream << e->process("#scalartype #name_acc = " + neutral_element((e)->root_op()) + ";") << std::endl; + stream << e->process("#scalartype #name_acc = " + neutral_element((e)->root_op(), backend, "#scalartype") + ";") << std::endl; stream << "if (r < M)" << std::endl; stream << "{" << std::endl; @@ -240,7 +245,7 @@ std::string mreduction::generate_impl(const char * suffix, expressions_tuple con stream.inc_tab(); for (const auto & e : reductions) - stream << e->process("#scalartype #name_acc = " + neutral_element((e)->root_op()) + ";") << std::endl; + stream << e->process("#scalartype #name_acc = " + neutral_element((e)->root_op(), backend, "#scalartype") + ";") << std::endl; stream << "if (r < M)" << std::endl; stream << "{" << std::endl; diff --git a/lib/backend/templates/reduction.cpp b/lib/backend/templates/reduction.cpp index 86e84e9e5..2d5182070 100644 --- a/lib/backend/templates/reduction.cpp +++ b/lib/backend/templates/reduction.cpp @@ -72,8 +72,7 @@ std::string reduction::generate_impl(const char * suffix, expressions_tuple cons std::string arguments = _size_t + " N, "; for (unsigned int k = 0; k < N; ++k) { - std::string numeric_type = numeric_type_to_string(lhs_most(exprs[k]->array_expression().tree(), - exprs[k]->array_expression().root()).lhs.dtype); + std::string numeric_type = numeric_type_to_string(lhs_most(exprs[k]->array_expression().tree(), exprs[k]->array_expression().root()).lhs.dtype); if (exprs[k]->is_index_reduction()) { arguments += exprs[k]->process(Global(backend).get() + " unsigned int* #name_temp, "); @@ -90,8 +89,14 @@ std::string reduction::generate_impl(const char * suffix, expressions_tuple cons /* ------------------------ * First Kernel * -----------------------*/ - if(backend==driver::OPENCL) - stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl; + switch(backend) + { +#ifdef ISAAC_WITH_CUDA + case driver::CUDA: stream << "#include \"helper_math.h\"" << std::endl; break; +#endif + case driver::OPENCL: stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl; break; + } + stream << KernelPrefix(backend) << " void " << name[0] << "(" << arguments << generate_arguments("#scalartype", device, mappings, expressions) << ")" << std::endl; stream << "{" << std::endl; stream.inc_tab(); @@ -110,19 +115,19 @@ std::string reduction::generate_impl(const char * suffix, expressions_tuple cons if (exprs[k]->is_index_reduction()) { stream << exprs[k]->process(Local(backend).get() + " #scalartype #name_buf_value[" + tools::to_string(p_.local_size_0) + "];") << std::endl; - stream << exprs[k]->process("#scalartype #name_acc_value = " + neutral_element(exprs[k]->root_op()) + ";") << std::endl; + stream << exprs[k]->process("#scalartype #name_acc_value = " + neutral_element(exprs[k]->root_op(), backend, "#scalartype") + ";") << std::endl; stream << exprs[k]->process(Local(backend).get() + " unsigned int #name_buf[" + tools::to_string(p_.local_size_0) + "];") << std::endl; stream << exprs[k]->process("unsigned int #name_acc = 0;") << std::endl; } else { stream << exprs[k]->process(Local(backend).get() + " #scalartype #name_buf[" + tools::to_string(p_.local_size_0) + "];") << std::endl; - stream << exprs[k]->process("#scalartype #name_acc = " + neutral_element(exprs[k]->root_op()) + ";") << std::endl; + stream << exprs[k]->process("#scalartype #name_acc = " + neutral_element(exprs[k]->root_op(), backend, "#scalartype") + ";") << std::endl; } } - element_wise_loop_1D(stream, p_.fetching_policy, p_.simd_width, "i", "N", "get_global_id(0)", "get_global_size(0)", device, [&](unsigned int simd_width) + element_wise_loop_1D(stream, p_.fetching_policy, p_.simd_width, "i", "N", GlobalIdx0(backend).get(), GlobalSize0(backend).get(), device, [&](unsigned int simd_width) { std::string i = (simd_width==1)?"i*#stride":"i"; //Fetch vector entry @@ -190,8 +195,9 @@ std::string reduction::generate_impl(const char * suffix, expressions_tuple cons /* ------------------------ * Second kernel * -----------------------*/ - if(backend==driver::OPENCL) - stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl; + + + stream << KernelPrefix(backend) << " void " << name[1] << "(" << arguments << generate_arguments("#scalartype", device, mappings, expressions) << ")" << std::endl; stream << "{" << std::endl; stream.inc_tab(); @@ -206,12 +212,12 @@ std::string reduction::generate_impl(const char * suffix, expressions_tuple cons stream << e->process(Local(backend).get() + " unsigned int #name_buf[" + tools::to_string(p_.local_size_0) + "];"); stream << e->process("unsigned int #name_acc = 0;") << std::endl; stream << e->process(Local(backend).get() + " #scalartype #name_buf_value[" + tools::to_string(p_.local_size_0) + "];") << std::endl; - stream << e->process("#scalartype #name_acc_value = " + neutral_element(e->root_op()) + ";"); + stream << e->process("#scalartype #name_acc_value = " + neutral_element(e->root_op(), backend, "#scalartype") + ";"); } else { stream << e->process(Local(backend).get() + " #scalartype #name_buf[" + tools::to_string(p_.local_size_0) + "];") << std::endl; - stream << e->process("#scalartype #name_acc = " + neutral_element(e->root_op()) + ";"); + stream << e->process("#scalartype #name_acc = " + neutral_element(e->root_op(), backend, "#scalartype") + ";"); } } diff --git a/lib/backend/templates/vaxpy.cpp b/lib/backend/templates/vaxpy.cpp index 27262d323..b4008a3df 100644 --- a/lib/backend/templates/vaxpy.cpp +++ b/lib/backend/templates/vaxpy.cpp @@ -38,7 +38,7 @@ std::string vaxpy::generate_impl(const char * suffix, expressions_tuple const & #ifdef ISAAC_WITH_CUDA case driver::CUDA: stream << "#include \"helper_math.h\"" << std::endl; break; #endif - case driver::OPENCL: stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl; break; + case driver::OPENCL: stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break; } stream << KernelPrefix(backend) << " void " << "axpy" << suffix << "(" << _size_t << " N," << generate_arguments(dtype, device, mappings, expressions) << ")" << std::endl; @@ -87,7 +87,6 @@ std::string vaxpy::generate_impl(const char * suffix, expressions_tuple const & stream.dec_tab(); stream << "}" << std::endl; - return stream.str(); } diff --git a/lib/driver/helpers/cuda_vector_overload.cu b/lib/driver/helpers/cuda/vector.cu similarity index 98% rename from lib/driver/helpers/cuda_vector_overload.cu rename to lib/driver/helpers/cuda/vector.cu index 5db14eb18..0feca88eb 100644 --- a/lib/driver/helpers/cuda_vector_overload.cu +++ b/lib/driver/helpers/cuda/vector.cu @@ -12,7 +12,14 @@ #ifndef HELPER_MATH_H_ #define HELPER_MATH_H_ -typedef unsigned int uint; + +template +inline __device__ DTYPE infinity() { return __int_as_float(0x7f800000); } + +template<> +inline __device__ double infinity() { return __hiloint2double(0x7ff00000, 0x00000000) ; } + +typedef unsigned int uint; typedef unsigned short ushort; template diff --git a/lib/driver/kernel.cpp b/lib/driver/kernel.cpp index 900c291ab..1824e8d10 100644 --- a/lib/driver/kernel.cpp +++ b/lib/driver/kernel.cpp @@ -80,12 +80,12 @@ void Kernel::setSizeArg(unsigned int index, size_t N) #endif case OPENCL: if(address_bits_==32){ - cl_int NN = N; + int32_t NN = N; h_.cl->setArg(index, 4, &NN); } else if(address_bits_==64) { - cl_long NN = N; + int64_t NN = N; h_.cl->setArg(index, 8, &NN); } else diff --git a/lib/driver/program.cpp b/lib/driver/program.cpp index d3611234f..a381b1526 100644 --- a/lib/driver/program.cpp +++ b/lib/driver/program.cpp @@ -5,6 +5,10 @@ #include "isaac/driver/context.h" #include "isaac/tools/sha1.hpp" +#ifdef ISAAC_WITH_CUDA +#include "helpers/cuda/vector.hpp" +#endif + namespace isaac { @@ -34,18 +38,9 @@ Program::Program(Context const & context, std::string const & source) : backend_ nvrtcProgram prog; - std::ifstream ifs("/home/philippe/Development/ISAAC/lib/driver/helpers/cuda_vector_overload.h"); - std::string str; - - ifs.seekg(0, std::ios::end); - str.reserve(ifs.tellg()); - ifs.seekg(0, std::ios::beg); - - str.assign((std::istreambuf_iterator(ifs)), - std::istreambuf_iterator()); - const char * includes[] = {"helper_math.h"}; - const char * src[] = {str.c_str()}; + const char * src[] = {helpers::cuda::vector}; + nvrtc::check(nvrtcCreateProgram(&prog, source.c_str(), NULL, 1, src, includes)); try{ const char * options[] = {"--gpu-architecture=compute_52", "--restrict"}; diff --git a/python/setup.py b/python/setup.py index dfc83d4f6..b51edea43 100644 --- a/python/setup.py +++ b/python/setup.py @@ -53,9 +53,9 @@ def main(): cvars["LDFLAGS"] = '-Wl,--no-as-needed ' + cvars["LDFLAGS"] #Includes - include =' src/include'.split() + ['external/boost/include', os.path.join(find_module("numpy")[1], "core", "include")] + include =' src/include /usr/local/cuda/include'.split() + ['external/boost/include', os.path.join(find_module("numpy")[1], "core", "include")] #Sources - src = 'src/lib/symbolic/execute.cpp src/lib/symbolic/io.cpp src/lib/symbolic/expression.cpp src/lib/model/model.cpp src/lib/model/predictors/random_forest.cpp src/lib/backend/templates/mreduction.cpp src/lib/backend/templates/reduction.cpp src/lib/backend/templates/mproduct.cpp src/lib/backend/templates/maxpy.cpp src/lib/backend/templates/base.cpp src/lib/backend/templates/vaxpy.cpp src/lib/backend/mapped_object.cpp src/lib/backend/stream.cpp src/lib/backend/parse.cpp src/lib/backend/keywords.cpp src/lib/backend/binder.cpp src/lib/array.cpp src/lib/value_scalar.cpp src/lib/driver/backend.cpp src/lib/driver/device.cpp src/lib/driver/kernel.cpp src/lib/driver/buffer.cpp src/lib/driver/platform.cpp src/lib/driver/check.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/context.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/handle.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp '.split() + [os.path.join('src', 'wrap', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'model.cpp', 'exceptions.cpp']] + src = 'src/lib/backend/templates/maxpy.cpp src/lib/backend/templates/mreduction.cpp src/lib/backend/templates/base.cpp src/lib/backend/templates/vaxpy.cpp src/lib/backend/templates/mproduct.cpp src/lib/backend/templates/reduction.cpp src/lib/backend/stream.cpp src/lib/backend/keywords.cpp src/lib/backend/mapped_object.cpp src/lib/backend/binder.cpp src/lib/backend/parse.cpp src/lib/exception/operation_not_supported.cpp src/lib/exception/unknown_datatype.cpp src/lib/value_scalar.cpp src/lib/model/predictors/random_forest.cpp src/lib/model/model.cpp src/lib/driver/check.cpp src/lib/driver/ndrange.cpp src/lib/driver/platform.cpp src/lib/driver/backend.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/event.cpp src/lib/driver/kernel.cpp src/lib/driver/handle.cpp src/lib/driver/device.cpp src/lib/driver/buffer.cpp src/lib/driver/context.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/expression.cpp src/lib/symbolic/io.cpp src/lib/array.cpp '.split() + [os.path.join('src', 'wrap', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'model.cpp', 'exceptions.cpp']] boostsrc = 'external/boost/libs/' for s in ['numpy','python','smart_ptr','system','thread']: src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]