diff --git a/lib/backend/templates/vaxpy.cpp b/lib/backend/templates/vaxpy.cpp index 9cf56a5fd..caa690f8f 100644 --- a/lib/backend/templates/vaxpy.cpp +++ b/lib/backend/templates/vaxpy.cpp @@ -34,7 +34,9 @@ std::vector vaxpy::generate_impl(unsigned int label, symbolic_expre std::string data_type = append_width("#scalartype",simd_width); stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << ",1,1)))" << std::endl; - stream << "__kernel void " << "k" << label << (i==0?"f":"o") << "(unsigned int N," << generate_arguments(data_type, mappings, symbolic_expressions) << ")" << std::endl; + char kprefix[10]; + fill_kernel_name(kprefix, label, (i==0?"f":"o")); + stream << "__kernel void " << kprefix << "(unsigned int N," << generate_arguments(data_type, mappings, symbolic_expressions) << ")" << std::endl; stream << "{" << std::endl; stream.inc_tab(); @@ -119,9 +121,7 @@ void vaxpy::enqueue(cl::CommandQueue & queue, bool misaligned = has_misaligned_offset(symbolic_expressions); bool fallback = p_.simd_width > 1 && (strided || (size%p_.simd_width>0) || misaligned); cl::Program const & program = programs[fallback?0:1].program(); - if(cl::kernels.find(program)==cl::kernels.end()) - cl::kernels.insert(std::make_pair(program, cl::Kernel(program, fallback?kfb:kopt))).first->second; - cl::Kernel & kernel = cl::kernels.at(program); + cl::Kernel kernel(program, fallback?kfb:kopt); //NDRange cl::NDRange grange(p_.local_size_0*p_.num_groups); cl::NDRange lrange(p_.local_size_0); diff --git a/lib/model/model.cpp b/lib/model/model.cpp index 85e4f12bf..97537c437 100644 --- a/lib/model/model.cpp +++ b/lib/model/model.cpp @@ -141,7 +141,7 @@ namespace detail { static expression_type get_expression_type(std::string const & name) { - if(name=="vector-axpy") return VECTOR_AXPY_TYPE; + if(name=="vaxpy") return VECTOR_AXPY_TYPE; if(name=="reduction") return REDUCTION_TYPE; if(name=="matrix-axpy") return MATRIX_AXPY_TYPE; if(name=="row-wise-reductionN") return ROW_WISE_REDUCTION_TYPE; @@ -163,7 +163,7 @@ namespace detail static tools::shared_ptr create(std::string const & template_name, std::vector const & a) { fetching_policy_type fetch[] = {FETCH_FROM_LOCAL, FETCH_FROM_GLOBAL_STRIDED, FETCH_FROM_GLOBAL_CONTIGUOUS}; - if(template_name=="vector-axpy") + if(template_name=="vaxpy") return tools::shared_ptr(new vaxpy( vaxpy_parameters(a[0], a[1], a[2], fetch[a[3]]))); else if(template_name=="reduction") return tools::shared_ptr(new reduction( reduction_parameters(a[0], a[1], a[2], fetch[a[3]]))); @@ -187,6 +187,7 @@ namespace detail model_map_t import(std::string const & fname, cl::CommandQueue & queue) { + namespace js = rapidjson; model_map_t result; //Parse the JSON document @@ -199,7 +200,7 @@ model_map_t import(std::string const & fname, cl::CommandQueue & queue) str.assign((std::istreambuf_iterator(t)), std::istreambuf_iterator()); document.Parse<0>(str.c_str()); //Deserialize - std::vector operations = tools::make_vector() << "vector-axpy" << "reduction" + std::vector operations = tools::make_vector() << "vaxpy" << "reduction" << "matrix-axpy" << "row-wise-reductionN" << "row-wise-reductionT" << "matrix-productNN" << "matrix-productTN" << "matrix-productNT" << "matrix-productTT"; std::vector dtype = tools::make_vector() << "float32" << "float64"; @@ -221,9 +222,13 @@ model_map_t import(std::string const & fname, cl::CommandQueue & queue) js::Value const & profiles = document[opcstr][dtcstr]["profiles"]; for (js::SizeType id = 0 ; id < profiles.Size() ; ++id) templates.push_back(detail::create(*op, tools::to_int_array(profiles[id]))); - // Get predictor - predictors::random_forest predictor(document[opcstr][dtcstr]["predictor"]); - result[std::make_pair(etype, dtype)] = tools::shared_ptr(new model(predictor, templates, queue)); + if(templates.size()>1){ + // Get predictor + predictors::random_forest predictor(document[opcstr][dtcstr]["predictor"]); + result[std::make_pair(etype, dtype)] = tools::shared_ptr(new model(predictor, templates, queue)); + }else{ + result[std::make_pair(etype, dtype)] = tools::shared_ptr(new model(templates, queue)); + } } } } @@ -238,7 +243,7 @@ model_map_t init_models(cl::CommandQueue & queue) typedef tools::shared_ptr ptr_t; numeric_type types[] = {CHAR_TYPE, UCHAR_TYPE, SHORT_TYPE, USHORT_TYPE, INT_TYPE, UINT_TYPE, LONG_TYPE, ULONG_TYPE, FLOAT_TYPE, DOUBLE_TYPE}; - for(size_t i = 0 ; i < 10 ; ++i){ + for(size_t i = 0 ; i < 1 ; ++i){ numeric_type DTYPE = types[i]; res[std::make_pair(SCALAR_AXPY_TYPE, DTYPE)] = ptr_t(new model(vaxpy(1,64,128,FETCH_FROM_GLOBAL_STRIDED), queue)); res[std::make_pair(VECTOR_AXPY_TYPE, DTYPE)] = ptr_t (new model(vaxpy(1,64,128,FETCH_FROM_GLOBAL_STRIDED), queue)); @@ -251,10 +256,11 @@ model_map_t init_models(cl::CommandQueue & queue) res[std::make_pair(MATRIX_PRODUCT_NT_TYPE, DTYPE)] = ptr_t(new model(mproduct_nt(1, 8, 8, 8, 4, 1, 4, FETCH_FROM_LOCAL, FETCH_FROM_LOCAL, 8, 8), queue)); res[std::make_pair(MATRIX_PRODUCT_TT_TYPE, DTYPE)] = ptr_t(new model(mproduct_tt(1, 8, 8, 8, 4, 1, 4, FETCH_FROM_LOCAL, FETCH_FROM_LOCAL, 8, 8), queue)); } + if(const char * cmodel_file = std::getenv("ATIDLAS_MODEL_DEVICE_0")) + return import(std::string(cmodel_file), queue); return res; - // if(const char * cmodel_file = std::getenv("ATIDLAS_MODEL_DEVICE_0")) - // return import(std::string(cmodel_file)); + // else // throw "Please specify a model file"; }