2014-08-30 18:02:17 -04:00
|
|
|
#ifndef ATIDLAS_TEMPLATES_MATRIX_AXPY_HPP
|
|
|
|
#define ATIDLAS_TEMPLATES_MATRIX_AXPY_HPP
|
|
|
|
|
|
|
|
|
|
|
|
#include <vector>
|
|
|
|
|
2014-11-08 18:41:03 -05:00
|
|
|
#include "atidlas/backend/templates/template_base.hpp"
|
2014-11-09 16:29:55 -05:00
|
|
|
#include "atidlas/scheduler/forwards.h"
|
2014-08-30 18:02:17 -04:00
|
|
|
|
|
|
|
namespace atidlas
|
|
|
|
{
|
|
|
|
|
2014-10-27 01:42:11 -04:00
|
|
|
class matrix_axpy_parameters : public template_base::parameters_type
|
2014-08-30 18:02:17 -04:00
|
|
|
{
|
|
|
|
public:
|
2014-10-27 01:42:11 -04:00
|
|
|
matrix_axpy_parameters(unsigned int _simd_width,
|
2014-08-30 18:02:17 -04:00
|
|
|
unsigned int _local_size_0, unsigned int _local_size_1,
|
|
|
|
unsigned int _num_groups_0, unsigned int _num_groups_1,
|
|
|
|
fetching_policy_type _fetching_policy) : template_base::parameters_type(_simd_width, _local_size_0, _local_size_1, 1), num_groups_0(_num_groups_0), num_groups_1(_num_groups_1), fetching_policy(_fetching_policy){ }
|
|
|
|
|
|
|
|
unsigned int num_groups_0;
|
|
|
|
unsigned int num_groups_1;
|
|
|
|
fetching_policy_type fetching_policy;
|
|
|
|
};
|
|
|
|
|
2014-10-27 01:42:11 -04:00
|
|
|
/**
 * @brief Kernel template for element-wise statements whose left-hand side is a matrix.
 *
 * Generates a single OpenCL kernel made of two nested loops (index i bounded by
 * M, index j bounded by N) that loads the operands, evaluates the statement
 * expression and stores the result into the left-hand-side matrix.
 */
class matrix_axpy_template : public template_base_impl<matrix_axpy_template, matrix_axpy_parameters>
{
private:
  /**
   * @brief Validates the parameter set held in p_.
   *
   * @return TEMPLATE_INVALID_SIMD_WIDTH if simd_width is greater than 1,
   *         TEMPLATE_INVALID_FETCHING_POLICY_TYPE if the policy is FETCH_FROM_LOCAL,
   *         TEMPLATE_VALID otherwise. The device and the statements are not inspected.
   */
  int check_invalid_impl(cl::Device const &, statements_container const &) const
  {
    if (p_.simd_width > 1)
    {
      return TEMPLATE_INVALID_SIMD_WIDTH;
    }

    if (p_.fetching_policy == FETCH_FROM_LOCAL)
    {
      return TEMPLATE_INVALID_FETCHING_POLICY_TYPE;
    }

    return TEMPLATE_VALID;
  }

  /**
   * @brief Produces the OpenCL source of one kernel.
   *
   * @param kernel_prefix  used verbatim as the kernel name
   * @param statements     statements to generate code for
   * @param mappings       symbolic mappings associated with the statements
   * @param simd_width     width appended to "#scalartype" for the matrix loads
   * @return the generated source
   *
   * NOTE(review): std::endl is kept on purpose; the generation stream may rely
   * on the flush to apply its indentation - confirm before replacing it.
   */
  std::string generate_impl(std::string const & kernel_prefix, statements_container const & statements, std::vector<mapping_type> const & mappings, unsigned int simd_width) const
  {
    tools::kernel_generation_stream out;

    // Kernel signature: fixed work-group size, then M, N and the statement arguments.
    out << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl;
    out << "__kernel void " << kernel_prefix << "(unsigned int M, unsigned int N, " << generate_arguments("#scalartype", mappings, statements) << ")" << std::endl;
    out << "{" << std::endl;
    out.inc_tab();

    // Prologue: read scalars into registers, shift matrix/vector pointers by their start offsets.
    tools::process(out, PARENT_NODE_TYPE,
                   tools::create_process_accessors("scalar", "#scalartype #namereg = *#pointer;")
                                                  ("matrix", "#pointer += $OFFSET{#start1, #start2};")
                                                  ("vector", "#pointer += #start;"),
                   statements, mappings);

    // Loop over i, bounded by M.
    std::string init_i, bound_i, step_i;
    fetching_loop_info(p_.fetching_policy, "M", out, init_i, bound_i, step_i, "get_global_id(0)", "get_global_size(0)");
    out << "for(unsigned int i = " << init_i << "; i < " << bound_i << "; i += " << step_i << ")" << std::endl;
    out << "{" << std::endl;
    out.inc_tab();

    // Loop over j, bounded by N.
    std::string init_j, bound_j, step_j;
    fetching_loop_info(p_.fetching_policy, "N", out, init_j, bound_j, step_j, "get_global_id(1)", "get_global_size(1)");
    out << "for(unsigned int j = " << init_j << "; j < " << bound_j << "; j += " << step_j << ")" << std::endl;
    out << "{" << std::endl;
    out.inc_tab();

    // Loads: matrix elements at (i, j); vector_diag yields 0 away from the selected diagonal.
    std::string const matrix_load = tools::append_width("#scalartype",simd_width) + " #namereg = #pointer[$OFFSET{i*#stride1,j*#stride2}];";
    tools::process(out, PARENT_NODE_TYPE,
                   tools::create_process_accessors("matrix", matrix_load)
                                                  ("vector_diag", "#scalartype #namereg = ((i + ((#diag_offset<0)?#diag_offset:0))!=(j-((#diag_offset>0)?#diag_offset:0)))?0:#pointer[min(i*#stride, j*#stride)];"),
                   statements, mappings);

    // Expression evaluation on the register names.
    tools::evaluate(out, PARENT_NODE_TYPE,
                    tools::create_evaluate_accessors("matrix", "#namereg")
                                                    ("vector_diag", "#namereg")
                                                    ("scalar", "#namereg"),
                    statements, mappings);

    // Store: write the register back into the left-hand-side matrix.
    tools::process(out, LHS_NODE_TYPE,
                   tools::create_process_accessors("matrix", "#pointer[$OFFSET{i*#stride1,j*#stride2}] = #namereg;"),
                   statements, mappings);

    // Close, in order: the j loop, the i loop, the kernel body.
    for (unsigned int scope = 0; scope < 3; ++scope)
    {
      out.dec_tab();
      out << "}" << std::endl;
    }

    return out.str();
  }

  /**
   * @brief Produces every kernel source of this template.
   *
   * @return a vector holding exactly one source, generated with a SIMD width of 1
   */
  std::vector<std::string> generate_impl(std::string const & kernel_prefix, statements_container const & statements, std::vector<mapping_type> const & mappings) const
  {
    return std::vector<std::string>(1, generate_impl(kernel_prefix, statements, mappings, 1));
  }

public:
  /**
   * @param parameters      tuning parameters forwarded to the base class
   * @param binding_policy  forwarded to the base class (BIND_ALL_UNIQUE by default)
   *
   * The up-to-internal-size flag starts out false.
   */
  matrix_axpy_template(parameters_type const & parameters, binding_policy_t binding_policy = BIND_ALL_UNIQUE)
    : template_base_impl<matrix_axpy_template, matrix_axpy_parameters>(parameters, binding_policy),
      up_to_internal_size_(false)
  {
  }

  /** @brief Sets the flag that input_sizes() hands over to matrix_size(). */
  void up_to_internal_size(bool v)
  {
    up_to_internal_size_ = v;
  }

  /**
   * @brief Sizes passed to the kernel as M and N.
   *
   * Only the first statement is looked at: matrix_size() is queried on its
   * left-most node together with the up-to-internal-size flag.
   *
   * @return a two-element vector: the first, then the second member of the
   *         pair returned by matrix_size()
   */
  std::vector<atidlas_int_t> input_sizes(statements_container const & statements)
  {
    scheduler::statement const & first_statement = statements.data().front();
    std::pair<atidlas_int_t, atidlas_int_t> dims = matrix_size(lhs_most(first_statement.array(), first_statement.root()), up_to_internal_size_);
    return tools::make_vector<atidlas_int_t>() << dims.first << dims.second;
  }

  /**
   * @brief Configures the kernel named kernel_prefix taken from programs[0].
   *
   * Sets local and global work sizes for dimensions 0 and 1, binds M and N as
   * the first two arguments, then binds the statement arguments.
   *
   * NOTE(review): the kernel is never launched here - the enqueue call at the
   * end is commented out. Confirm whether the caller is expected to enqueue it.
   */
  void enqueue(std::string const & kernel_prefix, std::vector<lazy_program_compiler> & programs, statements_container const & statements)
  {
    cl::Kernel & kernel = programs[0].program().get_kernel(kernel_prefix);

    // Global size is local size times the number of groups, per dimension.
    kernel.local_work_size(0, p_.local_size_0);
    kernel.local_work_size(1, p_.local_size_1);
    kernel.global_work_size(0, p_.local_size_0*p_.num_groups_0);
    kernel.global_work_size(1, p_.local_size_1*p_.num_groups_1);

    // Arguments 0 and 1 are M and N; the statement arguments follow.
    unsigned int next_arg = 0;
    std::vector<atidlas_int_t> sizes = input_sizes(statements);
    kernel.arg(next_arg++, static_cast<cl_uint>(sizes[0]));
    kernel.arg(next_arg++, static_cast<cl_uint>(sizes[1]));
    set_arguments(statements, kernel, next_arg);

    // viennacl::ocl::enqueue(kernel);
  }

private:
  bool up_to_internal_size_; // handed to matrix_size() by input_sizes()
};
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|