Code Quality: More sensible names

Philippe Tillet
2015-12-12 18:32:06 -05:00
parent 46dad59e10
commit 042aa070bb
31 changed files with 379 additions and 379 deletions


@@ -23,15 +23,15 @@ enum expression_type
inline expression_type expression_type_from_string(std::string const & name)
{
if(name=="axpy") return AXPY_TYPE;
if(name=="dot") return DOT_TYPE;
if(name=="ger") return GER_TYPE;
if(name=="gemv_n") return GEMV_N_TYPE;
if(name=="gemv_t") return GEMV_T_TYPE;
if(name=="gemm_nn") return GEMM_NN_TYPE;
if(name=="gemm_nt") return GEMM_NT_TYPE;
if(name=="gemm_tn") return GEMM_TN_TYPE;
if(name=="gemm_tt") return GEMM_TT_TYPE;
if(name=="elementwise_1d") return AXPY_TYPE;
if(name=="reduce_1d") return DOT_TYPE;
if(name=="elementwise_2d") return GER_TYPE;
if(name=="reduce_2d_n") return GEMV_N_TYPE;
if(name=="reduce_2d_t") return GEMV_T_TYPE;
if(name=="matrix_product_nn") return GEMM_NN_TYPE;
if(name=="matrix_product_nt") return GEMM_NT_TYPE;
if(name=="matrix_product_tn") return GEMM_TN_TYPE;
if(name=="matrix_product_tt") return GEMM_TT_TYPE;
throw std::invalid_argument("Unrecognized expression: " + name);
}
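The accepted strings change, but the enum constants keep their historical BLAS-style names, so code that switches on expression_type is unaffected. A minimal sketch of resolving one of the new names (hypothetical caller, not part of this commit):

    // assumes the header declaring expression_type_from_string is included
    expression_type t = expression_type_from_string("reduce_1d");
    // t == DOT_TYPE: only the recognized strings changed, not the enumerators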


@@ -70,7 +70,7 @@ cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem /* memobj */,
* Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
* before using.
*
* clLogMessagesToSystemLog fowards on all log messages to the Apple System Logger
* clLogMessagesToSystemLog fowards on all log messages to the Apple System Logelementwise_2d
*/
#define cl_APPLE_ContextLoggingFunctions 1
extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * /* errstr */,


@@ -200,7 +200,7 @@ extern "C" {
/**
* CUDA device pointer
* CUdeviceptr is defined as an unsigned integer type whose size matches the size of a pointer on the target platform.
* CUdeviceptr is defined as an unsigned inteelementwise_2d type whose size matches the size of a pointer on the target platform.
*/
#if __CUDA_API_VERSION >= 3020
@@ -337,12 +337,12 @@ typedef enum CUoccupancy_flags_enum {
* Array formats
*/
typedef enum CUarray_format_enum {
CU_AD_FORMAT_UNSIGNED_INT8 = 0x01, /**< Unsigned 8-bit integers */
CU_AD_FORMAT_UNSIGNED_INT16 = 0x02, /**< Unsigned 16-bit integers */
CU_AD_FORMAT_UNSIGNED_INT32 = 0x03, /**< Unsigned 32-bit integers */
CU_AD_FORMAT_SIGNED_INT8 = 0x08, /**< Signed 8-bit integers */
CU_AD_FORMAT_SIGNED_INT16 = 0x09, /**< Signed 16-bit integers */
CU_AD_FORMAT_SIGNED_INT32 = 0x0a, /**< Signed 32-bit integers */
CU_AD_FORMAT_UNSIGNED_INT8 = 0x01, /**< Unsigned 8-bit inteelementwise_2ds */
CU_AD_FORMAT_UNSIGNED_INT16 = 0x02, /**< Unsigned 16-bit inteelementwise_2ds */
CU_AD_FORMAT_UNSIGNED_INT32 = 0x03, /**< Unsigned 32-bit inteelementwise_2ds */
CU_AD_FORMAT_SIGNED_INT8 = 0x08, /**< Signed 8-bit inteelementwise_2ds */
CU_AD_FORMAT_SIGNED_INT16 = 0x09, /**< Signed 16-bit inteelementwise_2ds */
CU_AD_FORMAT_SIGNED_INT32 = 0x0a, /**< Signed 32-bit inteelementwise_2ds */
CU_AD_FORMAT_HALF = 0x10, /**< 16-bit floating point */
CU_AD_FORMAT_FLOAT = 0x20 /**< 32-bit floating point */
} CUarray_format;
@@ -558,8 +558,8 @@ typedef enum CUfunction_attribute_enum {
*/
typedef enum CUfunc_cache_enum {
CU_FUNC_CACHE_PREFER_NONE = 0x00, /**< no preference for shared memory or L1 (default) */
CU_FUNC_CACHE_PREFER_SHARED = 0x01, /**< prefer larger shared memory and smaller L1 cache */
CU_FUNC_CACHE_PREFER_L1 = 0x02, /**< prefer larger L1 cache and smaller shared memory */
CU_FUNC_CACHE_PREFER_SHARED = 0x01, /**< prefer larelementwise_2d shared memory and smaller L1 cache */
CU_FUNC_CACHE_PREFER_L1 = 0x02, /**< prefer larelementwise_2d L1 cache and smaller shared memory */
CU_FUNC_CACHE_PREFER_EQUAL = 0x03 /**< prefer equal sized L1 cache and shared memory */
} CUfunc_cache;
@@ -909,7 +909,7 @@ typedef enum cudaError_enum {
/**
* \deprecated
* This error return is deprecated as of CUDA 5.0. It is no longer an error
* This error return is deprecated as of CUDA 5.0. It is no lonelementwise_2d an error
* to attempt to enable/disable the profiling via ::cuProfilerStart or
* ::cuProfilerStop without initialization.
*/
@@ -917,14 +917,14 @@ typedef enum cudaError_enum {
/**
* \deprecated
* This error return is deprecated as of CUDA 5.0. It is no longer an error
* This error return is deprecated as of CUDA 5.0. It is no lonelementwise_2d an error
* to call cuProfilerStart() when profiling is already enabled.
*/
CUDA_ERROR_PROFILER_ALREADY_STARTED = 7,
/**
* \deprecated
* This error return is deprecated as of CUDA 5.0. It is no longer an error
* This error return is deprecated as of CUDA 5.0. It is no lonelementwise_2d an error
* to call cuProfilerStop() when profiling is already disabled.
*/
CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8,
@@ -962,7 +962,7 @@ typedef enum cudaError_enum {
* This indicated that the context being supplied as a parameter to the
* API call was already the active context.
* \deprecated
* This error return is deprecated as of CUDA 3.2. It is no longer an
* This error return is deprecated as of CUDA 3.2. It is no lonelementwise_2d an
* error to attempt to push the active context via ::cuCtxPushCurrent().
*/
CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202,
@@ -1163,7 +1163,7 @@ typedef enum cudaError_enum {
CUDA_ERROR_CONTEXT_IS_DESTROYED = 709,
/**
* A device-side assert triggered during kernel execution. The context
* A device-side assert trigelementwise_2ded during kernel execution. The context
* cannot be used anymore, and must be destroyed. All existing device
* memory allocations from this context are invalid and must be
* reconstructed if the program is to continue using CUDA.
@@ -1499,24 +1499,24 @@ typedef struct CUDA_TEXTURE_DESC_st {
typedef enum CUresourceViewFormat_enum
{
CU_RES_VIEW_FORMAT_NONE = 0x00, /**< No resource view format (use underlying resource format) */
CU_RES_VIEW_FORMAT_UINT_1X8 = 0x01, /**< 1 channel unsigned 8-bit integers */
CU_RES_VIEW_FORMAT_UINT_2X8 = 0x02, /**< 2 channel unsigned 8-bit integers */
CU_RES_VIEW_FORMAT_UINT_4X8 = 0x03, /**< 4 channel unsigned 8-bit integers */
CU_RES_VIEW_FORMAT_SINT_1X8 = 0x04, /**< 1 channel signed 8-bit integers */
CU_RES_VIEW_FORMAT_SINT_2X8 = 0x05, /**< 2 channel signed 8-bit integers */
CU_RES_VIEW_FORMAT_SINT_4X8 = 0x06, /**< 4 channel signed 8-bit integers */
CU_RES_VIEW_FORMAT_UINT_1X16 = 0x07, /**< 1 channel unsigned 16-bit integers */
CU_RES_VIEW_FORMAT_UINT_2X16 = 0x08, /**< 2 channel unsigned 16-bit integers */
CU_RES_VIEW_FORMAT_UINT_4X16 = 0x09, /**< 4 channel unsigned 16-bit integers */
CU_RES_VIEW_FORMAT_SINT_1X16 = 0x0a, /**< 1 channel signed 16-bit integers */
CU_RES_VIEW_FORMAT_SINT_2X16 = 0x0b, /**< 2 channel signed 16-bit integers */
CU_RES_VIEW_FORMAT_SINT_4X16 = 0x0c, /**< 4 channel signed 16-bit integers */
CU_RES_VIEW_FORMAT_UINT_1X32 = 0x0d, /**< 1 channel unsigned 32-bit integers */
CU_RES_VIEW_FORMAT_UINT_2X32 = 0x0e, /**< 2 channel unsigned 32-bit integers */
CU_RES_VIEW_FORMAT_UINT_4X32 = 0x0f, /**< 4 channel unsigned 32-bit integers */
CU_RES_VIEW_FORMAT_SINT_1X32 = 0x10, /**< 1 channel signed 32-bit integers */
CU_RES_VIEW_FORMAT_SINT_2X32 = 0x11, /**< 2 channel signed 32-bit integers */
CU_RES_VIEW_FORMAT_SINT_4X32 = 0x12, /**< 4 channel signed 32-bit integers */
CU_RES_VIEW_FORMAT_UINT_1X8 = 0x01, /**< 1 channel unsigned 8-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_UINT_2X8 = 0x02, /**< 2 channel unsigned 8-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_UINT_4X8 = 0x03, /**< 4 channel unsigned 8-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_SINT_1X8 = 0x04, /**< 1 channel signed 8-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_SINT_2X8 = 0x05, /**< 2 channel signed 8-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_SINT_4X8 = 0x06, /**< 4 channel signed 8-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_UINT_1X16 = 0x07, /**< 1 channel unsigned 16-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_UINT_2X16 = 0x08, /**< 2 channel unsigned 16-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_UINT_4X16 = 0x09, /**< 4 channel unsigned 16-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_SINT_1X16 = 0x0a, /**< 1 channel signed 16-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_SINT_2X16 = 0x0b, /**< 2 channel signed 16-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_SINT_4X16 = 0x0c, /**< 4 channel signed 16-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_UINT_1X32 = 0x0d, /**< 1 channel unsigned 32-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_UINT_2X32 = 0x0e, /**< 2 channel unsigned 32-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_UINT_4X32 = 0x0f, /**< 4 channel unsigned 32-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_SINT_1X32 = 0x10, /**< 1 channel signed 32-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_SINT_2X32 = 0x11, /**< 2 channel signed 32-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_SINT_4X32 = 0x12, /**< 4 channel signed 32-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_FLOAT_1X16 = 0x13, /**< 1 channel 16-bit floating point */
CU_RES_VIEW_FORMAT_FLOAT_2X16 = 0x14, /**< 2 channel 16-bit floating point */
CU_RES_VIEW_FORMAT_FLOAT_4X16 = 0x15, /**< 4 channel 16-bit floating point */
@@ -1606,7 +1606,7 @@ typedef struct CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st {
#define CU_TRSA_OVERRIDE_FORMAT 0x01
/**
* Read the texture as integers rather than promoting the values to floats
* Read the texture as inteelementwise_2ds rather than promoting the values to floats
* in the range [0,1].
* Flag for ::cuTexRefSetFlags()
*/
@@ -1901,7 +1901,7 @@ CUresult CUDAAPI cuDeviceTotalMem(size_t *bytes, CUdevice dev);
/**
* \brief Returns information about the device
*
* Returns in \p *pi the integer value of the attribute \p attrib on device
* Returns in \p *pi the inteelementwise_2d value of the attribute \p attrib on device
* \p dev. The supported attributes are:
* - ::CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK: Maximum number of threads per
* block;
@@ -2819,7 +2819,7 @@ CUresult CUDAAPI cuCtxSynchronize(void);
* violated. This limit can be set smaller than the default or up the maximum
* launch depth of 24. When setting this limit, keep in mind that additional
* levels of sync depth require the driver to reserve large amounts of device
* memory which can no longer be used for user allocations. If these
* memory which can no lonelementwise_2d be used for user allocations. If these
* reservations of device memory fail, ::cuCtxSetLimit will return
* ::CUDA_ERROR_OUT_OF_MEMORY, and the limit can be reset to a lower value.
* This limit is only applicable to devices of compute capability 3.5 and
@@ -2836,7 +2836,7 @@ CUresult CUDAAPI cuCtxSynchronize(void);
* the default (2048 launches) are needed for a module using the device
* runtime, this limit can be increased. Keep in mind that being able to
* sustain additional pending launches will require the driver to reserve
* larger amounts of device memory upfront which can no longer be used for
* larelementwise_2d amounts of device memory upfront which can no lonelementwise_2d be used for
* allocations. If these reservations fail, ::cuCtxSetLimit will return
* ::CUDA_ERROR_OUT_OF_MEMORY, and the limit can be reset to a lower value.
* This limit is only applicable to devices of compute capability 3.5 and
@@ -2921,8 +2921,8 @@ CUresult CUDAAPI cuCtxGetLimit(size_t *pvalue, CUlimit limit);
*
* The supported cache configurations are:
* - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default)
* - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache
* - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory
* - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larelementwise_2d shared memory and smaller L1 cache
* - ::CU_FUNC_CACHE_PREFER_L1: prefer larelementwise_2d L1 cache and smaller shared memory
* - ::CU_FUNC_CACHE_PREFER_EQUAL: prefer equal sized L1 cache and shared memory
*
* \param pconfig - Returned cache configuration
@@ -2971,8 +2971,8 @@ CUresult CUDAAPI cuCtxGetCacheConfig(CUfunc_cache *pconfig);
*
* The supported cache configurations are:
* - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default)
* - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache
* - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory
* - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larelementwise_2d shared memory and smaller L1 cache
* - ::CU_FUNC_CACHE_PREFER_L1: prefer larelementwise_2d L1 cache and smaller shared memory
* - ::CU_FUNC_CACHE_PREFER_EQUAL: prefer equal sized L1 cache and shared memory
*
* \param config - Requested cache configuration
@@ -3054,7 +3054,7 @@ CUresult CUDAAPI cuCtxGetSharedMemConfig(CUsharedconfig *pConfig);
*
* Changing the shared memory bank size will not increase shared memory usage
* or affect occupancy of kernels, but may have major effects on performance.
* Larger bank sizes will allow for greater potential bandwidth to shared memory,
* Larelementwise_2d bank sizes will allow for greater potential bandwidth to shared memory,
* but will change what kinds of accesses to shared memory will result in bank
* conflicts.
*
@@ -7358,7 +7358,7 @@ CUresult CUDAAPI cuPointerGetAttribute(void *data, CUpointer_attribute attribute
* See further documentation in the section titled "API synchronization behavior"
* to learn more about cases when synchronous memory operations can
* exhibit asynchronous behavior.
* \p value will be considered as a pointer to an unsigned integer to which this attribute is to be set.
* \p value will be considered as a pointer to an unsigned inteelementwise_2d to which this attribute is to be set.
*
* \param value - Pointer to memory containing the value to be set
* \param attribute - Pointer attribute to set
@@ -7534,7 +7534,7 @@ CUresult CUDAAPI cuStreamCreateWithPriority(CUstream *phStream, unsigned int fla
* See ::cuStreamCreateWithPriority for details about priority clamping.
*
* \param hStream - Handle to the stream to be queried
* \param priority - Pointer to a signed integer in which the stream's priority is returned
* \param priority - Pointer to a signed inteelementwise_2d in which the stream's priority is returned
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
@@ -7560,7 +7560,7 @@ CUresult CUDAAPI cuStreamGetPriority(CUstream hStream, int *priority);
* and return the flags in \p flags.
*
* \param hStream - Handle to the stream to be queried
* \param flags - Pointer to an unsigned integer in which the stream's flags are returned
* \param flags - Pointer to an unsigned inteelementwise_2d in which the stream's flags are returned
* The value returned in \p flags is a logical 'OR' of all flags that
* were used while creating this stream. See ::cuStreamCreate for the list
* of valid flags
@@ -8104,7 +8104,7 @@ CUresult CUDAAPI cuEventElapsedTime(float *pMilliseconds, CUevent hStart, CUeven
/**
* \brief Returns information about a function
*
* Returns in \p *pi the integer value of the attribute \p attrib on the kernel
* Returns in \p *pi the inteelementwise_2d value of the attribute \p attrib on the kernel
* given by \p hfunc. The supported attributes are:
* - ::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: The maximum number of threads
* per block, beyond which a launch of the function would fail. This number
@@ -8175,8 +8175,8 @@ CUresult CUDAAPI cuFuncGetAttribute(int *pi, CUfunction_attribute attrib, CUfunc
*
* The supported cache configurations are:
* - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default)
* - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache
* - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory
* - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larelementwise_2d shared memory and smaller L1 cache
* - ::CU_FUNC_CACHE_PREFER_L1: prefer larelementwise_2d L1 cache and smaller shared memory
* - ::CU_FUNC_CACHE_PREFER_EQUAL: prefer equal sized L1 cache and shared memory
*
* \param hfunc - Kernel to configure cache for
@@ -8215,7 +8215,7 @@ CUresult CUDAAPI cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
*
* Changing the shared memory bank size will not increase shared memory usage
* or affect occupancy of kernels, but may have major effects on performance.
* Larger bank sizes will allow for greater potential bandwidth to shared memory,
* Larelementwise_2d bank sizes will allow for greater potential bandwidth to shared memory,
* but will change what kinds of accesses to shared memory will result in bank
* conflicts.
*
@@ -8491,11 +8491,11 @@ CUresult CUDAAPI cuFuncSetSharedSize(CUfunction hfunc, unsigned int bytes);
CUresult CUDAAPI cuParamSetSize(CUfunction hfunc, unsigned int numbytes);
/**
* \brief Adds an integer parameter to the function's argument list
* \brief Adds an inteelementwise_2d parameter to the function's argument list
*
* \deprecated
*
* Sets an integer parameter that will be specified the next time the
* Sets an inteelementwise_2d parameter that will be specified the next time the
* kernel corresponding to \p hfunc will be invoked. \p offset is a byte offset.
*
* \param hfunc - Kernel to add parameter to
@@ -9299,8 +9299,8 @@ CUresult CUDAAPI cuTexRefSetMaxAnisotropy(CUtexref hTexRef, unsigned int maxAnis
* returned through the texture reference \p hTexRef. The valid flags are:
*
* - ::CU_TRSF_READ_AS_INTEGER, which suppresses the default behavior of
* having the texture promote integer data to floating point data in the
* range [0, 1]. Note that texture with 32-bit integer format
* having the texture promote inteelementwise_2d data to floating point data in the
* range [0, 1]. Note that texture with 32-bit inteelementwise_2d format
* would not be promoted, regardless of whether or not this
* flag is specified;
* - ::CU_TRSF_NORMALIZED_COORDINATES, which suppresses the
@@ -9859,8 +9859,8 @@ CUresult CUDAAPI cuSurfRefGetArray(CUarray *phArray, CUsurfref hSurfRef);
* This is ignored if ::CUDA_RESOURCE_DESC::resType is ::CU_RESOURCE_TYPE_LINEAR.
*
* - ::CUDA_TEXTURE_DESC::flags can be any combination of the following:
* - ::CU_TRSF_READ_AS_INTEGER, which suppresses the default behavior of having the texture promote integer data to floating point data in the
* range [0, 1]. Note that texture with 32-bit integer format would not be promoted, regardless of whether or not this flag is specified.
* - ::CU_TRSF_READ_AS_INTEGER, which suppresses the default behavior of having the texture promote inteelementwise_2d data to floating point data in the
* range [0, 1]. Note that texture with 32-bit inteelementwise_2d format would not be promoted, regardless of whether or not this flag is specified.
* - ::CU_TRSF_NORMALIZED_COORDINATES, which suppresses the default behavior of having the texture coordinates range from [0, Dim) where Dim is
* the width or height of the CUDA array. Instead, the texture coordinates [0, 1.0) reference the entire breadth of the array dimension; Note
* that for CUDA mipmapped arrays, this flag has to be set.


@@ -89,46 +89,46 @@ protected:
*
* Maps prod(matrix_expression, matrix_expression)
*/
class mapped_gemm : public mapped_object, public binary_leaf
class mapped_matrix_product : public mapped_object, public binary_leaf
{
public:
mapped_gemm(std::string const & scalartype, unsigned int id, node_info info);
mapped_matrix_product(std::string const & scalartype, unsigned int id, node_info info);
};
/** @brief Reduction
*
* Base class for mapping a dot
* Base class for mapping a reduce_1d
*/
class mapped_dot : public mapped_object, public binary_leaf
class mapped_reduce : public mapped_object, public binary_leaf
{
public:
mapped_dot(std::string const & scalartype, unsigned int id, node_info info, std::string const & type_key);
mapped_reduce(std::string const & scalartype, unsigned int id, node_info info, std::string const & type_key);
size_t root_idx() const;
isaac::math_expression const & math_expression() const;
math_expression::node root_node() const;
bool is_index_dot() const;
bool is_index_reduction() const;
op_element root_op() const;
};
/** @brief Scalar dot
/** @brief 1D Reduction
*
* Maps a scalar dot (max, min, argmax, inner_prod, etc..)
* Maps a 1d reduction (max, min, argmax, inner_prod, etc..)
*/
class mapped_scalar_dot : public mapped_dot
class mapped_reduce_1d : public mapped_reduce
{
public:
mapped_scalar_dot(std::string const & scalartype, unsigned int id, node_info info);
mapped_reduce_1d(std::string const & scalartype, unsigned int id, node_info info);
};
/** @brief Vector dot
/** @brief 2D
*
* Maps a row-wise dot (max, min, argmax, matrix-vector product, etc..)
* Maps a 2D reduction (max, min, argmax, matrix-vector product, etc..)
*/
class mapped_gemv : public mapped_dot
class mapped_reduce_2d : public mapped_reduce
{
public:
mapped_gemv(std::string const & scalartype, unsigned int id, node_info info);
mapped_reduce_2d(std::string const & scalartype, unsigned int id, node_info info);
};
/** @brief Host scalar
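The renamed mapped objects describe operations by their structure rather than by BLAS routine. A condensed sketch of the hierarchy after this change (declarations only, bodies omitted):

    class mapped_matrix_product : public mapped_object, public binary_leaf { /* was mapped_gemm */ };
    class mapped_reduce         : public mapped_object, public binary_leaf { /* was mapped_dot */ };
    class mapped_reduce_1d      : public mapped_reduce { /* was mapped_scalar_dot */ };
    class mapped_reduce_2d      : public mapped_reduce { /* was mapped_gemv */ };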


@@ -13,8 +13,8 @@ namespace detail
{
bool is_node_leaf(op_element const & op);
bool is_scalar_dot(math_expression::node const & node);
bool is_vector_dot(math_expression::node const & node);
bool is_scalar_reduce_1d(math_expression::node const & node);
bool is_vector_reduce_1d(math_expression::node const & node);
bool is_assignment(op_element const & op);
bool is_elementwise_operator(op_element const & op);
bool is_elementwise_function(op_element const & op);


@@ -8,22 +8,22 @@ namespace isaac
namespace templates
{
class axpy_parameters : public base::parameters_type
class elementwise_1d_parameters : public base::parameters_type
{
public:
axpy_parameters(unsigned int _simd_width, unsigned int _group_size, unsigned int _num_groups, fetching_policy_type _fetching_policy);
elementwise_1d_parameters(unsigned int _simd_width, unsigned int _group_size, unsigned int _num_groups, fetching_policy_type _fetching_policy);
unsigned int num_groups;
fetching_policy_type fetching_policy;
};
class axpy : public base_impl<axpy, axpy_parameters>
class elementwise_1d : public base_impl<elementwise_1d, elementwise_1d_parameters>
{
private:
virtual int is_invalid_impl(driver::Device const &, math_expression const &) const;
std::string generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mappings) const;
public:
axpy(axpy::parameters_type const & parameters, binding_policy_t binding_policy = BIND_INDEPENDENT);
axpy(unsigned int _simd_width, unsigned int _group_size, unsigned int _num_groups, fetching_policy_type _fetching_policy, binding_policy_t binding_policy = BIND_INDEPENDENT);
elementwise_1d(elementwise_1d::parameters_type const & parameters, binding_policy_t binding_policy = BIND_INDEPENDENT);
elementwise_1d(unsigned int _simd_width, unsigned int _group_size, unsigned int _num_groups, fetching_policy_type _fetching_policy, binding_policy_t binding_policy = BIND_INDEPENDENT);
std::vector<int_t> input_sizes(math_expression const & expressions) const;
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const &);
};
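Constructing the renamed template works the same way as the old axpy class; only the type name changes. A hedged usage sketch (the numeric parameters below are illustrative placeholders, not values from the commit):

    #include "isaac/kernels/templates/elementwise_1d.h"

    namespace isaac { namespace templates {
    // Hypothetical helper: simd_width=1, group_size=64, num_groups=128 are placeholders;
    // fetching_policy_type is assumed visible in this namespace, as in the header above.
    elementwise_1d make_elementwise_1d(fetching_policy_type fetch)
    {
      return elementwise_1d(1, 64, 128, fetch);   // was: axpy(1, 64, 128, fetch)
    }
    } }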


@@ -9,24 +9,24 @@ namespace isaac
namespace templates
{
class ger_parameters : public base::parameters_type
class elementwise_2d_parameters : public base::parameters_type
{
public:
ger_parameters(unsigned int _simd_width, unsigned int _local_size_0, unsigned int _local_size_1, unsigned int _num_groups_0, unsigned int _num_groups_1, fetching_policy_type _fetching_policy);
elementwise_2d_parameters(unsigned int _simd_width, unsigned int _local_size_0, unsigned int _local_size_1, unsigned int _num_groups_0, unsigned int _num_groups_1, fetching_policy_type _fetching_policy);
unsigned int num_groups_0;
unsigned int num_groups_1;
fetching_policy_type fetching_policy;
};
class ger : public base_impl<ger, ger_parameters>
class elementwise_2d : public base_impl<elementwise_2d, elementwise_2d_parameters>
{
private:
int is_invalid_impl(driver::Device const &, math_expression const &) const;
std::string generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mapping) const;
public:
ger(parameters_type const & parameters, binding_policy_t binding_policy = BIND_INDEPENDENT);
ger(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT);
elementwise_2d(parameters_type const & parameters, binding_policy_t binding_policy = BIND_INDEPENDENT);
elementwise_2d(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT);
std::vector<int_t> input_sizes(math_expression const & expressions) const;
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const &);
};


@@ -10,9 +10,9 @@ namespace isaac
namespace templates
{
struct gemm_parameters : public base::parameters_type
struct matrix_product_parameters : public base::parameters_type
{
gemm_parameters(unsigned int simd_width
matrix_product_parameters(unsigned int simd_width
, unsigned int local_size_0, unsigned int KL, unsigned int local_size_1, unsigned int D
, unsigned int ms, unsigned int ks, unsigned int ns
, fetching_policy_type A_fetching_policy, fetching_policy_type B_fetching_policy
@@ -38,7 +38,7 @@ struct gemm_parameters : public base::parameters_type
bool unroll_outer;
};
class gemm : public base_impl<gemm, gemm_parameters>
class matrix_product : public base_impl<matrix_product, matrix_product_parameters>
{
private:
unsigned int temporary_workspace(math_expression const & expressions) const;
@@ -48,9 +48,9 @@ private:
std::string generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const &) const;
void enqueue_block(driver::CommandQueue & queue, int_t M, int_t N, int_t K, array_base const & A, array_base const & B, array_base const & C,
value_scalar const &alpha, value_scalar const &beta, driver::Program const & program, std::string const & suffix, execution_options_type const & options);
std::vector<int_t> infos(math_expression const & expressions, isaac::symbolic::preset::gemm::args &arguments) const;
std::vector<int_t> infos(math_expression const & expressions, isaac::symbolic::preset::matrix_product::args &arguments) const;
public:
gemm(gemm::parameters_type const & parameters, bool check_bound, char A_trans, char B_trans);
matrix_product(matrix_product::parameters_type const & parameters, bool check_bound, char A_trans, char B_trans);
std::vector<int_t> input_sizes(math_expression const & expressions) const;
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const &ctr);
private:
@@ -60,36 +60,36 @@ private:
bool check_bounds_;
};
class gemm_nn : public gemm
class matrix_product_nn : public matrix_product
{
public:
gemm_nn(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
matrix_product_nn(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns, fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound = false);
};
class gemm_tn : public gemm
class matrix_product_tn : public matrix_product
{
public:
gemm_tn(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
matrix_product_tn(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns, fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound = false);
};
class gemm_nt : public gemm
class matrix_product_nt : public matrix_product
{
public:
gemm_nt(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
matrix_product_nt(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns, fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound = false);
};
class gemm_tt : public gemm
class matrix_product_tt : public matrix_product
{
public:
gemm_tt(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
matrix_product_tt(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns, fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound = false);
};
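The four transposition variants keep their constructor layout; only the gemm_* names become matrix_product_*. A sketch constructing one variant (every numeric argument below is a placeholder, not a tuned configuration):

    #include "isaac/kernels/templates/matrix_product.h"

    namespace isaac { namespace templates {
    // Hypothetical factory; argument order follows the declaration above:
    // (simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1)
    matrix_product_nt make_nt(fetching_policy_type Afetch, fetching_policy_type Bfetch)
    {
      return matrix_product_nt(1, 8, 8, 8, 1, 4, 4, 4, Afetch, Bfetch, 8, 8);  // was: gemm_nt(...)
    }
    } }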


@@ -8,27 +8,27 @@ namespace isaac
namespace templates
{
struct dot_parameters : public base::parameters_type
struct reduce_1d_parameters : public base::parameters_type
{
dot_parameters(unsigned int _simd_width,
reduce_1d_parameters(unsigned int _simd_width,
unsigned int _group_size, unsigned int _num_groups,
fetching_policy_type _fetching_policy);
unsigned int num_groups;
fetching_policy_type fetching_policy;
};
class dot : public base_impl<dot, dot_parameters>
class reduce_1d : public base_impl<reduce_1d, reduce_1d_parameters>
{
private:
unsigned int lmem_usage(math_expression const & expressions) const;
int is_invalid_impl(driver::Device const &, math_expression const &) const;
inline void reduce_1d_local_memory(kernel_generation_stream & stream, unsigned int size, std::vector<mapped_scalar_dot*> exprs,
inline void reduce_1d_local_memory(kernel_generation_stream & stream, unsigned int size, std::vector<mapped_reduce_1d*> exprs,
std::string const & buf_str, std::string const & buf_value_str, driver::backend_type backend) const;
std::string generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mapping) const;
public:
dot(dot::parameters_type const & parameters, binding_policy_t binding_policy = BIND_INDEPENDENT);
dot(unsigned int simd, unsigned int ls, unsigned int ng, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT);
reduce_1d(reduce_1d::parameters_type const & parameters, binding_policy_t binding_policy = BIND_INDEPENDENT);
reduce_1d(unsigned int simd, unsigned int ls, unsigned int ng, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT);
std::vector<int_t> input_sizes(math_expression const & expressions) const;
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const &);
private:


@@ -10,9 +10,9 @@ namespace isaac
{
namespace templates
{
struct gemv_parameters : public base::parameters_type
struct reduce_2d_parameters : public base::parameters_type
{
gemv_parameters(unsigned int _simd_width,
reduce_2d_parameters(unsigned int _simd_width,
unsigned int _local_size_0, unsigned int _local_size_1,
unsigned int _num_groups_0, unsigned int _num_groups_1, fetching_policy_type _fetch_policy);
unsigned int num_groups_0;
@@ -21,15 +21,15 @@ struct gemv_parameters : public base::parameters_type
};
class gemv : public base_impl<gemv, gemv_parameters>
class reduce_2d : public base_impl<reduce_2d, reduce_2d_parameters>
{
protected:
enum dot_type
enum reduce_1d_type
{
REDUCE_ROWS,
REDUCE_COLUMNS
};
gemv(gemv::parameters_type const & , dot_type, binding_policy_t);
reduce_2d(reduce_2d::parameters_type const & , reduce_1d_type, binding_policy_t);
private:
virtual int is_invalid_impl(driver::Device const &, math_expression const &) const;
unsigned int lmem_usage(math_expression const &) const;
@@ -38,21 +38,21 @@ public:
virtual std::vector<int_t> input_sizes(math_expression const & expressions) const;
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const &);
private:
dot_type dot_type_;
reduce_1d_type reduce_1d_type_;
};
class gemv_n : public gemv
class reduce_2d_n : public reduce_2d
{
public:
gemv_n(gemv::parameters_type const &, binding_policy_t binding_policy = BIND_INDEPENDENT);
gemv_n(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT);
reduce_2d_n(reduce_2d::parameters_type const &, binding_policy_t binding_policy = BIND_INDEPENDENT);
reduce_2d_n(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT);
};
class gemv_t : public gemv
class reduce_2d_t : public reduce_2d
{
public:
gemv_t(gemv::parameters_type const &, binding_policy_t binding_policy = BIND_INDEPENDENT);
gemv_t(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT);
reduce_2d_t(reduce_2d::parameters_type const &, binding_policy_t binding_policy = BIND_INDEPENDENT);
reduce_2d_t(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT);
};
}
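The reduction templates follow the same pattern: dot becomes reduce_1d, and the gemv_n/gemv_t pair becomes reduce_2d_n/reduce_2d_t with unchanged constructor signatures. A hedged sketch (all sizes are illustrative placeholders):

    #include "isaac/kernels/templates/reduce_1d.h"
    #include "isaac/kernels/templates/reduce_2d.h"

    namespace isaac { namespace templates {
    // Hypothetical helpers illustrating the renamed constructors only.
    reduce_1d   make_reduce_1d(fetching_policy_type fetch)   { return reduce_1d(1, 128, 64, fetch); }        // was: dot
    reduce_2d_n make_reduce_2d_n(fetching_policy_type fetch) { return reduce_2d_n(1, 8, 16, 8, 8, fetch); }  // was: gemv_n
    reduce_2d_t make_reduce_2d_t(fetching_policy_type fetch) { return reduce_2d_t(1, 8, 16, 8, 8, fetch); }  // was: gemv_t
    } }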


@@ -13,7 +13,7 @@ namespace preset
{
class gemm
class matrix_product
{
public:


@@ -115,7 +115,7 @@ public:
The user buffer will not be deallocated when this allocator is destructed.
\param buffer User supplied buffer.
\param size Size of the buffer in bytes. It must at least larger than sizeof(ChunkHeader).
\param size Size of the buffer in bytes. It must at least larelementwise_2d than sizeof(ChunkHeader).
\param chunkSize The size of memory chunk. The default is kDefaultChunkSize.
\param baseAllocator The allocator for allocating memory chunks.
*/


@@ -128,7 +128,7 @@ public:
typedef typename BaseType::pointer Pointer;
//! Reference to (const) GenericMember
typedef typename BaseType::reference Reference;
//! Signed integer type (e.g. \c ptrdiff_t)
//! Signed inteelementwise_2d type (e.g. \c ptrdiff_t)
typedef typename BaseType::difference_type DifferenceType;
//! Default constructor (singular value)
@@ -265,7 +265,7 @@ struct GenericStringRef {
\tparam N length of the string, automatically inferred
\param str Constant character array, lifetime assumed to be longer
\param str Constant character array, lifetime assumed to be lonelementwise_2d
than the use of the string in e.g. a GenericValue
\post \ref s == str
@@ -289,7 +289,7 @@ struct GenericStringRef {
\see StringRef(const CharType*)
\param str Constant character pointer, lifetime assumed to be longer
\param str Constant character pointer, lifetime assumed to be lonelementwise_2d
than the use of the string in e.g. a GenericValue
\post \ref s == str
@@ -305,7 +305,7 @@ struct GenericStringRef {
: s(str), length(internal::StrLen(str)){ RAPIDJSON_ASSERT(s != NULL); }
//! Create constant string reference from pointer and length
/*! \param str constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue
/*! \param str constant string, lifetime assumed to be lonelementwise_2d than the use of the string in e.g. a GenericValue
\param len length of the string, excluding the trailing NULL terminator
\post \ref s == str && \ref length == len
@@ -334,7 +334,7 @@ private:
value in a JSON GenericValue object, if the string's lifetime is known
to be valid long enough.
\tparam CharType Character type of the string
\param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue
\param str Constant string, lifetime assumed to be lonelementwise_2d than the use of the string in e.g. a GenericValue
\return GenericStringRef string reference object
\relatesalso GenericStringRef
@@ -355,7 +355,7 @@ inline GenericStringRef<CharType> StringRef(const CharType* str) {
supports string containing null characters.
\tparam CharType character type of the string
\param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue
\param str Constant string, lifetime assumed to be lonelementwise_2d than the use of the string in e.g. a GenericValue
\param length The length of source string.
\return GenericStringRef string reference object
\relatesalso GenericStringRef
@@ -373,7 +373,7 @@ inline GenericStringRef<CharType> StringRef(const CharType* str, size_t length)
to be valid long enough.
\tparam CharType character type of the string
\param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue
\param str Constant string, lifetime assumed to be lonelementwise_2d than the use of the string in e.g. a GenericValue
\return GenericStringRef string reference object
\relatesalso GenericStringRef
\note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING.
@@ -696,7 +696,7 @@ public:
case kNumberType:
if (IsDouble() || rhs.IsDouble())
return GetDouble() == rhs.GetDouble(); // May convert one operand from integer to double.
return GetDouble() == rhs.GetDouble(); // May convert one operand from inteelementwise_2d to double.
else
return data_.n.u64 == rhs.data_.n.u64;
@@ -1482,7 +1482,7 @@ private:
inline SizeType GetLength() const { return (SizeType)(MaxSize - str[LenPos]); }
}; // at most as many bytes as "String" above => 12 bytes in 32-bit mode, 16 bytes in 64-bit mode
// By using proper binary layout, retrieval of different integer types do not need conversions.
// By using proper binary layout, retrieval of different inteelementwise_2d types do not need conversions.
union Number {
#if RAPIDJSON_ENDIAN == RAPIDJSON_LITTLEENDIAN
struct I {


@@ -20,7 +20,7 @@
// This is a C++ header-only implementation of Grisu2 algorithm from the publication:
// Loitsch, Florian. "Printing floating-point numbers quickly and accurately with
// integers." ACM Sigplan Notices 45.6 (2010): 233-243.
// inteelementwise_2ds." ACM Sigplan Notices 45.6 (2010): 233-243.
#ifndef RAPIDJSON_DTOA_
#define RAPIDJSON_DTOA_


@@ -24,7 +24,7 @@
namespace rapidjson {
namespace internal {
//! Computes integer powers of 10 in double (10.0^n).
//! Computes inteelementwise_2d powers of 10 in double (10.0^n).
/*! This function uses lookup table for fast and accurate results.
\param n non-negative exponent. Must <= 308.
\return 10.0^n


@@ -53,9 +53,9 @@
/*! \def RAPIDJSON_NO_INT64DEFINE
\ingroup RAPIDJSON_CONFIG
\brief Use external 64-bit integer types.
\brief Use external 64-bit inteelementwise_2d types.
RapidJSON requires the 64-bit integer types \c int64_t and \c uint64_t types
RapidJSON requires the 64-bit inteelementwise_2d types \c int64_t and \c uint64_t types
to be available at global scope.
If users have their own definition, define RAPIDJSON_NO_INT64DEFINE to
@@ -171,11 +171,11 @@
///////////////////////////////////////////////////////////////////////////////
// RAPIDJSON_UINT64_C2
//! Construct a 64-bit literal by a pair of 32-bit integer.
//! Construct a 64-bit literal by a pair of 32-bit inteelementwise_2d.
/*!
64-bit literal with or without ULL suffix is prone to compiler warnings.
UINT64_C() is C macro which cause compilation problems.
Use this macro to define 64-bit constants by a pair of 32-bit integer.
Use this macro to define 64-bit constants by a pair of 32-bit inteelementwise_2d.
*/
#ifndef RAPIDJSON_UINT64_C2
#define RAPIDJSON_UINT64_C2(high32, low32) ((static_cast<uint64_t>(high32) << 32) | static_cast<uint64_t>(low32))


@@ -792,7 +792,7 @@ private:
}
}
// Force double for big integer
// Force double for big inteelementwise_2d
if (useDouble) {
while (s.Peek() >= '0' && s.Peek() <= '9') {
if (d >= 1.7976931348623157e307) // DBL_MAX / 10.0


@@ -117,23 +117,23 @@ std::string binary_leaf::evaluate_recursive(leaf_t leaf, std::map<std::string, s
}
mapped_gemm::mapped_gemm(std::string const & scalartype, unsigned int id, node_info info) : mapped_object(scalartype, id, "gemm"), binary_leaf(info) { }
mapped_matrix_product::mapped_matrix_product(std::string const & scalartype, unsigned int id, node_info info) : mapped_object(scalartype, id, "matrix_product"), binary_leaf(info) { }
//
mapped_dot::mapped_dot(std::string const & scalartype, unsigned int id, node_info info, std::string const & type_key) :
mapped_reduce::mapped_reduce(std::string const & scalartype, unsigned int id, node_info info, std::string const & type_key) :
mapped_object(scalartype, id, type_key), binary_leaf(info)
{ }
size_t mapped_dot::root_idx() const
size_t mapped_reduce::root_idx() const
{ return info_.root_idx; }
isaac::math_expression const & mapped_dot::math_expression() const
isaac::math_expression const & mapped_reduce::math_expression() const
{ return *info_.math_expression; }
math_expression::node mapped_dot::root_node() const
math_expression::node mapped_reduce::root_node() const
{ return math_expression().tree()[root_idx()]; }
bool mapped_dot::is_index_dot() const
bool mapped_reduce::is_index_reduction() const
{
op_element const & op = root_op();
return op.type==OPERATOR_ELEMENT_ARGFMAX_TYPE
@@ -142,17 +142,17 @@ bool mapped_dot::is_index_dot() const
|| op.type==OPERATOR_ELEMENT_ARGMIN_TYPE;
}
op_element mapped_dot::root_op() const
op_element mapped_reduce::root_op() const
{
return info_.math_expression->tree()[info_.root_idx].op;
}
//
mapped_scalar_dot::mapped_scalar_dot(std::string const & scalartype, unsigned int id, node_info info) : mapped_dot(scalartype, id, info, "scalar_dot"){ }
mapped_reduce_1d::mapped_reduce_1d(std::string const & scalartype, unsigned int id, node_info info) : mapped_reduce(scalartype, id, info, "scalar_reduce_1d"){ }
//
mapped_gemv::mapped_gemv(std::string const & scalartype, unsigned int id, node_info info) : mapped_dot(scalartype, id, info, "gemv") { }
mapped_reduce_2d::mapped_reduce_2d(std::string const & scalartype, unsigned int id, node_info info) : mapped_reduce(scalartype, id, info, "reduce_2d") { }
//
void mapped_host_scalar::preprocess(std::string & str) const


@@ -14,12 +14,12 @@ namespace detail
bool is_scalar_dot(math_expression::node const & node)
bool is_scalar_reduce_1d(math_expression::node const & node)
{
return node.op.type_family==OPERATOR_VECTOR_DOT_TYPE_FAMILY;
}
bool is_vector_dot(math_expression::node const & node)
bool is_vector_reduce_1d(math_expression::node const & node)
{
return node.op.type_family==OPERATOR_ROWS_DOT_TYPE_FAMILY
|| node.op.type_family==OPERATOR_COLUMNS_DOT_TYPE_FAMILY;


@@ -5,11 +5,11 @@
#include "isaac/array.h"
#include "isaac/tuple.h"
#include "isaac/kernels/keywords.h"
#include "isaac/kernels/templates/axpy.h"
#include "isaac/kernels/templates/dot.h"
#include "isaac/kernels/templates/ger.h"
#include "isaac/kernels/templates/gemv.h"
#include "isaac/kernels/templates/gemm.h"
#include "isaac/kernels/templates/elementwise_1d.h"
#include "isaac/kernels/templates/reduce_1d.h"
#include "isaac/kernels/templates/elementwise_2d.h"
#include "isaac/kernels/templates/reduce_2d.h"
#include "isaac/kernels/templates/matrix_product.h"
#include "isaac/kernels/templates/base.h"
#include "isaac/kernels/parse.h"
#include "isaac/exception/unknown_datatype.h"
@@ -150,11 +150,11 @@ int base_impl<TType, PType>::is_invalid(math_expression const & expressions, dr
return is_invalid_impl(device, expressions);
}
template class base_impl<axpy, axpy_parameters>;
template class base_impl<dot, dot_parameters>;
template class base_impl<ger, ger_parameters>;
template class base_impl<gemv, gemv_parameters>;
template class base_impl<gemm, gemm_parameters>;
template class base_impl<elementwise_1d, elementwise_1d_parameters>;
template class base_impl<reduce_1d, reduce_1d_parameters>;
template class base_impl<elementwise_2d, elementwise_2d_parameters>;
template class base_impl<reduce_2d, reduce_2d_parameters>;
template class base_impl<matrix_product, matrix_product_parameters>;
}
}


@@ -2,7 +2,7 @@
#include <cstring>
#include <algorithm>
#include "isaac/kernels/templates/axpy.h"
#include "isaac/kernels/templates/elementwise_1d.h"
#include "isaac/kernels/keywords.h"
#include "isaac/driver/backend.h"
@@ -18,7 +18,7 @@ namespace isaac
namespace templates
{
axpy_parameters::axpy_parameters(unsigned int _simd_width,
elementwise_1d_parameters::elementwise_1d_parameters(unsigned int _simd_width,
unsigned int _group_size, unsigned int _num_groups,
fetching_policy_type _fetching_policy) :
base::parameters_type(_simd_width, _group_size, 1, 1), num_groups(_num_groups), fetching_policy(_fetching_policy)
@@ -26,14 +26,14 @@ axpy_parameters::axpy_parameters(unsigned int _simd_width,
}
int axpy::is_invalid_impl(driver::Device const &, math_expression const &) const
int elementwise_1d::is_invalid_impl(driver::Device const &, math_expression const &) const
{
if (p_.fetching_policy==FETCH_FROM_LOCAL)
return TEMPLATE_INVALID_FETCHING_POLICY_TYPE;
return TEMPLATE_VALID;
}
std::string axpy::generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mappings) const
std::string elementwise_1d::generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mappings) const
{
driver::backend_type backend = device.backend();
std::string _size_t = size_type(device);
@@ -55,7 +55,7 @@ std::string axpy::generate_impl(std::string const & suffix, math_expression cons
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break;
}
stream << KernelPrefix(backend) << " void " << "axpy" << suffix << "(" << _size_t << " N," << generate_arguments(dtype, device, mappings, expressions) << ")" << std::endl;
stream << KernelPrefix(backend) << " void " << "elementwise_1d" << suffix << "(" << _size_t << " N," << generate_arguments(dtype, device, mappings, expressions) << ")" << std::endl;
stream << "{" << std::endl;
stream.inc_tab();
@@ -174,23 +174,23 @@ std::string axpy::generate_impl(std::string const & suffix, math_expression cons
return stream.str();
}
axpy::axpy(axpy_parameters const & parameters,
elementwise_1d::elementwise_1d(elementwise_1d_parameters const & parameters,
binding_policy_t binding_policy) :
base_impl<axpy, axpy_parameters>(parameters, binding_policy)
base_impl<elementwise_1d, elementwise_1d_parameters>(parameters, binding_policy)
{}
axpy::axpy(unsigned int simd, unsigned int ls, unsigned int ng,
elementwise_1d::elementwise_1d(unsigned int simd, unsigned int ls, unsigned int ng,
fetching_policy_type fetch, binding_policy_t bind):
base_impl<axpy, axpy_parameters>(axpy_parameters(simd,ls,ng,fetch), bind)
base_impl<elementwise_1d, elementwise_1d_parameters>(elementwise_1d_parameters(simd,ls,ng,fetch), bind)
{}
std::vector<int_t> axpy::input_sizes(math_expression const & expressions) const
std::vector<int_t> elementwise_1d::input_sizes(math_expression const & expressions) const
{
return {expressions.shape().max()};
}
void axpy::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const & control)
void elementwise_1d::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const & control)
{
math_expression const & expressions = control.x();
//Size
@@ -202,7 +202,7 @@ void axpy::enqueue(driver::CommandQueue & queue, driver::Program const & program
return;
}
//Kernel
std::string name = "axpy";
std::string name = "elementwise_1d";
name += suffix;
driver::Kernel kernel(program, name.c_str());
//NDRange


@@ -1,6 +1,6 @@
#include <cstring>
#include <iostream>
#include "isaac/kernels/templates/ger.h"
#include "isaac/kernels/templates/elementwise_2d.h"
#include "isaac/symbolic/io.h"
#include "isaac/kernels/keywords.h"
@@ -13,14 +13,14 @@ namespace isaac
namespace templates
{
ger_parameters::ger_parameters(unsigned int _simd_width,
elementwise_2d_parameters::elementwise_2d_parameters(unsigned int _simd_width,
unsigned int _local_size_0, unsigned int _local_size_1,
unsigned int _num_groups_0, unsigned int _num_groups_1,
fetching_policy_type _fetching_policy) : base::parameters_type(_simd_width, _local_size_0, _local_size_1, 1), num_groups_0(_num_groups_0), num_groups_1(_num_groups_1), fetching_policy(_fetching_policy){ }
int ger::is_invalid_impl(driver::Device const &, math_expression const &) const
int elementwise_2d::is_invalid_impl(driver::Device const &, math_expression const &) const
{
if (p_.simd_width>1)
return TEMPLATE_INVALID_SIMD_WIDTH;
@@ -29,7 +29,7 @@ int ger::is_invalid_impl(driver::Device const &, math_expression const &) const
return TEMPLATE_VALID;
}
std::string ger::generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mappings) const
std::string elementwise_2d::generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mappings) const
{
kernel_generation_stream stream;
std::string _size_t = size_type(device);
@@ -45,7 +45,7 @@ std::string ger::generate_impl(std::string const & suffix, math_expression const
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break;
}
stream << KernelPrefix(backend) << " void axpy" << suffix << "(" << _size_t << " M, " << _size_t << " N, " << generate_arguments("#scalartype", device, mappings, expressions) << ")" << std::endl;
stream << KernelPrefix(backend) << " void elementwise_1d" << suffix << "(" << _size_t << " M, " << _size_t << " N, " << generate_arguments("#scalartype", device, mappings, expressions) << ")" << std::endl;
stream << "{" << std::endl;
stream.inc_tab();
@@ -105,25 +105,25 @@ std::string ger::generate_impl(std::string const & suffix, math_expression const
return stream.str();
}
ger::ger(parameters_type const & parameters, binding_policy_t binding_policy) :
base_impl<ger, ger_parameters>(parameters, binding_policy){ }
elementwise_2d::elementwise_2d(parameters_type const & parameters, binding_policy_t binding_policy) :
base_impl<elementwise_2d, elementwise_2d_parameters>(parameters, binding_policy){ }
ger::ger(unsigned int simd, unsigned int ls1, unsigned int ls2,
elementwise_2d::elementwise_2d(unsigned int simd, unsigned int ls1, unsigned int ls2,
unsigned int ng1, unsigned int ng2, fetching_policy_type fetch,
binding_policy_t bind):
base_impl<ger, ger_parameters>(ger_parameters(simd, ls1, ls2, ng1, ng2, fetch), bind)
base_impl<elementwise_2d, elementwise_2d_parameters>(elementwise_2d_parameters(simd, ls1, ls2, ng1, ng2, fetch), bind)
{}
std::vector<int_t> ger::input_sizes(math_expression const & expression) const
std::vector<int_t> elementwise_2d::input_sizes(math_expression const & expression) const
{
std::pair<int_t, int_t> size = matrix_size(expression.tree(), lhs_most(expression.tree(), expression.root()));
return {size.first, size.second};
}
void ger::enqueue(driver::CommandQueue & /*queue*/, driver::Program const & program, std::string const & suffix, base &, execution_handler const & control)
void elementwise_2d::enqueue(driver::CommandQueue & /*queue*/, driver::Program const & program, std::string const & suffix, base &, execution_handler const & control)
{
math_expression const & expressions = control.x();
std::string name = "axpy";
std::string name = "elementwise_1d";
name +=suffix;
driver::Kernel kernel(program, name.c_str());
driver::NDRange global(p_.local_size_0*p_.num_groups_0, p_.local_size_1*p_.num_groups_1);


@@ -1,5 +1,5 @@
#include "isaac/array.h"
#include "isaac/kernels/templates/gemm.h"
#include "isaac/kernels/templates/matrix_product.h"
#include "isaac/kernels/keywords.h"
#include "isaac/symbolic/preset.h"
#include "isaac/exception/operation_not_supported.h"
@@ -15,7 +15,7 @@ namespace isaac
namespace templates
{
gemm_parameters::gemm_parameters(unsigned int simd_width
matrix_product_parameters::matrix_product_parameters(unsigned int simd_width
, unsigned int local_size_0, unsigned int KL, unsigned int local_size_1, unsigned int D
, unsigned int ms, unsigned int ks, unsigned int ns
, fetching_policy_type A_fetching_policy, fetching_policy_type B_fetching_policy
@@ -27,7 +27,7 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
}
unsigned int gemm::lmem_usage(math_expression const & expression) const
unsigned int matrix_product::lmem_usage(math_expression const & expression) const
{
numeric_type numeric_t = lhs_most(expression.tree(), expression.root()).lhs.dtype;
unsigned int N = 0;
@@ -36,7 +36,7 @@ unsigned int gemm::lmem_usage(math_expression const & expression) const
return N*size_of(numeric_t);
}
unsigned int gemm::registers_usage(math_expression const & expression) const
unsigned int matrix_product::registers_usage(math_expression const & expression) const
{
numeric_type numeric_t = lhs_most(expression.tree(), expression.root()).lhs.dtype;
@@ -44,7 +44,7 @@ unsigned int gemm::registers_usage(math_expression const & expression) const
return N*size_of(numeric_t);
}
unsigned int gemm::temporary_workspace(math_expression const & expressions) const
unsigned int matrix_product::temporary_workspace(math_expression const & expressions) const
{
std::vector<int_t> MNK = input_sizes(expressions);
int_t M = MNK[0]; int_t N = MNK[1];
@@ -53,7 +53,7 @@ unsigned int gemm::temporary_workspace(math_expression const & expressions) cons
return 0;
}
int gemm::is_invalid_impl(driver::Device const &, math_expression const &) const
int matrix_product::is_invalid_impl(driver::Device const &, math_expression const &) const
{
// if(device.vendor()==driver::Device::Vendor::NVIDIA && p_.simd_width > 1)
// return TEMPLATE_INVALID_SIMD_WIDTH;
@@ -103,7 +103,7 @@ int gemm::is_invalid_impl(driver::Device const &, math_expression const &) const
return TEMPLATE_VALID;
}
std::string gemm::generate_impl(std::string const & suffix, math_expression const & expression, driver::Device const & device, mapping_type const &) const
std::string matrix_product::generate_impl(std::string const & suffix, math_expression const & expression, driver::Device const & device, mapping_type const &) const
{
using std::string;
using tools::to_string;
@@ -132,10 +132,10 @@ std::string gemm::generate_impl(std::string const & suffix, math_expression cons
//////////////////
/// DECLARATIONS
/// //////////////
std::string gemm_name = "gemm";
std::string matrix_product_name = "matrix_product";
std::string reduce_name = "reduce";
gemm_name += suffix;
matrix_product_name += suffix;
reduce_name += suffix;
switch(backend)
@@ -146,7 +146,7 @@ std::string gemm::generate_impl(std::string const & suffix, math_expression cons
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break;
}
stream << KernelPrefix(backend) << " void " << gemm_name << "(" << _size_t << " M, " << _size_t << " N, " << _size_t << " K, "
stream << KernelPrefix(backend) << " void " << matrix_product_name << "(" << _size_t << " M, " << _size_t << " N, " << _size_t << " K, "
<< Global(backend) << " " << sdtype << "* C, " << _size_t << " ldc," << _size_t << " offc," << _size_t << " Cstride1, "
<< sdtype << " alpha,"
<< Global(backend) << " " << sdtype << "* A, " << _size_t << " lda," << _size_t << " offa," << _size_t << " Astride1,"
@@ -572,7 +572,7 @@ std::string gemm::generate_impl(std::string const & suffix, math_expression cons
#undef VST0RE
}
void gemm::enqueue_block(driver::CommandQueue & /*queue*/, int_t M, int_t N, int_t K,
void matrix_product::enqueue_block(driver::CommandQueue & /*queue*/, int_t M, int_t N, int_t K,
array_base const & A, array_base const & B, array_base const & C,
value_scalar const & alpha, value_scalar const & beta,
driver::Program const & program, std::string const & suffix, execution_options_type const & options)
@@ -582,53 +582,53 @@ void gemm::enqueue_block(driver::CommandQueue & /*queue*/, int_t M, int_t N, int
if(M==0 || N==0 || K==0)
return;
std::string gemm_name = "gemm";
std::string matrix_product_name = "matrix_product";
std::string reduce_name = "reduce";
gemm_name += suffix;
matrix_product_name += suffix;
reduce_name += suffix;
driver::Kernel gemm(program, gemm_name.c_str());
driver::Kernel matrix_product(program, matrix_product_name.c_str());
driver::NDRange local(p_.local_size_0, p_.local_size_1, 1);
driver::NDRange global(align(align(M,p_.mS)/p_.mS, p_.local_size_0), align(align(N,p_.nS)/p_.nS, p_.local_size_1), p_.depth);
unsigned int current_arg = 0;
bind_independent binder;
set_arguments_functor helper(binder, current_arg, gemm);
set_arguments_functor helper(binder, current_arg, matrix_product);
driver::Buffer& workspace = driver::backend::workspaces::get(options.queue(C.context()));
gemm.setSizeArg(current_arg++, M);
gemm.setSizeArg(current_arg++, N);
gemm.setSizeArg(current_arg++, K);
matrix_product.setSizeArg(current_arg++, M);
matrix_product.setSizeArg(current_arg++, N);
matrix_product.setSizeArg(current_arg++, K);
if(p_.depth==1)
{
gemm.setArg(current_arg++,C.data());
gemm.setSizeArg(current_arg++, C.stride()[1]);
gemm.setSizeArg(current_arg++, C.start());
gemm.setSizeArg(current_arg++, C.stride()[0]);
matrix_product.setArg(current_arg++,C.data());
matrix_product.setSizeArg(current_arg++, C.stride()[1]);
matrix_product.setSizeArg(current_arg++, C.start());
matrix_product.setSizeArg(current_arg++, C.stride()[0]);
}
else
{
gemm.setArg(current_arg++, workspace);
gemm.setSizeArg(current_arg++, M);
gemm.setSizeArg(current_arg++, 0);
gemm.setSizeArg(current_arg++, 1);
matrix_product.setArg(current_arg++, workspace);
matrix_product.setSizeArg(current_arg++, M);
matrix_product.setSizeArg(current_arg++, 0);
matrix_product.setSizeArg(current_arg++, 1);
}
helper.set_arguments(alpha.dtype(), alpha.values());
gemm.setArg(current_arg++, A.data());
gemm.setSizeArg(current_arg++, A.stride()[1]);
gemm.setSizeArg(current_arg++, A.start());
gemm.setSizeArg(current_arg++, A.stride()[0]);
matrix_product.setArg(current_arg++, A.data());
matrix_product.setSizeArg(current_arg++, A.stride()[1]);
matrix_product.setSizeArg(current_arg++, A.start());
matrix_product.setSizeArg(current_arg++, A.stride()[0]);
gemm.setArg(current_arg++, B.data());
gemm.setSizeArg(current_arg++, B.stride()[1]);
gemm.setSizeArg(current_arg++, B.start());
gemm.setSizeArg(current_arg++, B.stride()[0]);
matrix_product.setArg(current_arg++, B.data());
matrix_product.setSizeArg(current_arg++, B.stride()[1]);
matrix_product.setSizeArg(current_arg++, B.start());
matrix_product.setSizeArg(current_arg++, B.stride()[0]);
helper.set_arguments(beta.dtype(), beta.values());
options.enqueue(program.context(), gemm, global, local);
options.enqueue(program.context(), matrix_product, global, local);
if(p_.depth > 1)
{
@@ -652,18 +652,18 @@ void gemm::enqueue_block(driver::CommandQueue & /*queue*/, int_t M, int_t N, int
}
std::vector<int_t> gemm::infos(math_expression const & expression, symbolic::preset::gemm::args& arguments) const
std::vector<int_t> matrix_product::infos(math_expression const & expression, symbolic::preset::matrix_product::args& arguments) const
{
math_expression::container_type const & array = expression.tree();
std::size_t root = expression.root();
arguments = symbolic::preset::gemm::check(array, root);
arguments = symbolic::preset::matrix_product::check(array, root);
int_t M = arguments.C->array->shape()[0];
int_t N = arguments.C->array->shape()[1];
int_t K = (A_trans_=='T')?arguments.A->array->shape()[0]:arguments.A->array->shape()[1];
return {M, N, K};
}
gemm::gemm(gemm_parameters const & parameters, bool check_bounds, char A_trans, char B_trans) : base_impl<gemm, gemm_parameters>(parameters, BIND_INDEPENDENT), A_trans_(A_trans), B_trans_(B_trans), check_bounds_(check_bounds)
matrix_product::matrix_product(matrix_product_parameters const & parameters, bool check_bounds, char A_trans, char B_trans) : base_impl<matrix_product, matrix_product_parameters>(parameters, BIND_INDEPENDENT), A_trans_(A_trans), B_trans_(B_trans), check_bounds_(check_bounds)
{
if(A_trans_=='N' && B_trans_=='N') type_ = GEMM_NN_TYPE;
else if(A_trans_=='T' && B_trans_=='N') type_ = GEMM_TN_TYPE;
@@ -672,21 +672,21 @@ gemm::gemm(gemm_parameters const & parameters, bool check_bounds, char A_trans,
else throw;
}
std::vector<int_t> gemm::input_sizes(math_expression const & expressions) const
std::vector<int_t> matrix_product::input_sizes(math_expression const & expressions) const
{
symbolic::preset::gemm::args dummy;
symbolic::preset::matrix_product::args dummy;
return infos((math_expression&)expressions, dummy);
}
void gemm::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback_base, execution_handler const & control)
void matrix_product::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback_base, execution_handler const & control)
{
using namespace tools;
gemm & fallback = (gemm&)fallback_base;
matrix_product & fallback = (matrix_product&)fallback_base;
math_expression const & expressions = control.x();
symbolic::preset::gemm::args args;
symbolic::preset::matrix_product::args args;
std::vector<int_t> MNK = infos(expressions, args);
int_t M = MNK[0];
@@ -720,40 +720,40 @@ void gemm::enqueue(driver::CommandQueue & queue, driver::Program const & program
}
//
gemm_nn::gemm_nn(unsigned int simd
matrix_product_nn::matrix_product_nn(unsigned int simd
, int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns
, fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound) :
gemm(gemm_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'N', 'N')
matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'N', 'N')
{
}
//
gemm_tn::gemm_tn(unsigned int simd
matrix_product_tn::matrix_product_tn(unsigned int simd
, int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns
, fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound) :
gemm(gemm_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'T', 'N')
matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'T', 'N')
{ }
//
gemm_nt::gemm_nt(unsigned int simd
matrix_product_nt::matrix_product_nt(unsigned int simd
, int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns
, fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound) :
gemm(gemm_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'N', 'T')
matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'N', 'T')
{ }
//
gemm_tt::gemm_tt(unsigned int simd
matrix_product_tt::matrix_product_tt(unsigned int simd
, int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns
, fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound) :
gemm(gemm_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'T', 'T')
matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'T', 'T')
{ }
}
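For reference, the renamed matrix_product_* constructors keep the exact argument order of the old gemm_* ones, so only the type name changes at call sites. A minimal, hypothetical usage sketch (parameter values copied from the presets further down; the variable name is illustrative and the isaac headers are assumed to be on the include path):

#include "isaac/kernels/templates/matrix_product.h"

// Hypothetical sketch, not part of this commit: instantiate the NN variant.
// Argument order: simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch,
// lfetch0, lfetch1, check_bound.
isaac::templates::matrix_product_nn mp(
    1, 8, 16, 8, 1, 8, 1, 8,
    isaac::templates::FETCH_FROM_LOCAL, isaac::templates::FETCH_FROM_LOCAL,
    8, 8, true);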
View File
@@ -1,6 +1,6 @@
#include <cstring>
#include <iostream>
#include "isaac/kernels/templates/dot.h"
#include "isaac/kernels/templates/reduce_1d.h"
#include "isaac/kernels/keywords.h"
#include "tools/loop.hpp"
@@ -15,25 +15,25 @@ namespace isaac
{
namespace templates
{
dot_parameters::dot_parameters(unsigned int _simd_width,
reduce_1d_parameters::reduce_1d_parameters(unsigned int _simd_width,
unsigned int _group_size, unsigned int _num_groups,
fetching_policy_type _fetching_policy) : base::parameters_type(_simd_width, _group_size, 1, 2), num_groups(_num_groups), fetching_policy(_fetching_policy)
{ }
unsigned int dot::lmem_usage(math_expression const & x) const
unsigned int reduce_1d::lmem_usage(math_expression const & x) const
{
numeric_type numeric_t= lhs_most(x.tree(), x.root()).lhs.dtype;
return p_.local_size_0*size_of(numeric_t);
}
int dot::is_invalid_impl(driver::Device const &, math_expression const &) const
int reduce_1d::is_invalid_impl(driver::Device const &, math_expression const &) const
{
if (p_.fetching_policy==FETCH_FROM_LOCAL)
return TEMPLATE_INVALID_FETCHING_POLICY_TYPE;
return TEMPLATE_VALID;
}
inline void dot::reduce_1d_local_memory(kernel_generation_stream & stream, unsigned int size, std::vector<mapped_scalar_dot*> exprs,
inline void reduce_1d::reduce_1d_local_memory(kernel_generation_stream & stream, unsigned int size, std::vector<mapped_reduce_1d*> exprs,
std::string const & buf_str, std::string const & buf_value_str, driver::backend_type backend) const
{
stream << "#pragma unroll" << std::endl;
@@ -46,25 +46,25 @@ inline void dot::reduce_1d_local_memory(kernel_generation_stream & stream, unsig
stream.inc_tab();
for (auto & expr : exprs)
if (expr->is_index_dot())
compute_index_dot(stream, expr->process(buf_str+"[lid]"), expr->process(buf_str+"[lid+stride]")
if (expr->is_index_reduction())
compute_index_reduce_1d(stream, expr->process(buf_str+"[lid]"), expr->process(buf_str+"[lid+stride]")
, expr->process(buf_value_str+"[lid]"), expr->process(buf_value_str+"[lid+stride]"),
expr->root_op());
else
compute_dot(stream, expr->process(buf_str+"[lid]"), expr->process(buf_str+"[lid+stride]"), expr->root_op());
compute_reduce_1d(stream, expr->process(buf_str+"[lid]"), expr->process(buf_str+"[lid+stride]"), expr->root_op());
stream.dec_tab();
stream << "}" << std::endl;
stream.dec_tab();
stream << "}" << std::endl;
}
std::string dot::generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mapping) const
std::string reduce_1d::generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mapping) const
{
kernel_generation_stream stream;
std::vector<mapped_scalar_dot*> exprs;
std::vector<mapped_reduce_1d*> exprs;
for (mapping_type::const_iterator iit = mapping.begin(); iit != mapping.end(); ++iit)
if (mapped_scalar_dot * p = dynamic_cast<mapped_scalar_dot*>(iit->second.get()))
if (mapped_reduce_1d * p = dynamic_cast<mapped_reduce_1d*>(iit->second.get()))
exprs.push_back(p);
std::size_t N = exprs.size();
driver::backend_type backend = device.backend();
@@ -81,7 +81,7 @@ std::string dot::generate_impl(std::string const & suffix, math_expression const
{
numeric_type dtype = lhs_most(exprs[k]->math_expression().tree(), exprs[k]->math_expression().root()).lhs.dtype;
std::string sdtype = to_string(dtype);
if (exprs[k]->is_index_dot())
if (exprs[k]->is_index_reduction())
{
stream << exprs[k]->process("uint* #name_temp = (uint*)(tmp + " + tools::to_string(offset) + ");");
offset += 4*p_.num_groups;
@@ -125,7 +125,7 @@ std::string dot::generate_impl(std::string const & suffix, math_expression const
for (unsigned int k = 0; k < N; ++k)
{
if (exprs[k]->is_index_dot())
if (exprs[k]->is_index_reduction())
{
stream << exprs[k]->process(Local(backend).get() + " #scalartype #name_buf_value[" + tools::to_string(p_.local_size_0) + "];") << std::endl;
stream << exprs[k]->process("#scalartype #name_acc_value = " + neutral_element(exprs[k]->root_op(), backend, "#scalartype") + ";") << std::endl;
@@ -174,11 +174,11 @@ std::string dot::generate_impl(std::string const & suffix, math_expression const
accessors["matrix_diag"] = str[a];
accessors["array1"] = "#namereg";
std::string value = elem->evaluate_recursive(LHS_NODE_TYPE, accessors);
if (elem->is_index_dot())
compute_index_dot(stream, elem->process("#name_acc"), "i*" + tools::to_string(simd_width) + "+"
if (elem->is_index_reduction())
compute_index_reduce_1d(stream, elem->process("#name_acc"), "i*" + tools::to_string(simd_width) + "+"
+ tools::to_string(a), elem->process("#name_acc_value"), value,elem->root_op());
else
compute_dot(stream, elem->process("#name_acc"), value,elem->root_op());
compute_reduce_1d(stream, elem->process("#name_acc"), value,elem->root_op());
}
}
});
@@ -186,7 +186,7 @@ std::string dot::generate_impl(std::string const & suffix, math_expression const
//Fills local memory
for (unsigned int k = 0; k < N; ++k)
{
if (exprs[k]->is_index_dot())
if (exprs[k]->is_index_reduction())
stream << exprs[k]->process("#name_buf_value[lid] = #name_acc_value;") << std::endl;
stream << exprs[k]->process("#name_buf[lid] = #name_acc;") << std::endl;
}
@@ -200,7 +200,7 @@ std::string dot::generate_impl(std::string const & suffix, math_expression const
stream.inc_tab();
for (unsigned int k = 0; k < N; ++k)
{
if (exprs[k]->is_index_dot())
if (exprs[k]->is_index_reduction())
stream << exprs[k]->process("#name_temp_value[gpid] = #name_buf_value[0];") << std::endl;
stream << exprs[k]->process("#name_temp[gpid] = #name_buf[0];") << std::endl;
}
@@ -225,9 +225,9 @@ std::string dot::generate_impl(std::string const & suffix, math_expression const
stream << "unsigned int lid = " <<LocalIdx0(backend) << ";" << std::endl;
stream << "unsigned int lsize = " <<LocalSize0(backend) << ";" << std::endl;
for (mapped_scalar_dot* e: exprs)
for (mapped_reduce_1d* e: exprs)
{
if (e->is_index_dot())
if (e->is_index_reduction())
{
stream << e->process(Local(backend).get() + " unsigned int #name_buf[" + tools::to_string(p_.local_size_0) + "];");
stream << e->process("unsigned int #name_acc = 0;") << std::endl;
@@ -244,18 +244,18 @@ std::string dot::generate_impl(std::string const & suffix, math_expression const
stream << "for(unsigned int i = lid; i < " << p_.num_groups << "; i += lsize)" << std::endl;
stream << "{" << std::endl;
stream.inc_tab();
for (mapped_scalar_dot* e: exprs)
if (e->is_index_dot())
compute_index_dot(stream, e->process("#name_acc"), e->process("#name_temp[i]"), e->process("#name_acc_value"),e->process("#name_temp_value[i]"),e->root_op());
for (mapped_reduce_1d* e: exprs)
if (e->is_index_reduction())
compute_index_reduce_1d(stream, e->process("#name_acc"), e->process("#name_temp[i]"), e->process("#name_acc_value"),e->process("#name_temp_value[i]"),e->root_op());
else
compute_dot(stream, e->process("#name_acc"), e->process("#name_temp[i]"), e->root_op());
compute_reduce_1d(stream, e->process("#name_acc"), e->process("#name_temp[i]"), e->root_op());
stream.dec_tab();
stream << "}" << std::endl;
for (unsigned int k = 0; k < N; ++k)
{
if (exprs[k]->is_index_dot())
if (exprs[k]->is_index_reduction())
stream << exprs[k]->process("#name_buf_value[lid] = #name_acc_value;") << std::endl;
stream << exprs[k]->process("#name_buf[lid] = #name_acc;") << std::endl;
}
@@ -268,7 +268,7 @@ std::string dot::generate_impl(std::string const & suffix, math_expression const
stream << "{" << std::endl;
stream.inc_tab();
std::map<std::string, std::string> accessors;
accessors["scalar_dot"] = "#name_buf[0]";
accessors["scalar_reduce_1d"] = "#name_buf[0]";
accessors["array1"] = "#pointer[#start]";
accessors["array11"] = "#pointer[#start]";
stream << evaluate(PARENT_NODE_TYPE, accessors, expressions, expressions.root(), mapping) << ";" << std::endl;
@@ -283,23 +283,23 @@ std::string dot::generate_impl(std::string const & suffix, math_expression const
return stream.str();
}
dot::dot(dot::parameters_type const & parameters,
binding_policy_t binding) : base_impl<dot, dot_parameters>(parameters, binding)
reduce_1d::reduce_1d(reduce_1d::parameters_type const & parameters,
binding_policy_t binding) : base_impl<reduce_1d, reduce_1d_parameters>(parameters, binding)
{ }
dot::dot(unsigned int simd, unsigned int ls, unsigned int ng,
reduce_1d::reduce_1d(unsigned int simd, unsigned int ls, unsigned int ng,
fetching_policy_type fetch, binding_policy_t bind):
base_impl<dot, dot_parameters>(dot_parameters(simd,ls,ng,fetch), bind)
base_impl<reduce_1d, reduce_1d_parameters>(reduce_1d_parameters(simd,ls,ng,fetch), bind)
{}
std::vector<int_t> dot::input_sizes(math_expression const & x) const
std::vector<int_t> reduce_1d::input_sizes(math_expression const & x) const
{
std::vector<size_t> dots_idx = filter_nodes(&is_dot, x, x.root(), false);
int_t N = vector_size(lhs_most(x.tree(), dots_idx[0]));
std::vector<size_t> reduce_1ds_idx = filter_nodes(&is_reduce_1d, x, x.root(), false);
int_t N = vector_size(lhs_most(x.tree(), reduce_1ds_idx[0]));
return {N};
}
void dot::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const & control)
void reduce_1d::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const & control)
{
math_expression const & x = control.x();
@@ -313,10 +313,10 @@ void dot::enqueue(driver::CommandQueue & queue, driver::Program const & program,
return;
}
std::vector<math_expression::node const *> dots;
std::vector<size_t> dots_idx = filter_nodes(&is_dot, x, x.root(), false);
for (size_t idx: dots_idx)
dots.push_back(&x.tree()[idx]);
std::vector<math_expression::node const *> reduce_1ds;
std::vector<size_t> reduce_1ds_idx = filter_nodes(&is_reduce_1d, x, x.root(), false);
for (size_t idx: reduce_1ds_idx)
reduce_1ds.push_back(&x.tree()[idx]);
//Kernel
std::string name[2] = {"prod", "reduce"};
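The reduce_1d template keeps dot's constructor shape. A hypothetical construction sketch using the default preset values that appear further down (variable name illustrative; assumes the isaac headers are available):

#include "isaac/kernels/templates/reduce_1d.h"

// Hypothetical sketch: simd width 1, 64 work-items per group, 128 groups,
// strided global fetches -- the same values as the presets further down.
isaac::templates::reduce_1d red(1, 64, 128,
                                isaac::templates::FETCH_FROM_GLOBAL_STRIDED);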
View File
@@ -2,7 +2,7 @@
#include <iostream>
#include "isaac/kernels/stream.h"
#include "isaac/kernels/keywords.h"
#include "isaac/kernels/templates/gemv.h"
#include "isaac/kernels/templates/reduce_2d.h"
#include "tools/arguments.hpp"
#include "tools/loop.hpp"
@@ -16,33 +16,33 @@ namespace isaac
namespace templates
{
gemv_parameters::gemv_parameters(unsigned int _simd_width,
reduce_2d_parameters::reduce_2d_parameters(unsigned int _simd_width,
unsigned int _local_size_0, unsigned int _local_size_1,
unsigned int _num_groups_0, unsigned int _num_groups_1, fetching_policy_type _fetch_policy): base::parameters_type(_simd_width, _local_size_0, _local_size_1, 1),
num_groups_0(_num_groups_0), num_groups_1(_num_groups_1), fetch_policy(_fetch_policy) { }
int gemv::is_invalid_impl(driver::Device const &, math_expression const &) const
int reduce_2d::is_invalid_impl(driver::Device const &, math_expression const &) const
{
if (p_.fetch_policy==FETCH_FROM_LOCAL)
return TEMPLATE_INVALID_FETCHING_POLICY_TYPE;
return TEMPLATE_VALID;
}
unsigned int gemv::lmem_usage(const math_expression&) const
unsigned int reduce_2d::lmem_usage(const math_expression&) const
{
return (p_.local_size_0+1)*p_.local_size_1;
}
std::string gemv::generate_impl(std::string const & suffix, math_expression const & expression, driver::Device const & device, mapping_type const & mapping) const
std::string reduce_2d::generate_impl(std::string const & suffix, math_expression const & expression, driver::Device const & device, mapping_type const & mapping) const
{
using tools::to_string;
std::vector<mapped_gemv*> dots;
std::vector<size_t> idx = filter_nodes(&is_dot, expression, expression.root(), false);
std::vector<mapped_reduce_2d*> reduce_1ds;
std::vector<size_t> idx = filter_nodes(&is_reduce_1d, expression, expression.root(), false);
for (auto & elem : idx)
dots.push_back((mapped_gemv*)(mapping.at(mapping_key(elem, PARENT_NODE_TYPE)).get()));
reduce_1ds.push_back((mapped_reduce_2d*)(mapping.at(mapping_key(elem, PARENT_NODE_TYPE)).get()));
kernel_generation_stream stream;
driver::backend_type backend = device.backend();
@@ -55,11 +55,11 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
auto unroll_tmp = [&]()
{
unsigned int offset = 0;
for (const auto & e : dots)
for (const auto & e : reduce_1ds)
{
numeric_type dtype = lhs_most(e->math_expression().tree(), e->math_expression().root()).lhs.dtype;
std::string sdtype = to_string(dtype);
if (e->is_index_dot())
if (e->is_index_reduction())
{
stream << e->process("uint* #name_temp = (uint*)(tmp + " + tools::to_string(offset) + "*M);");
offset += 4*p_.num_groups_0;
@@ -73,7 +73,7 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
}
};
int col_simd_width = (dot_type_ == REDUCE_COLUMNS) ? 1 : p_.simd_width;
int col_simd_width = (reduce_1d_type_ == REDUCE_COLUMNS) ? 1 : p_.simd_width;
switch(backend)
{
case driver::CUDA:
@@ -96,7 +96,7 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
unsigned int local_size_0_ld = p_.local_size_0;
std::string local_size_0_ld_str = to_string(local_size_0_ld);
for (const auto & e : dots)
for (const auto & e : reduce_1ds)
stream << e->process(Local(backend).get() + " " + append_width("#scalartype", col_simd_width) + " #name_buf[" + to_string(p_.local_size_1*local_size_0_ld) + "];") << std::endl;
stream << "for(" << _size_t << " r = " << GlobalIdx1(backend) << "*" << col_simd_width << "; r < (M +" << p_.local_size_1 - 1 << ")/" << p_.local_size_1 << "*" << p_.local_size_1*col_simd_width << "; r += " << GlobalSize1(backend) << "*" << col_simd_width << ")" << std::endl;
@@ -106,7 +106,7 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
stream << "" << _size_t << " lidx = " << LocalIdx0(backend) << ";" << std::endl;
stream << "" << _size_t << " lidy = " << LocalIdx1(backend) <<";" << std::endl;
for (const auto & e : dots){
for (const auto & e : reduce_1ds){
std::string data_type = append_width("#scalartype",col_simd_width);
stream << e->process(data_type + " #name_acc = " + InitPrefix(backend, data_type).get() + "(" + neutral_element((e)->root_op(), backend, "#scalartype") + ");") << std::endl;
@@ -116,14 +116,14 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
stream << "{" << std::endl;
stream.inc_tab();
element_wise_loop_1D(stream, p_.fetch_policy, (dot_type_==REDUCE_COLUMNS)?p_.simd_width:1, "c", "N", GlobalIdx0(backend).get(), GlobalSize0(backend).get(), device, [&](unsigned int row_simd_width)
element_wise_loop_1D(stream, p_.fetch_policy, (reduce_1d_type_==REDUCE_COLUMNS)?p_.simd_width:1, "c", "N", GlobalIdx0(backend).get(), GlobalSize0(backend).get(), device, [&](unsigned int row_simd_width)
{
std::set<std::string> already_fetched;
for (const auto & e : dots)
for (const auto & e : reduce_1ds)
{
std::map<std::string, std::string> accessors;
if(dot_type_==REDUCE_COLUMNS)
if(reduce_1d_type_==REDUCE_COLUMNS)
{
std::string data_type = append_width("#scalartype",row_simd_width);
accessors["arraynn"] = data_type + " #namereg = " + vload(row_simd_width, "#scalartype", "c*#stride", "#pointer + r*#ld", "1", backend,false)+";";
@@ -147,20 +147,20 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
str[a] = access_vector_type("#namereg",a);
for (auto & elem : dots)
for (auto & elem : reduce_1ds)
for (unsigned int a = 0; a < row_simd_width; ++a)
{
std::string value = elem->evaluate_recursive(LHS_NODE_TYPE, {{"arraynn", str[a]}, {"repeat", str[a]}, {"array1", "#namereg"}});
if (elem->is_index_dot())
compute_index_dot(stream, elem->process("#name_acc"), "c*"+to_string(row_simd_width) + to_string(a), elem->process("#name_acc_value"), value, elem->root_op());
if (elem->is_index_reduction())
compute_index_reduce_1d(stream, elem->process("#name_acc"), "c*"+to_string(row_simd_width) + to_string(a), elem->process("#name_acc_value"), value, elem->root_op());
else
compute_dot(stream, elem->process("#name_acc"), value,elem->root_op());
compute_reduce_1d(stream, elem->process("#name_acc"), value,elem->root_op());
}
});
stream.dec_tab();
stream << "}" << std::endl;
for (auto & expr : dots)
for (auto & expr : reduce_1ds)
stream << expr->process("#name_buf[lidy*" + local_size_0_ld_str + "+ lidx] = #name_acc;") << std::endl;
stream << "#pragma unroll" << std::endl;
@@ -173,13 +173,13 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
stream << "{" << std::endl;
stream.inc_tab();
for (auto & e : dots)
if (e->is_index_dot())
compute_index_dot(stream, e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx + stride]")
for (auto & e : reduce_1ds)
if (e->is_index_reduction())
compute_index_reduce_1d(stream, e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx + stride]")
, e->process("#name_buf_value[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf_value[lidy*" + local_size_0_ld_str + " + lidx + stride]")
, e->root_op());
else
compute_dot(stream,e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx + stride]"), e->root_op());
compute_reduce_1d(stream,e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx + stride]"), e->root_op());
stream.dec_tab();
stream << "}" << std::endl;
@@ -196,9 +196,9 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
std::map<std::string, std::string> accessors;
for(int s = 0 ; s < col_simd_width ; ++s)
{
accessors["gemv"] = "#name_buf[lidy*" + local_size_0_ld_str + "]";
accessors["reduce_2d"] = "#name_buf[lidy*" + local_size_0_ld_str + "]";
if(col_simd_width > 1)
accessors["gemv"] = access_vector_type(accessors["gemv"], s);
accessors["reduce_2d"] = access_vector_type(accessors["reduce_2d"], s);
accessors["arrayn"] = "#pointer[(r +" + to_string(s) + ")*#stride]";
accessors["array1n"] = "#pointer[(r +" + to_string(s) + ")*#stride]";
accessors["arrayn1"] = "#pointer[(r +" + to_string(s) + ")*#stride]";
@@ -207,11 +207,11 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
}
else
{
for (mapped_dot const * e : dots)
for (mapped_reduce const * e : reduce_1ds)
{
if(col_simd_width > 1)
stream << "if(M - r > " << col_simd_width << "){" << std::endl;
if (e->is_index_dot())
if (e->is_index_reduction())
stream << e->process(vstore(col_simd_width,"uint", "#name_buf_value[lidy*" + local_size_0_ld_str + "]", "0", "#name_temp_value + r + M*" + GroupIdx0(backend).get(), "1", backend, false)) << ";" << std::endl;
stream << e->process(vstore(col_simd_width,"#scalartype", "#name_buf[lidy*" + local_size_0_ld_str + "]", "0", "#name_temp + r + M*" + GroupIdx0(backend).get(), "1", backend, false)) << ";" << std::endl;
if(col_simd_width > 1)
@@ -220,7 +220,7 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
stream << "else{" << std::endl;
stream.inc_tab();
for(int s = 0 ; s < col_simd_width ; ++s){
if (e->is_index_dot())
if (e->is_index_reduction())
stream << "if(r + " << s << "< M) " << e->process("#name_temp_value[r + " + to_string(s) + " + M*" + GroupIdx0(backend).get() + "] = " + access_vector_type("#name_buf_value[lidy*" + local_size_0_ld_str + "]", s)) << ";" << std::endl;
stream << "if(r + " << s << "< M) " << e->process("#name_temp[r + " + to_string(s) + " + M*" + GroupIdx0(backend).get() + "] = " + access_vector_type("#name_buf[lidy*" + local_size_0_ld_str + "]", s)) << ";" << std::endl;
}
@@ -262,7 +262,7 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
{"arrayn1", "#pointer += #start;"},
{"arraynn", "#pointer += #start; "}}, expression, mapping);
for (const auto & e : dots)
for (const auto & e : reduce_1ds)
stream << e->process(Local(backend).get() + " #scalartype #name_buf[" + to_string(p_.local_size_1*local_size_0_ld) + "];") << std::endl;
stream << "for(" << _size_t << " r = " << GlobalIdx1(backend) << "; r < (M +" << p_.local_size_1 - 1 << ")/" << p_.local_size_1 << "*" << p_.local_size_1 << "; r += " << GlobalSize1(backend) << "){" << std::endl;
@@ -270,7 +270,7 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
stream << _size_t << " lidx = " << LocalIdx0(backend) << ";" << std::endl;
stream << _size_t << " lidy = " << LocalIdx1(backend) <<";" << std::endl;
for (const auto & e : dots)
for (const auto & e : reduce_1ds)
stream << e->process("#scalartype #name_acc = " + neutral_element((e)->root_op(), backend, "#scalartype") + ";") << std::endl;
stream << "if (r < M)" << std::endl;
@@ -280,8 +280,8 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
stream << "for(" << _size_t << " c = lidx; c < " << p_.num_groups_0 << "; c += " << LocalSize0(backend) << "){" << std::endl;
stream.inc_tab();
for (mapped_dot* e: dots)
compute_dot(stream, e->process("#name_acc"), e->process("#name_temp[r + M*c]"), e->root_op());
for (mapped_reduce* e: reduce_1ds)
compute_reduce_1d(stream, e->process("#name_acc"), e->process("#name_temp[r + M*c]"), e->root_op());
stream.dec_tab();
stream << "}" << std::endl;
@@ -290,7 +290,7 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
stream.dec_tab();
stream << "}" << std::endl;
for (auto & expr : dots)
for (auto & expr : reduce_1ds)
stream << expr->process("#name_buf[lidy*" + local_size_0_ld_str + "+ lidx] = #name_acc;") << std::endl;
stream << "#pragma unroll" << std::endl;
@@ -303,13 +303,13 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
stream << "{" << std::endl;
stream.inc_tab();
for (auto & e : dots)
if (e->is_index_dot())
compute_index_dot(stream, e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx + stride]")
for (auto & e : reduce_1ds)
if (e->is_index_reduction())
compute_index_reduce_1d(stream, e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx + stride]")
, e->process("#name_buf_value[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf_value[lidy*" + local_size_0_ld_str + " + lidx + stride]")
, e->root_op());
else
compute_dot(stream,e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx + stride]"), e->root_op());
compute_reduce_1d(stream,e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx + stride]"), e->root_op());
stream.dec_tab();
stream << "}" << std::endl;
@@ -323,7 +323,7 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
stream.inc_tab();
std::map<std::string, std::string> accessors;
accessors["gemv"] = "#name_buf[lidy*" + local_size_0_ld_str + "]";
accessors["reduce_2d"] = "#name_buf[lidy*" + local_size_0_ld_str + "]";
accessors["arrayn"] = "#pointer[r*#stride]";
accessors["array1n"] = "#pointer[r*#stride]";
accessors["arrayn1"] = "#pointer[r*#stride]";
@@ -344,30 +344,30 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
return stream.str();
}
gemv::gemv(gemv::parameters_type const & parameters,
gemv::dot_type rtype,
reduce_2d::reduce_2d(reduce_2d::parameters_type const & parameters,
reduce_2d::reduce_1d_type rtype,
binding_policy_t binding_policy) :
base_impl<gemv, gemv_parameters>(parameters, binding_policy),
dot_type_(rtype){ }
base_impl<reduce_2d, reduce_2d_parameters>(parameters, binding_policy),
reduce_1d_type_(rtype){ }
std::vector<int_t> gemv::input_sizes(math_expression const & expression) const
std::vector<int_t> reduce_2d::input_sizes(math_expression const & expression) const
{
std::vector<std::size_t> idx = filter_nodes(&is_dot, expression, expression.root(), false);
std::vector<std::size_t> idx = filter_nodes(&is_reduce_1d, expression, expression.root(), false);
std::pair<int_t, int_t> MN = matrix_size(expression.tree(), lhs_most(expression.tree(), idx[0]));
if(dot_type_==REDUCE_COLUMNS)
if(reduce_1d_type_==REDUCE_COLUMNS)
std::swap(MN.first,MN.second);
return {MN.first, MN.second};
}
void gemv::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const & control)
void reduce_2d::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const & control)
{
math_expression const & expression = control.x();
std::vector<int_t> MN = input_sizes(expression);
std::vector<math_expression::node const *> dots;
std::vector<size_t> dots_idx = filter_nodes(&is_dot, expression, expression.root(), false);
for (size_t idx : dots_idx)
dots.push_back(&expression.tree()[idx]);
std::vector<math_expression::node const *> reduce_1ds;
std::vector<size_t> reduce_1ds_idx = filter_nodes(&is_reduce_1d, expression, expression.root(), false);
for (size_t idx : reduce_1ds_idx)
reduce_1ds.push_back(&expression.tree()[idx]);
//Fallback
if(p_.simd_width>1 && requires_fallback(expression))
@@ -406,15 +406,15 @@ void gemv::enqueue(driver::CommandQueue & queue, driver::Program const & program
control.execution_options().enqueue(program.context(), kernels[i], global[i], local[i]);
}
gemv_n::gemv_n(gemv_parameters const & parameters,binding_policy_t binding_policy): gemv(parameters, REDUCE_ROWS, binding_policy){}
reduce_2d_n::reduce_2d_n(reduce_2d_parameters const & parameters,binding_policy_t binding_policy): reduce_2d(parameters, REDUCE_ROWS, binding_policy){}
gemv_n::gemv_n(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2,
fetching_policy_type fetch, binding_policy_t bind): gemv(gemv_parameters(simd, ls1, ls2, ng1, ng2, fetch), REDUCE_ROWS, bind) {}
reduce_2d_n::reduce_2d_n(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2,
fetching_policy_type fetch, binding_policy_t bind): reduce_2d(reduce_2d_parameters(simd, ls1, ls2, ng1, ng2, fetch), REDUCE_ROWS, bind) {}
gemv_t::gemv_t(gemv::parameters_type const & parameters, binding_policy_t binding_policy): gemv(parameters, REDUCE_COLUMNS, binding_policy){}
reduce_2d_t::reduce_2d_t(reduce_2d::parameters_type const & parameters, binding_policy_t binding_policy): reduce_2d(parameters, REDUCE_COLUMNS, binding_policy){}
gemv_t::gemv_t(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2,
fetching_policy_type fetch, binding_policy_t bind): gemv(gemv_parameters(simd, ls1, ls2, ng1, ng2, fetch), REDUCE_COLUMNS, bind) {}
reduce_2d_t::reduce_2d_t(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2,
fetching_policy_type fetch, binding_policy_t bind): reduce_2d(reduce_2d_parameters(simd, ls1, ls2, ng1, ng2, fetch), REDUCE_COLUMNS, bind) {}
}
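reduce_2d_n and reduce_2d_t are thin wrappers that select REDUCE_ROWS or REDUCE_COLUMNS over the same reduce_2d machinery. A hypothetical sketch constructing both with the preset parameters used further down (variable names illustrative; assumes the isaac headers):

#include "isaac/kernels/templates/reduce_2d.h"

// Hypothetical sketch: row-wise reduction (the old gemv_n) and column-wise
// reduction (the old gemv_t), built with the preset parameter values.
isaac::templates::reduce_2d_n rows(1, 8, 8, 4, 16,
                                   isaac::templates::FETCH_FROM_GLOBAL_STRIDED);
isaac::templates::reduce_2d_t cols(1, 8, 8, 64, 8,
                                   isaac::templates::FETCH_FROM_GLOBAL_STRIDED);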
View File
@@ -81,12 +81,12 @@ public:
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_matrix_column>(&math_expression, root_idx, &mapping_)));
else if(root_node.op.type==OPERATOR_ACCESS_INDEX_TYPE)
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_array_access>(&math_expression, root_idx, &mapping_)));
else if (detail::is_scalar_dot(root_node))
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_scalar_dot>(&math_expression, root_idx, &mapping_)));
else if (detail::is_vector_dot(root_node))
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_gemv>(&math_expression, root_idx, &mapping_)));
else if (detail::is_scalar_reduce_1d(root_node))
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_reduce_1d>(&math_expression, root_idx, &mapping_)));
else if (detail::is_vector_reduce_1d(root_node))
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_reduce_2d>(&math_expression, root_idx, &mapping_)));
else if (root_node.op.type_family == OPERATOR_GEMM_TYPE_FAMILY)
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_gemm>(&math_expression, root_idx, &mapping_)));
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_matrix_product>(&math_expression, root_idx, &mapping_)));
else if (root_node.op.type == OPERATOR_REPEAT_TYPE)
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_repeat>(&math_expression, root_idx, &mapping_)));
else if (root_node.op.type == OPERATOR_OUTER_PROD_TYPE)
View File
@@ -12,7 +12,7 @@ namespace isaac
namespace templates
{
inline void compute_dot(kernel_generation_stream & os, std::string acc, std::string cur, op_element const & op)
inline void compute_reduce_1d(kernel_generation_stream & os, std::string acc, std::string cur, op_element const & op)
{
if (detail::is_elementwise_function(op))
os << acc << "=" << evaluate(op.type) << "(" << acc << "," << cur << ");" << std::endl;
@@ -20,7 +20,7 @@ inline void compute_dot(kernel_generation_stream & os, std::string acc, std::str
os << acc << "= (" << acc << ")" << evaluate(op.type) << "(" << cur << ");" << std::endl;
}
inline void compute_index_dot(kernel_generation_stream & os, std::string acc, std::string cur, std::string const & acc_value, std::string const & cur_value, op_element const & op)
inline void compute_index_reduce_1d(kernel_generation_stream & os, std::string acc, std::string cur, std::string const & acc_value, std::string const & cur_value, op_element const & op)
{
// os << acc << " = " << cur_value << ">" << acc_value << "?" << cur << ":" << acc << ";" << std::endl;
os << acc << "= select(" << acc << "," << cur << "," << cur_value << ">" << acc_value << ");" << std::endl;
@@ -51,11 +51,11 @@ inline std::string neutral_element(op_element const & op, driver::backend_type b
case OPERATOR_ELEMENT_MIN_TYPE : return INF;
case OPERATOR_ELEMENT_ARGMIN_TYPE : return INF;
default: throw std::runtime_error("Unsupported dot operator : no neutral element known");
default: throw std::runtime_error("Unsupported reduce_1d operator : no neutral element known");
}
}
inline bool is_dot(math_expression::node const & node)
inline bool is_reduce_1d(math_expression::node const & node)
{
return node.op.type_family==OPERATOR_VECTOR_DOT_TYPE_FAMILY
|| node.op.type_family==OPERATOR_COLUMNS_DOT_TYPE_FAMILY
@@ -63,7 +63,7 @@ inline bool is_dot(math_expression::node const & node)
}
inline bool is_index_dot(op_element const & op)
inline bool is_index_reduction(op_element const & op)
{
return op.type==OPERATOR_ELEMENT_ARGFMAX_TYPE
|| op.type==OPERATOR_ELEMENT_ARGMAX_TYPE
View File
@@ -9,11 +9,11 @@
#include "isaac/driver/program_cache.h"
#include "isaac/profiles/profiles.h"
#include "isaac/kernels/parse.h"
#include "isaac/kernels/templates/axpy.h"
#include "isaac/kernels/templates/dot.h"
#include "isaac/kernels/templates/ger.h"
#include "isaac/kernels/templates/gemv.h"
#include "isaac/kernels/templates/gemm.h"
#include "isaac/kernels/templates/elementwise_1d.h"
#include "isaac/kernels/templates/reduce_1d.h"
#include "isaac/kernels/templates/elementwise_2d.h"
#include "isaac/kernels/templates/reduce_2d.h"
#include "isaac/kernels/templates/matrix_product.h"
#include "isaac/exception/operation_not_supported.h"
@@ -134,24 +134,24 @@ profiles::value_type::templates_container const & profiles::value_type::template
std::shared_ptr<templates::base> profiles::create(std::string const & template_name, std::vector<int> const & x)
{
templates::fetching_policy_type fetch[] = {templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_GLOBAL_STRIDED, templates::FETCH_FROM_GLOBAL_CONTIGUOUS};
if(template_name=="axpy")
return std::shared_ptr<templates::base>(new templates::axpy(x[0], x[1], x[2], fetch[x[3]]));
else if(template_name=="dot")
return std::shared_ptr<templates::base>(new templates::dot(x[0], x[1], x[2], fetch[x[3]]));
else if(template_name=="ger")
return std::shared_ptr<templates::base>(new templates::ger(x[0], x[1], x[2], x[3], x[4], fetch[x[5]]));
else if(template_name.find("gemv_n")!=std::string::npos)
return std::shared_ptr<templates::base>(new templates::gemv_n(x[0], x[1], x[2], x[3], x[4], fetch[x[5]]));
else if(template_name.find("gemv_t")!=std::string::npos)
return std::shared_ptr<templates::base>(new templates::gemv_t(x[0], x[1], x[2], x[3], x[4], fetch[x[5]]));
else if(template_name.find("gemm_nn")!=std::string::npos)
return std::shared_ptr<templates::base>(new templates::gemm_nn(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], fetch[x[8]], fetch[x[9]], x[10], x[11]));
else if(template_name.find("gemm_tn")!=std::string::npos)
return std::shared_ptr<templates::base>(new templates::gemm_tn(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], fetch[x[8]], fetch[x[9]], x[10], x[11]));
else if(template_name.find("gemm_nt")!=std::string::npos)
return std::shared_ptr<templates::base>(new templates::gemm_nt(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], fetch[x[8]], fetch[x[9]], x[10], x[11]));
else if(template_name.find("gemm_tt")!=std::string::npos)
return std::shared_ptr<templates::base>(new templates::gemm_tt(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], fetch[x[8]], fetch[x[9]], x[10], x[11]));
if(template_name=="elementwise_1d")
return std::shared_ptr<templates::base>(new templates::elementwise_1d(x[0], x[1], x[2], fetch[x[3]]));
else if(template_name=="reduce_1d")
return std::shared_ptr<templates::base>(new templates::reduce_1d(x[0], x[1], x[2], fetch[x[3]]));
else if(template_name=="elementwise_2d")
return std::shared_ptr<templates::base>(new templates::elementwise_2d(x[0], x[1], x[2], x[3], x[4], fetch[x[5]]));
else if(template_name.find("reduce_2d_n")!=std::string::npos)
return std::shared_ptr<templates::base>(new templates::reduce_2d_n(x[0], x[1], x[2], x[3], x[4], fetch[x[5]]));
else if(template_name.find("reduce_2d_t")!=std::string::npos)
return std::shared_ptr<templates::base>(new templates::reduce_2d_t(x[0], x[1], x[2], x[3], x[4], fetch[x[5]]));
else if(template_name.find("matrix_product_nn")!=std::string::npos)
return std::shared_ptr<templates::base>(new templates::matrix_product_nn(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], fetch[x[8]], fetch[x[9]], x[10], x[11]));
else if(template_name.find("matrix_product_tn")!=std::string::npos)
return std::shared_ptr<templates::base>(new templates::matrix_product_tn(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], fetch[x[8]], fetch[x[9]], x[10], x[11]));
else if(template_name.find("matrix_product_nt")!=std::string::npos)
return std::shared_ptr<templates::base>(new templates::matrix_product_nt(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], fetch[x[8]], fetch[x[9]], x[10], x[11]));
else if(template_name.find("matrix_product_tt")!=std::string::npos)
return std::shared_ptr<templates::base>(new templates::matrix_product_tt(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], fetch[x[8]], fetch[x[9]], x[10], x[11]));
else
throw std::invalid_argument("Invalid expression: " + template_name);
}
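The dispatch above maps each of the new template strings onto its class from a flat parameter vector. A hedged usage sketch (whether create is a static member is not visible in this diff and is an assumption here; the vector layout and values mirror the matrix_product_nn branch above):

#include <memory>
#include <vector>
#include "isaac/profiles/profiles.h"

// Hypothetical sketch: 12-entry vector for matrix_product_nn --
// simd, ls0, KL, ls1, D, ms, ks, ns, fetchA-index, fetchB-index,
// lfetch0, lfetch1; the two indices select entries of the fetch[] array above.
std::vector<int> x = {1, 8, 16, 8, 1, 8, 1, 8, 0, 0, 8, 8};
std::shared_ptr<isaac::templates::base> tpl =
    isaac::profiles::create("matrix_product_nn", x);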
@@ -163,7 +163,7 @@ void profiles::import(std::string const & str, driver::CommandQueue const & queu
rapidjson::Document document;
document.Parse<0>(str.c_str());
//Deserialize
std::vector<std::string> operations = {"axpy", "dot", "ger", "gemv_n", "gemv_t", "gemm_nn", "gemm_tn", "gemm_nt", "gemm_tt"};
std::vector<std::string> operations = {"elementwise_1d", "reduce_1d", "elementwise_2d", "reduce_2d_n", "reduce_2d_t", "matrix_product_nn", "matrix_product_tn", "matrix_product_nt", "matrix_product_tt"};
std::vector<std::string> dtype = {"float32", "float64"};
for(auto & operation : operations)
{
@@ -265,15 +265,15 @@ std::map<std::pair<expression_type, numeric_type>, std::shared_ptr<templates::ba
numeric_type types[] = {CHAR_TYPE, UCHAR_TYPE, SHORT_TYPE, USHORT_TYPE, INT_TYPE, UINT_TYPE, LONG_TYPE, ULONG_TYPE, FLOAT_TYPE, DOUBLE_TYPE};
for(auto DTYPE : types)
{
res[std::make_pair(AXPY_TYPE, DTYPE)] = ptr_t (new templates::axpy(1,64,128,templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(DOT_TYPE, DTYPE)] = ptr_t(new templates::dot(1,64,128,templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(GER_TYPE, DTYPE)] = ptr_t(new templates::ger(1,128,1,16,32,templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(GEMV_N_TYPE, DTYPE)] = ptr_t(new templates::gemv_n(1, 8, 8, 4, 16, templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(GEMV_T_TYPE, DTYPE)] = ptr_t(new templates::gemv_t(1, 8, 8, 64, 8, templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(GEMM_NN_TYPE, DTYPE)] = ptr_t(new templates::gemm_nn(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
res[std::make_pair(GEMM_TN_TYPE, DTYPE)] = ptr_t(new templates::gemm_tn(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
res[std::make_pair(GEMM_NT_TYPE, DTYPE)] = ptr_t(new templates::gemm_nt(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
res[std::make_pair(GEMM_TT_TYPE, DTYPE)] = ptr_t(new templates::gemm_tt(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
res[std::make_pair(AXPY_TYPE, DTYPE)] = ptr_t (new templates::elementwise_1d(1,64,128,templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(DOT_TYPE, DTYPE)] = ptr_t(new templates::reduce_1d(1,64,128,templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(GER_TYPE, DTYPE)] = ptr_t(new templates::elementwise_2d(1,128,1,16,32,templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(GEMV_N_TYPE, DTYPE)] = ptr_t(new templates::reduce_2d_n(1, 8, 8, 4, 16, templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(GEMV_T_TYPE, DTYPE)] = ptr_t(new templates::reduce_2d_t(1, 8, 8, 64, 8, templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(GEMM_NN_TYPE, DTYPE)] = ptr_t(new templates::matrix_product_nn(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
res[std::make_pair(GEMM_TN_TYPE, DTYPE)] = ptr_t(new templates::matrix_product_tn(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
res[std::make_pair(GEMM_NT_TYPE, DTYPE)] = ptr_t(new templates::matrix_product_nt(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
res[std::make_pair(GEMM_TT_TYPE, DTYPE)] = ptr_t(new templates::matrix_product_tt(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
}
return res;
}
View File
@@ -161,7 +161,7 @@ namespace isaac
expression_type final_type;
//GEMM
if(symbolic::preset::gemm::args args = symbolic::preset::gemm::check(tree, rootidx)){
if(symbolic::preset::matrix_product::args args = symbolic::preset::matrix_product::check(tree, rootidx)){
final_type = args.type;
}
//Default
View File
@@ -9,7 +9,7 @@ namespace symbolic
namespace preset
{
void gemm::handle_node(math_expression::container_type const & tree, size_t rootidx, args & a)
void matrix_product::handle_node(math_expression::container_type const & tree, size_t rootidx, args & a)
{
//Matrix-Matrix product node
if(tree[rootidx].op.type_family==OPERATOR_GEMM_TYPE_FAMILY)
@@ -46,11 +46,11 @@ void gemm::handle_node(math_expression::container_type const & tree, size_t root
}
}
gemm::args gemm::check(math_expression::container_type const & tree, size_t rootidx)
matrix_product::args matrix_product::check(math_expression::container_type const & tree, size_t rootidx)
{
lhs_rhs_element const * assigned = &tree[rootidx].lhs;
numeric_type dtype = assigned->dtype;
gemm::args result ;
matrix_product::args result ;
if(dtype==INVALID_NUMERIC_TYPE)
return result;
result.alpha = value_scalar(1, dtype);
View File
@@ -73,7 +73,7 @@ def main():
libraries += ['gnustl_shared']
#Source files
src = 'src/lib/symbolic/preset.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/io.cpp src/lib/symbolic/expression.cpp src/lib/array.cpp src/lib/value_scalar.cpp src/lib/driver/backend.cpp src/lib/driver/device.cpp src/lib/driver/kernel.cpp src/lib/driver/buffer.cpp src/lib/driver/platform.cpp src/lib/driver/check.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/dispatch.cpp src/lib/driver/program_cache.cpp src/lib/driver/context.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/handle.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/profiles/presets.cpp src/lib/profiles/profiles.cpp src/lib/profiles/predictors/random_forest.cpp src/lib/kernels/templates/gemv.cpp src/lib/kernels/templates/axpy.cpp src/lib/kernels/templates/gemm.cpp src/lib/kernels/templates/ger.cpp src/lib/kernels/templates/dot.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/stream.cpp src/lib/kernels/parse.cpp src/lib/kernels/keywords.cpp src/lib/kernels/binder.cpp src/lib/wrap/clBLAS.cpp src/lib/wrap/cublas.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
src = 'src/lib/symbolic/preset.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/io.cpp src/lib/symbolic/expression.cpp src/lib/array.cpp src/lib/value_scalar.cpp src/lib/driver/backend.cpp src/lib/driver/device.cpp src/lib/driver/kernel.cpp src/lib/driver/buffer.cpp src/lib/driver/platform.cpp src/lib/driver/check.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/dispatch.cpp src/lib/driver/program_cache.cpp src/lib/driver/context.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/handle.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/profiles/presets.cpp src/lib/profiles/profiles.cpp src/lib/profiles/predictors/random_forest.cpp src/lib/kernels/templates/reduce_2d.cpp src/lib/kernels/templates/elementwise_2d.cpp src/lib/kernels/templates/elementwise_1d.cpp src/lib/kernels/templates/reduce_1d.cpp src/lib/kernels/templates/matrix_product.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/stream.cpp src/lib/kernels/parse.cpp src/lib/kernels/keywords.cpp src/lib/kernels/binder.cpp src/lib/wrap/clBLAS.cpp src/lib/wrap/cublas.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
boostsrc = 'external/boost/libs/'
for s in ['numpy','python','smart_ptr','system','thread']:
src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]