Code Quality: More sensible names

This commit is contained in:
Philippe Tillet
2015-12-12 18:32:06 -05:00
parent 46dad59e10
commit 042aa070bb
31 changed files with 379 additions and 379 deletions

View File

@@ -23,15 +23,15 @@ enum expression_type
inline expression_type expression_type_from_string(std::string const & name) inline expression_type expression_type_from_string(std::string const & name)
{ {
if(name=="axpy") return AXPY_TYPE; if(name=="elementwise_1d") return AXPY_TYPE;
if(name=="dot") return DOT_TYPE; if(name=="reduce_1d") return DOT_TYPE;
if(name=="ger") return GER_TYPE; if(name=="elementwise_2d") return GER_TYPE;
if(name=="gemv_n") return GEMV_N_TYPE; if(name=="reduce_2d_n") return GEMV_N_TYPE;
if(name=="gemv_t") return GEMV_T_TYPE; if(name=="reduce_2d_t") return GEMV_T_TYPE;
if(name=="gemm_nn") return GEMM_NN_TYPE; if(name=="matrix_product_nn") return GEMM_NN_TYPE;
if(name=="gemm_nt") return GEMM_NT_TYPE; if(name=="matrix_product_nt") return GEMM_NT_TYPE;
if(name=="gemm_tn") return GEMM_TN_TYPE; if(name=="matrix_product_tn") return GEMM_TN_TYPE;
if(name=="gemm_tt") return GEMM_TT_TYPE; if(name=="matrix_product_tt") return GEMM_TT_TYPE;
throw std::invalid_argument("Unrecognized expression: " + name); throw std::invalid_argument("Unrecognized expression: " + name);
} }

View File

@@ -70,7 +70,7 @@ cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem /* memobj */,
* Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) * Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
* before using. * before using.
* *
* clLogMessagesToSystemLog fowards on all log messages to the Apple System Logger * clLogMessagesToSystemLog fowards on all log messages to the Apple System Logger
*/ */
#define cl_APPLE_ContextLoggingFunctions 1 #define cl_APPLE_ContextLoggingFunctions 1
extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * /* errstr */, extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * /* errstr */,

View File

@@ -200,7 +200,7 @@ extern "C" {
/** /**
* CUDA device pointer * CUDA device pointer
* CUdeviceptr is defined as an unsigned integer type whose size matches the size of a pointer on the target platform. * CUdeviceptr is defined as an unsigned integer type whose size matches the size of a pointer on the target platform.
*/ */
#if __CUDA_API_VERSION >= 3020 #if __CUDA_API_VERSION >= 3020
@@ -337,12 +337,12 @@ typedef enum CUoccupancy_flags_enum {
* Array formats * Array formats
*/ */
typedef enum CUarray_format_enum { typedef enum CUarray_format_enum {
CU_AD_FORMAT_UNSIGNED_INT8 = 0x01, /**< Unsigned 8-bit integers */ CU_AD_FORMAT_UNSIGNED_INT8 = 0x01, /**< Unsigned 8-bit integers */
CU_AD_FORMAT_UNSIGNED_INT16 = 0x02, /**< Unsigned 16-bit integers */ CU_AD_FORMAT_UNSIGNED_INT16 = 0x02, /**< Unsigned 16-bit integers */
CU_AD_FORMAT_UNSIGNED_INT32 = 0x03, /**< Unsigned 32-bit integers */ CU_AD_FORMAT_UNSIGNED_INT32 = 0x03, /**< Unsigned 32-bit integers */
CU_AD_FORMAT_SIGNED_INT8 = 0x08, /**< Signed 8-bit integers */ CU_AD_FORMAT_SIGNED_INT8 = 0x08, /**< Signed 8-bit integers */
CU_AD_FORMAT_SIGNED_INT16 = 0x09, /**< Signed 16-bit integers */ CU_AD_FORMAT_SIGNED_INT16 = 0x09, /**< Signed 16-bit integers */
CU_AD_FORMAT_SIGNED_INT32 = 0x0a, /**< Signed 32-bit integers */ CU_AD_FORMAT_SIGNED_INT32 = 0x0a, /**< Signed 32-bit integers */
CU_AD_FORMAT_HALF = 0x10, /**< 16-bit floating point */ CU_AD_FORMAT_HALF = 0x10, /**< 16-bit floating point */
CU_AD_FORMAT_FLOAT = 0x20 /**< 32-bit floating point */ CU_AD_FORMAT_FLOAT = 0x20 /**< 32-bit floating point */
} CUarray_format; } CUarray_format;
@@ -558,8 +558,8 @@ typedef enum CUfunction_attribute_enum {
*/ */
typedef enum CUfunc_cache_enum { typedef enum CUfunc_cache_enum {
CU_FUNC_CACHE_PREFER_NONE = 0x00, /**< no preference for shared memory or L1 (default) */ CU_FUNC_CACHE_PREFER_NONE = 0x00, /**< no preference for shared memory or L1 (default) */
CU_FUNC_CACHE_PREFER_SHARED = 0x01, /**< prefer larger shared memory and smaller L1 cache */ CU_FUNC_CACHE_PREFER_SHARED = 0x01, /**< prefer larger shared memory and smaller L1 cache */
CU_FUNC_CACHE_PREFER_L1 = 0x02, /**< prefer larger L1 cache and smaller shared memory */ CU_FUNC_CACHE_PREFER_L1 = 0x02, /**< prefer larger L1 cache and smaller shared memory */
CU_FUNC_CACHE_PREFER_EQUAL = 0x03 /**< prefer equal sized L1 cache and shared memory */ CU_FUNC_CACHE_PREFER_EQUAL = 0x03 /**< prefer equal sized L1 cache and shared memory */
} CUfunc_cache; } CUfunc_cache;
@@ -909,7 +909,7 @@ typedef enum cudaError_enum {
/** /**
* \deprecated * \deprecated
* This error return is deprecated as of CUDA 5.0. It is no longer an error * This error return is deprecated as of CUDA 5.0. It is no longer an error
* to attempt to enable/disable the profiling via ::cuProfilerStart or * to attempt to enable/disable the profiling via ::cuProfilerStart or
* ::cuProfilerStop without initialization. * ::cuProfilerStop without initialization.
*/ */
@@ -917,14 +917,14 @@ typedef enum cudaError_enum {
/** /**
* \deprecated * \deprecated
* This error return is deprecated as of CUDA 5.0. It is no longer an error * This error return is deprecated as of CUDA 5.0. It is no longer an error
* to call cuProfilerStart() when profiling is already enabled. * to call cuProfilerStart() when profiling is already enabled.
*/ */
CUDA_ERROR_PROFILER_ALREADY_STARTED = 7, CUDA_ERROR_PROFILER_ALREADY_STARTED = 7,
/** /**
* \deprecated * \deprecated
* This error return is deprecated as of CUDA 5.0. It is no longer an error * This error return is deprecated as of CUDA 5.0. It is no longer an error
* to call cuProfilerStop() when profiling is already disabled. * to call cuProfilerStop() when profiling is already disabled.
*/ */
CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8, CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8,
@@ -962,7 +962,7 @@ typedef enum cudaError_enum {
* This indicated that the context being supplied as a parameter to the * This indicated that the context being supplied as a parameter to the
* API call was already the active context. * API call was already the active context.
* \deprecated * \deprecated
* This error return is deprecated as of CUDA 3.2. It is no longer an * This error return is deprecated as of CUDA 3.2. It is no longer an
* error to attempt to push the active context via ::cuCtxPushCurrent(). * error to attempt to push the active context via ::cuCtxPushCurrent().
*/ */
CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202, CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202,
@@ -1163,7 +1163,7 @@ typedef enum cudaError_enum {
CUDA_ERROR_CONTEXT_IS_DESTROYED = 709, CUDA_ERROR_CONTEXT_IS_DESTROYED = 709,
/** /**
* A device-side assert triggered during kernel execution. The context * A device-side assert triggered during kernel execution. The context
* cannot be used anymore, and must be destroyed. All existing device * cannot be used anymore, and must be destroyed. All existing device
* memory allocations from this context are invalid and must be * memory allocations from this context are invalid and must be
* reconstructed if the program is to continue using CUDA. * reconstructed if the program is to continue using CUDA.
@@ -1499,24 +1499,24 @@ typedef struct CUDA_TEXTURE_DESC_st {
typedef enum CUresourceViewFormat_enum typedef enum CUresourceViewFormat_enum
{ {
CU_RES_VIEW_FORMAT_NONE = 0x00, /**< No resource view format (use underlying resource format) */ CU_RES_VIEW_FORMAT_NONE = 0x00, /**< No resource view format (use underlying resource format) */
CU_RES_VIEW_FORMAT_UINT_1X8 = 0x01, /**< 1 channel unsigned 8-bit integers */ CU_RES_VIEW_FORMAT_UINT_1X8 = 0x01, /**< 1 channel unsigned 8-bit integers */
CU_RES_VIEW_FORMAT_UINT_2X8 = 0x02, /**< 2 channel unsigned 8-bit integers */ CU_RES_VIEW_FORMAT_UINT_2X8 = 0x02, /**< 2 channel unsigned 8-bit integers */
CU_RES_VIEW_FORMAT_UINT_4X8 = 0x03, /**< 4 channel unsigned 8-bit integers */ CU_RES_VIEW_FORMAT_UINT_4X8 = 0x03, /**< 4 channel unsigned 8-bit integers */
CU_RES_VIEW_FORMAT_SINT_1X8 = 0x04, /**< 1 channel signed 8-bit integers */ CU_RES_VIEW_FORMAT_SINT_1X8 = 0x04, /**< 1 channel signed 8-bit integers */
CU_RES_VIEW_FORMAT_SINT_2X8 = 0x05, /**< 2 channel signed 8-bit integers */ CU_RES_VIEW_FORMAT_SINT_2X8 = 0x05, /**< 2 channel signed 8-bit integers */
CU_RES_VIEW_FORMAT_SINT_4X8 = 0x06, /**< 4 channel signed 8-bit integers */ CU_RES_VIEW_FORMAT_SINT_4X8 = 0x06, /**< 4 channel signed 8-bit integers */
CU_RES_VIEW_FORMAT_UINT_1X16 = 0x07, /**< 1 channel unsigned 16-bit integers */ CU_RES_VIEW_FORMAT_UINT_1X16 = 0x07, /**< 1 channel unsigned 16-bit integers */
CU_RES_VIEW_FORMAT_UINT_2X16 = 0x08, /**< 2 channel unsigned 16-bit integers */ CU_RES_VIEW_FORMAT_UINT_2X16 = 0x08, /**< 2 channel unsigned 16-bit integers */
CU_RES_VIEW_FORMAT_UINT_4X16 = 0x09, /**< 4 channel unsigned 16-bit integers */ CU_RES_VIEW_FORMAT_UINT_4X16 = 0x09, /**< 4 channel unsigned 16-bit integers */
CU_RES_VIEW_FORMAT_SINT_1X16 = 0x0a, /**< 1 channel signed 16-bit integers */ CU_RES_VIEW_FORMAT_SINT_1X16 = 0x0a, /**< 1 channel signed 16-bit integers */
CU_RES_VIEW_FORMAT_SINT_2X16 = 0x0b, /**< 2 channel signed 16-bit integers */ CU_RES_VIEW_FORMAT_SINT_2X16 = 0x0b, /**< 2 channel signed 16-bit integers */
CU_RES_VIEW_FORMAT_SINT_4X16 = 0x0c, /**< 4 channel signed 16-bit integers */ CU_RES_VIEW_FORMAT_SINT_4X16 = 0x0c, /**< 4 channel signed 16-bit integers */
CU_RES_VIEW_FORMAT_UINT_1X32 = 0x0d, /**< 1 channel unsigned 32-bit integers */ CU_RES_VIEW_FORMAT_UINT_1X32 = 0x0d, /**< 1 channel unsigned 32-bit integers */
CU_RES_VIEW_FORMAT_UINT_2X32 = 0x0e, /**< 2 channel unsigned 32-bit integers */ CU_RES_VIEW_FORMAT_UINT_2X32 = 0x0e, /**< 2 channel unsigned 32-bit integers */
CU_RES_VIEW_FORMAT_UINT_4X32 = 0x0f, /**< 4 channel unsigned 32-bit integers */ CU_RES_VIEW_FORMAT_UINT_4X32 = 0x0f, /**< 4 channel unsigned 32-bit integers */
CU_RES_VIEW_FORMAT_SINT_1X32 = 0x10, /**< 1 channel signed 32-bit integers */ CU_RES_VIEW_FORMAT_SINT_1X32 = 0x10, /**< 1 channel signed 32-bit integers */
CU_RES_VIEW_FORMAT_SINT_2X32 = 0x11, /**< 2 channel signed 32-bit integers */ CU_RES_VIEW_FORMAT_SINT_2X32 = 0x11, /**< 2 channel signed 32-bit integers */
CU_RES_VIEW_FORMAT_SINT_4X32 = 0x12, /**< 4 channel signed 32-bit integers */ CU_RES_VIEW_FORMAT_SINT_4X32 = 0x12, /**< 4 channel signed 32-bit integers */
CU_RES_VIEW_FORMAT_FLOAT_1X16 = 0x13, /**< 1 channel 16-bit floating point */ CU_RES_VIEW_FORMAT_FLOAT_1X16 = 0x13, /**< 1 channel 16-bit floating point */
CU_RES_VIEW_FORMAT_FLOAT_2X16 = 0x14, /**< 2 channel 16-bit floating point */ CU_RES_VIEW_FORMAT_FLOAT_2X16 = 0x14, /**< 2 channel 16-bit floating point */
CU_RES_VIEW_FORMAT_FLOAT_4X16 = 0x15, /**< 4 channel 16-bit floating point */ CU_RES_VIEW_FORMAT_FLOAT_4X16 = 0x15, /**< 4 channel 16-bit floating point */
@@ -1606,7 +1606,7 @@ typedef struct CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st {
#define CU_TRSA_OVERRIDE_FORMAT 0x01 #define CU_TRSA_OVERRIDE_FORMAT 0x01
/** /**
* Read the texture as integers rather than promoting the values to floats * Read the texture as integers rather than promoting the values to floats
* in the range [0,1]. * in the range [0,1].
* Flag for ::cuTexRefSetFlags() * Flag for ::cuTexRefSetFlags()
*/ */
@@ -1901,7 +1901,7 @@ CUresult CUDAAPI cuDeviceTotalMem(size_t *bytes, CUdevice dev);
/** /**
* \brief Returns information about the device * \brief Returns information about the device
* *
* Returns in \p *pi the integer value of the attribute \p attrib on device * Returns in \p *pi the integer value of the attribute \p attrib on device
* \p dev. The supported attributes are: * \p dev. The supported attributes are:
* - ::CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK: Maximum number of threads per * - ::CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK: Maximum number of threads per
* block; * block;
@@ -2819,7 +2819,7 @@ CUresult CUDAAPI cuCtxSynchronize(void);
* violated. This limit can be set smaller than the default or up the maximum * violated. This limit can be set smaller than the default or up the maximum
* launch depth of 24. When setting this limit, keep in mind that additional * launch depth of 24. When setting this limit, keep in mind that additional
* levels of sync depth require the driver to reserve large amounts of device * levels of sync depth require the driver to reserve large amounts of device
* memory which can no longer be used for user allocations. If these * memory which can no longer be used for user allocations. If these
* reservations of device memory fail, ::cuCtxSetLimit will return * reservations of device memory fail, ::cuCtxSetLimit will return
* ::CUDA_ERROR_OUT_OF_MEMORY, and the limit can be reset to a lower value. * ::CUDA_ERROR_OUT_OF_MEMORY, and the limit can be reset to a lower value.
* This limit is only applicable to devices of compute capability 3.5 and * This limit is only applicable to devices of compute capability 3.5 and
@@ -2836,7 +2836,7 @@ CUresult CUDAAPI cuCtxSynchronize(void);
* the default (2048 launches) are needed for a module using the device * the default (2048 launches) are needed for a module using the device
* runtime, this limit can be increased. Keep in mind that being able to * runtime, this limit can be increased. Keep in mind that being able to
* sustain additional pending launches will require the driver to reserve * sustain additional pending launches will require the driver to reserve
* larger amounts of device memory upfront which can no longer be used for * larger amounts of device memory upfront which can no longer be used for
* allocations. If these reservations fail, ::cuCtxSetLimit will return * allocations. If these reservations fail, ::cuCtxSetLimit will return
* ::CUDA_ERROR_OUT_OF_MEMORY, and the limit can be reset to a lower value. * ::CUDA_ERROR_OUT_OF_MEMORY, and the limit can be reset to a lower value.
* This limit is only applicable to devices of compute capability 3.5 and * This limit is only applicable to devices of compute capability 3.5 and
@@ -2921,8 +2921,8 @@ CUresult CUDAAPI cuCtxGetLimit(size_t *pvalue, CUlimit limit);
* *
* The supported cache configurations are: * The supported cache configurations are:
* - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default) * - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default)
* - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache * - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache
* - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory * - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory
* - ::CU_FUNC_CACHE_PREFER_EQUAL: prefer equal sized L1 cache and shared memory * - ::CU_FUNC_CACHE_PREFER_EQUAL: prefer equal sized L1 cache and shared memory
* *
* \param pconfig - Returned cache configuration * \param pconfig - Returned cache configuration
@@ -2971,8 +2971,8 @@ CUresult CUDAAPI cuCtxGetCacheConfig(CUfunc_cache *pconfig);
* *
* The supported cache configurations are: * The supported cache configurations are:
* - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default) * - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default)
* - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache * - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache
* - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory * - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory
* - ::CU_FUNC_CACHE_PREFER_EQUAL: prefer equal sized L1 cache and shared memory * - ::CU_FUNC_CACHE_PREFER_EQUAL: prefer equal sized L1 cache and shared memory
* *
* \param config - Requested cache configuration * \param config - Requested cache configuration
@@ -3054,7 +3054,7 @@ CUresult CUDAAPI cuCtxGetSharedMemConfig(CUsharedconfig *pConfig);
* *
* Changing the shared memory bank size will not increase shared memory usage * Changing the shared memory bank size will not increase shared memory usage
* or affect occupancy of kernels, but may have major effects on performance. * or affect occupancy of kernels, but may have major effects on performance.
* Larger bank sizes will allow for greater potential bandwidth to shared memory, * Larger bank sizes will allow for greater potential bandwidth to shared memory,
* but will change what kinds of accesses to shared memory will result in bank * but will change what kinds of accesses to shared memory will result in bank
* conflicts. * conflicts.
* *
@@ -7358,7 +7358,7 @@ CUresult CUDAAPI cuPointerGetAttribute(void *data, CUpointer_attribute attribute
* See further documentation in the section titled "API synchronization behavior" * See further documentation in the section titled "API synchronization behavior"
* to learn more about cases when synchronous memory operations can * to learn more about cases when synchronous memory operations can
* exhibit asynchronous behavior. * exhibit asynchronous behavior.
* \p value will be considered as a pointer to an unsigned integer to which this attribute is to be set. * \p value will be considered as a pointer to an unsigned integer to which this attribute is to be set.
* *
* \param value - Pointer to memory containing the value to be set * \param value - Pointer to memory containing the value to be set
* \param attribute - Pointer attribute to set * \param attribute - Pointer attribute to set
@@ -7534,7 +7534,7 @@ CUresult CUDAAPI cuStreamCreateWithPriority(CUstream *phStream, unsigned int fla
* See ::cuStreamCreateWithPriority for details about priority clamping. * See ::cuStreamCreateWithPriority for details about priority clamping.
* *
* \param hStream - Handle to the stream to be queried * \param hStream - Handle to the stream to be queried
* \param priority - Pointer to a signed integer in which the stream's priority is returned * \param priority - Pointer to a signed integer in which the stream's priority is returned
* \return * \return
* ::CUDA_SUCCESS, * ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED, * ::CUDA_ERROR_DEINITIALIZED,
@@ -7560,7 +7560,7 @@ CUresult CUDAAPI cuStreamGetPriority(CUstream hStream, int *priority);
* and return the flags in \p flags. * and return the flags in \p flags.
* *
* \param hStream - Handle to the stream to be queried * \param hStream - Handle to the stream to be queried
* \param flags - Pointer to an unsigned integer in which the stream's flags are returned * \param flags - Pointer to an unsigned integer in which the stream's flags are returned
* The value returned in \p flags is a logical 'OR' of all flags that * The value returned in \p flags is a logical 'OR' of all flags that
* were used while creating this stream. See ::cuStreamCreate for the list * were used while creating this stream. See ::cuStreamCreate for the list
* of valid flags * of valid flags
@@ -8104,7 +8104,7 @@ CUresult CUDAAPI cuEventElapsedTime(float *pMilliseconds, CUevent hStart, CUeven
/** /**
* \brief Returns information about a function * \brief Returns information about a function
* *
* Returns in \p *pi the integer value of the attribute \p attrib on the kernel * Returns in \p *pi the integer value of the attribute \p attrib on the kernel
* given by \p hfunc. The supported attributes are: * given by \p hfunc. The supported attributes are:
* - ::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: The maximum number of threads * - ::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: The maximum number of threads
* per block, beyond which a launch of the function would fail. This number * per block, beyond which a launch of the function would fail. This number
@@ -8175,8 +8175,8 @@ CUresult CUDAAPI cuFuncGetAttribute(int *pi, CUfunction_attribute attrib, CUfunc
* *
* The supported cache configurations are: * The supported cache configurations are:
* - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default) * - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default)
* - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache * - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache
* - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory * - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory
* - ::CU_FUNC_CACHE_PREFER_EQUAL: prefer equal sized L1 cache and shared memory * - ::CU_FUNC_CACHE_PREFER_EQUAL: prefer equal sized L1 cache and shared memory
* *
* \param hfunc - Kernel to configure cache for * \param hfunc - Kernel to configure cache for
@@ -8215,7 +8215,7 @@ CUresult CUDAAPI cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
* *
* Changing the shared memory bank size will not increase shared memory usage * Changing the shared memory bank size will not increase shared memory usage
* or affect occupancy of kernels, but may have major effects on performance. * or affect occupancy of kernels, but may have major effects on performance.
* Larger bank sizes will allow for greater potential bandwidth to shared memory, * Larger bank sizes will allow for greater potential bandwidth to shared memory,
* but will change what kinds of accesses to shared memory will result in bank * but will change what kinds of accesses to shared memory will result in bank
* conflicts. * conflicts.
* *
@@ -8491,11 +8491,11 @@ CUresult CUDAAPI cuFuncSetSharedSize(CUfunction hfunc, unsigned int bytes);
CUresult CUDAAPI cuParamSetSize(CUfunction hfunc, unsigned int numbytes); CUresult CUDAAPI cuParamSetSize(CUfunction hfunc, unsigned int numbytes);
/** /**
* \brief Adds an integer parameter to the function's argument list * \brief Adds an integer parameter to the function's argument list
* *
* \deprecated * \deprecated
* *
* Sets an integer parameter that will be specified the next time the * Sets an integer parameter that will be specified the next time the
* kernel corresponding to \p hfunc will be invoked. \p offset is a byte offset. * kernel corresponding to \p hfunc will be invoked. \p offset is a byte offset.
* *
* \param hfunc - Kernel to add parameter to * \param hfunc - Kernel to add parameter to
@@ -9299,8 +9299,8 @@ CUresult CUDAAPI cuTexRefSetMaxAnisotropy(CUtexref hTexRef, unsigned int maxAnis
* returned through the texture reference \p hTexRef. The valid flags are: * returned through the texture reference \p hTexRef. The valid flags are:
* *
* - ::CU_TRSF_READ_AS_INTEGER, which suppresses the default behavior of * - ::CU_TRSF_READ_AS_INTEGER, which suppresses the default behavior of
* having the texture promote integer data to floating point data in the * having the texture promote integer data to floating point data in the
* range [0, 1]. Note that texture with 32-bit integer format * range [0, 1]. Note that texture with 32-bit integer format
* would not be promoted, regardless of whether or not this * would not be promoted, regardless of whether or not this
* flag is specified; * flag is specified;
* - ::CU_TRSF_NORMALIZED_COORDINATES, which suppresses the * - ::CU_TRSF_NORMALIZED_COORDINATES, which suppresses the
@@ -9859,8 +9859,8 @@ CUresult CUDAAPI cuSurfRefGetArray(CUarray *phArray, CUsurfref hSurfRef);
* This is ignored if ::CUDA_RESOURCE_DESC::resType is ::CU_RESOURCE_TYPE_LINEAR. * This is ignored if ::CUDA_RESOURCE_DESC::resType is ::CU_RESOURCE_TYPE_LINEAR.
* *
* - ::CUDA_TEXTURE_DESC::flags can be any combination of the following: * - ::CUDA_TEXTURE_DESC::flags can be any combination of the following:
* - ::CU_TRSF_READ_AS_INTEGER, which suppresses the default behavior of having the texture promote integer data to floating point data in the * - ::CU_TRSF_READ_AS_INTEGER, which suppresses the default behavior of having the texture promote integer data to floating point data in the
* range [0, 1]. Note that texture with 32-bit integer format would not be promoted, regardless of whether or not this flag is specified. * range [0, 1]. Note that texture with 32-bit integer format would not be promoted, regardless of whether or not this flag is specified.
* - ::CU_TRSF_NORMALIZED_COORDINATES, which suppresses the default behavior of having the texture coordinates range from [0, Dim) where Dim is * - ::CU_TRSF_NORMALIZED_COORDINATES, which suppresses the default behavior of having the texture coordinates range from [0, Dim) where Dim is
* the width or height of the CUDA array. Instead, the texture coordinates [0, 1.0) reference the entire breadth of the array dimension; Note * the width or height of the CUDA array. Instead, the texture coordinates [0, 1.0) reference the entire breadth of the array dimension; Note
* that for CUDA mipmapped arrays, this flag has to be set. * that for CUDA mipmapped arrays, this flag has to be set.

View File

@@ -89,46 +89,46 @@ protected:
* *
* Maps prod(matrix_expression, matrix_expression) * Maps prod(matrix_expression, matrix_expression)
*/ */
class mapped_gemm : public mapped_object, public binary_leaf class mapped_matrix_product : public mapped_object, public binary_leaf
{ {
public: public:
mapped_gemm(std::string const & scalartype, unsigned int id, node_info info); mapped_matrix_product(std::string const & scalartype, unsigned int id, node_info info);
}; };
/** @brief Reduction /** @brief Reduction
* *
* Base class for mapping a dot * Base class for mapping a reduce_1d
*/ */
class mapped_dot : public mapped_object, public binary_leaf class mapped_reduce : public mapped_object, public binary_leaf
{ {
public: public:
mapped_dot(std::string const & scalartype, unsigned int id, node_info info, std::string const & type_key); mapped_reduce(std::string const & scalartype, unsigned int id, node_info info, std::string const & type_key);
size_t root_idx() const; size_t root_idx() const;
isaac::math_expression const & math_expression() const; isaac::math_expression const & math_expression() const;
math_expression::node root_node() const; math_expression::node root_node() const;
bool is_index_dot() const; bool is_index_reduction() const;
op_element root_op() const; op_element root_op() const;
}; };
/** @brief Scalar dot /** @brief 1D Reduction
* *
* Maps a scalar dot (max, min, argmax, inner_prod, etc..) * Maps a 1d reduction (max, min, argmax, inner_prod, etc..)
*/ */
class mapped_scalar_dot : public mapped_dot class mapped_reduce_1d : public mapped_reduce
{ {
public: public:
mapped_scalar_dot(std::string const & scalartype, unsigned int id, node_info info); mapped_reduce_1d(std::string const & scalartype, unsigned int id, node_info info);
}; };
/** @brief Vector dot /** @brief 2D
* *
* Maps a row-wise dot (max, min, argmax, matrix-vector product, etc..) * Maps a 2D reduction (max, min, argmax, matrix-vector product, etc..)
*/ */
class mapped_gemv : public mapped_dot class mapped_reduce_2d : public mapped_reduce
{ {
public: public:
mapped_gemv(std::string const & scalartype, unsigned int id, node_info info); mapped_reduce_2d(std::string const & scalartype, unsigned int id, node_info info);
}; };
/** @brief Host scalar /** @brief Host scalar

View File

@@ -13,8 +13,8 @@ namespace detail
{ {
bool is_node_leaf(op_element const & op); bool is_node_leaf(op_element const & op);
bool is_scalar_dot(math_expression::node const & node); bool is_scalar_reduce_1d(math_expression::node const & node);
bool is_vector_dot(math_expression::node const & node); bool is_vector_reduce_1d(math_expression::node const & node);
bool is_assignment(op_element const & op); bool is_assignment(op_element const & op);
bool is_elementwise_operator(op_element const & op); bool is_elementwise_operator(op_element const & op);
bool is_elementwise_function(op_element const & op); bool is_elementwise_function(op_element const & op);

View File

@@ -8,22 +8,22 @@ namespace isaac
namespace templates namespace templates
{ {
class axpy_parameters : public base::parameters_type class elementwise_1d_parameters : public base::parameters_type
{ {
public: public:
axpy_parameters(unsigned int _simd_width, unsigned int _group_size, unsigned int _num_groups, fetching_policy_type _fetching_policy); elementwise_1d_parameters(unsigned int _simd_width, unsigned int _group_size, unsigned int _num_groups, fetching_policy_type _fetching_policy);
unsigned int num_groups; unsigned int num_groups;
fetching_policy_type fetching_policy; fetching_policy_type fetching_policy;
}; };
class axpy : public base_impl<axpy, axpy_parameters> class elementwise_1d : public base_impl<elementwise_1d, elementwise_1d_parameters>
{ {
private: private:
virtual int is_invalid_impl(driver::Device const &, math_expression const &) const; virtual int is_invalid_impl(driver::Device const &, math_expression const &) const;
std::string generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mappings) const; std::string generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mappings) const;
public: public:
axpy(axpy::parameters_type const & parameters, binding_policy_t binding_policy = BIND_INDEPENDENT); elementwise_1d(elementwise_1d::parameters_type const & parameters, binding_policy_t binding_policy = BIND_INDEPENDENT);
axpy(unsigned int _simd_width, unsigned int _group_size, unsigned int _num_groups, fetching_policy_type _fetching_policy, binding_policy_t binding_policy = BIND_INDEPENDENT); elementwise_1d(unsigned int _simd_width, unsigned int _group_size, unsigned int _num_groups, fetching_policy_type _fetching_policy, binding_policy_t binding_policy = BIND_INDEPENDENT);
std::vector<int_t> input_sizes(math_expression const & expressions) const; std::vector<int_t> input_sizes(math_expression const & expressions) const;
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const &); void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const &);
}; };

View File

@@ -9,24 +9,24 @@ namespace isaac
namespace templates namespace templates
{ {
class ger_parameters : public base::parameters_type class elementwise_2d_parameters : public base::parameters_type
{ {
public: public:
ger_parameters(unsigned int _simd_width, unsigned int _local_size_0, unsigned int _local_size_1, unsigned int _num_groups_0, unsigned int _num_groups_1, fetching_policy_type _fetching_policy); elementwise_2d_parameters(unsigned int _simd_width, unsigned int _local_size_0, unsigned int _local_size_1, unsigned int _num_groups_0, unsigned int _num_groups_1, fetching_policy_type _fetching_policy);
unsigned int num_groups_0; unsigned int num_groups_0;
unsigned int num_groups_1; unsigned int num_groups_1;
fetching_policy_type fetching_policy; fetching_policy_type fetching_policy;
}; };
class ger : public base_impl<ger, ger_parameters> class elementwise_2d : public base_impl<elementwise_2d, elementwise_2d_parameters>
{ {
private: private:
int is_invalid_impl(driver::Device const &, math_expression const &) const; int is_invalid_impl(driver::Device const &, math_expression const &) const;
std::string generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mapping) const; std::string generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mapping) const;
public: public:
ger(parameters_type const & parameters, binding_policy_t binding_policy = BIND_INDEPENDENT); elementwise_2d(parameters_type const & parameters, binding_policy_t binding_policy = BIND_INDEPENDENT);
ger(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT); elementwise_2d(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT);
std::vector<int_t> input_sizes(math_expression const & expressions) const; std::vector<int_t> input_sizes(math_expression const & expressions) const;
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const &); void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const &);
}; };

View File

@@ -10,9 +10,9 @@ namespace isaac
namespace templates namespace templates
{ {
struct gemm_parameters : public base::parameters_type struct matrix_product_parameters : public base::parameters_type
{ {
gemm_parameters(unsigned int simd_width matrix_product_parameters(unsigned int simd_width
, unsigned int local_size_0, unsigned int KL, unsigned int local_size_1, unsigned int D , unsigned int local_size_0, unsigned int KL, unsigned int local_size_1, unsigned int D
, unsigned int ms, unsigned int ks, unsigned int ns , unsigned int ms, unsigned int ks, unsigned int ns
, fetching_policy_type A_fetching_policy, fetching_policy_type B_fetching_policy , fetching_policy_type A_fetching_policy, fetching_policy_type B_fetching_policy
@@ -38,7 +38,7 @@ struct gemm_parameters : public base::parameters_type
bool unroll_outer; bool unroll_outer;
}; };
class gemm : public base_impl<gemm, gemm_parameters> class matrix_product : public base_impl<matrix_product, matrix_product_parameters>
{ {
private: private:
unsigned int temporary_workspace(math_expression const & expressions) const; unsigned int temporary_workspace(math_expression const & expressions) const;
@@ -48,9 +48,9 @@ private:
std::string generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const &) const; std::string generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const &) const;
void enqueue_block(driver::CommandQueue & queue, int_t M, int_t N, int_t K, array_base const & A, array_base const & B, array_base const & C, void enqueue_block(driver::CommandQueue & queue, int_t M, int_t N, int_t K, array_base const & A, array_base const & B, array_base const & C,
value_scalar const &alpha, value_scalar const &beta, driver::Program const & program, std::string const & suffix, execution_options_type const & options); value_scalar const &alpha, value_scalar const &beta, driver::Program const & program, std::string const & suffix, execution_options_type const & options);
std::vector<int_t> infos(math_expression const & expressions, isaac::symbolic::preset::gemm::args &arguments) const; std::vector<int_t> infos(math_expression const & expressions, isaac::symbolic::preset::matrix_product::args &arguments) const;
public: public:
gemm(gemm::parameters_type const & parameters, bool check_bound, char A_trans, char B_trans); matrix_product(matrix_product::parameters_type const & parameters, bool check_bound, char A_trans, char B_trans);
std::vector<int_t> input_sizes(math_expression const & expressions) const; std::vector<int_t> input_sizes(math_expression const & expressions) const;
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const &ctr); void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const &ctr);
private: private:
@@ -60,36 +60,36 @@ private:
bool check_bounds_; bool check_bounds_;
}; };
class gemm_nn : public gemm class matrix_product_nn : public matrix_product
{ {
public: public:
gemm_nn(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D matrix_product_nn(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns, fetching_policy_type Afetch , fetching_policy_type Bfetch , int_t ms, int_t ks, int_t ns, fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound = false); , int_t lfetch0, int_t lfetch1, bool check_bound = false);
}; };
class gemm_tn : public gemm class matrix_product_tn : public matrix_product
{ {
public: public:
gemm_tn(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D matrix_product_tn(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns, fetching_policy_type Afetch , fetching_policy_type Bfetch , int_t ms, int_t ks, int_t ns, fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound = false); , int_t lfetch0, int_t lfetch1, bool check_bound = false);
}; };
class gemm_nt : public gemm class matrix_product_nt : public matrix_product
{ {
public: public:
gemm_nt(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D matrix_product_nt(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns, fetching_policy_type Afetch , fetching_policy_type Bfetch , int_t ms, int_t ks, int_t ns, fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound = false); , int_t lfetch0, int_t lfetch1, bool check_bound = false);
}; };
class gemm_tt : public gemm class matrix_product_tt : public matrix_product
{ {
public: public:
gemm_tt(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D matrix_product_tt(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns, fetching_policy_type Afetch , fetching_policy_type Bfetch , int_t ms, int_t ks, int_t ns, fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound = false); , int_t lfetch0, int_t lfetch1, bool check_bound = false);
}; };

View File

@@ -8,27 +8,27 @@ namespace isaac
namespace templates namespace templates
{ {
struct dot_parameters : public base::parameters_type struct reduce_1d_parameters : public base::parameters_type
{ {
dot_parameters(unsigned int _simd_width, reduce_1d_parameters(unsigned int _simd_width,
unsigned int _group_size, unsigned int _num_groups, unsigned int _group_size, unsigned int _num_groups,
fetching_policy_type _fetching_policy); fetching_policy_type _fetching_policy);
unsigned int num_groups; unsigned int num_groups;
fetching_policy_type fetching_policy; fetching_policy_type fetching_policy;
}; };
class dot : public base_impl<dot, dot_parameters> class reduce_1d : public base_impl<reduce_1d, reduce_1d_parameters>
{ {
private: private:
unsigned int lmem_usage(math_expression const & expressions) const; unsigned int lmem_usage(math_expression const & expressions) const;
int is_invalid_impl(driver::Device const &, math_expression const &) const; int is_invalid_impl(driver::Device const &, math_expression const &) const;
inline void reduce_1d_local_memory(kernel_generation_stream & stream, unsigned int size, std::vector<mapped_scalar_dot*> exprs, inline void reduce_1d_local_memory(kernel_generation_stream & stream, unsigned int size, std::vector<mapped_reduce_1d*> exprs,
std::string const & buf_str, std::string const & buf_value_str, driver::backend_type backend) const; std::string const & buf_str, std::string const & buf_value_str, driver::backend_type backend) const;
std::string generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mapping) const; std::string generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mapping) const;
public: public:
dot(dot::parameters_type const & parameters, binding_policy_t binding_policy = BIND_INDEPENDENT); reduce_1d(reduce_1d::parameters_type const & parameters, binding_policy_t binding_policy = BIND_INDEPENDENT);
dot(unsigned int simd, unsigned int ls, unsigned int ng, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT); reduce_1d(unsigned int simd, unsigned int ls, unsigned int ng, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT);
std::vector<int_t> input_sizes(math_expression const & expressions) const; std::vector<int_t> input_sizes(math_expression const & expressions) const;
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const &); void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const &);
private: private:

View File

@@ -10,9 +10,9 @@ namespace isaac
{ {
namespace templates namespace templates
{ {
struct gemv_parameters : public base::parameters_type struct reduce_2d_parameters : public base::parameters_type
{ {
gemv_parameters(unsigned int _simd_width, reduce_2d_parameters(unsigned int _simd_width,
unsigned int _local_size_0, unsigned int _local_size_1, unsigned int _local_size_0, unsigned int _local_size_1,
unsigned int _num_groups_0, unsigned int _num_groups_1, fetching_policy_type _fetch_policy); unsigned int _num_groups_0, unsigned int _num_groups_1, fetching_policy_type _fetch_policy);
unsigned int num_groups_0; unsigned int num_groups_0;
@@ -21,15 +21,15 @@ struct gemv_parameters : public base::parameters_type
}; };
class gemv : public base_impl<gemv, gemv_parameters> class reduce_2d : public base_impl<reduce_2d, reduce_2d_parameters>
{ {
protected: protected:
enum dot_type enum reduce_1d_type
{ {
REDUCE_ROWS, REDUCE_ROWS,
REDUCE_COLUMNS REDUCE_COLUMNS
}; };
gemv(gemv::parameters_type const & , dot_type, binding_policy_t); reduce_2d(reduce_2d::parameters_type const & , reduce_1d_type, binding_policy_t);
private: private:
virtual int is_invalid_impl(driver::Device const &, math_expression const &) const; virtual int is_invalid_impl(driver::Device const &, math_expression const &) const;
unsigned int lmem_usage(math_expression const &) const; unsigned int lmem_usage(math_expression const &) const;
@@ -38,21 +38,21 @@ public:
virtual std::vector<int_t> input_sizes(math_expression const & expressions) const; virtual std::vector<int_t> input_sizes(math_expression const & expressions) const;
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const &); void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const &);
private: private:
dot_type dot_type_; reduce_1d_type reduce_1d_type_;
}; };
class gemv_n : public gemv class reduce_2d_n : public reduce_2d
{ {
public: public:
gemv_n(gemv::parameters_type const &, binding_policy_t binding_policy = BIND_INDEPENDENT); reduce_2d_n(reduce_2d::parameters_type const &, binding_policy_t binding_policy = BIND_INDEPENDENT);
gemv_n(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT); reduce_2d_n(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT);
}; };
class gemv_t : public gemv class reduce_2d_t : public reduce_2d
{ {
public: public:
gemv_t(gemv::parameters_type const &, binding_policy_t binding_policy = BIND_INDEPENDENT); reduce_2d_t(reduce_2d::parameters_type const &, binding_policy_t binding_policy = BIND_INDEPENDENT);
gemv_t(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT); reduce_2d_t(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT);
}; };
} }

View File

@@ -13,7 +13,7 @@ namespace preset
{ {
class gemm class matrix_product
{ {
public: public:

View File

@@ -115,7 +115,7 @@ public:
The user buffer will not be deallocated when this allocator is destructed. The user buffer will not be deallocated when this allocator is destructed.
\param buffer User supplied buffer. \param buffer User supplied buffer.
\param size Size of the buffer in bytes. It must at least larger than sizeof(ChunkHeader). \param size Size of the buffer in bytes. It must at least larger than sizeof(ChunkHeader).
\param chunkSize The size of memory chunk. The default is kDefaultChunkSize. \param chunkSize The size of memory chunk. The default is kDefaultChunkSize.
\param baseAllocator The allocator for allocating memory chunks. \param baseAllocator The allocator for allocating memory chunks.
*/ */

View File

@@ -128,7 +128,7 @@ public:
typedef typename BaseType::pointer Pointer; typedef typename BaseType::pointer Pointer;
//! Reference to (const) GenericMember //! Reference to (const) GenericMember
typedef typename BaseType::reference Reference; typedef typename BaseType::reference Reference;
//! Signed integer type (e.g. \c ptrdiff_t) //! Signed integer type (e.g. \c ptrdiff_t)
typedef typename BaseType::difference_type DifferenceType; typedef typename BaseType::difference_type DifferenceType;
//! Default constructor (singular value) //! Default constructor (singular value)
@@ -265,7 +265,7 @@ struct GenericStringRef {
\tparam N length of the string, automatically inferred \tparam N length of the string, automatically inferred
\param str Constant character array, lifetime assumed to be longer \param str Constant character array, lifetime assumed to be longer
than the use of the string in e.g. a GenericValue than the use of the string in e.g. a GenericValue
\post \ref s == str \post \ref s == str
@@ -289,7 +289,7 @@ struct GenericStringRef {
\see StringRef(const CharType*) \see StringRef(const CharType*)
\param str Constant character pointer, lifetime assumed to be longer \param str Constant character pointer, lifetime assumed to be longer
than the use of the string in e.g. a GenericValue than the use of the string in e.g. a GenericValue
\post \ref s == str \post \ref s == str
@@ -305,7 +305,7 @@ struct GenericStringRef {
: s(str), length(internal::StrLen(str)){ RAPIDJSON_ASSERT(s != NULL); } : s(str), length(internal::StrLen(str)){ RAPIDJSON_ASSERT(s != NULL); }
//! Create constant string reference from pointer and length //! Create constant string reference from pointer and length
/*! \param str constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue /*! \param str constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue
\param len length of the string, excluding the trailing NULL terminator \param len length of the string, excluding the trailing NULL terminator
\post \ref s == str && \ref length == len \post \ref s == str && \ref length == len
@@ -334,7 +334,7 @@ private:
value in a JSON GenericValue object, if the string's lifetime is known value in a JSON GenericValue object, if the string's lifetime is known
to be valid long enough. to be valid long enough.
\tparam CharType Character type of the string \tparam CharType Character type of the string
\param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue \param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue
\return GenericStringRef string reference object \return GenericStringRef string reference object
\relatesalso GenericStringRef \relatesalso GenericStringRef
@@ -355,7 +355,7 @@ inline GenericStringRef<CharType> StringRef(const CharType* str) {
supports string containing null characters. supports string containing null characters.
\tparam CharType character type of the string \tparam CharType character type of the string
\param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue \param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue
\param length The length of source string. \param length The length of source string.
\return GenericStringRef string reference object \return GenericStringRef string reference object
\relatesalso GenericStringRef \relatesalso GenericStringRef
@@ -373,7 +373,7 @@ inline GenericStringRef<CharType> StringRef(const CharType* str, size_t length)
to be valid long enough. to be valid long enough.
\tparam CharType character type of the string \tparam CharType character type of the string
\param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue \param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue
\return GenericStringRef string reference object \return GenericStringRef string reference object
\relatesalso GenericStringRef \relatesalso GenericStringRef
\note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING. \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING.
@@ -696,7 +696,7 @@ public:
case kNumberType: case kNumberType:
if (IsDouble() || rhs.IsDouble()) if (IsDouble() || rhs.IsDouble())
return GetDouble() == rhs.GetDouble(); // May convert one operand from integer to double. return GetDouble() == rhs.GetDouble(); // May convert one operand from integer to double.
else else
return data_.n.u64 == rhs.data_.n.u64; return data_.n.u64 == rhs.data_.n.u64;
@@ -1482,7 +1482,7 @@ private:
inline SizeType GetLength() const { return (SizeType)(MaxSize - str[LenPos]); } inline SizeType GetLength() const { return (SizeType)(MaxSize - str[LenPos]); }
}; // at most as many bytes as "String" above => 12 bytes in 32-bit mode, 16 bytes in 64-bit mode }; // at most as many bytes as "String" above => 12 bytes in 32-bit mode, 16 bytes in 64-bit mode
// By using proper binary layout, retrieval of different integer types do not need conversions. // By using proper binary layout, retrieval of different integer types do not need conversions.
union Number { union Number {
#if RAPIDJSON_ENDIAN == RAPIDJSON_LITTLEENDIAN #if RAPIDJSON_ENDIAN == RAPIDJSON_LITTLEENDIAN
struct I { struct I {

View File

@@ -20,7 +20,7 @@
// This is a C++ header-only implementation of Grisu2 algorithm from the publication: // This is a C++ header-only implementation of Grisu2 algorithm from the publication:
// Loitsch, Florian. "Printing floating-point numbers quickly and accurately with // Loitsch, Florian. "Printing floating-point numbers quickly and accurately with
// integers." ACM Sigplan Notices 45.6 (2010): 233-243. // integers." ACM Sigplan Notices 45.6 (2010): 233-243.
#ifndef RAPIDJSON_DTOA_ #ifndef RAPIDJSON_DTOA_
#define RAPIDJSON_DTOA_ #define RAPIDJSON_DTOA_

View File

@@ -24,7 +24,7 @@
namespace rapidjson { namespace rapidjson {
namespace internal { namespace internal {
//! Computes integer powers of 10 in double (10.0^n). //! Computes integer powers of 10 in double (10.0^n).
/*! This function uses lookup table for fast and accurate results. /*! This function uses lookup table for fast and accurate results.
\param n non-negative exponent. Must <= 308. \param n non-negative exponent. Must <= 308.
\return 10.0^n \return 10.0^n

View File

@@ -53,9 +53,9 @@
/*! \def RAPIDJSON_NO_INT64DEFINE /*! \def RAPIDJSON_NO_INT64DEFINE
\ingroup RAPIDJSON_CONFIG \ingroup RAPIDJSON_CONFIG
\brief Use external 64-bit integer types. \brief Use external 64-bit integer types.
RapidJSON requires the 64-bit integer types \c int64_t and \c uint64_t types RapidJSON requires the 64-bit integer types \c int64_t and \c uint64_t types
to be available at global scope. to be available at global scope.
If users have their own definition, define RAPIDJSON_NO_INT64DEFINE to If users have their own definition, define RAPIDJSON_NO_INT64DEFINE to
@@ -171,11 +171,11 @@
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// RAPIDJSON_UINT64_C2 // RAPIDJSON_UINT64_C2
//! Construct a 64-bit literal by a pair of 32-bit integer. //! Construct a 64-bit literal by a pair of 32-bit integer.
/*! /*!
64-bit literal with or without ULL suffix is prone to compiler warnings. 64-bit literal with or without ULL suffix is prone to compiler warnings.
UINT64_C() is C macro which cause compilation problems. UINT64_C() is C macro which cause compilation problems.
Use this macro to define 64-bit constants by a pair of 32-bit integer. Use this macro to define 64-bit constants by a pair of 32-bit integer.
*/ */
#ifndef RAPIDJSON_UINT64_C2 #ifndef RAPIDJSON_UINT64_C2
#define RAPIDJSON_UINT64_C2(high32, low32) ((static_cast<uint64_t>(high32) << 32) | static_cast<uint64_t>(low32)) #define RAPIDJSON_UINT64_C2(high32, low32) ((static_cast<uint64_t>(high32) << 32) | static_cast<uint64_t>(low32))

View File

@@ -792,7 +792,7 @@ private:
} }
} }
// Force double for big integer // Force double for big integer
if (useDouble) { if (useDouble) {
while (s.Peek() >= '0' && s.Peek() <= '9') { while (s.Peek() >= '0' && s.Peek() <= '9') {
if (d >= 1.7976931348623157e307) // DBL_MAX / 10.0 if (d >= 1.7976931348623157e307) // DBL_MAX / 10.0

View File

@@ -117,23 +117,23 @@ std::string binary_leaf::evaluate_recursive(leaf_t leaf, std::map<std::string, s
} }
mapped_gemm::mapped_gemm(std::string const & scalartype, unsigned int id, node_info info) : mapped_object(scalartype, id, "gemm"), binary_leaf(info) { } mapped_matrix_product::mapped_matrix_product(std::string const & scalartype, unsigned int id, node_info info) : mapped_object(scalartype, id, "matrix_product"), binary_leaf(info) { }
// //
mapped_dot::mapped_dot(std::string const & scalartype, unsigned int id, node_info info, std::string const & type_key) : mapped_reduce::mapped_reduce(std::string const & scalartype, unsigned int id, node_info info, std::string const & type_key) :
mapped_object(scalartype, id, type_key), binary_leaf(info) mapped_object(scalartype, id, type_key), binary_leaf(info)
{ } { }
size_t mapped_dot::root_idx() const size_t mapped_reduce::root_idx() const
{ return info_.root_idx; } { return info_.root_idx; }
isaac::math_expression const & mapped_dot::math_expression() const isaac::math_expression const & mapped_reduce::math_expression() const
{ return *info_.math_expression; } { return *info_.math_expression; }
math_expression::node mapped_dot::root_node() const math_expression::node mapped_reduce::root_node() const
{ return math_expression().tree()[root_idx()]; } { return math_expression().tree()[root_idx()]; }
bool mapped_dot::is_index_dot() const bool mapped_reduce::is_index_reduction() const
{ {
op_element const & op = root_op(); op_element const & op = root_op();
return op.type==OPERATOR_ELEMENT_ARGFMAX_TYPE return op.type==OPERATOR_ELEMENT_ARGFMAX_TYPE
@@ -142,17 +142,17 @@ bool mapped_dot::is_index_dot() const
|| op.type==OPERATOR_ELEMENT_ARGMIN_TYPE; || op.type==OPERATOR_ELEMENT_ARGMIN_TYPE;
} }
op_element mapped_dot::root_op() const op_element mapped_reduce::root_op() const
{ {
return info_.math_expression->tree()[info_.root_idx].op; return info_.math_expression->tree()[info_.root_idx].op;
} }
// //
mapped_scalar_dot::mapped_scalar_dot(std::string const & scalartype, unsigned int id, node_info info) : mapped_dot(scalartype, id, info, "scalar_dot"){ } mapped_reduce_1d::mapped_reduce_1d(std::string const & scalartype, unsigned int id, node_info info) : mapped_reduce(scalartype, id, info, "scalar_reduce_1d"){ }
// //
mapped_gemv::mapped_gemv(std::string const & scalartype, unsigned int id, node_info info) : mapped_dot(scalartype, id, info, "gemv") { } mapped_reduce_2d::mapped_reduce_2d(std::string const & scalartype, unsigned int id, node_info info) : mapped_reduce(scalartype, id, info, "reduce_2d") { }
// //
void mapped_host_scalar::preprocess(std::string & str) const void mapped_host_scalar::preprocess(std::string & str) const

View File

@@ -14,12 +14,12 @@ namespace detail
bool is_scalar_dot(math_expression::node const & node) bool is_scalar_reduce_1d(math_expression::node const & node)
{ {
return node.op.type_family==OPERATOR_VECTOR_DOT_TYPE_FAMILY; return node.op.type_family==OPERATOR_VECTOR_DOT_TYPE_FAMILY;
} }
bool is_vector_dot(math_expression::node const & node) bool is_vector_reduce_1d(math_expression::node const & node)
{ {
return node.op.type_family==OPERATOR_ROWS_DOT_TYPE_FAMILY return node.op.type_family==OPERATOR_ROWS_DOT_TYPE_FAMILY
|| node.op.type_family==OPERATOR_COLUMNS_DOT_TYPE_FAMILY; || node.op.type_family==OPERATOR_COLUMNS_DOT_TYPE_FAMILY;

View File

@@ -5,11 +5,11 @@
#include "isaac/array.h" #include "isaac/array.h"
#include "isaac/tuple.h" #include "isaac/tuple.h"
#include "isaac/kernels/keywords.h" #include "isaac/kernels/keywords.h"
#include "isaac/kernels/templates/axpy.h" #include "isaac/kernels/templates/elementwise_1d.h"
#include "isaac/kernels/templates/dot.h" #include "isaac/kernels/templates/reduce_1d.h"
#include "isaac/kernels/templates/ger.h" #include "isaac/kernels/templates/elementwise_2d.h"
#include "isaac/kernels/templates/gemv.h" #include "isaac/kernels/templates/reduce_2d.h"
#include "isaac/kernels/templates/gemm.h" #include "isaac/kernels/templates/matrix_product.h"
#include "isaac/kernels/templates/base.h" #include "isaac/kernels/templates/base.h"
#include "isaac/kernels/parse.h" #include "isaac/kernels/parse.h"
#include "isaac/exception/unknown_datatype.h" #include "isaac/exception/unknown_datatype.h"
@@ -150,11 +150,11 @@ int base_impl<TType, PType>::is_invalid(math_expression const & expressions, dr
return is_invalid_impl(device, expressions); return is_invalid_impl(device, expressions);
} }
template class base_impl<axpy, axpy_parameters>; template class base_impl<elementwise_1d, elementwise_1d_parameters>;
template class base_impl<dot, dot_parameters>; template class base_impl<reduce_1d, reduce_1d_parameters>;
template class base_impl<ger, ger_parameters>; template class base_impl<elementwise_2d, elementwise_2d_parameters>;
template class base_impl<gemv, gemv_parameters>; template class base_impl<reduce_2d, reduce_2d_parameters>;
template class base_impl<gemm, gemm_parameters>; template class base_impl<matrix_product, matrix_product_parameters>;
} }
} }

View File

@@ -2,7 +2,7 @@
#include <cstring> #include <cstring>
#include <algorithm> #include <algorithm>
#include "isaac/kernels/templates/axpy.h" #include "isaac/kernels/templates/elementwise_1d.h"
#include "isaac/kernels/keywords.h" #include "isaac/kernels/keywords.h"
#include "isaac/driver/backend.h" #include "isaac/driver/backend.h"
@@ -18,7 +18,7 @@ namespace isaac
namespace templates namespace templates
{ {
axpy_parameters::axpy_parameters(unsigned int _simd_width, elementwise_1d_parameters::elementwise_1d_parameters(unsigned int _simd_width,
unsigned int _group_size, unsigned int _num_groups, unsigned int _group_size, unsigned int _num_groups,
fetching_policy_type _fetching_policy) : fetching_policy_type _fetching_policy) :
base::parameters_type(_simd_width, _group_size, 1, 1), num_groups(_num_groups), fetching_policy(_fetching_policy) base::parameters_type(_simd_width, _group_size, 1, 1), num_groups(_num_groups), fetching_policy(_fetching_policy)
@@ -26,14 +26,14 @@ axpy_parameters::axpy_parameters(unsigned int _simd_width,
} }
int axpy::is_invalid_impl(driver::Device const &, math_expression const &) const int elementwise_1d::is_invalid_impl(driver::Device const &, math_expression const &) const
{ {
if (p_.fetching_policy==FETCH_FROM_LOCAL) if (p_.fetching_policy==FETCH_FROM_LOCAL)
return TEMPLATE_INVALID_FETCHING_POLICY_TYPE; return TEMPLATE_INVALID_FETCHING_POLICY_TYPE;
return TEMPLATE_VALID; return TEMPLATE_VALID;
} }
std::string axpy::generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mappings) const std::string elementwise_1d::generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mappings) const
{ {
driver::backend_type backend = device.backend(); driver::backend_type backend = device.backend();
std::string _size_t = size_type(device); std::string _size_t = size_type(device);
@@ -55,7 +55,7 @@ std::string axpy::generate_impl(std::string const & suffix, math_expression cons
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break; stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break;
} }
stream << KernelPrefix(backend) << " void " << "axpy" << suffix << "(" << _size_t << " N," << generate_arguments(dtype, device, mappings, expressions) << ")" << std::endl; stream << KernelPrefix(backend) << " void " << "elementwise_1d" << suffix << "(" << _size_t << " N," << generate_arguments(dtype, device, mappings, expressions) << ")" << std::endl;
stream << "{" << std::endl; stream << "{" << std::endl;
stream.inc_tab(); stream.inc_tab();
@@ -174,23 +174,23 @@ std::string axpy::generate_impl(std::string const & suffix, math_expression cons
return stream.str(); return stream.str();
} }
axpy::axpy(axpy_parameters const & parameters, elementwise_1d::elementwise_1d(elementwise_1d_parameters const & parameters,
binding_policy_t binding_policy) : binding_policy_t binding_policy) :
base_impl<axpy, axpy_parameters>(parameters, binding_policy) base_impl<elementwise_1d, elementwise_1d_parameters>(parameters, binding_policy)
{} {}
axpy::axpy(unsigned int simd, unsigned int ls, unsigned int ng, elementwise_1d::elementwise_1d(unsigned int simd, unsigned int ls, unsigned int ng,
fetching_policy_type fetch, binding_policy_t bind): fetching_policy_type fetch, binding_policy_t bind):
base_impl<axpy, axpy_parameters>(axpy_parameters(simd,ls,ng,fetch), bind) base_impl<elementwise_1d, elementwise_1d_parameters>(elementwise_1d_parameters(simd,ls,ng,fetch), bind)
{} {}
std::vector<int_t> axpy::input_sizes(math_expression const & expressions) const std::vector<int_t> elementwise_1d::input_sizes(math_expression const & expressions) const
{ {
return {expressions.shape().max()}; return {expressions.shape().max()};
} }
void axpy::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const & control) void elementwise_1d::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const & control)
{ {
math_expression const & expressions = control.x(); math_expression const & expressions = control.x();
//Size //Size
@@ -202,7 +202,7 @@ void axpy::enqueue(driver::CommandQueue & queue, driver::Program const & program
return; return;
} }
//Kernel //Kernel
std::string name = "axpy"; std::string name = "elementwise_1d";
name += suffix; name += suffix;
driver::Kernel kernel(program, name.c_str()); driver::Kernel kernel(program, name.c_str());
//NDRange //NDRange

View File

@@ -1,6 +1,6 @@
#include <cstring> #include <cstring>
#include <iostream> #include <iostream>
#include "isaac/kernels/templates/ger.h" #include "isaac/kernels/templates/elementwise_2d.h"
#include "isaac/symbolic/io.h" #include "isaac/symbolic/io.h"
#include "isaac/kernels/keywords.h" #include "isaac/kernels/keywords.h"
@@ -13,14 +13,14 @@ namespace isaac
namespace templates namespace templates
{ {
ger_parameters::ger_parameters(unsigned int _simd_width, elementwise_2d_parameters::elementwise_2d_parameters(unsigned int _simd_width,
unsigned int _local_size_0, unsigned int _local_size_1, unsigned int _local_size_0, unsigned int _local_size_1,
unsigned int _num_groups_0, unsigned int _num_groups_1, unsigned int _num_groups_0, unsigned int _num_groups_1,
fetching_policy_type _fetching_policy) : base::parameters_type(_simd_width, _local_size_0, _local_size_1, 1), num_groups_0(_num_groups_0), num_groups_1(_num_groups_1), fetching_policy(_fetching_policy){ } fetching_policy_type _fetching_policy) : base::parameters_type(_simd_width, _local_size_0, _local_size_1, 1), num_groups_0(_num_groups_0), num_groups_1(_num_groups_1), fetching_policy(_fetching_policy){ }
int ger::is_invalid_impl(driver::Device const &, math_expression const &) const int elementwise_2d::is_invalid_impl(driver::Device const &, math_expression const &) const
{ {
if (p_.simd_width>1) if (p_.simd_width>1)
return TEMPLATE_INVALID_SIMD_WIDTH; return TEMPLATE_INVALID_SIMD_WIDTH;
@@ -29,7 +29,7 @@ int ger::is_invalid_impl(driver::Device const &, math_expression const &) const
return TEMPLATE_VALID; return TEMPLATE_VALID;
} }
std::string ger::generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mappings) const std::string elementwise_2d::generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mappings) const
{ {
kernel_generation_stream stream; kernel_generation_stream stream;
std::string _size_t = size_type(device); std::string _size_t = size_type(device);
@@ -45,7 +45,7 @@ std::string ger::generate_impl(std::string const & suffix, math_expression const
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break; stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break;
} }
stream << KernelPrefix(backend) << " void axpy" << suffix << "(" << _size_t << " M, " << _size_t << " N, " << generate_arguments("#scalartype", device, mappings, expressions) << ")" << std::endl; stream << KernelPrefix(backend) << " void elementwise_1d" << suffix << "(" << _size_t << " M, " << _size_t << " N, " << generate_arguments("#scalartype", device, mappings, expressions) << ")" << std::endl;
stream << "{" << std::endl; stream << "{" << std::endl;
stream.inc_tab(); stream.inc_tab();
@@ -105,25 +105,25 @@ std::string ger::generate_impl(std::string const & suffix, math_expression const
return stream.str(); return stream.str();
} }
ger::ger(parameters_type const & parameters, binding_policy_t binding_policy) : elementwise_2d::elementwise_2d(parameters_type const & parameters, binding_policy_t binding_policy) :
base_impl<ger, ger_parameters>(parameters, binding_policy){ } base_impl<elementwise_2d, elementwise_2d_parameters>(parameters, binding_policy){ }
ger::ger(unsigned int simd, unsigned int ls1, unsigned int ls2, elementwise_2d::elementwise_2d(unsigned int simd, unsigned int ls1, unsigned int ls2,
unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch,
binding_policy_t bind): binding_policy_t bind):
base_impl<ger, ger_parameters>(ger_parameters(simd, ls1, ls2, ng1, ng2, fetch), bind) base_impl<elementwise_2d, elementwise_2d_parameters>(elementwise_2d_parameters(simd, ls1, ls2, ng1, ng2, fetch), bind)
{} {}
std::vector<int_t> ger::input_sizes(math_expression const & expression) const std::vector<int_t> elementwise_2d::input_sizes(math_expression const & expression) const
{ {
std::pair<int_t, int_t> size = matrix_size(expression.tree(), lhs_most(expression.tree(), expression.root())); std::pair<int_t, int_t> size = matrix_size(expression.tree(), lhs_most(expression.tree(), expression.root()));
return {size.first, size.second}; return {size.first, size.second};
} }
void ger::enqueue(driver::CommandQueue & /*queue*/, driver::Program const & program, std::string const & suffix, base &, execution_handler const & control) void elementwise_2d::enqueue(driver::CommandQueue & /*queue*/, driver::Program const & program, std::string const & suffix, base &, execution_handler const & control)
{ {
math_expression const & expressions = control.x(); math_expression const & expressions = control.x();
std::string name = "axpy"; std::string name = "elementwise_1d";
name +=suffix; name +=suffix;
driver::Kernel kernel(program, name.c_str()); driver::Kernel kernel(program, name.c_str());
driver::NDRange global(p_.local_size_0*p_.num_groups_0, p_.local_size_1*p_.num_groups_1); driver::NDRange global(p_.local_size_0*p_.num_groups_0, p_.local_size_1*p_.num_groups_1);

View File

@@ -1,5 +1,5 @@
#include "isaac/array.h" #include "isaac/array.h"
#include "isaac/kernels/templates/gemm.h" #include "isaac/kernels/templates/matrix_product.h"
#include "isaac/kernels/keywords.h" #include "isaac/kernels/keywords.h"
#include "isaac/symbolic/preset.h" #include "isaac/symbolic/preset.h"
#include "isaac/exception/operation_not_supported.h" #include "isaac/exception/operation_not_supported.h"
@@ -15,7 +15,7 @@ namespace isaac
namespace templates namespace templates
{ {
gemm_parameters::gemm_parameters(unsigned int simd_width matrix_product_parameters::matrix_product_parameters(unsigned int simd_width
, unsigned int local_size_0, unsigned int KL, unsigned int local_size_1, unsigned int D , unsigned int local_size_0, unsigned int KL, unsigned int local_size_1, unsigned int D
, unsigned int ms, unsigned int ks, unsigned int ns , unsigned int ms, unsigned int ks, unsigned int ns
, fetching_policy_type A_fetching_policy, fetching_policy_type B_fetching_policy , fetching_policy_type A_fetching_policy, fetching_policy_type B_fetching_policy
@@ -27,7 +27,7 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
} }
unsigned int gemm::lmem_usage(math_expression const & expression) const unsigned int matrix_product::lmem_usage(math_expression const & expression) const
{ {
numeric_type numeric_t = lhs_most(expression.tree(), expression.root()).lhs.dtype; numeric_type numeric_t = lhs_most(expression.tree(), expression.root()).lhs.dtype;
unsigned int N = 0; unsigned int N = 0;
@@ -36,7 +36,7 @@ unsigned int gemm::lmem_usage(math_expression const & expression) const
return N*size_of(numeric_t); return N*size_of(numeric_t);
} }
unsigned int gemm::registers_usage(math_expression const & expression) const unsigned int matrix_product::registers_usage(math_expression const & expression) const
{ {
numeric_type numeric_t = lhs_most(expression.tree(), expression.root()).lhs.dtype; numeric_type numeric_t = lhs_most(expression.tree(), expression.root()).lhs.dtype;
@@ -44,7 +44,7 @@ unsigned int gemm::registers_usage(math_expression const & expression) const
return N*size_of(numeric_t); return N*size_of(numeric_t);
} }
unsigned int gemm::temporary_workspace(math_expression const & expressions) const unsigned int matrix_product::temporary_workspace(math_expression const & expressions) const
{ {
std::vector<int_t> MNK = input_sizes(expressions); std::vector<int_t> MNK = input_sizes(expressions);
int_t M = MNK[0]; int_t N = MNK[1]; int_t M = MNK[0]; int_t N = MNK[1];
@@ -53,7 +53,7 @@ unsigned int gemm::temporary_workspace(math_expression const & expressions) cons
return 0; return 0;
} }
int gemm::is_invalid_impl(driver::Device const &, math_expression const &) const int matrix_product::is_invalid_impl(driver::Device const &, math_expression const &) const
{ {
// if(device.vendor()==driver::Device::Vendor::NVIDIA && p_.simd_width > 1) // if(device.vendor()==driver::Device::Vendor::NVIDIA && p_.simd_width > 1)
// return TEMPLATE_INVALID_SIMD_WIDTH; // return TEMPLATE_INVALID_SIMD_WIDTH;
@@ -103,7 +103,7 @@ int gemm::is_invalid_impl(driver::Device const &, math_expression const &) const
return TEMPLATE_VALID; return TEMPLATE_VALID;
} }
std::string gemm::generate_impl(std::string const & suffix, math_expression const & expression, driver::Device const & device, mapping_type const &) const std::string matrix_product::generate_impl(std::string const & suffix, math_expression const & expression, driver::Device const & device, mapping_type const &) const
{ {
using std::string; using std::string;
using tools::to_string; using tools::to_string;
@@ -132,10 +132,10 @@ std::string gemm::generate_impl(std::string const & suffix, math_expression cons
////////////////// //////////////////
/// DECLARATIONS /// DECLARATIONS
/// ////////////// /// //////////////
std::string gemm_name = "gemm"; std::string matrix_product_name = "matrix_product";
std::string reduce_name = "reduce"; std::string reduce_name = "reduce";
gemm_name += suffix; matrix_product_name += suffix;
reduce_name += suffix; reduce_name += suffix;
switch(backend) switch(backend)
@@ -146,7 +146,7 @@ std::string gemm::generate_impl(std::string const & suffix, math_expression cons
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break; stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break;
} }
stream << KernelPrefix(backend) << " void " << gemm_name << "(" << _size_t << " M, " << _size_t << " N, " << _size_t << " K, " stream << KernelPrefix(backend) << " void " << matrix_product_name << "(" << _size_t << " M, " << _size_t << " N, " << _size_t << " K, "
<< Global(backend) << " " << sdtype << "* C, " << _size_t << " ldc," << _size_t << " offc," << _size_t << " Cstride1, " << Global(backend) << " " << sdtype << "* C, " << _size_t << " ldc," << _size_t << " offc," << _size_t << " Cstride1, "
<< sdtype << " alpha," << sdtype << " alpha,"
<< Global(backend) << " " << sdtype << "* A, " << _size_t << " lda," << _size_t << " offa," << _size_t << " Astride1," << Global(backend) << " " << sdtype << "* A, " << _size_t << " lda," << _size_t << " offa," << _size_t << " Astride1,"
@@ -572,7 +572,7 @@ std::string gemm::generate_impl(std::string const & suffix, math_expression cons
#undef VST0RE #undef VST0RE
} }
void gemm::enqueue_block(driver::CommandQueue & /*queue*/, int_t M, int_t N, int_t K, void matrix_product::enqueue_block(driver::CommandQueue & /*queue*/, int_t M, int_t N, int_t K,
array_base const & A, array_base const & B, array_base const & C, array_base const & A, array_base const & B, array_base const & C,
value_scalar const & alpha, value_scalar const & beta, value_scalar const & alpha, value_scalar const & beta,
driver::Program const & program, std::string const & suffix, execution_options_type const & options) driver::Program const & program, std::string const & suffix, execution_options_type const & options)
@@ -582,53 +582,53 @@ void gemm::enqueue_block(driver::CommandQueue & /*queue*/, int_t M, int_t N, int
if(M==0 || N==0 || K==0) if(M==0 || N==0 || K==0)
return; return;
std::string gemm_name = "gemm"; std::string matrix_product_name = "matrix_product";
std::string reduce_name = "reduce"; std::string reduce_name = "reduce";
gemm_name += suffix; matrix_product_name += suffix;
reduce_name += suffix; reduce_name += suffix;
driver::Kernel gemm(program, gemm_name.c_str()); driver::Kernel matrix_product(program, matrix_product_name.c_str());
driver::NDRange local(p_.local_size_0, p_.local_size_1, 1); driver::NDRange local(p_.local_size_0, p_.local_size_1, 1);
driver::NDRange global(align(align(M,p_.mS)/p_.mS, p_.local_size_0), align(align(N,p_.nS)/p_.nS, p_.local_size_1), p_.depth); driver::NDRange global(align(align(M,p_.mS)/p_.mS, p_.local_size_0), align(align(N,p_.nS)/p_.nS, p_.local_size_1), p_.depth);
unsigned int current_arg = 0; unsigned int current_arg = 0;
bind_independent binder; bind_independent binder;
set_arguments_functor helper(binder, current_arg, gemm); set_arguments_functor helper(binder, current_arg, matrix_product);
driver::Buffer& workspace = driver::backend::workspaces::get(options.queue(C.context())); driver::Buffer& workspace = driver::backend::workspaces::get(options.queue(C.context()));
gemm.setSizeArg(current_arg++, M); matrix_product.setSizeArg(current_arg++, M);
gemm.setSizeArg(current_arg++, N); matrix_product.setSizeArg(current_arg++, N);
gemm.setSizeArg(current_arg++, K); matrix_product.setSizeArg(current_arg++, K);
if(p_.depth==1) if(p_.depth==1)
{ {
gemm.setArg(current_arg++,C.data()); matrix_product.setArg(current_arg++,C.data());
gemm.setSizeArg(current_arg++, C.stride()[1]); matrix_product.setSizeArg(current_arg++, C.stride()[1]);
gemm.setSizeArg(current_arg++, C.start()); matrix_product.setSizeArg(current_arg++, C.start());
gemm.setSizeArg(current_arg++, C.stride()[0]); matrix_product.setSizeArg(current_arg++, C.stride()[0]);
} }
else else
{ {
gemm.setArg(current_arg++, workspace); matrix_product.setArg(current_arg++, workspace);
gemm.setSizeArg(current_arg++, M); matrix_product.setSizeArg(current_arg++, M);
gemm.setSizeArg(current_arg++, 0); matrix_product.setSizeArg(current_arg++, 0);
gemm.setSizeArg(current_arg++, 1); matrix_product.setSizeArg(current_arg++, 1);
} }
helper.set_arguments(alpha.dtype(), alpha.values()); helper.set_arguments(alpha.dtype(), alpha.values());
gemm.setArg(current_arg++, A.data()); matrix_product.setArg(current_arg++, A.data());
gemm.setSizeArg(current_arg++, A.stride()[1]); matrix_product.setSizeArg(current_arg++, A.stride()[1]);
gemm.setSizeArg(current_arg++, A.start()); matrix_product.setSizeArg(current_arg++, A.start());
gemm.setSizeArg(current_arg++, A.stride()[0]); matrix_product.setSizeArg(current_arg++, A.stride()[0]);
gemm.setArg(current_arg++, B.data()); matrix_product.setArg(current_arg++, B.data());
gemm.setSizeArg(current_arg++, B.stride()[1]); matrix_product.setSizeArg(current_arg++, B.stride()[1]);
gemm.setSizeArg(current_arg++, B.start()); matrix_product.setSizeArg(current_arg++, B.start());
gemm.setSizeArg(current_arg++, B.stride()[0]); matrix_product.setSizeArg(current_arg++, B.stride()[0]);
helper.set_arguments(beta.dtype(), beta.values()); helper.set_arguments(beta.dtype(), beta.values());
options.enqueue(program.context(), gemm, global, local); options.enqueue(program.context(), matrix_product, global, local);
if(p_.depth > 1) if(p_.depth > 1)
{ {
@@ -652,18 +652,18 @@ void gemm::enqueue_block(driver::CommandQueue & /*queue*/, int_t M, int_t N, int
} }
std::vector<int_t> gemm::infos(math_expression const & expression, symbolic::preset::gemm::args& arguments) const std::vector<int_t> matrix_product::infos(math_expression const & expression, symbolic::preset::matrix_product::args& arguments) const
{ {
math_expression::container_type const & array = expression.tree(); math_expression::container_type const & array = expression.tree();
std::size_t root = expression.root(); std::size_t root = expression.root();
arguments = symbolic::preset::gemm::check(array, root); arguments = symbolic::preset::matrix_product::check(array, root);
int_t M = arguments.C->array->shape()[0]; int_t M = arguments.C->array->shape()[0];
int_t N = arguments.C->array->shape()[1]; int_t N = arguments.C->array->shape()[1];
int_t K = (A_trans_=='T')?arguments.A->array->shape()[0]:arguments.A->array->shape()[1]; int_t K = (A_trans_=='T')?arguments.A->array->shape()[0]:arguments.A->array->shape()[1];
return {M, N, K}; return {M, N, K};
} }
gemm::gemm(gemm_parameters const & parameters, bool check_bounds, char A_trans, char B_trans) : base_impl<gemm, gemm_parameters>(parameters, BIND_INDEPENDENT), A_trans_(A_trans), B_trans_(B_trans), check_bounds_(check_bounds) matrix_product::matrix_product(matrix_product_parameters const & parameters, bool check_bounds, char A_trans, char B_trans) : base_impl<matrix_product, matrix_product_parameters>(parameters, BIND_INDEPENDENT), A_trans_(A_trans), B_trans_(B_trans), check_bounds_(check_bounds)
{ {
if(A_trans_=='N' && B_trans_=='N') type_ = GEMM_NN_TYPE; if(A_trans_=='N' && B_trans_=='N') type_ = GEMM_NN_TYPE;
else if(A_trans_=='T' && B_trans_=='N') type_ = GEMM_TN_TYPE; else if(A_trans_=='T' && B_trans_=='N') type_ = GEMM_TN_TYPE;
@@ -672,21 +672,21 @@ gemm::gemm(gemm_parameters const & parameters, bool check_bounds, char A_trans,
else throw; else throw;
} }
std::vector<int_t> gemm::input_sizes(math_expression const & expressions) const std::vector<int_t> matrix_product::input_sizes(math_expression const & expressions) const
{ {
symbolic::preset::gemm::args dummy; symbolic::preset::matrix_product::args dummy;
return infos((math_expression&)expressions, dummy); return infos((math_expression&)expressions, dummy);
} }
void gemm::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback_base, execution_handler const & control) void matrix_product::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback_base, execution_handler const & control)
{ {
using namespace tools; using namespace tools;
gemm & fallback = (gemm&)fallback_base; matrix_product & fallback = (matrix_product&)fallback_base;
math_expression const & expressions = control.x(); math_expression const & expressions = control.x();
symbolic::preset::gemm::args args; symbolic::preset::matrix_product::args args;
std::vector<int_t> MNK = infos(expressions, args); std::vector<int_t> MNK = infos(expressions, args);
int_t M = MNK[0]; int_t M = MNK[0];
@@ -720,40 +720,40 @@ void gemm::enqueue(driver::CommandQueue & queue, driver::Program const & program
} }
// //
gemm_nn::gemm_nn(unsigned int simd matrix_product_nn::matrix_product_nn(unsigned int simd
, int_t ls0, int_t KL, int_t ls1, int_t D , int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns , int_t ms, int_t ks, int_t ns
, fetching_policy_type Afetch , fetching_policy_type Bfetch , fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound) : , int_t lfetch0, int_t lfetch1, bool check_bound) :
gemm(gemm_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'N', 'N') matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'N', 'N')
{ {
} }
// //
gemm_tn::gemm_tn(unsigned int simd matrix_product_tn::matrix_product_tn(unsigned int simd
, int_t ls0, int_t KL, int_t ls1, int_t D , int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns , int_t ms, int_t ks, int_t ns
, fetching_policy_type Afetch , fetching_policy_type Bfetch , fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound) : , int_t lfetch0, int_t lfetch1, bool check_bound) :
gemm(gemm_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'T', 'N') matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'T', 'N')
{ } { }
// //
gemm_nt::gemm_nt(unsigned int simd matrix_product_nt::matrix_product_nt(unsigned int simd
, int_t ls0, int_t KL, int_t ls1, int_t D , int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns , int_t ms, int_t ks, int_t ns
, fetching_policy_type Afetch , fetching_policy_type Bfetch , fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound) : , int_t lfetch0, int_t lfetch1, bool check_bound) :
gemm(gemm_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'N', 'T') matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'N', 'T')
{ } { }
// //
gemm_tt::gemm_tt(unsigned int simd matrix_product_tt::matrix_product_tt(unsigned int simd
, int_t ls0, int_t KL, int_t ls1, int_t D , int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns , int_t ms, int_t ks, int_t ns
, fetching_policy_type Afetch , fetching_policy_type Bfetch , fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound) : , int_t lfetch0, int_t lfetch1, bool check_bound) :
gemm(gemm_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'T', 'T') matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'T', 'T')
{ } { }
} }

View File

@@ -1,6 +1,6 @@
#include <cstring> #include <cstring>
#include <iostream> #include <iostream>
#include "isaac/kernels/templates/dot.h" #include "isaac/kernels/templates/reduce_1d.h"
#include "isaac/kernels/keywords.h" #include "isaac/kernels/keywords.h"
#include "tools/loop.hpp" #include "tools/loop.hpp"
@@ -15,25 +15,25 @@ namespace isaac
{ {
namespace templates namespace templates
{ {
dot_parameters::dot_parameters(unsigned int _simd_width, reduce_1d_parameters::reduce_1d_parameters(unsigned int _simd_width,
unsigned int _group_size, unsigned int _num_groups, unsigned int _group_size, unsigned int _num_groups,
fetching_policy_type _fetching_policy) : base::parameters_type(_simd_width, _group_size, 1, 2), num_groups(_num_groups), fetching_policy(_fetching_policy) fetching_policy_type _fetching_policy) : base::parameters_type(_simd_width, _group_size, 1, 2), num_groups(_num_groups), fetching_policy(_fetching_policy)
{ } { }
unsigned int dot::lmem_usage(math_expression const & x) const unsigned int reduce_1d::lmem_usage(math_expression const & x) const
{ {
numeric_type numeric_t= lhs_most(x.tree(), x.root()).lhs.dtype; numeric_type numeric_t= lhs_most(x.tree(), x.root()).lhs.dtype;
return p_.local_size_0*size_of(numeric_t); return p_.local_size_0*size_of(numeric_t);
} }
int dot::is_invalid_impl(driver::Device const &, math_expression const &) const int reduce_1d::is_invalid_impl(driver::Device const &, math_expression const &) const
{ {
if (p_.fetching_policy==FETCH_FROM_LOCAL) if (p_.fetching_policy==FETCH_FROM_LOCAL)
return TEMPLATE_INVALID_FETCHING_POLICY_TYPE; return TEMPLATE_INVALID_FETCHING_POLICY_TYPE;
return TEMPLATE_VALID; return TEMPLATE_VALID;
} }
inline void dot::reduce_1d_local_memory(kernel_generation_stream & stream, unsigned int size, std::vector<mapped_scalar_dot*> exprs, inline void reduce_1d::reduce_1d_local_memory(kernel_generation_stream & stream, unsigned int size, std::vector<mapped_reduce_1d*> exprs,
std::string const & buf_str, std::string const & buf_value_str, driver::backend_type backend) const std::string const & buf_str, std::string const & buf_value_str, driver::backend_type backend) const
{ {
stream << "#pragma unroll" << std::endl; stream << "#pragma unroll" << std::endl;
@@ -46,25 +46,25 @@ inline void dot::reduce_1d_local_memory(kernel_generation_stream & stream, unsig
stream.inc_tab(); stream.inc_tab();
for (auto & expr : exprs) for (auto & expr : exprs)
if (expr->is_index_dot()) if (expr->is_index_reduction())
compute_index_dot(stream, expr->process(buf_str+"[lid]"), expr->process(buf_str+"[lid+stride]") compute_index_reduce_1d(stream, expr->process(buf_str+"[lid]"), expr->process(buf_str+"[lid+stride]")
, expr->process(buf_value_str+"[lid]"), expr->process(buf_value_str+"[lid+stride]"), , expr->process(buf_value_str+"[lid]"), expr->process(buf_value_str+"[lid+stride]"),
expr->root_op()); expr->root_op());
else else
compute_dot(stream, expr->process(buf_str+"[lid]"), expr->process(buf_str+"[lid+stride]"), expr->root_op()); compute_reduce_1d(stream, expr->process(buf_str+"[lid]"), expr->process(buf_str+"[lid+stride]"), expr->root_op());
stream.dec_tab(); stream.dec_tab();
stream << "}" << std::endl; stream << "}" << std::endl;
stream.dec_tab(); stream.dec_tab();
stream << "}" << std::endl; stream << "}" << std::endl;
} }
std::string dot::generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mapping) const std::string reduce_1d::generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mapping) const
{ {
kernel_generation_stream stream; kernel_generation_stream stream;
std::vector<mapped_scalar_dot*> exprs; std::vector<mapped_reduce_1d*> exprs;
for (mapping_type::const_iterator iit = mapping.begin(); iit != mapping.end(); ++iit) for (mapping_type::const_iterator iit = mapping.begin(); iit != mapping.end(); ++iit)
if (mapped_scalar_dot * p = dynamic_cast<mapped_scalar_dot*>(iit->second.get())) if (mapped_reduce_1d * p = dynamic_cast<mapped_reduce_1d*>(iit->second.get()))
exprs.push_back(p); exprs.push_back(p);
std::size_t N = exprs.size(); std::size_t N = exprs.size();
driver::backend_type backend = device.backend(); driver::backend_type backend = device.backend();
@@ -81,7 +81,7 @@ std::string dot::generate_impl(std::string const & suffix, math_expression const
{ {
numeric_type dtype = lhs_most(exprs[k]->math_expression().tree(), exprs[k]->math_expression().root()).lhs.dtype; numeric_type dtype = lhs_most(exprs[k]->math_expression().tree(), exprs[k]->math_expression().root()).lhs.dtype;
std::string sdtype = to_string(dtype); std::string sdtype = to_string(dtype);
if (exprs[k]->is_index_dot()) if (exprs[k]->is_index_reduction())
{ {
stream << exprs[k]->process("uint* #name_temp = (uint*)(tmp + " + tools::to_string(offset) + ");"); stream << exprs[k]->process("uint* #name_temp = (uint*)(tmp + " + tools::to_string(offset) + ");");
offset += 4*p_.num_groups; offset += 4*p_.num_groups;
@@ -125,7 +125,7 @@ std::string dot::generate_impl(std::string const & suffix, math_expression const
for (unsigned int k = 0; k < N; ++k) for (unsigned int k = 0; k < N; ++k)
{ {
if (exprs[k]->is_index_dot()) if (exprs[k]->is_index_reduction())
{ {
stream << exprs[k]->process(Local(backend).get() + " #scalartype #name_buf_value[" + tools::to_string(p_.local_size_0) + "];") << std::endl; stream << exprs[k]->process(Local(backend).get() + " #scalartype #name_buf_value[" + tools::to_string(p_.local_size_0) + "];") << std::endl;
stream << exprs[k]->process("#scalartype #name_acc_value = " + neutral_element(exprs[k]->root_op(), backend, "#scalartype") + ";") << std::endl; stream << exprs[k]->process("#scalartype #name_acc_value = " + neutral_element(exprs[k]->root_op(), backend, "#scalartype") + ";") << std::endl;
@@ -174,11 +174,11 @@ std::string dot::generate_impl(std::string const & suffix, math_expression const
accessors["matrix_diag"] = str[a]; accessors["matrix_diag"] = str[a];
accessors["array1"] = "#namereg"; accessors["array1"] = "#namereg";
std::string value = elem->evaluate_recursive(LHS_NODE_TYPE, accessors); std::string value = elem->evaluate_recursive(LHS_NODE_TYPE, accessors);
if (elem->is_index_dot()) if (elem->is_index_reduction())
compute_index_dot(stream, elem->process("#name_acc"), "i*" + tools::to_string(simd_width) + "+" compute_index_reduce_1d(stream, elem->process("#name_acc"), "i*" + tools::to_string(simd_width) + "+"
+ tools::to_string(a), elem->process("#name_acc_value"), value,elem->root_op()); + tools::to_string(a), elem->process("#name_acc_value"), value,elem->root_op());
else else
compute_dot(stream, elem->process("#name_acc"), value,elem->root_op()); compute_reduce_1d(stream, elem->process("#name_acc"), value,elem->root_op());
} }
} }
}); });
@@ -186,7 +186,7 @@ std::string dot::generate_impl(std::string const & suffix, math_expression const
//Fills local memory //Fills local memory
for (unsigned int k = 0; k < N; ++k) for (unsigned int k = 0; k < N; ++k)
{ {
if (exprs[k]->is_index_dot()) if (exprs[k]->is_index_reduction())
stream << exprs[k]->process("#name_buf_value[lid] = #name_acc_value;") << std::endl; stream << exprs[k]->process("#name_buf_value[lid] = #name_acc_value;") << std::endl;
stream << exprs[k]->process("#name_buf[lid] = #name_acc;") << std::endl; stream << exprs[k]->process("#name_buf[lid] = #name_acc;") << std::endl;
} }
@@ -200,7 +200,7 @@ std::string dot::generate_impl(std::string const & suffix, math_expression const
stream.inc_tab(); stream.inc_tab();
for (unsigned int k = 0; k < N; ++k) for (unsigned int k = 0; k < N; ++k)
{ {
if (exprs[k]->is_index_dot()) if (exprs[k]->is_index_reduction())
stream << exprs[k]->process("#name_temp_value[gpid] = #name_buf_value[0];") << std::endl; stream << exprs[k]->process("#name_temp_value[gpid] = #name_buf_value[0];") << std::endl;
stream << exprs[k]->process("#name_temp[gpid] = #name_buf[0];") << std::endl; stream << exprs[k]->process("#name_temp[gpid] = #name_buf[0];") << std::endl;
} }
@@ -225,9 +225,9 @@ std::string dot::generate_impl(std::string const & suffix, math_expression const
stream << "unsigned int lid = " <<LocalIdx0(backend) << ";" << std::endl; stream << "unsigned int lid = " <<LocalIdx0(backend) << ";" << std::endl;
stream << "unsigned int lsize = " <<LocalSize0(backend) << ";" << std::endl; stream << "unsigned int lsize = " <<LocalSize0(backend) << ";" << std::endl;
for (mapped_scalar_dot* e: exprs) for (mapped_reduce_1d* e: exprs)
{ {
if (e->is_index_dot()) if (e->is_index_reduction())
{ {
stream << e->process(Local(backend).get() + " unsigned int #name_buf[" + tools::to_string(p_.local_size_0) + "];"); stream << e->process(Local(backend).get() + " unsigned int #name_buf[" + tools::to_string(p_.local_size_0) + "];");
stream << e->process("unsigned int #name_acc = 0;") << std::endl; stream << e->process("unsigned int #name_acc = 0;") << std::endl;
@@ -244,18 +244,18 @@ std::string dot::generate_impl(std::string const & suffix, math_expression const
stream << "for(unsigned int i = lid; i < " << p_.num_groups << "; i += lsize)" << std::endl; stream << "for(unsigned int i = lid; i < " << p_.num_groups << "; i += lsize)" << std::endl;
stream << "{" << std::endl; stream << "{" << std::endl;
stream.inc_tab(); stream.inc_tab();
for (mapped_scalar_dot* e: exprs) for (mapped_reduce_1d* e: exprs)
if (e->is_index_dot()) if (e->is_index_reduction())
compute_index_dot(stream, e->process("#name_acc"), e->process("#name_temp[i]"), e->process("#name_acc_value"),e->process("#name_temp_value[i]"),e->root_op()); compute_index_reduce_1d(stream, e->process("#name_acc"), e->process("#name_temp[i]"), e->process("#name_acc_value"),e->process("#name_temp_value[i]"),e->root_op());
else else
compute_dot(stream, e->process("#name_acc"), e->process("#name_temp[i]"), e->root_op()); compute_reduce_1d(stream, e->process("#name_acc"), e->process("#name_temp[i]"), e->root_op());
stream.dec_tab(); stream.dec_tab();
stream << "}" << std::endl; stream << "}" << std::endl;
for (unsigned int k = 0; k < N; ++k) for (unsigned int k = 0; k < N; ++k)
{ {
if (exprs[k]->is_index_dot()) if (exprs[k]->is_index_reduction())
stream << exprs[k]->process("#name_buf_value[lid] = #name_acc_value;") << std::endl; stream << exprs[k]->process("#name_buf_value[lid] = #name_acc_value;") << std::endl;
stream << exprs[k]->process("#name_buf[lid] = #name_acc;") << std::endl; stream << exprs[k]->process("#name_buf[lid] = #name_acc;") << std::endl;
} }
@@ -268,7 +268,7 @@ std::string dot::generate_impl(std::string const & suffix, math_expression const
stream << "{" << std::endl; stream << "{" << std::endl;
stream.inc_tab(); stream.inc_tab();
std::map<std::string, std::string> accessors; std::map<std::string, std::string> accessors;
accessors["scalar_dot"] = "#name_buf[0]"; accessors["scalar_reduce_1d"] = "#name_buf[0]";
accessors["array1"] = "#pointer[#start]"; accessors["array1"] = "#pointer[#start]";
accessors["array11"] = "#pointer[#start]"; accessors["array11"] = "#pointer[#start]";
stream << evaluate(PARENT_NODE_TYPE, accessors, expressions, expressions.root(), mapping) << ";" << std::endl; stream << evaluate(PARENT_NODE_TYPE, accessors, expressions, expressions.root(), mapping) << ";" << std::endl;
@@ -283,23 +283,23 @@ std::string dot::generate_impl(std::string const & suffix, math_expression const
return stream.str(); return stream.str();
} }
dot::dot(dot::parameters_type const & parameters, reduce_1d::reduce_1d(reduce_1d::parameters_type const & parameters,
binding_policy_t binding) : base_impl<dot, dot_parameters>(parameters, binding) binding_policy_t binding) : base_impl<reduce_1d, reduce_1d_parameters>(parameters, binding)
{ } { }
dot::dot(unsigned int simd, unsigned int ls, unsigned int ng, reduce_1d::reduce_1d(unsigned int simd, unsigned int ls, unsigned int ng,
fetching_policy_type fetch, binding_policy_t bind): fetching_policy_type fetch, binding_policy_t bind):
base_impl<dot, dot_parameters>(dot_parameters(simd,ls,ng,fetch), bind) base_impl<reduce_1d, reduce_1d_parameters>(reduce_1d_parameters(simd,ls,ng,fetch), bind)
{} {}
std::vector<int_t> dot::input_sizes(math_expression const & x) const std::vector<int_t> reduce_1d::input_sizes(math_expression const & x) const
{ {
std::vector<size_t> dots_idx = filter_nodes(&is_dot, x, x.root(), false); std::vector<size_t> reduce_1ds_idx = filter_nodes(&is_reduce_1d, x, x.root(), false);
int_t N = vector_size(lhs_most(x.tree(), dots_idx[0])); int_t N = vector_size(lhs_most(x.tree(), reduce_1ds_idx[0]));
return {N}; return {N};
} }
void dot::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const & control) void reduce_1d::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const & control)
{ {
math_expression const & x = control.x(); math_expression const & x = control.x();
@@ -313,10 +313,10 @@ void dot::enqueue(driver::CommandQueue & queue, driver::Program const & program,
return; return;
} }
std::vector<math_expression::node const *> dots; std::vector<math_expression::node const *> reduce_1ds;
std::vector<size_t> dots_idx = filter_nodes(&is_dot, x, x.root(), false); std::vector<size_t> reduce_1ds_idx = filter_nodes(&is_reduce_1d, x, x.root(), false);
for (size_t idx: dots_idx) for (size_t idx: reduce_1ds_idx)
dots.push_back(&x.tree()[idx]); reduce_1ds.push_back(&x.tree()[idx]);
//Kernel //Kernel
std::string name[2] = {"prod", "reduce"}; std::string name[2] = {"prod", "reduce"};

View File

@@ -2,7 +2,7 @@
#include <iostream> #include <iostream>
#include "isaac/kernels/stream.h" #include "isaac/kernels/stream.h"
#include "isaac/kernels/keywords.h" #include "isaac/kernels/keywords.h"
#include "isaac/kernels/templates/gemv.h" #include "isaac/kernels/templates/reduce_2d.h"
#include "tools/arguments.hpp" #include "tools/arguments.hpp"
#include "tools/loop.hpp" #include "tools/loop.hpp"
@@ -16,33 +16,33 @@ namespace isaac
namespace templates namespace templates
{ {
gemv_parameters::gemv_parameters(unsigned int _simd_width, reduce_2d_parameters::reduce_2d_parameters(unsigned int _simd_width,
unsigned int _local_size_0, unsigned int _local_size_1, unsigned int _local_size_0, unsigned int _local_size_1,
unsigned int _num_groups_0, unsigned int _num_groups_1, fetching_policy_type _fetch_policy): base::parameters_type(_simd_width, _local_size_0, _local_size_1, 1), unsigned int _num_groups_0, unsigned int _num_groups_1, fetching_policy_type _fetch_policy): base::parameters_type(_simd_width, _local_size_0, _local_size_1, 1),
num_groups_0(_num_groups_0), num_groups_1(_num_groups_1), fetch_policy(_fetch_policy) { } num_groups_0(_num_groups_0), num_groups_1(_num_groups_1), fetch_policy(_fetch_policy) { }
int gemv::is_invalid_impl(driver::Device const &, math_expression const &) const int reduce_2d::is_invalid_impl(driver::Device const &, math_expression const &) const
{ {
if (p_.fetch_policy==FETCH_FROM_LOCAL) if (p_.fetch_policy==FETCH_FROM_LOCAL)
return TEMPLATE_INVALID_FETCHING_POLICY_TYPE; return TEMPLATE_INVALID_FETCHING_POLICY_TYPE;
return TEMPLATE_VALID; return TEMPLATE_VALID;
} }
unsigned int gemv::lmem_usage(const math_expression&) const unsigned int reduce_2d::lmem_usage(const math_expression&) const
{ {
return (p_.local_size_0+1)*p_.local_size_1; return (p_.local_size_0+1)*p_.local_size_1;
} }
std::string gemv::generate_impl(std::string const & suffix, math_expression const & expression, driver::Device const & device, mapping_type const & mapping) const std::string reduce_2d::generate_impl(std::string const & suffix, math_expression const & expression, driver::Device const & device, mapping_type const & mapping) const
{ {
using tools::to_string; using tools::to_string;
std::vector<mapped_gemv*> dots; std::vector<mapped_reduce_2d*> reduce_1ds;
std::vector<size_t> idx = filter_nodes(&is_dot, expression, expression.root(), false); std::vector<size_t> idx = filter_nodes(&is_reduce_1d, expression, expression.root(), false);
for (auto & elem : idx) for (auto & elem : idx)
dots.push_back((mapped_gemv*)(mapping.at(mapping_key(elem, PARENT_NODE_TYPE)).get())); reduce_1ds.push_back((mapped_reduce_2d*)(mapping.at(mapping_key(elem, PARENT_NODE_TYPE)).get()));
kernel_generation_stream stream; kernel_generation_stream stream;
driver::backend_type backend = device.backend(); driver::backend_type backend = device.backend();
@@ -55,11 +55,11 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
auto unroll_tmp = [&]() auto unroll_tmp = [&]()
{ {
unsigned int offset = 0; unsigned int offset = 0;
for (const auto & e : dots) for (const auto & e : reduce_1ds)
{ {
numeric_type dtype = lhs_most(e->math_expression().tree(), e->math_expression().root()).lhs.dtype; numeric_type dtype = lhs_most(e->math_expression().tree(), e->math_expression().root()).lhs.dtype;
std::string sdtype = to_string(dtype); std::string sdtype = to_string(dtype);
if (e->is_index_dot()) if (e->is_index_reduction())
{ {
stream << e->process("uint* #name_temp = (uint*)(tmp + " + tools::to_string(offset) + "*M);"); stream << e->process("uint* #name_temp = (uint*)(tmp + " + tools::to_string(offset) + "*M);");
offset += 4*p_.num_groups_0; offset += 4*p_.num_groups_0;
@@ -73,7 +73,7 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
} }
}; };
int col_simd_width = (dot_type_ == REDUCE_COLUMNS) ? 1 : p_.simd_width; int col_simd_width = (reduce_1d_type_ == REDUCE_COLUMNS) ? 1 : p_.simd_width;
switch(backend) switch(backend)
{ {
case driver::CUDA: case driver::CUDA:
@@ -96,7 +96,7 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
unsigned int local_size_0_ld = p_.local_size_0; unsigned int local_size_0_ld = p_.local_size_0;
std::string local_size_0_ld_str = to_string(local_size_0_ld); std::string local_size_0_ld_str = to_string(local_size_0_ld);
for (const auto & e : dots) for (const auto & e : reduce_1ds)
stream << e->process(Local(backend).get() + " " + append_width("#scalartype", col_simd_width) + " #name_buf[" + to_string(p_.local_size_1*local_size_0_ld) + "];") << std::endl; stream << e->process(Local(backend).get() + " " + append_width("#scalartype", col_simd_width) + " #name_buf[" + to_string(p_.local_size_1*local_size_0_ld) + "];") << std::endl;
stream << "for(" << _size_t << " r = " << GlobalIdx1(backend) << "*" << col_simd_width << "; r < (M +" << p_.local_size_1 - 1 << ")/" << p_.local_size_1 << "*" << p_.local_size_1*col_simd_width << "; r += " << GlobalSize1(backend) << "*" << col_simd_width << ")" << std::endl; stream << "for(" << _size_t << " r = " << GlobalIdx1(backend) << "*" << col_simd_width << "; r < (M +" << p_.local_size_1 - 1 << ")/" << p_.local_size_1 << "*" << p_.local_size_1*col_simd_width << "; r += " << GlobalSize1(backend) << "*" << col_simd_width << ")" << std::endl;
@@ -106,7 +106,7 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
stream << "" << _size_t << " lidx = " << LocalIdx0(backend) << ";" << std::endl; stream << "" << _size_t << " lidx = " << LocalIdx0(backend) << ";" << std::endl;
stream << "" << _size_t << " lidy = " << LocalIdx1(backend) <<";" << std::endl; stream << "" << _size_t << " lidy = " << LocalIdx1(backend) <<";" << std::endl;
for (const auto & e : dots){ for (const auto & e : reduce_1ds){
std::string data_type = append_width("#scalartype",col_simd_width); std::string data_type = append_width("#scalartype",col_simd_width);
stream << e->process(data_type + " #name_acc = " + InitPrefix(backend, data_type).get() + "(" + neutral_element((e)->root_op(), backend, "#scalartype") + ");") << std::endl; stream << e->process(data_type + " #name_acc = " + InitPrefix(backend, data_type).get() + "(" + neutral_element((e)->root_op(), backend, "#scalartype") + ");") << std::endl;
@@ -116,14 +116,14 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
stream << "{" << std::endl; stream << "{" << std::endl;
stream.inc_tab(); stream.inc_tab();
element_wise_loop_1D(stream, p_.fetch_policy, (dot_type_==REDUCE_COLUMNS)?p_.simd_width:1, "c", "N", GlobalIdx0(backend).get(), GlobalSize0(backend).get(), device, [&](unsigned int row_simd_width) element_wise_loop_1D(stream, p_.fetch_policy, (reduce_1d_type_==REDUCE_COLUMNS)?p_.simd_width:1, "c", "N", GlobalIdx0(backend).get(), GlobalSize0(backend).get(), device, [&](unsigned int row_simd_width)
{ {
std::set<std::string> already_fetched; std::set<std::string> already_fetched;
for (const auto & e : dots) for (const auto & e : reduce_1ds)
{ {
std::map<std::string, std::string> accessors; std::map<std::string, std::string> accessors;
if(dot_type_==REDUCE_COLUMNS) if(reduce_1d_type_==REDUCE_COLUMNS)
{ {
std::string data_type = append_width("#scalartype",row_simd_width); std::string data_type = append_width("#scalartype",row_simd_width);
accessors["arraynn"] = data_type + " #namereg = " + vload(row_simd_width, "#scalartype", "c*#stride", "#pointer + r*#ld", "1", backend,false)+";"; accessors["arraynn"] = data_type + " #namereg = " + vload(row_simd_width, "#scalartype", "c*#stride", "#pointer + r*#ld", "1", backend,false)+";";
@@ -147,20 +147,20 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
str[a] = access_vector_type("#namereg",a); str[a] = access_vector_type("#namereg",a);
for (auto & elem : dots) for (auto & elem : reduce_1ds)
for (unsigned int a = 0; a < row_simd_width; ++a) for (unsigned int a = 0; a < row_simd_width; ++a)
{ {
std::string value = elem->evaluate_recursive(LHS_NODE_TYPE, {{"arraynn", str[a]}, {"repeat", str[a]}, {"array1", "#namereg"}}); std::string value = elem->evaluate_recursive(LHS_NODE_TYPE, {{"arraynn", str[a]}, {"repeat", str[a]}, {"array1", "#namereg"}});
if (elem->is_index_dot()) if (elem->is_index_reduction())
compute_index_dot(stream, elem->process("#name_acc"), "c*"+to_string(row_simd_width) + to_string(a), elem->process("#name_acc_value"), value, elem->root_op()); compute_index_reduce_1d(stream, elem->process("#name_acc"), "c*"+to_string(row_simd_width) + to_string(a), elem->process("#name_acc_value"), value, elem->root_op());
else else
compute_dot(stream, elem->process("#name_acc"), value,elem->root_op()); compute_reduce_1d(stream, elem->process("#name_acc"), value,elem->root_op());
} }
}); });
stream.dec_tab(); stream.dec_tab();
stream << "}" << std::endl; stream << "}" << std::endl;
for (auto & expr : dots) for (auto & expr : reduce_1ds)
stream << expr->process("#name_buf[lidy*" + local_size_0_ld_str + "+ lidx] = #name_acc;") << std::endl; stream << expr->process("#name_buf[lidy*" + local_size_0_ld_str + "+ lidx] = #name_acc;") << std::endl;
stream << "#pragma unroll" << std::endl; stream << "#pragma unroll" << std::endl;
@@ -173,13 +173,13 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
stream << "{" << std::endl; stream << "{" << std::endl;
stream.inc_tab(); stream.inc_tab();
for (auto & e : dots) for (auto & e : reduce_1ds)
if (e->is_index_dot()) if (e->is_index_reduction())
compute_index_dot(stream, e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx + stride]") compute_index_reduce_1d(stream, e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx + stride]")
, e->process("#name_buf_value[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf_value[lidy*" + local_size_0_ld_str + " + lidx + stride]") , e->process("#name_buf_value[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf_value[lidy*" + local_size_0_ld_str + " + lidx + stride]")
, e->root_op()); , e->root_op());
else else
compute_dot(stream,e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx + stride]"), e->root_op()); compute_reduce_1d(stream,e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx + stride]"), e->root_op());
stream.dec_tab(); stream.dec_tab();
stream << "}" << std::endl; stream << "}" << std::endl;
@@ -196,9 +196,9 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
std::map<std::string, std::string> accessors; std::map<std::string, std::string> accessors;
for(int s = 0 ; s < col_simd_width ; ++s) for(int s = 0 ; s < col_simd_width ; ++s)
{ {
accessors["gemv"] = "#name_buf[lidy*" + local_size_0_ld_str + "]"; accessors["reduce_2d"] = "#name_buf[lidy*" + local_size_0_ld_str + "]";
if(col_simd_width > 1) if(col_simd_width > 1)
accessors["gemv"] = access_vector_type(accessors["gemv"], s); accessors["reduce_2d"] = access_vector_type(accessors["reduce_2d"], s);
accessors["arrayn"] = "#pointer[(r +" + to_string(s) + ")*#stride]"; accessors["arrayn"] = "#pointer[(r +" + to_string(s) + ")*#stride]";
accessors["array1n"] = "#pointer[(r +" + to_string(s) + ")*#stride]"; accessors["array1n"] = "#pointer[(r +" + to_string(s) + ")*#stride]";
accessors["arrayn1"] = "#pointer[(r +" + to_string(s) + ")*#stride]"; accessors["arrayn1"] = "#pointer[(r +" + to_string(s) + ")*#stride]";
@@ -207,11 +207,11 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
} }
else else
{ {
for (mapped_dot const * e : dots) for (mapped_reduce const * e : reduce_1ds)
{ {
if(col_simd_width > 1) if(col_simd_width > 1)
stream << "if(M - r > " << col_simd_width << "){" << std::endl; stream << "if(M - r > " << col_simd_width << "){" << std::endl;
if (e->is_index_dot()) if (e->is_index_reduction())
stream << e->process(vstore(col_simd_width,"uint", "#name_buf_value[lidy*" + local_size_0_ld_str + "]", "0", "#name_temp_value + r + M*" + GroupIdx0(backend).get(), "1", backend, false)) << ";" << std::endl; stream << e->process(vstore(col_simd_width,"uint", "#name_buf_value[lidy*" + local_size_0_ld_str + "]", "0", "#name_temp_value + r + M*" + GroupIdx0(backend).get(), "1", backend, false)) << ";" << std::endl;
stream << e->process(vstore(col_simd_width,"#scalartype", "#name_buf[lidy*" + local_size_0_ld_str + "]", "0", "#name_temp + r + M*" + GroupIdx0(backend).get(), "1", backend, false)) << ";" << std::endl; stream << e->process(vstore(col_simd_width,"#scalartype", "#name_buf[lidy*" + local_size_0_ld_str + "]", "0", "#name_temp + r + M*" + GroupIdx0(backend).get(), "1", backend, false)) << ";" << std::endl;
if(col_simd_width > 1) if(col_simd_width > 1)
@@ -220,7 +220,7 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
stream << "else{" << std::endl; stream << "else{" << std::endl;
stream.inc_tab(); stream.inc_tab();
for(int s = 0 ; s < col_simd_width ; ++s){ for(int s = 0 ; s < col_simd_width ; ++s){
if (e->is_index_dot()) if (e->is_index_reduction())
stream << "if(r + " << s << "< M) " << e->process("#name_temp_value[r + " + to_string(s) + " + M*" + GroupIdx0(backend).get() + "] = " + access_vector_type("#name_buf_value[lidy*" + local_size_0_ld_str + "]", s)) << ";" << std::endl; stream << "if(r + " << s << "< M) " << e->process("#name_temp_value[r + " + to_string(s) + " + M*" + GroupIdx0(backend).get() + "] = " + access_vector_type("#name_buf_value[lidy*" + local_size_0_ld_str + "]", s)) << ";" << std::endl;
stream << "if(r + " << s << "< M) " << e->process("#name_temp[r + " + to_string(s) + " + M*" + GroupIdx0(backend).get() + "] = " + access_vector_type("#name_buf[lidy*" + local_size_0_ld_str + "]", s)) << ";" << std::endl; stream << "if(r + " << s << "< M) " << e->process("#name_temp[r + " + to_string(s) + " + M*" + GroupIdx0(backend).get() + "] = " + access_vector_type("#name_buf[lidy*" + local_size_0_ld_str + "]", s)) << ";" << std::endl;
} }
@@ -262,7 +262,7 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
{"arrayn1", "#pointer += #start;"}, {"arrayn1", "#pointer += #start;"},
{"arraynn", "#pointer += #start; "}}, expression, mapping); {"arraynn", "#pointer += #start; "}}, expression, mapping);
for (const auto & e : dots) for (const auto & e : reduce_1ds)
stream << e->process(Local(backend).get() + " #scalartype #name_buf[" + to_string(p_.local_size_1*local_size_0_ld) + "];") << std::endl; stream << e->process(Local(backend).get() + " #scalartype #name_buf[" + to_string(p_.local_size_1*local_size_0_ld) + "];") << std::endl;
stream << "for(" << _size_t << " r = " << GlobalIdx1(backend) << "; r < (M +" << p_.local_size_1 - 1 << ")/" << p_.local_size_1 << "*" << p_.local_size_1 << "; r += " << GlobalSize1(backend) << "){" << std::endl; stream << "for(" << _size_t << " r = " << GlobalIdx1(backend) << "; r < (M +" << p_.local_size_1 - 1 << ")/" << p_.local_size_1 << "*" << p_.local_size_1 << "; r += " << GlobalSize1(backend) << "){" << std::endl;
@@ -270,7 +270,7 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
stream << _size_t << " lidx = " << LocalIdx0(backend) << ";" << std::endl; stream << _size_t << " lidx = " << LocalIdx0(backend) << ";" << std::endl;
stream << _size_t << " lidy = " << LocalIdx1(backend) <<";" << std::endl; stream << _size_t << " lidy = " << LocalIdx1(backend) <<";" << std::endl;
for (const auto & e : dots) for (const auto & e : reduce_1ds)
stream << e->process("#scalartype #name_acc = " + neutral_element((e)->root_op(), backend, "#scalartype") + ";") << std::endl; stream << e->process("#scalartype #name_acc = " + neutral_element((e)->root_op(), backend, "#scalartype") + ";") << std::endl;
stream << "if (r < M)" << std::endl; stream << "if (r < M)" << std::endl;
@@ -280,8 +280,8 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
stream << "for(" << _size_t << " c = lidx; c < " << p_.num_groups_0 << "; c += " << LocalSize0(backend) << "){" << std::endl; stream << "for(" << _size_t << " c = lidx; c < " << p_.num_groups_0 << "; c += " << LocalSize0(backend) << "){" << std::endl;
stream.inc_tab(); stream.inc_tab();
for (mapped_dot* e: dots) for (mapped_reduce* e: reduce_1ds)
compute_dot(stream, e->process("#name_acc"), e->process("#name_temp[r + M*c]"), e->root_op()); compute_reduce_1d(stream, e->process("#name_acc"), e->process("#name_temp[r + M*c]"), e->root_op());
stream.dec_tab(); stream.dec_tab();
stream << "}" << std::endl; stream << "}" << std::endl;
@@ -290,7 +290,7 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
stream.dec_tab(); stream.dec_tab();
stream << "}" << std::endl; stream << "}" << std::endl;
for (auto & expr : dots) for (auto & expr : reduce_1ds)
stream << expr->process("#name_buf[lidy*" + local_size_0_ld_str + "+ lidx] = #name_acc;") << std::endl; stream << expr->process("#name_buf[lidy*" + local_size_0_ld_str + "+ lidx] = #name_acc;") << std::endl;
stream << "#pragma unroll" << std::endl; stream << "#pragma unroll" << std::endl;
@@ -303,13 +303,13 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
stream << "{" << std::endl; stream << "{" << std::endl;
stream.inc_tab(); stream.inc_tab();
for (auto & e : dots) for (auto & e : reduce_1ds)
if (e->is_index_dot()) if (e->is_index_reduction())
compute_index_dot(stream, e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx + stride]") compute_index_reduce_1d(stream, e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx + stride]")
, e->process("#name_buf_value[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf_value[lidy*" + local_size_0_ld_str + " + lidx + stride]") , e->process("#name_buf_value[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf_value[lidy*" + local_size_0_ld_str + " + lidx + stride]")
, e->root_op()); , e->root_op());
else else
compute_dot(stream,e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx + stride]"), e->root_op()); compute_reduce_1d(stream,e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx + stride]"), e->root_op());
stream.dec_tab(); stream.dec_tab();
stream << "}" << std::endl; stream << "}" << std::endl;
@@ -323,7 +323,7 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
stream.inc_tab(); stream.inc_tab();
std::map<std::string, std::string> accessors; std::map<std::string, std::string> accessors;
accessors["gemv"] = "#name_buf[lidy*" + local_size_0_ld_str + "]"; accessors["reduce_2d"] = "#name_buf[lidy*" + local_size_0_ld_str + "]";
accessors["arrayn"] = "#pointer[r*#stride]"; accessors["arrayn"] = "#pointer[r*#stride]";
accessors["array1n"] = "#pointer[r*#stride]"; accessors["array1n"] = "#pointer[r*#stride]";
accessors["arrayn1"] = "#pointer[r*#stride]"; accessors["arrayn1"] = "#pointer[r*#stride]";
@@ -344,30 +344,30 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
return stream.str(); return stream.str();
} }
gemv::gemv(gemv::parameters_type const & parameters, reduce_2d::reduce_2d(reduce_2d::parameters_type const & parameters,
gemv::dot_type rtype, reduce_2d::reduce_1d_type rtype,
binding_policy_t binding_policy) : binding_policy_t binding_policy) :
base_impl<gemv, gemv_parameters>(parameters, binding_policy), base_impl<reduce_2d, reduce_2d_parameters>(parameters, binding_policy),
dot_type_(rtype){ } reduce_1d_type_(rtype){ }
std::vector<int_t> gemv::input_sizes(math_expression const & expression) const std::vector<int_t> reduce_2d::input_sizes(math_expression const & expression) const
{ {
std::vector<std::size_t> idx = filter_nodes(&is_dot, expression, expression.root(), false); std::vector<std::size_t> idx = filter_nodes(&is_reduce_1d, expression, expression.root(), false);
std::pair<int_t, int_t> MN = matrix_size(expression.tree(), lhs_most(expression.tree(), idx[0])); std::pair<int_t, int_t> MN = matrix_size(expression.tree(), lhs_most(expression.tree(), idx[0]));
if(dot_type_==REDUCE_COLUMNS) if(reduce_1d_type_==REDUCE_COLUMNS)
std::swap(MN.first,MN.second); std::swap(MN.first,MN.second);
return {MN.first, MN.second}; return {MN.first, MN.second};
} }
void gemv::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const & control) void reduce_2d::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const & control)
{ {
math_expression const & expression = control.x(); math_expression const & expression = control.x();
std::vector<int_t> MN = input_sizes(expression); std::vector<int_t> MN = input_sizes(expression);
std::vector<math_expression::node const *> dots; std::vector<math_expression::node const *> reduce_1ds;
std::vector<size_t> dots_idx = filter_nodes(&is_dot, expression, expression.root(), false); std::vector<size_t> reduce_1ds_idx = filter_nodes(&is_reduce_1d, expression, expression.root(), false);
for (size_t idx : dots_idx) for (size_t idx : reduce_1ds_idx)
dots.push_back(&expression.tree()[idx]); reduce_1ds.push_back(&expression.tree()[idx]);
//Fallback //Fallback
if(p_.simd_width>1 && requires_fallback(expression)) if(p_.simd_width>1 && requires_fallback(expression))
@@ -406,15 +406,15 @@ void gemv::enqueue(driver::CommandQueue & queue, driver::Program const & program
control.execution_options().enqueue(program.context(), kernels[i], global[i], local[i]); control.execution_options().enqueue(program.context(), kernels[i], global[i], local[i]);
} }
gemv_n::gemv_n(gemv_parameters const & parameters,binding_policy_t binding_policy): gemv(parameters, REDUCE_ROWS, binding_policy){} reduce_2d_n::reduce_2d_n(reduce_2d_parameters const & parameters,binding_policy_t binding_policy): reduce_2d(parameters, REDUCE_ROWS, binding_policy){}
gemv_n::gemv_n(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, reduce_2d_n::reduce_2d_n(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2,
fetching_policy_type fetch, binding_policy_t bind): gemv(gemv_parameters(simd, ls1, ls2, ng1, ng2, fetch), REDUCE_ROWS, bind) {} fetching_policy_type fetch, binding_policy_t bind): reduce_2d(reduce_2d_parameters(simd, ls1, ls2, ng1, ng2, fetch), REDUCE_ROWS, bind) {}
gemv_t::gemv_t(gemv::parameters_type const & parameters, binding_policy_t binding_policy): gemv(parameters, REDUCE_COLUMNS, binding_policy){} reduce_2d_t::reduce_2d_t(reduce_2d::parameters_type const & parameters, binding_policy_t binding_policy): reduce_2d(parameters, REDUCE_COLUMNS, binding_policy){}
gemv_t::gemv_t(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, reduce_2d_t::reduce_2d_t(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2,
fetching_policy_type fetch, binding_policy_t bind): gemv(gemv_parameters(simd, ls1, ls2, ng1, ng2, fetch), REDUCE_COLUMNS, bind) {} fetching_policy_type fetch, binding_policy_t bind): reduce_2d(reduce_2d_parameters(simd, ls1, ls2, ng1, ng2, fetch), REDUCE_COLUMNS, bind) {}
} }

View File

@@ -81,12 +81,12 @@ public:
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_matrix_column>(&math_expression, root_idx, &mapping_))); mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_matrix_column>(&math_expression, root_idx, &mapping_)));
else if(root_node.op.type==OPERATOR_ACCESS_INDEX_TYPE) else if(root_node.op.type==OPERATOR_ACCESS_INDEX_TYPE)
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_array_access>(&math_expression, root_idx, &mapping_))); mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_array_access>(&math_expression, root_idx, &mapping_)));
else if (detail::is_scalar_dot(root_node)) else if (detail::is_scalar_reduce_1d(root_node))
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_scalar_dot>(&math_expression, root_idx, &mapping_))); mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_reduce_1d>(&math_expression, root_idx, &mapping_)));
else if (detail::is_vector_dot(root_node)) else if (detail::is_vector_reduce_1d(root_node))
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_gemv>(&math_expression, root_idx, &mapping_))); mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_reduce_2d>(&math_expression, root_idx, &mapping_)));
else if (root_node.op.type_family == OPERATOR_GEMM_TYPE_FAMILY) else if (root_node.op.type_family == OPERATOR_GEMM_TYPE_FAMILY)
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_gemm>(&math_expression, root_idx, &mapping_))); mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_matrix_product>(&math_expression, root_idx, &mapping_)));
else if (root_node.op.type == OPERATOR_REPEAT_TYPE) else if (root_node.op.type == OPERATOR_REPEAT_TYPE)
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_repeat>(&math_expression, root_idx, &mapping_))); mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_repeat>(&math_expression, root_idx, &mapping_)));
else if (root_node.op.type == OPERATOR_OUTER_PROD_TYPE) else if (root_node.op.type == OPERATOR_OUTER_PROD_TYPE)

View File

@@ -12,7 +12,7 @@ namespace isaac
namespace templates namespace templates
{ {
inline void compute_dot(kernel_generation_stream & os, std::string acc, std::string cur, op_element const & op) inline void compute_reduce_1d(kernel_generation_stream & os, std::string acc, std::string cur, op_element const & op)
{ {
if (detail::is_elementwise_function(op)) if (detail::is_elementwise_function(op))
os << acc << "=" << evaluate(op.type) << "(" << acc << "," << cur << ");" << std::endl; os << acc << "=" << evaluate(op.type) << "(" << acc << "," << cur << ");" << std::endl;
@@ -20,7 +20,7 @@ inline void compute_dot(kernel_generation_stream & os, std::string acc, std::str
os << acc << "= (" << acc << ")" << evaluate(op.type) << "(" << cur << ");" << std::endl; os << acc << "= (" << acc << ")" << evaluate(op.type) << "(" << cur << ");" << std::endl;
} }
inline void compute_index_dot(kernel_generation_stream & os, std::string acc, std::string cur, std::string const & acc_value, std::string const & cur_value, op_element const & op) inline void compute_index_reduce_1d(kernel_generation_stream & os, std::string acc, std::string cur, std::string const & acc_value, std::string const & cur_value, op_element const & op)
{ {
// os << acc << " = " << cur_value << ">" << acc_value << "?" << cur << ":" << acc << ";" << std::endl; // os << acc << " = " << cur_value << ">" << acc_value << "?" << cur << ":" << acc << ";" << std::endl;
os << acc << "= select(" << acc << "," << cur << "," << cur_value << ">" << acc_value << ");" << std::endl; os << acc << "= select(" << acc << "," << cur << "," << cur_value << ">" << acc_value << ");" << std::endl;
@@ -51,11 +51,11 @@ inline std::string neutral_element(op_element const & op, driver::backend_type b
case OPERATOR_ELEMENT_MIN_TYPE : return INF; case OPERATOR_ELEMENT_MIN_TYPE : return INF;
case OPERATOR_ELEMENT_ARGMIN_TYPE : return INF; case OPERATOR_ELEMENT_ARGMIN_TYPE : return INF;
default: throw std::runtime_error("Unsupported dot operator : no neutral element known"); default: throw std::runtime_error("Unsupported reduce_1d operator : no neutral element known");
} }
} }
inline bool is_dot(math_expression::node const & node) inline bool is_reduce_1d(math_expression::node const & node)
{ {
return node.op.type_family==OPERATOR_VECTOR_DOT_TYPE_FAMILY return node.op.type_family==OPERATOR_VECTOR_DOT_TYPE_FAMILY
|| node.op.type_family==OPERATOR_COLUMNS_DOT_TYPE_FAMILY || node.op.type_family==OPERATOR_COLUMNS_DOT_TYPE_FAMILY
@@ -63,7 +63,7 @@ inline bool is_dot(math_expression::node const & node)
} }
inline bool is_index_dot(op_element const & op) inline bool is_index_reduction(op_element const & op)
{ {
return op.type==OPERATOR_ELEMENT_ARGFMAX_TYPE return op.type==OPERATOR_ELEMENT_ARGFMAX_TYPE
|| op.type==OPERATOR_ELEMENT_ARGMAX_TYPE || op.type==OPERATOR_ELEMENT_ARGMAX_TYPE

View File

@@ -9,11 +9,11 @@
#include "isaac/driver/program_cache.h" #include "isaac/driver/program_cache.h"
#include "isaac/profiles/profiles.h" #include "isaac/profiles/profiles.h"
#include "isaac/kernels/parse.h" #include "isaac/kernels/parse.h"
#include "isaac/kernels/templates/axpy.h" #include "isaac/kernels/templates/elementwise_1d.h"
#include "isaac/kernels/templates/dot.h" #include "isaac/kernels/templates/reduce_1d.h"
#include "isaac/kernels/templates/ger.h" #include "isaac/kernels/templates/elementwise_2d.h"
#include "isaac/kernels/templates/gemv.h" #include "isaac/kernels/templates/reduce_2d.h"
#include "isaac/kernels/templates/gemm.h" #include "isaac/kernels/templates/matrix_product.h"
#include "isaac/exception/operation_not_supported.h" #include "isaac/exception/operation_not_supported.h"
@@ -134,24 +134,24 @@ profiles::value_type::templates_container const & profiles::value_type::template
std::shared_ptr<templates::base> profiles::create(std::string const & template_name, std::vector<int> const & x) std::shared_ptr<templates::base> profiles::create(std::string const & template_name, std::vector<int> const & x)
{ {
templates::fetching_policy_type fetch[] = {templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_GLOBAL_STRIDED, templates::FETCH_FROM_GLOBAL_CONTIGUOUS}; templates::fetching_policy_type fetch[] = {templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_GLOBAL_STRIDED, templates::FETCH_FROM_GLOBAL_CONTIGUOUS};
if(template_name=="axpy") if(template_name=="elementwise_1d")
return std::shared_ptr<templates::base>(new templates::axpy(x[0], x[1], x[2], fetch[x[3]])); return std::shared_ptr<templates::base>(new templates::elementwise_1d(x[0], x[1], x[2], fetch[x[3]]));
else if(template_name=="dot") else if(template_name=="reduce_1d")
return std::shared_ptr<templates::base>(new templates::dot(x[0], x[1], x[2], fetch[x[3]])); return std::shared_ptr<templates::base>(new templates::reduce_1d(x[0], x[1], x[2], fetch[x[3]]));
else if(template_name=="ger") else if(template_name=="elementwise_2d")
return std::shared_ptr<templates::base>(new templates::ger(x[0], x[1], x[2], x[3], x[4], fetch[x[5]])); return std::shared_ptr<templates::base>(new templates::elementwise_2d(x[0], x[1], x[2], x[3], x[4], fetch[x[5]]));
else if(template_name.find("gemv_n")!=std::string::npos) else if(template_name.find("reduce_2d_n")!=std::string::npos)
return std::shared_ptr<templates::base>(new templates::gemv_n(x[0], x[1], x[2], x[3], x[4], fetch[x[5]])); return std::shared_ptr<templates::base>(new templates::reduce_2d_n(x[0], x[1], x[2], x[3], x[4], fetch[x[5]]));
else if(template_name.find("gemv_t")!=std::string::npos) else if(template_name.find("reduce_2d_t")!=std::string::npos)
return std::shared_ptr<templates::base>(new templates::gemv_t(x[0], x[1], x[2], x[3], x[4], fetch[x[5]])); return std::shared_ptr<templates::base>(new templates::reduce_2d_t(x[0], x[1], x[2], x[3], x[4], fetch[x[5]]));
else if(template_name.find("gemm_nn")!=std::string::npos) else if(template_name.find("matrix_product_nn")!=std::string::npos)
return std::shared_ptr<templates::base>(new templates::gemm_nn(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], fetch[x[8]], fetch[x[9]], x[10], x[11])); return std::shared_ptr<templates::base>(new templates::matrix_product_nn(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], fetch[x[8]], fetch[x[9]], x[10], x[11]));
else if(template_name.find("gemm_tn")!=std::string::npos) else if(template_name.find("matrix_product_tn")!=std::string::npos)
return std::shared_ptr<templates::base>(new templates::gemm_tn(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], fetch[x[8]], fetch[x[9]], x[10], x[11])); return std::shared_ptr<templates::base>(new templates::matrix_product_tn(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], fetch[x[8]], fetch[x[9]], x[10], x[11]));
else if(template_name.find("gemm_nt")!=std::string::npos) else if(template_name.find("matrix_product_nt")!=std::string::npos)
return std::shared_ptr<templates::base>(new templates::gemm_nt(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], fetch[x[8]], fetch[x[9]], x[10], x[11])); return std::shared_ptr<templates::base>(new templates::matrix_product_nt(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], fetch[x[8]], fetch[x[9]], x[10], x[11]));
else if(template_name.find("gemm_tt")!=std::string::npos) else if(template_name.find("matrix_product_tt")!=std::string::npos)
return std::shared_ptr<templates::base>(new templates::gemm_tt(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], fetch[x[8]], fetch[x[9]], x[10], x[11])); return std::shared_ptr<templates::base>(new templates::matrix_product_tt(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], fetch[x[8]], fetch[x[9]], x[10], x[11]));
else else
throw std::invalid_argument("Invalid expression: " + template_name); throw std::invalid_argument("Invalid expression: " + template_name);
} }
@@ -163,7 +163,7 @@ void profiles::import(std::string const & str, driver::CommandQueue const & queu
rapidjson::Document document; rapidjson::Document document;
document.Parse<0>(str.c_str()); document.Parse<0>(str.c_str());
//Deserialize //Deserialize
std::vector<std::string> operations = {"axpy", "dot", "ger", "gemv_n", "gemv_t", "gemm_nn", "gemm_tn", "gemm_nt", "gemm_tt"}; std::vector<std::string> operations = {"elementwise_1d", "reduce_1d", "elementwise_2d", "reduce_2d_n", "reduce_2d_t", "matrix_product_nn", "matrix_product_tn", "matrix_product_nt", "matrix_product_tt"};
std::vector<std::string> dtype = {"float32", "float64"}; std::vector<std::string> dtype = {"float32", "float64"};
for(auto & operation : operations) for(auto & operation : operations)
{ {
@@ -265,15 +265,15 @@ std::map<std::pair<expression_type, numeric_type>, std::shared_ptr<templates::ba
numeric_type types[] = {CHAR_TYPE, UCHAR_TYPE, SHORT_TYPE, USHORT_TYPE, INT_TYPE, UINT_TYPE, LONG_TYPE, ULONG_TYPE, FLOAT_TYPE, DOUBLE_TYPE}; numeric_type types[] = {CHAR_TYPE, UCHAR_TYPE, SHORT_TYPE, USHORT_TYPE, INT_TYPE, UINT_TYPE, LONG_TYPE, ULONG_TYPE, FLOAT_TYPE, DOUBLE_TYPE};
for(auto DTYPE : types) for(auto DTYPE : types)
{ {
res[std::make_pair(AXPY_TYPE, DTYPE)] = ptr_t (new templates::axpy(1,64,128,templates::FETCH_FROM_GLOBAL_STRIDED)); res[std::make_pair(AXPY_TYPE, DTYPE)] = ptr_t (new templates::elementwise_1d(1,64,128,templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(DOT_TYPE, DTYPE)] = ptr_t(new templates::dot(1,64,128,templates::FETCH_FROM_GLOBAL_STRIDED)); res[std::make_pair(DOT_TYPE, DTYPE)] = ptr_t(new templates::reduce_1d(1,64,128,templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(GER_TYPE, DTYPE)] = ptr_t(new templates::ger(1,128,1,16,32,templates::FETCH_FROM_GLOBAL_STRIDED)); res[std::make_pair(GER_TYPE, DTYPE)] = ptr_t(new templates::elementwise_2d(1,128,1,16,32,templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(GEMV_N_TYPE, DTYPE)] = ptr_t(new templates::gemv_n(1, 8, 8, 4, 16, templates::FETCH_FROM_GLOBAL_STRIDED)); res[std::make_pair(GEMV_N_TYPE, DTYPE)] = ptr_t(new templates::reduce_2d_n(1, 8, 8, 4, 16, templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(GEMV_T_TYPE, DTYPE)] = ptr_t(new templates::gemv_t(1, 8, 8, 64, 8, templates::FETCH_FROM_GLOBAL_STRIDED)); res[std::make_pair(GEMV_T_TYPE, DTYPE)] = ptr_t(new templates::reduce_2d_t(1, 8, 8, 64, 8, templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(GEMM_NN_TYPE, DTYPE)] = ptr_t(new templates::gemm_nn(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true)); res[std::make_pair(GEMM_NN_TYPE, DTYPE)] = ptr_t(new templates::matrix_product_nn(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
res[std::make_pair(GEMM_TN_TYPE, DTYPE)] = ptr_t(new templates::gemm_tn(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true)); res[std::make_pair(GEMM_TN_TYPE, DTYPE)] = ptr_t(new templates::matrix_product_tn(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
res[std::make_pair(GEMM_NT_TYPE, DTYPE)] = ptr_t(new templates::gemm_nt(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true)); res[std::make_pair(GEMM_NT_TYPE, DTYPE)] = ptr_t(new templates::matrix_product_nt(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
res[std::make_pair(GEMM_TT_TYPE, DTYPE)] = ptr_t(new templates::gemm_tt(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true)); res[std::make_pair(GEMM_TT_TYPE, DTYPE)] = ptr_t(new templates::matrix_product_tt(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
} }
return res; return res;
} }

View File

@@ -161,7 +161,7 @@ namespace isaac
expression_type final_type; expression_type final_type;
//GEMM //GEMM
if(symbolic::preset::gemm::args args = symbolic::preset::gemm::check(tree, rootidx)){ if(symbolic::preset::matrix_product::args args = symbolic::preset::matrix_product::check(tree, rootidx)){
final_type = args.type; final_type = args.type;
} }
//Default //Default

View File

@@ -9,7 +9,7 @@ namespace symbolic
namespace preset namespace preset
{ {
void gemm::handle_node(math_expression::container_type const & tree, size_t rootidx, args & a) void matrix_product::handle_node(math_expression::container_type const & tree, size_t rootidx, args & a)
{ {
//Matrix-Matrix product node //Matrix-Matrix product node
if(tree[rootidx].op.type_family==OPERATOR_GEMM_TYPE_FAMILY) if(tree[rootidx].op.type_family==OPERATOR_GEMM_TYPE_FAMILY)
@@ -46,11 +46,11 @@ void gemm::handle_node(math_expression::container_type const & tree, size_t root
} }
} }
gemm::args gemm::check(math_expression::container_type const & tree, size_t rootidx) matrix_product::args matrix_product::check(math_expression::container_type const & tree, size_t rootidx)
{ {
lhs_rhs_element const * assigned = &tree[rootidx].lhs; lhs_rhs_element const * assigned = &tree[rootidx].lhs;
numeric_type dtype = assigned->dtype; numeric_type dtype = assigned->dtype;
gemm::args result ; matrix_product::args result ;
if(dtype==INVALID_NUMERIC_TYPE) if(dtype==INVALID_NUMERIC_TYPE)
return result; return result;
result.alpha = value_scalar(1, dtype); result.alpha = value_scalar(1, dtype);

View File

@@ -73,7 +73,7 @@ def main():
libraries += ['gnustl_shared'] libraries += ['gnustl_shared']
#Source files #Source files
src = 'src/lib/symbolic/preset.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/io.cpp src/lib/symbolic/expression.cpp src/lib/array.cpp src/lib/value_scalar.cpp src/lib/driver/backend.cpp src/lib/driver/device.cpp src/lib/driver/kernel.cpp src/lib/driver/buffer.cpp src/lib/driver/platform.cpp src/lib/driver/check.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/dispatch.cpp src/lib/driver/program_cache.cpp src/lib/driver/context.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/handle.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/profiles/presets.cpp src/lib/profiles/profiles.cpp src/lib/profiles/predictors/random_forest.cpp src/lib/kernels/templates/gemv.cpp src/lib/kernels/templates/axpy.cpp src/lib/kernels/templates/gemm.cpp src/lib/kernels/templates/ger.cpp src/lib/kernels/templates/dot.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/stream.cpp src/lib/kernels/parse.cpp src/lib/kernels/keywords.cpp src/lib/kernels/binder.cpp src/lib/wrap/clBLAS.cpp src/lib/wrap/cublas.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']] src = 'src/lib/symbolic/preset.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/io.cpp src/lib/symbolic/expression.cpp src/lib/array.cpp src/lib/value_scalar.cpp src/lib/driver/backend.cpp src/lib/driver/device.cpp src/lib/driver/kernel.cpp src/lib/driver/buffer.cpp src/lib/driver/platform.cpp src/lib/driver/check.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/dispatch.cpp src/lib/driver/program_cache.cpp src/lib/driver/context.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/handle.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/profiles/presets.cpp src/lib/profiles/profiles.cpp 
src/lib/profiles/predictors/random_forest.cpp src/lib/kernels/templates/reduce_2d.cpp src/lib/kernels/templates/elementwise_2d.cpp src/lib/kernels/templates/elementwise_1d.cpp src/lib/kernels/templates/reduce_1d.cpp src/lib/kernels/templates/matrix_product.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/stream.cpp src/lib/kernels/parse.cpp src/lib/kernels/keywords.cpp src/lib/kernels/binder.cpp src/lib/wrap/clBLAS.cpp src/lib/wrap/cublas.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
boostsrc = 'external/boost/libs/' boostsrc = 'external/boost/libs/'
for s in ['numpy','python','smart_ptr','system','thread']: for s in ['numpy','python','smart_ptr','system','thread']:
src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x] src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]