Code Quality: More sensible names
@@ -23,15 +23,15 @@ enum expression_type
inline expression_type expression_type_from_string(std::string const & name)
{
if(name=="axpy") return AXPY_TYPE;
if(name=="dot") return DOT_TYPE;
if(name=="ger") return GER_TYPE;
if(name=="gemv_n") return GEMV_N_TYPE;
if(name=="gemv_t") return GEMV_T_TYPE;
if(name=="gemm_nn") return GEMM_NN_TYPE;
if(name=="gemm_nt") return GEMM_NT_TYPE;
if(name=="gemm_tn") return GEMM_TN_TYPE;
if(name=="gemm_tt") return GEMM_TT_TYPE;
if(name=="elementwise_1d") return AXPY_TYPE;
if(name=="reduce_1d") return DOT_TYPE;
if(name=="elementwise_2d") return GER_TYPE;
if(name=="reduce_2d_n") return GEMV_N_TYPE;
if(name=="reduce_2d_t") return GEMV_T_TYPE;
if(name=="matrix_product_nn") return GEMM_NN_TYPE;
if(name=="matrix_product_nt") return GEMM_NT_TYPE;
if(name=="matrix_product_tn") return GEMM_TN_TYPE;
if(name=="matrix_product_tt") return GEMM_TT_TYPE;
throw std::invalid_argument("Unrecognized expression: " + name);
}
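For context, the hunk above replaces the old BLAS-style keys ("axpy", "dot", "ger", ...) with the new descriptive keys, while both spellings continue to resolve to the same expression_type enumerator. The snippet below is a minimal, self-contained sketch of that alias behavior and is not part of the commit: the enum is trimmed to three values and the merged conditions are for brevity only; the real header keeps one if-statement per key.

// Illustrative sketch only (assumed names mirror the hunk above).
#include <cassert>
#include <stdexcept>
#include <string>

enum expression_type { AXPY_TYPE, DOT_TYPE, GER_TYPE };  // trimmed for the sketch

inline expression_type expression_type_from_string_sketch(std::string const & name)
{
  // Old BLAS-style key and new descriptive key map to the same enumerator.
  if(name=="axpy" || name=="elementwise_1d") return AXPY_TYPE;
  if(name=="dot" || name=="reduce_1d") return DOT_TYPE;
  if(name=="ger" || name=="elementwise_2d") return GER_TYPE;
  throw std::invalid_argument("Unrecognized expression: " + name);
}

int main()
{
  // Both spellings are interchangeable from a caller's point of view.
  assert(expression_type_from_string_sketch("axpy") == expression_type_from_string_sketch("elementwise_1d"));
  assert(expression_type_from_string_sketch("dot") == expression_type_from_string_sketch("reduce_1d"));
  return 0;
}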
2 include/isaac/driver/external/CL/cl_ext.h vendored
@@ -70,7 +70,7 @@ cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem /* memobj */,
* Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
* before using.
*
* clLogMessagesToSystemLog fowards on all log messages to the Apple System Logger
* clLogMessagesToSystemLog fowards on all log messages to the Apple System Logelementwise_2d
*/
#define cl_APPLE_ContextLoggingFunctions 1
extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * /* errstr */,
108 include/isaac/driver/external/CUDA/cuda.h vendored
@@ -200,7 +200,7 @@ extern "C" {
/**
* CUDA device pointer
* CUdeviceptr is defined as an unsigned integer type whose size matches the size of a pointer on the target platform.
* CUdeviceptr is defined as an unsigned inteelementwise_2d type whose size matches the size of a pointer on the target platform.
*/
#if __CUDA_API_VERSION >= 3020
@@ -337,12 +337,12 @@ typedef enum CUoccupancy_flags_enum {
* Array formats
*/
typedef enum CUarray_format_enum {
CU_AD_FORMAT_UNSIGNED_INT8 = 0x01, /**< Unsigned 8-bit integers */
CU_AD_FORMAT_UNSIGNED_INT16 = 0x02, /**< Unsigned 16-bit integers */
CU_AD_FORMAT_UNSIGNED_INT32 = 0x03, /**< Unsigned 32-bit integers */
CU_AD_FORMAT_SIGNED_INT8 = 0x08, /**< Signed 8-bit integers */
CU_AD_FORMAT_SIGNED_INT16 = 0x09, /**< Signed 16-bit integers */
CU_AD_FORMAT_SIGNED_INT32 = 0x0a, /**< Signed 32-bit integers */
CU_AD_FORMAT_UNSIGNED_INT8 = 0x01, /**< Unsigned 8-bit inteelementwise_2ds */
CU_AD_FORMAT_UNSIGNED_INT16 = 0x02, /**< Unsigned 16-bit inteelementwise_2ds */
CU_AD_FORMAT_UNSIGNED_INT32 = 0x03, /**< Unsigned 32-bit inteelementwise_2ds */
CU_AD_FORMAT_SIGNED_INT8 = 0x08, /**< Signed 8-bit inteelementwise_2ds */
CU_AD_FORMAT_SIGNED_INT16 = 0x09, /**< Signed 16-bit inteelementwise_2ds */
CU_AD_FORMAT_SIGNED_INT32 = 0x0a, /**< Signed 32-bit inteelementwise_2ds */
CU_AD_FORMAT_HALF = 0x10, /**< 16-bit floating point */
CU_AD_FORMAT_FLOAT = 0x20 /**< 32-bit floating point */
} CUarray_format;
@@ -558,8 +558,8 @@ typedef enum CUfunction_attribute_enum {
*/
typedef enum CUfunc_cache_enum {
CU_FUNC_CACHE_PREFER_NONE = 0x00, /**< no preference for shared memory or L1 (default) */
CU_FUNC_CACHE_PREFER_SHARED = 0x01, /**< prefer larger shared memory and smaller L1 cache */
CU_FUNC_CACHE_PREFER_L1 = 0x02, /**< prefer larger L1 cache and smaller shared memory */
CU_FUNC_CACHE_PREFER_SHARED = 0x01, /**< prefer larelementwise_2d shared memory and smaller L1 cache */
CU_FUNC_CACHE_PREFER_L1 = 0x02, /**< prefer larelementwise_2d L1 cache and smaller shared memory */
CU_FUNC_CACHE_PREFER_EQUAL = 0x03 /**< prefer equal sized L1 cache and shared memory */
} CUfunc_cache;
@@ -909,7 +909,7 @@ typedef enum cudaError_enum {
/**
* \deprecated
* This error return is deprecated as of CUDA 5.0. It is no longer an error
* This error return is deprecated as of CUDA 5.0. It is no lonelementwise_2d an error
* to attempt to enable/disable the profiling via ::cuProfilerStart or
* ::cuProfilerStop without initialization.
*/
@@ -917,14 +917,14 @@ typedef enum cudaError_enum {
/**
* \deprecated
* This error return is deprecated as of CUDA 5.0. It is no longer an error
* This error return is deprecated as of CUDA 5.0. It is no lonelementwise_2d an error
* to call cuProfilerStart() when profiling is already enabled.
*/
CUDA_ERROR_PROFILER_ALREADY_STARTED = 7,
/**
* \deprecated
* This error return is deprecated as of CUDA 5.0. It is no longer an error
* This error return is deprecated as of CUDA 5.0. It is no lonelementwise_2d an error
* to call cuProfilerStop() when profiling is already disabled.
*/
CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8,
@@ -962,7 +962,7 @@ typedef enum cudaError_enum {
* This indicated that the context being supplied as a parameter to the
* API call was already the active context.
* \deprecated
* This error return is deprecated as of CUDA 3.2. It is no longer an
* This error return is deprecated as of CUDA 3.2. It is no lonelementwise_2d an
* error to attempt to push the active context via ::cuCtxPushCurrent().
*/
CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202,
@@ -1163,7 +1163,7 @@ typedef enum cudaError_enum {
CUDA_ERROR_CONTEXT_IS_DESTROYED = 709,
/**
* A device-side assert triggered during kernel execution. The context
* A device-side assert trigelementwise_2ded during kernel execution. The context
* cannot be used anymore, and must be destroyed. All existing device
* memory allocations from this context are invalid and must be
* reconstructed if the program is to continue using CUDA.
@@ -1499,24 +1499,24 @@ typedef struct CUDA_TEXTURE_DESC_st {
typedef enum CUresourceViewFormat_enum
{
CU_RES_VIEW_FORMAT_NONE = 0x00, /**< No resource view format (use underlying resource format) */
CU_RES_VIEW_FORMAT_UINT_1X8 = 0x01, /**< 1 channel unsigned 8-bit integers */
CU_RES_VIEW_FORMAT_UINT_2X8 = 0x02, /**< 2 channel unsigned 8-bit integers */
CU_RES_VIEW_FORMAT_UINT_4X8 = 0x03, /**< 4 channel unsigned 8-bit integers */
CU_RES_VIEW_FORMAT_SINT_1X8 = 0x04, /**< 1 channel signed 8-bit integers */
CU_RES_VIEW_FORMAT_SINT_2X8 = 0x05, /**< 2 channel signed 8-bit integers */
CU_RES_VIEW_FORMAT_SINT_4X8 = 0x06, /**< 4 channel signed 8-bit integers */
CU_RES_VIEW_FORMAT_UINT_1X16 = 0x07, /**< 1 channel unsigned 16-bit integers */
CU_RES_VIEW_FORMAT_UINT_2X16 = 0x08, /**< 2 channel unsigned 16-bit integers */
CU_RES_VIEW_FORMAT_UINT_4X16 = 0x09, /**< 4 channel unsigned 16-bit integers */
CU_RES_VIEW_FORMAT_SINT_1X16 = 0x0a, /**< 1 channel signed 16-bit integers */
CU_RES_VIEW_FORMAT_SINT_2X16 = 0x0b, /**< 2 channel signed 16-bit integers */
CU_RES_VIEW_FORMAT_SINT_4X16 = 0x0c, /**< 4 channel signed 16-bit integers */
CU_RES_VIEW_FORMAT_UINT_1X32 = 0x0d, /**< 1 channel unsigned 32-bit integers */
CU_RES_VIEW_FORMAT_UINT_2X32 = 0x0e, /**< 2 channel unsigned 32-bit integers */
CU_RES_VIEW_FORMAT_UINT_4X32 = 0x0f, /**< 4 channel unsigned 32-bit integers */
CU_RES_VIEW_FORMAT_SINT_1X32 = 0x10, /**< 1 channel signed 32-bit integers */
CU_RES_VIEW_FORMAT_SINT_2X32 = 0x11, /**< 2 channel signed 32-bit integers */
CU_RES_VIEW_FORMAT_SINT_4X32 = 0x12, /**< 4 channel signed 32-bit integers */
CU_RES_VIEW_FORMAT_UINT_1X8 = 0x01, /**< 1 channel unsigned 8-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_UINT_2X8 = 0x02, /**< 2 channel unsigned 8-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_UINT_4X8 = 0x03, /**< 4 channel unsigned 8-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_SINT_1X8 = 0x04, /**< 1 channel signed 8-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_SINT_2X8 = 0x05, /**< 2 channel signed 8-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_SINT_4X8 = 0x06, /**< 4 channel signed 8-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_UINT_1X16 = 0x07, /**< 1 channel unsigned 16-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_UINT_2X16 = 0x08, /**< 2 channel unsigned 16-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_UINT_4X16 = 0x09, /**< 4 channel unsigned 16-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_SINT_1X16 = 0x0a, /**< 1 channel signed 16-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_SINT_2X16 = 0x0b, /**< 2 channel signed 16-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_SINT_4X16 = 0x0c, /**< 4 channel signed 16-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_UINT_1X32 = 0x0d, /**< 1 channel unsigned 32-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_UINT_2X32 = 0x0e, /**< 2 channel unsigned 32-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_UINT_4X32 = 0x0f, /**< 4 channel unsigned 32-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_SINT_1X32 = 0x10, /**< 1 channel signed 32-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_SINT_2X32 = 0x11, /**< 2 channel signed 32-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_SINT_4X32 = 0x12, /**< 4 channel signed 32-bit inteelementwise_2ds */
CU_RES_VIEW_FORMAT_FLOAT_1X16 = 0x13, /**< 1 channel 16-bit floating point */
CU_RES_VIEW_FORMAT_FLOAT_2X16 = 0x14, /**< 2 channel 16-bit floating point */
CU_RES_VIEW_FORMAT_FLOAT_4X16 = 0x15, /**< 4 channel 16-bit floating point */
@@ -1606,7 +1606,7 @@ typedef struct CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st {
#define CU_TRSA_OVERRIDE_FORMAT 0x01
/**
* Read the texture as integers rather than promoting the values to floats
* Read the texture as inteelementwise_2ds rather than promoting the values to floats
* in the range [0,1].
* Flag for ::cuTexRefSetFlags()
*/
@@ -1901,7 +1901,7 @@ CUresult CUDAAPI cuDeviceTotalMem(size_t *bytes, CUdevice dev);
/**
* \brief Returns information about the device
*
* Returns in \p *pi the integer value of the attribute \p attrib on device
* Returns in \p *pi the inteelementwise_2d value of the attribute \p attrib on device
* \p dev. The supported attributes are:
* - ::CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK: Maximum number of threads per
* block;
@@ -2819,7 +2819,7 @@ CUresult CUDAAPI cuCtxSynchronize(void);
* violated. This limit can be set smaller than the default or up the maximum
* launch depth of 24. When setting this limit, keep in mind that additional
* levels of sync depth require the driver to reserve large amounts of device
* memory which can no longer be used for user allocations. If these
* memory which can no lonelementwise_2d be used for user allocations. If these
* reservations of device memory fail, ::cuCtxSetLimit will return
* ::CUDA_ERROR_OUT_OF_MEMORY, and the limit can be reset to a lower value.
* This limit is only applicable to devices of compute capability 3.5 and
@@ -2836,7 +2836,7 @@ CUresult CUDAAPI cuCtxSynchronize(void);
* the default (2048 launches) are needed for a module using the device
* runtime, this limit can be increased. Keep in mind that being able to
* sustain additional pending launches will require the driver to reserve
* larger amounts of device memory upfront which can no longer be used for
* larelementwise_2d amounts of device memory upfront which can no lonelementwise_2d be used for
* allocations. If these reservations fail, ::cuCtxSetLimit will return
* ::CUDA_ERROR_OUT_OF_MEMORY, and the limit can be reset to a lower value.
* This limit is only applicable to devices of compute capability 3.5 and
@@ -2921,8 +2921,8 @@ CUresult CUDAAPI cuCtxGetLimit(size_t *pvalue, CUlimit limit);
*
* The supported cache configurations are:
* - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default)
* - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache
* - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory
* - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larelementwise_2d shared memory and smaller L1 cache
* - ::CU_FUNC_CACHE_PREFER_L1: prefer larelementwise_2d L1 cache and smaller shared memory
* - ::CU_FUNC_CACHE_PREFER_EQUAL: prefer equal sized L1 cache and shared memory
*
* \param pconfig - Returned cache configuration
@@ -2971,8 +2971,8 @@ CUresult CUDAAPI cuCtxGetCacheConfig(CUfunc_cache *pconfig);
*
* The supported cache configurations are:
* - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default)
* - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache
* - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory
* - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larelementwise_2d shared memory and smaller L1 cache
* - ::CU_FUNC_CACHE_PREFER_L1: prefer larelementwise_2d L1 cache and smaller shared memory
* - ::CU_FUNC_CACHE_PREFER_EQUAL: prefer equal sized L1 cache and shared memory
*
* \param config - Requested cache configuration
@@ -3054,7 +3054,7 @@ CUresult CUDAAPI cuCtxGetSharedMemConfig(CUsharedconfig *pConfig);
*
* Changing the shared memory bank size will not increase shared memory usage
* or affect occupancy of kernels, but may have major effects on performance.
* Larger bank sizes will allow for greater potential bandwidth to shared memory,
* Larelementwise_2d bank sizes will allow for greater potential bandwidth to shared memory,
* but will change what kinds of accesses to shared memory will result in bank
* conflicts.
*
@@ -7358,7 +7358,7 @@ CUresult CUDAAPI cuPointerGetAttribute(void *data, CUpointer_attribute attribute
* See further documentation in the section titled "API synchronization behavior"
* to learn more about cases when synchronous memory operations can
* exhibit asynchronous behavior.
* \p value will be considered as a pointer to an unsigned integer to which this attribute is to be set.
* \p value will be considered as a pointer to an unsigned inteelementwise_2d to which this attribute is to be set.
*
* \param value - Pointer to memory containing the value to be set
* \param attribute - Pointer attribute to set
@@ -7534,7 +7534,7 @@ CUresult CUDAAPI cuStreamCreateWithPriority(CUstream *phStream, unsigned int fla
* See ::cuStreamCreateWithPriority for details about priority clamping.
*
* \param hStream - Handle to the stream to be queried
* \param priority - Pointer to a signed integer in which the stream's priority is returned
* \param priority - Pointer to a signed inteelementwise_2d in which the stream's priority is returned
* \return
* ::CUDA_SUCCESS,
* ::CUDA_ERROR_DEINITIALIZED,
@@ -7560,7 +7560,7 @@ CUresult CUDAAPI cuStreamGetPriority(CUstream hStream, int *priority);
* and return the flags in \p flags.
*
* \param hStream - Handle to the stream to be queried
* \param flags - Pointer to an unsigned integer in which the stream's flags are returned
* \param flags - Pointer to an unsigned inteelementwise_2d in which the stream's flags are returned
* The value returned in \p flags is a logical 'OR' of all flags that
* were used while creating this stream. See ::cuStreamCreate for the list
* of valid flags
@@ -8104,7 +8104,7 @@ CUresult CUDAAPI cuEventElapsedTime(float *pMilliseconds, CUevent hStart, CUeven
/**
* \brief Returns information about a function
*
* Returns in \p *pi the integer value of the attribute \p attrib on the kernel
* Returns in \p *pi the inteelementwise_2d value of the attribute \p attrib on the kernel
* given by \p hfunc. The supported attributes are:
* - ::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: The maximum number of threads
* per block, beyond which a launch of the function would fail. This number
@@ -8175,8 +8175,8 @@ CUresult CUDAAPI cuFuncGetAttribute(int *pi, CUfunction_attribute attrib, CUfunc
*
* The supported cache configurations are:
* - ::CU_FUNC_CACHE_PREFER_NONE: no preference for shared memory or L1 (default)
* - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larger shared memory and smaller L1 cache
* - ::CU_FUNC_CACHE_PREFER_L1: prefer larger L1 cache and smaller shared memory
* - ::CU_FUNC_CACHE_PREFER_SHARED: prefer larelementwise_2d shared memory and smaller L1 cache
* - ::CU_FUNC_CACHE_PREFER_L1: prefer larelementwise_2d L1 cache and smaller shared memory
* - ::CU_FUNC_CACHE_PREFER_EQUAL: prefer equal sized L1 cache and shared memory
*
* \param hfunc - Kernel to configure cache for
@@ -8215,7 +8215,7 @@ CUresult CUDAAPI cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
*
* Changing the shared memory bank size will not increase shared memory usage
* or affect occupancy of kernels, but may have major effects on performance.
* Larger bank sizes will allow for greater potential bandwidth to shared memory,
* Larelementwise_2d bank sizes will allow for greater potential bandwidth to shared memory,
* but will change what kinds of accesses to shared memory will result in bank
* conflicts.
*
@@ -8491,11 +8491,11 @@ CUresult CUDAAPI cuFuncSetSharedSize(CUfunction hfunc, unsigned int bytes);
CUresult CUDAAPI cuParamSetSize(CUfunction hfunc, unsigned int numbytes);
/**
* \brief Adds an integer parameter to the function's argument list
* \brief Adds an inteelementwise_2d parameter to the function's argument list
*
* \deprecated
*
* Sets an integer parameter that will be specified the next time the
* Sets an inteelementwise_2d parameter that will be specified the next time the
* kernel corresponding to \p hfunc will be invoked. \p offset is a byte offset.
*
* \param hfunc - Kernel to add parameter to
@@ -9299,8 +9299,8 @@ CUresult CUDAAPI cuTexRefSetMaxAnisotropy(CUtexref hTexRef, unsigned int maxAnis
* returned through the texture reference \p hTexRef. The valid flags are:
*
* - ::CU_TRSF_READ_AS_INTEGER, which suppresses the default behavior of
* having the texture promote integer data to floating point data in the
* range [0, 1]. Note that texture with 32-bit integer format
* having the texture promote inteelementwise_2d data to floating point data in the
* range [0, 1]. Note that texture with 32-bit inteelementwise_2d format
* would not be promoted, regardless of whether or not this
* flag is specified;
* - ::CU_TRSF_NORMALIZED_COORDINATES, which suppresses the
@@ -9859,8 +9859,8 @@ CUresult CUDAAPI cuSurfRefGetArray(CUarray *phArray, CUsurfref hSurfRef);
* This is ignored if ::CUDA_RESOURCE_DESC::resType is ::CU_RESOURCE_TYPE_LINEAR.
*
* - ::CUDA_TEXTURE_DESC::flags can be any combination of the following:
* - ::CU_TRSF_READ_AS_INTEGER, which suppresses the default behavior of having the texture promote integer data to floating point data in the
* range [0, 1]. Note that texture with 32-bit integer format would not be promoted, regardless of whether or not this flag is specified.
* - ::CU_TRSF_READ_AS_INTEGER, which suppresses the default behavior of having the texture promote inteelementwise_2d data to floating point data in the
* range [0, 1]. Note that texture with 32-bit inteelementwise_2d format would not be promoted, regardless of whether or not this flag is specified.
* - ::CU_TRSF_NORMALIZED_COORDINATES, which suppresses the default behavior of having the texture coordinates range from [0, Dim) where Dim is
* the width or height of the CUDA array. Instead, the texture coordinates [0, 1.0) reference the entire breadth of the array dimension; Note
* that for CUDA mipmapped arrays, this flag has to be set.
@@ -89,46 +89,46 @@ protected:
*
* Maps prod(matrix_expression, matrix_expression)
*/
class mapped_gemm : public mapped_object, public binary_leaf
class mapped_matrix_product : public mapped_object, public binary_leaf
{
public:
mapped_gemm(std::string const & scalartype, unsigned int id, node_info info);
mapped_matrix_product(std::string const & scalartype, unsigned int id, node_info info);
};

/** @brief Reduction
*
* Base class for mapping a dot
* Base class for mapping a reduce_1d
*/
class mapped_dot : public mapped_object, public binary_leaf
class mapped_reduce : public mapped_object, public binary_leaf
{
public:
mapped_dot(std::string const & scalartype, unsigned int id, node_info info, std::string const & type_key);
mapped_reduce(std::string const & scalartype, unsigned int id, node_info info, std::string const & type_key);
size_t root_idx() const;
isaac::math_expression const & math_expression() const;
math_expression::node root_node() const;
bool is_index_dot() const;
bool is_index_reduction() const;
op_element root_op() const;
};

/** @brief Scalar dot
/** @brief 1D Reduction
*
* Maps a scalar dot (max, min, argmax, inner_prod, etc..)
* Maps a 1d reduction (max, min, argmax, inner_prod, etc..)
*/
class mapped_scalar_dot : public mapped_dot
class mapped_reduce_1d : public mapped_reduce
{
public:
mapped_scalar_dot(std::string const & scalartype, unsigned int id, node_info info);
mapped_reduce_1d(std::string const & scalartype, unsigned int id, node_info info);
};

/** @brief Vector dot
/** @brief 2D
*
* Maps a row-wise dot (max, min, argmax, matrix-vector product, etc..)
* Maps a 2D reduction (max, min, argmax, matrix-vector product, etc..)
*/
class mapped_gemv : public mapped_dot
class mapped_reduce_2d : public mapped_reduce
{
public:
mapped_gemv(std::string const & scalartype, unsigned int id, node_info info);
mapped_reduce_2d(std::string const & scalartype, unsigned int id, node_info info);
};

/** @brief Host scalar
@@ -13,8 +13,8 @@ namespace detail
{
bool is_node_leaf(op_element const & op);
bool is_scalar_dot(math_expression::node const & node);
bool is_vector_dot(math_expression::node const & node);
bool is_scalar_reduce_1d(math_expression::node const & node);
bool is_vector_reduce_1d(math_expression::node const & node);
bool is_assignment(op_element const & op);
bool is_elementwise_operator(op_element const & op);
bool is_elementwise_function(op_element const & op);
@@ -8,22 +8,22 @@ namespace isaac
namespace templates
{
class axpy_parameters : public base::parameters_type
class elementwise_1d_parameters : public base::parameters_type
{
public:
axpy_parameters(unsigned int _simd_width, unsigned int _group_size, unsigned int _num_groups, fetching_policy_type _fetching_policy);
elementwise_1d_parameters(unsigned int _simd_width, unsigned int _group_size, unsigned int _num_groups, fetching_policy_type _fetching_policy);
unsigned int num_groups;
fetching_policy_type fetching_policy;
};

class axpy : public base_impl<axpy, axpy_parameters>
class elementwise_1d : public base_impl<elementwise_1d, elementwise_1d_parameters>
{
private:
virtual int is_invalid_impl(driver::Device const &, math_expression const &) const;
std::string generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mappings) const;
public:
axpy(axpy::parameters_type const & parameters, binding_policy_t binding_policy = BIND_INDEPENDENT);
axpy(unsigned int _simd_width, unsigned int _group_size, unsigned int _num_groups, fetching_policy_type _fetching_policy, binding_policy_t binding_policy = BIND_INDEPENDENT);
elementwise_1d(elementwise_1d::parameters_type const & parameters, binding_policy_t binding_policy = BIND_INDEPENDENT);
elementwise_1d(unsigned int _simd_width, unsigned int _group_size, unsigned int _num_groups, fetching_policy_type _fetching_policy, binding_policy_t binding_policy = BIND_INDEPENDENT);
std::vector<int_t> input_sizes(math_expression const & expressions) const;
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const &);
};
@@ -9,24 +9,24 @@ namespace isaac
namespace templates
{
class ger_parameters : public base::parameters_type
class elementwise_2d_parameters : public base::parameters_type
{
public:
ger_parameters(unsigned int _simd_width, unsigned int _local_size_0, unsigned int _local_size_1, unsigned int _num_groups_0, unsigned int _num_groups_1, fetching_policy_type _fetching_policy);
elementwise_2d_parameters(unsigned int _simd_width, unsigned int _local_size_0, unsigned int _local_size_1, unsigned int _num_groups_0, unsigned int _num_groups_1, fetching_policy_type _fetching_policy);
unsigned int num_groups_0;
unsigned int num_groups_1;
fetching_policy_type fetching_policy;
};

class ger : public base_impl<ger, ger_parameters>
class elementwise_2d : public base_impl<elementwise_2d, elementwise_2d_parameters>
{
private:
int is_invalid_impl(driver::Device const &, math_expression const &) const;
std::string generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mapping) const;
public:
ger(parameters_type const & parameters, binding_policy_t binding_policy = BIND_INDEPENDENT);
ger(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT);
elementwise_2d(parameters_type const & parameters, binding_policy_t binding_policy = BIND_INDEPENDENT);
elementwise_2d(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT);
std::vector<int_t> input_sizes(math_expression const & expressions) const;
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const &);
};
@@ -10,9 +10,9 @@ namespace isaac
namespace templates
{
struct gemm_parameters : public base::parameters_type
struct matrix_product_parameters : public base::parameters_type
{
gemm_parameters(unsigned int simd_width
matrix_product_parameters(unsigned int simd_width
, unsigned int local_size_0, unsigned int KL, unsigned int local_size_1, unsigned int D
, unsigned int ms, unsigned int ks, unsigned int ns
, fetching_policy_type A_fetching_policy, fetching_policy_type B_fetching_policy
@@ -38,7 +38,7 @@ struct gemm_parameters : public base::parameters_type
bool unroll_outer;
};

class gemm : public base_impl<gemm, gemm_parameters>
class matrix_product : public base_impl<matrix_product, matrix_product_parameters>
{
private:
unsigned int temporary_workspace(math_expression const & expressions) const;
@@ -48,9 +48,9 @@ private:
std::string generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const &) const;
void enqueue_block(driver::CommandQueue & queue, int_t M, int_t N, int_t K, array_base const & A, array_base const & B, array_base const & C,
value_scalar const &alpha, value_scalar const &beta, driver::Program const & program, std::string const & suffix, execution_options_type const & options);
std::vector<int_t> infos(math_expression const & expressions, isaac::symbolic::preset::gemm::args &arguments) const;
std::vector<int_t> infos(math_expression const & expressions, isaac::symbolic::preset::matrix_product::args &arguments) const;
public:
gemm(gemm::parameters_type const & parameters, bool check_bound, char A_trans, char B_trans);
matrix_product(matrix_product::parameters_type const & parameters, bool check_bound, char A_trans, char B_trans);
std::vector<int_t> input_sizes(math_expression const & expressions) const;
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const &ctr);
private:
@@ -60,36 +60,36 @@ private:
bool check_bounds_;
};

class gemm_nn : public gemm
class matrix_product_nn : public matrix_product
{
public:
gemm_nn(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
matrix_product_nn(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns, fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound = false);
};

class gemm_tn : public gemm
class matrix_product_tn : public matrix_product
{
public:
gemm_tn(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
matrix_product_tn(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns, fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound = false);
};

class gemm_nt : public gemm
class matrix_product_nt : public matrix_product
{
public:
gemm_nt(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
matrix_product_nt(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns, fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound = false);
};

class gemm_tt : public gemm
class matrix_product_tt : public matrix_product
{
public:
gemm_tt(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
matrix_product_tt(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
, int_t ms, int_t ks, int_t ns, fetching_policy_type Afetch , fetching_policy_type Bfetch
, int_t lfetch0, int_t lfetch1, bool check_bound = false);
};
@@ -8,27 +8,27 @@ namespace isaac
namespace templates
{
struct dot_parameters : public base::parameters_type
struct reduce_1d_parameters : public base::parameters_type
{
dot_parameters(unsigned int _simd_width,
reduce_1d_parameters(unsigned int _simd_width,
unsigned int _group_size, unsigned int _num_groups,
fetching_policy_type _fetching_policy);
unsigned int num_groups;
fetching_policy_type fetching_policy;
};

class dot : public base_impl<dot, dot_parameters>
class reduce_1d : public base_impl<reduce_1d, reduce_1d_parameters>
{
private:
unsigned int lmem_usage(math_expression const & expressions) const;
int is_invalid_impl(driver::Device const &, math_expression const &) const;
inline void reduce_1d_local_memory(kernel_generation_stream & stream, unsigned int size, std::vector<mapped_scalar_dot*> exprs,
inline void reduce_1d_local_memory(kernel_generation_stream & stream, unsigned int size, std::vector<mapped_reduce_1d*> exprs,
std::string const & buf_str, std::string const & buf_value_str, driver::backend_type backend) const;
std::string generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mapping) const;
public:
dot(dot::parameters_type const & parameters, binding_policy_t binding_policy = BIND_INDEPENDENT);
dot(unsigned int simd, unsigned int ls, unsigned int ng, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT);
reduce_1d(reduce_1d::parameters_type const & parameters, binding_policy_t binding_policy = BIND_INDEPENDENT);
reduce_1d(unsigned int simd, unsigned int ls, unsigned int ng, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT);
std::vector<int_t> input_sizes(math_expression const & expressions) const;
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const &);
private:
@@ -10,9 +10,9 @@ namespace isaac
{
namespace templates
{
struct gemv_parameters : public base::parameters_type
struct reduce_2d_parameters : public base::parameters_type
{
gemv_parameters(unsigned int _simd_width,
reduce_2d_parameters(unsigned int _simd_width,
unsigned int _local_size_0, unsigned int _local_size_1,
unsigned int _num_groups_0, unsigned int _num_groups_1, fetching_policy_type _fetch_policy);
unsigned int num_groups_0;
@@ -21,15 +21,15 @@ struct gemv_parameters : public base::parameters_type
};

class gemv : public base_impl<gemv, gemv_parameters>
class reduce_2d : public base_impl<reduce_2d, reduce_2d_parameters>
{
protected:
enum dot_type
enum reduce_1d_type
{
REDUCE_ROWS,
REDUCE_COLUMNS
};
gemv(gemv::parameters_type const & , dot_type, binding_policy_t);
reduce_2d(reduce_2d::parameters_type const & , reduce_1d_type, binding_policy_t);
private:
virtual int is_invalid_impl(driver::Device const &, math_expression const &) const;
unsigned int lmem_usage(math_expression const &) const;
@@ -38,21 +38,21 @@ public:
virtual std::vector<int_t> input_sizes(math_expression const & expressions) const;
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const &);
private:
dot_type dot_type_;
reduce_1d_type reduce_1d_type_;
};

class gemv_n : public gemv
class reduce_2d_n : public reduce_2d
{
public:
gemv_n(gemv::parameters_type const &, binding_policy_t binding_policy = BIND_INDEPENDENT);
gemv_n(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT);
reduce_2d_n(reduce_2d::parameters_type const &, binding_policy_t binding_policy = BIND_INDEPENDENT);
reduce_2d_n(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT);
};

class gemv_t : public gemv
class reduce_2d_t : public reduce_2d
{
public:
gemv_t(gemv::parameters_type const &, binding_policy_t binding_policy = BIND_INDEPENDENT);
gemv_t(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT);
reduce_2d_t(reduce_2d::parameters_type const &, binding_policy_t binding_policy = BIND_INDEPENDENT);
reduce_2d_t(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2, fetching_policy_type fetch, binding_policy_t bind = BIND_INDEPENDENT);
};

}
@@ -13,7 +13,7 @@ namespace preset
{
class gemm
class matrix_product
{
public:
2 lib/external/rapidjson/allocators.h vendored
@@ -115,7 +115,7 @@ public:
The user buffer will not be deallocated when this allocator is destructed.
\param buffer User supplied buffer.
\param size Size of the buffer in bytes. It must at least larger than sizeof(ChunkHeader).
\param size Size of the buffer in bytes. It must at least larelementwise_2d than sizeof(ChunkHeader).
\param chunkSize The size of memory chunk. The default is kDefaultChunkSize.
\param baseAllocator The allocator for allocating memory chunks.
*/
18 lib/external/rapidjson/document.h vendored
@@ -128,7 +128,7 @@ public:
typedef typename BaseType::pointer Pointer;
//! Reference to (const) GenericMember
typedef typename BaseType::reference Reference;
//! Signed integer type (e.g. \c ptrdiff_t)
//! Signed inteelementwise_2d type (e.g. \c ptrdiff_t)
typedef typename BaseType::difference_type DifferenceType;
//! Default constructor (singular value)
@@ -265,7 +265,7 @@ struct GenericStringRef {
\tparam N length of the string, automatically inferred
\param str Constant character array, lifetime assumed to be longer
\param str Constant character array, lifetime assumed to be lonelementwise_2d
than the use of the string in e.g. a GenericValue
\post \ref s == str
@@ -289,7 +289,7 @@ struct GenericStringRef {
\see StringRef(const CharType*)
\param str Constant character pointer, lifetime assumed to be longer
\param str Constant character pointer, lifetime assumed to be lonelementwise_2d
than the use of the string in e.g. a GenericValue
\post \ref s == str
@@ -305,7 +305,7 @@ struct GenericStringRef {
: s(str), length(internal::StrLen(str)){ RAPIDJSON_ASSERT(s != NULL); }
//! Create constant string reference from pointer and length
/*! \param str constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue
/*! \param str constant string, lifetime assumed to be lonelementwise_2d than the use of the string in e.g. a GenericValue
\param len length of the string, excluding the trailing NULL terminator
\post \ref s == str && \ref length == len
@@ -334,7 +334,7 @@ private:
value in a JSON GenericValue object, if the string's lifetime is known
to be valid long enough.
\tparam CharType Character type of the string
\param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue
\param str Constant string, lifetime assumed to be lonelementwise_2d than the use of the string in e.g. a GenericValue
\return GenericStringRef string reference object
\relatesalso GenericStringRef
@@ -355,7 +355,7 @@ inline GenericStringRef<CharType> StringRef(const CharType* str) {
supports string containing null characters.
\tparam CharType character type of the string
\param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue
\param str Constant string, lifetime assumed to be lonelementwise_2d than the use of the string in e.g. a GenericValue
\param length The length of source string.
\return GenericStringRef string reference object
\relatesalso GenericStringRef
@@ -373,7 +373,7 @@ inline GenericStringRef<CharType> StringRef(const CharType* str, size_t length)
to be valid long enough.
\tparam CharType character type of the string
\param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue
\param str Constant string, lifetime assumed to be lonelementwise_2d than the use of the string in e.g. a GenericValue
\return GenericStringRef string reference object
\relatesalso GenericStringRef
\note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING.
@@ -696,7 +696,7 @@ public:
case kNumberType:
if (IsDouble() || rhs.IsDouble())
return GetDouble() == rhs.GetDouble(); // May convert one operand from integer to double.
return GetDouble() == rhs.GetDouble(); // May convert one operand from inteelementwise_2d to double.
else
return data_.n.u64 == rhs.data_.n.u64;
@@ -1482,7 +1482,7 @@ private:
inline SizeType GetLength() const { return (SizeType)(MaxSize - str[LenPos]); }
}; // at most as many bytes as "String" above => 12 bytes in 32-bit mode, 16 bytes in 64-bit mode
// By using proper binary layout, retrieval of different integer types do not need conversions.
// By using proper binary layout, retrieval of different inteelementwise_2d types do not need conversions.
union Number {
#if RAPIDJSON_ENDIAN == RAPIDJSON_LITTLEENDIAN
struct I {
2 lib/external/rapidjson/internal/dtoa.h vendored
@@ -20,7 +20,7 @@
// This is a C++ header-only implementation of Grisu2 algorithm from the publication:
// Loitsch, Florian. "Printing floating-point numbers quickly and accurately with
// integers." ACM Sigplan Notices 45.6 (2010): 233-243.
// inteelementwise_2ds." ACM Sigplan Notices 45.6 (2010): 233-243.
#ifndef RAPIDJSON_DTOA_
#define RAPIDJSON_DTOA_
2 lib/external/rapidjson/internal/pow10.h vendored
@@ -24,7 +24,7 @@
namespace rapidjson {
namespace internal {
//! Computes integer powers of 10 in double (10.0^n).
//! Computes inteelementwise_2d powers of 10 in double (10.0^n).
/*! This function uses lookup table for fast and accurate results.
\param n non-negative exponent. Must <= 308.
\return 10.0^n
8 lib/external/rapidjson/rapidjson.h vendored
@@ -53,9 +53,9 @@
/*! \def RAPIDJSON_NO_INT64DEFINE
\ingroup RAPIDJSON_CONFIG
\brief Use external 64-bit integer types.
\brief Use external 64-bit inteelementwise_2d types.
RapidJSON requires the 64-bit integer types \c int64_t and \c uint64_t types
RapidJSON requires the 64-bit inteelementwise_2d types \c int64_t and \c uint64_t types
to be available at global scope.
If users have their own definition, define RAPIDJSON_NO_INT64DEFINE to
@@ -171,11 +171,11 @@
///////////////////////////////////////////////////////////////////////////////
// RAPIDJSON_UINT64_C2
//! Construct a 64-bit literal by a pair of 32-bit integer.
//! Construct a 64-bit literal by a pair of 32-bit inteelementwise_2d.
/*!
64-bit literal with or without ULL suffix is prone to compiler warnings.
UINT64_C() is C macro which cause compilation problems.
Use this macro to define 64-bit constants by a pair of 32-bit integer.
Use this macro to define 64-bit constants by a pair of 32-bit inteelementwise_2d.
*/
#ifndef RAPIDJSON_UINT64_C2
#define RAPIDJSON_UINT64_C2(high32, low32) ((static_cast<uint64_t>(high32) << 32) | static_cast<uint64_t>(low32))
2 lib/external/rapidjson/reader.h vendored
@@ -792,7 +792,7 @@ private:
}
}
// Force double for big integer
// Force double for big inteelementwise_2d
if (useDouble) {
while (s.Peek() >= '0' && s.Peek() <= '9') {
if (d >= 1.7976931348623157e307) // DBL_MAX / 10.0
@@ -117,23 +117,23 @@ std::string binary_leaf::evaluate_recursive(leaf_t leaf, std::map<std::string, s
}

mapped_gemm::mapped_gemm(std::string const & scalartype, unsigned int id, node_info info) : mapped_object(scalartype, id, "gemm"), binary_leaf(info) { }
mapped_matrix_product::mapped_matrix_product(std::string const & scalartype, unsigned int id, node_info info) : mapped_object(scalartype, id, "matrix_product"), binary_leaf(info) { }

//
mapped_dot::mapped_dot(std::string const & scalartype, unsigned int id, node_info info, std::string const & type_key) :
mapped_reduce::mapped_reduce(std::string const & scalartype, unsigned int id, node_info info, std::string const & type_key) :
mapped_object(scalartype, id, type_key), binary_leaf(info)
{ }

size_t mapped_dot::root_idx() const
size_t mapped_reduce::root_idx() const
{ return info_.root_idx; }

isaac::math_expression const & mapped_dot::math_expression() const
isaac::math_expression const & mapped_reduce::math_expression() const
{ return *info_.math_expression; }

math_expression::node mapped_dot::root_node() const
math_expression::node mapped_reduce::root_node() const
{ return math_expression().tree()[root_idx()]; }

bool mapped_dot::is_index_dot() const
bool mapped_reduce::is_index_reduction() const
{
op_element const & op = root_op();
return op.type==OPERATOR_ELEMENT_ARGFMAX_TYPE
@@ -142,17 +142,17 @@ bool mapped_dot::is_index_dot() const
|| op.type==OPERATOR_ELEMENT_ARGMIN_TYPE;
}

op_element mapped_dot::root_op() const
op_element mapped_reduce::root_op() const
{
return info_.math_expression->tree()[info_.root_idx].op;
}

//
mapped_scalar_dot::mapped_scalar_dot(std::string const & scalartype, unsigned int id, node_info info) : mapped_dot(scalartype, id, info, "scalar_dot"){ }
mapped_reduce_1d::mapped_reduce_1d(std::string const & scalartype, unsigned int id, node_info info) : mapped_reduce(scalartype, id, info, "scalar_reduce_1d"){ }

//
mapped_gemv::mapped_gemv(std::string const & scalartype, unsigned int id, node_info info) : mapped_dot(scalartype, id, info, "gemv") { }
mapped_reduce_2d::mapped_reduce_2d(std::string const & scalartype, unsigned int id, node_info info) : mapped_reduce(scalartype, id, info, "reduce_2d") { }

//
void mapped_host_scalar::preprocess(std::string & str) const
@@ -14,12 +14,12 @@ namespace detail
bool is_scalar_dot(math_expression::node const & node)
bool is_scalar_reduce_1d(math_expression::node const & node)
{
return node.op.type_family==OPERATOR_VECTOR_DOT_TYPE_FAMILY;
}

bool is_vector_dot(math_expression::node const & node)
bool is_vector_reduce_1d(math_expression::node const & node)
{
return node.op.type_family==OPERATOR_ROWS_DOT_TYPE_FAMILY
|| node.op.type_family==OPERATOR_COLUMNS_DOT_TYPE_FAMILY;
@@ -5,11 +5,11 @@
#include "isaac/array.h"
#include "isaac/tuple.h"
#include "isaac/kernels/keywords.h"
#include "isaac/kernels/templates/axpy.h"
#include "isaac/kernels/templates/dot.h"
#include "isaac/kernels/templates/ger.h"
#include "isaac/kernels/templates/gemv.h"
#include "isaac/kernels/templates/gemm.h"
#include "isaac/kernels/templates/elementwise_1d.h"
#include "isaac/kernels/templates/reduce_1d.h"
#include "isaac/kernels/templates/elementwise_2d.h"
#include "isaac/kernels/templates/reduce_2d.h"
#include "isaac/kernels/templates/matrix_product.h"
#include "isaac/kernels/templates/base.h"
#include "isaac/kernels/parse.h"
#include "isaac/exception/unknown_datatype.h"
@@ -150,11 +150,11 @@ int base_impl<TType, PType>::is_invalid(math_expression const & expressions, dr
return is_invalid_impl(device, expressions);
}

template class base_impl<axpy, axpy_parameters>;
template class base_impl<dot, dot_parameters>;
template class base_impl<ger, ger_parameters>;
template class base_impl<gemv, gemv_parameters>;
template class base_impl<gemm, gemm_parameters>;
template class base_impl<elementwise_1d, elementwise_1d_parameters>;
template class base_impl<reduce_1d, reduce_1d_parameters>;
template class base_impl<elementwise_2d, elementwise_2d_parameters>;
template class base_impl<reduce_2d, reduce_2d_parameters>;
template class base_impl<matrix_product, matrix_product_parameters>;

}
}
@@ -2,7 +2,7 @@
#include <cstring>
#include <algorithm>
#include "isaac/kernels/templates/axpy.h"
#include "isaac/kernels/templates/elementwise_1d.h"
#include "isaac/kernels/keywords.h"
#include "isaac/driver/backend.h"
@@ -18,7 +18,7 @@ namespace isaac
namespace templates
{
axpy_parameters::axpy_parameters(unsigned int _simd_width,
elementwise_1d_parameters::elementwise_1d_parameters(unsigned int _simd_width,
unsigned int _group_size, unsigned int _num_groups,
fetching_policy_type _fetching_policy) :
base::parameters_type(_simd_width, _group_size, 1, 1), num_groups(_num_groups), fetching_policy(_fetching_policy)
@@ -26,14 +26,14 @@ axpy_parameters::axpy_parameters(unsigned int _simd_width,
}

int axpy::is_invalid_impl(driver::Device const &, math_expression const &) const
int elementwise_1d::is_invalid_impl(driver::Device const &, math_expression const &) const
{
if (p_.fetching_policy==FETCH_FROM_LOCAL)
return TEMPLATE_INVALID_FETCHING_POLICY_TYPE;
return TEMPLATE_VALID;
}

std::string axpy::generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mappings) const
std::string elementwise_1d::generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mappings) const
{
driver::backend_type backend = device.backend();
std::string _size_t = size_type(device);
@@ -55,7 +55,7 @@ std::string axpy::generate_impl(std::string const & suffix, math_expression cons
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break;
}

stream << KernelPrefix(backend) << " void " << "axpy" << suffix << "(" << _size_t << " N," << generate_arguments(dtype, device, mappings, expressions) << ")" << std::endl;
stream << KernelPrefix(backend) << " void " << "elementwise_1d" << suffix << "(" << _size_t << " N," << generate_arguments(dtype, device, mappings, expressions) << ")" << std::endl;
stream << "{" << std::endl;
stream.inc_tab();
@@ -174,23 +174,23 @@ std::string axpy::generate_impl(std::string const & suffix, math_expression cons
return stream.str();
}

axpy::axpy(axpy_parameters const & parameters,
elementwise_1d::elementwise_1d(elementwise_1d_parameters const & parameters,
binding_policy_t binding_policy) :
base_impl<axpy, axpy_parameters>(parameters, binding_policy)
base_impl<elementwise_1d, elementwise_1d_parameters>(parameters, binding_policy)
{}

axpy::axpy(unsigned int simd, unsigned int ls, unsigned int ng,
elementwise_1d::elementwise_1d(unsigned int simd, unsigned int ls, unsigned int ng,
fetching_policy_type fetch, binding_policy_t bind):
base_impl<axpy, axpy_parameters>(axpy_parameters(simd,ls,ng,fetch), bind)
base_impl<elementwise_1d, elementwise_1d_parameters>(elementwise_1d_parameters(simd,ls,ng,fetch), bind)
{}

std::vector<int_t> axpy::input_sizes(math_expression const & expressions) const
std::vector<int_t> elementwise_1d::input_sizes(math_expression const & expressions) const
{
return {expressions.shape().max()};
}

void axpy::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const & control)
void elementwise_1d::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const & control)
{
math_expression const & expressions = control.x();
//Size
@@ -202,7 +202,7 @@ void axpy::enqueue(driver::CommandQueue & queue, driver::Program const & program
return;
}
//Kernel
std::string name = "axpy";
std::string name = "elementwise_1d";
name += suffix;
driver::Kernel kernel(program, name.c_str());
//NDRange
@@ -1,6 +1,6 @@
#include <cstring>
#include <iostream>
#include "isaac/kernels/templates/ger.h"
#include "isaac/kernels/templates/elementwise_2d.h"
#include "isaac/symbolic/io.h"
#include "isaac/kernels/keywords.h"
@@ -13,14 +13,14 @@ namespace isaac
namespace templates
{
ger_parameters::ger_parameters(unsigned int _simd_width,
elementwise_2d_parameters::elementwise_2d_parameters(unsigned int _simd_width,
unsigned int _local_size_0, unsigned int _local_size_1,
unsigned int _num_groups_0, unsigned int _num_groups_1,
fetching_policy_type _fetching_policy) : base::parameters_type(_simd_width, _local_size_0, _local_size_1, 1), num_groups_0(_num_groups_0), num_groups_1(_num_groups_1), fetching_policy(_fetching_policy){ }

int ger::is_invalid_impl(driver::Device const &, math_expression const &) const
int elementwise_2d::is_invalid_impl(driver::Device const &, math_expression const &) const
{
if (p_.simd_width>1)
return TEMPLATE_INVALID_SIMD_WIDTH;
@@ -29,7 +29,7 @@ int ger::is_invalid_impl(driver::Device const &, math_expression const &) const
return TEMPLATE_VALID;
}

std::string ger::generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mappings) const
std::string elementwise_2d::generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mappings) const
{
kernel_generation_stream stream;
std::string _size_t = size_type(device);
@@ -45,7 +45,7 @@ std::string ger::generate_impl(std::string const & suffix, math_expression const
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break;
}

stream << KernelPrefix(backend) << " void axpy" << suffix << "(" << _size_t << " M, " << _size_t << " N, " << generate_arguments("#scalartype", device, mappings, expressions) << ")" << std::endl;
stream << KernelPrefix(backend) << " void elementwise_1d" << suffix << "(" << _size_t << " M, " << _size_t << " N, " << generate_arguments("#scalartype", device, mappings, expressions) << ")" << std::endl;
stream << "{" << std::endl;
stream.inc_tab();
@@ -105,25 +105,25 @@ std::string ger::generate_impl(std::string const & suffix, math_expression const
|
||||
return stream.str();
|
||||
}
|
||||
|
||||
ger::ger(parameters_type const & parameters, binding_policy_t binding_policy) :
|
||||
base_impl<ger, ger_parameters>(parameters, binding_policy){ }
|
||||
elementwise_2d::elementwise_2d(parameters_type const & parameters, binding_policy_t binding_policy) :
|
||||
base_impl<elementwise_2d, elementwise_2d_parameters>(parameters, binding_policy){ }
|
||||
|
||||
ger::ger(unsigned int simd, unsigned int ls1, unsigned int ls2,
|
||||
elementwise_2d::elementwise_2d(unsigned int simd, unsigned int ls1, unsigned int ls2,
|
||||
unsigned int ng1, unsigned int ng2, fetching_policy_type fetch,
|
||||
binding_policy_t bind):
|
||||
base_impl<ger, ger_parameters>(ger_parameters(simd, ls1, ls2, ng1, ng2, fetch), bind)
|
||||
base_impl<elementwise_2d, elementwise_2d_parameters>(elementwise_2d_parameters(simd, ls1, ls2, ng1, ng2, fetch), bind)
|
||||
{}
|
||||
|
||||
std::vector<int_t> ger::input_sizes(math_expression const & expression) const
|
||||
std::vector<int_t> elementwise_2d::input_sizes(math_expression const & expression) const
|
||||
{
|
||||
std::pair<int_t, int_t> size = matrix_size(expression.tree(), lhs_most(expression.tree(), expression.root()));
|
||||
return {size.first, size.second};
|
||||
}
|
||||
|
||||
void ger::enqueue(driver::CommandQueue & /*queue*/, driver::Program const & program, std::string const & suffix, base &, execution_handler const & control)
|
||||
void elementwise_2d::enqueue(driver::CommandQueue & /*queue*/, driver::Program const & program, std::string const & suffix, base &, execution_handler const & control)
|
||||
{
|
||||
math_expression const & expressions = control.x();
|
||||
std::string name = "axpy";
|
||||
std::string name = "elementwise_1d";
|
||||
name +=suffix;
|
||||
driver::Kernel kernel(program, name.c_str());
|
||||
driver::NDRange global(p_.local_size_0*p_.num_groups_0, p_.local_size_1*p_.num_groups_1);
|
@@ -1,5 +1,5 @@
|
||||
#include "isaac/array.h"
|
||||
#include "isaac/kernels/templates/gemm.h"
|
||||
#include "isaac/kernels/templates/matrix_product.h"
|
||||
#include "isaac/kernels/keywords.h"
|
||||
#include "isaac/symbolic/preset.h"
|
||||
#include "isaac/exception/operation_not_supported.h"
|
||||
@@ -15,7 +15,7 @@ namespace isaac
|
||||
namespace templates
|
||||
{
|
||||
|
||||
gemm_parameters::gemm_parameters(unsigned int simd_width
|
||||
matrix_product_parameters::matrix_product_parameters(unsigned int simd_width
|
||||
, unsigned int local_size_0, unsigned int KL, unsigned int local_size_1, unsigned int D
|
||||
, unsigned int ms, unsigned int ks, unsigned int ns
|
||||
, fetching_policy_type A_fetching_policy, fetching_policy_type B_fetching_policy
|
||||
@@ -27,7 +27,7 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
|
||||
}
|
||||
|
||||
|
||||
unsigned int gemm::lmem_usage(math_expression const & expression) const
|
||||
unsigned int matrix_product::lmem_usage(math_expression const & expression) const
|
||||
{
|
||||
numeric_type numeric_t = lhs_most(expression.tree(), expression.root()).lhs.dtype;
|
||||
unsigned int N = 0;
|
||||
@@ -36,7 +36,7 @@ unsigned int gemm::lmem_usage(math_expression const & expression) const
|
||||
return N*size_of(numeric_t);
|
||||
}
|
||||
|
||||
unsigned int gemm::registers_usage(math_expression const & expression) const
|
||||
unsigned int matrix_product::registers_usage(math_expression const & expression) const
|
||||
{
|
||||
numeric_type numeric_t = lhs_most(expression.tree(), expression.root()).lhs.dtype;
|
||||
|
||||
@@ -44,7 +44,7 @@ unsigned int gemm::registers_usage(math_expression const & expression) const
|
||||
return N*size_of(numeric_t);
|
||||
}
|
||||
|
||||
unsigned int gemm::temporary_workspace(math_expression const & expressions) const
|
||||
unsigned int matrix_product::temporary_workspace(math_expression const & expressions) const
|
||||
{
|
||||
std::vector<int_t> MNK = input_sizes(expressions);
|
||||
int_t M = MNK[0]; int_t N = MNK[1];
|
||||
@@ -53,7 +53,7 @@ unsigned int gemm::temporary_workspace(math_expression const & expressions) cons
|
||||
return 0;
|
||||
}
|
||||
|
||||
int gemm::is_invalid_impl(driver::Device const &, math_expression const &) const
|
||||
int matrix_product::is_invalid_impl(driver::Device const &, math_expression const &) const
|
||||
{
|
||||
// if(device.vendor()==driver::Device::Vendor::NVIDIA && p_.simd_width > 1)
|
||||
// return TEMPLATE_INVALID_SIMD_WIDTH;
|
||||
@@ -103,7 +103,7 @@ int gemm::is_invalid_impl(driver::Device const &, math_expression const &) const
|
||||
return TEMPLATE_VALID;
|
||||
}
|
||||
|
||||
std::string gemm::generate_impl(std::string const & suffix, math_expression const & expression, driver::Device const & device, mapping_type const &) const
|
||||
std::string matrix_product::generate_impl(std::string const & suffix, math_expression const & expression, driver::Device const & device, mapping_type const &) const
|
||||
{
|
||||
using std::string;
|
||||
using tools::to_string;
|
||||
@@ -132,10 +132,10 @@ std::string gemm::generate_impl(std::string const & suffix, math_expression cons
|
||||
//////////////////
|
||||
/// DECLARATIONS
|
||||
/// //////////////
|
||||
std::string gemm_name = "gemm";
|
||||
std::string matrix_product_name = "matrix_product";
|
||||
std::string reduce_name = "reduce";
|
||||
|
||||
gemm_name += suffix;
|
||||
matrix_product_name += suffix;
|
||||
reduce_name += suffix;
|
||||
|
||||
switch(backend)
|
||||
@@ -146,7 +146,7 @@ std::string gemm::generate_impl(std::string const & suffix, math_expression cons
|
||||
stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break;
|
||||
}
|
||||
|
||||
stream << KernelPrefix(backend) << " void " << gemm_name << "(" << _size_t << " M, " << _size_t << " N, " << _size_t << " K, "
|
||||
stream << KernelPrefix(backend) << " void " << matrix_product_name << "(" << _size_t << " M, " << _size_t << " N, " << _size_t << " K, "
|
||||
<< Global(backend) << " " << sdtype << "* C, " << _size_t << " ldc," << _size_t << " offc," << _size_t << " Cstride1, "
|
||||
<< sdtype << " alpha,"
|
||||
<< Global(backend) << " " << sdtype << "* A, " << _size_t << " lda," << _size_t << " offa," << _size_t << " Astride1,"
|
||||
@@ -572,7 +572,7 @@ std::string gemm::generate_impl(std::string const & suffix, math_expression cons
|
||||
#undef VSTORE
|
||||
}
|
||||
|
||||
void gemm::enqueue_block(driver::CommandQueue & /*queue*/, int_t M, int_t N, int_t K,
|
||||
void matrix_product::enqueue_block(driver::CommandQueue & /*queue*/, int_t M, int_t N, int_t K,
|
||||
array_base const & A, array_base const & B, array_base const & C,
|
||||
value_scalar const & alpha, value_scalar const & beta,
|
||||
driver::Program const & program, std::string const & suffix, execution_options_type const & options)
|
||||
@@ -582,53 +582,53 @@ void gemm::enqueue_block(driver::CommandQueue & /*queue*/, int_t M, int_t N, int
|
||||
if(M==0 || N==0 || K==0)
|
||||
return;
|
||||
|
||||
std::string gemm_name = "gemm";
|
||||
std::string matrix_product_name = "matrix_product";
|
||||
std::string reduce_name = "reduce";
|
||||
|
||||
gemm_name += suffix;
|
||||
matrix_product_name += suffix;
|
||||
reduce_name += suffix;
|
||||
|
||||
driver::Kernel gemm(program, gemm_name.c_str());
|
||||
driver::Kernel matrix_product(program, matrix_product_name.c_str());
|
||||
driver::NDRange local(p_.local_size_0, p_.local_size_1, 1);
|
||||
driver::NDRange global(align(align(M,p_.mS)/p_.mS, p_.local_size_0), align(align(N,p_.nS)/p_.nS, p_.local_size_1), p_.depth);
|
||||
|
||||
unsigned int current_arg = 0;
|
||||
bind_independent binder;
|
||||
set_arguments_functor helper(binder, current_arg, gemm);
|
||||
set_arguments_functor helper(binder, current_arg, matrix_product);
|
||||
|
||||
driver::Buffer& workspace = driver::backend::workspaces::get(options.queue(C.context()));
|
||||
gemm.setSizeArg(current_arg++, M);
|
||||
gemm.setSizeArg(current_arg++, N);
|
||||
gemm.setSizeArg(current_arg++, K);
|
||||
matrix_product.setSizeArg(current_arg++, M);
|
||||
matrix_product.setSizeArg(current_arg++, N);
|
||||
matrix_product.setSizeArg(current_arg++, K);
|
||||
if(p_.depth==1)
|
||||
{
|
||||
gemm.setArg(current_arg++,C.data());
|
||||
gemm.setSizeArg(current_arg++, C.stride()[1]);
|
||||
gemm.setSizeArg(current_arg++, C.start());
|
||||
gemm.setSizeArg(current_arg++, C.stride()[0]);
|
||||
matrix_product.setArg(current_arg++,C.data());
|
||||
matrix_product.setSizeArg(current_arg++, C.stride()[1]);
|
||||
matrix_product.setSizeArg(current_arg++, C.start());
|
||||
matrix_product.setSizeArg(current_arg++, C.stride()[0]);
|
||||
}
|
||||
else
|
||||
{
|
||||
gemm.setArg(current_arg++, workspace);
|
||||
gemm.setSizeArg(current_arg++, M);
|
||||
gemm.setSizeArg(current_arg++, 0);
|
||||
gemm.setSizeArg(current_arg++, 1);
|
||||
matrix_product.setArg(current_arg++, workspace);
|
||||
matrix_product.setSizeArg(current_arg++, M);
|
||||
matrix_product.setSizeArg(current_arg++, 0);
|
||||
matrix_product.setSizeArg(current_arg++, 1);
|
||||
}
|
||||
|
||||
|
||||
helper.set_arguments(alpha.dtype(), alpha.values());
|
||||
gemm.setArg(current_arg++, A.data());
|
||||
gemm.setSizeArg(current_arg++, A.stride()[1]);
|
||||
gemm.setSizeArg(current_arg++, A.start());
|
||||
gemm.setSizeArg(current_arg++, A.stride()[0]);
|
||||
matrix_product.setArg(current_arg++, A.data());
|
||||
matrix_product.setSizeArg(current_arg++, A.stride()[1]);
|
||||
matrix_product.setSizeArg(current_arg++, A.start());
|
||||
matrix_product.setSizeArg(current_arg++, A.stride()[0]);
|
||||
|
||||
gemm.setArg(current_arg++, B.data());
|
||||
gemm.setSizeArg(current_arg++, B.stride()[1]);
|
||||
gemm.setSizeArg(current_arg++, B.start());
|
||||
gemm.setSizeArg(current_arg++, B.stride()[0]);
|
||||
matrix_product.setArg(current_arg++, B.data());
|
||||
matrix_product.setSizeArg(current_arg++, B.stride()[1]);
|
||||
matrix_product.setSizeArg(current_arg++, B.start());
|
||||
matrix_product.setSizeArg(current_arg++, B.stride()[0]);
|
||||
|
||||
helper.set_arguments(beta.dtype(), beta.values());
|
||||
options.enqueue(program.context(), gemm, global, local);
|
||||
options.enqueue(program.context(), matrix_product, global, local);
|
||||
|
||||
if(p_.depth > 1)
|
||||
{
|
||||
@@ -652,18 +652,18 @@ void gemm::enqueue_block(driver::CommandQueue & /*queue*/, int_t M, int_t N, int
|
||||
|
||||
}
|
||||
|
||||
std::vector<int_t> gemm::infos(math_expression const & expression, symbolic::preset::gemm::args& arguments) const
|
||||
std::vector<int_t> matrix_product::infos(math_expression const & expression, symbolic::preset::matrix_product::args& arguments) const
|
||||
{
|
||||
math_expression::container_type const & array = expression.tree();
|
||||
std::size_t root = expression.root();
|
||||
arguments = symbolic::preset::gemm::check(array, root);
|
||||
arguments = symbolic::preset::matrix_product::check(array, root);
|
||||
int_t M = arguments.C->array->shape()[0];
|
||||
int_t N = arguments.C->array->shape()[1];
|
||||
int_t K = (A_trans_=='T')?arguments.A->array->shape()[0]:arguments.A->array->shape()[1];
|
||||
return {M, N, K};
|
||||
}
|
||||
|
||||
gemm::gemm(gemm_parameters const & parameters, bool check_bounds, char A_trans, char B_trans) : base_impl<gemm, gemm_parameters>(parameters, BIND_INDEPENDENT), A_trans_(A_trans), B_trans_(B_trans), check_bounds_(check_bounds)
|
||||
matrix_product::matrix_product(matrix_product_parameters const & parameters, bool check_bounds, char A_trans, char B_trans) : base_impl<matrix_product, matrix_product_parameters>(parameters, BIND_INDEPENDENT), A_trans_(A_trans), B_trans_(B_trans), check_bounds_(check_bounds)
|
||||
{
|
||||
if(A_trans_=='N' && B_trans_=='N') type_ = GEMM_NN_TYPE;
|
||||
else if(A_trans_=='T' && B_trans_=='N') type_ = GEMM_TN_TYPE;
|
||||
@@ -672,21 +672,21 @@ gemm::gemm(gemm_parameters const & parameters, bool check_bounds, char A_trans,
|
||||
else throw;
|
||||
}
|
||||
|
||||
std::vector<int_t> gemm::input_sizes(math_expression const & expressions) const
|
||||
std::vector<int_t> matrix_product::input_sizes(math_expression const & expressions) const
|
||||
{
|
||||
symbolic::preset::gemm::args dummy;
|
||||
symbolic::preset::matrix_product::args dummy;
|
||||
return infos((math_expression&)expressions, dummy);
|
||||
}
|
||||
|
||||
void gemm::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback_base, execution_handler const & control)
|
||||
void matrix_product::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback_base, execution_handler const & control)
|
||||
{
|
||||
using namespace tools;
|
||||
|
||||
gemm & fallback = (gemm&)fallback_base;
|
||||
matrix_product & fallback = (matrix_product&)fallback_base;
|
||||
math_expression const & expressions = control.x();
|
||||
|
||||
|
||||
symbolic::preset::gemm::args args;
|
||||
symbolic::preset::matrix_product::args args;
|
||||
std::vector<int_t> MNK = infos(expressions, args);
|
||||
|
||||
int_t M = MNK[0];
|
||||
@@ -720,40 +720,40 @@ void gemm::enqueue(driver::CommandQueue & queue, driver::Program const & program
|
||||
}
|
||||
|
||||
//
|
||||
gemm_nn::gemm_nn(unsigned int simd
|
||||
matrix_product_nn::matrix_product_nn(unsigned int simd
|
||||
, int_t ls0, int_t KL, int_t ls1, int_t D
|
||||
, int_t ms, int_t ks, int_t ns
|
||||
, fetching_policy_type Afetch , fetching_policy_type Bfetch
|
||||
, int_t lfetch0, int_t lfetch1, bool check_bound) :
|
||||
gemm(gemm_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'N', 'N')
|
||||
matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'N', 'N')
|
||||
{
|
||||
}
|
||||
|
||||
//
|
||||
gemm_tn::gemm_tn(unsigned int simd
|
||||
matrix_product_tn::matrix_product_tn(unsigned int simd
|
||||
, int_t ls0, int_t KL, int_t ls1, int_t D
|
||||
, int_t ms, int_t ks, int_t ns
|
||||
, fetching_policy_type Afetch , fetching_policy_type Bfetch
|
||||
, int_t lfetch0, int_t lfetch1, bool check_bound) :
|
||||
gemm(gemm_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'T', 'N')
|
||||
matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'T', 'N')
|
||||
{ }
|
||||
|
||||
//
|
||||
gemm_nt::gemm_nt(unsigned int simd
|
||||
matrix_product_nt::matrix_product_nt(unsigned int simd
|
||||
, int_t ls0, int_t KL, int_t ls1, int_t D
|
||||
, int_t ms, int_t ks, int_t ns
|
||||
, fetching_policy_type Afetch , fetching_policy_type Bfetch
|
||||
, int_t lfetch0, int_t lfetch1, bool check_bound) :
|
||||
gemm(gemm_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'N', 'T')
|
||||
matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'N', 'T')
|
||||
{ }
|
||||
|
||||
//
|
||||
gemm_tt::gemm_tt(unsigned int simd
|
||||
matrix_product_tt::matrix_product_tt(unsigned int simd
|
||||
, int_t ls0, int_t KL, int_t ls1, int_t D
|
||||
, int_t ms, int_t ks, int_t ns
|
||||
, fetching_policy_type Afetch , fetching_policy_type Bfetch
|
||||
, int_t lfetch0, int_t lfetch1, bool check_bound) :
|
||||
gemm(gemm_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'T', 'T')
|
||||
matrix_product(matrix_product_parameters(simd, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lfetch0, lfetch1), check_bound, 'T', 'T')
|
||||
{ }
|
||||
|
||||
}
|
@@ -1,6 +1,6 @@
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include "isaac/kernels/templates/dot.h"
|
||||
#include "isaac/kernels/templates/reduce_1d.h"
|
||||
#include "isaac/kernels/keywords.h"
|
||||
|
||||
#include "tools/loop.hpp"
|
||||
@@ -15,25 +15,25 @@ namespace isaac
|
||||
{
|
||||
namespace templates
|
||||
{
|
||||
dot_parameters::dot_parameters(unsigned int _simd_width,
|
||||
reduce_1d_parameters::reduce_1d_parameters(unsigned int _simd_width,
|
||||
unsigned int _group_size, unsigned int _num_groups,
|
||||
fetching_policy_type _fetching_policy) : base::parameters_type(_simd_width, _group_size, 1, 2), num_groups(_num_groups), fetching_policy(_fetching_policy)
|
||||
{ }
|
||||
|
||||
unsigned int dot::lmem_usage(math_expression const & x) const
|
||||
unsigned int reduce_1d::lmem_usage(math_expression const & x) const
|
||||
{
|
||||
numeric_type numeric_t= lhs_most(x.tree(), x.root()).lhs.dtype;
|
||||
return p_.local_size_0*size_of(numeric_t);
|
||||
}
|
||||
|
||||
int dot::is_invalid_impl(driver::Device const &, math_expression const &) const
|
||||
int reduce_1d::is_invalid_impl(driver::Device const &, math_expression const &) const
|
||||
{
|
||||
if (p_.fetching_policy==FETCH_FROM_LOCAL)
|
||||
return TEMPLATE_INVALID_FETCHING_POLICY_TYPE;
|
||||
return TEMPLATE_VALID;
|
||||
}
|
||||
|
||||
inline void dot::reduce_1d_local_memory(kernel_generation_stream & stream, unsigned int size, std::vector<mapped_scalar_dot*> exprs,
|
||||
inline void reduce_1d::reduce_1d_local_memory(kernel_generation_stream & stream, unsigned int size, std::vector<mapped_reduce_1d*> exprs,
|
||||
std::string const & buf_str, std::string const & buf_value_str, driver::backend_type backend) const
|
||||
{
|
||||
stream << "#pragma unroll" << std::endl;
|
||||
@@ -46,25 +46,25 @@ inline void dot::reduce_1d_local_memory(kernel_generation_stream & stream, unsig
|
||||
stream.inc_tab();
|
||||
|
||||
for (auto & expr : exprs)
|
||||
if (expr->is_index_dot())
|
||||
compute_index_dot(stream, expr->process(buf_str+"[lid]"), expr->process(buf_str+"[lid+stride]")
|
||||
if (expr->is_index_reduction())
|
||||
compute_index_reduce_1d(stream, expr->process(buf_str+"[lid]"), expr->process(buf_str+"[lid+stride]")
|
||||
, expr->process(buf_value_str+"[lid]"), expr->process(buf_value_str+"[lid+stride]"),
|
||||
expr->root_op());
|
||||
else
|
||||
compute_dot(stream, expr->process(buf_str+"[lid]"), expr->process(buf_str+"[lid+stride]"), expr->root_op());
|
||||
compute_reduce_1d(stream, expr->process(buf_str+"[lid]"), expr->process(buf_str+"[lid+stride]"), expr->root_op());
|
||||
stream.dec_tab();
|
||||
stream << "}" << std::endl;
|
||||
stream.dec_tab();
|
||||
stream << "}" << std::endl;
|
||||
}
|
||||
|
||||
std::string dot::generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mapping) const
|
||||
std::string reduce_1d::generate_impl(std::string const & suffix, math_expression const & expressions, driver::Device const & device, mapping_type const & mapping) const
|
||||
{
|
||||
kernel_generation_stream stream;
|
||||
|
||||
std::vector<mapped_scalar_dot*> exprs;
|
||||
std::vector<mapped_reduce_1d*> exprs;
|
||||
for (mapping_type::const_iterator iit = mapping.begin(); iit != mapping.end(); ++iit)
|
||||
if (mapped_scalar_dot * p = dynamic_cast<mapped_scalar_dot*>(iit->second.get()))
|
||||
if (mapped_reduce_1d * p = dynamic_cast<mapped_reduce_1d*>(iit->second.get()))
|
||||
exprs.push_back(p);
|
||||
std::size_t N = exprs.size();
|
||||
driver::backend_type backend = device.backend();
|
||||
@@ -81,7 +81,7 @@ std::string dot::generate_impl(std::string const & suffix, math_expression const
|
||||
{
|
||||
numeric_type dtype = lhs_most(exprs[k]->math_expression().tree(), exprs[k]->math_expression().root()).lhs.dtype;
|
||||
std::string sdtype = to_string(dtype);
|
||||
if (exprs[k]->is_index_dot())
|
||||
if (exprs[k]->is_index_reduction())
|
||||
{
|
||||
stream << exprs[k]->process("uint* #name_temp = (uint*)(tmp + " + tools::to_string(offset) + ");");
|
||||
offset += 4*p_.num_groups;
|
||||
@@ -125,7 +125,7 @@ std::string dot::generate_impl(std::string const & suffix, math_expression const
|
||||
|
||||
for (unsigned int k = 0; k < N; ++k)
|
||||
{
|
||||
if (exprs[k]->is_index_dot())
|
||||
if (exprs[k]->is_index_reduction())
|
||||
{
|
||||
stream << exprs[k]->process(Local(backend).get() + " #scalartype #name_buf_value[" + tools::to_string(p_.local_size_0) + "];") << std::endl;
|
||||
stream << exprs[k]->process("#scalartype #name_acc_value = " + neutral_element(exprs[k]->root_op(), backend, "#scalartype") + ";") << std::endl;
|
||||
@@ -174,11 +174,11 @@ std::string dot::generate_impl(std::string const & suffix, math_expression const
|
||||
accessors["matrix_diag"] = str[a];
|
||||
accessors["array1"] = "#namereg";
|
||||
std::string value = elem->evaluate_recursive(LHS_NODE_TYPE, accessors);
|
||||
if (elem->is_index_dot())
|
||||
compute_index_dot(stream, elem->process("#name_acc"), "i*" + tools::to_string(simd_width) + "+"
|
||||
if (elem->is_index_reduction())
|
||||
compute_index_reduce_1d(stream, elem->process("#name_acc"), "i*" + tools::to_string(simd_width) + "+"
|
||||
+ tools::to_string(a), elem->process("#name_acc_value"), value,elem->root_op());
|
||||
else
|
||||
compute_dot(stream, elem->process("#name_acc"), value,elem->root_op());
|
||||
compute_reduce_1d(stream, elem->process("#name_acc"), value,elem->root_op());
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -186,7 +186,7 @@ std::string dot::generate_impl(std::string const & suffix, math_expression const
|
||||
//Fills local memory
|
||||
for (unsigned int k = 0; k < N; ++k)
|
||||
{
|
||||
if (exprs[k]->is_index_dot())
|
||||
if (exprs[k]->is_index_reduction())
|
||||
stream << exprs[k]->process("#name_buf_value[lid] = #name_acc_value;") << std::endl;
|
||||
stream << exprs[k]->process("#name_buf[lid] = #name_acc;") << std::endl;
|
||||
}
|
||||
@@ -200,7 +200,7 @@ std::string dot::generate_impl(std::string const & suffix, math_expression const
|
||||
stream.inc_tab();
|
||||
for (unsigned int k = 0; k < N; ++k)
|
||||
{
|
||||
if (exprs[k]->is_index_dot())
|
||||
if (exprs[k]->is_index_reduction())
|
||||
stream << exprs[k]->process("#name_temp_value[gpid] = #name_buf_value[0];") << std::endl;
|
||||
stream << exprs[k]->process("#name_temp[gpid] = #name_buf[0];") << std::endl;
|
||||
}
|
||||
@@ -225,9 +225,9 @@ std::string dot::generate_impl(std::string const & suffix, math_expression const
|
||||
stream << "unsigned int lid = " <<LocalIdx0(backend) << ";" << std::endl;
|
||||
stream << "unsigned int lsize = " <<LocalSize0(backend) << ";" << std::endl;
|
||||
|
||||
for (mapped_scalar_dot* e: exprs)
|
||||
for (mapped_reduce_1d* e: exprs)
|
||||
{
|
||||
if (e->is_index_dot())
|
||||
if (e->is_index_reduction())
|
||||
{
|
||||
stream << e->process(Local(backend).get() + " unsigned int #name_buf[" + tools::to_string(p_.local_size_0) + "];");
|
||||
stream << e->process("unsigned int #name_acc = 0;") << std::endl;
|
||||
@@ -244,18 +244,18 @@ std::string dot::generate_impl(std::string const & suffix, math_expression const
|
||||
stream << "for(unsigned int i = lid; i < " << p_.num_groups << "; i += lsize)" << std::endl;
|
||||
stream << "{" << std::endl;
|
||||
stream.inc_tab();
|
||||
for (mapped_scalar_dot* e: exprs)
|
||||
if (e->is_index_dot())
|
||||
compute_index_dot(stream, e->process("#name_acc"), e->process("#name_temp[i]"), e->process("#name_acc_value"),e->process("#name_temp_value[i]"),e->root_op());
|
||||
for (mapped_reduce_1d* e: exprs)
|
||||
if (e->is_index_reduction())
|
||||
compute_index_reduce_1d(stream, e->process("#name_acc"), e->process("#name_temp[i]"), e->process("#name_acc_value"),e->process("#name_temp_value[i]"),e->root_op());
|
||||
else
|
||||
compute_dot(stream, e->process("#name_acc"), e->process("#name_temp[i]"), e->root_op());
|
||||
compute_reduce_1d(stream, e->process("#name_acc"), e->process("#name_temp[i]"), e->root_op());
|
||||
|
||||
stream.dec_tab();
|
||||
stream << "}" << std::endl;
|
||||
|
||||
for (unsigned int k = 0; k < N; ++k)
|
||||
{
|
||||
if (exprs[k]->is_index_dot())
|
||||
if (exprs[k]->is_index_reduction())
|
||||
stream << exprs[k]->process("#name_buf_value[lid] = #name_acc_value;") << std::endl;
|
||||
stream << exprs[k]->process("#name_buf[lid] = #name_acc;") << std::endl;
|
||||
}
|
||||
@@ -268,7 +268,7 @@ std::string dot::generate_impl(std::string const & suffix, math_expression const
|
||||
stream << "{" << std::endl;
|
||||
stream.inc_tab();
|
||||
std::map<std::string, std::string> accessors;
|
||||
accessors["scalar_dot"] = "#name_buf[0]";
|
||||
accessors["scalar_reduce_1d"] = "#name_buf[0]";
|
||||
accessors["array1"] = "#pointer[#start]";
|
||||
accessors["array11"] = "#pointer[#start]";
|
||||
stream << evaluate(PARENT_NODE_TYPE, accessors, expressions, expressions.root(), mapping) << ";" << std::endl;
|
||||
@@ -283,23 +283,23 @@ std::string dot::generate_impl(std::string const & suffix, math_expression const
|
||||
return stream.str();
|
||||
}
|
||||
|
||||
dot::dot(dot::parameters_type const & parameters,
|
||||
binding_policy_t binding) : base_impl<dot, dot_parameters>(parameters, binding)
|
||||
reduce_1d::reduce_1d(reduce_1d::parameters_type const & parameters,
|
||||
binding_policy_t binding) : base_impl<reduce_1d, reduce_1d_parameters>(parameters, binding)
|
||||
{ }
|
||||
|
||||
dot::dot(unsigned int simd, unsigned int ls, unsigned int ng,
|
||||
reduce_1d::reduce_1d(unsigned int simd, unsigned int ls, unsigned int ng,
|
||||
fetching_policy_type fetch, binding_policy_t bind):
|
||||
base_impl<dot, dot_parameters>(dot_parameters(simd,ls,ng,fetch), bind)
|
||||
base_impl<reduce_1d, reduce_1d_parameters>(reduce_1d_parameters(simd,ls,ng,fetch), bind)
|
||||
{}
|
||||
|
||||
std::vector<int_t> dot::input_sizes(math_expression const & x) const
|
||||
std::vector<int_t> reduce_1d::input_sizes(math_expression const & x) const
|
||||
{
|
||||
std::vector<size_t> dots_idx = filter_nodes(&is_dot, x, x.root(), false);
|
||||
int_t N = vector_size(lhs_most(x.tree(), dots_idx[0]));
|
||||
std::vector<size_t> reduce_1ds_idx = filter_nodes(&is_reduce_1d, x, x.root(), false);
|
||||
int_t N = vector_size(lhs_most(x.tree(), reduce_1ds_idx[0]));
|
||||
return {N};
|
||||
}
|
||||
|
||||
void dot::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const & control)
|
||||
void reduce_1d::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const & control)
|
||||
{
|
||||
math_expression const & x = control.x();
|
||||
|
||||
@@ -313,10 +313,10 @@ void dot::enqueue(driver::CommandQueue & queue, driver::Program const & program,
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<math_expression::node const *> dots;
|
||||
std::vector<size_t> dots_idx = filter_nodes(&is_dot, x, x.root(), false);
|
||||
for (size_t idx: dots_idx)
|
||||
dots.push_back(&x.tree()[idx]);
|
||||
std::vector<math_expression::node const *> reduce_1ds;
|
||||
std::vector<size_t> reduce_1ds_idx = filter_nodes(&is_reduce_1d, x, x.root(), false);
|
||||
for (size_t idx: reduce_1ds_idx)
|
||||
reduce_1ds.push_back(&x.tree()[idx]);
|
||||
|
||||
//Kernel
|
||||
std::string name[2] = {"prod", "reduce"};
|
@@ -2,7 +2,7 @@
|
||||
#include <iostream>
|
||||
#include "isaac/kernels/stream.h"
|
||||
#include "isaac/kernels/keywords.h"
|
||||
#include "isaac/kernels/templates/gemv.h"
|
||||
#include "isaac/kernels/templates/reduce_2d.h"
|
||||
|
||||
#include "tools/arguments.hpp"
|
||||
#include "tools/loop.hpp"
|
||||
@@ -16,33 +16,33 @@ namespace isaac
|
||||
namespace templates
|
||||
{
|
||||
|
||||
gemv_parameters::gemv_parameters(unsigned int _simd_width,
|
||||
reduce_2d_parameters::reduce_2d_parameters(unsigned int _simd_width,
|
||||
unsigned int _local_size_0, unsigned int _local_size_1,
|
||||
unsigned int _num_groups_0, unsigned int _num_groups_1, fetching_policy_type _fetch_policy): base::parameters_type(_simd_width, _local_size_0, _local_size_1, 1),
|
||||
num_groups_0(_num_groups_0), num_groups_1(_num_groups_1), fetch_policy(_fetch_policy) { }
|
||||
|
||||
|
||||
int gemv::is_invalid_impl(driver::Device const &, math_expression const &) const
|
||||
int reduce_2d::is_invalid_impl(driver::Device const &, math_expression const &) const
|
||||
{
|
||||
if (p_.fetch_policy==FETCH_FROM_LOCAL)
|
||||
return TEMPLATE_INVALID_FETCHING_POLICY_TYPE;
|
||||
return TEMPLATE_VALID;
|
||||
}
|
||||
|
||||
unsigned int gemv::lmem_usage(const math_expression&) const
|
||||
unsigned int reduce_2d::lmem_usage(const math_expression&) const
|
||||
{
|
||||
return (p_.local_size_0+1)*p_.local_size_1;
|
||||
}
|
||||
|
||||
std::string gemv::generate_impl(std::string const & suffix, math_expression const & expression, driver::Device const & device, mapping_type const & mapping) const
|
||||
std::string reduce_2d::generate_impl(std::string const & suffix, math_expression const & expression, driver::Device const & device, mapping_type const & mapping) const
|
||||
{
|
||||
using tools::to_string;
|
||||
|
||||
|
||||
std::vector<mapped_gemv*> dots;
|
||||
std::vector<size_t> idx = filter_nodes(&is_dot, expression, expression.root(), false);
|
||||
std::vector<mapped_reduce_2d*> reduce_1ds;
|
||||
std::vector<size_t> idx = filter_nodes(&is_reduce_1d, expression, expression.root(), false);
|
||||
for (auto & elem : idx)
|
||||
dots.push_back((mapped_gemv*)(mapping.at(mapping_key(elem, PARENT_NODE_TYPE)).get()));
|
||||
reduce_1ds.push_back((mapped_reduce_2d*)(mapping.at(mapping_key(elem, PARENT_NODE_TYPE)).get()));
|
||||
|
||||
kernel_generation_stream stream;
|
||||
driver::backend_type backend = device.backend();
|
||||
@@ -55,11 +55,11 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
|
||||
auto unroll_tmp = [&]()
|
||||
{
|
||||
unsigned int offset = 0;
|
||||
for (const auto & e : dots)
|
||||
for (const auto & e : reduce_1ds)
|
||||
{
|
||||
numeric_type dtype = lhs_most(e->math_expression().tree(), e->math_expression().root()).lhs.dtype;
|
||||
std::string sdtype = to_string(dtype);
|
||||
if (e->is_index_dot())
|
||||
if (e->is_index_reduction())
|
||||
{
|
||||
stream << e->process("uint* #name_temp = (uint*)(tmp + " + tools::to_string(offset) + "*M);");
|
||||
offset += 4*p_.num_groups_0;
|
||||
@@ -73,7 +73,7 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
|
||||
}
|
||||
};
|
||||
|
||||
int col_simd_width = (dot_type_ == REDUCE_COLUMNS) ? 1 : p_.simd_width;
|
||||
int col_simd_width = (reduce_1d_type_ == REDUCE_COLUMNS) ? 1 : p_.simd_width;
|
||||
switch(backend)
|
||||
{
|
||||
case driver::CUDA:
|
||||
@@ -96,7 +96,7 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
|
||||
unsigned int local_size_0_ld = p_.local_size_0;
|
||||
std::string local_size_0_ld_str = to_string(local_size_0_ld);
|
||||
|
||||
for (const auto & e : dots)
|
||||
for (const auto & e : reduce_1ds)
|
||||
stream << e->process(Local(backend).get() + " " + append_width("#scalartype", col_simd_width) + " #name_buf[" + to_string(p_.local_size_1*local_size_0_ld) + "];") << std::endl;
|
||||
|
||||
stream << "for(" << _size_t << " r = " << GlobalIdx1(backend) << "*" << col_simd_width << "; r < (M +" << p_.local_size_1 - 1 << ")/" << p_.local_size_1 << "*" << p_.local_size_1*col_simd_width << "; r += " << GlobalSize1(backend) << "*" << col_simd_width << ")" << std::endl;
|
||||
@@ -106,7 +106,7 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
|
||||
stream << "" << _size_t << " lidx = " << LocalIdx0(backend) << ";" << std::endl;
|
||||
stream << "" << _size_t << " lidy = " << LocalIdx1(backend) <<";" << std::endl;
|
||||
|
||||
for (const auto & e : dots){
|
||||
for (const auto & e : reduce_1ds){
|
||||
std::string data_type = append_width("#scalartype",col_simd_width);
|
||||
|
||||
stream << e->process(data_type + " #name_acc = " + InitPrefix(backend, data_type).get() + "(" + neutral_element((e)->root_op(), backend, "#scalartype") + ");") << std::endl;
|
||||
@@ -116,14 +116,14 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
|
||||
stream << "{" << std::endl;
|
||||
stream.inc_tab();
|
||||
|
||||
element_wise_loop_1D(stream, p_.fetch_policy, (dot_type_==REDUCE_COLUMNS)?p_.simd_width:1, "c", "N", GlobalIdx0(backend).get(), GlobalSize0(backend).get(), device, [&](unsigned int row_simd_width)
|
||||
element_wise_loop_1D(stream, p_.fetch_policy, (reduce_1d_type_==REDUCE_COLUMNS)?p_.simd_width:1, "c", "N", GlobalIdx0(backend).get(), GlobalSize0(backend).get(), device, [&](unsigned int row_simd_width)
|
||||
{
|
||||
|
||||
std::set<std::string> already_fetched;
|
||||
for (const auto & e : dots)
|
||||
for (const auto & e : reduce_1ds)
|
||||
{
|
||||
std::map<std::string, std::string> accessors;
|
||||
if(dot_type_==REDUCE_COLUMNS)
|
||||
if(reduce_1d_type_==REDUCE_COLUMNS)
|
||||
{
|
||||
std::string data_type = append_width("#scalartype",row_simd_width);
|
||||
accessors["arraynn"] = data_type + " #namereg = " + vload(row_simd_width, "#scalartype", "c*#stride", "#pointer + r*#ld", "1", backend,false)+";";
|
||||
@@ -147,20 +147,20 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
|
||||
str[a] = access_vector_type("#namereg",a);
|
||||
|
||||
|
||||
for (auto & elem : dots)
|
||||
for (auto & elem : reduce_1ds)
|
||||
for (unsigned int a = 0; a < row_simd_width; ++a)
|
||||
{
|
||||
std::string value = elem->evaluate_recursive(LHS_NODE_TYPE, {{"arraynn", str[a]}, {"repeat", str[a]}, {"array1", "#namereg"}});
|
||||
if (elem->is_index_dot())
|
||||
compute_index_dot(stream, elem->process("#name_acc"), "c*"+to_string(row_simd_width) + to_string(a), elem->process("#name_acc_value"), value, elem->root_op());
|
||||
if (elem->is_index_reduction())
|
||||
compute_index_reduce_1d(stream, elem->process("#name_acc"), "c*"+to_string(row_simd_width) + to_string(a), elem->process("#name_acc_value"), value, elem->root_op());
|
||||
else
|
||||
compute_dot(stream, elem->process("#name_acc"), value,elem->root_op());
|
||||
compute_reduce_1d(stream, elem->process("#name_acc"), value,elem->root_op());
|
||||
}
|
||||
});
|
||||
stream.dec_tab();
|
||||
stream << "}" << std::endl;
|
||||
|
||||
for (auto & expr : dots)
|
||||
for (auto & expr : reduce_1ds)
|
||||
stream << expr->process("#name_buf[lidy*" + local_size_0_ld_str + "+ lidx] = #name_acc;") << std::endl;
|
||||
|
||||
stream << "#pragma unroll" << std::endl;
|
||||
@@ -173,13 +173,13 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
|
||||
stream << "{" << std::endl;
|
||||
stream.inc_tab();
|
||||
|
||||
for (auto & e : dots)
|
||||
if (e->is_index_dot())
|
||||
compute_index_dot(stream, e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx + stride]")
|
||||
for (auto & e : reduce_1ds)
|
||||
if (e->is_index_reduction())
|
||||
compute_index_reduce_1d(stream, e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx + stride]")
|
||||
, e->process("#name_buf_value[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf_value[lidy*" + local_size_0_ld_str + " + lidx + stride]")
|
||||
, e->root_op());
|
||||
else
|
||||
compute_dot(stream,e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx + stride]"), e->root_op());
|
||||
compute_reduce_1d(stream,e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx + stride]"), e->root_op());
|
||||
|
||||
stream.dec_tab();
|
||||
stream << "}" << std::endl;
|
||||
@@ -196,9 +196,9 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
|
||||
std::map<std::string, std::string> accessors;
|
||||
for(int s = 0 ; s < col_simd_width ; ++s)
|
||||
{
|
||||
accessors["gemv"] = "#name_buf[lidy*" + local_size_0_ld_str + "]";
|
||||
accessors["reduce_2d"] = "#name_buf[lidy*" + local_size_0_ld_str + "]";
|
||||
if(col_simd_width > 1)
|
||||
accessors["gemv"] = access_vector_type(accessors["gemv"], s);
|
||||
accessors["reduce_2d"] = access_vector_type(accessors["reduce_2d"], s);
|
||||
accessors["arrayn"] = "#pointer[(r +" + to_string(s) + ")*#stride]";
|
||||
accessors["array1n"] = "#pointer[(r +" + to_string(s) + ")*#stride]";
|
||||
accessors["arrayn1"] = "#pointer[(r +" + to_string(s) + ")*#stride]";
|
||||
@@ -207,11 +207,11 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
|
||||
}
|
||||
else
|
||||
{
|
||||
for (mapped_dot const * e : dots)
|
||||
for (mapped_reduce const * e : reduce_1ds)
|
||||
{
|
||||
if(col_simd_width > 1)
|
||||
stream << "if(M - r > " << col_simd_width << "){" << std::endl;
|
||||
if (e->is_index_dot())
|
||||
if (e->is_index_reduction())
|
||||
stream << e->process(vstore(col_simd_width,"uint", "#name_buf_value[lidy*" + local_size_0_ld_str + "]", "0", "#name_temp_value + r + M*" + GroupIdx0(backend).get(), "1", backend, false)) << ";" << std::endl;
|
||||
stream << e->process(vstore(col_simd_width,"#scalartype", "#name_buf[lidy*" + local_size_0_ld_str + "]", "0", "#name_temp + r + M*" + GroupIdx0(backend).get(), "1", backend, false)) << ";" << std::endl;
|
||||
if(col_simd_width > 1)
|
||||
@@ -220,7 +220,7 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
|
||||
stream << "else{" << std::endl;
|
||||
stream.inc_tab();
|
||||
for(int s = 0 ; s < col_simd_width ; ++s){
|
||||
if (e->is_index_dot())
|
||||
if (e->is_index_reduction())
|
||||
stream << "if(r + " << s << "< M) " << e->process("#name_temp_value[r + " + to_string(s) + " + M*" + GroupIdx0(backend).get() + "] = " + access_vector_type("#name_buf_value[lidy*" + local_size_0_ld_str + "]", s)) << ";" << std::endl;
|
||||
stream << "if(r + " << s << "< M) " << e->process("#name_temp[r + " + to_string(s) + " + M*" + GroupIdx0(backend).get() + "] = " + access_vector_type("#name_buf[lidy*" + local_size_0_ld_str + "]", s)) << ";" << std::endl;
|
||||
}
|
||||
@@ -262,7 +262,7 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
|
||||
{"arrayn1", "#pointer += #start;"},
|
||||
{"arraynn", "#pointer += #start; "}}, expression, mapping);
|
||||
|
||||
for (const auto & e : dots)
|
||||
for (const auto & e : reduce_1ds)
|
||||
stream << e->process(Local(backend).get() + " #scalartype #name_buf[" + to_string(p_.local_size_1*local_size_0_ld) + "];") << std::endl;
|
||||
|
||||
stream << "for(" << _size_t << " r = " << GlobalIdx1(backend) << "; r < (M +" << p_.local_size_1 - 1 << ")/" << p_.local_size_1 << "*" << p_.local_size_1 << "; r += " << GlobalSize1(backend) << "){" << std::endl;
|
||||
@@ -270,7 +270,7 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
|
||||
stream << _size_t << " lidx = " << LocalIdx0(backend) << ";" << std::endl;
|
||||
stream << _size_t << " lidy = " << LocalIdx1(backend) <<";" << std::endl;
|
||||
|
||||
for (const auto & e : dots)
|
||||
for (const auto & e : reduce_1ds)
|
||||
stream << e->process("#scalartype #name_acc = " + neutral_element((e)->root_op(), backend, "#scalartype") + ";") << std::endl;
|
||||
|
||||
stream << "if (r < M)" << std::endl;
|
||||
@@ -280,8 +280,8 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
|
||||
stream << "for(" << _size_t << " c = lidx; c < " << p_.num_groups_0 << "; c += " << LocalSize0(backend) << "){" << std::endl;
|
||||
stream.inc_tab();
|
||||
|
||||
for (mapped_dot* e: dots)
|
||||
compute_dot(stream, e->process("#name_acc"), e->process("#name_temp[r + M*c]"), e->root_op());
|
||||
for (mapped_reduce* e: reduce_1ds)
|
||||
compute_reduce_1d(stream, e->process("#name_acc"), e->process("#name_temp[r + M*c]"), e->root_op());
|
||||
|
||||
stream.dec_tab();
|
||||
stream << "}" << std::endl;
|
||||
@@ -290,7 +290,7 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
|
||||
stream.dec_tab();
|
||||
stream << "}" << std::endl;
|
||||
|
||||
for (auto & expr : dots)
|
||||
for (auto & expr : reduce_1ds)
|
||||
stream << expr->process("#name_buf[lidy*" + local_size_0_ld_str + "+ lidx] = #name_acc;") << std::endl;
|
||||
|
||||
stream << "#pragma unroll" << std::endl;
|
||||
@@ -303,13 +303,13 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
|
||||
stream << "{" << std::endl;
|
||||
stream.inc_tab();
|
||||
|
||||
for (auto & e : dots)
|
||||
if (e->is_index_dot())
|
||||
compute_index_dot(stream, e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx + stride]")
|
||||
for (auto & e : reduce_1ds)
|
||||
if (e->is_index_reduction())
|
||||
compute_index_reduce_1d(stream, e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx + stride]")
|
||||
, e->process("#name_buf_value[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf_value[lidy*" + local_size_0_ld_str + " + lidx + stride]")
|
||||
, e->root_op());
|
||||
else
|
||||
compute_dot(stream,e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx + stride]"), e->root_op());
|
||||
compute_reduce_1d(stream,e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx]"), e->process("#name_buf[lidy*" + local_size_0_ld_str + " + lidx + stride]"), e->root_op());
|
||||
|
||||
stream.dec_tab();
|
||||
stream << "}" << std::endl;
|
||||
@@ -323,7 +323,7 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
|
||||
stream.inc_tab();
|
||||
|
||||
std::map<std::string, std::string> accessors;
|
||||
accessors["gemv"] = "#name_buf[lidy*" + local_size_0_ld_str + "]";
|
||||
accessors["reduce_2d"] = "#name_buf[lidy*" + local_size_0_ld_str + "]";
|
||||
accessors["arrayn"] = "#pointer[r*#stride]";
|
||||
accessors["array1n"] = "#pointer[r*#stride]";
|
||||
accessors["arrayn1"] = "#pointer[r*#stride]";
|
||||
@@ -344,30 +344,30 @@ std::string gemv::generate_impl(std::string const & suffix, math_expression cons
|
||||
return stream.str();
|
||||
}
|
||||
|
||||
gemv::gemv(gemv::parameters_type const & parameters,
|
||||
gemv::dot_type rtype,
|
||||
reduce_2d::reduce_2d(reduce_2d::parameters_type const & parameters,
|
||||
reduce_2d::reduce_1d_type rtype,
|
||||
binding_policy_t binding_policy) :
|
||||
base_impl<gemv, gemv_parameters>(parameters, binding_policy),
|
||||
dot_type_(rtype){ }
|
||||
base_impl<reduce_2d, reduce_2d_parameters>(parameters, binding_policy),
|
||||
reduce_1d_type_(rtype){ }
|
||||
|
||||
std::vector<int_t> gemv::input_sizes(math_expression const & expression) const
|
||||
std::vector<int_t> reduce_2d::input_sizes(math_expression const & expression) const
|
||||
{
|
||||
std::vector<std::size_t> idx = filter_nodes(&is_dot, expression, expression.root(), false);
|
||||
std::vector<std::size_t> idx = filter_nodes(&is_reduce_1d, expression, expression.root(), false);
|
||||
std::pair<int_t, int_t> MN = matrix_size(expression.tree(), lhs_most(expression.tree(), idx[0]));
|
||||
if(dot_type_==REDUCE_COLUMNS)
|
||||
if(reduce_1d_type_==REDUCE_COLUMNS)
|
||||
std::swap(MN.first,MN.second);
|
||||
return {MN.first, MN.second};
|
||||
}
|
||||
|
||||
void gemv::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const & control)
|
||||
void reduce_2d::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, base & fallback, execution_handler const & control)
|
||||
{
|
||||
math_expression const & expression = control.x();
|
||||
|
||||
std::vector<int_t> MN = input_sizes(expression);
|
||||
std::vector<math_expression::node const *> dots;
|
||||
std::vector<size_t> dots_idx = filter_nodes(&is_dot, expression, expression.root(), false);
|
||||
for (size_t idx : dots_idx)
|
||||
dots.push_back(&expression.tree()[idx]);
|
||||
std::vector<math_expression::node const *> reduce_1ds;
|
||||
std::vector<size_t> reduce_1ds_idx = filter_nodes(&is_reduce_1d, expression, expression.root(), false);
|
||||
for (size_t idx : reduce_1ds_idx)
|
||||
reduce_1ds.push_back(&expression.tree()[idx]);
|
||||
|
||||
//Fallback
|
||||
if(p_.simd_width>1 && requires_fallback(expression))
|
||||
@@ -406,15 +406,15 @@ void gemv::enqueue(driver::CommandQueue & queue, driver::Program const & program
|
||||
control.execution_options().enqueue(program.context(), kernels[i], global[i], local[i]);
|
||||
}
|
||||
|
||||
gemv_n::gemv_n(gemv_parameters const & parameters,binding_policy_t binding_policy): gemv(parameters, REDUCE_ROWS, binding_policy){}
|
||||
reduce_2d_n::reduce_2d_n(reduce_2d_parameters const & parameters,binding_policy_t binding_policy): reduce_2d(parameters, REDUCE_ROWS, binding_policy){}
|
||||
|
||||
gemv_n::gemv_n(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2,
|
||||
fetching_policy_type fetch, binding_policy_t bind): gemv(gemv_parameters(simd, ls1, ls2, ng1, ng2, fetch), REDUCE_ROWS, bind) {}
|
||||
reduce_2d_n::reduce_2d_n(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2,
|
||||
fetching_policy_type fetch, binding_policy_t bind): reduce_2d(reduce_2d_parameters(simd, ls1, ls2, ng1, ng2, fetch), REDUCE_ROWS, bind) {}
|
||||
|
||||
gemv_t::gemv_t(gemv::parameters_type const & parameters, binding_policy_t binding_policy): gemv(parameters, REDUCE_COLUMNS, binding_policy){}
|
||||
reduce_2d_t::reduce_2d_t(reduce_2d::parameters_type const & parameters, binding_policy_t binding_policy): reduce_2d(parameters, REDUCE_COLUMNS, binding_policy){}
|
||||
|
||||
gemv_t::gemv_t(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2,
|
||||
fetching_policy_type fetch, binding_policy_t bind): gemv(gemv_parameters(simd, ls1, ls2, ng1, ng2, fetch), REDUCE_COLUMNS, bind) {}
|
||||
reduce_2d_t::reduce_2d_t(unsigned int simd, unsigned int ls1, unsigned int ls2, unsigned int ng1, unsigned int ng2,
|
||||
fetching_policy_type fetch, binding_policy_t bind): reduce_2d(reduce_2d_parameters(simd, ls1, ls2, ng1, ng2, fetch), REDUCE_COLUMNS, bind) {}
|
||||
|
||||
|
||||
}
|
@@ -81,12 +81,12 @@ public:
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_matrix_column>(&math_expression, root_idx, &mapping_)));
else if(root_node.op.type==OPERATOR_ACCESS_INDEX_TYPE)
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_array_access>(&math_expression, root_idx, &mapping_)));
else if (detail::is_scalar_dot(root_node))
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_scalar_dot>(&math_expression, root_idx, &mapping_)));
else if (detail::is_vector_dot(root_node))
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_gemv>(&math_expression, root_idx, &mapping_)));
else if (detail::is_scalar_reduce_1d(root_node))
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_reduce_1d>(&math_expression, root_idx, &mapping_)));
else if (detail::is_vector_reduce_1d(root_node))
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_reduce_2d>(&math_expression, root_idx, &mapping_)));
else if (root_node.op.type_family == OPERATOR_GEMM_TYPE_FAMILY)
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_gemm>(&math_expression, root_idx, &mapping_)));
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_matrix_product>(&math_expression, root_idx, &mapping_)));
else if (root_node.op.type == OPERATOR_REPEAT_TYPE)
mapping_.insert(mapping_type::value_type(key, binary_leaf<mapped_repeat>(&math_expression, root_idx, &mapping_)));
else if (root_node.op.type == OPERATOR_OUTER_PROD_TYPE)
@@ -12,7 +12,7 @@ namespace isaac
namespace templates
{

inline void compute_dot(kernel_generation_stream & os, std::string acc, std::string cur, op_element const & op)
inline void compute_reduce_1d(kernel_generation_stream & os, std::string acc, std::string cur, op_element const & op)
{
if (detail::is_elementwise_function(op))
os << acc << "=" << evaluate(op.type) << "(" << acc << "," << cur << ");" << std::endl;
@@ -20,7 +20,7 @@ inline void compute_dot(kernel_generation_stream & os, std::string acc, std::str
os << acc << "= (" << acc << ")" << evaluate(op.type) << "(" << cur << ");" << std::endl;
}

inline void compute_index_dot(kernel_generation_stream & os, std::string acc, std::string cur, std::string const & acc_value, std::string const & cur_value, op_element const & op)
inline void compute_index_reduce_1d(kernel_generation_stream & os, std::string acc, std::string cur, std::string const & acc_value, std::string const & cur_value, op_element const & op)
{
// os << acc << " = " << cur_value << ">" << acc_value << "?" << cur << ":" << acc << ";" << std::endl;
os << acc << "= select(" << acc << "," << cur << "," << cur_value << ">" << acc_value << ");" << std::endl;
@@ -51,11 +51,11 @@ inline std::string neutral_element(op_element const & op, driver::backend_type b
case OPERATOR_ELEMENT_MIN_TYPE : return INF;
case OPERATOR_ELEMENT_ARGMIN_TYPE : return INF;

default: throw std::runtime_error("Unsupported dot operator : no neutral element known");
default: throw std::runtime_error("Unsupported reduce_1d operator : no neutral element known");
}
}

inline bool is_dot(math_expression::node const & node)
inline bool is_reduce_1d(math_expression::node const & node)
{
return node.op.type_family==OPERATOR_VECTOR_DOT_TYPE_FAMILY
|| node.op.type_family==OPERATOR_COLUMNS_DOT_TYPE_FAMILY
@@ -63,7 +63,7 @@ inline bool is_dot(math_expression::node const & node)
}

inline bool is_index_dot(op_element const & op)
inline bool is_index_reduction(op_element const & op)
{
return op.type==OPERATOR_ELEMENT_ARGFMAX_TYPE
|| op.type==OPERATOR_ELEMENT_ARGMAX_TYPE
@@ -9,11 +9,11 @@
|
||||
#include "isaac/driver/program_cache.h"
|
||||
#include "isaac/profiles/profiles.h"
|
||||
#include "isaac/kernels/parse.h"
|
||||
#include "isaac/kernels/templates/axpy.h"
|
||||
#include "isaac/kernels/templates/dot.h"
|
||||
#include "isaac/kernels/templates/ger.h"
|
||||
#include "isaac/kernels/templates/gemv.h"
|
||||
#include "isaac/kernels/templates/gemm.h"
|
||||
#include "isaac/kernels/templates/elementwise_1d.h"
|
||||
#include "isaac/kernels/templates/reduce_1d.h"
|
||||
#include "isaac/kernels/templates/elementwise_2d.h"
|
||||
#include "isaac/kernels/templates/reduce_2d.h"
|
||||
#include "isaac/kernels/templates/matrix_product.h"
|
||||
#include "isaac/exception/operation_not_supported.h"
|
||||
|
||||
|
||||
@@ -134,24 +134,24 @@ profiles::value_type::templates_container const & profiles::value_type::template
|
||||
std::shared_ptr<templates::base> profiles::create(std::string const & template_name, std::vector<int> const & x)
|
||||
{
|
||||
templates::fetching_policy_type fetch[] = {templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_GLOBAL_STRIDED, templates::FETCH_FROM_GLOBAL_CONTIGUOUS};
|
||||
if(template_name=="axpy")
|
||||
return std::shared_ptr<templates::base>(new templates::axpy(x[0], x[1], x[2], fetch[x[3]]));
|
||||
else if(template_name=="dot")
|
||||
return std::shared_ptr<templates::base>(new templates::dot(x[0], x[1], x[2], fetch[x[3]]));
|
||||
else if(template_name=="ger")
|
||||
return std::shared_ptr<templates::base>(new templates::ger(x[0], x[1], x[2], x[3], x[4], fetch[x[5]]));
|
||||
else if(template_name.find("gemv_n")!=std::string::npos)
|
||||
return std::shared_ptr<templates::base>(new templates::gemv_n(x[0], x[1], x[2], x[3], x[4], fetch[x[5]]));
|
||||
else if(template_name.find("gemv_t")!=std::string::npos)
|
||||
return std::shared_ptr<templates::base>(new templates::gemv_t(x[0], x[1], x[2], x[3], x[4], fetch[x[5]]));
|
||||
else if(template_name.find("gemm_nn")!=std::string::npos)
|
||||
return std::shared_ptr<templates::base>(new templates::gemm_nn(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], fetch[x[8]], fetch[x[9]], x[10], x[11]));
|
||||
else if(template_name.find("gemm_tn")!=std::string::npos)
|
||||
return std::shared_ptr<templates::base>(new templates::gemm_tn(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], fetch[x[8]], fetch[x[9]], x[10], x[11]));
|
||||
else if(template_name.find("gemm_nt")!=std::string::npos)
|
||||
return std::shared_ptr<templates::base>(new templates::gemm_nt(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], fetch[x[8]], fetch[x[9]], x[10], x[11]));
|
||||
else if(template_name.find("gemm_tt")!=std::string::npos)
|
||||
return std::shared_ptr<templates::base>(new templates::gemm_tt(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], fetch[x[8]], fetch[x[9]], x[10], x[11]));
|
||||
if(template_name=="elementwise_1d")
|
||||
return std::shared_ptr<templates::base>(new templates::elementwise_1d(x[0], x[1], x[2], fetch[x[3]]));
|
||||
else if(template_name=="reduce_1d")
|
||||
return std::shared_ptr<templates::base>(new templates::reduce_1d(x[0], x[1], x[2], fetch[x[3]]));
|
||||
else if(template_name=="elementwise_2d")
|
||||
return std::shared_ptr<templates::base>(new templates::elementwise_2d(x[0], x[1], x[2], x[3], x[4], fetch[x[5]]));
|
||||
else if(template_name.find("reduce_2d_n")!=std::string::npos)
|
||||
return std::shared_ptr<templates::base>(new templates::reduce_2d_n(x[0], x[1], x[2], x[3], x[4], fetch[x[5]]));
|
||||
else if(template_name.find("reduce_2d_t")!=std::string::npos)
|
||||
return std::shared_ptr<templates::base>(new templates::reduce_2d_t(x[0], x[1], x[2], x[3], x[4], fetch[x[5]]));
|
||||
else if(template_name.find("matrix_product_nn")!=std::string::npos)
|
||||
return std::shared_ptr<templates::base>(new templates::matrix_product_nn(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], fetch[x[8]], fetch[x[9]], x[10], x[11]));
|
||||
else if(template_name.find("matrix_product_tn")!=std::string::npos)
|
||||
return std::shared_ptr<templates::base>(new templates::matrix_product_tn(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], fetch[x[8]], fetch[x[9]], x[10], x[11]));
|
||||
else if(template_name.find("matrix_product_nt")!=std::string::npos)
|
||||
return std::shared_ptr<templates::base>(new templates::matrix_product_nt(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], fetch[x[8]], fetch[x[9]], x[10], x[11]));
|
||||
else if(template_name.find("matrix_product_tt")!=std::string::npos)
|
||||
return std::shared_ptr<templates::base>(new templates::matrix_product_tt(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], fetch[x[8]], fetch[x[9]], x[10], x[11]));
|
||||
else
|
||||
throw std::invalid_argument("Invalid expression: " + template_name);
|
||||
}
|
||||
@@ -163,7 +163,7 @@ void profiles::import(std::string const & str, driver::CommandQueue const & queu
|
||||
rapidjson::Document document;
|
||||
document.Parse<0>(str.c_str());
|
||||
//Deserialize
|
||||
std::vector<std::string> operations = {"axpy", "dot", "ger", "gemv_n", "gemv_t", "gemm_nn", "gemm_tn", "gemm_nt", "gemm_tt"};
|
||||
std::vector<std::string> operations = {"elementwise_1d", "reduce_1d", "elementwise_2d", "reduce_2d_n", "reduce_2d_t", "matrix_product_nn", "matrix_product_tn", "matrix_product_nt", "matrix_product_tt"};
|
||||
std::vector<std::string> dtype = {"float32", "float64"};
|
||||
for(auto & operation : operations)
|
||||
{
|
||||
@@ -265,15 +265,15 @@ std::map<std::pair<expression_type, numeric_type>, std::shared_ptr<templates::ba
numeric_type types[] = {CHAR_TYPE, UCHAR_TYPE, SHORT_TYPE, USHORT_TYPE, INT_TYPE, UINT_TYPE, LONG_TYPE, ULONG_TYPE, FLOAT_TYPE, DOUBLE_TYPE};
for(auto DTYPE : types)
{
res[std::make_pair(AXPY_TYPE, DTYPE)] = ptr_t (new templates::axpy(1,64,128,templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(DOT_TYPE, DTYPE)] = ptr_t(new templates::dot(1,64,128,templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(GER_TYPE, DTYPE)] = ptr_t(new templates::ger(1,128,1,16,32,templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(GEMV_N_TYPE, DTYPE)] = ptr_t(new templates::gemv_n(1, 8, 8, 4, 16, templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(GEMV_T_TYPE, DTYPE)] = ptr_t(new templates::gemv_t(1, 8, 8, 64, 8, templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(GEMM_NN_TYPE, DTYPE)] = ptr_t(new templates::gemm_nn(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
res[std::make_pair(GEMM_TN_TYPE, DTYPE)] = ptr_t(new templates::gemm_tn(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
res[std::make_pair(GEMM_NT_TYPE, DTYPE)] = ptr_t(new templates::gemm_nt(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
res[std::make_pair(GEMM_TT_TYPE, DTYPE)] = ptr_t(new templates::gemm_tt(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
res[std::make_pair(AXPY_TYPE, DTYPE)] = ptr_t (new templates::elementwise_1d(1,64,128,templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(DOT_TYPE, DTYPE)] = ptr_t(new templates::reduce_1d(1,64,128,templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(GER_TYPE, DTYPE)] = ptr_t(new templates::elementwise_2d(1,128,1,16,32,templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(GEMV_N_TYPE, DTYPE)] = ptr_t(new templates::reduce_2d_n(1, 8, 8, 4, 16, templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(GEMV_T_TYPE, DTYPE)] = ptr_t(new templates::reduce_2d_t(1, 8, 8, 64, 8, templates::FETCH_FROM_GLOBAL_STRIDED));
res[std::make_pair(GEMM_NN_TYPE, DTYPE)] = ptr_t(new templates::matrix_product_nn(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
res[std::make_pair(GEMM_TN_TYPE, DTYPE)] = ptr_t(new templates::matrix_product_tn(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
res[std::make_pair(GEMM_NT_TYPE, DTYPE)] = ptr_t(new templates::matrix_product_nt(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
res[std::make_pair(GEMM_TT_TYPE, DTYPE)] = ptr_t(new templates::matrix_product_tt(1, 8, 16, 8, 1, 8, 1, 8, templates::FETCH_FROM_LOCAL, templates::FETCH_FROM_LOCAL, 8, 8, true));
}
return res;
}
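For context, a minimal usage sketch (illustrative, not part of the commit) of retrieving one entry from the preset map built above; init_presets is a hypothetical stand-in for the enclosing function, whose real name lies outside this hunk:

#include <map>
#include <memory>
#include <utility>
typedef std::shared_ptr<templates::base> ptr_t;
// Hypothetical call; the map's key and value types are the ones shown above.
std::map<std::pair<expression_type, numeric_type>, ptr_t> presets = init_presets();
// After this commit the entry for (AXPY_TYPE, FLOAT_TYPE) holds a
// templates::elementwise_1d instance where it used to hold templates::axpy.
ptr_t default_elementwise_1d = presets[std::make_pair(AXPY_TYPE, FLOAT_TYPE)];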
@@ -161,7 +161,7 @@ namespace isaac
expression_type final_type;
//GEMM
if(symbolic::preset::gemm::args args = symbolic::preset::gemm::check(tree, rootidx)){
if(symbolic::preset::matrix_product::args args = symbolic::preset::matrix_product::check(tree, rootidx)){
final_type = args.type;
}
//Default
@@ -9,7 +9,7 @@ namespace symbolic
namespace preset
{

void gemm::handle_node(math_expression::container_type const & tree, size_t rootidx, args & a)
void matrix_product::handle_node(math_expression::container_type const & tree, size_t rootidx, args & a)
{
//Matrix-Matrix product node
if(tree[rootidx].op.type_family==OPERATOR_GEMM_TYPE_FAMILY)
@@ -46,11 +46,11 @@ void gemm::handle_node(math_expression::container_type const & tree, size_t root
}
}

gemm::args gemm::check(math_expression::container_type const & tree, size_t rootidx)
matrix_product::args matrix_product::check(math_expression::container_type const & tree, size_t rootidx)
{
lhs_rhs_element const * assigned = &tree[rootidx].lhs;
numeric_type dtype = assigned->dtype;
gemm::args result ;
matrix_product::args result ;
if(dtype==INVALID_NUMERIC_TYPE)
return result;
result.alpha = value_scalar(1, dtype);
@@ -73,7 +73,7 @@ def main():
libraries += ['gnustl_shared']

#Source files
src = 'src/lib/symbolic/preset.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/io.cpp src/lib/symbolic/expression.cpp src/lib/array.cpp src/lib/value_scalar.cpp src/lib/driver/backend.cpp src/lib/driver/device.cpp src/lib/driver/kernel.cpp src/lib/driver/buffer.cpp src/lib/driver/platform.cpp src/lib/driver/check.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/dispatch.cpp src/lib/driver/program_cache.cpp src/lib/driver/context.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/handle.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/profiles/presets.cpp src/lib/profiles/profiles.cpp src/lib/profiles/predictors/random_forest.cpp src/lib/kernels/templates/gemv.cpp src/lib/kernels/templates/axpy.cpp src/lib/kernels/templates/gemm.cpp src/lib/kernels/templates/ger.cpp src/lib/kernels/templates/dot.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/stream.cpp src/lib/kernels/parse.cpp src/lib/kernels/keywords.cpp src/lib/kernels/binder.cpp src/lib/wrap/clBLAS.cpp src/lib/wrap/cublas.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
src = 'src/lib/symbolic/preset.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/io.cpp src/lib/symbolic/expression.cpp src/lib/array.cpp src/lib/value_scalar.cpp src/lib/driver/backend.cpp src/lib/driver/device.cpp src/lib/driver/kernel.cpp src/lib/driver/buffer.cpp src/lib/driver/platform.cpp src/lib/driver/check.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/dispatch.cpp src/lib/driver/program_cache.cpp src/lib/driver/context.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/handle.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/profiles/presets.cpp src/lib/profiles/profiles.cpp src/lib/profiles/predictors/random_forest.cpp src/lib/kernels/templates/reduce_2d.cpp src/lib/kernels/templates/elementwise_2d.cpp src/lib/kernels/templates/elementwise_1d.cpp src/lib/kernels/templates/reduce_1d.cpp src/lib/kernels/templates/matrix_product.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/stream.cpp src/lib/kernels/parse.cpp src/lib/kernels/keywords.cpp src/lib/kernels/binder.cpp src/lib/wrap/clBLAS.cpp src/lib/wrap/cublas.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
boostsrc = 'external/boost/libs/'
for s in ['numpy','python','smart_ptr','system','thread']:
src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]