105 lines
3.8 KiB
C
105 lines
3.8 KiB
C
![]() |
#ifndef _COMMON_CUDA_FORWARDS_H_
|
||
|
/* Include-guard macro. It must be spelled exactly like the name tested by the
 * #ifndef above; otherwise the guard never triggers and a second inclusion of
 * this header redefines every typedef and enum declared below. */
#define _COMMON_CUDA_FORWARDS_H_
|
||
|
|
||
|
/*
 * Opaque handle types.
 *
 * Both structs are only forward-declared here; their bodies are not defined
 * in this header, so code including it can pass the handles around as
 * pointers but cannot dereference them.
 */
struct cublasContext;
typedef struct cublasContext *cublasHandle_t;   /* pointer to the opaque cublasContext */

struct CUstream_st;
typedef struct CUstream_st *cudaStream_t;       /* pointer to the opaque CUstream_st */
|
||
|
|
||
|
/*
 * CUBLAS status codes.
 *
 * The numeric values are explicit and not contiguous (2, 4-6, 9, 10 and 12
 * are unused), so they must not be renumbered.
 * NOTE(review): these values presumably mirror the cublasStatus_t enum of the
 * NVIDIA cuBLAS headers - confirm against the toolkit version in use.
 */
typedef enum {
    CUBLAS_STATUS_SUCCESS          = 0,
    CUBLAS_STATUS_NOT_INITIALIZED  = 1,
    CUBLAS_STATUS_ALLOC_FAILED     = 3,
    CUBLAS_STATUS_INVALID_VALUE    = 7,
    CUBLAS_STATUS_ARCH_MISMATCH    = 8,
    CUBLAS_STATUS_MAPPING_ERROR    = 11,
    CUBLAS_STATUS_EXECUTION_FAILED = 13,
    CUBLAS_STATUS_INTERNAL_ERROR   = 14,
    CUBLAS_STATUS_NOT_SUPPORTED    = 15,
    CUBLAS_STATUS_LICENSE_ERROR    = 16
} cublasStatus_t;
|
||
|
|
||
|
/*
 * Selector for the different GEMM algorithms.
 *
 * Two spellings share each default value: CUBLAS_GEMM_DFALT and
 * CUBLAS_GEMM_DEFAULT are both -1, and CUBLAS_GEMM_DEFAULT_TENSOR_OP and
 * CUBLAS_GEMM_DFALT_TENSOR_OP are both 99. The numbered algorithms occupy
 * 0..23 and their *_TENSOR_OP counterparts occupy 100..115.
 * NOTE(review): values presumably mirror the cublasGemmAlgo_t enum of the
 * NVIDIA cuBLAS headers - confirm against the toolkit version in use.
 */
typedef enum {
    CUBLAS_GEMM_DFALT             = -1,
    CUBLAS_GEMM_DEFAULT           = -1,
    CUBLAS_GEMM_ALGO0             = 0,
    CUBLAS_GEMM_ALGO1             = 1,
    CUBLAS_GEMM_ALGO2             = 2,
    CUBLAS_GEMM_ALGO3             = 3,
    CUBLAS_GEMM_ALGO4             = 4,
    CUBLAS_GEMM_ALGO5             = 5,
    CUBLAS_GEMM_ALGO6             = 6,
    CUBLAS_GEMM_ALGO7             = 7,
    CUBLAS_GEMM_ALGO8             = 8,
    CUBLAS_GEMM_ALGO9             = 9,
    CUBLAS_GEMM_ALGO10            = 10,
    CUBLAS_GEMM_ALGO11            = 11,
    CUBLAS_GEMM_ALGO12            = 12,
    CUBLAS_GEMM_ALGO13            = 13,
    CUBLAS_GEMM_ALGO14            = 14,
    CUBLAS_GEMM_ALGO15            = 15,
    CUBLAS_GEMM_ALGO16            = 16,
    CUBLAS_GEMM_ALGO17            = 17,
    CUBLAS_GEMM_ALGO18            = 18,  /* sliced 32x32 */
    CUBLAS_GEMM_ALGO19            = 19,  /* sliced 64x32 */
    CUBLAS_GEMM_ALGO20            = 20,  /* sliced 128x32 */
    CUBLAS_GEMM_ALGO21            = 21,  /* sliced 32x32 -splitK */
    CUBLAS_GEMM_ALGO22            = 22,  /* sliced 64x32 -splitK */
    CUBLAS_GEMM_ALGO23            = 23,  /* sliced 128x32 -splitK */
    CUBLAS_GEMM_DEFAULT_TENSOR_OP = 99,
    CUBLAS_GEMM_DFALT_TENSOR_OP   = 99,
    CUBLAS_GEMM_ALGO0_TENSOR_OP   = 100,
    CUBLAS_GEMM_ALGO1_TENSOR_OP   = 101,
    CUBLAS_GEMM_ALGO2_TENSOR_OP   = 102,
    CUBLAS_GEMM_ALGO3_TENSOR_OP   = 103,
    CUBLAS_GEMM_ALGO4_TENSOR_OP   = 104,
    CUBLAS_GEMM_ALGO5_TENSOR_OP   = 105,
    CUBLAS_GEMM_ALGO6_TENSOR_OP   = 106,
    CUBLAS_GEMM_ALGO7_TENSOR_OP   = 107,
    CUBLAS_GEMM_ALGO8_TENSOR_OP   = 108,
    CUBLAS_GEMM_ALGO9_TENSOR_OP   = 109,
    CUBLAS_GEMM_ALGO10_TENSOR_OP  = 110,
    CUBLAS_GEMM_ALGO11_TENSOR_OP  = 111,
    CUBLAS_GEMM_ALGO12_TENSOR_OP  = 112,
    CUBLAS_GEMM_ALGO13_TENSOR_OP  = 113,
    CUBLAS_GEMM_ALGO14_TENSOR_OP  = 114,
    CUBLAS_GEMM_ALGO15_TENSOR_OP  = 115
} cublasGemmAlgo_t;
|
||
|
|
||
|
/*
 * Element data-type tags. The R/C prefix distinguishes real from complex
 * (a pair of numbers); the suffix names the underlying scalar type.
 * Enumerators are listed grouped by scalar type, not by numeric value, so
 * the explicit values below are what matters - do not renumber.
 * NOTE(review): values presumably mirror the cudaDataType enum of the CUDA
 * toolkit headers - confirm against the toolkit version in use.
 */
typedef enum cudaDataType_t
{
    CUDA_R_16F = 2,   /* real as a half */
    CUDA_C_16F = 6,   /* complex as a pair of half numbers */
    CUDA_R_32F = 0,   /* real as a float */
    CUDA_C_32F = 4,   /* complex as a pair of float numbers */
    CUDA_R_64F = 1,   /* real as a double */
    CUDA_C_64F = 5,   /* complex as a pair of double numbers */
    CUDA_R_8I  = 3,   /* real as a signed char */
    CUDA_C_8I  = 7,   /* complex as a pair of signed char numbers */
    CUDA_R_8U  = 8,   /* real as an unsigned char */
    CUDA_C_8U  = 9,   /* complex as a pair of unsigned char numbers */
    CUDA_R_32I = 10,  /* real as a signed int */
    CUDA_C_32I = 11,  /* complex as a pair of signed int numbers */
    CUDA_R_32U = 12,  /* real as an unsigned int */
    CUDA_C_32U = 13   /* complex as a pair of unsigned int numbers */
} cudaDataType;

/* cublasDataType_t is an alias for cudaDataType, not a distinct type. */
typedef cudaDataType cublasDataType_t;
|
||
|
|
||
|
/*
 * Operation selector.
 *
 * CUBLAS_OP_HERMITAN shares the value 2 with CUBLAS_OP_C, so the two names
 * are interchangeable. The enumerator spellings (HERMITAN, CONJG) are part
 * of the interface and are kept exactly as they are.
 * NOTE(review): N/T/C presumably stand for "no transpose", "transpose" and
 * "conjugate transpose" as in cuBLAS - confirm against the cuBLAS reference.
 */
typedef enum {
    CUBLAS_OP_N        = 0,
    CUBLAS_OP_T        = 1,
    CUBLAS_OP_C        = 2,
    CUBLAS_OP_HERMITAN = 2,  /* synonym of CUBLAS_OP_C */
    CUBLAS_OP_CONJG    = 3   /* conjugate */
} cublasOperation_t;
|
||
|
|
||
|
/*
 * Math-mode selector: default math (0) versus tensor-operation math (1).
 * NOTE(review): values presumably mirror the cublasMath_t enum of the NVIDIA
 * cuBLAS headers - confirm against the toolkit version in use.
 */
typedef enum {
    CUBLAS_DEFAULT_MATH   = 0,
    CUBLAS_TENSOR_OP_MATH = 1
} cublasMath_t;
|
||
|
|
||
|
#endif /* _COMMON_CUDA_FORWARDS_H_ */
|