triton/tests/common/cuda/forward.h

#ifndef _COMMON_CUDA_FORWARDS_H_
/* Guard macro: the spelling must match the name tested by the #ifndef on the
 * previous line exactly. If the two differ, the guard is never armed and a
 * second inclusion of this header redefines every typedef and enum below. */
#define _COMMON_CUDA_FORWARDS_H_

/* Opaque handle types. The structs are only forward-declared in the visible
 * part of this header (no definition), so code including it can hold and
 * pass the pointers but cannot look inside them. */
struct CUstream_st;
struct cublasContext;

typedef struct CUstream_st   *cudaStream_t;
typedef struct cublasContext *cublasHandle_t;

/*
 * Status codes carried by cublasStatus_t.
 *
 * The numeric values are sparse (0, 1, 3, 7, 8, 11, 13-16) and are written
 * out explicitly. Presumably they mirror the vendor cuBLAS header, in which
 * case they must not be renumbered -- TODO confirm against cublas_api.h.
 */
typedef enum {
    CUBLAS_STATUS_SUCCESS          = 0,
    CUBLAS_STATUS_NOT_INITIALIZED  = 1,
    CUBLAS_STATUS_ALLOC_FAILED     = 3,
    CUBLAS_STATUS_INVALID_VALUE    = 7,
    CUBLAS_STATUS_ARCH_MISMATCH    = 8,
    CUBLAS_STATUS_MAPPING_ERROR    = 11,
    CUBLAS_STATUS_EXECUTION_FAILED = 13,
    CUBLAS_STATUS_INTERNAL_ERROR   = 14,
    CUBLAS_STATUS_NOT_SUPPORTED    = 15,
    CUBLAS_STATUS_LICENSE_ERROR    = 16
} cublasStatus_t;

/*
 * GEMM algorithm selectors (cublasGemmAlgo_t).
 *
 * Layout of the value space as declared here:
 *   -1         default algorithm (two spellings: DFALT and DEFAULT)
 *   0 .. 23    explicitly numbered algorithms
 *   99         default tensor-op algorithm (two spellings)
 *   100 .. 115 explicitly numbered tensor-op algorithms
 */
typedef enum {
    CUBLAS_GEMM_DFALT             = -1,
    CUBLAS_GEMM_DEFAULT           = -1,

    CUBLAS_GEMM_ALGO0             = 0,
    CUBLAS_GEMM_ALGO1             = 1,
    CUBLAS_GEMM_ALGO2             = 2,
    CUBLAS_GEMM_ALGO3             = 3,
    CUBLAS_GEMM_ALGO4             = 4,
    CUBLAS_GEMM_ALGO5             = 5,
    CUBLAS_GEMM_ALGO6             = 6,
    CUBLAS_GEMM_ALGO7             = 7,
    CUBLAS_GEMM_ALGO8             = 8,
    CUBLAS_GEMM_ALGO9             = 9,
    CUBLAS_GEMM_ALGO10            = 10,
    CUBLAS_GEMM_ALGO11            = 11,
    CUBLAS_GEMM_ALGO12            = 12,
    CUBLAS_GEMM_ALGO13            = 13,
    CUBLAS_GEMM_ALGO14            = 14,
    CUBLAS_GEMM_ALGO15            = 15,
    CUBLAS_GEMM_ALGO16            = 16,
    CUBLAS_GEMM_ALGO17            = 17,
    CUBLAS_GEMM_ALGO18            = 18,  /* sliced 32x32 */
    CUBLAS_GEMM_ALGO19            = 19,  /* sliced 64x32 */
    CUBLAS_GEMM_ALGO20            = 20,  /* sliced 128x32 */
    CUBLAS_GEMM_ALGO21            = 21,  /* sliced 32x32, split-K */
    CUBLAS_GEMM_ALGO22            = 22,  /* sliced 64x32, split-K */
    CUBLAS_GEMM_ALGO23            = 23,  /* sliced 128x32, split-K */

    CUBLAS_GEMM_DEFAULT_TENSOR_OP = 99,
    CUBLAS_GEMM_DFALT_TENSOR_OP   = 99,

    CUBLAS_GEMM_ALGO0_TENSOR_OP   = 100,
    CUBLAS_GEMM_ALGO1_TENSOR_OP   = 101,
    CUBLAS_GEMM_ALGO2_TENSOR_OP   = 102,
    CUBLAS_GEMM_ALGO3_TENSOR_OP   = 103,
    CUBLAS_GEMM_ALGO4_TENSOR_OP   = 104,
    CUBLAS_GEMM_ALGO5_TENSOR_OP   = 105,
    CUBLAS_GEMM_ALGO6_TENSOR_OP   = 106,
    CUBLAS_GEMM_ALGO7_TENSOR_OP   = 107,
    CUBLAS_GEMM_ALGO8_TENSOR_OP   = 108,
    CUBLAS_GEMM_ALGO9_TENSOR_OP   = 109,
    CUBLAS_GEMM_ALGO10_TENSOR_OP  = 110,
    CUBLAS_GEMM_ALGO11_TENSOR_OP  = 111,
    CUBLAS_GEMM_ALGO12_TENSOR_OP  = 112,
    CUBLAS_GEMM_ALGO13_TENSOR_OP  = 113,
    CUBLAS_GEMM_ALGO14_TENSOR_OP  = 114,
    CUBLAS_GEMM_ALGO15_TENSOR_OP  = 115
} cublasGemmAlgo_t;

/*
 * Element data-type tags (enum cudaDataType_t, typedef'd as cudaDataType).
 * The R and C variants of each element type are listed together; the
 * explicit numeric values are therefore not in ascending order.
 */
typedef enum cudaDataType_t {
    CUDA_R_16F = 2,   /* real: half */
    CUDA_C_16F = 6,   /* complex: pair of halves */
    CUDA_R_32F = 0,   /* real: float */
    CUDA_C_32F = 4,   /* complex: pair of floats */
    CUDA_R_64F = 1,   /* real: double */
    CUDA_C_64F = 5,   /* complex: pair of doubles */
    CUDA_R_8I  = 3,   /* real: signed char */
    CUDA_C_8I  = 7,   /* complex: pair of signed chars */
    CUDA_R_8U  = 8,   /* real: unsigned char */
    CUDA_C_8U  = 9,   /* complex: pair of unsigned chars */
    CUDA_R_32I = 10,  /* real: signed int */
    CUDA_C_32I = 11,  /* complex: pair of signed ints */
    CUDA_R_32U = 12,  /* real: unsigned int */
    CUDA_C_32U = 13   /* complex: pair of unsigned ints */
} cudaDataType;

/* cublasDataType_t is an alias of cudaDataType (same enum, second name). */
typedef cudaDataType cublasDataType_t;

/*
 * Operation selectors (cublasOperation_t).
 * CUBLAS_OP_C and CUBLAS_OP_HERMITAN deliberately share the value 2.
 */
typedef enum {
    CUBLAS_OP_N        = 0,
    CUBLAS_OP_T        = 1,
    CUBLAS_OP_C        = 2,
    CUBLAS_OP_HERMITAN = 2,  /* synonym of CUBLAS_OP_C */
    CUBLAS_OP_CONJG    = 3   /* conjugate */
} cublasOperation_t;

/* Math-mode selector: default math (0) or tensor-op math (1). */
typedef enum {
    CUBLAS_DEFAULT_MATH   = 0,
    CUBLAS_TENSOR_OP_MATH = 1
} cublasMath_t;

#endif
[structure] better directory structure for tests 2019-08-27 20:33:38 -07:00			`#ifndef _COMMON_CUDA_FORWARDS_H_`
			`#define _COMMON_CUDA_FORWARDS_H_`

			`struct cublasContext;`
			`typedef struct cublasContext *cublasHandle_t;`
			`struct CUstream_st;`
			`typedef struct CUstream_st *cudaStream_t;`

			`/* CUBLAS status type returns */`
			`typedef enum{`
			`CUBLAS_STATUS_SUCCESS =0,`
			`CUBLAS_STATUS_NOT_INITIALIZED =1,`
			`CUBLAS_STATUS_ALLOC_FAILED =3,`
			`CUBLAS_STATUS_INVALID_VALUE =7,`
			`CUBLAS_STATUS_ARCH_MISMATCH =8,`
			`CUBLAS_STATUS_MAPPING_ERROR =11,`
			`CUBLAS_STATUS_EXECUTION_FAILED=13,`
			`CUBLAS_STATUS_INTERNAL_ERROR =14,`
			`CUBLAS_STATUS_NOT_SUPPORTED =15,`
			`CUBLAS_STATUS_LICENSE_ERROR =16`
			`} cublasStatus_t;`

			`/*For different GEMM algorithm */`
			`typedef enum {`
			`CUBLAS_GEMM_DFALT = -1,`
			`CUBLAS_GEMM_DEFAULT = -1,`
			`CUBLAS_GEMM_ALGO0 = 0,`
			`CUBLAS_GEMM_ALGO1 = 1,`
			`CUBLAS_GEMM_ALGO2 = 2,`
			`CUBLAS_GEMM_ALGO3 = 3,`
			`CUBLAS_GEMM_ALGO4 = 4,`
			`CUBLAS_GEMM_ALGO5 = 5,`
			`CUBLAS_GEMM_ALGO6 = 6,`
			`CUBLAS_GEMM_ALGO7 = 7,`
			`CUBLAS_GEMM_ALGO8 = 8,`
			`CUBLAS_GEMM_ALGO9 = 9,`
			`CUBLAS_GEMM_ALGO10 = 10,`
			`CUBLAS_GEMM_ALGO11 = 11,`
			`CUBLAS_GEMM_ALGO12 = 12,`
			`CUBLAS_GEMM_ALGO13 = 13,`
			`CUBLAS_GEMM_ALGO14 = 14,`
			`CUBLAS_GEMM_ALGO15 = 15,`
			`CUBLAS_GEMM_ALGO16 = 16,`
			`CUBLAS_GEMM_ALGO17 = 17,`
			`CUBLAS_GEMM_ALGO18 = 18, //sliced 32x32`
			`CUBLAS_GEMM_ALGO19 = 19, //sliced 64x32`
			`CUBLAS_GEMM_ALGO20 = 20, //sliced 128x32`
			`CUBLAS_GEMM_ALGO21 = 21, //sliced 32x32 -splitK`
			`CUBLAS_GEMM_ALGO22 = 22, //sliced 64x32 -splitK`
			`CUBLAS_GEMM_ALGO23 = 23, //sliced 128x32 -splitK`
			`CUBLAS_GEMM_DEFAULT_TENSOR_OP = 99,`
			`CUBLAS_GEMM_DFALT_TENSOR_OP = 99,`
			`CUBLAS_GEMM_ALGO0_TENSOR_OP = 100,`
			`CUBLAS_GEMM_ALGO1_TENSOR_OP = 101,`
			`CUBLAS_GEMM_ALGO2_TENSOR_OP = 102,`
			`CUBLAS_GEMM_ALGO3_TENSOR_OP = 103,`
			`CUBLAS_GEMM_ALGO4_TENSOR_OP = 104,`
			`CUBLAS_GEMM_ALGO5_TENSOR_OP = 105,`
			`CUBLAS_GEMM_ALGO6_TENSOR_OP = 106,`
			`CUBLAS_GEMM_ALGO7_TENSOR_OP = 107,`
			`CUBLAS_GEMM_ALGO8_TENSOR_OP = 108,`
			`CUBLAS_GEMM_ALGO9_TENSOR_OP = 109,`
			`CUBLAS_GEMM_ALGO10_TENSOR_OP = 110,`
			`CUBLAS_GEMM_ALGO11_TENSOR_OP = 111,`
			`CUBLAS_GEMM_ALGO12_TENSOR_OP = 112,`
			`CUBLAS_GEMM_ALGO13_TENSOR_OP = 113,`
			`CUBLAS_GEMM_ALGO14_TENSOR_OP = 114,`
			`CUBLAS_GEMM_ALGO15_TENSOR_OP = 115`
			`} cublasGemmAlgo_t;`

			`typedef enum cudaDataType_t`
			`{`
			`CUDA_R_16F= 2, /* real as a half */`
			`CUDA_C_16F= 6, /* complex as a pair of half numbers */`
			`CUDA_R_32F= 0, /* real as a float */`
			`CUDA_C_32F= 4, /* complex as a pair of float numbers */`
			`CUDA_R_64F= 1, /* real as a double */`
			`CUDA_C_64F= 5, /* complex as a pair of double numbers */`
			`CUDA_R_8I = 3, /* real as a signed char */`
			`CUDA_C_8I = 7, /* complex as a pair of signed char numbers */`
			`CUDA_R_8U = 8, /* real as a unsigned char */`
			`CUDA_C_8U = 9, /* complex as a pair of unsigned char numbers */`
			`CUDA_R_32I= 10, /* real as a signed int */`
			`CUDA_C_32I= 11, /* complex as a pair of signed int numbers */`
			`CUDA_R_32U= 12, /* real as a unsigned int */`
			`CUDA_C_32U= 13 /* complex as a pair of unsigned int numbers */`
			`} cudaDataType;`

			`typedef cudaDataType cublasDataType_t;`

			`typedef enum {`
			`CUBLAS_OP_N=0,`
			`CUBLAS_OP_T=1,`
			`CUBLAS_OP_C=2,`
			`CUBLAS_OP_HERMITAN=2, /* synonym if CUBLAS_OP_C */`
			`CUBLAS_OP_CONJG=3 /* conjugate */`
			`} cublasOperation_t;`

			`/*Enum for default math mode/tensor operation*/`
			`typedef enum {`
			`CUBLAS_DEFAULT_MATH = 0,`
			`CUBLAS_TENSOR_OP_MATH = 1`
			`} cublasMath_t;`

			`#endif`