/*
 * Include guard. The macro defined here must be spelled exactly like the one
 * tested by #ifndef; otherwise the guard never takes effect and a second
 * inclusion of this header redefines every enum declared below.
 */
#ifndef _COMMON_CUDA_FORWARDS_H_
#define _COMMON_CUDA_FORWARDS_H_

/*
 * Opaque handle types.
 * Both structs are only declared here, never defined, so the handles can be
 * stored and passed around as pointers but not dereferenced through this
 * header.
 */
struct CUstream_st;
struct cublasContext;

typedef struct CUstream_st   *cudaStream_t;   /* pointer to incomplete CUstream_st   */
typedef struct cublasContext *cublasHandle_t; /* pointer to incomplete cublasContext */

/*
 * cuBLAS status return codes.
 *
 * Every enumerator has an explicit value and the numbering has gaps
 * (2, 4-6, 9, 10 and 12 are not used), so entries must never be renumbered
 * or left to take implicit values.
 * NOTE(review): presumably mirrors cublasStatus_t from NVIDIA's cuBLAS
 * header -- confirm the values against the toolkit version in use.
 */
typedef enum {
    CUBLAS_STATUS_SUCCESS          = 0,
    CUBLAS_STATUS_NOT_INITIALIZED  = 1,
    CUBLAS_STATUS_ALLOC_FAILED     = 3,
    CUBLAS_STATUS_INVALID_VALUE    = 7,
    CUBLAS_STATUS_ARCH_MISMATCH    = 8,
    CUBLAS_STATUS_MAPPING_ERROR    = 11,
    CUBLAS_STATUS_EXECUTION_FAILED = 13,
    CUBLAS_STATUS_INTERNAL_ERROR   = 14,
    CUBLAS_STATUS_NOT_SUPPORTED    = 15,
    CUBLAS_STATUS_LICENSE_ERROR    = 16
} cublasStatus_t;

/*
 * GEMM algorithm selectors.
 *
 * Layout of the value space as declared below:
 *   -1        default selection (two spellings share the value)
 *   0 .. 23   numbered algorithms
 *   99        default selection, tensor-op variant (two spellings)
 *   100..115  numbered tensor-op algorithms
 */
typedef enum {
    /* Default selection; DFALT and DEFAULT are aliases of each other. */
    CUBLAS_GEMM_DFALT             = -1,
    CUBLAS_GEMM_DEFAULT           = -1,

    /* Numbered algorithms. */
    CUBLAS_GEMM_ALGO0             = 0,   /* maxwell_sgemm_32x128_nt  */
    CUBLAS_GEMM_ALGO1             = 1,   /* maxwell_sgemm_64x64_nt   */
    CUBLAS_GEMM_ALGO2             = 2,   /* maxwell_sgemm_128x32_nt  */
    CUBLAS_GEMM_ALGO3             = 3,   /* maxwell_sgemm_128x64_nt  */
    CUBLAS_GEMM_ALGO4             = 4,   /* maxwell_sgemm_128x128_nt */
    CUBLAS_GEMM_ALGO5             = 5,
    CUBLAS_GEMM_ALGO6             = 6,
    CUBLAS_GEMM_ALGO7             = 7,
    CUBLAS_GEMM_ALGO8             = 8,
    CUBLAS_GEMM_ALGO9             = 9,
    CUBLAS_GEMM_ALGO10            = 10,
    CUBLAS_GEMM_ALGO11            = 11,
    CUBLAS_GEMM_ALGO12            = 12,
    CUBLAS_GEMM_ALGO13            = 13,
    CUBLAS_GEMM_ALGO14            = 14,
    CUBLAS_GEMM_ALGO15            = 15,
    CUBLAS_GEMM_ALGO16            = 16,
    CUBLAS_GEMM_ALGO17            = 17,
    CUBLAS_GEMM_ALGO18            = 18,  /* sliced 32x32           */
    CUBLAS_GEMM_ALGO19            = 19,  /* sliced 64x32           */
    CUBLAS_GEMM_ALGO20            = 20,  /* sliced 128x32          */
    CUBLAS_GEMM_ALGO21            = 21,  /* sliced 32x32  -splitK  */
    CUBLAS_GEMM_ALGO22            = 22,  /* sliced 64x32  -splitK  */
    CUBLAS_GEMM_ALGO23            = 23,  /* sliced 128x32 -splitK  */

    /* Tensor-op default; again two spellings for the same value. */
    CUBLAS_GEMM_DEFAULT_TENSOR_OP = 99,
    CUBLAS_GEMM_DFALT_TENSOR_OP   = 99,

    /* Numbered tensor-op algorithms. */
    CUBLAS_GEMM_ALGO0_TENSOR_OP   = 100,
    CUBLAS_GEMM_ALGO1_TENSOR_OP   = 101,
    CUBLAS_GEMM_ALGO2_TENSOR_OP   = 102,
    CUBLAS_GEMM_ALGO3_TENSOR_OP   = 103,
    CUBLAS_GEMM_ALGO4_TENSOR_OP   = 104,
    CUBLAS_GEMM_ALGO5_TENSOR_OP   = 105,
    CUBLAS_GEMM_ALGO6_TENSOR_OP   = 106,
    CUBLAS_GEMM_ALGO7_TENSOR_OP   = 107,
    CUBLAS_GEMM_ALGO8_TENSOR_OP   = 108,
    CUBLAS_GEMM_ALGO9_TENSOR_OP   = 109,
    CUBLAS_GEMM_ALGO10_TENSOR_OP  = 110,
    CUBLAS_GEMM_ALGO11_TENSOR_OP  = 111,
    CUBLAS_GEMM_ALGO12_TENSOR_OP  = 112,
    CUBLAS_GEMM_ALGO13_TENSOR_OP  = 113,
    CUBLAS_GEMM_ALGO14_TENSOR_OP  = 114,
    CUBLAS_GEMM_ALGO15_TENSOR_OP  = 115
} cublasGemmAlgo_t;

/*
 * Element data-type tags.
 * R = real scalar, C = complex stored as a pair of the named scalar type.
 * Enumerators are listed in ascending numeric order; every value is explicit,
 * so the listing order carries no meaning.
 */
typedef enum cudaDataType_t
{
  CUDA_R_32F = 0,   /* real as a float */
  CUDA_R_64F = 1,   /* real as a double */
  CUDA_R_16F = 2,   /* real as a half */
  CUDA_R_8I  = 3,   /* real as a signed char */
  CUDA_C_32F = 4,   /* complex as a pair of float numbers */
  CUDA_C_64F = 5,   /* complex as a pair of double numbers */
  CUDA_C_16F = 6,   /* complex as a pair of half numbers */
  CUDA_C_8I  = 7,   /* complex as a pair of signed char numbers */
  CUDA_R_8U  = 8,   /* real as an unsigned char */
  CUDA_C_8U  = 9,   /* complex as a pair of unsigned char numbers */
  CUDA_R_32I = 10,  /* real as a signed int */
  CUDA_C_32I = 11,  /* complex as a pair of signed int numbers */
  CUDA_R_32U = 12,  /* real as an unsigned int */
  CUDA_C_32U = 13   /* complex as a pair of unsigned int numbers */
} cudaDataType;

/* cuBLAS-prefixed alias for the same enum type. */
typedef cudaDataType cublasDataType_t;

/*
 * Operation selector applied to a matrix operand.
 * NOTE(review): N / T / C presumably stand for non-transposed, transposed and
 * conjugate-transposed as in the cuBLAS API -- confirm against the cuBLAS
 * documentation.
 */
typedef enum {
    CUBLAS_OP_N        = 0,
    CUBLAS_OP_T        = 1,
    CUBLAS_OP_C        = 2,
    CUBLAS_OP_HERMITAN = 2,  /* synonym of CUBLAS_OP_C (same value) */
    CUBLAS_OP_CONJG    = 3   /* conjugate */
} cublasOperation_t;

/*
 * Math-mode selector: the default math mode versus the tensor-operation
 * math mode.
 */
typedef enum {
    CUBLAS_DEFAULT_MATH   = 0,
    CUBLAS_TENSOR_OP_MATH = 1
} cublasMath_t;

#endif