Files
triton/include/isaac/driver/external/CUDA/cublas_api.h
2016-10-03 21:26:05 -04:00

2584 lines
167 KiB
C

/*
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO LICENSEE:
*
* This source code and/or documentation ("Licensed Deliverables") are
* subject to NVIDIA intellectual property rights under U.S. and
* international Copyright laws.
*
* These Licensed Deliverables contained herein is PROPRIETARY and
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
* conditions of a form of NVIDIA software license agreement by and
* between NVIDIA and Licensee ("License Agreement") or electronically
* accepted by Licensee. Notwithstanding any terms or conditions to
* the contrary in the License Agreement, reproduction or disclosure
* of the Licensed Deliverables to any third party without the express
* written consent of NVIDIA is prohibited.
*
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THESE LICENSED DELIVERABLES.
*
* U.S. Government End Users. These Licensed Deliverables are a
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
* 1995), consisting of "commercial computer software" and "commercial
* computer software documentation" as such terms are used in 48
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
* U.S. Government End Users acquire the Licensed Deliverables with
* only those rights set forth herein.
*
* Any use of the Licensed Deliverables in individual and commercial
* software must include, in the user documentation and internal
* comments to the code, the above Disclaimer and U.S. Government End
* Users Notice.
*/
/*
* This is the public header file for the CUBLAS library, defining the API
*
* CUBLAS is an implementation of BLAS (Basic Linear Algebra Subroutines)
* on top of the CUDA runtime.
*/
#if !defined(CUBLAS_API_H_)
#define CUBLAS_API_H_
#ifndef CUBLASWINAPI
#ifdef _WIN32
#define CUBLASWINAPI __stdcall
#else
#define CUBLASWINAPI
#endif
#endif
#ifndef CUBLASAPI
#error "This file should not be included without defining CUBLASAPI"
#endif
#include "driver_types.h"
#include "cuComplex.h" /* import complex data type */
#include "cuda_fp16.h"
#if defined(__cplusplus)
extern "C" {
#endif /* __cplusplus */
/*
 * Status codes returned by the CUBLAS entry points.
 *
 * The numeric values are given explicitly and are not contiguous. As used by
 * the data-transfer helpers documented in this header:
 *   CUBLAS_STATUS_SUCCESS          the operation completed successfully
 *   CUBLAS_STATUS_NOT_INITIALIZED  the CUBLAS library has not been initialized
 *   CUBLAS_STATUS_INVALID_VALUE    an argument had an invalid value
 *   CUBLAS_STATUS_MAPPING_ERROR    an error occurred accessing GPU memory
 */
typedef enum {
    CUBLAS_STATUS_SUCCESS          = 0,
    CUBLAS_STATUS_NOT_INITIALIZED  = 1,
    CUBLAS_STATUS_ALLOC_FAILED     = 3,
    CUBLAS_STATUS_INVALID_VALUE    = 7,
    CUBLAS_STATUS_ARCH_MISMATCH    = 8,
    CUBLAS_STATUS_MAPPING_ERROR    = 11,
    CUBLAS_STATUS_EXECUTION_FAILED = 13,
    CUBLAS_STATUS_INTERNAL_ERROR   = 14,
    CUBLAS_STATUS_NOT_SUPPORTED    = 15,
    CUBLAS_STATUS_LICENSE_ERROR    = 16
} cublasStatus_t;
/*
 * Fill-mode selector (LOWER or UPPER). This is the type of the `uplo`
 * argument taken by the triangular and symmetric prototypes in this header.
 */
typedef enum {
    CUBLAS_FILL_MODE_LOWER = 0,
    CUBLAS_FILL_MODE_UPPER = 1
} cublasFillMode_t;
/*
 * Diagonal-type selector (NON_UNIT or UNIT). This is the type of the `diag`
 * argument taken by the triangular prototypes in this header.
 */
typedef enum {
    CUBLAS_DIAG_NON_UNIT = 0,
    CUBLAS_DIAG_UNIT     = 1
} cublasDiagType_t;
/* Side selector with the two values LEFT and RIGHT. */
typedef enum {
    CUBLAS_SIDE_LEFT  = 0,
    CUBLAS_SIDE_RIGHT = 1
} cublasSideMode_t;
/*
 * Operation selector with the values N, T and C. This is the type of the
 * `trans` argument taken by the matrix-vector prototypes in this header.
 * (In standard BLAS usage the letters stand for no transpose, transpose and
 * conjugate transpose.)
 */
typedef enum {
    CUBLAS_OP_N = 0,
    CUBLAS_OP_T = 1,
    CUBLAS_OP_C = 2
} cublasOperation_t;
/*
 * Pointer-mode setting (HOST or DEVICE), queried and changed through
 * cublasGetPointerMode_v2 / cublasSetPointerMode_v2.
 * NOTE(review): presumably this governs the arguments annotated
 * "host or device pointer" in the prototypes -- confirm against the cuBLAS
 * documentation.
 */
typedef enum {
    CUBLAS_POINTER_MODE_HOST   = 0,
    CUBLAS_POINTER_MODE_DEVICE = 1
} cublasPointerMode_t;
/*
 * Atomics-mode setting (NOT_ALLOWED or ALLOWED), queried and changed through
 * cublasGetAtomicsMode / cublasSetAtomicsMode.
 */
typedef enum {
    CUBLAS_ATOMICS_NOT_ALLOWED = 0,
    CUBLAS_ATOMICS_ALLOWED     = 1
} cublasAtomicsMode_t;
/*
 * Element data-type tags (FLOAT, DOUBLE, HALF, INT8).
 * Used by cublasSgemmEx.
 */
typedef enum {
    CUBLAS_DATA_FLOAT  = 0,
    CUBLAS_DATA_DOUBLE = 1,
    CUBLAS_DATA_HALF   = 2,
    CUBLAS_DATA_INT8   = 3
} cublasDataType_t;
/*
 * Opaque CUBLAS library context. Only a forward declaration is given here;
 * client code refers to the context through the pointer typedef
 * cublasHandle_t.
 */
struct cublasContext;
typedef struct cublasContext *cublasHandle_t;

/*
 * Context management. Every function below reports a cublasStatus_t.
 *
 * cublasCreate_v2 receives the address of a handle, cublasDestroy_v2 the
 * handle itself. The Get* functions take a pointer through which the queried
 * value (version, stream, pointer mode, atomics mode) is handed back; the
 * matching Set* functions take the new value directly.
 */
CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasCreate_v2(cublasHandle_t *handle);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasDestroy_v2(cublasHandle_t handle);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasGetVersion_v2(cublasHandle_t handle, int *version);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasSetStream_v2(cublasHandle_t handle, cudaStream_t streamId);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasGetStream_v2(cublasHandle_t handle, cudaStream_t *streamId);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasGetPointerMode_v2(cublasHandle_t handle, cublasPointerMode_t *mode);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasSetPointerMode_v2(cublasHandle_t handle, cublasPointerMode_t mode);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasGetAtomicsMode(cublasHandle_t handle, cublasAtomicsMode_t *mode);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasSetAtomicsMode(cublasHandle_t handle, cublasAtomicsMode_t mode);
/*
 * cublasSetVector
 *
 * Copies n elements from the vector x in CPU memory space to the vector
 * devicePtr in GPU memory space. Every element is assumed to be elemSize
 * bytes wide. Consecutive elements are spaced incx apart in the source
 * vector x and incy apart in the destination vector devicePtr. In general,
 * devicePtr points to an object, or part of an object, allocated via
 * cublasAlloc().
 *
 * Column-major format for two-dimensional matrices is assumed throughout
 * CUBLAS. An increment of 1 therefore accesses a column vector, whereas an
 * increment equal to the leading dimension of the respective matrix accesses
 * a row vector.
 *
 * Return values
 * -------------
 * CUBLAS_STATUS_NOT_INITIALIZED  the CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    incx, incy, or elemSize <= 0
 * CUBLAS_STATUS_MAPPING_ERROR    an error occurred accessing GPU memory
 * CUBLAS_STATUS_SUCCESS          the operation completed successfully
 */
cublasStatus_t CUBLASWINAPI
cublasSetVector(int n, int elemSize,
                const void *x, int incx,
                void *devicePtr, int incy);
/*
 * cublasGetVector
 *
 * Copies n elements from the vector x in GPU memory space to the vector y
 * in CPU memory space. Every element is assumed to be elemSize bytes wide.
 * Consecutive elements are spaced incx apart in the source vector x and
 * incy apart in the destination vector y. In general, x points to an
 * object, or part of an object, allocated via cublasAlloc().
 *
 * Column-major format for two-dimensional matrices is assumed throughout
 * CUBLAS. An increment of 1 therefore accesses a column vector, whereas an
 * increment equal to the leading dimension of the respective matrix accesses
 * a row vector.
 *
 * Return values
 * -------------
 * CUBLAS_STATUS_NOT_INITIALIZED  the CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    incx, incy, or elemSize <= 0
 * CUBLAS_STATUS_MAPPING_ERROR    an error occurred accessing GPU memory
 * CUBLAS_STATUS_SUCCESS          the operation completed successfully
 */
cublasStatus_t CUBLASWINAPI
cublasGetVector(int n, int elemSize,
                const void *x, int incx,
                void *y, int incy);
/*
 * cublasSetMatrix
 *
 * Copies a tile of rows x cols elements from the matrix A in CPU memory
 * space to the matrix B in GPU memory space. Each element occupies elemSize
 * bytes. Both matrices are assumed to be stored in column-major format; lda
 * is the leading dimension (i.e. number of rows) of the source matrix A and
 * ldb is the leading dimension of the destination matrix B. In general, B
 * points to an object, or part of an object, that was allocated via
 * cublasAlloc().
 *
 * Return values
 * -------------
 * CUBLAS_STATUS_NOT_INITIALIZED  the CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    rows or cols < 0, or elemSize, lda, or
 *                                ldb <= 0
 * CUBLAS_STATUS_MAPPING_ERROR    an error occurred accessing GPU memory
 * CUBLAS_STATUS_SUCCESS          the operation completed successfully
 */
cublasStatus_t CUBLASWINAPI
cublasSetMatrix(int rows, int cols, int elemSize,
                const void *A, int lda,
                void *B, int ldb);
/*
 * cublasGetMatrix
 *
 * Copies a tile of rows x cols elements from the matrix A in GPU memory
 * space to the matrix B in CPU memory space. Each element occupies elemSize
 * bytes. Both matrices are assumed to be stored in column-major format; lda
 * is the leading dimension (i.e. number of rows) of the source matrix A and
 * ldb is the leading dimension of the destination matrix B. In general, A
 * points to an object, or part of an object, that was allocated via
 * cublasAlloc().
 *
 * Return values
 * -------------
 * CUBLAS_STATUS_NOT_INITIALIZED  the CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    rows, cols, elemSize, lda, or ldb <= 0
 * CUBLAS_STATUS_MAPPING_ERROR    an error occurred accessing GPU memory
 * CUBLAS_STATUS_SUCCESS          the operation completed successfully
 */
cublasStatus_t CUBLASWINAPI
cublasGetMatrix(int rows, int cols, int elemSize,
                const void *A, int lda,
                void *B, int ldb);
/*
 * cublasSetVectorAsync
 *
 * Has the same functionality as cublasSetVector, except that the transfer
 * from hostPtr to devicePtr is performed asynchronously within the CUDA
 * stream passed as the stream parameter.
 *
 * Return values
 * -------------
 * CUBLAS_STATUS_NOT_INITIALIZED  the CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    incx, incy, or elemSize <= 0
 * CUBLAS_STATUS_MAPPING_ERROR    an error occurred accessing GPU memory
 * CUBLAS_STATUS_SUCCESS          the operation completed successfully
 */
cublasStatus_t CUBLASWINAPI
cublasSetVectorAsync(int n, int elemSize,
                     const void *hostPtr, int incx,
                     void *devicePtr, int incy,
                     cudaStream_t stream);
/*
 * cublasGetVectorAsync
 *
 * Has the same functionality as cublasGetVector, except that the transfer
 * from devicePtr to hostPtr is performed asynchronously within the CUDA
 * stream passed as the stream parameter.
 *
 * Return values
 * -------------
 * CUBLAS_STATUS_NOT_INITIALIZED  the CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    incx, incy, or elemSize <= 0
 * CUBLAS_STATUS_MAPPING_ERROR    an error occurred accessing GPU memory
 * CUBLAS_STATUS_SUCCESS          the operation completed successfully
 */
cublasStatus_t CUBLASWINAPI
cublasGetVectorAsync(int n, int elemSize,
                     const void *devicePtr, int incx,
                     void *hostPtr, int incy,
                     cudaStream_t stream);
/*
 * cublasSetMatrixAsync
 *
 * Has the same functionality as cublasSetMatrix, except that the transfer
 * is performed asynchronously within the CUDA stream passed as the stream
 * parameter.
 *
 * Return values
 * -------------
 * CUBLAS_STATUS_NOT_INITIALIZED  the CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    rows or cols < 0, or elemSize, lda, or
 *                                ldb <= 0
 * CUBLAS_STATUS_MAPPING_ERROR    an error occurred accessing GPU memory
 * CUBLAS_STATUS_SUCCESS          the operation completed successfully
 */
cublasStatus_t CUBLASWINAPI
cublasSetMatrixAsync(int rows, int cols, int elemSize,
                     const void *A, int lda,
                     void *B, int ldb,
                     cudaStream_t stream);
/*
 * cublasGetMatrixAsync
 *
 * Has the same functionality as cublasGetMatrix, except that the transfer
 * is performed asynchronously within the CUDA stream passed as the stream
 * parameter.
 *
 * Return values
 * -------------
 * CUBLAS_STATUS_NOT_INITIALIZED  the CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    rows, cols, elemSize, lda, or ldb <= 0
 * CUBLAS_STATUS_MAPPING_ERROR    an error occurred accessing GPU memory
 * CUBLAS_STATUS_SUCCESS          the operation completed successfully
 */
cublasStatus_t CUBLASWINAPI
cublasGetMatrixAsync(int rows, int cols, int elemSize,
                     const void *A, int lda,
                     void *B, int ldb,
                     cudaStream_t stream);
/*
 * Takes a routine name (srName) and an integer code (info); returns nothing.
 * NOTE(review): presumably the counterpart of the BLAS XERBLA error handler
 * -- confirm against the cuBLAS documentation.
 */
CUBLASAPI void CUBLASWINAPI
cublasXerbla(const char *srName, int info);
/* ---------------- CUBLAS BLAS1 functions ---------------- */

/*
 * NRM2 family (the standard BLAS name for the Euclidean norm of a vector).
 * One entry point per element type of x: float (S), double (D),
 * cuComplex (Sc) and cuDoubleComplex (Dz). The complex variants write a
 * real-typed result (float / double).
 * result may be a host or a device pointer.
 */
CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasSnrm2_v2(cublasHandle_t handle, int n,
               const float *x, int incx,
               float *result);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasDnrm2_v2(cublasHandle_t handle, int n,
               const double *x, int incx,
               double *result);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasScnrm2_v2(cublasHandle_t handle, int n,
                const cuComplex *x, int incx,
                float *result);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasDznrm2_v2(cublasHandle_t handle, int n,
                const cuDoubleComplex *x, int incx,
                double *result);
/*
 * DOT family (the standard BLAS name for the dot product of two vectors).
 * Real variants: S (float) and D (double). Complex variants come in two
 * flavours each, dotu and dotc (unconjugated / conjugated in standard BLAS
 * naming), for cuComplex (C) and cuDoubleComplex (Z).
 * result has the same element type as the inputs and may be a host or a
 * device pointer.
 */
CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasSdot_v2(cublasHandle_t handle, int n,
              const float *x, int incx,
              const float *y, int incy,
              float *result);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasDdot_v2(cublasHandle_t handle, int n,
              const double *x, int incx,
              const double *y, int incy,
              double *result);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasCdotu_v2(cublasHandle_t handle, int n,
               const cuComplex *x, int incx,
               const cuComplex *y, int incy,
               cuComplex *result);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasCdotc_v2(cublasHandle_t handle, int n,
               const cuComplex *x, int incx,
               const cuComplex *y, int incy,
               cuComplex *result);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasZdotu_v2(cublasHandle_t handle, int n,
               const cuDoubleComplex *x, int incx,
               const cuDoubleComplex *y, int incy,
               cuDoubleComplex *result);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasZdotc_v2(cublasHandle_t handle, int n,
               const cuDoubleComplex *x, int incx,
               const cuDoubleComplex *y, int incy,
               cuDoubleComplex *result);
/*
 * SCAL family (the standard BLAS name for scaling a vector by a scalar).
 * x is modified in place. S, D, C and Z take alpha of the same type as x;
 * Csscal and Zdscal take a real alpha (float / double) with a complex x.
 * alpha may be a host or a device pointer.
 */
CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasSscal_v2(cublasHandle_t handle, int n,
               const float *alpha,
               float *x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasDscal_v2(cublasHandle_t handle, int n,
               const double *alpha,
               double *x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasCscal_v2(cublasHandle_t handle, int n,
               const cuComplex *alpha,
               cuComplex *x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasCsscal_v2(cublasHandle_t handle, int n,
                const float *alpha,
                cuComplex *x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasZscal_v2(cublasHandle_t handle, int n,
               const cuDoubleComplex *alpha,
               cuDoubleComplex *x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasZdscal_v2(cublasHandle_t handle, int n,
                const double *alpha,
                cuDoubleComplex *x, int incx);
/*
 * AXPY family (in standard BLAS naming: scaled vector x added into y).
 * x is read-only, y is the read/write operand. Variants: float (S),
 * double (D), cuComplex (C), cuDoubleComplex (Z).
 * alpha may be a host or a device pointer.
 */
CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasSaxpy_v2(cublasHandle_t handle, int n,
               const float *alpha,
               const float *x, int incx,
               float *y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasDaxpy_v2(cublasHandle_t handle, int n,
               const double *alpha,
               const double *x, int incx,
               double *y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasCaxpy_v2(cublasHandle_t handle, int n,
               const cuComplex *alpha,
               const cuComplex *x, int incx,
               cuComplex *y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasZaxpy_v2(cublasHandle_t handle, int n,
               const cuDoubleComplex *alpha,
               const cuDoubleComplex *x, int incx,
               cuDoubleComplex *y, int incy);
/*
 * COPY family (the standard BLAS name for copying vector x into vector y).
 * x is read-only, y is written. Variants: float (S), double (D),
 * cuComplex (C), cuDoubleComplex (Z).
 */
CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasScopy_v2(cublasHandle_t handle, int n,
               const float *x, int incx,
               float *y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasDcopy_v2(cublasHandle_t handle, int n,
               const double *x, int incx,
               double *y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasCcopy_v2(cublasHandle_t handle, int n,
               const cuComplex *x, int incx,
               cuComplex *y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasZcopy_v2(cublasHandle_t handle, int n,
               const cuDoubleComplex *x, int incx,
               cuDoubleComplex *y, int incy);
/*
 * SWAP family (the standard BLAS name for exchanging two vectors).
 * Both x and y are non-const. Variants: float (S), double (D),
 * cuComplex (C), cuDoubleComplex (Z).
 */
CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasSswap_v2(cublasHandle_t handle, int n,
               float *x, int incx,
               float *y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasDswap_v2(cublasHandle_t handle, int n,
               double *x, int incx,
               double *y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasCswap_v2(cublasHandle_t handle, int n,
               cuComplex *x, int incx,
               cuComplex *y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasZswap_v2(cublasHandle_t handle, int n,
               cuDoubleComplex *x, int incx,
               cuDoubleComplex *y, int incy);
/*
 * I?AMAX family (in standard BLAS naming: index of the element of largest
 * magnitude). The answer is an int written through result, which may be a
 * host or a device pointer. Variants by element type of x: float (s),
 * double (d), cuComplex (c), cuDoubleComplex (z).
 */
CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasIsamax_v2(cublasHandle_t handle, int n,
                const float *x, int incx,
                int *result);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasIdamax_v2(cublasHandle_t handle, int n,
                const double *x, int incx,
                int *result);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasIcamax_v2(cublasHandle_t handle, int n,
                const cuComplex *x, int incx,
                int *result);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasIzamax_v2(cublasHandle_t handle, int n,
                const cuDoubleComplex *x, int incx,
                int *result);
/*
 * I?AMIN family (by analogy with I?AMAX: index of the element of smallest
 * magnitude -- see the cuBLAS documentation). The answer is an int written
 * through result, which may be a host or a device pointer. Variants by
 * element type of x: float (s), double (d), cuComplex (c),
 * cuDoubleComplex (z).
 */
CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasIsamin_v2(cublasHandle_t handle, int n,
                const float *x, int incx,
                int *result);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasIdamin_v2(cublasHandle_t handle, int n,
                const double *x, int incx,
                int *result);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasIcamin_v2(cublasHandle_t handle, int n,
                const cuComplex *x, int incx,
                int *result);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasIzamin_v2(cublasHandle_t handle, int n,
                const cuDoubleComplex *x, int incx,
                int *result);
/*
 * ASUM family (the standard BLAS name for the sum of absolute values of a
 * vector). Variants by element type of x: float (S), double (D),
 * cuComplex (Sc), cuDoubleComplex (Dz); the complex variants write a
 * real-typed result (float / double).
 * result may be a host or a device pointer.
 */
CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasSasum_v2(cublasHandle_t handle, int n,
               const float *x, int incx,
               float *result);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasDasum_v2(cublasHandle_t handle, int n,
               const double *x, int incx,
               double *result);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasScasum_v2(cublasHandle_t handle, int n,
                const cuComplex *x, int incx,
                float *result);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasDzasum_v2(cublasHandle_t handle, int n,
                const cuDoubleComplex *x, int incx,
                double *result);
/*
 * ROT family (the standard BLAS name for applying a plane rotation to the
 * vector pair x, y). Both vectors are non-const.
 * The c parameter is always real (float / double). The s parameter has the
 * vector's complex type in Crot / Zrot and is real in Srot, Drot, Csrot and
 * Zdrot.
 * c and s may each be a host or a device pointer.
 */
CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasSrot_v2(cublasHandle_t handle, int n,
              float *x, int incx,
              float *y, int incy,
              const float *c, const float *s);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasDrot_v2(cublasHandle_t handle, int n,
              double *x, int incx,
              double *y, int incy,
              const double *c, const double *s);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasCrot_v2(cublasHandle_t handle, int n,
              cuComplex *x, int incx,
              cuComplex *y, int incy,
              const float *c, const cuComplex *s);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasCsrot_v2(cublasHandle_t handle, int n,
               cuComplex *x, int incx,
               cuComplex *y, int incy,
               const float *c, const float *s);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasZrot_v2(cublasHandle_t handle, int n,
              cuDoubleComplex *x, int incx,
              cuDoubleComplex *y, int incy,
              const double *c, const cuDoubleComplex *s);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasZdrot_v2(cublasHandle_t handle, int n,
               cuDoubleComplex *x, int incx,
               cuDoubleComplex *y, int incy,
               const double *c, const double *s);
/*
 * ROTG family (the standard BLAS name for constructing a plane rotation).
 * All four scalar arguments are non-const and each may be a host or a
 * device pointer. In the complex variants (C, Z) the c argument stays real
 * (float / double) while a, b and s are complex.
 */
CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasSrotg_v2(cublasHandle_t handle,
               float *a, float *b,
               float *c, float *s);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasDrotg_v2(cublasHandle_t handle,
               double *a, double *b,
               double *c, double *s);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasCrotg_v2(cublasHandle_t handle,
               cuComplex *a, cuComplex *b,
               float *c, cuComplex *s);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasZrotg_v2(cublasHandle_t handle,
               cuDoubleComplex *a, cuDoubleComplex *b,
               double *c, cuDoubleComplex *s);
/*
 * ROTM family (the standard BLAS name for applying a modified plane
 * rotation to the vector pair x, y). Only real variants exist here:
 * float (S) and double (D).
 * param is read-only and may be a host or a device pointer.
 */
CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasSrotm_v2(cublasHandle_t handle, int n,
               float *x, int incx,
               float *y, int incy,
               const float* param);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasDrotm_v2(cublasHandle_t handle, int n,
               double *x, int incx,
               double *y, int incy,
               const double* param);
/*
 * ROTMG family (the standard BLAS name for constructing a modified plane
 * rotation). Only real variants exist here: float (S) and double (D).
 * y1 is the only read-only argument; d1, d2, x1 and param are non-const.
 * Every scalar argument may be a host or a device pointer.
 */
CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasSrotmg_v2(cublasHandle_t handle,
                float *d1, float *d2,
                float *x1, const float *y1,
                float *param);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasDrotmg_v2(cublasHandle_t handle,
                double *d1, double *d2,
                double *x1, const double *y1,
                double *param);
/* --------------- CUBLAS BLAS2 functions ---------------- */

/*
 * GEMV
 *
 * Standard BLAS general matrix-vector multiply family. A (leading
 * dimension lda) and x are read-only; y is the read/write operand; trans
 * selects the operation applied to A; m and n are the matrix dimensions.
 * Variants: float (S), double (D), cuComplex (C), cuDoubleComplex (Z).
 * alpha and beta may each be a host or a device pointer.
 */
CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasSgemv_v2(cublasHandle_t handle, cublasOperation_t trans,
               int m, int n,
               const float *alpha,
               const float *A, int lda,
               const float *x, int incx,
               const float *beta,
               float *y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasDgemv_v2(cublasHandle_t handle, cublasOperation_t trans,
               int m, int n,
               const double *alpha,
               const double *A, int lda,
               const double *x, int incx,
               const double *beta,
               double *y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasCgemv_v2(cublasHandle_t handle, cublasOperation_t trans,
               int m, int n,
               const cuComplex *alpha,
               const cuComplex *A, int lda,
               const cuComplex *x, int incx,
               const cuComplex *beta,
               cuComplex *y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasZgemv_v2(cublasHandle_t handle, cublasOperation_t trans,
               int m, int n,
               const cuDoubleComplex *alpha,
               const cuDoubleComplex *A, int lda,
               const cuDoubleComplex *x, int incx,
               const cuDoubleComplex *beta,
               cuDoubleComplex *y, int incy);
/*
 * GBMV
 *
 * Standard BLAS general banded matrix-vector multiply family. Same argument
 * layout as GEMV plus the two band parameters kl and ku (sub- and
 * super-diagonal counts in standard BLAS usage).
 * Variants: float (S), double (D), cuComplex (C), cuDoubleComplex (Z).
 * alpha and beta may each be a host or a device pointer.
 */
CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasSgbmv_v2(cublasHandle_t handle, cublasOperation_t trans,
               int m, int n, int kl, int ku,
               const float *alpha,
               const float *A, int lda,
               const float *x, int incx,
               const float *beta,
               float *y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasDgbmv_v2(cublasHandle_t handle, cublasOperation_t trans,
               int m, int n, int kl, int ku,
               const double *alpha,
               const double *A, int lda,
               const double *x, int incx,
               const double *beta,
               double *y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasCgbmv_v2(cublasHandle_t handle, cublasOperation_t trans,
               int m, int n, int kl, int ku,
               const cuComplex *alpha,
               const cuComplex *A, int lda,
               const cuComplex *x, int incx,
               const cuComplex *beta,
               cuComplex *y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasZgbmv_v2(cublasHandle_t handle, cublasOperation_t trans,
               int m, int n, int kl, int ku,
               const cuDoubleComplex *alpha,
               const cuDoubleComplex *A, int lda,
               const cuDoubleComplex *x, int incx,
               const cuDoubleComplex *beta,
               cuDoubleComplex *y, int incy);
/*
 * TRMV
 *
 * Standard BLAS triangular matrix-vector multiply family. A (leading
 * dimension lda) is read-only and x is updated in place; uplo, trans and
 * diag select the fill mode, the operation and the diagonal type.
 * Variants: float (S), double (D), cuComplex (C), cuDoubleComplex (Z).
 */
CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasStrmv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               cublasOperation_t trans, cublasDiagType_t diag,
               int n,
               const float *A, int lda,
               float *x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasDtrmv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               cublasOperation_t trans, cublasDiagType_t diag,
               int n,
               const double *A, int lda,
               double *x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasCtrmv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               cublasOperation_t trans, cublasDiagType_t diag,
               int n,
               const cuComplex *A, int lda,
               cuComplex *x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasZtrmv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               cublasOperation_t trans, cublasDiagType_t diag,
               int n,
               const cuDoubleComplex *A, int lda,
               cuDoubleComplex *x, int incx);
/*
 * TBMV
 *
 * Standard BLAS triangular banded matrix-vector multiply family. Same
 * argument layout as TRMV plus the band parameter k.
 * Variants: float (S), double (D), cuComplex (C), cuDoubleComplex (Z).
 */
CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasStbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               cublasOperation_t trans, cublasDiagType_t diag,
               int n, int k,
               const float *A, int lda,
               float *x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasDtbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               cublasOperation_t trans, cublasDiagType_t diag,
               int n, int k,
               const double *A, int lda,
               double *x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasCtbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               cublasOperation_t trans, cublasDiagType_t diag,
               int n, int k,
               const cuComplex *A, int lda,
               cuComplex *x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasZtbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               cublasOperation_t trans, cublasDiagType_t diag,
               int n, int k,
               const cuDoubleComplex *A, int lda,
               cuDoubleComplex *x, int incx);
/*
 * TPMV
 *
 * Standard BLAS triangular packed matrix-vector multiply family. The matrix
 * is passed as AP without a leading dimension; x is updated in place.
 * Variants: float (S), double (D), cuComplex (C), cuDoubleComplex (Z).
 */
CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasStpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               cublasOperation_t trans, cublasDiagType_t diag,
               int n,
               const float *AP,
               float *x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasDtpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               cublasOperation_t trans, cublasDiagType_t diag,
               int n,
               const double *AP,
               double *x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasCtpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               cublasOperation_t trans, cublasDiagType_t diag,
               int n,
               const cuComplex *AP,
               cuComplex *x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasZtpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               cublasOperation_t trans, cublasDiagType_t diag,
               int n,
               const cuDoubleComplex *AP,
               cuDoubleComplex *x, int incx);
/*
 * TRSV
 *
 * Standard BLAS triangular solve family (single right-hand-side vector).
 * A (leading dimension lda) is read-only and x is updated in place; uplo,
 * trans and diag select the fill mode, the operation and the diagonal type.
 * Variants: float (S), double (D), cuComplex (C), cuDoubleComplex (Z).
 */
CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasStrsv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               cublasOperation_t trans, cublasDiagType_t diag,
               int n,
               const float *A, int lda,
               float *x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasDtrsv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               cublasOperation_t trans, cublasDiagType_t diag,
               int n,
               const double *A, int lda,
               double *x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasCtrsv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               cublasOperation_t trans, cublasDiagType_t diag,
               int n,
               const cuComplex *A, int lda,
               cuComplex *x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasZtrsv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               cublasOperation_t trans, cublasDiagType_t diag,
               int n,
               const cuDoubleComplex *A, int lda,
               cuDoubleComplex *x, int incx);
/*
 * TPSV
 *
 * Standard BLAS triangular packed solve family (single right-hand-side
 * vector). The matrix is passed as AP without a leading dimension; x is
 * updated in place.
 * Variants: float (S), double (D), cuComplex (C), cuDoubleComplex (Z).
 */
CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasStpsv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               cublasOperation_t trans, cublasDiagType_t diag,
               int n,
               const float *AP,
               float *x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasDtpsv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               cublasOperation_t trans, cublasDiagType_t diag,
               int n,
               const double *AP,
               double *x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasCtpsv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               cublasOperation_t trans, cublasDiagType_t diag,
               int n,
               const cuComplex *AP,
               cuComplex *x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasZtpsv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               cublasOperation_t trans, cublasDiagType_t diag,
               int n,
               const cuDoubleComplex *AP,
               cuDoubleComplex *x, int incx);
/*
 * TBSV
 *
 * Standard BLAS triangular banded solve family (single right-hand-side
 * vector). Same argument layout as TRSV plus the band parameter k.
 * Variants: float (S), double (D), cuComplex (C), cuDoubleComplex (Z).
 */
CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasStbsv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               cublasOperation_t trans, cublasDiagType_t diag,
               int n, int k,
               const float *A, int lda,
               float *x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasDtbsv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               cublasOperation_t trans, cublasDiagType_t diag,
               int n, int k,
               const double *A, int lda,
               double *x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasCtbsv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               cublasOperation_t trans, cublasDiagType_t diag,
               int n, int k,
               const cuComplex *A, int lda,
               cuComplex *x, int incx);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasZtbsv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               cublasOperation_t trans, cublasDiagType_t diag,
               int n, int k,
               const cuDoubleComplex *A, int lda,
               cuDoubleComplex *x, int incx);
/*
 * SYMV/HEMV
 *
 * Standard BLAS symmetric (SYMV) and Hermitian (HEMV) matrix-vector
 * multiply families. A (leading dimension lda) and x are read-only; y is
 * the read/write operand; uplo selects the fill mode.
 * SYMV variants: float (S), double (D), cuComplex (C), cuDoubleComplex (Z).
 * HEMV variants: cuComplex (C), cuDoubleComplex (Z).
 * alpha and beta may each be a host or a device pointer.
 */
CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasSsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               int n,
               const float *alpha,
               const float *A, int lda,
               const float *x, int incx,
               const float *beta,
               float *y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasDsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               int n,
               const double *alpha,
               const double *A, int lda,
               const double *x, int incx,
               const double *beta,
               double *y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasCsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               int n,
               const cuComplex *alpha,
               const cuComplex *A, int lda,
               const cuComplex *x, int incx,
               const cuComplex *beta,
               cuComplex *y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasZsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               int n,
               const cuDoubleComplex *alpha,
               const cuDoubleComplex *A, int lda,
               const cuDoubleComplex *x, int incx,
               const cuDoubleComplex *beta,
               cuDoubleComplex *y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasChemv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               int n,
               const cuComplex *alpha,
               const cuComplex *A, int lda,
               const cuComplex *x, int incx,
               const cuComplex *beta,
               cuComplex *y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI
cublasZhemv_v2(cublasHandle_t handle, cublasFillMode_t uplo,
               int n,
               const cuDoubleComplex *alpha,
               const cuDoubleComplex *A, int lda,
               const cuDoubleComplex *x, int incx,
               const cuDoubleComplex *beta,
               cuDoubleComplex *y, int incy);
/* SBMV/HBMV
 * Prototypes for the BLAS-named SBMV (S/D) and HBMV (C/Z) families.
 * Same parameter layout as SYMV/HEMV plus the extra integer k.
 * A and x are const inputs, y is writable; alpha and beta may each be a
 * host or a device pointer. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsbmv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n, int k,
    const float *alpha,  /* host or device pointer */
    const float *A, int lda,
    const float *x, int incx,
    const float *beta,   /* host or device pointer */
    float *y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsbmv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n, int k,
    const double *alpha, /* host or device pointer */
    const double *A, int lda,
    const double *x, int incx,
    const double *beta,  /* host or device pointer */
    double *y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChbmv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n, int k,
    const cuComplex *alpha, /* host or device pointer */
    const cuComplex *A, int lda,
    const cuComplex *x, int incx,
    const cuComplex *beta,  /* host or device pointer */
    cuComplex *y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhbmv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n, int k,
    const cuDoubleComplex *alpha, /* host or device pointer */
    const cuDoubleComplex *A, int lda,
    const cuDoubleComplex *x, int incx,
    const cuDoubleComplex *beta,  /* host or device pointer */
    cuDoubleComplex *y, int incy);
/* SPMV/HPMV
 * Prototypes for the BLAS-named SPMV (S/D) and HPMV (C/Z) families.
 * The matrix argument is AP and carries no leading dimension.
 * AP and x are const inputs, y is writable; alpha and beta may each be a
 * host or a device pointer. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSspmv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const float *alpha,  /* host or device pointer */
    const float *AP,
    const float *x, int incx,
    const float *beta,   /* host or device pointer */
    float *y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDspmv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const double *alpha, /* host or device pointer */
    const double *AP,
    const double *x, int incx,
    const double *beta,  /* host or device pointer */
    double *y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChpmv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const cuComplex *alpha, /* host or device pointer */
    const cuComplex *AP,
    const cuComplex *x, int incx,
    const cuComplex *beta,  /* host or device pointer */
    cuComplex *y, int incy);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhpmv_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const cuDoubleComplex *alpha, /* host or device pointer */
    const cuDoubleComplex *AP,
    const cuDoubleComplex *x, int incx,
    const cuDoubleComplex *beta,  /* host or device pointer */
    cuDoubleComplex *y, int incy);
/* GER
 * Prototypes for the BLAS-named GER family: Sger/Dger for real types and the
 * geru/gerc pairs for cuComplex and cuDoubleComplex.
 * x and y are const inputs; A (leading dimension lda) is the writable
 * m-by-n operand.  alpha may be a host or a device pointer. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSger_v2(
    cublasHandle_t handle,
    int m, int n,
    const float *alpha, /* host or device pointer */
    const float *x, int incx,
    const float *y, int incy,
    float *A, int lda);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDger_v2(
    cublasHandle_t handle,
    int m, int n,
    const double *alpha, /* host or device pointer */
    const double *x, int incx,
    const double *y, int incy,
    double *A, int lda);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgeru_v2(
    cublasHandle_t handle,
    int m, int n,
    const cuComplex *alpha, /* host or device pointer */
    const cuComplex *x, int incx,
    const cuComplex *y, int incy,
    cuComplex *A, int lda);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgerc_v2(
    cublasHandle_t handle,
    int m, int n,
    const cuComplex *alpha, /* host or device pointer */
    const cuComplex *x, int incx,
    const cuComplex *y, int incy,
    cuComplex *A, int lda);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgeru_v2(
    cublasHandle_t handle,
    int m, int n,
    const cuDoubleComplex *alpha, /* host or device pointer */
    const cuDoubleComplex *x, int incx,
    const cuDoubleComplex *y, int incy,
    cuDoubleComplex *A, int lda);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgerc_v2(
    cublasHandle_t handle,
    int m, int n,
    const cuDoubleComplex *alpha, /* host or device pointer */
    const cuDoubleComplex *x, int incx,
    const cuDoubleComplex *y, int incy,
    cuDoubleComplex *A, int lda);
/* SYR/HER
 * Prototypes for the BLAS-named SYR (S/D/C/Z) and HER (C/Z) families.
 * x is a const input; A (leading dimension lda) is writable.
 * Note the scalar type: the HER variants take a real alpha (float / double)
 * while their vector and matrix operands are complex.
 * alpha may be a host or a device pointer. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const float *alpha, /* host or device pointer */
    const float *x, int incx,
    float *A, int lda);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const double *alpha, /* host or device pointer */
    const double *x, int incx,
    double *A, int lda);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const cuComplex *alpha, /* host or device pointer */
    const cuComplex *x, int incx,
    cuComplex *A, int lda);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const cuDoubleComplex *alpha, /* host or device pointer */
    const cuDoubleComplex *x, int incx,
    cuDoubleComplex *A, int lda);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const float *alpha, /* host or device pointer */
    const cuComplex *x, int incx,
    cuComplex *A, int lda);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const double *alpha, /* host or device pointer */
    const cuDoubleComplex *x, int incx,
    cuDoubleComplex *A, int lda);
/* SPR/HPR
 * Prototypes for the BLAS-named SPR (S/D) and HPR (C/Z) families.
 * x is a const input; AP is the writable matrix operand and has no leading
 * dimension.  The HPR variants take a real alpha (float / double) with
 * complex x and AP.  alpha may be a host or a device pointer. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSspr_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const float *alpha, /* host or device pointer */
    const float *x, int incx,
    float *AP);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDspr_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const double *alpha, /* host or device pointer */
    const double *x, int incx,
    double *AP);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChpr_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const float *alpha, /* host or device pointer */
    const cuComplex *x, int incx,
    cuComplex *AP);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhpr_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const double *alpha, /* host or device pointer */
    const cuDoubleComplex *x, int incx,
    cuDoubleComplex *AP);
/* SYR2/HER2
 * Prototypes for the BLAS-named SYR2 (S/D/C/Z) and HER2 (C/Z) families.
 * x and y are const inputs; A (leading dimension lda) is writable.
 * Unlike HER, the HER2 variants take a complex alpha.
 * alpha may be a host or a device pointer. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr2_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const float *alpha, /* host or device pointer */
    const float *x, int incx,
    const float *y, int incy,
    float *A, int lda);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr2_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const double *alpha, /* host or device pointer */
    const double *x, int incx,
    const double *y, int incy,
    double *A, int lda);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr2_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const cuComplex *alpha, /* host or device pointer */
    const cuComplex *x, int incx,
    const cuComplex *y, int incy,
    cuComplex *A, int lda);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr2_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const cuDoubleComplex *alpha, /* host or device pointer */
    const cuDoubleComplex *x, int incx,
    const cuDoubleComplex *y, int incy,
    cuDoubleComplex *A, int lda);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher2_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const cuComplex *alpha, /* host or device pointer */
    const cuComplex *x, int incx,
    const cuComplex *y, int incy,
    cuComplex *A, int lda);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher2_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const cuDoubleComplex *alpha, /* host or device pointer */
    const cuDoubleComplex *x, int incx,
    const cuDoubleComplex *y, int incy,
    cuDoubleComplex *A, int lda);
/* SPR2/HPR2
 * Prototypes for the BLAS-named SPR2 (S/D) and HPR2 (C/Z) families.
 * x and y are const inputs; AP is the writable matrix operand and has no
 * leading dimension.  alpha may be a host or a device pointer. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSspr2_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const float *alpha, /* host or device pointer */
    const float *x, int incx,
    const float *y, int incy,
    float *AP);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDspr2_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const double *alpha, /* host or device pointer */
    const double *x, int incx,
    const double *y, int incy,
    double *AP);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChpr2_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const cuComplex *alpha, /* host or device pointer */
    const cuComplex *x, int incx,
    const cuComplex *y, int incy,
    cuComplex *AP);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhpr2_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const cuDoubleComplex *alpha, /* host or device pointer */
    const cuDoubleComplex *x, int incx,
    const cuDoubleComplex *y, int incy,
    cuDoubleComplex *AP);
/* ---------------- CUBLAS BLAS3 functions ---------------- */
/* GEMM
 * Prototypes for the BLAS-named GEMM family in float, double, cuComplex and
 * cuDoubleComplex.  A and B are const inputs, each with its own operation
 * selector (transa / transb) and leading dimension; C is writable.
 * alpha and beta may each be a host or a device pointer. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemm_v2(
    cublasHandle_t handle,
    cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const float *alpha, /* host or device pointer */
    const float *A, int lda,
    const float *B, int ldb,
    const float *beta,  /* host or device pointer */
    float *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemm_v2(
    cublasHandle_t handle,
    cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const double *alpha, /* host or device pointer */
    const double *A, int lda,
    const double *B, int ldb,
    const double *beta,  /* host or device pointer */
    double *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemm_v2(
    cublasHandle_t handle,
    cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const cuComplex *alpha, /* host or device pointer */
    const cuComplex *A, int lda,
    const cuComplex *B, int ldb,
    const cuComplex *beta,  /* host or device pointer */
    cuComplex *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemm_v2(
    cublasHandle_t handle,
    cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const cuDoubleComplex *alpha, /* host or device pointer */
    const cuDoubleComplex *A, int lda,
    const cuDoubleComplex *B, int ldb,
    const cuDoubleComplex *beta,  /* host or device pointer */
    cuDoubleComplex *C, int ldc);
/* cublasHgemm: GEMM variant whose scalars and matrices are all __half.
 * Same parameter layout as the S/D/C/Z GEMM prototypes; the name carries no
 * _v2 suffix.  alpha and beta may each be a host or a device pointer. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasHgemm(
    cublasHandle_t handle,
    cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const __half *alpha, /* host or device pointer */
    const __half *A, int lda,
    const __half *B, int ldb,
    const __half *beta,  /* host or device pointer */
    __half *C, int ldc);
/* cublasSgemmEx: IO in FP16/FP32, computation in float.
 * A, B and C are passed as untyped pointers, each accompanied by a
 * cublasDataType_t tag (Atype / Btype / Ctype) and a leading dimension.
 * alpha and beta are float and may each be a host or a device pointer. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmEx(
    cublasHandle_t handle,
    cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const float *alpha, /* host or device pointer */
    const void *A, cublasDataType_t Atype, int lda,
    const void *B, cublasDataType_t Btype, int ldb,
    const float *beta,  /* host or device pointer */
    void *C, cublasDataType_t Ctype, int ldc);
/* SYRK
 * Prototypes for the BLAS-named SYRK family (S/D/C/Z).
 * A is the single const matrix input; C is writable.
 * alpha and beta may each be a host or a device pointer. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrk_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const float *alpha, /* host or device pointer */
    const float *A, int lda,
    const float *beta,  /* host or device pointer */
    float *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyrk_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const double *alpha, /* host or device pointer */
    const double *A, int lda,
    const double *beta,  /* host or device pointer */
    double *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyrk_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const cuComplex *alpha, /* host or device pointer */
    const cuComplex *A, int lda,
    const cuComplex *beta,  /* host or device pointer */
    cuComplex *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyrk_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const cuDoubleComplex *alpha, /* host or device pointer */
    const cuDoubleComplex *A, int lda,
    const cuDoubleComplex *beta,  /* host or device pointer */
    cuDoubleComplex *C, int ldc);
/* HERK
 * Prototypes for the BLAS-named HERK family (C/Z only).
 * Both scalars are real (float / double) while A and C are complex.
 * A is a const input, C is writable; alpha and beta may each be a host or
 * a device pointer. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherk_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const float *alpha, /* host or device pointer */
    const cuComplex *A, int lda,
    const float *beta,  /* host or device pointer */
    cuComplex *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZherk_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const double *alpha, /* host or device pointer */
    const cuDoubleComplex *A, int lda,
    const double *beta,  /* host or device pointer */
    cuDoubleComplex *C, int ldc);
/* SYR2K
 * Prototypes for the BLAS-named SYR2K family (S/D/C/Z).
 * A and B are const matrix inputs; C is writable.
 * alpha and beta may each be a host or a device pointer. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyr2k_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const float *alpha, /* host or device pointer */
    const float *A, int lda,
    const float *B, int ldb,
    const float *beta,  /* host or device pointer */
    float *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyr2k_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const double *alpha, /* host or device pointer */
    const double *A, int lda,
    const double *B, int ldb,
    const double *beta,  /* host or device pointer */
    double *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyr2k_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const cuComplex *alpha, /* host or device pointer */
    const cuComplex *A, int lda,
    const cuComplex *B, int ldb,
    const cuComplex *beta,  /* host or device pointer */
    cuComplex *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyr2k_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const cuDoubleComplex *alpha, /* host or device pointer */
    const cuDoubleComplex *A, int lda,
    const cuDoubleComplex *B, int ldb,
    const cuDoubleComplex *beta,  /* host or device pointer */
    cuDoubleComplex *C, int ldc);
/* HER2K
 * Prototypes for the BLAS-named HER2K family (C/Z only).
 * alpha is complex, beta is real (float / double); A and B are const
 * complex inputs and C is the writable complex operand.
 * alpha and beta may each be a host or a device pointer. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCher2k_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const cuComplex *alpha, /* host or device pointer */
    const cuComplex *A, int lda,
    const cuComplex *B, int ldb,
    const float *beta,      /* host or device pointer */
    cuComplex *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZher2k_v2(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const cuDoubleComplex *alpha, /* host or device pointer */
    const cuDoubleComplex *A, int lda,
    const cuDoubleComplex *B, int ldb,
    const double *beta,           /* host or device pointer */
    cuDoubleComplex *C, int ldc);
/* SYRKX : eXtended SYRK
 * Prototypes for the S/D/C/Z SYRKX routines.  Compared with SYRK they take
 * a second const matrix input B (leading dimension ldb); the names carry no
 * _v2 suffix.  C is writable; alpha and beta may each be a host or a
 * device pointer. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsyrkx(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const float *alpha, /* host or device pointer */
    const float *A, int lda,
    const float *B, int ldb,
    const float *beta,  /* host or device pointer */
    float *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsyrkx(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const double *alpha, /* host or device pointer */
    const double *A, int lda,
    const double *B, int ldb,
    const double *beta,  /* host or device pointer */
    double *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsyrkx(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const cuComplex *alpha, /* host or device pointer */
    const cuComplex *A, int lda,
    const cuComplex *B, int ldb,
    const cuComplex *beta,  /* host or device pointer */
    cuComplex *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsyrkx(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const cuDoubleComplex *alpha, /* host or device pointer */
    const cuDoubleComplex *A, int lda,
    const cuDoubleComplex *B, int ldb,
    const cuDoubleComplex *beta,  /* host or device pointer */
    cuDoubleComplex *C, int ldc);
/* HERKX : eXtended HERK
 * Prototypes for the C/Z HERKX routines.  Compared with HERK they take a
 * second const matrix input B, and alpha is complex while beta stays real
 * (float / double).  C is writable; alpha and beta may each be a host or a
 * device pointer. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCherkx(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const cuComplex *alpha, /* host or device pointer */
    const cuComplex *A, int lda,
    const cuComplex *B, int ldb,
    const float *beta,      /* host or device pointer */
    cuComplex *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZherkx(
    cublasHandle_t handle,
    cublasFillMode_t uplo, cublasOperation_t trans,
    int n, int k,
    const cuDoubleComplex *alpha, /* host or device pointer */
    const cuDoubleComplex *A, int lda,
    const cuDoubleComplex *B, int ldb,
    const double *beta,           /* host or device pointer */
    cuDoubleComplex *C, int ldc);
/* SYMM
 * Prototypes for the BLAS-named SYMM family (S/D/C/Z).
 * `side` and `uplo` are mode selectors; A and B are const inputs and C is
 * writable.  alpha and beta may each be a host or a device pointer. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSsymm_v2(
    cublasHandle_t handle,
    cublasSideMode_t side, cublasFillMode_t uplo,
    int m, int n,
    const float *alpha, /* host or device pointer */
    const float *A, int lda,
    const float *B, int ldb,
    const float *beta,  /* host or device pointer */
    float *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDsymm_v2(
    cublasHandle_t handle,
    cublasSideMode_t side, cublasFillMode_t uplo,
    int m, int n,
    const double *alpha, /* host or device pointer */
    const double *A, int lda,
    const double *B, int ldb,
    const double *beta,  /* host or device pointer */
    double *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCsymm_v2(
    cublasHandle_t handle,
    cublasSideMode_t side, cublasFillMode_t uplo,
    int m, int n,
    const cuComplex *alpha, /* host or device pointer */
    const cuComplex *A, int lda,
    const cuComplex *B, int ldb,
    const cuComplex *beta,  /* host or device pointer */
    cuComplex *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZsymm_v2(
    cublasHandle_t handle,
    cublasSideMode_t side, cublasFillMode_t uplo,
    int m, int n,
    const cuDoubleComplex *alpha, /* host or device pointer */
    const cuDoubleComplex *A, int lda,
    const cuDoubleComplex *B, int ldb,
    const cuDoubleComplex *beta,  /* host or device pointer */
    cuDoubleComplex *C, int ldc);
/* HEMM
 * Prototypes for the BLAS-named HEMM family (C/Z only).
 * Same parameter layout as SYMM; all scalars and matrices are complex.
 * A and B are const inputs, C is writable; alpha and beta may each be a
 * host or a device pointer. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasChemm_v2(
    cublasHandle_t handle,
    cublasSideMode_t side, cublasFillMode_t uplo,
    int m, int n,
    const cuComplex *alpha, /* host or device pointer */
    const cuComplex *A, int lda,
    const cuComplex *B, int ldb,
    const cuComplex *beta,  /* host or device pointer */
    cuComplex *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZhemm_v2(
    cublasHandle_t handle,
    cublasSideMode_t side, cublasFillMode_t uplo,
    int m, int n,
    const cuDoubleComplex *alpha, /* host or device pointer */
    const cuDoubleComplex *A, int lda,
    const cuDoubleComplex *B, int ldb,
    const cuDoubleComplex *beta,  /* host or device pointer */
    cuDoubleComplex *C, int ldc);
/* TRSM
 * Prototypes for the BLAS-named TRSM family (S/D/C/Z).
 * A is a const input; B is the only writable matrix (there is no separate
 * output operand).  alpha may be a host or a device pointer. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsm_v2(
    cublasHandle_t handle,
    cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int m, int n,
    const float *alpha, /* host or device pointer */
    const float *A, int lda,
    float *B, int ldb);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsm_v2(
    cublasHandle_t handle,
    cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int m, int n,
    const double *alpha, /* host or device pointer */
    const double *A, int lda,
    double *B, int ldb);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsm_v2(
    cublasHandle_t handle,
    cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int m, int n,
    const cuComplex *alpha, /* host or device pointer */
    const cuComplex *A, int lda,
    cuComplex *B, int ldb);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsm_v2(
    cublasHandle_t handle,
    cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int m, int n,
    const cuDoubleComplex *alpha, /* host or device pointer */
    const cuDoubleComplex *A, int lda,
    cuDoubleComplex *B, int ldb);
/* TRMM
 * Prototypes for the BLAS-named TRMM family (S/D/C/Z).
 * In these signatures A and B are both const inputs and the result goes to
 * a separate writable operand C with its own leading dimension ldc.
 * alpha may be a host or a device pointer. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrmm_v2(
    cublasHandle_t handle,
    cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int m, int n,
    const float *alpha, /* host or device pointer */
    const float *A, int lda,
    const float *B, int ldb,
    float *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrmm_v2(
    cublasHandle_t handle,
    cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int m, int n,
    const double *alpha, /* host or device pointer */
    const double *A, int lda,
    const double *B, int ldb,
    double *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrmm_v2(
    cublasHandle_t handle,
    cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int m, int n,
    const cuComplex *alpha, /* host or device pointer */
    const cuComplex *A, int lda,
    const cuComplex *B, int ldb,
    cuComplex *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrmm_v2(
    cublasHandle_t handle,
    cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int m, int n,
    const cuDoubleComplex *alpha, /* host or device pointer */
    const cuDoubleComplex *A, int lda,
    const cuDoubleComplex *B, int ldb,
    cuDoubleComplex *C, int ldc);
/* BATCH GEMM
 * Batched counterparts of the S/D/C/Z GEMM prototypes.  The matrix
 * operands are arrays of pointers (Aarray / Barray / Carray) and a trailing
 * batchCount is added; a single lda / ldb / ldc applies to each array.
 * alpha and beta may each be a host or a device pointer. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemmBatched(
    cublasHandle_t handle,
    cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const float *alpha, /* host or device pointer */
    const float *Aarray[], int lda,
    const float *Barray[], int ldb,
    const float *beta,  /* host or device pointer */
    float *Carray[], int ldc,
    int batchCount);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemmBatched(
    cublasHandle_t handle,
    cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const double *alpha, /* host or device pointer */
    const double *Aarray[], int lda,
    const double *Barray[], int ldb,
    const double *beta,  /* host or device pointer */
    double *Carray[], int ldc,
    int batchCount);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemmBatched(
    cublasHandle_t handle,
    cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const cuComplex *alpha, /* host or device pointer */
    const cuComplex *Aarray[], int lda,
    const cuComplex *Barray[], int ldb,
    const cuComplex *beta,  /* host or device pointer */
    cuComplex *Carray[], int ldc,
    int batchCount);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemmBatched(
    cublasHandle_t handle,
    cublasOperation_t transa, cublasOperation_t transb,
    int m, int n, int k,
    const cuDoubleComplex *alpha, /* host or device pointer */
    const cuDoubleComplex *Aarray[], int lda,
    const cuDoubleComplex *Barray[], int ldb,
    const cuDoubleComplex *beta,  /* host or device pointer */
    cuDoubleComplex *Carray[], int ldc,
    int batchCount);
/* ---------------- CUBLAS BLAS-like extension ---------------- */
/* GEAM
 * Prototypes for the S/D/C/Z GEAM extension routines.
 * Parameter order differs from GEMM: each scalar directly precedes its
 * matrix (alpha, A, lda, then beta, B, ldb).  A and B are const inputs and
 * C is writable; alpha and beta may each be a host or a device pointer. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgeam(
    cublasHandle_t handle,
    cublasOperation_t transa, cublasOperation_t transb,
    int m, int n,
    const float *alpha, /* host or device pointer */
    const float *A, int lda,
    const float *beta,  /* host or device pointer */
    const float *B, int ldb,
    float *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgeam(
    cublasHandle_t handle,
    cublasOperation_t transa, cublasOperation_t transb,
    int m, int n,
    const double *alpha, /* host or device pointer */
    const double *A, int lda,
    const double *beta,  /* host or device pointer */
    const double *B, int ldb,
    double *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgeam(
    cublasHandle_t handle,
    cublasOperation_t transa, cublasOperation_t transb,
    int m, int n,
    const cuComplex *alpha, /* host or device pointer */
    const cuComplex *A, int lda,
    const cuComplex *beta,  /* host or device pointer */
    const cuComplex *B, int ldb,
    cuComplex *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgeam(
    cublasHandle_t handle,
    cublasOperation_t transa, cublasOperation_t transb,
    int m, int n,
    const cuDoubleComplex *alpha, /* host or device pointer */
    const cuDoubleComplex *A, int lda,
    const cuDoubleComplex *beta,  /* host or device pointer */
    const cuDoubleComplex *B, int ldb,
    cuDoubleComplex *C, int ldc);
/* Batched LU - GETRF
 * S/D/C/Z prototypes.  A is a writable array of matrix pointers; P and info
 * are writable integer buffers.  All three are annotated as device
 * pointers.  batchSize is the trailing count argument. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetrfBatched(
    cublasHandle_t handle,
    int n,
    float *A[], /* device pointer */
    int lda,
    int *P,     /* device pointer */
    int *info,  /* device pointer */
    int batchSize);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetrfBatched(
    cublasHandle_t handle,
    int n,
    double *A[], /* device pointer */
    int lda,
    int *P,      /* device pointer */
    int *info,   /* device pointer */
    int batchSize);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetrfBatched(
    cublasHandle_t handle,
    int n,
    cuComplex *A[], /* device pointer */
    int lda,
    int *P,         /* device pointer */
    int *info,      /* device pointer */
    int batchSize);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetrfBatched(
    cublasHandle_t handle,
    int n,
    cuDoubleComplex *A[], /* device pointer */
    int lda,
    int *P,               /* device pointer */
    int *info,            /* device pointer */
    int batchSize);
/* Batched inversion based on LU factorization from getrf
 * S/D/C/Z prototypes.  A (const) and P (const) are inputs; C is the
 * writable array of output matrix pointers.  A, P and C are annotated as
 * device pointers.
 * NOTE(review): `info` carries no memory-space annotation in this header;
 * confirm host vs. device against the cuBLAS reference before relying on it. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetriBatched(
    cublasHandle_t handle,
    int n,
    const float *A[], /* device pointer */
    int lda,
    const int *P,     /* device pointer */
    float *C[],       /* device pointer */
    int ldc,
    int *info,
    int batchSize);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetriBatched(
    cublasHandle_t handle,
    int n,
    const double *A[], /* device pointer */
    int lda,
    const int *P,      /* device pointer */
    double *C[],       /* device pointer */
    int ldc,
    int *info,
    int batchSize);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetriBatched(
    cublasHandle_t handle,
    int n,
    const cuComplex *A[], /* device pointer */
    int lda,
    const int *P,         /* device pointer */
    cuComplex *C[],       /* device pointer */
    int ldc,
    int *info,
    int batchSize);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetriBatched(
    cublasHandle_t handle,
    int n,
    const cuDoubleComplex *A[], /* device pointer */
    int lda,
    const int *P,               /* device pointer */
    cuDoubleComplex *C[],       /* device pointer */
    int ldc,
    int *info,
    int batchSize);
/* Batched solver based on LU factorization from getrf
 * S/D/C/Z prototypes.  Aarray and devIpiv are const inputs; Barray is the
 * writable array of matrix pointers.
 * NOTE(review): none of the pointer parameters is annotated with a memory
 * space here (devIpiv is presumably a device buffer, going by its name);
 * confirm against the cuBLAS reference. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgetrsBatched(
    cublasHandle_t handle,
    cublasOperation_t trans,
    int n, int nrhs,
    const float *Aarray[], int lda,
    const int *devIpiv,
    float *Barray[], int ldb,
    int *info,
    int batchSize);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgetrsBatched(
    cublasHandle_t handle,
    cublasOperation_t trans,
    int n, int nrhs,
    const double *Aarray[], int lda,
    const int *devIpiv,
    double *Barray[], int ldb,
    int *info,
    int batchSize);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgetrsBatched(
    cublasHandle_t handle,
    cublasOperation_t trans,
    int n, int nrhs,
    const cuComplex *Aarray[], int lda,
    const int *devIpiv,
    cuComplex *Barray[], int ldb,
    int *info,
    int batchSize);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgetrsBatched(
    cublasHandle_t handle,
    cublasOperation_t trans,
    int n, int nrhs,
    const cuDoubleComplex *Aarray[], int lda,
    const int *devIpiv,
    cuDoubleComplex *Barray[], int ldb,
    int *info,
    int batchSize);
/* TRSM - Batched Triangular Solver
 * S/D/C/Z prototypes.  Same selectors as the non-batched TRSM; A and B are
 * arrays of matrix pointers (A const, B writable) and a trailing batchCount
 * is added.  alpha may be a host or a device pointer. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrsmBatched(
    cublasHandle_t handle,
    cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int m, int n,
    const float *alpha, /* host or device pointer */
    const float *A[], int lda,
    float *B[], int ldb,
    int batchCount);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrsmBatched(
    cublasHandle_t handle,
    cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int m, int n,
    const double *alpha, /* host or device pointer */
    const double *A[], int lda,
    double *B[], int ldb,
    int batchCount);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrsmBatched(
    cublasHandle_t handle,
    cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int m, int n,
    const cuComplex *alpha, /* host or device pointer */
    const cuComplex *A[], int lda,
    cuComplex *B[], int ldb,
    int batchCount);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrsmBatched(
    cublasHandle_t handle,
    cublasSideMode_t side, cublasFillMode_t uplo,
    cublasOperation_t trans, cublasDiagType_t diag,
    int m, int n,
    const cuDoubleComplex *alpha, /* host or device pointer */
    const cuDoubleComplex *A[], int lda,
    cuDoubleComplex *B[], int ldb,
    int batchCount);
/* Batched - MATINV
 * S/D/C/Z prototypes.  A is a const array of input matrix pointers; Ainv is
 * the writable array of output matrix pointers with its own leading
 * dimension lda_inv.  A, Ainv and info are annotated as device pointers. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSmatinvBatched(
    cublasHandle_t handle,
    int n,
    const float *A[], /* device pointer */
    int lda,
    float *Ainv[],    /* device pointer */
    int lda_inv,
    int *info,        /* device pointer */
    int batchSize);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDmatinvBatched(
    cublasHandle_t handle,
    int n,
    const double *A[], /* device pointer */
    int lda,
    double *Ainv[],    /* device pointer */
    int lda_inv,
    int *info,         /* device pointer */
    int batchSize);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCmatinvBatched(
    cublasHandle_t handle,
    int n,
    const cuComplex *A[], /* device pointer */
    int lda,
    cuComplex *Ainv[],    /* device pointer */
    int lda_inv,
    int *info,            /* device pointer */
    int batchSize);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZmatinvBatched(
    cublasHandle_t handle,
    int n,
    const cuDoubleComplex *A[], /* device pointer */
    int lda,
    cuDoubleComplex *Ainv[],    /* device pointer */
    int lda_inv,
    int *info,                  /* device pointer */
    int batchSize);
/* Batch QR Factorization
 * S/D/C/Z prototypes.  Aarray and TauArray are writable arrays of pointers,
 * both annotated as device pointers.
 * NOTE(review): `info` has no memory-space annotation in this header;
 * confirm host vs. device against the cuBLAS reference. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgeqrfBatched(
    cublasHandle_t handle,
    int m, int n,
    float *Aarray[],   /* device pointer */
    int lda,
    float *TauArray[], /* device pointer */
    int *info,
    int batchSize);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgeqrfBatched(
    cublasHandle_t handle,
    int m, int n,
    double *Aarray[],   /* device pointer */
    int lda,
    double *TauArray[], /* device pointer */
    int *info,
    int batchSize);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgeqrfBatched(
    cublasHandle_t handle,
    int m, int n,
    cuComplex *Aarray[],   /* device pointer */
    int lda,
    cuComplex *TauArray[], /* device pointer */
    int *info,
    int batchSize);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgeqrfBatched(
    cublasHandle_t handle,
    int m, int n,
    cuDoubleComplex *Aarray[],   /* device pointer */
    int lda,
    cuDoubleComplex *TauArray[], /* device pointer */
    int *info,
    int batchSize);
/* Least Square Min: only m >= n and Non-transpose supported
 * S/D/C/Z prototypes.  Aarray and Carray are writable arrays of pointers,
 * both annotated as device pointers.  Two status outputs exist: `info`
 * (no memory-space annotation here) and `devInfoArray`. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgelsBatched(
    cublasHandle_t handle,
    cublasOperation_t trans,
    int m, int n, int nrhs,
    float *Aarray[],   /* device pointer */
    int lda,
    float *Carray[],   /* device pointer */
    int ldc,
    int *info,
    int *devInfoArray, /* device pointer */
    int batchSize);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgelsBatched(
    cublasHandle_t handle,
    cublasOperation_t trans,
    int m, int n, int nrhs,
    double *Aarray[],  /* device pointer */
    int lda,
    double *Carray[],  /* device pointer */
    int ldc,
    int *info,
    int *devInfoArray, /* device pointer */
    int batchSize);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgelsBatched(
    cublasHandle_t handle,
    cublasOperation_t trans,
    int m, int n, int nrhs,
    cuComplex *Aarray[], /* device pointer */
    int lda,
    cuComplex *Carray[], /* device pointer */
    int ldc,
    int *info,
    int *devInfoArray,   /* presumably a device pointer, as annotated on the S/D variants */
    int batchSize);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgelsBatched(
    cublasHandle_t handle,
    cublasOperation_t trans,
    int m, int n, int nrhs,
    cuDoubleComplex *Aarray[], /* device pointer */
    int lda,
    cuDoubleComplex *Carray[], /* device pointer */
    int ldc,
    int *info,
    int *devInfoArray,         /* presumably a device pointer, as annotated on the S/D variants */
    int batchSize);
/* DGMM
 * S/D/C/Z prototypes.  Takes a const matrix A (leading dimension lda) and a
 * const vector x (stride incx) and writes matrix C (leading dimension ldc);
 * `mode` is a cublasSideMode_t selector.  No alpha/beta scalars are taken. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSdgmm(
    cublasHandle_t handle,
    cublasSideMode_t mode,
    int m, int n,
    const float *A, int lda,
    const float *x, int incx,
    float *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDdgmm(
    cublasHandle_t handle,
    cublasSideMode_t mode,
    int m, int n,
    const double *A, int lda,
    const double *x, int incx,
    double *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCdgmm(
    cublasHandle_t handle,
    cublasSideMode_t mode,
    int m, int n,
    const cuComplex *A, int lda,
    const cuComplex *x, int incx,
    cuComplex *C, int ldc);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZdgmm(
    cublasHandle_t handle,
    cublasSideMode_t mode,
    int m, int n,
    const cuDoubleComplex *A, int lda,
    const cuDoubleComplex *x, int incx,
    cuDoubleComplex *C, int ldc);
/* TPTTR : Triangular Pack format to Triangular format
 * S/D/C/Z prototypes.  AP is the const packed input; A (leading dimension
 * lda) is the writable output. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStpttr(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const float *AP,
    float *A, int lda);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtpttr(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const double *AP,
    double *A, int lda);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtpttr(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const cuComplex *AP,
    cuComplex *A, int lda);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtpttr(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const cuDoubleComplex *AP,
    cuDoubleComplex *A, int lda);
/* TRTTP : Triangular format to Triangular Pack format
 * S/D/C/Z prototypes.  A (leading dimension lda) is the const input; AP is
 * the writable packed output. */
CUBLASAPI cublasStatus_t CUBLASWINAPI cublasStrttp(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const float *A, int lda,
    float *AP);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDtrttp(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const double *A, int lda,
    double *AP);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCtrttp(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const cuComplex *A, int lda,
    cuComplex *AP);

CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZtrttp(
    cublasHandle_t handle,
    cublasFillMode_t uplo, int n,
    const cuDoubleComplex *A, int lda,
    cuDoubleComplex *AP);
#if defined(__cplusplus)
}
#endif /* __cplusplus */
#endif /* !defined(CUBLAS_API_H_) */