GEMM: Added skeleton for cuBLAS GEMM calls
This commit is contained in:
@@ -31,6 +31,7 @@
|
||||
//CUDA Backend
|
||||
#include "isaac/driver/external/CUDA/cuda.h"
|
||||
#include "isaac/driver/external/CUDA/nvrtc.h"
|
||||
#include "isaac/driver/external/CUDA/cublas.h"
|
||||
|
||||
#include <iostream>
|
||||
|
||||
@@ -48,9 +49,7 @@ private:
|
||||
|
||||
template <class R, class... A>
|
||||
struct return_type<R (*)(A...)>
|
||||
{
|
||||
typedef R type;
|
||||
};
|
||||
{ typedef R type; };
|
||||
|
||||
typedef bool (*f_init_t)();
|
||||
|
||||
@@ -65,10 +64,13 @@ private:
|
||||
return (*fptr)(args...);
|
||||
}
|
||||
|
||||
static void cublasCreate(cublasHandle_t* h);
|
||||
|
||||
public:
|
||||
static bool clinit();
|
||||
static bool cuinit();
|
||||
static bool cublasinit();
|
||||
static bool nvrtcinit();
|
||||
static bool cuinit();
|
||||
|
||||
static void release();
|
||||
|
||||
@@ -144,10 +146,17 @@ public:
|
||||
static nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char **headers, const char **includeNames);
|
||||
static nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char *log);
|
||||
|
||||
static void cublasGetStream(cudaStream_t *streamId);
|
||||
static void cublasSetStream(cudaStream_t streamId);
|
||||
static void cublasSgemm (cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, float* alpha, const float *A, int lda, const float *B, int ldb, float* beta, float *C, int ldc);
|
||||
static void cublasDgemm (cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, double* alpha, const double *A, int lda, const double *B, int ldb, double* beta, double *C, int ldc);
|
||||
|
||||
private:
|
||||
static void* opencl_;
|
||||
static void* cuda_;
|
||||
static void* nvrtc_;
|
||||
static void* cublas_;
|
||||
static cublasHandle_t cublas_handle_;
|
||||
|
||||
//OpenCL
|
||||
static void* clBuildProgram_;
|
||||
@@ -220,6 +229,12 @@ private:
|
||||
static void* nvrtcGetPTXSize_;
|
||||
static void* nvrtcCreateProgram_;
|
||||
static void* nvrtcGetProgramLog_;
|
||||
|
||||
static void* cublasCreate_;
|
||||
static void* cublasGetStream_;
|
||||
static void* cublasSetStream_;
|
||||
static void* cublasSgemm_;
|
||||
static void* cublasDgemm_;
|
||||
};
|
||||
|
||||
}
|
||||
|
64
include/isaac/driver/external/CUDA/builtin_types.h
vendored
Normal file
64
include/isaac/driver/external/CUDA/builtin_types.h
vendored
Normal file
@@ -0,0 +1,64 @@
|
||||
/*
|
||||
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
#include "device_types.h"
|
||||
#if !defined(__CUDACC_RTC__)
|
||||
#define EXCLUDE_FROM_RTC
|
||||
#include "driver_types.h"
|
||||
#undef EXCLUDE_FROM_RTC
|
||||
#endif /* !__CUDACC_RTC__ */
|
||||
#include "surface_types.h"
|
||||
#include "texture_types.h"
|
||||
#include "vector_types.h"
|
412
include/isaac/driver/external/CUDA/channel_descriptor.h
vendored
Normal file
412
include/isaac/driver/external/CUDA/channel_descriptor.h
vendored
Normal file
@@ -0,0 +1,412 @@
|
||||
/*
|
||||
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#if !defined(__CHANNEL_DESCRIPTOR_H__)
|
||||
#define __CHANNEL_DESCRIPTOR_H__
|
||||
|
||||
#if defined(__cplusplus)
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
#include "driver_types.h"
|
||||
#include "cuda_runtime_api.h"
|
||||
#include "host_defines.h"
|
||||
#include "vector_types.h"
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
/**
|
||||
* \addtogroup CUDART_HIGHLEVEL
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* \brief \hl Returns a channel descriptor using the specified format
|
||||
*
|
||||
* Returns a channel descriptor with format \p f and number of bits of each
|
||||
* component \p x, \p y, \p z, and \p w. The ::cudaChannelFormatDesc is
|
||||
* defined as:
|
||||
* \code
|
||||
struct cudaChannelFormatDesc {
|
||||
int x, y, z, w;
|
||||
enum cudaChannelFormatKind f;
|
||||
};
|
||||
* \endcode
|
||||
*
|
||||
* where ::cudaChannelFormatKind is one of ::cudaChannelFormatKindSigned,
|
||||
* ::cudaChannelFormatKindUnsigned, or ::cudaChannelFormatKindFloat.
|
||||
*
|
||||
* \return
|
||||
* Channel descriptor with format \p f
|
||||
*
|
||||
* \sa \ref ::cudaCreateChannelDesc(int,int,int,int,cudaChannelFormatKind) "cudaCreateChannelDesc (Low level)",
|
||||
* ::cudaGetChannelDesc, ::cudaGetTextureReference,
|
||||
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture (High level)",
|
||||
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>&, const void*, size_t) "cudaBindTexture (High level, inherited channel descriptor)",
|
||||
* \ref ::cudaBindTexture2D(size_t*, const struct texture< T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t) "cudaBindTexture2D (High level)",
|
||||
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, cudaArray_const_t, const struct cudaChannelFormatDesc&) "cudaBindTextureToArray (High level)",
|
||||
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, cudaArray_const_t) "cudaBindTextureToArray (High level, inherited channel descriptor)",
|
||||
* \ref ::cudaUnbindTexture(const struct texture< T, dim, readMode>&) "cudaUnbindTexture (High level)",
|
||||
* \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture< T, dim, readMode>&) "cudaGetTextureAlignmentOffset (High level)"
|
||||
*/
|
||||
template<class T> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc(void)
{
  /* Generic fallback used when no specialization matches T: all four
   * component widths are zero and the kind is cudaChannelFormatKindNone. */
  const int noBits = 0;

  return cudaCreateChannelDesc(noBits, noBits, noBits, noBits, cudaChannelFormatKindNone);
}
|
||||
|
||||
static __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDescHalf(void)
{
  /* Float-kind descriptor with only x populated; the width is taken from
   * sizeof(unsigned short) * 8. */
  const int bits = 8 * static_cast<int>(sizeof(unsigned short));

  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindFloat);
}
|
||||
|
||||
static __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDescHalf1(void)
{
  /* Same result as the plain "Half" variant: one float-kind component (x)
   * whose width is sizeof(unsigned short) * 8. */
  const int bits = 8 * static_cast<int>(sizeof(unsigned short));

  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindFloat);
}
|
||||
|
||||
static __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDescHalf2(void)
{
  /* Float-kind descriptor with x and y populated, each
   * sizeof(unsigned short) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(unsigned short));

  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindFloat);
}
|
||||
|
||||
static __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDescHalf4(void)
{
  /* Float-kind descriptor with all four components populated, each
   * sizeof(unsigned short) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(unsigned short));

  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindFloat);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<char>(void)
{
  /* The channel kind for plain `char` follows the compiler's char
   * signedness macros: unsigned when either macro is defined, signed
   * otherwise. Only the x component is populated. */
#if defined(_CHAR_UNSIGNED) || defined(__CHAR_UNSIGNED__)
  const cudaChannelFormatKind kind = cudaChannelFormatKindUnsigned;
#else /* _CHAR_UNSIGNED || __CHAR_UNSIGNED__ */
  const cudaChannelFormatKind kind = cudaChannelFormatKindSigned;
#endif /* _CHAR_UNSIGNED || __CHAR_UNSIGNED__ */
  const int bits = 8 * static_cast<int>(sizeof(char));

  return cudaCreateChannelDesc(bits, 0, 0, 0, kind);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<signed char>(void)
{
  /* Signed kind; only x is populated, sizeof(signed char) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(signed char));

  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindSigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<unsigned char>(void)
{
  /* Unsigned kind; only x is populated, sizeof(unsigned char) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(unsigned char));

  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindUnsigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<char1>(void)
{
  /* char1: signed kind with x populated; width is sizeof(signed char) * 8. */
  const int bits = 8 * static_cast<int>(sizeof(signed char));

  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindSigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<uchar1>(void)
{
  /* uchar1: unsigned kind with x populated; width is sizeof(unsigned char) * 8. */
  const int bits = 8 * static_cast<int>(sizeof(unsigned char));

  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindUnsigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<char2>(void)
{
  /* char2: signed kind with x and y populated, each
   * sizeof(signed char) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(signed char));

  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindSigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<uchar2>(void)
{
  /* uchar2: unsigned kind with x and y populated, each
   * sizeof(unsigned char) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(unsigned char));

  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindUnsigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<char4>(void)
{
  /* char4: signed kind with all four components populated, each
   * sizeof(signed char) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(signed char));

  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindSigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<uchar4>(void)
{
  /* uchar4: unsigned kind with all four components populated, each
   * sizeof(unsigned char) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(unsigned char));

  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindUnsigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<short>(void)
{
  /* Signed kind; only x is populated, sizeof(short) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(short));

  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindSigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<unsigned short>(void)
{
  /* Unsigned kind; only x is populated, sizeof(unsigned short) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(unsigned short));

  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindUnsigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<short1>(void)
{
  /* short1: signed kind with x populated; width is sizeof(short) * 8. */
  const int bits = 8 * static_cast<int>(sizeof(short));

  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindSigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<ushort1>(void)
{
  /* ushort1: unsigned kind with x populated; width is
   * sizeof(unsigned short) * 8. */
  const int bits = 8 * static_cast<int>(sizeof(unsigned short));

  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindUnsigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<short2>(void)
{
  /* short2: signed kind with x and y populated, each
   * sizeof(short) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(short));

  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindSigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<ushort2>(void)
{
  /* ushort2: unsigned kind with x and y populated, each
   * sizeof(unsigned short) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(unsigned short));

  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindUnsigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<short4>(void)
{
  /* short4: signed kind with all four components populated, each
   * sizeof(short) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(short));

  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindSigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<ushort4>(void)
{
  /* ushort4: unsigned kind with all four components populated, each
   * sizeof(unsigned short) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(unsigned short));

  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindUnsigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<int>(void)
{
  /* Signed kind; only x is populated, sizeof(int) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(int));

  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindSigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<unsigned int>(void)
{
  /* Unsigned kind; only x is populated, sizeof(unsigned int) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(unsigned int));

  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindUnsigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<int1>(void)
{
  /* int1: signed kind with x populated; width is sizeof(int) * 8. */
  const int bits = 8 * static_cast<int>(sizeof(int));

  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindSigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<uint1>(void)
{
  /* uint1: unsigned kind with x populated; width is sizeof(unsigned int) * 8. */
  const int bits = 8 * static_cast<int>(sizeof(unsigned int));

  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindUnsigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<int2>(void)
{
  /* int2: signed kind with x and y populated, each sizeof(int) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(int));

  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindSigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<uint2>(void)
{
  /* uint2: unsigned kind with x and y populated, each
   * sizeof(unsigned int) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(unsigned int));

  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindUnsigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<int4>(void)
{
  /* int4: signed kind with all four components populated, each
   * sizeof(int) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(int));

  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindSigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<uint4>(void)
{
  /* uint4: unsigned kind with all four components populated, each
   * sizeof(unsigned int) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(unsigned int));

  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindUnsigned);
}
|
||||
|
||||
#if !defined(__LP64__)
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<long>(void)
{
  /* Signed kind; only x is populated, sizeof(long) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(long));

  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindSigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<unsigned long>(void)
{
  /* Unsigned kind; only x is populated, sizeof(unsigned long) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(unsigned long));

  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindUnsigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<long1>(void)
{
  /* long1: signed kind with x populated; width is sizeof(long) * 8. */
  const int bits = 8 * static_cast<int>(sizeof(long));

  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindSigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<ulong1>(void)
{
  /* ulong1: unsigned kind with x populated; width is
   * sizeof(unsigned long) * 8. */
  const int bits = 8 * static_cast<int>(sizeof(unsigned long));

  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindUnsigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<long2>(void)
{
  /* long2: signed kind with x and y populated, each
   * sizeof(long) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(long));

  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindSigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<ulong2>(void)
{
  /* ulong2: unsigned kind with x and y populated, each
   * sizeof(unsigned long) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(unsigned long));

  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindUnsigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<long4>(void)
{
  /* long4: signed kind with all four components populated, each
   * sizeof(long) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(long));

  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindSigned);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<ulong4>(void)
{
  /* ulong4: unsigned kind with all four components populated, each
   * sizeof(unsigned long) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(unsigned long));

  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindUnsigned);
}
|
||||
|
||||
#endif /* !__LP64__ */
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<float>(void)
{
  /* Float kind; only x is populated, sizeof(float) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(float));

  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindFloat);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<float1>(void)
{
  /* float1: float kind with x populated; width is sizeof(float) * 8. */
  const int bits = 8 * static_cast<int>(sizeof(float));

  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindFloat);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<float2>(void)
{
  /* float2: float kind with x and y populated, each
   * sizeof(float) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(float));

  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindFloat);
}
|
||||
|
||||
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<float4>(void)
{
  /* float4: float kind with all four components populated, each
   * sizeof(float) * 8 bits wide. */
  const int bits = 8 * static_cast<int>(sizeof(float));

  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindFloat);
}
|
||||
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/** @} */
|
||||
/** @} */ /* END CUDART_TEXTURE_HL */
|
||||
|
||||
#endif /* !__CHANNEL_DESCRIPTOR_H__ */
|
338
include/isaac/driver/external/CUDA/cuComplex.h
vendored
Normal file
338
include/isaac/driver/external/CUDA/cuComplex.h
vendored
Normal file
@@ -0,0 +1,338 @@
|
||||
/*
|
||||
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#if !defined(CU_COMPLEX_H_)
|
||||
#define CU_COMPLEX_H_
|
||||
|
||||
/* When trying to include C header file in C++ Code extern "C" is required
|
||||
* But the Standard QNX headers already have ifdef extern in them when compiling C++ Code
|
||||
* extern "C" cannot be nested
|
||||
* Hence keep the header out of extern "C" block
|
||||
*/
|
||||
|
||||
#include <math.h> /* import fabsf, sqrt */
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#include "vector_types.h"
|
||||
|
||||
typedef float2 cuFloatComplex;
|
||||
|
||||
__host__ __device__ static __inline__ float cuCrealf (cuFloatComplex x)
{
  /* The real part is the .x member of the underlying float2. */
  const float re = x.x;
  return re;
}
|
||||
|
||||
__host__ __device__ static __inline__ float cuCimagf (cuFloatComplex x)
{
  /* The imaginary part is the .y member of the underlying float2. */
  const float im = x.y;
  return im;
}
|
||||
|
||||
__host__ __device__ static __inline__ cuFloatComplex make_cuFloatComplex (float r, float i)
{
  /* Build a single-precision complex value: r goes to .x (real part) and
   * i goes to .y (imaginary part). */
  cuFloatComplex z;
  z.y = i;
  z.x = r;
  return z;
}
|
||||
|
||||
__host__ __device__ static __inline__ cuFloatComplex cuConjf (cuFloatComplex x)
{
  /* Complex conjugate: real part unchanged, imaginary part negated. */
  const float re = cuCrealf(x);
  const float im = cuCimagf(x);
  return make_cuFloatComplex (re, -im);
}
|
||||
__host__ __device__ static __inline__ cuFloatComplex cuCaddf (cuFloatComplex x, cuFloatComplex y)
{
  /* Component-wise sum of the two operands. */
  const float re = cuCrealf(x) + cuCrealf(y);
  const float im = cuCimagf(x) + cuCimagf(y);
  return make_cuFloatComplex (re, im);
}
|
||||
|
||||
__host__ __device__ static __inline__ cuFloatComplex cuCsubf (cuFloatComplex x, cuFloatComplex y)
{
  /* Component-wise difference x - y. */
  const float re = cuCrealf(x) - cuCrealf(y);
  const float im = cuCimagf(x) - cuCimagf(y);
  return make_cuFloatComplex (re, im);
}
|
||||
|
||||
/* This implementation could suffer from intermediate overflow even though
|
||||
* the final result would be in range. However, various implementations do
|
||||
* not guard against this (presumably to avoid losing performance), so we
|
||||
* don't do it either to stay competitive.
|
||||
*/
|
||||
__host__ __device__ static __inline__ cuFloatComplex cuCmulf (cuFloatComplex x, cuFloatComplex y)
{
  /* Textbook complex product with no scaling:
   *   real = xr*yr - xi*yi,  imag = xr*yi + xi*yr.
   * Each component is kept as one expression, mirroring the unguarded
   * formulation. */
  const float xr = cuCrealf(x);
  const float xi = cuCimagf(x);
  const float yr = cuCrealf(y);
  const float yi = cuCimagf(y);
  return make_cuFloatComplex ((xr * yr) - (xi * yi),
                              (xr * yi) + (xi * yr));
}
|
||||
|
||||
/* This implementation guards against intermediate underflow and overflow
|
||||
* by scaling. Such guarded implementations are usually the default for
|
||||
* complex library implementations, with some also offering an unguarded,
|
||||
* faster version.
|
||||
*/
|
||||
__host__ __device__ static __inline__ cuFloatComplex cuCdivf (cuFloatComplex x, cuFloatComplex y)
{
  /* Scaled complex division x / y.  Both operands are first multiplied by
   * 1 / (|Re y| + |Im y|); the quotient is then formed from the scaled
   * values as (x * conj(y)) / |y|^2. */
  float scale = fabsf(cuCrealf(y)) + fabsf(cuCimagf(y));
  float inv = 1.0f / scale;
  float xr = cuCrealf(x) * inv;
  float xi = cuCimagf(x) * inv;
  float yr = cuCrealf(y) * inv;
  float yi = cuCimagf(y) * inv;

  /* Squared magnitude of the scaled divisor and its reciprocal. */
  scale = (yr * yr) + (yi * yi);
  inv = 1.0f / scale;

  return make_cuFloatComplex (((xr * yr) + (xi * yi)) * inv,
                              ((xi * yr) - (xr * yi)) * inv);
}
|
||||
|
||||
/*
|
||||
* We would like to call hypotf(), but it's not available on all platforms.
|
||||
* This discrete implementation guards against intermediate underflow and
|
||||
* overflow by scaling. Otherwise we would lose half the exponent range.
|
||||
* There are various ways of doing guarded computation. For now we chose the
|
||||
* simplest and fastest solution, however this may suffer from inaccuracies
|
||||
* if sqrt and division are not IEEE compliant.
|
||||
*/
|
||||
__host__ __device__ static __inline__ float cuCabsf (cuFloatComplex x)
{
  /* Magnitude of x computed as hi * sqrt(1 + (lo/hi)^2), where hi and lo
   * are the larger and smaller of |Re x| and |Im x|. */
  float re = fabsf(cuCrealf(x));
  float im = fabsf(cuCimagf(x));
  /* When re > im is false (including ties), im is taken as hi. */
  float hi = (re > im) ? re : im;
  float lo = (re > im) ? im : re;
  float result;

  result = lo / hi;
  result = 1.0f + result * result;
  result = hi * sqrtf(result);

  /* Special cases override the scaled result: hi == 0, or either magnitude
   * above 3.402823466e38f (presumably FLT_MAX, i.e. an infinite input). */
  if ((hi == 0.0f) || (hi > 3.402823466e38f) || (lo > 3.402823466e38f)) {
    result = hi + lo;
  }
  return result;
}
|
||||
|
||||
/* Double precision */
|
||||
typedef double2 cuDoubleComplex;
|
||||
|
||||
__host__ __device__ static __inline__ double cuCreal (cuDoubleComplex x)
{
  /* The real part is the .x member of the underlying double2. */
  const double re = x.x;
  return re;
}
|
||||
|
||||
__host__ __device__ static __inline__ double cuCimag (cuDoubleComplex x)
{
  /* The imaginary part is the .y member of the underlying double2. */
  const double im = x.y;
  return im;
}
|
||||
|
||||
__host__ __device__ static __inline__ cuDoubleComplex make_cuDoubleComplex (double r, double i)
{
  /* Build a double-precision complex value: r goes to .x (real part) and
   * i goes to .y (imaginary part). */
  cuDoubleComplex z;
  z.y = i;
  z.x = r;
  return z;
}
|
||||
|
||||
__host__ __device__ static __inline__ cuDoubleComplex cuConj(cuDoubleComplex x)
{
  /* Complex conjugate: real part unchanged, imaginary part negated. */
  const double re = cuCreal(x);
  const double im = cuCimag(x);
  return make_cuDoubleComplex (re, -im);
}
|
||||
|
||||
__host__ __device__ static __inline__ cuDoubleComplex cuCadd(cuDoubleComplex x, cuDoubleComplex y)
{
  /* Component-wise sum of the two operands. */
  const double re = cuCreal(x) + cuCreal(y);
  const double im = cuCimag(x) + cuCimag(y);
  return make_cuDoubleComplex (re, im);
}
|
||||
|
||||
__host__ __device__ static __inline__ cuDoubleComplex cuCsub(cuDoubleComplex x, cuDoubleComplex y)
{
  /* Component-wise difference x - y. */
  const double re = cuCreal(x) - cuCreal(y);
  const double im = cuCimag(x) - cuCimag(y);
  return make_cuDoubleComplex (re, im);
}
|
||||
|
||||
/* This implementation could suffer from intermediate overflow even though
|
||||
* the final result would be in range. However, various implementations do
|
||||
* not guard against this (presumably to avoid losing performance), so we
|
||||
* don't do it either to stay competitive.
|
||||
*/
|
||||
__host__ __device__ static __inline__ cuDoubleComplex cuCmul(cuDoubleComplex x, cuDoubleComplex y)
{
  /* Textbook complex product with no scaling:
   *   real = xr*yr - xi*yi,  imag = xr*yi + xi*yr.
   * Each component is kept as one expression, mirroring the unguarded
   * formulation. */
  const double xr = cuCreal(x);
  const double xi = cuCimag(x);
  const double yr = cuCreal(y);
  const double yi = cuCimag(y);
  return make_cuDoubleComplex ((xr * yr) - (xi * yi),
                               (xr * yi) + (xi * yr));
}
|
||||
|
||||
/* This implementation guards against intermediate underflow and overflow
|
||||
* by scaling. Such guarded implementations are usually the default for
|
||||
* complex library implementations, with some also offering an unguarded,
|
||||
* faster version.
|
||||
*/
|
||||
__host__ __device__ static __inline__ cuDoubleComplex cuCdiv(cuDoubleComplex x, cuDoubleComplex y)
{
  /* Scaled complex division x / y.  Both operands are first multiplied by
   * 1 / (|Re y| + |Im y|); the quotient is then formed from the scaled
   * values as (x * conj(y)) / |y|^2. */
  double scale = (fabs(cuCreal(y))) + (fabs(cuCimag(y)));
  double inv = 1.0 / scale;
  double xr = cuCreal(x) * inv;
  double xi = cuCimag(x) * inv;
  double yr = cuCreal(y) * inv;
  double yi = cuCimag(y) * inv;

  /* Squared magnitude of the scaled divisor and its reciprocal. */
  scale = (yr * yr) + (yi * yi);
  inv = 1.0 / scale;

  return make_cuDoubleComplex (((xr * yr) + (xi * yi)) * inv,
                               ((xi * yr) - (xr * yi)) * inv);
}
|
||||
|
||||
/* This implementation guards against intermediate underflow and overflow
|
||||
* by scaling. Otherwise we would lose half the exponent range. There are
|
||||
* various ways of doing guarded computation. For now chose the simplest
|
||||
* and fastest solution, however this may suffer from inaccuracies if sqrt
|
||||
* and division are not IEEE compliant.
|
||||
*/
|
||||
__host__ __device__ static __inline__ double cuCabs (cuDoubleComplex x)
|
||||
{
|
||||
double a = cuCreal(x);
|
||||
double b = cuCimag(x);
|
||||
double v, w, t;
|
||||
a = fabs(a);
|
||||
b = fabs(b);
|
||||
if (a > b) {
|
||||
v = a;
|
||||
w = b;
|
||||
} else {
|
||||
v = b;
|
||||
w = a;
|
||||
}
|
||||
t = w / v;
|
||||
t = 1.0 + t * t;
|
||||
t = v * sqrt(t);
|
||||
if ((v == 0.0) ||
|
||||
(v > 1.79769313486231570e+308) || (w > 1.79769313486231570e+308)) {
|
||||
t = v + w;
|
||||
}
|
||||
return t;
|
||||
}
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/* aliases */
|
||||
typedef cuFloatComplex cuComplex;
|
||||
/* Alias constructor: forwards (x, y) unchanged to make_cuFloatComplex. */
__host__ __device__ static __inline__ cuComplex make_cuComplex (float x,
                                                                float y)
{
    const cuComplex z = make_cuFloatComplex(x, y);

    return z;
}
|
||||
|
||||
/* float-to-double promotion */
|
||||
/* Widen a single-precision complex value to double precision by casting
 * each component to double.
 */
__host__ __device__ static __inline__ cuDoubleComplex cuComplexFloatToDouble(cuFloatComplex c)
{
    const double re = (double)cuCrealf(c);
    const double im = (double)cuCimagf(c);

    return make_cuDoubleComplex(re, im);
}
|
||||
|
||||
/* Narrow a double-precision complex value to single precision by casting
 * each component to float.
 */
__host__ __device__ static __inline__ cuFloatComplex cuComplexDoubleToFloat(cuDoubleComplex c)
{
    const float re = (float)cuCreal(c);
    const float im = (float)cuCimag(c);

    return make_cuFloatComplex(re, im);
}
|
||||
|
||||
|
||||
/* Single-precision complex multiply-add: returns x * y + d.
 *
 * Each component is accumulated in two multiply-then-add steps; the order of
 * those steps is kept exactly as written so the rounding sequence does not
 * change.
 */
__host__ __device__ static __inline__ cuComplex cuCfmaf( cuComplex x, cuComplex y, cuComplex d)
{
    /* Step 1: products involving Re(x), added to the addend d. */
    float acc_re = (cuCrealf(x) * cuCrealf(y)) + cuCrealf(d);
    float acc_im = (cuCrealf(x) * cuCimagf(y)) + cuCimagf(d);

    /* Step 2: products involving Im(x), folded into the accumulators. */
    acc_re = -(cuCimagf(x) * cuCimagf(y)) + acc_re;
    acc_im = (cuCimagf(x) * cuCrealf(y)) + acc_im;

    return make_cuComplex(acc_re, acc_im);
}
|
||||
|
||||
/* Double-precision complex multiply-add: returns x * y + d.
 *
 * Each component is accumulated in two multiply-then-add steps; the order of
 * those steps is kept exactly as written so the rounding sequence does not
 * change.
 */
__host__ __device__ static __inline__ cuDoubleComplex cuCfma( cuDoubleComplex x, cuDoubleComplex y, cuDoubleComplex d)
{
    /* Step 1: products involving Re(x), added to the addend d. */
    double acc_re = (cuCreal(x) * cuCreal(y)) + cuCreal(d);
    double acc_im = (cuCreal(x) * cuCimag(y)) + cuCimag(d);

    /* Step 2: products involving Im(x), folded into the accumulators. */
    acc_re = -(cuCimag(x) * cuCimag(y)) + acc_re;
    acc_im = (cuCimag(x) * cuCreal(y)) + acc_im;

    return make_cuDoubleComplex(acc_re, acc_im);
}
|
||||
|
||||
#endif /* !defined(CU_COMPLEX_H_) */
|
565
include/isaac/driver/external/CUDA/cublas.h
vendored
Normal file
565
include/isaac/driver/external/CUDA/cublas.h
vendored
Normal file
@@ -0,0 +1,565 @@
|
||||
/*
|
||||
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This is the public header file for the CUBLAS library, defining the API
|
||||
*
|
||||
* CUBLAS is an implementation of BLAS (Basic Linear Algebra Subroutines)
|
||||
* on top of the CUDA runtime.
|
||||
*/
|
||||
|
||||
#if !defined(CUBLAS_H_)
|
||||
#define CUBLAS_H_
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#ifndef CUBLASWINAPI
|
||||
#ifdef _WIN32
|
||||
#define CUBLASWINAPI __stdcall
|
||||
#else
|
||||
#define CUBLASWINAPI
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#undef CUBLASAPI
|
||||
#ifdef __CUDACC__
|
||||
#define CUBLASAPI __host__
|
||||
#else
|
||||
#define CUBLASAPI
|
||||
#endif
|
||||
|
||||
#include "cublas_api.h"
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* CUBLAS data types */
|
||||
#define cublasStatus cublasStatus_t
|
||||
|
||||
cublasStatus CUBLASWINAPI cublasInit (void);
|
||||
cublasStatus CUBLASWINAPI cublasShutdown (void);
|
||||
cublasStatus CUBLASWINAPI cublasGetError (void);
|
||||
|
||||
cublasStatus CUBLASWINAPI cublasGetVersion(int *version);
|
||||
cublasStatus CUBLASWINAPI cublasAlloc (int n, int elemSize, void **devicePtr);
|
||||
|
||||
cublasStatus CUBLASWINAPI cublasFree (void *devicePtr);
|
||||
|
||||
|
||||
cublasStatus CUBLASWINAPI cublasSetKernelStream (cudaStream_t stream);
|
||||
|
||||
|
||||
|
||||
/* ---------------- CUBLAS BLAS1 functions ---------------- */
|
||||
/* NRM2 */
|
||||
float CUBLASWINAPI cublasSnrm2 (int n, const float *x, int incx);
|
||||
double CUBLASWINAPI cublasDnrm2 (int n, const double *x, int incx);
|
||||
float CUBLASWINAPI cublasScnrm2 (int n, const cuComplex *x, int incx);
|
||||
double CUBLASWINAPI cublasDznrm2 (int n, const cuDoubleComplex *x, int incx);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* DOT */
|
||||
float CUBLASWINAPI cublasSdot (int n, const float *x, int incx, const float *y,
|
||||
int incy);
|
||||
double CUBLASWINAPI cublasDdot (int n, const double *x, int incx, const double *y,
|
||||
int incy);
|
||||
cuComplex CUBLASWINAPI cublasCdotu (int n, const cuComplex *x, int incx, const cuComplex *y,
|
||||
int incy);
|
||||
cuComplex CUBLASWINAPI cublasCdotc (int n, const cuComplex *x, int incx, const cuComplex *y,
|
||||
int incy);
|
||||
cuDoubleComplex CUBLASWINAPI cublasZdotu (int n, const cuDoubleComplex *x, int incx, const cuDoubleComplex *y,
|
||||
int incy);
|
||||
cuDoubleComplex CUBLASWINAPI cublasZdotc (int n, const cuDoubleComplex *x, int incx, const cuDoubleComplex *y,
|
||||
int incy);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* SCAL */
|
||||
void CUBLASWINAPI cublasSscal (int n, float alpha, float *x, int incx);
|
||||
void CUBLASWINAPI cublasDscal (int n, double alpha, double *x, int incx);
|
||||
void CUBLASWINAPI cublasCscal (int n, cuComplex alpha, cuComplex *x, int incx);
|
||||
void CUBLASWINAPI cublasZscal (int n, cuDoubleComplex alpha, cuDoubleComplex *x, int incx);
|
||||
|
||||
void CUBLASWINAPI cublasCsscal (int n, float alpha, cuComplex *x, int incx);
|
||||
void CUBLASWINAPI cublasZdscal (int n, double alpha, cuDoubleComplex *x, int incx);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* AXPY */
|
||||
void CUBLASWINAPI cublasSaxpy (int n, float alpha, const float *x, int incx,
|
||||
float *y, int incy);
|
||||
void CUBLASWINAPI cublasDaxpy (int n, double alpha, const double *x,
|
||||
int incx, double *y, int incy);
|
||||
void CUBLASWINAPI cublasCaxpy (int n, cuComplex alpha, const cuComplex *x,
|
||||
int incx, cuComplex *y, int incy);
|
||||
void CUBLASWINAPI cublasZaxpy (int n, cuDoubleComplex alpha, const cuDoubleComplex *x,
|
||||
int incx, cuDoubleComplex *y, int incy);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* COPY */
|
||||
void CUBLASWINAPI cublasScopy (int n, const float *x, int incx, float *y,
|
||||
int incy);
|
||||
void CUBLASWINAPI cublasDcopy (int n, const double *x, int incx, double *y,
|
||||
int incy);
|
||||
void CUBLASWINAPI cublasCcopy (int n, const cuComplex *x, int incx, cuComplex *y,
|
||||
int incy);
|
||||
void CUBLASWINAPI cublasZcopy (int n, const cuDoubleComplex *x, int incx, cuDoubleComplex *y,
|
||||
int incy);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* SWAP */
|
||||
void CUBLASWINAPI cublasSswap (int n, float *x, int incx, float *y, int incy);
|
||||
void CUBLASWINAPI cublasDswap (int n, double *x, int incx, double *y, int incy);
|
||||
void CUBLASWINAPI cublasCswap (int n, cuComplex *x, int incx, cuComplex *y, int incy);
|
||||
void CUBLASWINAPI cublasZswap (int n, cuDoubleComplex *x, int incx, cuDoubleComplex *y, int incy);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* AMAX */
|
||||
int CUBLASWINAPI cublasIsamax (int n, const float *x, int incx);
|
||||
int CUBLASWINAPI cublasIdamax (int n, const double *x, int incx);
|
||||
int CUBLASWINAPI cublasIcamax (int n, const cuComplex *x, int incx);
|
||||
int CUBLASWINAPI cublasIzamax (int n, const cuDoubleComplex *x, int incx);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* AMIN */
|
||||
int CUBLASWINAPI cublasIsamin (int n, const float *x, int incx);
|
||||
int CUBLASWINAPI cublasIdamin (int n, const double *x, int incx);
|
||||
|
||||
int CUBLASWINAPI cublasIcamin (int n, const cuComplex *x, int incx);
|
||||
int CUBLASWINAPI cublasIzamin (int n, const cuDoubleComplex *x, int incx);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* ASUM */
|
||||
float CUBLASWINAPI cublasSasum (int n, const float *x, int incx);
|
||||
double CUBLASWINAPI cublasDasum (int n, const double *x, int incx);
|
||||
float CUBLASWINAPI cublasScasum (int n, const cuComplex *x, int incx);
|
||||
double CUBLASWINAPI cublasDzasum (int n, const cuDoubleComplex *x, int incx);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* ROT */
|
||||
void CUBLASWINAPI cublasSrot (int n, float *x, int incx, float *y, int incy,
|
||||
float sc, float ss);
|
||||
void CUBLASWINAPI cublasDrot (int n, double *x, int incx, double *y, int incy,
|
||||
double sc, double ss);
|
||||
void CUBLASWINAPI cublasCrot (int n, cuComplex *x, int incx, cuComplex *y,
|
||||
int incy, float c, cuComplex s);
|
||||
void CUBLASWINAPI cublasZrot (int n, cuDoubleComplex *x, int incx,
|
||||
cuDoubleComplex *y, int incy, double sc,
|
||||
cuDoubleComplex cs);
|
||||
void CUBLASWINAPI cublasCsrot (int n, cuComplex *x, int incx, cuComplex *y,
|
||||
int incy, float c, float s);
|
||||
void CUBLASWINAPI cublasZdrot (int n, cuDoubleComplex *x, int incx,
|
||||
cuDoubleComplex *y, int incy, double c, double s);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* ROTG */
|
||||
void CUBLASWINAPI cublasSrotg (float *sa, float *sb, float *sc, float *ss);
|
||||
void CUBLASWINAPI cublasDrotg (double *sa, double *sb, double *sc, double *ss);
|
||||
void CUBLASWINAPI cublasCrotg (cuComplex *ca, cuComplex cb, float *sc,
|
||||
cuComplex *cs);
|
||||
void CUBLASWINAPI cublasZrotg (cuDoubleComplex *ca, cuDoubleComplex cb, double *sc,
|
||||
cuDoubleComplex *cs);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* ROTM */
|
||||
void CUBLASWINAPI cublasSrotm(int n, float *x, int incx, float *y, int incy,
|
||||
const float* sparam);
|
||||
void CUBLASWINAPI cublasDrotm(int n, double *x, int incx, double *y, int incy,
|
||||
const double* sparam);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* ROTMG */
|
||||
void CUBLASWINAPI cublasSrotmg (float *sd1, float *sd2, float *sx1,
|
||||
const float *sy1, float* sparam);
|
||||
void CUBLASWINAPI cublasDrotmg (double *sd1, double *sd2, double *sx1,
|
||||
const double *sy1, double* sparam);
|
||||
|
||||
/* --------------- CUBLAS BLAS2 functions ---------------- */
|
||||
/* GEMV */
|
||||
void CUBLASWINAPI cublasSgemv (char trans, int m, int n, float alpha,
|
||||
const float *A, int lda, const float *x, int incx,
|
||||
float beta, float *y, int incy);
|
||||
void CUBLASWINAPI cublasDgemv (char trans, int m, int n, double alpha,
|
||||
const double *A, int lda, const double *x, int incx,
|
||||
double beta, double *y, int incy);
|
||||
void CUBLASWINAPI cublasCgemv (char trans, int m, int n, cuComplex alpha,
|
||||
const cuComplex *A, int lda, const cuComplex *x, int incx,
|
||||
cuComplex beta, cuComplex *y, int incy);
|
||||
void CUBLASWINAPI cublasZgemv (char trans, int m, int n, cuDoubleComplex alpha,
|
||||
const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx,
|
||||
cuDoubleComplex beta, cuDoubleComplex *y, int incy);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* GBMV */
|
||||
void CUBLASWINAPI cublasSgbmv (char trans, int m, int n, int kl, int ku,
|
||||
float alpha, const float *A, int lda,
|
||||
const float *x, int incx, float beta, float *y,
|
||||
int incy);
|
||||
void CUBLASWINAPI cublasDgbmv (char trans, int m, int n, int kl, int ku,
|
||||
double alpha, const double *A, int lda,
|
||||
const double *x, int incx, double beta, double *y,
|
||||
int incy);
|
||||
void CUBLASWINAPI cublasCgbmv (char trans, int m, int n, int kl, int ku,
|
||||
cuComplex alpha, const cuComplex *A, int lda,
|
||||
const cuComplex *x, int incx, cuComplex beta, cuComplex *y,
|
||||
int incy);
|
||||
void CUBLASWINAPI cublasZgbmv (char trans, int m, int n, int kl, int ku,
|
||||
cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
|
||||
const cuDoubleComplex *x, int incx, cuDoubleComplex beta, cuDoubleComplex *y,
|
||||
int incy);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* TRMV */
|
||||
void CUBLASWINAPI cublasStrmv (char uplo, char trans, char diag, int n,
|
||||
const float *A, int lda, float *x, int incx);
|
||||
void CUBLASWINAPI cublasDtrmv (char uplo, char trans, char diag, int n,
|
||||
const double *A, int lda, double *x, int incx);
|
||||
void CUBLASWINAPI cublasCtrmv (char uplo, char trans, char diag, int n,
|
||||
const cuComplex *A, int lda, cuComplex *x, int incx);
|
||||
void CUBLASWINAPI cublasZtrmv (char uplo, char trans, char diag, int n,
|
||||
const cuDoubleComplex *A, int lda, cuDoubleComplex *x, int incx);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* TBMV */
|
||||
void CUBLASWINAPI cublasStbmv (char uplo, char trans, char diag, int n, int k,
|
||||
const float *A, int lda, float *x, int incx);
|
||||
void CUBLASWINAPI cublasDtbmv (char uplo, char trans, char diag, int n, int k,
|
||||
const double *A, int lda, double *x, int incx);
|
||||
void CUBLASWINAPI cublasCtbmv (char uplo, char trans, char diag, int n, int k,
|
||||
const cuComplex *A, int lda, cuComplex *x, int incx);
|
||||
void CUBLASWINAPI cublasZtbmv (char uplo, char trans, char diag, int n, int k,
|
||||
const cuDoubleComplex *A, int lda, cuDoubleComplex *x, int incx);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* TPMV */
|
||||
void CUBLASWINAPI cublasStpmv(char uplo, char trans, char diag, int n, const float *AP, float *x, int incx);
|
||||
|
||||
void CUBLASWINAPI cublasDtpmv(char uplo, char trans, char diag, int n, const double *AP, double *x, int incx);
|
||||
|
||||
void CUBLASWINAPI cublasCtpmv(char uplo, char trans, char diag, int n, const cuComplex *AP, cuComplex *x, int incx);
|
||||
|
||||
void CUBLASWINAPI cublasZtpmv(char uplo, char trans, char diag, int n, const cuDoubleComplex *AP, cuDoubleComplex *x, int incx);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* TRSV */
|
||||
void CUBLASWINAPI cublasStrsv(char uplo, char trans, char diag, int n, const float *A, int lda, float *x, int incx);
|
||||
|
||||
void CUBLASWINAPI cublasDtrsv(char uplo, char trans, char diag, int n, const double *A, int lda, double *x, int incx);
|
||||
|
||||
void CUBLASWINAPI cublasCtrsv(char uplo, char trans, char diag, int n, const cuComplex *A, int lda, cuComplex *x, int incx);
|
||||
|
||||
void CUBLASWINAPI cublasZtrsv(char uplo, char trans, char diag, int n, const cuDoubleComplex *A, int lda,
|
||||
cuDoubleComplex *x, int incx);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* TPSV */
|
||||
void CUBLASWINAPI cublasStpsv(char uplo, char trans, char diag, int n, const float *AP,
|
||||
float *x, int incx);
|
||||
|
||||
void CUBLASWINAPI cublasDtpsv(char uplo, char trans, char diag, int n, const double *AP, double *x, int incx);
|
||||
|
||||
void CUBLASWINAPI cublasCtpsv(char uplo, char trans, char diag, int n, const cuComplex *AP, cuComplex *x, int incx);
|
||||
|
||||
void CUBLASWINAPI cublasZtpsv(char uplo, char trans, char diag, int n, const cuDoubleComplex *AP,
|
||||
cuDoubleComplex *x, int incx);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* TBSV */
|
||||
void CUBLASWINAPI cublasStbsv(char uplo, char trans,
|
||||
char diag, int n, int k, const float *A,
|
||||
int lda, float *x, int incx);
|
||||
|
||||
void CUBLASWINAPI cublasDtbsv(char uplo, char trans,
|
||||
char diag, int n, int k, const double *A,
|
||||
int lda, double *x, int incx);
|
||||
void CUBLASWINAPI cublasCtbsv(char uplo, char trans,
|
||||
char diag, int n, int k, const cuComplex *A,
|
||||
int lda, cuComplex *x, int incx);
|
||||
|
||||
void CUBLASWINAPI cublasZtbsv(char uplo, char trans,
|
||||
char diag, int n, int k, const cuDoubleComplex *A,
|
||||
int lda, cuDoubleComplex *x, int incx);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* SYMV/HEMV */
|
||||
void CUBLASWINAPI cublasSsymv (char uplo, int n, float alpha, const float *A,
|
||||
int lda, const float *x, int incx, float beta,
|
||||
float *y, int incy);
|
||||
void CUBLASWINAPI cublasDsymv (char uplo, int n, double alpha, const double *A,
|
||||
int lda, const double *x, int incx, double beta,
|
||||
double *y, int incy);
|
||||
void CUBLASWINAPI cublasChemv (char uplo, int n, cuComplex alpha, const cuComplex *A,
|
||||
int lda, const cuComplex *x, int incx, cuComplex beta,
|
||||
cuComplex *y, int incy);
|
||||
void CUBLASWINAPI cublasZhemv (char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex *A,
|
||||
int lda, const cuDoubleComplex *x, int incx, cuDoubleComplex beta,
|
||||
cuDoubleComplex *y, int incy);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* SBMV/HBMV */
|
||||
void CUBLASWINAPI cublasSsbmv (char uplo, int n, int k, float alpha,
|
||||
const float *A, int lda, const float *x, int incx,
|
||||
float beta, float *y, int incy);
|
||||
void CUBLASWINAPI cublasDsbmv (char uplo, int n, int k, double alpha,
|
||||
const double *A, int lda, const double *x, int incx,
|
||||
double beta, double *y, int incy);
|
||||
void CUBLASWINAPI cublasChbmv (char uplo, int n, int k, cuComplex alpha,
|
||||
const cuComplex *A, int lda, const cuComplex *x, int incx,
|
||||
cuComplex beta, cuComplex *y, int incy);
|
||||
void CUBLASWINAPI cublasZhbmv (char uplo, int n, int k, cuDoubleComplex alpha,
|
||||
const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx,
|
||||
cuDoubleComplex beta, cuDoubleComplex *y, int incy);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* SPMV/HPMV */
|
||||
void CUBLASWINAPI cublasSspmv(char uplo, int n, float alpha,
|
||||
const float *AP, const float *x,
|
||||
int incx, float beta, float *y, int incy);
|
||||
void CUBLASWINAPI cublasDspmv(char uplo, int n, double alpha,
|
||||
const double *AP, const double *x,
|
||||
int incx, double beta, double *y, int incy);
|
||||
void CUBLASWINAPI cublasChpmv(char uplo, int n, cuComplex alpha,
|
||||
const cuComplex *AP, const cuComplex *x,
|
||||
int incx, cuComplex beta, cuComplex *y, int incy);
|
||||
void CUBLASWINAPI cublasZhpmv(char uplo, int n, cuDoubleComplex alpha,
|
||||
const cuDoubleComplex *AP, const cuDoubleComplex *x,
|
||||
int incx, cuDoubleComplex beta, cuDoubleComplex *y, int incy);
|
||||
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* GER */
|
||||
void CUBLASWINAPI cublasSger (int m, int n, float alpha, const float *x, int incx,
|
||||
const float *y, int incy, float *A, int lda);
|
||||
void CUBLASWINAPI cublasDger (int m, int n, double alpha, const double *x, int incx,
|
||||
const double *y, int incy, double *A, int lda);
|
||||
|
||||
void CUBLASWINAPI cublasCgeru (int m, int n, cuComplex alpha, const cuComplex *x,
|
||||
int incx, const cuComplex *y, int incy,
|
||||
cuComplex *A, int lda);
|
||||
void CUBLASWINAPI cublasCgerc (int m, int n, cuComplex alpha, const cuComplex *x,
|
||||
int incx, const cuComplex *y, int incy,
|
||||
cuComplex *A, int lda);
|
||||
void CUBLASWINAPI cublasZgeru (int m, int n, cuDoubleComplex alpha, const cuDoubleComplex *x,
|
||||
int incx, const cuDoubleComplex *y, int incy,
|
||||
cuDoubleComplex *A, int lda);
|
||||
void CUBLASWINAPI cublasZgerc (int m, int n, cuDoubleComplex alpha, const cuDoubleComplex *x,
|
||||
int incx, const cuDoubleComplex *y, int incy,
|
||||
cuDoubleComplex *A, int lda);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* SYR/HER */
|
||||
void CUBLASWINAPI cublasSsyr (char uplo, int n, float alpha, const float *x,
|
||||
int incx, float *A, int lda);
|
||||
void CUBLASWINAPI cublasDsyr (char uplo, int n, double alpha, const double *x,
|
||||
int incx, double *A, int lda);
|
||||
|
||||
void CUBLASWINAPI cublasCher (char uplo, int n, float alpha,
|
||||
const cuComplex *x, int incx, cuComplex *A, int lda);
|
||||
void CUBLASWINAPI cublasZher (char uplo, int n, double alpha,
|
||||
const cuDoubleComplex *x, int incx, cuDoubleComplex *A, int lda);
|
||||
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* SPR/HPR */
|
||||
void CUBLASWINAPI cublasSspr (char uplo, int n, float alpha, const float *x,
|
||||
int incx, float *AP);
|
||||
void CUBLASWINAPI cublasDspr (char uplo, int n, double alpha, const double *x,
|
||||
int incx, double *AP);
|
||||
void CUBLASWINAPI cublasChpr (char uplo, int n, float alpha, const cuComplex *x,
|
||||
int incx, cuComplex *AP);
|
||||
void CUBLASWINAPI cublasZhpr (char uplo, int n, double alpha, const cuDoubleComplex *x,
|
||||
int incx, cuDoubleComplex *AP);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* SYR2/HER2 */
|
||||
void CUBLASWINAPI cublasSsyr2 (char uplo, int n, float alpha, const float *x,
|
||||
int incx, const float *y, int incy, float *A,
|
||||
int lda);
|
||||
void CUBLASWINAPI cublasDsyr2 (char uplo, int n, double alpha, const double *x,
|
||||
int incx, const double *y, int incy, double *A,
|
||||
int lda);
|
||||
void CUBLASWINAPI cublasCher2 (char uplo, int n, cuComplex alpha, const cuComplex *x,
|
||||
int incx, const cuComplex *y, int incy, cuComplex *A,
|
||||
int lda);
|
||||
void CUBLASWINAPI cublasZher2 (char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex *x,
|
||||
int incx, const cuDoubleComplex *y, int incy, cuDoubleComplex *A,
|
||||
int lda);
|
||||
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* SPR2/HPR2 */
|
||||
void CUBLASWINAPI cublasSspr2 (char uplo, int n, float alpha, const float *x,
|
||||
int incx, const float *y, int incy, float *AP);
|
||||
void CUBLASWINAPI cublasDspr2 (char uplo, int n, double alpha,
|
||||
const double *x, int incx, const double *y,
|
||||
int incy, double *AP);
|
||||
void CUBLASWINAPI cublasChpr2 (char uplo, int n, cuComplex alpha,
|
||||
const cuComplex *x, int incx, const cuComplex *y,
|
||||
int incy, cuComplex *AP);
|
||||
void CUBLASWINAPI cublasZhpr2 (char uplo, int n, cuDoubleComplex alpha,
|
||||
const cuDoubleComplex *x, int incx, const cuDoubleComplex *y,
|
||||
int incy, cuDoubleComplex *AP);
|
||||
/* ------------------------BLAS3 Functions ------------------------------- */
|
||||
/* GEMM */
|
||||
void CUBLASWINAPI cublasSgemm (char transa, char transb, int m, int n, int k,
|
||||
float alpha, const float *A, int lda,
|
||||
const float *B, int ldb, float beta, float *C,
|
||||
int ldc);
|
||||
void CUBLASWINAPI cublasDgemm (char transa, char transb, int m, int n, int k,
|
||||
double alpha, const double *A, int lda,
|
||||
const double *B, int ldb, double beta, double *C,
|
||||
int ldc);
|
||||
void CUBLASWINAPI cublasCgemm (char transa, char transb, int m, int n, int k,
|
||||
cuComplex alpha, const cuComplex *A, int lda,
|
||||
const cuComplex *B, int ldb, cuComplex beta,
|
||||
cuComplex *C, int ldc);
|
||||
void CUBLASWINAPI cublasZgemm (char transa, char transb, int m, int n,
|
||||
int k, cuDoubleComplex alpha,
|
||||
const cuDoubleComplex *A, int lda,
|
||||
const cuDoubleComplex *B, int ldb,
|
||||
cuDoubleComplex beta, cuDoubleComplex *C,
|
||||
int ldc);
|
||||
/* -------------------------------------------------------*/
|
||||
/* SYRK */
|
||||
void CUBLASWINAPI cublasSsyrk (char uplo, char trans, int n, int k, float alpha,
|
||||
const float *A, int lda, float beta, float *C,
|
||||
int ldc);
|
||||
void CUBLASWINAPI cublasDsyrk (char uplo, char trans, int n, int k,
|
||||
double alpha, const double *A, int lda,
|
||||
double beta, double *C, int ldc);
|
||||
|
||||
void CUBLASWINAPI cublasCsyrk (char uplo, char trans, int n, int k,
|
||||
cuComplex alpha, const cuComplex *A, int lda,
|
||||
cuComplex beta, cuComplex *C, int ldc);
|
||||
void CUBLASWINAPI cublasZsyrk (char uplo, char trans, int n, int k,
|
||||
cuDoubleComplex alpha,
|
||||
const cuDoubleComplex *A, int lda,
|
||||
cuDoubleComplex beta,
|
||||
cuDoubleComplex *C, int ldc);
|
||||
/* ------------------------------------------------------- */
|
||||
/* HERK */
|
||||
void CUBLASWINAPI cublasCherk (char uplo, char trans, int n, int k,
|
||||
float alpha, const cuComplex *A, int lda,
|
||||
float beta, cuComplex *C, int ldc);
|
||||
void CUBLASWINAPI cublasZherk (char uplo, char trans, int n, int k,
|
||||
double alpha,
|
||||
const cuDoubleComplex *A, int lda,
|
||||
double beta,
|
||||
cuDoubleComplex *C, int ldc);
|
||||
/* ------------------------------------------------------- */
|
||||
/* SYR2K */
|
||||
void CUBLASWINAPI cublasSsyr2k (char uplo, char trans, int n, int k, float alpha,
|
||||
const float *A, int lda, const float *B, int ldb,
|
||||
float beta, float *C, int ldc);
|
||||
|
||||
void CUBLASWINAPI cublasDsyr2k (char uplo, char trans, int n, int k,
|
||||
double alpha, const double *A, int lda,
|
||||
const double *B, int ldb, double beta,
|
||||
double *C, int ldc);
|
||||
void CUBLASWINAPI cublasCsyr2k (char uplo, char trans, int n, int k,
|
||||
cuComplex alpha, const cuComplex *A, int lda,
|
||||
const cuComplex *B, int ldb, cuComplex beta,
|
||||
cuComplex *C, int ldc);
|
||||
|
||||
void CUBLASWINAPI cublasZsyr2k (char uplo, char trans, int n, int k,
|
||||
cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
|
||||
const cuDoubleComplex *B, int ldb, cuDoubleComplex beta,
|
||||
cuDoubleComplex *C, int ldc);
|
||||
/* ------------------------------------------------------- */
|
||||
/* HER2K */
|
||||
void CUBLASWINAPI cublasCher2k (char uplo, char trans, int n, int k,
|
||||
cuComplex alpha, const cuComplex *A, int lda,
|
||||
const cuComplex *B, int ldb, float beta,
|
||||
cuComplex *C, int ldc);
|
||||
|
||||
void CUBLASWINAPI cublasZher2k (char uplo, char trans, int n, int k,
|
||||
cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
|
||||
const cuDoubleComplex *B, int ldb, double beta,
|
||||
cuDoubleComplex *C, int ldc);
|
||||
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* SYMM*/
|
||||
void CUBLASWINAPI cublasSsymm (char side, char uplo, int m, int n, float alpha,
|
||||
const float *A, int lda, const float *B, int ldb,
|
||||
float beta, float *C, int ldc);
|
||||
void CUBLASWINAPI cublasDsymm (char side, char uplo, int m, int n, double alpha,
|
||||
const double *A, int lda, const double *B, int ldb,
|
||||
double beta, double *C, int ldc);
|
||||
|
||||
void CUBLASWINAPI cublasCsymm (char side, char uplo, int m, int n, cuComplex alpha,
|
||||
const cuComplex *A, int lda, const cuComplex *B, int ldb,
|
||||
cuComplex beta, cuComplex *C, int ldc);
|
||||
|
||||
void CUBLASWINAPI cublasZsymm (char side, char uplo, int m, int n, cuDoubleComplex alpha,
|
||||
const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
|
||||
cuDoubleComplex beta, cuDoubleComplex *C, int ldc);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* HEMM*/
|
||||
void CUBLASWINAPI cublasChemm (char side, char uplo, int m, int n,
|
||||
cuComplex alpha, const cuComplex *A, int lda,
|
||||
const cuComplex *B, int ldb, cuComplex beta,
|
||||
cuComplex *C, int ldc);
|
||||
void CUBLASWINAPI cublasZhemm (char side, char uplo, int m, int n,
|
||||
cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
|
||||
const cuDoubleComplex *B, int ldb, cuDoubleComplex beta,
|
||||
cuDoubleComplex *C, int ldc);
|
||||
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* TRSM*/
|
||||
void CUBLASWINAPI cublasStrsm (char side, char uplo, char transa, char diag,
|
||||
int m, int n, float alpha, const float *A, int lda,
|
||||
float *B, int ldb);
|
||||
|
||||
void CUBLASWINAPI cublasDtrsm (char side, char uplo, char transa,
|
||||
char diag, int m, int n, double alpha,
|
||||
const double *A, int lda, double *B,
|
||||
int ldb);
|
||||
|
||||
void CUBLASWINAPI cublasCtrsm (char side, char uplo, char transa, char diag,
|
||||
int m, int n, cuComplex alpha, const cuComplex *A,
|
||||
int lda, cuComplex *B, int ldb);
|
||||
|
||||
void CUBLASWINAPI cublasZtrsm (char side, char uplo, char transa,
|
||||
char diag, int m, int n, cuDoubleComplex alpha,
|
||||
const cuDoubleComplex *A, int lda,
|
||||
cuDoubleComplex *B, int ldb);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* TRMM*/
|
||||
void CUBLASWINAPI cublasStrmm (char side, char uplo, char transa, char diag,
|
||||
int m, int n, float alpha, const float *A, int lda,
|
||||
float *B, int ldb);
|
||||
void CUBLASWINAPI cublasDtrmm (char side, char uplo, char transa,
|
||||
char diag, int m, int n, double alpha,
|
||||
const double *A, int lda, double *B,
|
||||
int ldb);
|
||||
void CUBLASWINAPI cublasCtrmm (char side, char uplo, char transa, char diag,
|
||||
int m, int n, cuComplex alpha, const cuComplex *A,
|
||||
int lda, cuComplex *B, int ldb);
|
||||
void CUBLASWINAPI cublasZtrmm (char side, char uplo, char transa,
|
||||
char diag, int m, int n, cuDoubleComplex alpha,
|
||||
const cuDoubleComplex *A, int lda, cuDoubleComplex *B,
|
||||
int ldb);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif /* !defined(CUBLAS_H_) */
|
2583
include/isaac/driver/external/CUDA/cublas_api.h
vendored
Normal file
2583
include/isaac/driver/external/CUDA/cublas_api.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
274
include/isaac/driver/external/CUDA/cublas_v2.h
vendored
Normal file
274
include/isaac/driver/external/CUDA/cublas_v2.h
vendored
Normal file
@@ -0,0 +1,274 @@
|
||||
/*
|
||||
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This is the public header file for the new CUBLAS library API; it maps the generic
|
||||
* CUBLAS function names to the actual _v2 implementations.
|
||||
*/
|
||||
|
||||
#if !defined(CUBLAS_V2_H_)
|
||||
#define CUBLAS_V2_H_
|
||||
|
||||
#undef CUBLASAPI
|
||||
#ifdef __CUDACC__
|
||||
#define CUBLASAPI __host__ __device__
|
||||
#else
|
||||
#define CUBLASAPI
|
||||
#endif
|
||||
|
||||
#include "cublas_api.h"
|
||||
|
||||
#define cublasCreate cublasCreate_v2
|
||||
#define cublasDestroy cublasDestroy_v2
|
||||
#define cublasGetVersion cublasGetVersion_v2
|
||||
#define cublasSetStream cublasSetStream_v2
|
||||
#define cublasGetStream cublasGetStream_v2
|
||||
#define cublasGetPointerMode cublasGetPointerMode_v2
|
||||
#define cublasSetPointerMode cublasSetPointerMode_v2
|
||||
|
||||
/* Blas1 Routines */
|
||||
|
||||
#define cublasSnrm2 cublasSnrm2_v2
|
||||
#define cublasDnrm2 cublasDnrm2_v2
|
||||
#define cublasScnrm2 cublasScnrm2_v2
|
||||
#define cublasDznrm2 cublasDznrm2_v2
|
||||
|
||||
#define cublasSdot cublasSdot_v2
|
||||
#define cublasDdot cublasDdot_v2
|
||||
#define cublasCdotu cublasCdotu_v2
|
||||
#define cublasCdotc cublasCdotc_v2
|
||||
#define cublasZdotu cublasZdotu_v2
|
||||
#define cublasZdotc cublasZdotc_v2
|
||||
|
||||
#define cublasSscal cublasSscal_v2
|
||||
#define cublasDscal cublasDscal_v2
|
||||
#define cublasCscal cublasCscal_v2
|
||||
#define cublasCsscal cublasCsscal_v2
|
||||
#define cublasZscal cublasZscal_v2
|
||||
#define cublasZdscal cublasZdscal_v2
|
||||
|
||||
#define cublasSaxpy cublasSaxpy_v2
|
||||
#define cublasDaxpy cublasDaxpy_v2
|
||||
#define cublasCaxpy cublasCaxpy_v2
|
||||
#define cublasZaxpy cublasZaxpy_v2
|
||||
|
||||
#define cublasScopy cublasScopy_v2
|
||||
#define cublasDcopy cublasDcopy_v2
|
||||
#define cublasCcopy cublasCcopy_v2
|
||||
#define cublasZcopy cublasZcopy_v2
|
||||
|
||||
#define cublasSswap cublasSswap_v2
|
||||
#define cublasDswap cublasDswap_v2
|
||||
#define cublasCswap cublasCswap_v2
|
||||
#define cublasZswap cublasZswap_v2
|
||||
|
||||
#define cublasIsamax cublasIsamax_v2
|
||||
#define cublasIdamax cublasIdamax_v2
|
||||
#define cublasIcamax cublasIcamax_v2
|
||||
#define cublasIzamax cublasIzamax_v2
|
||||
|
||||
#define cublasIsamin cublasIsamin_v2
|
||||
#define cublasIdamin cublasIdamin_v2
|
||||
#define cublasIcamin cublasIcamin_v2
|
||||
#define cublasIzamin cublasIzamin_v2
|
||||
|
||||
#define cublasSasum cublasSasum_v2
|
||||
#define cublasDasum cublasDasum_v2
|
||||
#define cublasScasum cublasScasum_v2
|
||||
#define cublasDzasum cublasDzasum_v2
|
||||
|
||||
#define cublasSrot cublasSrot_v2
|
||||
#define cublasDrot cublasDrot_v2
|
||||
#define cublasCrot cublasCrot_v2
|
||||
#define cublasCsrot cublasCsrot_v2
|
||||
#define cublasZrot cublasZrot_v2
|
||||
#define cublasZdrot cublasZdrot_v2
|
||||
|
||||
#define cublasSrotg cublasSrotg_v2
|
||||
#define cublasDrotg cublasDrotg_v2
|
||||
#define cublasCrotg cublasCrotg_v2
|
||||
#define cublasZrotg cublasZrotg_v2
|
||||
|
||||
#define cublasSrotm cublasSrotm_v2
|
||||
#define cublasDrotm cublasDrotm_v2
|
||||
|
||||
#define cublasSrotmg cublasSrotmg_v2
|
||||
#define cublasDrotmg cublasDrotmg_v2
|
||||
|
||||
|
||||
/* Blas2 Routines */
|
||||
|
||||
#define cublasSgemv cublasSgemv_v2
|
||||
#define cublasDgemv cublasDgemv_v2
|
||||
#define cublasCgemv cublasCgemv_v2
|
||||
#define cublasZgemv cublasZgemv_v2
|
||||
|
||||
#define cublasSgbmv cublasSgbmv_v2
|
||||
#define cublasDgbmv cublasDgbmv_v2
|
||||
#define cublasCgbmv cublasCgbmv_v2
|
||||
#define cublasZgbmv cublasZgbmv_v2
|
||||
|
||||
#define cublasStrmv cublasStrmv_v2
|
||||
#define cublasDtrmv cublasDtrmv_v2
|
||||
#define cublasCtrmv cublasCtrmv_v2
|
||||
#define cublasZtrmv cublasZtrmv_v2
|
||||
|
||||
#define cublasStbmv cublasStbmv_v2
|
||||
#define cublasDtbmv cublasDtbmv_v2
|
||||
#define cublasCtbmv cublasCtbmv_v2
|
||||
#define cublasZtbmv cublasZtbmv_v2
|
||||
|
||||
#define cublasStpmv cublasStpmv_v2
|
||||
#define cublasDtpmv cublasDtpmv_v2
|
||||
#define cublasCtpmv cublasCtpmv_v2
|
||||
#define cublasZtpmv cublasZtpmv_v2
|
||||
|
||||
#define cublasStrsv cublasStrsv_v2
|
||||
#define cublasDtrsv cublasDtrsv_v2
|
||||
#define cublasCtrsv cublasCtrsv_v2
|
||||
#define cublasZtrsv cublasZtrsv_v2
|
||||
|
||||
#define cublasStpsv cublasStpsv_v2
|
||||
#define cublasDtpsv cublasDtpsv_v2
|
||||
#define cublasCtpsv cublasCtpsv_v2
|
||||
#define cublasZtpsv cublasZtpsv_v2
|
||||
|
||||
#define cublasStbsv cublasStbsv_v2
|
||||
#define cublasDtbsv cublasDtbsv_v2
|
||||
#define cublasCtbsv cublasCtbsv_v2
|
||||
#define cublasZtbsv cublasZtbsv_v2
|
||||
|
||||
#define cublasSsymv cublasSsymv_v2
|
||||
#define cublasDsymv cublasDsymv_v2
|
||||
#define cublasCsymv cublasCsymv_v2
|
||||
#define cublasZsymv cublasZsymv_v2
|
||||
#define cublasChemv cublasChemv_v2
|
||||
#define cublasZhemv cublasZhemv_v2
|
||||
|
||||
#define cublasSsbmv cublasSsbmv_v2
|
||||
#define cublasDsbmv cublasDsbmv_v2
|
||||
#define cublasChbmv cublasChbmv_v2
|
||||
#define cublasZhbmv cublasZhbmv_v2
|
||||
|
||||
#define cublasSspmv cublasSspmv_v2
|
||||
#define cublasDspmv cublasDspmv_v2
|
||||
#define cublasChpmv cublasChpmv_v2
|
||||
#define cublasZhpmv cublasZhpmv_v2
|
||||
|
||||
|
||||
#define cublasSger cublasSger_v2
|
||||
#define cublasDger cublasDger_v2
|
||||
#define cublasCgeru cublasCgeru_v2
|
||||
#define cublasCgerc cublasCgerc_v2
|
||||
#define cublasZgeru cublasZgeru_v2
|
||||
#define cublasZgerc cublasZgerc_v2
|
||||
|
||||
#define cublasSsyr cublasSsyr_v2
|
||||
#define cublasDsyr cublasDsyr_v2
|
||||
#define cublasCsyr cublasCsyr_v2
|
||||
#define cublasZsyr cublasZsyr_v2
|
||||
#define cublasCher cublasCher_v2
|
||||
#define cublasZher cublasZher_v2
|
||||
|
||||
#define cublasSspr cublasSspr_v2
|
||||
#define cublasDspr cublasDspr_v2
|
||||
#define cublasChpr cublasChpr_v2
|
||||
#define cublasZhpr cublasZhpr_v2
|
||||
|
||||
#define cublasSsyr2 cublasSsyr2_v2
|
||||
#define cublasDsyr2 cublasDsyr2_v2
|
||||
#define cublasCsyr2 cublasCsyr2_v2
|
||||
#define cublasZsyr2 cublasZsyr2_v2
|
||||
#define cublasCher2 cublasCher2_v2
|
||||
#define cublasZher2 cublasZher2_v2
|
||||
|
||||
#define cublasSspr2 cublasSspr2_v2
|
||||
#define cublasDspr2 cublasDspr2_v2
|
||||
#define cublasChpr2 cublasChpr2_v2
|
||||
#define cublasZhpr2 cublasZhpr2_v2
|
||||
|
||||
/* Blas3 Routines */
|
||||
|
||||
#define cublasSgemm cublasSgemm_v2
|
||||
#define cublasDgemm cublasDgemm_v2
|
||||
#define cublasCgemm cublasCgemm_v2
|
||||
#define cublasZgemm cublasZgemm_v2
|
||||
|
||||
#define cublasSsyrk cublasSsyrk_v2
|
||||
#define cublasDsyrk cublasDsyrk_v2
|
||||
#define cublasCsyrk cublasCsyrk_v2
|
||||
#define cublasZsyrk cublasZsyrk_v2
|
||||
#define cublasCherk cublasCherk_v2
|
||||
#define cublasZherk cublasZherk_v2
|
||||
|
||||
#define cublasSsyr2k cublasSsyr2k_v2
|
||||
#define cublasDsyr2k cublasDsyr2k_v2
|
||||
#define cublasCsyr2k cublasCsyr2k_v2
|
||||
#define cublasZsyr2k cublasZsyr2k_v2
|
||||
#define cublasCher2k cublasCher2k_v2
|
||||
#define cublasZher2k cublasZher2k_v2
|
||||
|
||||
#define cublasSsymm cublasSsymm_v2
|
||||
#define cublasDsymm cublasDsymm_v2
|
||||
#define cublasCsymm cublasCsymm_v2
|
||||
#define cublasZsymm cublasZsymm_v2
|
||||
#define cublasChemm cublasChemm_v2
|
||||
#define cublasZhemm cublasZhemm_v2
|
||||
|
||||
#define cublasStrsm cublasStrsm_v2
|
||||
#define cublasDtrsm cublasDtrsm_v2
|
||||
#define cublasCtrsm cublasCtrsm_v2
|
||||
#define cublasZtrsm cublasZtrsm_v2
|
||||
|
||||
#define cublasStrmm cublasStrmm_v2
|
||||
#define cublasDtrmm cublasDtrmm_v2
|
||||
#define cublasCtrmm cublasCtrmm_v2
|
||||
#define cublasZtrmm cublasZtrmm_v2
|
||||
|
||||
#endif /* !defined(CUBLAS_V2_H_) */
|
228
include/isaac/driver/external/CUDA/cuda_device_runtime_api.h
vendored
Normal file
228
include/isaac/driver/external/CUDA/cuda_device_runtime_api.h
vendored
Normal file
@@ -0,0 +1,228 @@
|
||||
/*
|
||||
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#if !defined(__CUDA_DEVICE_RUNTIME_API_H__)
|
||||
#define __CUDA_DEVICE_RUNTIME_API_H__
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
#if defined(__CUDABE__)
|
||||
|
||||
#if (__CUDA_ARCH__ >= 350) && !defined(__CUDADEVRT_INTERNAL__)
|
||||
struct cudaFuncAttributes;
|
||||
|
||||
/*
 * Weak (nv_weak) device-side definition of cudaMalloc.
 * Ignores both arguments and always returns cudaErrorUnknown.
 * NOTE(review): presumably a fallback that a strong definition from the
 * device runtime library replaces at link time -- confirm.
 */
__device__ __attribute__((nv_weak)) cudaError_t cudaMalloc(void **p, size_t s)
|
||||
{
|
||||
return cudaErrorUnknown;
|
||||
}
|
||||
|
||||
/*
 * Weak (nv_weak) device-side definition of cudaFuncGetAttributes.
 * Neither argument is read or written; the function always returns
 * cudaErrorUnknown.
 * NOTE(review): presumably overridden by the device runtime library when it
 * is linked -- confirm.
 */
__device__ __attribute__((nv_weak)) cudaError_t cudaFuncGetAttributes(struct cudaFuncAttributes *p, const void *c)
|
||||
{
|
||||
return cudaErrorUnknown;
|
||||
}
|
||||
|
||||
/*
 * Weak (nv_weak) device-side definition of cudaDeviceGetAttribute.
 * Leaves *value untouched and always returns cudaErrorUnknown.
 * NOTE(review): presumably overridden by the device runtime library when it
 * is linked -- confirm.
 */
__device__ __attribute__((nv_weak)) cudaError_t cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device)
|
||||
{
|
||||
return cudaErrorUnknown;
|
||||
}
|
||||
|
||||
/*
 * Weak (nv_weak) device-side definition of cudaGetDevice.
 * Leaves *device untouched and always returns cudaErrorUnknown.
 * NOTE(review): presumably overridden by the device runtime library when it
 * is linked -- confirm.
 */
__device__ __attribute__((nv_weak)) cudaError_t cudaGetDevice(int *device)
|
||||
{
|
||||
return cudaErrorUnknown;
|
||||
}
|
||||
|
||||
/*
 * Weak (nv_weak) device-side definition of
 * cudaOccupancyMaxActiveBlocksPerMultiprocessor.
 * Leaves *numBlocks untouched, ignores the remaining arguments and always
 * returns cudaErrorUnknown.
 * NOTE(review): presumably overridden by the device runtime library when it
 * is linked -- confirm.
 */
__device__ __attribute__((nv_weak)) cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, int blockSize, size_t dynamicSmemSize)
|
||||
{
|
||||
return cudaErrorUnknown;
|
||||
}
|
||||
|
||||
/*
 * Weak (nv_weak) device-side definition of
 * cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.
 * Leaves *numBlocks untouched, ignores the remaining arguments (including
 * flags) and always returns cudaErrorUnknown.
 * NOTE(review): presumably overridden by the device runtime library when it
 * is linked -- confirm.
 */
__device__ __attribute__((nv_weak)) cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, const void *func, int blockSize, size_t dynamicSmemSize, unsigned int flags)
|
||||
{
|
||||
return cudaErrorUnknown;
|
||||
}
|
||||
|
||||
#endif /* (__CUDA_ARCH__ >= 350) && !defined(__CUDADEVRT_INTERNAL__) */
|
||||
|
||||
#else /* defined(__CUDABE__) */
|
||||
|
||||
#if defined(__cplusplus) && defined(__CUDACC__) // Visible to nvcc front-end only
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 350) // Visible to SM>=3.5 and "__host__ __device__" only
|
||||
|
||||
#include "driver_types.h"
|
||||
#include "host_defines.h"
|
||||
|
||||
extern "C"
|
||||
{
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceGetLimit(size_t *pValue, enum cudaLimit limit);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceGetCacheConfig(enum cudaFuncCache *pCacheConfig);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceGetSharedMemConfig(enum cudaSharedMemConfig *pConfig);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceSynchronize(void);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetLastError(void);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaPeekAtLastError(void);
|
||||
extern __device__ __cudart_builtin__ const char* CUDARTAPI cudaGetErrorString(cudaError_t error);
|
||||
extern __device__ __cudart_builtin__ const char* CUDARTAPI cudaGetErrorName(cudaError_t error);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDeviceCount(int *count);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDevice(int *device);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamCreateWithFlags(cudaStream_t *pStream, unsigned int flags);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamDestroy(cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent(cudaStream_t stream, cudaEvent_t event, unsigned int flags);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent_ptsz(cudaStream_t stream, cudaEvent_t event, unsigned int flags);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaEventCreateWithFlags(cudaEvent_t *event, unsigned int flags);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaEventRecord(cudaEvent_t event, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaEventRecord_ptsz(cudaEvent_t event, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaEventDestroy(cudaEvent_t event);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaFuncGetAttributes(struct cudaFuncAttributes *attr, const void *func);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaFree(void *devPtr);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMalloc(void **devPtr, size_t size);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpyAsync(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpyAsync_ptsz(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy2DAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy2DAsync_ptsz(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync(const struct cudaMemcpy3DParms *p, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync_ptsz(const struct cudaMemcpy3DParms *p, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync(void *devPtr, int value, size_t count, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync_ptsz(void *devPtr, int value, size_t count, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemset2DAsync(void *devPtr, size_t pitch, int value, size_t width, size_t height, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemset2DAsync_ptsz(void *devPtr, size_t pitch, int value, size_t width, size_t height, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemset3DAsync(struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemset3DAsync_ptsz(struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaRuntimeGetVersion(int *runtimeVersion);
|
||||
|
||||
/**
|
||||
* \ingroup CUDART_EXECUTION
|
||||
* \brief Obtains a parameter buffer
|
||||
*
|
||||
* Obtains a parameter buffer which can be filled with parameters for a kernel launch.
|
||||
* Parameters passed to ::cudaLaunchDevice must be allocated via this function.
|
||||
*
|
||||
* This is a low level API and can only be accessed from Parallel Thread Execution (PTX).
|
||||
* CUDA user code should use <<< >>> to launch kernels.
|
||||
*
|
||||
* \param alignment - Specifies alignment requirement of the parameter buffer
|
||||
* \param size - Specifies size requirement in bytes
|
||||
*
|
||||
* \return
|
||||
* Returns pointer to the allocated parameterBuffer
|
||||
* \notefnerr
|
||||
*
|
||||
* \sa cudaLaunchDevice
|
||||
*/
|
||||
extern __device__ __cudart_builtin__ void * CUDARTAPI cudaGetParameterBuffer(size_t alignment, size_t size);
|
||||
|
||||
/**
|
||||
* \ingroup CUDART_EXECUTION
|
||||
* \brief Launches a specified kernel
|
||||
*
|
||||
* Launches a specified kernel with the specified parameter buffer. A parameter buffer can be obtained
|
||||
* by calling ::cudaGetParameterBuffer().
|
||||
*
|
||||
* This is a low level API and can only be accessed from Parallel Thread Execution (PTX).
|
||||
* CUDA user code should use <<< >>> to launch the kernels.
|
||||
*
|
||||
* \param func - Pointer to the kernel to be launched
|
||||
* \param parameterBuffer - Holds the parameters to the launched kernel. parameterBuffer can be NULL. (Optional)
|
||||
* \param gridDimension - Specifies grid dimensions
|
||||
* \param blockDimension - Specifies block dimensions
|
||||
* \param sharedMemSize - Specifies size of shared memory
|
||||
* \param stream - Specifies the stream to be used
|
||||
*
|
||||
* \return
|
||||
* ::cudaSuccess, ::cudaErrorInvalidDevice, ::cudaErrorLaunchMaxDepthExceeded, ::cudaErrorInvalidConfiguration,
|
||||
* ::cudaErrorStartupFailure, ::cudaErrorLaunchPendingCountExceeded, ::cudaErrorLaunchOutOfResources
|
||||
* \notefnerr
|
||||
* \n Please refer to Execution Configuration and Parameter Buffer Layout from the CUDA Programming
|
||||
* Guide for the detailed descriptions of launch configuration and parameter layout respectively.
|
||||
*
|
||||
* \sa cudaGetParameterBuffer
|
||||
*/
|
||||
extern __device__ __cudart_builtin__ void * CUDARTAPI cudaGetParameterBufferV2(void *func, dim3 gridDimension, dim3 blockDimension, unsigned int sharedMemSize);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaLaunchDevice_ptsz(void *func, void *parameterBuffer, dim3 gridDimension, dim3 blockDimension, unsigned int sharedMemSize, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaLaunchDeviceV2_ptsz(void *parameterBuffer, cudaStream_t stream);
|
||||
|
||||
#if defined(CUDA_API_PER_THREAD_DEFAULT_STREAM) && defined(__CUDA_ARCH__)
|
||||
// When compiling for the device and per thread default stream is enabled, add
|
||||
// a static inline redirect to the per thread stream entry points.
|
||||
|
||||
/*
 * Inline device-side wrapper: cudaLaunchDevice forwards every argument,
 * unchanged and in the same order, to cudaLaunchDevice_ptsz and returns
 * that call's result.
 * Only compiled when both CUDA_API_PER_THREAD_DEFAULT_STREAM and
 * __CUDA_ARCH__ are defined (enclosing #if); otherwise cudaLaunchDevice is
 * declared extern in the #else branch.
 */
static __inline__ __device__ __cudart_builtin__ cudaError_t CUDARTAPI
|
||||
cudaLaunchDevice(void *func, void *parameterBuffer, dim3 gridDimension, dim3 blockDimension, unsigned int sharedMemSize, cudaStream_t stream)
|
||||
{
|
||||
return cudaLaunchDevice_ptsz(func, parameterBuffer, gridDimension, blockDimension, sharedMemSize, stream);
|
||||
}
|
||||
|
||||
/*
 * Inline device-side wrapper: cudaLaunchDeviceV2 forwards parameterBuffer
 * and stream unchanged to cudaLaunchDeviceV2_ptsz and returns that call's
 * result.
 * Only compiled when both CUDA_API_PER_THREAD_DEFAULT_STREAM and
 * __CUDA_ARCH__ are defined (enclosing #if); otherwise cudaLaunchDeviceV2 is
 * declared extern in the #else branch.
 */
static __inline__ __device__ __cudart_builtin__ cudaError_t CUDARTAPI
|
||||
cudaLaunchDeviceV2(void *parameterBuffer, cudaStream_t stream)
|
||||
{
|
||||
return cudaLaunchDeviceV2_ptsz(parameterBuffer, stream);
|
||||
}
|
||||
#else
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaLaunchDevice(void *func, void *parameterBuffer, dim3 gridDimension, dim3 blockDimension, unsigned int sharedMemSize, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaLaunchDeviceV2(void *parameterBuffer, cudaStream_t stream);
|
||||
#endif
|
||||
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, int blockSize, size_t dynamicSmemSize);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, const void *func, int blockSize, size_t dynamicSmemSize, unsigned int flags);
|
||||
|
||||
}
|
||||
|
||||
template <typename T> static __inline__ __device__ __cudart_builtin__ cudaError_t cudaMalloc(T **devPtr, size_t size);
|
||||
template <typename T> static __inline__ __device__ __cudart_builtin__ cudaError_t cudaFuncGetAttributes(struct cudaFuncAttributes *attr, T *entry);
|
||||
template <typename T> static __inline__ __device__ __cudart_builtin__ cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, T func, int blockSize, size_t dynamicSmemSize);
|
||||
template <typename T> static __inline__ __device__ __cudart_builtin__ cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, T func, int blockSize, size_t dynamicSmemSize, unsigned int flags);
|
||||
|
||||
#endif // !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 350)
|
||||
#endif // defined(__cplusplus) && defined(__CUDACC__)
|
||||
|
||||
#endif /* defined(__CUDABE__) */
|
||||
|
||||
#endif /* !__CUDA_DEVICE_RUNTIME_API_H__ */
|
1499
include/isaac/driver/external/CUDA/cuda_fp16.h
vendored
Normal file
1499
include/isaac/driver/external/CUDA/cuda_fp16.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1895
include/isaac/driver/external/CUDA/cuda_runtime.h
vendored
Normal file
1895
include/isaac/driver/external/CUDA/cuda_runtime.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
6520
include/isaac/driver/external/CUDA/cuda_runtime_api.h
vendored
Normal file
6520
include/isaac/driver/external/CUDA/cuda_runtime_api.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
69
include/isaac/driver/external/CUDA/device_types.h
vendored
Normal file
69
include/isaac/driver/external/CUDA/device_types.h
vendored
Normal file
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#if !defined(__DEVICE_TYPES_H__)
|
||||
#define __DEVICE_TYPES_H__
|
||||
|
||||
#include "host_defines.h"
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
/*
 * Rounding-mode identifiers. No explicit values are assigned, so the
 * enumerators take the implicit values 0, 1, 2 and 3 in declaration order.
 * NOTE(review): judging by the names these select round-to-nearest,
 * round-toward-zero, round-toward-+infinity and round-toward--infinity;
 * confirm against the CUDA documentation before relying on that.
 */
enum __device_builtin__ cudaRoundMode
|
||||
{
|
||||
cudaRoundNearest,
|
||||
cudaRoundZero,
|
||||
cudaRoundPosInf,
|
||||
cudaRoundMinInf
|
||||
};
|
||||
|
||||
#endif /* !__DEVICE_TYPES_H__ */
|
145
include/isaac/driver/external/CUDA/driver_functions.h
vendored
Normal file
145
include/isaac/driver/external/CUDA/driver_functions.h
vendored
Normal file
@@ -0,0 +1,145 @@
|
||||
/*
|
||||
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#if !defined(__DRIVER_FUNCTIONS_H__)
|
||||
#define __DRIVER_FUNCTIONS_H__
|
||||
|
||||
#include "builtin_types.h"
|
||||
#include "host_defines.h"
|
||||
#include "driver_types.h"
|
||||
|
||||
/**
|
||||
* \addtogroup CUDART_MEMORY
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* \brief Returns a cudaPitchedPtr based on input parameters
|
||||
*
|
||||
* Returns a ::cudaPitchedPtr based on the specified input parameters \p d,
|
||||
* \p p, \p xsz, and \p ysz.
|
||||
*
|
||||
* \param d - Pointer to allocated memory
|
||||
* \param p - Pitch of allocated memory in bytes
|
||||
* \param xsz - Logical width of allocation in elements
|
||||
* \param ysz - Logical height of allocation in elements
|
||||
*
|
||||
* \return
|
||||
* ::cudaPitchedPtr specified by \p d, \p p, \p xsz, and \p ysz
|
||||
*
|
||||
* \sa make_cudaExtent, make_cudaPos
|
||||
*/
|
||||
/* Packs the four arguments into a cudaPitchedPtr and returns it by value.
 * The arguments are stored exactly as given; nothing is validated. */
static __inline__ __host__ struct cudaPitchedPtr make_cudaPitchedPtr(void *d, size_t p, size_t xsz, size_t ysz)
|
||||
{
|
||||
/* Not zero-initialised: only the four fields assigned below are set. Any
 * other member of cudaPitchedPtr (its declaration is not visible in this
 * header) would be left indeterminate. */
struct cudaPitchedPtr s;
|
||||
|
||||
/* d -> ptr (the allocation itself) */
s.ptr = d;
|
||||
/* p -> pitch */
s.pitch = p;
|
||||
/* xsz -> xsize (logical width) */
s.xsize = xsz;
|
||||
/* ysz -> ysize (logical height) */
s.ysize = ysz;
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Returns a cudaPos based on input parameters
|
||||
*
|
||||
* Returns a ::cudaPos based on the specified input parameters \p x,
|
||||
* \p y, and \p z.
|
||||
*
|
||||
* \param x - X position
|
||||
* \param y - Y position
|
||||
* \param z - Z position
|
||||
*
|
||||
* \return
|
||||
* ::cudaPos specified by \p x, \p y, and \p z
|
||||
*
|
||||
* \sa make_cudaExtent, make_cudaPitchedPtr
|
||||
*/
|
||||
/* Packs the three coordinates into a cudaPos and returns it by value.
 * The arguments are stored exactly as given; nothing is validated. */
static __inline__ __host__ struct cudaPos make_cudaPos(size_t x, size_t y, size_t z)
|
||||
{
|
||||
/* Not zero-initialised: only x, y and z are assigned below. Any other
 * member of cudaPos (its declaration is not visible in this header) would
 * be left indeterminate. */
struct cudaPos p;
|
||||
|
||||
p.x = x;
|
||||
p.y = y;
|
||||
p.z = z;
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Returns a cudaExtent based on input parameters
|
||||
*
|
||||
* Returns a ::cudaExtent based on the specified input parameters \p w,
|
||||
* \p h, and \p d.
|
||||
*
|
||||
* \param w - Width in bytes
|
||||
* \param h - Height in elements
|
||||
* \param d - Depth in elements
|
||||
*
|
||||
* \return
|
||||
* ::cudaExtent specified by \p w, \p h, and \p d
|
||||
*
|
||||
* \sa make_cudaPitchedPtr, make_cudaPos
|
||||
*/
|
||||
/* Packs width, height and depth into a cudaExtent and returns it by value.
 * The arguments are stored exactly as given; nothing is validated. */
static __inline__ __host__ struct cudaExtent make_cudaExtent(size_t w, size_t h, size_t d)
|
||||
{
|
||||
/* Not zero-initialised: only width, height and depth are assigned below.
 * Any other member of cudaExtent (its declaration is not visible in this
 * header) would be left indeterminate. */
struct cudaExtent e;
|
||||
|
||||
/* w -> width */
e.width = w;
|
||||
/* h -> height */
e.height = h;
|
||||
/* d -> depth */
e.depth = d;
|
||||
|
||||
return e;
|
||||
}
|
||||
|
||||
/** @} */ /* END CUDART_MEMORY */
|
||||
|
||||
#endif /* !__DRIVER_FUNCTIONS_H__ */
|
1450
include/isaac/driver/external/CUDA/driver_types.h
vendored
Normal file
1450
include/isaac/driver/external/CUDA/driver_types.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
201
include/isaac/driver/external/CUDA/host_config.h
vendored
Normal file
201
include/isaac/driver/external/CUDA/host_config.h
vendored
Normal file
@@ -0,0 +1,201 @@
|
||||
/*
|
||||
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#if !defined(__HOST_CONFIG_H__)
|
||||
#define __HOST_CONFIG_H__
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
#if defined(__CUDACC__)
|
||||
|
||||
#if defined(__CUDACC_RTC__)
|
||||
|
||||
#define _CRTIMP
|
||||
#define __THROW
|
||||
|
||||
#else /* __CUDACC_RTC__ */
|
||||
|
||||
/* check for host compilers that are compatible with nvcc */
|
||||
#if !defined(__GNUC__) && !defined(_WIN32)
|
||||
|
||||
#error --- !!! UNSUPPORTED COMPILER !!! ---
|
||||
|
||||
#endif /* !__GNUC__ && !_WIN32 */
|
||||
|
||||
#if defined(__ICC)
|
||||
|
||||
#if __ICC != 1500 || !defined(__GNUC__) || !defined(__LP64__)
|
||||
|
||||
#error -- unsupported ICC configuration! Only ICC 15.0 on Linux x86_64 is supported!
|
||||
|
||||
#endif /* __ICC != 1500 || !__GNUC__ || !__LP64__ */
|
||||
|
||||
#endif /* __ICC */
|
||||
|
||||
/* PGI host-compiler gate: anything other than version 15.4 built with
 * __GNUC__ and __LP64__ defined is rejected at preprocessing time. */
#if defined(__PGIC__)
|
||||
|
||||
#if __PGIC__ != 15 || __PGIC_MINOR__ != 4 || !defined(__GNUC__) || !defined(__LP64__)
|
||||
|
||||
#error -- unsupported pgc++ configuration! Only pgc++ 15.4 on Linux x86_64 is supported!
|
||||
|
||||
#endif /* __PGIC__ != 15 || __PGIC_MINOR__ != 4 || !__GNUC__ || !__LP64__ */
|
||||
|
||||
#endif /* __PGIC__ */
|
||||
|
||||
#if defined(__powerpc__)
|
||||
|
||||
#if !defined(__powerpc64__) || !defined(__LITTLE_ENDIAN__)
|
||||
|
||||
#error -- unsupported PPC platform! Only 64-bit little endian PPC is supported!
|
||||
|
||||
#endif /* !__powerpc64__ || !__LITTLE_ENDIAN__ */
|
||||
|
||||
/* xlC version gate. The accepted range is 0x0d010000 <= __ibmxl_vrm__ < 0x0d020000,
 * which the error text below describes as xlC 13.1. A value is out of range when it
 * lies below the lower bound OR at/above the upper bound; joining the two tests with
 * && can never be true and would silently disable this check. */
#if defined(__ibmxl_vrm__) && (__ibmxl_vrm__ < 0x0d010000 || __ibmxl_vrm__ >= 0x0d020000)
|
||||
|
||||
#error -- unsupported xlC version! only xlC 13.1 is supported
|
||||
|
||||
#endif /* __ibmxl_vrm__ && (__ibmxl_vrm__ < 0x0d010000 || __ibmxl_vrm__ >= 0x0d020000) */
|
||||
|
||||
#endif /* __powerpc__ */
|
||||
|
||||
#if defined(__GNUC__)
|
||||
|
||||
#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 9)
|
||||
|
||||
#error -- unsupported GNU version! gcc versions later than 4.9 are not supported!
|
||||
|
||||
#endif /* __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 9) */
|
||||
|
||||
#if defined(__APPLE__) && defined(__MACH__) && !defined(__clang__)
|
||||
#error -- clang and clang++ are the only supported host compilers on Mac OS X!
|
||||
#endif /* __APPLE__ && __MACH__ && !__clang__ */
|
||||
|
||||
#endif /* __GNUC__ */
|
||||
|
||||
#if defined(_WIN32)
|
||||
|
||||
#if _MSC_VER < 1600 || _MSC_VER > 1800
|
||||
|
||||
#error -- unsupported Microsoft Visual Studio version! Only the versions 2010, 2012, and 2013 are supported!
|
||||
|
||||
#endif /* _MSC_VER < 1600 || _MSC_VER > 1800 */
|
||||
|
||||
#endif /* _WIN32 */
|
||||
|
||||
/* configure host compiler */
|
||||
#if defined(__APPLE__)
|
||||
|
||||
#define _CRTIMP
|
||||
#define __THROW
|
||||
|
||||
#if defined(__BLOCKS__) /* nvcc does not support closures */
|
||||
|
||||
#undef __BLOCKS__
|
||||
|
||||
#endif /* __BLOCKS__ */
|
||||
|
||||
#elif defined(__ANDROID__)
|
||||
|
||||
#define _CRTIMP
|
||||
#define __THROW
|
||||
|
||||
#elif defined(__QNX__)
|
||||
|
||||
#define _CRTIMP
|
||||
#define __THROW
|
||||
|
||||
#elif defined(__GNUC__)
|
||||
|
||||
#define _CRTIMP
|
||||
|
||||
#include <features.h> /* for __THROW */
|
||||
|
||||
#elif defined(_WIN32)
|
||||
|
||||
#if _MSC_VER >= 1500
|
||||
|
||||
#undef _USE_DECLSPECS_FOR_SAL
|
||||
#define _USE_DECLSPECS_FOR_SAL \
|
||||
1
|
||||
|
||||
#endif /* _MSC_VER >= 1500 */
|
||||
|
||||
#if !defined(_CRT_NONSTDC_NO_WARNINGS)
|
||||
|
||||
#define _CRT_NONSTDC_NO_WARNINGS /* to suppress warnings */
|
||||
|
||||
#endif /* !_CRT_NONSTDC_NO_WARNINGS */
|
||||
|
||||
#if !defined(_CRT_SECURE_NO_WARNINGS)
|
||||
|
||||
#define _CRT_SECURE_NO_WARNINGS /* to suppress warnings */
|
||||
|
||||
#endif /* !_CRT_SECURE_NO_WARNINGS */
|
||||
|
||||
#if !defined(NOMINMAX)
|
||||
|
||||
#define NOMINMAX /* min and max are part of cuda runtime */
|
||||
|
||||
#endif /* !NOMINMAX */
|
||||
|
||||
#include <crtdefs.h> /* for _CRTIMP */
|
||||
|
||||
#define __THROW
|
||||
|
||||
#endif /* __APPLE__ */
|
||||
|
||||
#endif /* __CUDACC_RTC__ */
|
||||
|
||||
#endif /* __CUDACC__ */
|
||||
|
||||
#endif /* !__HOST_CONFIG_H__ */
|
241
include/isaac/driver/external/CUDA/host_defines.h
vendored
Normal file
241
include/isaac/driver/external/CUDA/host_defines.h
vendored
Normal file
@@ -0,0 +1,241 @@
|
||||
/*
|
||||
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#if !defined(__HOST_DEFINES_H__)
|
||||
#define __HOST_DEFINES_H__
|
||||
|
||||
/* CUDA JIT mode (__CUDACC_RTC__) also uses GNU style attributes */
|
||||
#if defined(__GNUC__) || defined(__CUDA_LIBDEVICE__) || defined(__CUDACC_RTC__)
|
||||
|
||||
#if defined(__CUDACC_RTC__)
|
||||
#define __volatile__ volatile
|
||||
#endif /* __CUDACC_RTC__ */
|
||||
|
||||
#define __no_return__ \
|
||||
__attribute__((noreturn))
|
||||
|
||||
#if defined(__CUDACC__) || defined(__CUDA_ARCH__)
|
||||
/* gcc allows users to define attributes with underscores,
|
||||
e.g., __attribute__((__noinline__)).
|
||||
Consider a non-CUDA source file (e.g. .cpp) that has the
|
||||
above attribute specification, and includes this header file. In that case,
|
||||
defining __noinline__ as below would cause a gcc compilation error.
|
||||
Hence, only define __noinline__ when the code is being processed
|
||||
by a CUDA compiler component.
|
||||
*/
|
||||
#define __noinline__ \
|
||||
__attribute__((noinline))
|
||||
#endif /* __CUDACC__ || __CUDA_ARCH__ */
|
||||
|
||||
#define __forceinline__ \
|
||||
__inline__ __attribute__((always_inline))
|
||||
#define __align__(n) \
|
||||
__attribute__((aligned(n)))
|
||||
#define __thread__ \
|
||||
__thread
|
||||
#define __import__
|
||||
#define __export__
|
||||
#define __cdecl
|
||||
#define __annotate__(a) \
|
||||
__attribute__((a))
|
||||
#define __location__(a) \
|
||||
__annotate__(a)
|
||||
#define CUDARTAPI
|
||||
|
||||
#elif defined(_MSC_VER)
|
||||
|
||||
#if _MSC_VER >= 1400
|
||||
|
||||
#define __restrict__ \
|
||||
__restrict
|
||||
|
||||
#else /* _MSC_VER >= 1400 */
|
||||
|
||||
#define __restrict__
|
||||
|
||||
#endif /* _MSC_VER >= 1400 */
|
||||
|
||||
#define __inline__ \
|
||||
__inline
|
||||
#define __no_return__ \
|
||||
__declspec(noreturn)
|
||||
#define __noinline__ \
|
||||
__declspec(noinline)
|
||||
#define __forceinline__ \
|
||||
__forceinline
|
||||
#define __align__(n) \
|
||||
__declspec(align(n))
|
||||
#define __thread__ \
|
||||
__declspec(thread)
|
||||
#define __import__ \
|
||||
__declspec(dllimport)
|
||||
#define __export__ \
|
||||
__declspec(dllexport)
|
||||
#define __annotate__(a) \
|
||||
__declspec(a)
|
||||
#define __location__(a) \
|
||||
__annotate__(__##a##__)
|
||||
#define CUDARTAPI \
|
||||
__stdcall
|
||||
|
||||
#else /* __GNUC__ || __CUDA_LIBDEVICE__ || __CUDACC_RTC__ */
|
||||
|
||||
#define __inline__
|
||||
|
||||
#if !defined(__align__)
|
||||
|
||||
#error --- !!! UNKNOWN COMPILER: please provide a CUDA compatible definition for '__align__' !!! ---
|
||||
|
||||
#endif /* !__align__ */
|
||||
|
||||
#if !defined(CUDARTAPI)
|
||||
|
||||
#error --- !!! UNKNOWN COMPILER: please provide a CUDA compatible definition for 'CUDARTAPI' !!! ---
|
||||
|
||||
#endif /* !CUDARTAPI */
|
||||
|
||||
#endif /* __GNUC__ || __CUDA_LIBDEVICE__ || __CUDACC_RTC__ */
|
||||
|
||||
#if (defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 3 && !defined(__clang__)))) || \
|
||||
(defined(_MSC_VER) && _MSC_VER < 1900) || \
|
||||
(!defined(__GNUC__) && !defined(_MSC_VER))
|
||||
|
||||
#define __specialization_static \
|
||||
static
|
||||
|
||||
#else /* (__GNUC__ && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 3 && !__clang__))) ||
|
||||
(_MSC_VER && _MSC_VER < 1900) ||
|
||||
(!__GNUC__ && !_MSC_VER) */
|
||||
|
||||
#define __specialization_static
|
||||
|
||||
#endif /* (__GNUC__ && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 3 && !__clang__))) ||
|
||||
(_MSC_VER && _MSC_VER < 1900) ||
|
||||
(!__GNUC__ && !_MSC_VER) */
|
||||
|
||||
#if !defined(__CUDACC__) && !defined(__CUDABE__)
|
||||
|
||||
#undef __annotate__
|
||||
#define __annotate__(a)
|
||||
|
||||
#else /* !__CUDACC__ && !__CUDABE__ */
|
||||
|
||||
#define __launch_bounds__(...) \
|
||||
__annotate__(launch_bounds(__VA_ARGS__))
|
||||
|
||||
#endif /* !__CUDACC__ && !__CUDABE__ */
|
||||
|
||||
#if defined(__CUDACC__) || defined(__CUDABE__) || \
|
||||
defined(__GNUC__) || defined(_WIN64)
|
||||
|
||||
#define __builtin_align__(a) \
|
||||
__align__(a)
|
||||
|
||||
#else /* __CUDACC__ || __CUDABE__ || __GNUC__ || _WIN64 */
|
||||
|
||||
#define __builtin_align__(a)
|
||||
|
||||
#endif /* __CUDACC__ || __CUDABE__ || __GNUC__ || _WIN64 */
|
||||
|
||||
#define __host__ \
|
||||
__location__(host)
|
||||
#define __device__ \
|
||||
__location__(device)
|
||||
#define __global__ \
|
||||
__location__(global)
|
||||
#define __shared__ \
|
||||
__location__(shared)
|
||||
#define __constant__ \
|
||||
__location__(constant)
|
||||
#define __managed__ \
|
||||
__location__(managed)
|
||||
|
||||
#if (defined(__CUDABE__) && !defined(__CUDACC_INTEGRATED__)) || !defined(__CUDACC__)
|
||||
#define __device_builtin__
|
||||
#define __device_builtin_texture_type__
|
||||
#define __device_builtin_surface_type__
|
||||
#define __cudart_builtin__
|
||||
#else /* (defined(__CUDABE__) && !defined(__CUDACC_INTEGRATED__)) || !__CUDACC__ */
|
||||
#define __device_builtin__ \
|
||||
__location__(device_builtin)
|
||||
#define __device_builtin_texture_type__ \
|
||||
__location__(device_builtin_texture_type)
|
||||
#define __device_builtin_surface_type__ \
|
||||
__location__(device_builtin_surface_type)
|
||||
#define __cudart_builtin__ \
|
||||
__location__(cudart_builtin)
|
||||
#endif /* (defined(__CUDABE__) && !defined(__CUDACC_INTEGRATED__)) || !__CUDACC__ */
|
||||
|
||||
#if defined(__CUDACC__) && defined(__clang__)
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#error --- !!! The Clang version does not support __has_feature !!! ---
|
||||
#endif /* !__has_feature */
|
||||
|
||||
#if defined(__cplusplus) && defined(__CUDACC__)
|
||||
#if (__has_feature(cxx_noexcept))
|
||||
#define NV_CLANG_ATOMIC_NOEXCEPT noexcept
|
||||
#define NV_CLANG_ATOMIC_NOEXCEPT_(x) noexcept(x)
|
||||
#else /* !__has_feature(cxx_noexcept) */
|
||||
#define NV_CLANG_ATOMIC_NOEXCEPT throw()
|
||||
#define NV_CLANG_ATOMIC_NOEXCEPT_(x)
|
||||
#endif /* __has_feature(cxx_noexcept) */
|
||||
/* Declaration-only wrapper template; the #define that follows this struct maps
 * _Atomic(X) to __nv_clang_atomic_t<X>. No member is defined in this header.
 * NOTE(review): presumably exists so that headers using _Atomic(...) still
 * parse when clang is the host compiler under CUDA compilation -- confirm. */
template <typename T> struct __nv_clang_atomic_t {
|
||||
/* Default constructor (declared only). */
__nv_clang_atomic_t() NV_CLANG_ATOMIC_NOEXCEPT;
|
||||
/* Non-explicit converting constructor from T (declared only). */
__nv_clang_atomic_t(const T &x) NV_CLANG_ATOMIC_NOEXCEPT;
|
||||
/* Conversion back to T for volatile-qualified objects (declared only). */
operator T() volatile NV_CLANG_ATOMIC_NOEXCEPT;
|
||||
/* Conversion back to T for non-volatile objects (declared only). */
operator T() NV_CLANG_ATOMIC_NOEXCEPT;
|
||||
};
|
||||
#define _Atomic(X) __nv_clang_atomic_t<X>
|
||||
#endif /* defined(__cplusplus) && defined(__CUDACC__) */
|
||||
|
||||
#endif /* __CUDACC__ && __clang__ */
|
||||
|
||||
|
||||
#endif /* !__HOST_DEFINES_H__ */
|
119
include/isaac/driver/external/CUDA/surface_types.h
vendored
Normal file
119
include/isaac/driver/external/CUDA/surface_types.h
vendored
Normal file
@@ -0,0 +1,119 @@
|
||||
/*
|
||||
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#if !defined(__SURFACE_TYPES_H__)
|
||||
#define __SURFACE_TYPES_H__
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
#include "driver_types.h"
|
||||
|
||||
/**
|
||||
* \addtogroup CUDART_TYPES
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
#define cudaSurfaceType1D 0x01
|
||||
#define cudaSurfaceType2D 0x02
|
||||
#define cudaSurfaceType3D 0x03
|
||||
#define cudaSurfaceTypeCubemap 0x0C
|
||||
#define cudaSurfaceType1DLayered 0xF1
|
||||
#define cudaSurfaceType2DLayered 0xF2
|
||||
#define cudaSurfaceTypeCubemapLayered 0xFC
|
||||
|
||||
/**
|
||||
* CUDA Surface boundary modes
|
||||
*/
|
||||
/* Three surface boundary modes with explicitly assigned values 0, 1 and 2.
 * host_defines.h defines __device_builtin__ as empty when __CUDACC__ is not
 * defined, so in that case this is an ordinary enum. */
enum __device_builtin__ cudaSurfaceBoundaryMode
|
||||
{
|
||||
cudaBoundaryModeZero = 0, /**< Zero boundary mode */
|
||||
cudaBoundaryModeClamp = 1, /**< Clamp boundary mode */
|
||||
cudaBoundaryModeTrap = 2 /**< Trap boundary mode */
|
||||
};
|
||||
|
||||
/**
|
||||
* CUDA Surface format modes
|
||||
*/
|
||||
/* Two surface format modes with explicitly assigned values 0 and 1. */
enum __device_builtin__ cudaSurfaceFormatMode
|
||||
{
|
||||
cudaFormatModeForced = 0, /**< Forced format mode */
|
||||
cudaFormatModeAuto = 1 /**< Auto format mode */
|
||||
};
|
||||
|
||||
/**
|
||||
* CUDA Surface reference
|
||||
*/
|
||||
/* Single-member aggregate holding only a channel descriptor.
 * NOTE(review): cudaChannelFormatDesc is not declared in this header;
 * presumably it comes from driver_types.h, the only header included here --
 * confirm. */
struct __device_builtin__ surfaceReference
|
||||
{
|
||||
/**
|
||||
* Channel descriptor for surface reference
|
||||
*/
|
||||
struct cudaChannelFormatDesc channelDesc;
|
||||
};
|
||||
|
||||
/**
|
||||
* An opaque value that represents a CUDA Surface object
|
||||
*/
|
||||
typedef __device_builtin__ unsigned long long cudaSurfaceObject_t;
|
||||
|
||||
/** @} */
|
||||
/** @} */ /* END CUDART_TYPES */
|
||||
|
||||
#endif /* !__SURFACE_TYPES_H__ */
|
213
include/isaac/driver/external/CUDA/texture_types.h
vendored
Normal file
213
include/isaac/driver/external/CUDA/texture_types.h
vendored
Normal file
@@ -0,0 +1,213 @@
|
||||
/*
|
||||
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#if !defined(__TEXTURE_TYPES_H__)
|
||||
#define __TEXTURE_TYPES_H__
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
#include "driver_types.h"
|
||||
|
||||
/**
|
||||
* \addtogroup CUDART_TYPES
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
#define cudaTextureType1D 0x01
|
||||
#define cudaTextureType2D 0x02
|
||||
#define cudaTextureType3D 0x03
|
||||
#define cudaTextureTypeCubemap 0x0C
|
||||
#define cudaTextureType1DLayered 0xF1
|
||||
#define cudaTextureType2DLayered 0xF2
|
||||
#define cudaTextureTypeCubemapLayered 0xFC
|
||||
|
||||
/**
|
||||
* CUDA texture address modes
|
||||
*/
|
||||
/* Four texture address modes with explicitly assigned values 0..3. Used in
 * this header as the element type of the addressMode[3] arrays in
 * textureReference and cudaTextureDesc. */
enum __device_builtin__ cudaTextureAddressMode
|
||||
{
|
||||
cudaAddressModeWrap = 0, /**< Wrapping address mode */
|
||||
cudaAddressModeClamp = 1, /**< Clamp to edge address mode */
|
||||
cudaAddressModeMirror = 2, /**< Mirror address mode */
|
||||
cudaAddressModeBorder = 3 /**< Border address mode */
|
||||
};
|
||||
|
||||
/**
|
||||
* CUDA texture filter modes
|
||||
*/
|
||||
/* Two texture filter modes with explicitly assigned values 0 and 1. Used in
 * this header for both the filterMode and mipmapFilterMode members of
 * textureReference. */
enum __device_builtin__ cudaTextureFilterMode
|
||||
{
|
||||
cudaFilterModePoint = 0, /**< Point filter mode */
|
||||
cudaFilterModeLinear = 1 /**< Linear filter mode */
|
||||
};
|
||||
|
||||
/**
|
||||
* CUDA texture read modes
|
||||
*/
|
||||
enum __device_builtin__ cudaTextureReadMode
|
||||
{
|
||||
cudaReadModeElementType = 0, /**< Read texture as specified element type */
|
||||
cudaReadModeNormalizedFloat = 1 /**< Read texture as normalized float */
|
||||
};
|
||||
|
||||
/**
|
||||
* CUDA texture reference
|
||||
*/
|
||||
struct __device_builtin__ textureReference
|
||||
{
|
||||
/**
|
||||
* Indicates whether texture reads are normalized or not
|
||||
*/
|
||||
int normalized;
|
||||
/**
|
||||
* Texture filter mode
|
||||
*/
|
||||
enum cudaTextureFilterMode filterMode;
|
||||
/**
|
||||
* Texture address mode for up to 3 dimensions
|
||||
*/
|
||||
enum cudaTextureAddressMode addressMode[3];
|
||||
/**
|
||||
* Channel descriptor for the texture reference
|
||||
*/
|
||||
struct cudaChannelFormatDesc channelDesc;
|
||||
/**
|
||||
* Perform sRGB->linear conversion during texture read
|
||||
*/
|
||||
int sRGB;
|
||||
/**
|
||||
* Limit to the anisotropy ratio
|
||||
*/
|
||||
unsigned int maxAnisotropy;
|
||||
/**
|
||||
* Mipmap filter mode
|
||||
*/
|
||||
enum cudaTextureFilterMode mipmapFilterMode;
|
||||
/**
|
||||
* Offset applied to the supplied mipmap level
|
||||
*/
|
||||
float mipmapLevelBias;
|
||||
/**
|
||||
* Lower end of the mipmap level range to clamp access to
|
||||
*/
|
||||
float minMipmapLevelClamp;
|
||||
/**
|
||||
* Upper end of the mipmap level range to clamp access to
|
||||
*/
|
||||
float maxMipmapLevelClamp;
|
||||
int __cudaReserved[15];
|
||||
};
|
||||
|
||||
/**
|
||||
* CUDA texture descriptor
|
||||
*/
|
||||
struct __device_builtin__ cudaTextureDesc
|
||||
{
|
||||
/**
|
||||
* Texture address mode for up to 3 dimensions
|
||||
*/
|
||||
enum cudaTextureAddressMode addressMode[3];
|
||||
/**
|
||||
* Texture filter mode
|
||||
*/
|
||||
enum cudaTextureFilterMode filterMode;
|
||||
/**
|
||||
* Texture read mode
|
||||
*/
|
||||
enum cudaTextureReadMode readMode;
|
||||
/**
|
||||
* Perform sRGB->linear conversion during texture read
|
||||
*/
|
||||
int sRGB;
|
||||
/**
|
||||
* Indicates whether texture reads are normalized or not
|
||||
*/
|
||||
int normalizedCoords;
|
||||
/**
|
||||
* Limit to the anisotropy ratio
|
||||
*/
|
||||
unsigned int maxAnisotropy;
|
||||
/**
|
||||
* Mipmap filter mode
|
||||
*/
|
||||
enum cudaTextureFilterMode mipmapFilterMode;
|
||||
/**
|
||||
* Offset applied to the supplied mipmap level
|
||||
*/
|
||||
float mipmapLevelBias;
|
||||
/**
|
||||
* Lower end of the mipmap level range to clamp access to
|
||||
*/
|
||||
float minMipmapLevelClamp;
|
||||
/**
|
||||
* Upper end of the mipmap level range to clamp access to
|
||||
*/
|
||||
float maxMipmapLevelClamp;
|
||||
};
|
||||
|
||||
/**
|
||||
* An opaque value that represents a CUDA texture object
|
||||
*/
|
||||
/* The handle is a plain unsigned long long integer; callers treat the value as opaque. */
typedef __device_builtin__ unsigned long long cudaTextureObject_t;
|
||||
|
||||
/** @} */
|
||||
/** @} */ /* END CUDART_TYPES */
|
||||
|
||||
#endif /* !__TEXTURE_TYPES_H__ */
|
177
include/isaac/driver/external/CUDA/vector_functions.h
vendored
Normal file
177
include/isaac/driver/external/CUDA/vector_functions.h
vendored
Normal file
@@ -0,0 +1,177 @@
|
||||
/*
|
||||
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#if !defined(__VECTOR_FUNCTIONS_H__)
|
||||
#define __VECTOR_FUNCTIONS_H__
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
#include "builtin_types.h"
|
||||
#include "host_defines.h"
|
||||
#include "vector_types.h"
|
||||
|
||||
/* Qualifiers applied to the make_* declarations: plain __host__ __device__ when
 * __CUDACC_RTC__ is defined, otherwise additionally static __inline__. */
#if defined(__CUDACC_RTC__)
|
||||
#define __VECTOR_FUNCTIONS_DECL__ __host__ __device__
|
||||
#else /* !__CUDACC_RTC__ */
|
||||
#define __VECTOR_FUNCTIONS_DECL__ static __inline__ __host__ __device__
|
||||
#endif /* __CUDACC_RTC__ */
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
/* Constructors for the signed char vectors (1 to 4 components). */
__VECTOR_FUNCTIONS_DECL__ char1 make_char1(signed char x);
__VECTOR_FUNCTIONS_DECL__ char2 make_char2(signed char x, signed char y);
__VECTOR_FUNCTIONS_DECL__ char3 make_char3(signed char x, signed char y, signed char z);
__VECTOR_FUNCTIONS_DECL__ char4 make_char4(signed char x, signed char y, signed char z, signed char w);

/* Constructors for the unsigned char vectors (1 to 4 components). */
__VECTOR_FUNCTIONS_DECL__ uchar1 make_uchar1(unsigned char x);
__VECTOR_FUNCTIONS_DECL__ uchar2 make_uchar2(unsigned char x, unsigned char y);
__VECTOR_FUNCTIONS_DECL__ uchar3 make_uchar3(unsigned char x, unsigned char y, unsigned char z);
__VECTOR_FUNCTIONS_DECL__ uchar4 make_uchar4(unsigned char x, unsigned char y, unsigned char z, unsigned char w);
|
||||
|
||||
/* Constructors for the short vectors (1 to 4 components). */
__VECTOR_FUNCTIONS_DECL__ short1 make_short1(short x);
__VECTOR_FUNCTIONS_DECL__ short2 make_short2(short x, short y);
__VECTOR_FUNCTIONS_DECL__ short3 make_short3(short x,short y, short z);
__VECTOR_FUNCTIONS_DECL__ short4 make_short4(short x, short y, short z, short w);

/* Constructors for the unsigned short vectors (1 to 4 components). */
__VECTOR_FUNCTIONS_DECL__ ushort1 make_ushort1(unsigned short x);
__VECTOR_FUNCTIONS_DECL__ ushort2 make_ushort2(unsigned short x, unsigned short y);
__VECTOR_FUNCTIONS_DECL__ ushort3 make_ushort3(unsigned short x, unsigned short y, unsigned short z);
__VECTOR_FUNCTIONS_DECL__ ushort4 make_ushort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w);
|
||||
|
||||
/* Constructors for the int vectors (1 to 4 components). */
__VECTOR_FUNCTIONS_DECL__ int1 make_int1(int x);
__VECTOR_FUNCTIONS_DECL__ int2 make_int2(int x, int y);
__VECTOR_FUNCTIONS_DECL__ int3 make_int3(int x, int y, int z);
__VECTOR_FUNCTIONS_DECL__ int4 make_int4(int x, int y, int z, int w);

/* Constructors for the unsigned int vectors (1 to 4 components). */
__VECTOR_FUNCTIONS_DECL__ uint1 make_uint1(unsigned int x);
__VECTOR_FUNCTIONS_DECL__ uint2 make_uint2(unsigned int x, unsigned int y);
__VECTOR_FUNCTIONS_DECL__ uint3 make_uint3(unsigned int x, unsigned int y, unsigned int z);
__VECTOR_FUNCTIONS_DECL__ uint4 make_uint4(unsigned int x, unsigned int y, unsigned int z, unsigned int w);
|
||||
|
||||
/* Constructors for the long int vectors (1 to 4 components). */
__VECTOR_FUNCTIONS_DECL__ long1 make_long1(long int x);
__VECTOR_FUNCTIONS_DECL__ long2 make_long2(long int x, long int y);
__VECTOR_FUNCTIONS_DECL__ long3 make_long3(long int x, long int y, long int z);
__VECTOR_FUNCTIONS_DECL__ long4 make_long4(long int x, long int y, long int z, long int w);

/* Constructors for the unsigned long int vectors (1 to 4 components). */
__VECTOR_FUNCTIONS_DECL__ ulong1 make_ulong1(unsigned long int x);
__VECTOR_FUNCTIONS_DECL__ ulong2 make_ulong2(unsigned long int x, unsigned long int y);
__VECTOR_FUNCTIONS_DECL__ ulong3 make_ulong3(unsigned long int x, unsigned long int y, unsigned long int z);
__VECTOR_FUNCTIONS_DECL__ ulong4 make_ulong4(unsigned long int x, unsigned long int y, unsigned long int z, unsigned long int w);
|
||||
|
||||
/* Constructors for the float vectors (1 to 4 components). */
__VECTOR_FUNCTIONS_DECL__ float1 make_float1(float x);
__VECTOR_FUNCTIONS_DECL__ float2 make_float2(float x, float y);
__VECTOR_FUNCTIONS_DECL__ float3 make_float3(float x, float y, float z);
__VECTOR_FUNCTIONS_DECL__ float4 make_float4(float x, float y, float z, float w);
|
||||
|
||||
/* Constructors for the long long int vectors (1 to 4 components). */
__VECTOR_FUNCTIONS_DECL__ longlong1 make_longlong1(long long int x);
__VECTOR_FUNCTIONS_DECL__ longlong2 make_longlong2(long long int x, long long int y);
__VECTOR_FUNCTIONS_DECL__ longlong3 make_longlong3(long long int x, long long int y, long long int z);
__VECTOR_FUNCTIONS_DECL__ longlong4 make_longlong4(long long int x, long long int y, long long int z, long long int w);

/* Constructors for the unsigned long long int vectors (1 to 4 components). */
__VECTOR_FUNCTIONS_DECL__ ulonglong1 make_ulonglong1(unsigned long long int x);
__VECTOR_FUNCTIONS_DECL__ ulonglong2 make_ulonglong2(unsigned long long int x, unsigned long long int y);
__VECTOR_FUNCTIONS_DECL__ ulonglong3 make_ulonglong3(unsigned long long int x, unsigned long long int y, unsigned long long int z);
__VECTOR_FUNCTIONS_DECL__ ulonglong4 make_ulonglong4(unsigned long long int x, unsigned long long int y, unsigned long long int z, unsigned long long int w);
|
||||
|
||||
/* Constructors for the double vectors (1 to 4 components). */
__VECTOR_FUNCTIONS_DECL__ double1 make_double1(double x);
__VECTOR_FUNCTIONS_DECL__ double2 make_double2(double x, double y);
__VECTOR_FUNCTIONS_DECL__ double3 make_double3(double x, double y, double z);
__VECTOR_FUNCTIONS_DECL__ double4 make_double4(double x, double y, double z, double w);
|
||||
|
||||
#undef __VECTOR_FUNCTIONS_DECL__
|
||||
|
||||
#if !defined(__CUDACC_RTC__)
|
||||
#include "vector_functions.hpp"
|
||||
#endif /* !__CUDACC_RTC__ */
|
||||
|
||||
#endif /* !__VECTOR_FUNCTIONS_H__ */
|
318
include/isaac/driver/external/CUDA/vector_functions.hpp
vendored
Normal file
318
include/isaac/driver/external/CUDA/vector_functions.hpp
vendored
Normal file
@@ -0,0 +1,318 @@
|
||||
/*
|
||||
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#if !defined(__VECTOR_FUNCTIONS_HPP__)
|
||||
#define __VECTOR_FUNCTIONS_HPP__
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
#include "builtin_types.h"
|
||||
#include "host_defines.h"
|
||||
#include "vector_types.h"
|
||||
|
||||
/* Qualifiers applied to the make_* definitions: plain __host__ __device__ when
 * __CUDACC_RTC__ is defined, otherwise additionally static __inline__. */
#if defined(__CUDACC_RTC__)
|
||||
#define __VECTOR_FUNCTIONS_DECL__ __host__ __device__
|
||||
#else /* !__CUDACC_RTC__ */
|
||||
#define __VECTOR_FUNCTIONS_DECL__ static __inline__ __host__ __device__
|
||||
#endif /* __CUDACC_RTC__ */
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
/* Returns a char1 whose component is x. */
__VECTOR_FUNCTIONS_DECL__ char1 make_char1(signed char x)
{
  char1 v;
  v.x = x;
  return v;
}
|
||||
|
||||
/* Returns a uchar1 whose component is x. */
__VECTOR_FUNCTIONS_DECL__ uchar1 make_uchar1(unsigned char x)
{
  uchar1 v;
  v.x = x;
  return v;
}
|
||||
|
||||
/* Returns a char2 with components (x, y). */
__VECTOR_FUNCTIONS_DECL__ char2 make_char2(signed char x, signed char y)
{
  char2 v;
  v.x = x;
  v.y = y;
  return v;
}
|
||||
|
||||
/* Returns a uchar2 with components (x, y). */
__VECTOR_FUNCTIONS_DECL__ uchar2 make_uchar2(unsigned char x, unsigned char y)
{
  uchar2 v;
  v.x = x;
  v.y = y;
  return v;
}
|
||||
|
||||
/* Returns a char3 with components (x, y, z). */
__VECTOR_FUNCTIONS_DECL__ char3 make_char3(signed char x, signed char y, signed char z)
{
  char3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}
|
||||
|
||||
/* Returns a uchar3 with components (x, y, z). */
__VECTOR_FUNCTIONS_DECL__ uchar3 make_uchar3(unsigned char x, unsigned char y, unsigned char z)
{
  uchar3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}
|
||||
|
||||
/* Returns a char4 with components (x, y, z, w). */
__VECTOR_FUNCTIONS_DECL__ char4 make_char4(signed char x, signed char y, signed char z, signed char w)
{
  char4 v;
  v.x = x;
  v.y = y;
  v.z = z;
  v.w = w;
  return v;
}
|
||||
|
||||
/* Returns a uchar4 with components (x, y, z, w). */
__VECTOR_FUNCTIONS_DECL__ uchar4 make_uchar4(unsigned char x, unsigned char y, unsigned char z, unsigned char w)
{
  uchar4 v;
  v.x = x;
  v.y = y;
  v.z = z;
  v.w = w;
  return v;
}
|
||||
|
||||
/* Returns a short1 whose component is x. */
__VECTOR_FUNCTIONS_DECL__ short1 make_short1(short x)
{
  short1 v;
  v.x = x;
  return v;
}
|
||||
|
||||
/* Returns a ushort1 whose component is x. */
__VECTOR_FUNCTIONS_DECL__ ushort1 make_ushort1(unsigned short x)
{
  ushort1 v;
  v.x = x;
  return v;
}
|
||||
|
||||
/* Returns a short2 with components (x, y). */
__VECTOR_FUNCTIONS_DECL__ short2 make_short2(short x, short y)
{
  short2 v;
  v.x = x;
  v.y = y;
  return v;
}
|
||||
|
||||
/* Returns a ushort2 with components (x, y). */
__VECTOR_FUNCTIONS_DECL__ ushort2 make_ushort2(unsigned short x, unsigned short y)
{
  ushort2 v;
  v.x = x;
  v.y = y;
  return v;
}
|
||||
|
||||
/* Returns a short3 with components (x, y, z). */
__VECTOR_FUNCTIONS_DECL__ short3 make_short3(short x,short y, short z)
{
  short3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}
|
||||
|
||||
/* Returns a ushort3 with components (x, y, z). */
__VECTOR_FUNCTIONS_DECL__ ushort3 make_ushort3(unsigned short x, unsigned short y, unsigned short z)
{
  ushort3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}
|
||||
|
||||
/* Returns a short4 with components (x, y, z, w). */
__VECTOR_FUNCTIONS_DECL__ short4 make_short4(short x, short y, short z, short w)
{
  short4 v;
  v.x = x;
  v.y = y;
  v.z = z;
  v.w = w;
  return v;
}
|
||||
|
||||
/* Returns a ushort4 with components (x, y, z, w). */
__VECTOR_FUNCTIONS_DECL__ ushort4 make_ushort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
{
  ushort4 v;
  v.x = x;
  v.y = y;
  v.z = z;
  v.w = w;
  return v;
}
|
||||
|
||||
/* Returns an int1 whose component is x. */
__VECTOR_FUNCTIONS_DECL__ int1 make_int1(int x)
{
  int1 v;
  v.x = x;
  return v;
}
|
||||
|
||||
/* Returns a uint1 whose component is x. */
__VECTOR_FUNCTIONS_DECL__ uint1 make_uint1(unsigned int x)
{
  uint1 v;
  v.x = x;
  return v;
}
|
||||
|
||||
/* Returns an int2 with components (x, y). */
__VECTOR_FUNCTIONS_DECL__ int2 make_int2(int x, int y)
{
  int2 v;
  v.x = x;
  v.y = y;
  return v;
}
|
||||
|
||||
/* Returns a uint2 with components (x, y). */
__VECTOR_FUNCTIONS_DECL__ uint2 make_uint2(unsigned int x, unsigned int y)
{
  uint2 v;
  v.x = x;
  v.y = y;
  return v;
}
|
||||
|
||||
/* Returns an int3 with components (x, y, z). */
__VECTOR_FUNCTIONS_DECL__ int3 make_int3(int x, int y, int z)
{
  int3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}
|
||||
|
||||
/* Returns a uint3 with components (x, y, z). */
__VECTOR_FUNCTIONS_DECL__ uint3 make_uint3(unsigned int x, unsigned int y, unsigned int z)
{
  uint3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}
|
||||
|
||||
/* Returns an int4 with components (x, y, z, w). */
__VECTOR_FUNCTIONS_DECL__ int4 make_int4(int x, int y, int z, int w)
{
  int4 v;
  v.x = x;
  v.y = y;
  v.z = z;
  v.w = w;
  return v;
}
|
||||
|
||||
/* Returns a uint4 with components (x, y, z, w). */
__VECTOR_FUNCTIONS_DECL__ uint4 make_uint4(unsigned int x, unsigned int y, unsigned int z, unsigned int w)
{
  uint4 v;
  v.x = x;
  v.y = y;
  v.z = z;
  v.w = w;
  return v;
}
|
||||
|
||||
/* Returns a long1 whose component is x. */
__VECTOR_FUNCTIONS_DECL__ long1 make_long1(long int x)
{
  long1 v;
  v.x = x;
  return v;
}
|
||||
|
||||
/* Returns a ulong1 whose component is x. */
__VECTOR_FUNCTIONS_DECL__ ulong1 make_ulong1(unsigned long int x)
{
  ulong1 v;
  v.x = x;
  return v;
}
|
||||
|
||||
/* Returns a long2 with components (x, y). */
__VECTOR_FUNCTIONS_DECL__ long2 make_long2(long int x, long int y)
{
  long2 v;
  v.x = x;
  v.y = y;
  return v;
}
|
||||
|
||||
/* Returns a ulong2 with components (x, y). */
__VECTOR_FUNCTIONS_DECL__ ulong2 make_ulong2(unsigned long int x, unsigned long int y)
{
  ulong2 v;
  v.x = x;
  v.y = y;
  return v;
}
|
||||
|
||||
/* Returns a long3 with components (x, y, z). */
__VECTOR_FUNCTIONS_DECL__ long3 make_long3(long int x, long int y, long int z)
{
  long3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}
|
||||
|
||||
/* Returns a ulong3 with components (x, y, z). */
__VECTOR_FUNCTIONS_DECL__ ulong3 make_ulong3(unsigned long int x, unsigned long int y, unsigned long int z)
{
  ulong3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}
|
||||
|
||||
/* Returns a long4 with components (x, y, z, w). */
__VECTOR_FUNCTIONS_DECL__ long4 make_long4(long int x, long int y, long int z, long int w)
{
  long4 v;
  v.x = x;
  v.y = y;
  v.z = z;
  v.w = w;
  return v;
}
|
||||
|
||||
/* Returns a ulong4 with components (x, y, z, w). */
__VECTOR_FUNCTIONS_DECL__ ulong4 make_ulong4(unsigned long int x, unsigned long int y, unsigned long int z, unsigned long int w)
{
  ulong4 v;
  v.x = x;
  v.y = y;
  v.z = z;
  v.w = w;
  return v;
}
|
||||
|
||||
/* Returns a float1 whose component is x. */
__VECTOR_FUNCTIONS_DECL__ float1 make_float1(float x)
{
  float1 v;
  v.x = x;
  return v;
}
|
||||
|
||||
/* Returns a float2 with components (x, y). */
__VECTOR_FUNCTIONS_DECL__ float2 make_float2(float x, float y)
{
  float2 v;
  v.x = x;
  v.y = y;
  return v;
}
|
||||
|
||||
/* Returns a float3 with components (x, y, z). */
__VECTOR_FUNCTIONS_DECL__ float3 make_float3(float x, float y, float z)
{
  float3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}
|
||||
|
||||
/* Returns a float4 with components (x, y, z, w). */
__VECTOR_FUNCTIONS_DECL__ float4 make_float4(float x, float y, float z, float w)
{
  float4 v;
  v.x = x;
  v.y = y;
  v.z = z;
  v.w = w;
  return v;
}
|
||||
|
||||
/* Returns a longlong1 whose component is x. */
__VECTOR_FUNCTIONS_DECL__ longlong1 make_longlong1(long long int x)
{
  longlong1 v;
  v.x = x;
  return v;
}
|
||||
|
||||
/* Returns a ulonglong1 whose component is x. */
__VECTOR_FUNCTIONS_DECL__ ulonglong1 make_ulonglong1(unsigned long long int x)
{
  ulonglong1 v;
  v.x = x;
  return v;
}
|
||||
|
||||
/* Returns a longlong2 with components (x, y). */
__VECTOR_FUNCTIONS_DECL__ longlong2 make_longlong2(long long int x, long long int y)
{
  longlong2 v;
  v.x = x;
  v.y = y;
  return v;
}
|
||||
|
||||
/* Returns a ulonglong2 with components (x, y). */
__VECTOR_FUNCTIONS_DECL__ ulonglong2 make_ulonglong2(unsigned long long int x, unsigned long long int y)
{
  ulonglong2 v;
  v.x = x;
  v.y = y;
  return v;
}
|
||||
|
||||
/* Returns a longlong3 with components (x, y, z). */
__VECTOR_FUNCTIONS_DECL__ longlong3 make_longlong3(long long int x, long long int y, long long int z)
{
  longlong3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}
|
||||
|
||||
/* Returns a ulonglong3 with components (x, y, z). */
__VECTOR_FUNCTIONS_DECL__ ulonglong3 make_ulonglong3(unsigned long long int x, unsigned long long int y, unsigned long long int z)
{
  ulonglong3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}
|
||||
|
||||
/* Returns a longlong4 with components (x, y, z, w). */
__VECTOR_FUNCTIONS_DECL__ longlong4 make_longlong4(long long int x, long long int y, long long int z, long long int w)
{
  longlong4 v;
  v.x = x;
  v.y = y;
  v.z = z;
  v.w = w;
  return v;
}
|
||||
|
||||
/* Returns a ulonglong4 with components (x, y, z, w). */
__VECTOR_FUNCTIONS_DECL__ ulonglong4 make_ulonglong4(unsigned long long int x, unsigned long long int y, unsigned long long int z, unsigned long long int w)
{
  ulonglong4 v;
  v.x = x;
  v.y = y;
  v.z = z;
  v.w = w;
  return v;
}
|
||||
|
||||
/* Returns a double1 whose component is x. */
__VECTOR_FUNCTIONS_DECL__ double1 make_double1(double x)
{
  double1 v;
  v.x = x;
  return v;
}
|
||||
|
||||
/* Returns a double2 with components (x, y). */
__VECTOR_FUNCTIONS_DECL__ double2 make_double2(double x, double y)
{
  double2 v;
  v.x = x;
  v.y = y;
  return v;
}
|
||||
|
||||
/* Returns a double3 with components (x, y, z). */
__VECTOR_FUNCTIONS_DECL__ double3 make_double3(double x, double y, double z)
{
  double3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}
|
||||
|
||||
/* Returns a double4 with components (x, y, z, w). */
__VECTOR_FUNCTIONS_DECL__ double4 make_double4(double x, double y, double z, double w)
{
  double4 v;
  v.x = x;
  v.y = y;
  v.z = z;
  v.w = w;
  return v;
}
|
||||
|
||||
#undef __VECTOR_FUNCTIONS_DECL__
|
||||
|
||||
#endif /* !__VECTOR_FUNCTIONS_HPP__ */
|
||||
|
431
include/isaac/driver/external/CUDA/vector_types.h
vendored
Normal file
431
include/isaac/driver/external/CUDA/vector_types.h
vendored
Normal file
@@ -0,0 +1,431 @@
|
||||
/*
|
||||
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#if !defined(__VECTOR_TYPES_H__)
|
||||
#define __VECTOR_TYPES_H__
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
#if !defined(__CUDA_LIBDEVICE__) && !defined(__CUDACC_RTC__)
|
||||
#define EXCLUDE_FROM_RTC
|
||||
#include "builtin_types.h"
|
||||
#undef EXCLUDE_FROM_RTC
|
||||
#endif /* !__CUDA_LIBDEVICE__ && !__CUDACC_RTC__ */
|
||||
#include "host_defines.h"
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
/* Defines __cuda_builtin_vector_align8(tag, members), which declares a struct named
 * `tag` holding `members`.
 * - First branch (none of __CUDACC__/__CUDACC_RTC__/__CUDABE__ defined, _WIN32 defined,
 *   _WIN64 not defined): warnings 4201 and 4408 are disabled and the members are placed
 *   in an anonymous union next to a struct holding long long int bit-fields.
 *   NOTE(review): presumably this forces 8-byte alignment without an alignment
 *   attribute on that compiler; confirm against NVIDIA's documentation.
 * - Otherwise: the struct is simply declared with __align__(8). */
#if !defined(__CUDACC__) && !defined(__CUDACC_RTC__) && !defined(__CUDABE__) && \
|
||||
defined(_WIN32) && !defined(_WIN64)
|
||||
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable: 4201 4408)
|
||||
|
||||
#define __cuda_builtin_vector_align8(tag, members) \
|
||||
struct __device_builtin__ tag \
|
||||
{ \
|
||||
union \
|
||||
{ \
|
||||
struct { members }; \
|
||||
struct { long long int :1,:0; }; \
|
||||
}; \
|
||||
}
|
||||
|
||||
#else /* !__CUDACC__ && !__CUDACC_RTC__ && !__CUDABE__ && _WIN32 && !_WIN64 */
|
||||
|
||||
#define __cuda_builtin_vector_align8(tag, members) \
|
||||
struct __device_builtin__ __align__(8) tag \
|
||||
{ \
|
||||
members \
|
||||
}
|
||||
|
||||
#endif /* !__CUDACC__ && !__CUDACC_RTC__ && !__CUDABE__ && _WIN32 && !_WIN64 */
|
||||
|
||||
struct __device_builtin__ char1
|
||||
{
|
||||
signed char x;
|
||||
};
|
||||
|
||||
struct __device_builtin__ uchar1
|
||||
{
|
||||
unsigned char x;
|
||||
};
|
||||
|
||||
|
||||
/* Two-component signed char vector, declared with __align__(2). */
struct __device_builtin__ __align__(2) char2
{
    signed char x;
    signed char y;
};
|
||||
|
||||
/* Two-component unsigned char vector, declared with __align__(2). */
struct __device_builtin__ __align__(2) uchar2
{
    unsigned char x;
    unsigned char y;
};
|
||||
|
||||
struct __device_builtin__ char3
|
||||
{
|
||||
signed char x, y, z;
|
||||
};
|
||||
|
||||
struct __device_builtin__ uchar3
|
||||
{
|
||||
unsigned char x, y, z;
|
||||
};
|
||||
|
||||
/* Four-component signed char vector, declared with __align__(4). */
struct __device_builtin__ __align__(4) char4
{
    signed char x;
    signed char y;
    signed char z;
    signed char w;
};
|
||||
|
||||
/* Four-component unsigned char vector, declared with __align__(4). */
struct __device_builtin__ __align__(4) uchar4
{
    unsigned char x;
    unsigned char y;
    unsigned char z;
    unsigned char w;
};
|
||||
|
||||
struct __device_builtin__ short1
|
||||
{
|
||||
short x;
|
||||
};
|
||||
|
||||
struct __device_builtin__ ushort1
|
||||
{
|
||||
unsigned short x;
|
||||
};
|
||||
|
||||
/* Two-component short vector, declared with __align__(4). */
struct __device_builtin__ __align__(4) short2
{
    short x;
    short y;
};
|
||||
|
||||
/* Two-component unsigned short vector, declared with __align__(4). */
struct __device_builtin__ __align__(4) ushort2
{
    unsigned short x;
    unsigned short y;
};
|
||||
|
||||
struct __device_builtin__ short3
|
||||
{
|
||||
short x, y, z;
|
||||
};
|
||||
|
||||
struct __device_builtin__ ushort3
|
||||
{
|
||||
unsigned short x, y, z;
|
||||
};
|
||||
|
||||
/* short4: four short components (x, y, z, w), declared through __cuda_builtin_vector_align8. */
__cuda_builtin_vector_align8(short4, short x; short y; short z; short w;);
|
||||
/* ushort4: four unsigned short components (x, y, z, w), declared through __cuda_builtin_vector_align8. */
__cuda_builtin_vector_align8(ushort4, unsigned short x; unsigned short y; unsigned short z; unsigned short w;);
|
||||
|
||||
struct __device_builtin__ int1
|
||||
{
|
||||
int x;
|
||||
};
|
||||
|
||||
struct __device_builtin__ uint1
|
||||
{
|
||||
unsigned int x;
|
||||
};
|
||||
|
||||
/* int2: two int components (x, y), declared through __cuda_builtin_vector_align8. */
__cuda_builtin_vector_align8(int2, int x; int y;);
|
||||
/* uint2: two unsigned int components (x, y), declared through __cuda_builtin_vector_align8. */
__cuda_builtin_vector_align8(uint2, unsigned int x; unsigned int y;);
|
||||
|
||||
struct __device_builtin__ int3
|
||||
{
|
||||
int x, y, z;
|
||||
};
|
||||
|
||||
struct __device_builtin__ uint3
|
||||
{
|
||||
unsigned int x, y, z;
|
||||
};
|
||||
|
||||
/* Four-component int vector, declared with __builtin_align__(16). */
struct __device_builtin__ __builtin_align__(16) int4
{
    int x;
    int y;
    int z;
    int w;
};
|
||||
|
||||
/* Four-component unsigned int vector, declared with __builtin_align__(16). */
struct __device_builtin__ __builtin_align__(16) uint4
{
    unsigned int x;
    unsigned int y;
    unsigned int z;
    unsigned int w;
};
|
||||
|
||||
struct __device_builtin__ long1
|
||||
{
|
||||
long int x;
|
||||
};
|
||||
|
||||
struct __device_builtin__ ulong1
|
||||
{
|
||||
unsigned long x;
|
||||
};
|
||||
|
||||
#if defined(__CUDACC_RTC__) || defined(_WIN32)
|
||||
__cuda_builtin_vector_align8(long2, long int x; long int y;);
|
||||
__cuda_builtin_vector_align8(ulong2, unsigned long int x; unsigned long int y;);
|
||||
#else /* __CUDACC_RTC__ || _WIN32 */
|
||||
|
||||
struct __device_builtin__ __align__(2*sizeof(long int)) long2
|
||||
{
|
||||
long int x, y;
|
||||
};
|
||||
|
||||
struct __device_builtin__ __align__(2*sizeof(unsigned long int)) ulong2
|
||||
{
|
||||
unsigned long int x, y;
|
||||
};
|
||||
|
||||
#endif /* __CUDACC_RTC__ || _WIN32 */
|
||||
|
||||
struct __device_builtin__ long3
|
||||
{
|
||||
long int x, y, z;
|
||||
};
|
||||
|
||||
struct __device_builtin__ ulong3
|
||||
{
|
||||
unsigned long int x, y, z;
|
||||
};
|
||||
|
||||
/* Four-component long int vector, declared with __builtin_align__(16). */
struct __device_builtin__ __builtin_align__(16) long4
{
    long int x;
    long int y;
    long int z;
    long int w;
};
|
||||
|
||||
/* Four-component unsigned long int vector, declared with __builtin_align__(16). */
struct __device_builtin__ __builtin_align__(16) ulong4
{
    unsigned long int x;
    unsigned long int y;
    unsigned long int z;
    unsigned long int w;
};
|
||||
|
||||
struct __device_builtin__ float1
|
||||
{
|
||||
float x;
|
||||
};
|
||||
|
||||
/* float2: two float components (x, y).
 * First branch (no __CUDACC__/__CUDABE__, __arm__ and __ARM_PCS_VFP defined, GCC 4.6):
 * the struct is declared with __attribute__((aligned(8))) and carries an extra
 * zero-length float array member, whose name is poisoned right after the definition;
 * the "-pedantic" diagnostic is ignored around it.
 * NOTE(review): judging by the member's name this presumably works around a GCC
 * internal compiler error on that target; confirm before touching.
 * All other configurations declare float2 through __cuda_builtin_vector_align8. */
#if !defined(__CUDACC__) && !defined(__CUDABE__) && defined(__arm__) && \
|
||||
defined(__ARM_PCS_VFP) && __GNUC__ == 4 && __GNUC_MINOR__ == 6
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-pedantic"
|
||||
|
||||
struct __device_builtin__ __attribute__((aligned(8))) float2
|
||||
{
|
||||
float x; float y; float __cuda_gnu_arm_ice_workaround[0];
|
||||
};
|
||||
|
||||
#pragma GCC poison __cuda_gnu_arm_ice_workaround
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
#else /* !__CUDACC__ && !__CUDABE__ && __arm__ && __ARM_PCS_VFP &&
|
||||
__GNUC__ == 4&& __GNUC_MINOR__ == 6 */
|
||||
|
||||
__cuda_builtin_vector_align8(float2, float x; float y;);
|
||||
|
||||
#endif /* !__CUDACC__ && !__CUDABE__ && __arm__ && __ARM_PCS_VFP &&
|
||||
__GNUC__ == 4&& __GNUC_MINOR__ == 6 */
|
||||
|
||||
struct __device_builtin__ float3
|
||||
{
|
||||
float x, y, z;
|
||||
};
|
||||
|
||||
struct __device_builtin__ __builtin_align__(16) float4
|
||||
{
|
||||
float x, y, z, w;
|
||||
};
|
||||
|
||||
struct __device_builtin__ longlong1
|
||||
{
|
||||
long long int x;
|
||||
};
|
||||
|
||||
struct __device_builtin__ ulonglong1
|
||||
{
|
||||
unsigned long long int x;
|
||||
};
|
||||
|
||||
struct __device_builtin__ __builtin_align__(16) longlong2
|
||||
{
|
||||
long long int x, y;
|
||||
};
|
||||
|
||||
struct __device_builtin__ __builtin_align__(16) ulonglong2
|
||||
{
|
||||
unsigned long long int x, y;
|
||||
};
|
||||
|
||||
struct __device_builtin__ longlong3
|
||||
{
|
||||
long long int x, y, z;
|
||||
};
|
||||
|
||||
struct __device_builtin__ ulonglong3
|
||||
{
|
||||
unsigned long long int x, y, z;
|
||||
};
|
||||
|
||||
struct __device_builtin__ __builtin_align__(16) longlong4
|
||||
{
|
||||
long long int x, y, z ,w;
|
||||
};
|
||||
|
||||
struct __device_builtin__ __builtin_align__(16) ulonglong4
|
||||
{
|
||||
unsigned long long int x, y, z, w;
|
||||
};
|
||||
|
||||
struct __device_builtin__ double1
|
||||
{
|
||||
double x;
|
||||
};
|
||||
|
||||
struct __device_builtin__ __builtin_align__(16) double2
|
||||
{
|
||||
double x, y;
|
||||
};
|
||||
|
||||
struct __device_builtin__ double3
|
||||
{
|
||||
double x, y, z;
|
||||
};
|
||||
|
||||
struct __device_builtin__ __builtin_align__(16) double4
|
||||
{
|
||||
double x, y, z, w;
|
||||
};
|
||||
|
||||
#if !defined(__CUDACC__) && !defined(__CUDABE__) && \
|
||||
defined(_WIN32) && !defined(_WIN64)
|
||||
|
||||
#pragma warning(pop)
|
||||
|
||||
#endif /* !__CUDACC__ && !__CUDABE__ && _WIN32 && !_WIN64 */
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
typedef __device_builtin__ struct char1 char1;
|
||||
typedef __device_builtin__ struct uchar1 uchar1;
|
||||
typedef __device_builtin__ struct char2 char2;
|
||||
typedef __device_builtin__ struct uchar2 uchar2;
|
||||
typedef __device_builtin__ struct char3 char3;
|
||||
typedef __device_builtin__ struct uchar3 uchar3;
|
||||
typedef __device_builtin__ struct char4 char4;
|
||||
typedef __device_builtin__ struct uchar4 uchar4;
|
||||
typedef __device_builtin__ struct short1 short1;
|
||||
typedef __device_builtin__ struct ushort1 ushort1;
|
||||
typedef __device_builtin__ struct short2 short2;
|
||||
typedef __device_builtin__ struct ushort2 ushort2;
|
||||
typedef __device_builtin__ struct short3 short3;
|
||||
typedef __device_builtin__ struct ushort3 ushort3;
|
||||
typedef __device_builtin__ struct short4 short4;
|
||||
typedef __device_builtin__ struct ushort4 ushort4;
|
||||
typedef __device_builtin__ struct int1 int1;
|
||||
typedef __device_builtin__ struct uint1 uint1;
|
||||
typedef __device_builtin__ struct int2 int2;
|
||||
typedef __device_builtin__ struct uint2 uint2;
|
||||
typedef __device_builtin__ struct int3 int3;
|
||||
typedef __device_builtin__ struct uint3 uint3;
|
||||
typedef __device_builtin__ struct int4 int4;
|
||||
typedef __device_builtin__ struct uint4 uint4;
|
||||
typedef __device_builtin__ struct long1 long1;
|
||||
typedef __device_builtin__ struct ulong1 ulong1;
|
||||
typedef __device_builtin__ struct long2 long2;
|
||||
typedef __device_builtin__ struct ulong2 ulong2;
|
||||
typedef __device_builtin__ struct long3 long3;
|
||||
typedef __device_builtin__ struct ulong3 ulong3;
|
||||
typedef __device_builtin__ struct long4 long4;
|
||||
typedef __device_builtin__ struct ulong4 ulong4;
|
||||
typedef __device_builtin__ struct float1 float1;
|
||||
typedef __device_builtin__ struct float2 float2;
|
||||
typedef __device_builtin__ struct float3 float3;
|
||||
typedef __device_builtin__ struct float4 float4;
|
||||
typedef __device_builtin__ struct longlong1 longlong1;
|
||||
typedef __device_builtin__ struct ulonglong1 ulonglong1;
|
||||
typedef __device_builtin__ struct longlong2 longlong2;
|
||||
typedef __device_builtin__ struct ulonglong2 ulonglong2;
|
||||
typedef __device_builtin__ struct longlong3 longlong3;
|
||||
typedef __device_builtin__ struct ulonglong3 ulonglong3;
|
||||
typedef __device_builtin__ struct longlong4 longlong4;
|
||||
typedef __device_builtin__ struct ulonglong4 ulonglong4;
|
||||
typedef __device_builtin__ struct double1 double1;
|
||||
typedef __device_builtin__ struct double2 double2;
|
||||
typedef __device_builtin__ struct double3 double3;
|
||||
typedef __device_builtin__ struct double4 double4;
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
struct __device_builtin__ dim3
|
||||
{
|
||||
unsigned int x, y, z;
|
||||
#if defined(__cplusplus)
|
||||
__host__ __device__ dim3(unsigned int vx = 1, unsigned int vy = 1, unsigned int vz = 1) : x(vx), y(vy), z(vz) {}
|
||||
__host__ __device__ dim3(uint3 v) : x(v.x), y(v.y), z(v.z) {}
|
||||
__host__ __device__ operator uint3(void) { uint3 t; t.x = x; t.y = y; t.z = z; return t; }
|
||||
#endif /* __cplusplus */
|
||||
};
|
||||
|
||||
typedef __device_builtin__ struct dim3 dim3;
|
||||
|
||||
#undef __cuda_builtin_vector_align8
|
||||
|
||||
#endif /* !__VECTOR_TYPES_H__ */
|
@@ -85,11 +85,20 @@ public:
|
||||
virtual int is_invalid(expression_tree const & expressions, driver::Device const & device) const = 0;
|
||||
virtual void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const & expressions) = 0;
|
||||
std::string generate(std::string const & suffix, expression_tree const & expressions, driver::Device const & device);
|
||||
std::shared_ptr<base> getptr() {
|
||||
return shared_from_this();
|
||||
}
|
||||
std::shared_ptr<base> getptr();
|
||||
};
|
||||
|
||||
class external_base: public base
|
||||
{
|
||||
private:
|
||||
virtual std::string generate_impl(std::string const & suffix, expression_tree const & expressions, driver::Device const & device, symbolic::symbols_table const & mapping);
|
||||
public:
|
||||
external_base();
|
||||
virtual unsigned int temporary_workspace(expression_tree const &) const;
|
||||
virtual unsigned int lmem_usage(expression_tree const &) const;
|
||||
virtual unsigned int registers_usage(expression_tree const &) const;
|
||||
virtual std::vector<int_t> input_sizes(expression_tree const & expressions) const = 0;
|
||||
};
|
||||
|
||||
class parameterized_base : public base
|
||||
{
|
||||
|
@@ -31,6 +31,21 @@ namespace isaac
|
||||
namespace templates
|
||||
{
|
||||
|
||||
|
||||
class cublas_gemm : public external_base
|
||||
{
|
||||
bool init();
|
||||
public:
|
||||
cublas_gemm(char A_trans, char B_trans);
|
||||
int is_invalid(expression_tree const &, driver::Device const &) const;
|
||||
std::vector<int_t> input_sizes(expression_tree const & expressions) const;
|
||||
void enqueue(driver::CommandQueue & queue, driver::Program const &, std::string const &, runtime::execution_handler const & h);
|
||||
private:
|
||||
const char A_trans_;
|
||||
const char B_trans_;
|
||||
bool init_;
|
||||
};
|
||||
|
||||
class gemm : public parameterized_base
|
||||
{
|
||||
private:
|
||||
@@ -41,16 +56,16 @@ private:
|
||||
std::string generate_impl(std::string const & suffix, expression_tree const & expressions, driver::Device const & device, symbolic::symbols_table const &) const;
|
||||
void enqueue_block(driver::CommandQueue & queue, int_t M, int_t N, int_t K, const expression_tree::node &A, const expression_tree::node &B, const expression_tree::node &C,
|
||||
value_scalar const &alpha, value_scalar const &beta, driver::Program const & program, std::string const & suffix, runtime::execution_options_type const & options);
|
||||
std::vector<int_t> infos(expression_tree const & expressions, isaac::symbolic::preset::gemm::args &arguments) const;
|
||||
|
||||
public:
|
||||
gemm(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
|
||||
, int_t ms, int_t ks, int_t ns, fetch_type Afetch , fetch_type Bfetch
|
||||
, int_t lf0, int_t lf1, char A_trans, char B_trans);
|
||||
std::vector<int_t> input_sizes(expression_tree const & expressions) const;
|
||||
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const &ctr);
|
||||
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const & h);
|
||||
|
||||
private:
|
||||
//Parameters
|
||||
|
||||
unsigned int mL_;
|
||||
unsigned int kL_;
|
||||
unsigned int nL_;
|
||||
|
@@ -60,6 +60,10 @@ namespace driver
|
||||
#define DEFINE11(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i, t10 j, t11 k)\
|
||||
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h, i, j, k); }
|
||||
|
||||
#define DEFINE13(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i, t10 j, t11 k, t12 l, t13 m)\
|
||||
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h, i, j, k, l, m); }
|
||||
|
||||
|
||||
//Specialized helpers for OpenCL
|
||||
#define OCL_DEFINE1(ret, fname, t1) DEFINE1(clinit, opencl_, ret, fname, t1)
|
||||
#define OCL_DEFINE2(ret, fname, t1, t2) DEFINE2(clinit, opencl_, ret, fname, t1, t2)
|
||||
@@ -96,6 +100,8 @@ namespace driver
|
||||
#define NVRTC_DEFINE10(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) DEFINE10(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10)
|
||||
#define NVRTC_DEFINE11(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) DEFINE11(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11)
|
||||
|
||||
#define CUBLAS_DEFINE1(ret, fname, t1) DEFINE1(cublasinit, cublas_, ret, fname, t1)
|
||||
#define CUBLAS_DEFINE13(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13) DEFINE13(cublasinit, cublas_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13)
|
||||
|
||||
bool dispatch::clinit()
|
||||
{
|
||||
@@ -118,6 +124,16 @@ bool dispatch::nvrtcinit()
|
||||
return nvrtc_ != nullptr;
|
||||
}
|
||||
|
||||
bool dispatch::cublasinit()
|
||||
{
|
||||
if(cublas_==nullptr){
|
||||
cublas_ = dlopen("libcublas.so", RTLD_LAZY);
|
||||
if(cublas_!=nullptr)
|
||||
cublasCreate(&cublas_handle_);
|
||||
}
|
||||
return cublas_ != nullptr;
|
||||
}
|
||||
|
||||
|
||||
//OpenCL
|
||||
|
||||
@@ -196,6 +212,20 @@ NVRTC_DEFINE2(nvrtcResult, nvrtcGetPTXSize, nvrtcProgram, size_t *)
|
||||
NVRTC_DEFINE6(nvrtcResult, nvrtcCreateProgram, nvrtcProgram *, const char *, const char *, int, const char **, const char **)
|
||||
NVRTC_DEFINE2(nvrtcResult, nvrtcGetProgramLog, nvrtcProgram, char *)
|
||||
|
||||
CUBLAS_DEFINE1(void, cublasCreate, cublasHandle_t*)
|
||||
|
||||
/*
 * Forwards to the "cublasGetStream_v2" symbol through f_impl, passing the shared
 * handle cublas_handle_ followed by `a`. The value returned by f_impl is dropped.
 */
void dispatch::cublasGetStream(cudaStream_t *a)
{
  f_impl<dispatch::cublasinit>(cublas_,
                               cublasGetStream_v2,
                               cublasGetStream_,
                               "cublasGetStream_v2",
                               cublas_handle_, a);
}
|
||||
|
||||
/*
 * Forwards to the "cublasSetStream_v2" symbol through f_impl, passing the shared
 * handle cublas_handle_ followed by `a`. The value returned by f_impl is dropped.
 */
void dispatch::cublasSetStream(cudaStream_t a)
{
  f_impl<dispatch::cublasinit>(cublas_,
                               cublasSetStream_v2,
                               cublasSetStream_,
                               "cublasSetStream_v2",
                               cublas_handle_, a);
}
|
||||
|
||||
/*
 * Single-precision GEMM wrapper: forwards every argument, preceded by the shared
 * handle cublas_handle_, to the "cublasSgemm_v2" symbol through f_impl.
 * The value returned by f_impl is dropped.
 */
void dispatch::cublasSgemm(cublasOperation_t at, cublasOperation_t bt, int m, int n, int k, float* alpha, const float *A, int lda, const float *B, int ldb, float* beta, float *C, int ldc)
{
  f_impl<dispatch::cublasinit>(cublas_,
                               cublasSgemm_v2,
                               cublasSgemm_,
                               "cublasSgemm_v2",
                               cublas_handle_,
                               at, bt,
                               m, n, k,
                               alpha,
                               A, lda,
                               B, ldb,
                               beta,
                               C, ldc);
}
|
||||
|
||||
/*
 * Double-precision GEMM wrapper: forwards every argument, preceded by the shared
 * handle cublas_handle_, to the "cublasDgemm_v2" symbol through f_impl.
 * The value returned by f_impl is dropped.
 */
void dispatch::cublasDgemm(cublasOperation_t at, cublasOperation_t bt, int m, int n, int k, double* alpha, const double *A, int lda, const double *B, int ldb, double* beta, double *C, int ldc)
{
  f_impl<dispatch::cublasinit>(cublas_,
                               cublasDgemm_v2,
                               cublasDgemm_,
                               "cublasDgemm_v2",
                               cublas_handle_,
                               at, bt,
                               m, n, k,
                               alpha,
                               A, lda,
                               B, ldb,
                               beta,
                               C, ldc);
}
|
||||
|
||||
void dispatch::release()
|
||||
{
|
||||
if(opencl_){
|
||||
@@ -210,11 +240,17 @@ void dispatch::release()
|
||||
dlclose(nvrtc_);
|
||||
nvrtc_ = nullptr;
|
||||
}
|
||||
if(cublas_){
|
||||
dlclose(cublas_);
|
||||
cublas_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void * dispatch::opencl_;
|
||||
void * dispatch::cuda_;
|
||||
void * dispatch::nvrtc_;
|
||||
void * dispatch::cublas_;
|
||||
cublasHandle_t dispatch::cublas_handle_;
|
||||
|
||||
//OpenCL
|
||||
void* dispatch::clBuildProgram_;
|
||||
@@ -288,5 +324,11 @@ void* dispatch::nvrtcGetPTXSize_;
|
||||
void* dispatch::nvrtcCreateProgram_;
|
||||
void* dispatch::nvrtcGetProgramLog_;
|
||||
|
||||
void* dispatch::cublasCreate_;
|
||||
void* dispatch::cublasGetStream_;
|
||||
void* dispatch::cublasSetStream_;
|
||||
void* dispatch::cublasSgemm_;
|
||||
void* dispatch::cublasDgemm_;
|
||||
|
||||
}
|
||||
}
|
||||
|
@@ -66,6 +66,24 @@ std::string base::generate(std::string const & suffix, expression_tree const &
|
||||
return generate_impl(suffix, expression, device, mapping);
|
||||
}
|
||||
|
||||
|
||||
/* External base */
|
||||
/* Default constructor: no work of its own. */
external_base::external_base()
{
}
|
||||
|
||||
/* Ignores all four arguments and always yields an empty string. */
std::string external_base::generate_impl(std::string const &, expression_tree const &, driver::Device const &, symbolic::symbols_table const &)
{
  return std::string();
}
|
||||
|
||||
/* Always reports 0; the expression tree is ignored. */
unsigned int external_base::temporary_workspace(expression_tree const &) const
{
  return 0u;
}
|
||||
|
||||
/* Always reports 0; the expression tree is ignored. */
unsigned int external_base::lmem_usage(expression_tree const &) const
{
  return 0u;
}
|
||||
|
||||
/* Always reports 0; the expression tree is ignored. */
unsigned int external_base::registers_usage(expression_tree const &) const
{
  return 0u;
}
|
||||
|
||||
/* Parameterized base */
|
||||
int parameterized_base::is_invalid_impl(driver::Device const &, expression_tree const &) const
|
||||
{ return TEMPLATE_VALID; }
|
||||
|
||||
@@ -104,5 +122,8 @@ int parameterized_base::is_invalid(expression_tree const & expressions, driver:
|
||||
return is_invalid_impl(device, expressions);
|
||||
}
|
||||
|
||||
std::shared_ptr<base> base::getptr()
|
||||
{ return shared_from_this(); }
|
||||
|
||||
}
|
||||
}
|
||||
|
@@ -1,4 +1,4 @@
|
||||
/*
|
||||
/*
|
||||
* Copyright (c) 2015, PHILIPPE TILLET. All rights reserved.
|
||||
*
|
||||
* This file is part of ISAAC.
|
||||
@@ -20,6 +20,7 @@
|
||||
*/
|
||||
|
||||
#include "isaac/array.h"
|
||||
#include "isaac/driver/dispatch.h"
|
||||
#include "isaac/jit/syntax/expression/preset.h"
|
||||
#include "isaac/jit/syntax/engine/process.h"
|
||||
#include "isaac/jit/generation/gemm.h"
|
||||
@@ -37,8 +38,73 @@ namespace isaac
|
||||
namespace templates
|
||||
{
|
||||
|
||||
unsigned int gemm::lmem_usage(expression_tree const & expression) const
|
||||
{
|
||||
/*
 * Runs symbolic::preset::gemm::check on the tree's data and root, stores the
 * result in `arguments`, and returns {M, N, K}.
 *
 * M and N are the two entries of C's shape. K is A's shape[0] when A_trans is
 * 'T' and A's shape[1] for any other value.
 */
std::vector<int_t> infos(expression_tree const & tree, symbolic::preset::gemm::args& arguments, char A_trans)
{
  expression_tree::data_type const & nodes = tree.data();
  std::size_t const root_idx = tree.root();
  arguments = symbolic::preset::gemm::check(nodes, root_idx);

  // The reduction dimension sits on a different axis of A depending on the flag.
  int_t K;
  if(A_trans=='T')
    K = arguments.A->shape[0];
  else
    K = arguments.A->shape[1];

  int_t const M = arguments.C->shape[0];
  int_t const N = arguments.C->shape[1];
  return {M, N, K};
}
|
||||
|
||||
/* ------------------ CUBLAS ------------------ */
|
||||
bool cublas_gemm::init()
|
||||
{
|
||||
return driver::dispatch::cublasinit();
|
||||
}
|
||||
|
||||
/*
 * Stores the two transposition flags and records in init_ the result of
 * driver::dispatch::cublasinit().
 */
cublas_gemm::cublas_gemm(char A_trans, char B_trans)
  : A_trans_(A_trans),
    B_trans_(B_trans),
    init_(driver::dispatch::cublasinit())
{
}
|
||||
|
||||
/*
 * Reports whether this template can be used on `device`.
 *
 * Returns TEMPLATE_VALID when cuBLAS was loaded at construction time (init_)
 * and the device belongs to the CUDA backend, and a non-zero code otherwise.
 * The previous expression returned the boolean itself, i.e. non-zero exactly in
 * the usable case, which is the opposite of what an "is invalid" query means.
 */
int cublas_gemm::is_invalid(expression_tree const &, driver::Device const & device) const
{
  bool const usable = init_ && device.backend()==driver::CUDA;
  // NOTE(review): -1 is a generic "invalid" marker; swap in a dedicated
  // TEMPLATE_* code if the project defines one for this situation.
  return usable ? TEMPLATE_VALID : -1;
}
|
||||
|
||||
/*
 * Returns the vector produced by infos() for `expressions` and this object's
 * A_trans_ flag; the argument pack infos() fills in is discarded.
 */
std::vector<int_t> cublas_gemm::input_sizes(expression_tree const & expressions) const
{
  symbolic::preset::gemm::args unused;
  // infos() takes the tree by const reference, so it is passed through directly.
  std::vector<int_t> sizes = infos(expressions, unused, A_trans_);
  return sizes;
}
|
||||
|
||||
/*
 * Executes the GEMM described by control.x() through the cuBLAS dispatch
 * wrappers, on the stream of `queue`.
 *
 * The Program and suffix arguments are unused. The shared cuBLAS handle is
 * switched to the queue's stream for the duration of the call and switched back
 * on exit. When the execution options carry an event list, a start/stop event
 * pair is recorded around the GEMM call and appended to that list.
 *
 * The cuBLAS status is not checked here: the dispatch wrappers return void.
 */
void cublas_gemm::enqueue(driver::CommandQueue & queue, driver::Program const &, std::string const &, runtime::execution_handler const & control)
{
  namespace drv = driver;

  // Re-binds the previously active stream to the shared cuBLAS handle when the
  // scope is left. Doing this in a destructor also covers the case where an
  // exception escapes (e.g. from drv::check while recording an event), so the
  // handle is never left attached to this queue's stream.
  struct stream_restorer
  {
    cudaStream_t previous;
    ~stream_restorer() { driver::dispatch::cublasSetStream(previous); }
  };

  //Get GEMM info
  symbolic::preset::gemm::args args;
  std::vector<int_t> MNK = infos(control.x(), args, A_trans_);
  int_t M = MNK[0], N = MNK[1], K = MNK[2];
  CUdeviceptr cuA = args.A->array.handle.cu;
  CUdeviceptr cuB = args.B->array.handle.cu;
  CUdeviceptr cuC = args.C->array.handle.cu;
  runtime::execution_options_type const & opt = control.execution_options();
  // Any flag other than 'N' is mapped to a transposed operand.
  auto cuT = [](char xt) { return xt=='N'?CUBLAS_OP_N:CUBLAS_OP_T; };
  // The addresses of these host-side copies are what gets handed to cuBLAS.
  values_holder alpha = args.alpha.values();
  values_holder beta = args.beta.values();
  drv::Event event(drv::CUDA);

  //Set new stream
  cudaStream_t bkp;
  drv::dispatch::cublasGetStream(&bkp);
  // The guard is armed only after bkp has been filled in.
  stream_restorer restore = {bkp};
  drv::dispatch::cublasSetStream((cudaStream_t)queue.handle().cu());

  if(opt.events)
    drv::check(drv::dispatch::cuEventRecord(event.handle().cu().first, queue.handle().cu()));

  // Every dtype other than FLOAT_TYPE takes the double-precision path.
  if(args.C->dtype==FLOAT_TYPE)
    drv::dispatch::cublasSgemm(cuT(A_trans_), cuT(B_trans_), M, N, K, &alpha.float32, (float*)cuA, args.A->ld[1], (float*)cuB, args.B->ld[1], &beta.float32, (float*)cuC, args.C->ld[1]);
  else
    drv::dispatch::cublasDgemm(cuT(A_trans_), cuT(B_trans_), M, N, K, &alpha.float64, (double*)cuA, args.A->ld[1], (double*)cuB, args.B->ld[1], &beta.float64, (double*)cuC, args.C->ld[1]);

  if(opt.events){
    drv::check(drv::dispatch::cuEventRecord(event.handle().cu().second, queue.handle().cu()));
    opt.events->push_back(event);
  }
  //Old stream is reverted by `restore` going out of scope
}
|
||||
|
||||
|
||||
|
||||
/* -------------------------------------------- */
|
||||
unsigned int gemm::lmem_usage(expression_tree const & expression) const
|
||||
{
|
||||
unsigned int N = 0;
|
||||
size_t llda = (A_trans_=='N')?mL_:kL_+vwidth_;
|
||||
size_t lnda = (A_trans_=='N')?kL_:mL_;
|
||||
@@ -47,25 +113,25 @@ namespace templates
|
||||
N += llda*lnda;
|
||||
N += lldb*lndb;
|
||||
return N*size_of(expression.dtype());
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int gemm::registers_usage(expression_tree const & expression) const
|
||||
{
|
||||
unsigned int gemm::registers_usage(expression_tree const & expression) const
|
||||
{
|
||||
unsigned int N = mS_ * nS_ + mS_ * kS_ + kS_ * nS_;
|
||||
return N*size_of(expression.dtype());
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int gemm::temporary_workspace(expression_tree const & expressions) const
|
||||
{
|
||||
unsigned int gemm::temporary_workspace(expression_tree const & expressions) const
|
||||
{
|
||||
std::vector<int_t> MNK = input_sizes(expressions);
|
||||
int_t M = MNK[0]; int_t N = MNK[1];
|
||||
if(depth_ > 1)
|
||||
return M*N*depth_;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
int gemm::is_invalid_impl(driver::Device const &, expression_tree const &) const
|
||||
{
|
||||
int gemm::is_invalid_impl(driver::Device const &, expression_tree const &) const
|
||||
{
|
||||
if(Afetch_!=FETCH_FROM_LOCAL || Bfetch_!=FETCH_FROM_LOCAL)
|
||||
return TEMPLATE_INVALID_FETCHING_POLICY_TYPE;
|
||||
|
||||
@@ -109,10 +175,10 @@ namespace templates
|
||||
}
|
||||
|
||||
return TEMPLATE_VALID;
|
||||
}
|
||||
}
|
||||
|
||||
std::string gemm::generate_impl(std::string const & suffix, expression_tree const & tree, driver::Device const & device, symbolic::symbols_table const &) const
|
||||
{
|
||||
std::string gemm::generate_impl(std::string const & suffix, expression_tree const & tree, driver::Device const & device, symbolic::symbols_table const &) const
|
||||
{
|
||||
using std::string;
|
||||
using tools::to_string;
|
||||
|
||||
@@ -124,7 +190,7 @@ namespace templates
|
||||
#define VSTORE_LDSB(value, offset, ptr) vstore(vwidth_, sdtype, value, offset, ptr, "1", backend, lldb%vwidth_==0)
|
||||
|
||||
symbolic::preset::gemm::args args;
|
||||
infos(tree, args);
|
||||
infos(tree, args, A_trans_);
|
||||
std::string ASTRIDE1 = (args.A->ld[0] > 1)?"*Astride1":"";
|
||||
std::string BSTRIDE1 = (args.B->ld[0] > 1)?"*Bstride1":"";
|
||||
std::string CSTRIDE1 = (args.C->ld[0] > 1)?"*Cstride1":"";
|
||||
@@ -574,13 +640,13 @@ namespace templates
|
||||
|
||||
#undef VLOAD
|
||||
#undef VST0RE
|
||||
}
|
||||
}
|
||||
|
||||
void gemm::enqueue_block(driver::CommandQueue & queue, int_t M, int_t N, int_t K,
|
||||
void gemm::enqueue_block(driver::CommandQueue & queue, int_t M, int_t N, int_t K,
|
||||
expression_tree::node const & A, expression_tree::node const & B, expression_tree::node const & C,
|
||||
value_scalar const & alpha, value_scalar const & beta,
|
||||
driver::Program const & program, std::string const & suffix, runtime::execution_options_type const & options)
|
||||
{
|
||||
{
|
||||
using tools::align;
|
||||
|
||||
if(M==0 || N==0 || K==0)
|
||||
@@ -665,45 +731,34 @@ namespace templates
|
||||
options.enqueue(program.context(), reduce, global, local);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<int_t> gemm::infos(expression_tree const & tree, symbolic::preset::gemm::args& arguments) const
|
||||
{
|
||||
expression_tree::data_type const & array = tree.data();
|
||||
std::size_t root = tree.root();
|
||||
arguments = symbolic::preset::gemm::check(array, root);
|
||||
int_t M = arguments.C->shape[0];
|
||||
int_t N = arguments.C->shape[1];
|
||||
int_t K = (A_trans_=='T')?arguments.A->shape[0]:arguments.A->shape[1];
|
||||
return {M, N, K};
|
||||
}
|
||||
|
||||
gemm::gemm(unsigned int vwidth
|
||||
gemm::gemm(unsigned int vwidth
|
||||
,int_t ls0, int_t kL, int_t ls1, int_t D
|
||||
,int_t ms, int_t ks, int_t ns
|
||||
,fetch_type Afetch , fetch_type Bfetch
|
||||
,int_t lf0, int_t lf1, char A_trans, char B_trans) :
|
||||
parameterized_base(vwidth, ls0, ls1), mL_(ms*ls0), kL_(kL), nL_(ns*ls1), depth_(D), mS_(ms), kS_(ks), nS_(ns),
|
||||
Afetch_(Afetch), Bfetch_(Bfetch), lf0_(lf0), lf1_(lf1), A_trans_(A_trans), B_trans_(B_trans)
|
||||
{
|
||||
{
|
||||
if(A_trans_=='N' && B_trans_=='N') type_ = GEMM_NN;
|
||||
else if(A_trans_=='T' && B_trans_=='N') type_ = GEMM_TN;
|
||||
else if(A_trans_=='N' && B_trans_=='T') type_ = GEMM_NT;
|
||||
else if(A_trans_=='T' && B_trans_=='T') type_ = GEMM_TT;
|
||||
else throw;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<int_t> gemm::input_sizes(expression_tree const & expressions) const
|
||||
{
|
||||
std::vector<int_t> gemm::input_sizes(expression_tree const & expressions) const
|
||||
{
|
||||
symbolic::preset::gemm::args dummy;
|
||||
return infos((expression_tree&)expressions, dummy);
|
||||
}
|
||||
return infos((expression_tree&)expressions, dummy, A_trans_);
|
||||
}
|
||||
|
||||
void gemm::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const & control)
|
||||
{
|
||||
void gemm::enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const & control)
|
||||
{
|
||||
expression_tree const & expressions = control.x();
|
||||
symbolic::preset::gemm::args args;
|
||||
std::vector<int_t> MNK = infos(expressions, args);
|
||||
std::vector<int_t> MNK = infos(expressions, args, A_trans_);
|
||||
int_t M = MNK[0];
|
||||
int_t N = MNK[1];
|
||||
int_t K = MNK[2];
|
||||
@@ -713,44 +768,44 @@ namespace templates
|
||||
//Enqueue
|
||||
runtime::execution_options_type const & options = control.execution_options();
|
||||
enqueue_block(queue, M, N, K, *args.A, *args.B, *args.C, args.alpha, args.beta, program, suffix, options);
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
gemm_nn::gemm_nn(unsigned int vwidth
|
||||
//
|
||||
gemm_nn::gemm_nn(unsigned int vwidth
|
||||
, int_t ls0, int_t KL, int_t ls1, int_t D
|
||||
, int_t ms, int_t ks, int_t ns
|
||||
, fetch_type Afetch , fetch_type Bfetch
|
||||
, int_t lf0, int_t lf1) :
|
||||
gemm(vwidth, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lf0, lf1, 'N', 'N')
|
||||
{
|
||||
}
|
||||
{
|
||||
}
|
||||
|
||||
//
|
||||
gemm_tn::gemm_tn(unsigned int vwidth
|
||||
//
|
||||
gemm_tn::gemm_tn(unsigned int vwidth
|
||||
, int_t ls0, int_t KL, int_t ls1, int_t D
|
||||
, int_t ms, int_t ks, int_t ns
|
||||
, fetch_type Afetch , fetch_type Bfetch
|
||||
, int_t lf0, int_t lf1) :
|
||||
gemm(vwidth, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lf0, lf1, 'T', 'N')
|
||||
{ }
|
||||
{ }
|
||||
|
||||
//
|
||||
gemm_nt::gemm_nt(unsigned int vwidth
|
||||
//
|
||||
gemm_nt::gemm_nt(unsigned int vwidth
|
||||
, int_t ls0, int_t KL, int_t ls1, int_t D
|
||||
, int_t ms, int_t ks, int_t ns
|
||||
, fetch_type Afetch , fetch_type Bfetch
|
||||
, int_t lf0, int_t lf1) :
|
||||
gemm(vwidth, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lf0, lf1, 'N', 'T')
|
||||
{ }
|
||||
{ }
|
||||
|
||||
//
|
||||
gemm_tt::gemm_tt(unsigned int vwidth
|
||||
//
|
||||
gemm_tt::gemm_tt(unsigned int vwidth
|
||||
, int_t ls0, int_t KL, int_t ls1, int_t D
|
||||
, int_t ms, int_t ks, int_t ns
|
||||
, fetch_type Afetch , fetch_type Bfetch
|
||||
, int_t lf0, int_t lf1) :
|
||||
gemm(vwidth, ls0, KL, ls1, D, ms, ks, ns, Afetch, Bfetch, lf0, lf1, 'T', 'T')
|
||||
{ }
|
||||
{ }
|
||||
|
||||
}
|
||||
}
|
||||
|
@@ -73,7 +73,7 @@ def main():
|
||||
libraries += ['gnustl_shared']
|
||||
|
||||
#Source files
|
||||
src = 'src/lib/runtime/predictors/random_forest.cpp src/lib/runtime/profiles.cpp src/lib/runtime/database.cpp src/lib/runtime/execute.cpp src/lib/exception/driver.cpp src/lib/exception/api.cpp src/lib/random/rand.cpp src/lib/jit/generation/elementwise_1d.cpp src/lib/jit/generation/reduce_2d.cpp src/lib/jit/generation/reduce_1d.cpp src/lib/jit/generation/base.cpp src/lib/jit/generation/gemm.cpp src/lib/jit/generation/engine/keywords.cpp src/lib/jit/generation/engine/stream.cpp src/lib/jit/generation/elementwise_2d.cpp src/lib/jit/syntax/expression/expression.cpp src/lib/jit/syntax/expression/preset.cpp src/lib/jit/syntax/expression/operations.cpp src/lib/jit/syntax/engine/binder.cpp src/lib/jit/syntax/engine/macro.cpp src/lib/jit/syntax/engine/process.cpp src/lib/jit/syntax/engine/object.cpp src/lib/value_scalar.cpp src/lib/array.cpp src/lib/api/blas/cublas.cpp src/lib/api/blas/clBLAS.cpp src/lib/driver/dispatch.cpp src/lib/driver/kernel.cpp src/lib/driver/backend.cpp src/lib/driver/platform.cpp src/lib/driver/buffer.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/device.cpp src/lib/driver/program_cache.cpp src/lib/driver/check.cpp src/lib/driver/command_queue.cpp src/lib/driver/handle.cpp src/lib/driver/context.cpp src/lib/driver/program.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
|
||||
src = 'src/lib/exception/api.cpp src/lib/exception/driver.cpp src/lib/value_scalar.cpp src/lib/random/rand.cpp src/lib/driver/check.cpp src/lib/driver/ndrange.cpp src/lib/driver/platform.cpp src/lib/driver/backend.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/event.cpp src/lib/driver/kernel.cpp src/lib/driver/handle.cpp src/lib/driver/device.cpp src/lib/driver/program_cache.cpp src/lib/driver/buffer.cpp src/lib/driver/context.cpp src/lib/driver/dispatch.cpp src/lib/jit/generation/engine/stream.cpp src/lib/jit/generation/engine/keywords.cpp src/lib/jit/generation/reduce_1d.cpp src/lib/jit/generation/elementwise_1d.cpp src/lib/jit/generation/base.cpp src/lib/jit/generation/elementwise_2d.cpp src/lib/jit/generation/reduce_2d.cpp src/lib/jit/generation/gemm.cpp src/lib/jit/syntax/engine/object.cpp src/lib/jit/syntax/engine/macro.cpp src/lib/jit/syntax/engine/process.cpp src/lib/jit/syntax/engine/binder.cpp src/lib/jit/syntax/expression/operations.cpp src/lib/jit/syntax/expression/expression.cpp src/lib/jit/syntax/expression/preset.cpp src/lib/api/blas/clBLAS.cpp src/lib/api/blas/cublas.cpp src/lib/runtime/execute.cpp src/lib/runtime/predictors/random_forest.cpp src/lib/runtime/profiles.cpp src/lib/runtime/database.cpp src/lib/array.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
|
||||
boostsrc = 'external/boost/libs/'
|
||||
for s in ['numpy','python','smart_ptr','system','thread']:
|
||||
src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]
|
||||
|
Reference in New Issue
Block a user