[driver] added driver source code from isaac repository
This commit is contained in:
116
include/driver/backend.h
Executable file
116
include/driver/backend.h
Executable file
@@ -0,0 +1,116 @@
|
|||||||
|
/* Copyright 2015-2017 Philippe Tillet
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files
|
||||||
|
* (the "Software"), to deal in the Software without restriction,
|
||||||
|
* including without limitation the rights to use, copy, modify, merge,
|
||||||
|
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so,
|
||||||
|
* subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef TDL_INCLUDE_DRIVER_BACKEND_H
|
||||||
|
#define TDL_INCLUDE_DRIVER_BACKEND_H
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
#include <list>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
|
||||||
|
namespace tdl
|
||||||
|
{
|
||||||
|
namespace driver
|
||||||
|
{
|
||||||
|
|
||||||
|
class Buffer;
|
||||||
|
class Stream;
|
||||||
|
class Device;
|
||||||
|
class Context;
|
||||||
|
class Platform;
|
||||||
|
class Module;
|
||||||
|
class Kernel;
|
||||||
|
|
||||||
|
struct backend
|
||||||
|
{
|
||||||
|
|
||||||
|
class modules
|
||||||
|
{
|
||||||
|
friend class backend;
|
||||||
|
public:
|
||||||
|
static void release();
|
||||||
|
static Module& get(Stream const & stream, std::string const & name, std::string const &src);
|
||||||
|
private:
|
||||||
|
static std::map<std::tuple<Stream, std::string>, Module * > cache_;
|
||||||
|
};
|
||||||
|
|
||||||
|
class kernels
|
||||||
|
{
|
||||||
|
friend class backend;
|
||||||
|
public:
|
||||||
|
static void release();
|
||||||
|
static Kernel & get(Module const & program, std::string const & name);
|
||||||
|
private:
|
||||||
|
static std::map<std::tuple<Module, std::string>, Kernel * > cache_;
|
||||||
|
};
|
||||||
|
|
||||||
|
class contexts
|
||||||
|
{
|
||||||
|
friend class backend;
|
||||||
|
private:
|
||||||
|
static void init(std::vector<Platform> const &);
|
||||||
|
static void release();
|
||||||
|
public:
|
||||||
|
static Context const & get_default();
|
||||||
|
template<class T>
|
||||||
|
static Context const & import(T context)
|
||||||
|
{
|
||||||
|
for(driver::Context const * x: cache_)
|
||||||
|
if((T)*x==context)
|
||||||
|
return *x;
|
||||||
|
cache_.emplace_back(new Context(context, false));
|
||||||
|
return *cache_.back();
|
||||||
|
}
|
||||||
|
static void get(std::list<Context const *> &);
|
||||||
|
private:
|
||||||
|
static std::list<Context const *> cache_;
|
||||||
|
};
|
||||||
|
|
||||||
|
class streams
|
||||||
|
{
|
||||||
|
friend class backend;
|
||||||
|
private:
|
||||||
|
static void init(std::list<Context const *> const &);
|
||||||
|
static void release();
|
||||||
|
public:
|
||||||
|
static void get(Context const &, std::vector<Stream *> &streams);
|
||||||
|
static Stream & get(Context const &, unsigned int id = 0);
|
||||||
|
static Stream & get_default();
|
||||||
|
private:
|
||||||
|
static std::map< Context, std::vector<Stream*> > cache_;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void init();
|
||||||
|
static void release();
|
||||||
|
|
||||||
|
static std::vector<Device> devices();
|
||||||
|
static std::vector<Platform> platforms();
|
||||||
|
static void synchronize(Context const &);
|
||||||
|
|
||||||
|
static unsigned int default_device;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
54
include/driver/buffer.h
Executable file
54
include/driver/buffer.h
Executable file
@@ -0,0 +1,54 @@
|
|||||||
|
/* Copyright 2015-2017 Philippe Tillet
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files
|
||||||
|
* (the "Software"), to deal in the Software without restriction,
|
||||||
|
* including without limitation the rights to use, copy, modify, merge,
|
||||||
|
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so,
|
||||||
|
* subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef TDL_INCLUDE_DRIVER_BUFFER_H
|
||||||
|
#define TDL_INCLUDE_DRIVER_BUFFER_H
|
||||||
|
|
||||||
|
#include "driver/handle.h"
|
||||||
|
#include "driver/context.h"
|
||||||
|
|
||||||
|
namespace tdl
|
||||||
|
{
|
||||||
|
namespace driver
|
||||||
|
{
|
||||||
|
|
||||||
|
class Stream;
|
||||||
|
|
||||||
|
// Buffer
|
||||||
|
class Buffer: public HandleInterface<Buffer, CUdeviceptr>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
Buffer(Context const & context, size_t size);
|
||||||
|
Buffer(Context const & context, CUdeviceptr cu, bool take_ownership);
|
||||||
|
void set_zero(Stream const & queue, size_t size);
|
||||||
|
Handle<CUdeviceptr> const & cu() const;
|
||||||
|
Handle<CUdeviceptr> & cu();
|
||||||
|
|
||||||
|
private:
|
||||||
|
Context context_;
|
||||||
|
Handle<CUdeviceptr> cu_;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
66
include/driver/context.h
Executable file
66
include/driver/context.h
Executable file
@@ -0,0 +1,66 @@
|
|||||||
|
/* Copyright 2015-2017 Philippe Tillet
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files
|
||||||
|
* (the "Software"), to deal in the Software without restriction,
|
||||||
|
* including without limitation the rights to use, copy, modify, merge,
|
||||||
|
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so,
|
||||||
|
* subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef TDL_INCLUDE_DRIVER_CONTEXT_H
|
||||||
|
#define TDL_INCLUDE_DRIVER_CONTEXT_H
|
||||||
|
|
||||||
|
#include "driver/device.h"
|
||||||
|
#include "driver/handle.h"
|
||||||
|
|
||||||
|
namespace tdl
|
||||||
|
{
|
||||||
|
namespace driver
|
||||||
|
{
|
||||||
|
|
||||||
|
class Context: public HandleInterface<Context, CUcontext>
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
static std::string get_cache_path();
|
||||||
|
static CUdevice device(CUcontext);
|
||||||
|
|
||||||
|
public:
|
||||||
|
//Constructors
|
||||||
|
explicit Context(CUcontext context, bool take_ownership = true);
|
||||||
|
explicit Context(Device const & device);
|
||||||
|
//Accessors
|
||||||
|
Device const & device() const;
|
||||||
|
std::string const & cache_path() const;
|
||||||
|
Handle<CUcontext> const & cu() const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
Handle<CUcontext> cu_;
|
||||||
|
Device device_;
|
||||||
|
std::string cache_path_;
|
||||||
|
};
|
||||||
|
|
||||||
|
class ContextSwitcher{
|
||||||
|
public:
|
||||||
|
ContextSwitcher(Context const & ctx);
|
||||||
|
~ContextSwitcher();
|
||||||
|
private:
|
||||||
|
Context const & ctx_;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
229
include/driver/cublas.h
Executable file
229
include/driver/cublas.h
Executable file
@@ -0,0 +1,229 @@
|
|||||||
|
/* Copyright 2015-2017 Philippe Tillet
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files
|
||||||
|
* (the "Software"), to deal in the Software without restriction,
|
||||||
|
* including without limitation the rights to use, copy, modify, merge,
|
||||||
|
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so,
|
||||||
|
* subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef TDL_INCLUDE_DRIVER_CUBLAS_H
|
||||||
|
#define TDL_INCLUDE_DRIVER_CUBLAS_H
|
||||||
|
|
||||||
|
#include "isaac/templates/common.hpp"
|
||||||
|
#include "driver/dispatch.h"
|
||||||
|
#include "driver/buffer.h"
|
||||||
|
#include "driver/stream.h"
|
||||||
|
#include "driver/backend.h"
|
||||||
|
#include "driver/error.h"
|
||||||
|
#include "tools/bench.hpp"
|
||||||
|
#include "tools/collections.hpp"
|
||||||
|
|
||||||
|
namespace tdl
|
||||||
|
{
|
||||||
|
namespace driver
|
||||||
|
{
|
||||||
|
|
||||||
|
enum cublasStrategy_t{
|
||||||
|
CUBLAS_PREFER_FASTEST,
|
||||||
|
CUBLAS_HEURISTICS
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
static const std::vector<cublasGemmAlgo_t> cublasAlgorithms = {
|
||||||
|
CUBLAS_GEMM_DFALT, CUBLAS_GEMM_ALGO0, CUBLAS_GEMM_ALGO1, CUBLAS_GEMM_ALGO2, CUBLAS_GEMM_ALGO3,
|
||||||
|
CUBLAS_GEMM_ALGO4, CUBLAS_GEMM_ALGO5, CUBLAS_GEMM_ALGO6, CUBLAS_GEMM_ALGO7
|
||||||
|
};
|
||||||
|
|
||||||
|
static const std::map<DType, cudaDataType> cudtype = {{FLOAT_TYPE, CUDA_R_32F}, {DOUBLE_TYPE,CUDA_R_64F}};
|
||||||
|
static const std::map<char, cublasOperation_t> cuop = {{'N', CUBLAS_OP_N}, {'T', CUBLAS_OP_T}};
|
||||||
|
|
||||||
|
inline cublasGemmAlgo_t cublasGemmFastest(Stream& stream, cublasHandle_t handle, cudaDataType cudt, cublasOperation_t AT, cublasOperation_t BT, int32_t M, int32_t N, int32_t K,
|
||||||
|
void* alpha, CUdeviceptr A, int32_t lda, CUdeviceptr B, int32_t ldb,
|
||||||
|
void* beta, CUdeviceptr C, int32_t ldc){
|
||||||
|
|
||||||
|
typedef std::tuple<cudaDataType_t, cublasOperation_t, cublasOperation_t, int32_t, int32_t, int32_t> key_t;
|
||||||
|
// Benchmark fastest algorithm in cublasGemmEx
|
||||||
|
auto benchmark_fastest = [&](key_t const &){
|
||||||
|
std::vector<double> times;
|
||||||
|
for(cublasGemmAlgo_t a: cublasAlgorithms){
|
||||||
|
try{
|
||||||
|
times.push_back(bench([&](){ dispatch::cublasGemmEx(handle, AT, BT, M, N, K, alpha, (const void*)A, cudt, lda, (const void*)B, cudt, ldb, beta, (void*)C, cudt, ldc, cudt, a); },
|
||||||
|
[&](){ stream.synchronize(); },
|
||||||
|
stream.context().device()));
|
||||||
|
}catch(driver::exception::cublas::base const &){
|
||||||
|
times.push_back(INFINITY);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
size_t argmin = std::min_element(times.begin(), times.end()) - times.begin();
|
||||||
|
return cublasAlgorithms[argmin];
|
||||||
|
};
|
||||||
|
// Cache result
|
||||||
|
static cpp::CachedMap<key_t, cublasGemmAlgo_t> cache(benchmark_fastest);
|
||||||
|
return cache.get(std::make_tuple(cudt, AT, BT, M, N, K));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Wrapper for cublasGemmEx */
|
||||||
|
inline void cublasGemmEx(cublasHandle_t handle, cudaDataType cudt, cublasOperation_t AT, cublasOperation_t BT, int32_t M, int32_t N, int32_t K,
|
||||||
|
void* alpha, CUdeviceptr A, int32_t lda, CUdeviceptr B, int32_t ldb,
|
||||||
|
void* beta, CUdeviceptr C, int32_t ldc, cublasGemmAlgo_t algo)
|
||||||
|
{ dispatch::cublasGemmEx(handle, AT, BT, M, N, K, alpha, (const void*)A, cudt, lda, (const void*)B, cudt, ldb, beta, (void*)C, cudt, ldc, cudt, algo); }
|
||||||
|
|
||||||
|
|
||||||
|
/* Simplified API for default GEMM */
|
||||||
|
inline void cublasGemm(DType dtype, Stream& stream, char cAT, char cBT, int32_t M, int32_t N, int32_t K, scalar alpha, Buffer const & A, int32_t lda, Buffer const & B, int32_t ldb, scalar beta, Buffer& C, int32_t ldc, cublasGemmAlgo_t* fastest = NULL, cublasGemmAlgo_t algo = CUBLAS_GEMM_DFALT){
|
||||||
|
ContextSwitcher ctx_switch(stream.context());
|
||||||
|
cublasHandle_t handle = dispatch::cublasHandle(stream.context());
|
||||||
|
dispatch::cublasSetStream_v2(handle, (CUstream)stream);
|
||||||
|
if(fastest)
|
||||||
|
*fastest = cublasGemmFastest(stream, handle, cudtype.at(dtype), cuop.at(cAT), cuop.at(cBT), M, N, K, alpha.data(), A, lda, B, ldb, beta.data(), C, ldc);
|
||||||
|
else
|
||||||
|
cublasGemmEx(handle, cudtype.at(dtype), cuop.at(cAT), cuop.at(cBT), M, N, K, alpha.data(), A, lda, B, ldb, beta.data(), C, ldc, algo);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline cudnnDataType_t cudnnDtype(DType dtype){
|
||||||
|
switch(dtype){
|
||||||
|
case INT8X4_TYPE: return CUDNN_DATA_INT8x4;
|
||||||
|
case INT32_TYPE: return CUDNN_DATA_INT32;
|
||||||
|
case FLOAT_TYPE: return CUDNN_DATA_FLOAT;
|
||||||
|
case DOUBLE_TYPE: return CUDNN_DATA_DOUBLE;
|
||||||
|
}
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline cudnnTensorFormat_t format(cudnnDataType_t cutype){
|
||||||
|
switch(cutype){
|
||||||
|
case CUDNN_DATA_INT8x4: return CUDNN_TENSOR_NCHW_VECT_C;
|
||||||
|
default: return CUDNN_TENSOR_NCHW;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void cudnnConv(DType dtype, Stream& stream, int32_t D, int32_t H, int32_t W, int32_t N, int32_t K, int32_t M, int32_t P, int32_t Q, int32_t C, int32_t T, int32_t R, int32_t S,
|
||||||
|
int32_t pad_d, int32_t pad_h, int32_t pad_w, int32_t stride_d, int32_t stride_h, int32_t stride_w, scalar alpha, Buffer const & I, Buffer const & F, scalar beta, Buffer const & O){
|
||||||
|
driver::Context const & ctx = stream.context();
|
||||||
|
ContextSwitcher switch_ctx(ctx);
|
||||||
|
|
||||||
|
std::vector<int> pad = {pad_d, pad_h, pad_w};
|
||||||
|
std::vector<int> stride = {stride_d, stride_h, stride_w};
|
||||||
|
std::vector<int> upscale = {1, 1, 1};
|
||||||
|
std::vector<int> Oshapes = {N, K, M, P, Q};
|
||||||
|
std::vector<int> Fshapes = {K, C, T, R, S};
|
||||||
|
std::vector<int> Ishapes = {N, C, D, H, W};
|
||||||
|
if(M == 1 && T == 1 && D == 1){
|
||||||
|
pad.erase(pad.begin());
|
||||||
|
stride.erase(stride.begin());
|
||||||
|
upscale.erase(upscale.begin());
|
||||||
|
Oshapes.erase(Oshapes.begin() + 2);
|
||||||
|
Ishapes.erase(Ishapes.begin() + 2);
|
||||||
|
Fshapes.erase(Fshapes.begin() + 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
cudnnHandle_t handle = dispatch::cudnnHandle(ctx);
|
||||||
|
cudnnDataType_t in_cutype = cudnnDtype(dtype);
|
||||||
|
cudnnDataType_t conv_cutype = (dtype == INT8X4_TYPE)?CUDNN_DATA_INT32:in_cutype;
|
||||||
|
|
||||||
|
dispatch::cudnnSetStream(handle, (CUstream)stream);
|
||||||
|
cudnnTensorDescriptor_t tO, tI;
|
||||||
|
cudnnFilterDescriptor_t tF;
|
||||||
|
cudnnConvolutionDescriptor_t conv;
|
||||||
|
cudnnConvolutionFwdAlgo_t algo;
|
||||||
|
dispatch::cudnnCreateTensorDescriptor(&tO);
|
||||||
|
dispatch::cudnnCreateTensorDescriptor(&tI);
|
||||||
|
dispatch::cudnnCreateFilterDescriptor(&tF);
|
||||||
|
|
||||||
|
dispatch::cudnnSetTensorNdDescriptorEx(tO, format(in_cutype), in_cutype, Oshapes.size(), Oshapes.data());
|
||||||
|
dispatch::cudnnSetFilterNdDescriptor(tF, in_cutype, format(in_cutype), Fshapes.size(), Fshapes.data());
|
||||||
|
dispatch::cudnnSetTensorNdDescriptorEx(tI, format(in_cutype), in_cutype, Ishapes.size(), Ishapes.data());
|
||||||
|
|
||||||
|
dispatch::cudnnCreateConvolutionDescriptor(&conv);
|
||||||
|
dispatch::cudnnSetConvolutionNdDescriptor(conv, pad.size(), pad.data(), stride.data(), upscale.data(), CUDNN_CROSS_CORRELATION, conv_cutype);
|
||||||
|
dispatch::cudnnGetConvolutionForwardAlgorithm(handle, tI, tF, conv, tO, CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, 1024*1024*64, &algo);
|
||||||
|
|
||||||
|
size_t workspace_size;
|
||||||
|
dispatch::cudnnGetConvolutionForwardWorkspaceSize(handle, tI, tF, conv, tO, algo, &workspace_size);
|
||||||
|
static Buffer work(ctx, 1024*1024*64);
|
||||||
|
CUdeviceptr twork = work;
|
||||||
|
CUdeviceptr pI = I, pF = F, pO = O;
|
||||||
|
dispatch::cudnnConvolutionForward(handle, alpha.data(), tI, (void*)pI, tF, (void*)pF, conv, algo, (void*)twork, workspace_size, beta.data(), tO, (void*)pO);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
inline void cudnnPool(DType dtype, Stream& stream, int32_t D, int32_t H, int32_t W, int32_t N, int32_t K, int32_t M, int32_t P, int32_t Q, int32_t T, int32_t R, int32_t S,
|
||||||
|
int32_t pad_d, int32_t pad_h, int32_t pad_w, int32_t stride_d, int32_t stride_h, int32_t stride_w, scalar alpha, Buffer const & I, scalar beta, Buffer const & O){
|
||||||
|
driver::Context const & ctx = stream.context();
|
||||||
|
ContextSwitcher switch_ctx(ctx);
|
||||||
|
|
||||||
|
std::vector<int> pad = {pad_d, pad_h, pad_w};
|
||||||
|
std::vector<int> stride = {stride_d, stride_h, stride_w};
|
||||||
|
std::vector<int> upscale = {1, 1, 1};
|
||||||
|
std::vector<int> Oshapes = {N, K, M, P, Q};
|
||||||
|
std::vector<int> Ishapes = {N, K, D, H, W};
|
||||||
|
std::vector<int> window = {T, R, S};
|
||||||
|
if(M == 1 && T == 1 && D == 1){
|
||||||
|
window.erase(window.begin());
|
||||||
|
pad.erase(pad.begin());
|
||||||
|
stride.erase(stride.begin());
|
||||||
|
upscale.erase(upscale.begin());
|
||||||
|
Oshapes.erase(Oshapes.begin() + 2);
|
||||||
|
Ishapes.erase(Ishapes.begin() + 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
cudnnHandle_t handle = dispatch::cudnnHandle(ctx);
|
||||||
|
cudnnDataType_t cutype = cudnnDtype(dtype);
|
||||||
|
|
||||||
|
dispatch::cudnnSetStream(handle, (CUstream)stream);
|
||||||
|
cudnnTensorDescriptor_t tO, tI;
|
||||||
|
cudnnPoolingDescriptor_t desc;
|
||||||
|
dispatch::cudnnCreateTensorDescriptor(&tO);
|
||||||
|
dispatch::cudnnCreateTensorDescriptor(&tI);
|
||||||
|
|
||||||
|
dispatch::cudnnSetTensorNdDescriptorEx(tO, CUDNN_TENSOR_NCHW, cutype, Oshapes.size(), Oshapes.data());
|
||||||
|
dispatch::cudnnSetTensorNdDescriptorEx(tI, CUDNN_TENSOR_NCHW, cutype, Ishapes.size(), Ishapes.data());
|
||||||
|
|
||||||
|
dispatch::cudnnCreatePoolingDescriptor(&desc);
|
||||||
|
dispatch::cudnnSetPoolingNdDescriptor(desc, CUDNN_POOLING_MAX, CUDNN_NOT_PROPAGATE_NAN, window.size(), window.data(), pad.data(), stride.data());
|
||||||
|
|
||||||
|
CUdeviceptr pI = I, pO = O;
|
||||||
|
dispatch::cudnnPoolingForward(handle, desc, alpha.data(), tI, (void*)pI, beta.data(), tO, (void*)pO);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void cudnnTransformTensor(driver::Stream & stream,
|
||||||
|
DType in_dtype, DType out_dtype,
|
||||||
|
cudnnTensorFormat_t in_layout, cudnnTensorFormat_t out_layout,
|
||||||
|
int32_t N, int32_t C, int32_t D, int32_t H, int32_t W,
|
||||||
|
scalar alpha, driver::Buffer const & I, scalar beta, driver::Buffer& O)
|
||||||
|
{
|
||||||
|
cudnnHandle_t handle = dispatch::cudnnHandle(stream.context());
|
||||||
|
dispatch::cudnnSetStream(handle, (CUstream)stream);
|
||||||
|
|
||||||
|
cudnnTensorDescriptor_t tO, tI;
|
||||||
|
std::vector<int> shapes = {N, C, D, H, W};
|
||||||
|
dispatch::cudnnCreateTensorDescriptor(&tI);
|
||||||
|
dispatch::cudnnSetTensorNdDescriptorEx(tI, in_layout, cudnnDtype(in_dtype), shapes.size(), shapes.data());
|
||||||
|
dispatch::cudnnCreateTensorDescriptor(&tO);
|
||||||
|
dispatch::cudnnSetTensorNdDescriptorEx(tO, out_layout, cudnnDtype(out_dtype), shapes.size(), shapes.data());
|
||||||
|
|
||||||
|
CUdeviceptr pI = I, pO = O;
|
||||||
|
dispatch::cudnnTransformTensor(handle, alpha.data(), tI, (void*)pI, beta.data(), tO, (void*)pO);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
98
include/driver/device.h
Executable file
98
include/driver/device.h
Executable file
@@ -0,0 +1,98 @@
|
|||||||
|
/* Copyright 2015-2017 Philippe Tillet
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files
|
||||||
|
* (the "Software"), to deal in the Software without restriction,
|
||||||
|
* including without limitation the rights to use, copy, modify, merge,
|
||||||
|
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so,
|
||||||
|
* subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef TDL_INCLUDE_DRIVER_DEVICE_H
|
||||||
|
#define TDL_INCLUDE_DRIVER_DEVICE_H
|
||||||
|
|
||||||
|
#include "driver/platform.h"
|
||||||
|
#include "driver/handle.h"
|
||||||
|
|
||||||
|
namespace tdl
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace driver
|
||||||
|
{
|
||||||
|
|
||||||
|
// Device
|
||||||
|
class Device: public HandleInterface<Device, CUdevice>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
//Supported architectures
|
||||||
|
enum class Architecture{
|
||||||
|
//NVidia
|
||||||
|
SM_2_0,
|
||||||
|
SM_2_1,
|
||||||
|
SM_3_0,
|
||||||
|
SM_3_5,
|
||||||
|
SM_3_7,
|
||||||
|
SM_5_0,
|
||||||
|
SM_5_2,
|
||||||
|
SM_6_0,
|
||||||
|
SM_6_1,
|
||||||
|
SM_7_0,
|
||||||
|
UNKNOWN
|
||||||
|
};
|
||||||
|
|
||||||
|
private:
|
||||||
|
//Metaprogramming elper to get cuda info from attribute
|
||||||
|
template<CUdevice_attribute attr>
|
||||||
|
int cuGetInfo() const;
|
||||||
|
|
||||||
|
inline Architecture nv_arch(std::pair<unsigned int, unsigned int> sm) const;
|
||||||
|
inline nvmlDevice_t nvml_device() const;
|
||||||
|
|
||||||
|
public:
|
||||||
|
Device(CUdevice cu = CUdevice(), bool take_ownership = true): cu_(cu, take_ownership){}
|
||||||
|
//Accessors
|
||||||
|
Architecture architecture() const;
|
||||||
|
Handle<CUdevice> const & cu() const;
|
||||||
|
//Informations
|
||||||
|
std::string infos() const;
|
||||||
|
size_t address_bits() const;
|
||||||
|
driver::Platform platform() const;
|
||||||
|
std::vector<size_t> max_block_dim() const;
|
||||||
|
size_t max_threads_per_block() const;
|
||||||
|
size_t max_shared_memory() const;
|
||||||
|
size_t warp_size() const;
|
||||||
|
//Compute Capability
|
||||||
|
void interpret_as(std::pair<size_t, size_t> cc);
|
||||||
|
std::pair<size_t, size_t> compute_capability() const;
|
||||||
|
//Identifier
|
||||||
|
std::string name() const;
|
||||||
|
std::string pci_bus_id() const;
|
||||||
|
//Clocks
|
||||||
|
size_t current_sm_clock() const;
|
||||||
|
size_t current_mem_clock() const;
|
||||||
|
|
||||||
|
size_t max_sm_clock() const;
|
||||||
|
size_t max_mem_clock() const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
Handle<CUdevice> cu_;
|
||||||
|
std::shared_ptr<std::pair<size_t, size_t>> interpreted_as_;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
258
include/driver/dispatch.h
Executable file
258
include/driver/dispatch.h
Executable file
@@ -0,0 +1,258 @@
|
|||||||
|
/* Copyright 2015-2017 Philippe Tillet
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files
|
||||||
|
* (the "Software"), to deal in the Software without restriction,
|
||||||
|
* including without limitation the rights to use, copy, modify, merge,
|
||||||
|
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so,
|
||||||
|
* subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef TDL_INCLUDE_DRIVER_DISPATCHER_H
|
||||||
|
#define TDL_INCLUDE_DRIVER_DISPATCHER_H
|
||||||
|
|
||||||
|
#include <type_traits>
|
||||||
|
#include <dlfcn.h>
|
||||||
|
|
||||||
|
//CUDA Backend
|
||||||
|
#include "external/CUDA/cuda.h"
|
||||||
|
#include "external/CUDA/nvrtc.h"
|
||||||
|
#include "external/CUDA/cublas_v2.h"
|
||||||
|
#include "external/CUDA/cudnn.h"
|
||||||
|
#include "external/CUDA/nvml.h"
|
||||||
|
|
||||||
|
//Exceptions
|
||||||
|
#include <iostream>
|
||||||
|
#include <stdexcept>
|
||||||
|
|
||||||
|
namespace tdl
|
||||||
|
{
|
||||||
|
namespace driver
|
||||||
|
{
|
||||||
|
|
||||||
|
class Context;
|
||||||
|
|
||||||
|
template<class T> void check(T){}
|
||||||
|
void check(nvrtcResult err);
|
||||||
|
void check(CUresult err);
|
||||||
|
void check(cublasStatus_t err);
|
||||||
|
void check(cudnnStatus_t err);
|
||||||
|
|
||||||
|
class dispatch
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
template <class F>
|
||||||
|
struct return_type;
|
||||||
|
|
||||||
|
template <class R, class... A>
|
||||||
|
struct return_type<R (*)(A...)>
|
||||||
|
{ typedef R type; };
|
||||||
|
|
||||||
|
typedef bool (*f_init_t)();
|
||||||
|
|
||||||
|
template<f_init_t initializer, typename FunPtrT, typename... Args>
|
||||||
|
static typename return_type<FunPtrT>::type f_impl(void*& lib_h, FunPtrT, void*& cache, const char * name, Args... args)
|
||||||
|
{
|
||||||
|
initializer();
|
||||||
|
if(cache == nullptr){
|
||||||
|
cache = dlsym(lib_h, name);
|
||||||
|
if(cache == 0)
|
||||||
|
throw std::runtime_error("dlsym unable to load function");
|
||||||
|
}
|
||||||
|
FunPtrT fptr;
|
||||||
|
*reinterpret_cast<void **>(&fptr) = cache;
|
||||||
|
typename return_type<FunPtrT>::type res = (*fptr)(args...);
|
||||||
|
check(res);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
static bool nvrtcinit();
|
||||||
|
static bool nvmlinit();
|
||||||
|
static bool cuinit();
|
||||||
|
static bool cublasinit();
|
||||||
|
static bool cudnninit();
|
||||||
|
|
||||||
|
static void release();
|
||||||
|
|
||||||
|
//CUDA
|
||||||
|
static CUresult cuCtxGetCurrent(CUcontext *pctx);
|
||||||
|
static CUresult cuCtxSetCurrent(CUcontext ctx);
|
||||||
|
|
||||||
|
static CUresult cuCtxDestroy_v2(CUcontext ctx);
|
||||||
|
static CUresult cuEventCreate(CUevent *phEvent, unsigned int Flags);
|
||||||
|
static CUresult cuDeviceGet(CUdevice *device, int ordinal);
|
||||||
|
static CUresult cuMemcpyDtoH_v2(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount);
|
||||||
|
static CUresult cuStreamCreate(CUstream *phStream, unsigned int Flags);
|
||||||
|
static CUresult cuEventElapsedTime(float *pMilliseconds, CUevent hStart, CUevent hEnd);
|
||||||
|
static CUresult cuMemFree_v2(CUdeviceptr dptr);
|
||||||
|
static CUresult cuMemcpyDtoHAsync_v2(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
|
||||||
|
static CUresult cuDriverGetVersion(int *driverVersion);
|
||||||
|
static CUresult cuDeviceGetName(char *name, int len, CUdevice dev);
|
||||||
|
static CUresult cuDeviceGetPCIBusId(char *id, int len, CUdevice dev);
|
||||||
|
static CUresult cuModuleGetGlobal_v2(CUdeviceptr *dptr, size_t* bytes, CUmodule hmod, const char *name);
|
||||||
|
|
||||||
|
static CUresult cuMemcpyHtoDAsync_v2(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream);
|
||||||
|
static CUresult cuModuleLoad(CUmodule *module, const char *fname);
|
||||||
|
static CUresult cuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra);
|
||||||
|
static CUresult cuModuleUnload(CUmodule hmod);
|
||||||
|
static CUresult cuModuleLoadDataEx(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
|
||||||
|
static CUresult cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
|
||||||
|
static CUresult cuDeviceGetCount(int *count);
|
||||||
|
static CUresult cuMemcpyHtoD_v2(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
|
||||||
|
static CUresult cuInit(unsigned int Flags);
|
||||||
|
static CUresult cuEventRecord(CUevent hEvent, CUstream hStream);
|
||||||
|
static CUresult cuCtxCreate_v2(CUcontext *pctx, unsigned int flags, CUdevice dev);
|
||||||
|
static CUresult cuCtxPushCurrent_v2(CUcontext ctx);
|
||||||
|
static CUresult cuCtxPopCurrent_v2(CUcontext *pctx);
|
||||||
|
static CUresult cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, const char *name);
|
||||||
|
static CUresult cuStreamSynchronize(CUstream hStream);
|
||||||
|
static CUresult cuStreamDestroy_v2(CUstream hStream);
|
||||||
|
static CUresult cuEventDestroy_v2(CUevent hEvent);
|
||||||
|
static CUresult cuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize);
|
||||||
|
static CUresult cuPointerGetAttribute(void * data, CUpointer_attribute attribute, CUdeviceptr ptr);
|
||||||
|
static CUresult cuCtxGetDevice(CUdevice* result);
|
||||||
|
static CUresult cuMemsetD8Async(CUdeviceptr dst, unsigned char x, size_t N, CUstream stream);
|
||||||
|
|
||||||
|
static nvmlReturn_t nvmlDeviceGetHandleByPciBusId_v2( const char* pciBusId, nvmlDevice_t* device);
|
||||||
|
static nvmlReturn_t nvmlDeviceGetClockInfo(nvmlDevice_t device, nvmlClockType_t type, unsigned int *clock);
|
||||||
|
static nvmlReturn_t nvmlDeviceGetMaxClockInfo(nvmlDevice_t device, nvmlClockType_t type, unsigned int *clock);
|
||||||
|
|
||||||
|
static nvrtcResult nvrtcCompileProgram(nvrtcProgram prog, int numOptions, const char **options);
|
||||||
|
static nvrtcResult nvrtcGetProgramLogSize(nvrtcProgram prog, size_t *logSizeRet);
|
||||||
|
static nvrtcResult nvrtcGetPTX(nvrtcProgram prog, char *ptx);
|
||||||
|
static nvrtcResult nvrtcGetPTXSize(nvrtcProgram prog, size_t *ptxSizeRet);
|
||||||
|
static nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char **headers, const char **includeNames);
|
||||||
|
static nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char *log);
|
||||||
|
|
||||||
|
static cublasHandle_t cublasHandle(Context const & ctx);
|
||||||
|
static cublasStatus_t cublasCreate_v2(cublasHandle_t* h);
|
||||||
|
static cublasStatus_t cublasGetStream_v2(cublasHandle_t h, cudaStream_t *streamId);
|
||||||
|
static cublasStatus_t cublasSetStream_v2(cublasHandle_t h, cudaStream_t streamId);
|
||||||
|
static cublasStatus_t cublasSgemm_v2 (cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, float* alpha, const float *A, int lda, const float *B, int ldb, float* beta, float *C, int ldc);
|
||||||
|
static cublasStatus_t cublasDgemm_v2 (cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, double* alpha, const double *A, int lda, const double *B, int ldb, double* beta, double *C, int ldc);
|
||||||
|
static cublasStatus_t cublasHgemm (cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, half* alpha, const half *A, int lda, const half *B, int ldb, half* beta, half *C, int ldc);
|
||||||
|
static cublasStatus_t cublasGemmEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void *alpha, const void *A, cudaDataType Atype, int lda, const void *B, cudaDataType Btype, int ldb, const void *beta, void *C, cudaDataType Ctype, int ldc, cudaDataType computeType, cublasGemmAlgo_t algo);
|
||||||
|
|
||||||
|
static cudnnHandle_t cudnnHandle(Context const & ctx);
|
||||||
|
static cudnnStatus_t cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc);
|
||||||
|
static cudnnStatus_t cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t* convDesc);
|
||||||
|
static cudnnStatus_t cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc);
|
||||||
|
static cudnnStatus_t cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc);
|
||||||
|
static cudnnStatus_t cudnnCreate(cudnnHandle_t *handle);
|
||||||
|
static cudnnStatus_t cudnnSetTensor4dDescriptor(cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, cudnnDataType_t dataType, int n, int c, int h, int w);
|
||||||
|
static cudnnStatus_t cudnnSetFilter4dDescriptor(cudnnFilterDescriptor_t filterDesc, cudnnDataType_t dataType, cudnnTensorFormat_t format, int k, int c, int h, int w);
|
||||||
|
static cudnnStatus_t cudnnSetTensorNdDescriptorEx(cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, cudnnDataType_t dataType, int nbDims, const int dimA[]);
|
||||||
|
static cudnnStatus_t cudnnSetFilterNdDescriptor(cudnnFilterDescriptor_t filterDesc, cudnnDataType_t dataType, cudnnTensorFormat_t format, int nbDims, const int filterDimA[]);
|
||||||
|
static cudnnStatus_t cudnnSetConvolution2dDescriptor(cudnnConvolutionDescriptor_t convDesc, int pad_h, int pad_w, int u, int v, int upscalex, int upscaley, cudnnConvolutionMode_t mode);
|
||||||
|
static cudnnStatus_t cudnnSetConvolutionNdDescriptor(cudnnConvolutionDescriptor_t convDesc, int arrayLength, const int padA[], const int filterStrideA[], const int upscaleA[], cudnnConvolutionMode_t mode, cudnnDataType_t dataType);
|
||||||
|
static cudnnStatus_t cudnnSetPoolingNdDescriptor(cudnnPoolingDescriptor_t poolingDesc, const cudnnPoolingMode_t mode, const cudnnNanPropagation_t maxpoolingNanOpt, int nbDims, const int windowDimA[], const int paddingA[], const int strideA[]);
|
||||||
|
static cudnnStatus_t cudnnGetConvolutionForwardAlgorithm(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const cudnnFilterDescriptor_t wDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t yDesc, cudnnConvolutionFwdPreference_t preference, size_t memoryLimitInBytes, cudnnConvolutionFwdAlgo_t *algo);
|
||||||
|
static cudnnStatus_t cudnnGetConvolutionForwardWorkspaceSize(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const cudnnFilterDescriptor_t wDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t yDesc, cudnnConvolutionFwdAlgo_t algo, size_t *sizeInBytes);
|
||||||
|
static cudnnStatus_t cudnnConvolutionForward(cudnnHandle_t handle, const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, const cudnnFilterDescriptor_t wDesc, const void *w, const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo, void *workSpace, size_t workSpaceSizeInBytes, const void *beta, const cudnnTensorDescriptor_t yDesc, void *y);
|
||||||
|
static cudnnStatus_t cudnnPoolingForward(cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc, const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, const cudnnTensorDescriptor_t yDesc, void *y);
|
||||||
|
static cudnnStatus_t cudnnSetStream(cudnnHandle_t handle, cudaStream_t streamId);
|
||||||
|
static cudnnStatus_t cudnnTransformTensor(cudnnHandle_t handle, const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, const cudnnTensorDescriptor_t yDesc, void *y);
|
||||||
|
|
||||||
|
private:
|
||||||
|
static void* cuda_;
|
||||||
|
static void* nvrtc_;
|
||||||
|
static void* nvml_;
|
||||||
|
static void* cublas_;
|
||||||
|
static void* cudnn_;
|
||||||
|
|
||||||
|
//CUDA
|
||||||
|
static void* cuCtxGetCurrent_;
|
||||||
|
static void* cuCtxSetCurrent_;
|
||||||
|
static void* cuCtxDestroy_v2_;
|
||||||
|
static void* cuEventCreate_;
|
||||||
|
static void* cuDeviceGet_;
|
||||||
|
static void* cuMemcpyDtoH_v2_;
|
||||||
|
static void* cuStreamCreate_;
|
||||||
|
static void* cuEventElapsedTime_;
|
||||||
|
static void* cuMemFree_v2_;
|
||||||
|
static void* cuMemcpyDtoHAsync_v2_;
|
||||||
|
static void* cuDriverGetVersion_;
|
||||||
|
static void* cuDeviceGetName_;
|
||||||
|
static void* cuDeviceGetPCIBusId_;
|
||||||
|
static void* cuModuleGetGlobal_v2_;
|
||||||
|
|
||||||
|
static void* cuMemcpyHtoDAsync_v2_;
|
||||||
|
static void* cuModuleLoad_;
|
||||||
|
static void* cuLaunchKernel_;
|
||||||
|
static void* cuModuleUnload_;
|
||||||
|
static void* cuModuleLoadDataEx_;
|
||||||
|
static void* cuDeviceGetAttribute_;
|
||||||
|
static void* cuDeviceGetCount_;
|
||||||
|
static void* cuMemcpyHtoD_v2_;
|
||||||
|
static void* cuInit_;
|
||||||
|
static void* cuEventRecord_;
|
||||||
|
static void* cuCtxCreate_v2_;
|
||||||
|
static void* cuModuleGetFunction_;
|
||||||
|
static void* cuStreamSynchronize_;
|
||||||
|
static void* cuStreamDestroy_v2_;
|
||||||
|
static void* cuEventDestroy_v2_;
|
||||||
|
static void* cuMemAlloc_v2_;
|
||||||
|
static void* cuPointerGetAttribute_;
|
||||||
|
static void* cuCtxGetDevice_;
|
||||||
|
static void* cuMemsetD8Async_;
|
||||||
|
static void* cuCtxPushCurrent_v2_;
|
||||||
|
static void* cuCtxPopCurrent_v2_;
|
||||||
|
|
||||||
|
static void* nvmlInit_v2_;
|
||||||
|
static void* nvmlDeviceGetHandleByPciBusId_v2_;
|
||||||
|
static void* nvmlDeviceGetClockInfo_;
|
||||||
|
static void* nvmlDeviceGetMaxClockInfo_;
|
||||||
|
|
||||||
|
static void* nvrtcCompileProgram_;
|
||||||
|
static void* nvrtcGetProgramLogSize_;
|
||||||
|
static void* nvrtcGetPTX_;
|
||||||
|
static void* nvrtcGetPTXSize_;
|
||||||
|
static void* nvrtcCreateProgram_;
|
||||||
|
static void* nvrtcGetProgramLog_;
|
||||||
|
|
||||||
|
static void* cublasCreate_v2_;
|
||||||
|
static void* cublasGetStream_v2_;
|
||||||
|
static void* cublasSetStream_v2_;
|
||||||
|
static void* cublasHgemm_;
|
||||||
|
static void* cublasSgemm_v2_;
|
||||||
|
static void* cublasDgemm_v2_;
|
||||||
|
static void* cublasGemmEx_;
|
||||||
|
|
||||||
|
static void* cudnnCreateConvolutionDescriptor_;
|
||||||
|
static void* cudnnCreatePoolingDescriptor_;
|
||||||
|
static void* cudnnCreateTensorDescriptor_;
|
||||||
|
static void* cudnnCreateFilterDescriptor_;
|
||||||
|
static void* cudnnCreate_;
|
||||||
|
static void* cudnnSetTensor4dDescriptor_;
|
||||||
|
static void* cudnnSetFilter4dDescriptor_;
|
||||||
|
static void* cudnnSetTensorNdDescriptorEx_;
|
||||||
|
static void* cudnnSetFilterNdDescriptor_;
|
||||||
|
static void* cudnnSetConvolution2dDescriptor_;
|
||||||
|
static void* cudnnSetConvolutionNdDescriptor_;
|
||||||
|
static void* cudnnSetPoolingNdDescriptor_;
|
||||||
|
static void* cudnnGetConvolutionForwardAlgorithm_;
|
||||||
|
static void* cudnnGetConvolutionForwardWorkspaceSize_;
|
||||||
|
static void* cudnnConvolutionForward_;
|
||||||
|
static void* cudnnPoolingForward_;
|
||||||
|
static void* cudnnSetStream_;
|
||||||
|
static void* cudnnTransformTensor_;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
228
include/driver/error.h
Executable file
228
include/driver/error.h
Executable file
@@ -0,0 +1,228 @@
|
|||||||
|
/* Copyright 2015-2017 Philippe Tillet
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files
|
||||||
|
* (the "Software"), to deal in the Software without restriction,
|
||||||
|
* including without limitation the rights to use, copy, modify, merge,
|
||||||
|
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so,
|
||||||
|
* subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef TDL_INCLUDE_DRIVER_ERROR_H
|
||||||
|
#define TDL_INCLUDE_DRIVER_ERROR_H
|
||||||
|
|
||||||
|
#include <exception>
|
||||||
|
#include "driver/dispatch.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace tdl
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace driver
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace exception
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace nvrtc
|
||||||
|
{
|
||||||
|
|
||||||
|
#define ISAAC_CREATE_NVRTC_EXCEPTION(name, msg) class name: public std::exception { public: const char * what() const throw(){ return "NVRTC: Error- " msg; } }
|
||||||
|
|
||||||
|
ISAAC_CREATE_NVRTC_EXCEPTION(out_of_memory ,"out of memory");
|
||||||
|
ISAAC_CREATE_NVRTC_EXCEPTION(program_creation_failure ,"program creation failure");
|
||||||
|
ISAAC_CREATE_NVRTC_EXCEPTION(invalid_input ,"invalid input");
|
||||||
|
ISAAC_CREATE_NVRTC_EXCEPTION(invalid_program ,"invalid program");
|
||||||
|
ISAAC_CREATE_NVRTC_EXCEPTION(invalid_option ,"invalid option");
|
||||||
|
ISAAC_CREATE_NVRTC_EXCEPTION(compilation ,"compilation");
|
||||||
|
ISAAC_CREATE_NVRTC_EXCEPTION(builtin_operation_failure ,"builtin operation failure");
|
||||||
|
ISAAC_CREATE_NVRTC_EXCEPTION(unknown_error ,"unknown error");
|
||||||
|
|
||||||
|
#undef ISAAC_CREATE_NVRTC_EXCEPTION
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
namespace cuda
|
||||||
|
{
|
||||||
|
class base: public std::exception{};
|
||||||
|
|
||||||
|
#define ISAAC_CREATE_CUDA_EXCEPTION(name, msg) class name: public base { public:const char * what() const throw(){ return "CUDA: Error- " msg; } }
|
||||||
|
|
||||||
|
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(invalid_value ,"invalid value");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(out_of_memory ,"out of memory");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(not_initialized ,"not initialized");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(deinitialized ,"deinitialized");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(profiler_disabled ,"profiler disabled");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(profiler_not_initialized ,"profiler not initialized");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(profiler_already_started ,"profiler already started");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(profiler_already_stopped ,"profiler already stopped");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(no_device ,"no device");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(invalid_device ,"invalid device");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(invalid_image ,"invalid image");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(invalid_context ,"invalid context");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(context_already_current ,"context already current");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(map_failed ,"map failed");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(unmap_failed ,"unmap failed");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(array_is_mapped ,"array is mapped");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(already_mapped ,"already mapped");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(no_binary_for_gpu ,"no binary for gpu");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(already_acquired ,"already acquired");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(not_mapped ,"not mapped");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(not_mapped_as_array ,"not mapped as array");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(not_mapped_as_pointer ,"not mapped as pointer");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(ecc_uncorrectable ,"ecc uncorrectable");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(unsupported_limit ,"unsupported limit");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(context_already_in_use ,"context already in use");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(peer_access_unsupported ,"peer access unsupported");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(invalid_ptx ,"invalid ptx");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(invalid_graphics_context ,"invalid graphics context");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(invalid_source ,"invalid source");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(file_not_found ,"file not found");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(shared_object_symbol_not_found ,"shared object symbol not found");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(shared_object_init_failed ,"shared object init failed");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(operating_system ,"operating system");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(invalid_handle ,"invalid handle");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(not_found ,"not found");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(not_ready ,"not ready");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(illegal_address ,"illegal address");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(launch_out_of_resources ,"launch out of resources");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(launch_timeout ,"launch timeout");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(launch_incompatible_texturing ,"launch incompatible texturing");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(peer_access_already_enabled ,"peer access already enabled");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(peer_access_not_enabled ,"peer access not enabled");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(primary_context_active ,"primary context active");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(context_is_destroyed ,"context is destroyed");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(assert_error ,"assert");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(too_many_peers ,"too many peers");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(host_memory_already_registered ,"host memory already registered");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(host_memory_not_registered ,"hot memory not registered");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(hardware_stack_error ,"hardware stack error");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(illegal_instruction ,"illegal instruction");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(misaligned_address ,"misaligned address");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(invalid_address_space ,"invalid address space");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(invalid_pc ,"invalid pc");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(launch_failed ,"launch failed");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(not_permitted ,"not permitted");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(not_supported ,"not supported");
|
||||||
|
ISAAC_CREATE_CUDA_EXCEPTION(unknown ,"unknown");
|
||||||
|
|
||||||
|
#undef ISAAC_CREATE_CUDA_EXCEPTION
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace cublas
|
||||||
|
{
|
||||||
|
class base: public std::exception{};
|
||||||
|
|
||||||
|
#define ISAAC_CREATE_CUBLAS_EXCEPTION(name, msg) class name: public base { public: const char * what() const throw(){ return "CUBLAS: Error- " msg; } }
|
||||||
|
|
||||||
|
ISAAC_CREATE_CUBLAS_EXCEPTION(not_initialized ,"not initialized");
|
||||||
|
ISAAC_CREATE_CUBLAS_EXCEPTION(alloc_failed ,"alloc failed");
|
||||||
|
ISAAC_CREATE_CUBLAS_EXCEPTION(invalid_value ,"invalid value");
|
||||||
|
ISAAC_CREATE_CUBLAS_EXCEPTION(arch_mismatch ,"arch mismatch");
|
||||||
|
ISAAC_CREATE_CUBLAS_EXCEPTION(mapping_error ,"mapping error");
|
||||||
|
ISAAC_CREATE_CUBLAS_EXCEPTION(execution_failed ,"execution failed");
|
||||||
|
ISAAC_CREATE_CUBLAS_EXCEPTION(internal_error ,"internal error");
|
||||||
|
ISAAC_CREATE_CUBLAS_EXCEPTION(not_supported ,"not supported");
|
||||||
|
ISAAC_CREATE_CUBLAS_EXCEPTION(license_error ,"license error");
|
||||||
|
ISAAC_CREATE_CUBLAS_EXCEPTION(unknown ,"unknown");
|
||||||
|
|
||||||
|
#undef ISAAC_CREATE_CUBLAS_EXCEPTION
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace cudnn
|
||||||
|
{
|
||||||
|
#define ISAAC_CREATE_CUDNN_EXCEPTION(name, msg) class name: public std::exception { public: const char * what() const throw(){ return "CUDNN: Error- " msg; } }
|
||||||
|
|
||||||
|
ISAAC_CREATE_CUDNN_EXCEPTION(not_initialized ,"not initialized");
|
||||||
|
ISAAC_CREATE_CUDNN_EXCEPTION(alloc_failed ,"allocation failed");
|
||||||
|
ISAAC_CREATE_CUDNN_EXCEPTION(bad_param ,"bad param");
|
||||||
|
ISAAC_CREATE_CUDNN_EXCEPTION(internal_error ,"internal error");
|
||||||
|
ISAAC_CREATE_CUDNN_EXCEPTION(invalid_value ,"invalid value");
|
||||||
|
ISAAC_CREATE_CUDNN_EXCEPTION(arch_mismatch ,"arch mismatch");
|
||||||
|
ISAAC_CREATE_CUDNN_EXCEPTION(mapping_error ,"mapping error");
|
||||||
|
ISAAC_CREATE_CUDNN_EXCEPTION(execution_failed ,"execution failed");
|
||||||
|
ISAAC_CREATE_CUDNN_EXCEPTION(not_supported ,"not supported");
|
||||||
|
ISAAC_CREATE_CUDNN_EXCEPTION(license_error ,"license error");
|
||||||
|
ISAAC_CREATE_CUDNN_EXCEPTION(runtime_prerequisite_missing ,"prerequisite missing");
|
||||||
|
ISAAC_CREATE_CUDNN_EXCEPTION(runtime_in_progress ,"runtime in progress");
|
||||||
|
ISAAC_CREATE_CUDNN_EXCEPTION(runtime_fp_overflow ,"runtime fp overflow");
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace ocl
|
||||||
|
{
|
||||||
|
|
||||||
|
class base: public std::exception{};
|
||||||
|
|
||||||
|
#define ISAAC_CREATE_CL_EXCEPTION(name, msg) class name: public base { public: const char * what() const throw(){ return "OpenCL: Error- " msg; } }
|
||||||
|
|
||||||
|
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(device_not_found, "device not found");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(device_not_available, "device not available");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(compiler_not_available, "compiler not available");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(mem_object_allocation_failure, "object allocation failure");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(out_of_resources, "launch out of resources");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(out_of_host_memory, "out of host memory");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(profiling_info_not_available, "profiling info not available");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(mem_copy_overlap, "mem copy overlap");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(image_format_mismatch, "image format mismatch");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(image_format_not_supported, "image format not supported");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(build_program_failure, "build program failure");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(map_failure, "map failure");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_value, "invalid value");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_device_type, "invalid device type");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_platform, "invalid platform");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_device, "invalid device");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_context, "invalid context");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_queue_properties, "invalid queue properties");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_command_queue, "invalid command queue");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_host_ptr, "invalid host pointer");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_mem_object, "invalid mem object");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_image_format_descriptor, "invalid image format descriptor");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_image_size, "invalid image size");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_sampler, "invalid sampler");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_binary, "invalid binary");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_build_options, "invalid build options");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_program, "invalid program");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_program_executable, "invalid program executable");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_kernel_name, "invalid kernel name");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_kernel_definition, "invalid kernel definition");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_kernel, "invalid kernel");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_arg_index, "invalid arg index");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_arg_value, "invalid arg value");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_arg_size, "invalid arg size");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_kernel_args, "invalid kernel args");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_work_dimension, "invalid work dimension");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_work_group_size, "invalid work group size");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_work_item_size, "invalid work item size");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_global_offset, "invalid global offset");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_event_wait_list, "invalid event wait list");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_event, "invalid event");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_operation, "invalid operation");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_gl_object, "invalid GL object");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_buffer_size, "invalid buffer size");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_mip_level, "invalid MIP level");
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_global_work_size, "invalid global work size");
|
||||||
|
#ifdef CL_INVALID_PROPERTY
|
||||||
|
ISAAC_CREATE_CL_EXCEPTION(invalid_property, "invalid property");
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
49
include/driver/event.h
Executable file
49
include/driver/event.h
Executable file
@@ -0,0 +1,49 @@
|
|||||||
|
/* Copyright 2015-2017 Philippe Tillet
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files
|
||||||
|
* (the "Software"), to deal in the Software without restriction,
|
||||||
|
* including without limitation the rights to use, copy, modify, merge,
|
||||||
|
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so,
|
||||||
|
* subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef TDL_INCLUDE_DRIVER_EVENT_H
|
||||||
|
#define TDL_INCLUDE_DRIVER_EVENT_H
|
||||||
|
|
||||||
|
#include "driver/handle.h"
|
||||||
|
|
||||||
|
namespace tdl
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace driver
|
||||||
|
{
|
||||||
|
|
||||||
|
// Event
|
||||||
|
class Event: public HandleInterface<Event, cu_event_t>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
float elapsed_time() const;
|
||||||
|
Handle<cu_event_t> const & cu() const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
Handle<cu_event_t> cu_;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
82
include/driver/handle.h
Executable file
82
include/driver/handle.h
Executable file
@@ -0,0 +1,82 @@
|
|||||||
|
/* Copyright 2015-2017 Philippe Tillet
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files
|
||||||
|
* (the "Software"), to deal in the Software without restriction,
|
||||||
|
* including without limitation the rights to use, copy, modify, merge,
|
||||||
|
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so,
|
||||||
|
* subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef TDL_INCLUDE_DRIVER_HANDLE_H
|
||||||
|
#define TDL_INCLUDE_DRIVER_HANDLE_H
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <iostream>
|
||||||
|
#include <functional>
|
||||||
|
#include <type_traits>
|
||||||
|
#include "driver/dispatch.h"
|
||||||
|
|
||||||
|
namespace tdl
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace driver
|
||||||
|
{
|
||||||
|
|
||||||
|
struct cu_event_t{
|
||||||
|
operator bool() const { return first && second; }
|
||||||
|
CUevent first;
|
||||||
|
CUevent second;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct cu_platform{
|
||||||
|
cu_platform() : status_(dispatch::cuInit(0)) { }
|
||||||
|
operator bool() const { return status_; }
|
||||||
|
private:
|
||||||
|
CUresult status_;
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class T, class CUType>
|
||||||
|
class HandleInterface{
|
||||||
|
public:
|
||||||
|
//Accessors
|
||||||
|
operator CUType() const { return *(((T*)this)->cu().h_); }
|
||||||
|
//Comparison
|
||||||
|
bool operator==(HandleInterface const & y) { return (CUType)(*this) == (CUType)(y); }
|
||||||
|
bool operator!=(HandleInterface const & y) { return (CUType)(*this) != (CUType)(y); }
|
||||||
|
bool operator<(HandleInterface const & y) { return (CUType)(*this) < (CUType)(y); }
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class CUType>
|
||||||
|
class Handle{
|
||||||
|
public:
|
||||||
|
template<class, class> friend class HandleInterface;
|
||||||
|
public:
|
||||||
|
//Constructors
|
||||||
|
Handle(CUType cu = CUType(), bool take_ownership = true);
|
||||||
|
~Handle();
|
||||||
|
CUType& operator*() { return *h_; }
|
||||||
|
CUType const & operator*() const { return *h_; }
|
||||||
|
CUType* operator->() const { return h_.get(); }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
std::shared_ptr<CUType> h_;
|
||||||
|
bool has_ownership_;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
68
include/driver/kernel.h
Executable file
68
include/driver/kernel.h
Executable file
@@ -0,0 +1,68 @@
|
|||||||
|
/* Copyright 2015-2017 Philippe Tillet
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files
|
||||||
|
* (the "Software"), to deal in the Software without restriction,
|
||||||
|
* including without limitation the rights to use, copy, modify, merge,
|
||||||
|
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so,
|
||||||
|
* subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef TDL_INCLUDE_DRIVER_KERNEL_H
|
||||||
|
#define TDL_INCLUDE_DRIVER_KERNEL_H
|
||||||
|
|
||||||
|
#include "driver/module.h"
|
||||||
|
#include "driver/handle.h"
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
namespace tdl
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace driver
|
||||||
|
{
|
||||||
|
|
||||||
|
class Buffer;
|
||||||
|
|
||||||
|
// Kernel
|
||||||
|
class Kernel: public HandleInterface<Kernel, CUfunction>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
//Constructors
|
||||||
|
Kernel(Module const & program, const char * name);
|
||||||
|
//Accessors
|
||||||
|
Handle<CUfunction> const & cu() const;
|
||||||
|
Module const & module() const;
|
||||||
|
//Arguments setters
|
||||||
|
void setArg(unsigned int index, std::size_t size, void* ptr);
|
||||||
|
void setArg(unsigned int index, Buffer const &);
|
||||||
|
template<class T> void setArg(unsigned int index, T value) { setArg(index, sizeof(T), (void*)&value); }
|
||||||
|
//Arguments getters
|
||||||
|
void* const* cu_params() const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
Handle<CUfunction> cu_;
|
||||||
|
Module program_;
|
||||||
|
unsigned int address_bits_;
|
||||||
|
std::vector<std::shared_ptr<void> > cu_params_store_;
|
||||||
|
std::vector<void*> cu_params_;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
61
include/driver/module.h
Executable file
61
include/driver/module.h
Executable file
@@ -0,0 +1,61 @@
|
|||||||
|
/* Copyright 2015-2017 Philippe Tillet
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files
|
||||||
|
* (the "Software"), to deal in the Software without restriction,
|
||||||
|
* including without limitation the rights to use, copy, modify, merge,
|
||||||
|
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so,
|
||||||
|
* subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef TDL_INCLUDE_DRIVER_MODULE_H
|
||||||
|
#define TDL_INCLUDE_DRIVER_MODULE_H
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
#include "driver/handle.h"
|
||||||
|
#include "driver/context.h"
|
||||||
|
#include "driver/buffer.h"
|
||||||
|
|
||||||
|
namespace tdl
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace driver
|
||||||
|
{
|
||||||
|
|
||||||
|
class Context;
|
||||||
|
class Device;
|
||||||
|
|
||||||
|
class Module: public HandleInterface<Module, CUmodule>
|
||||||
|
{
|
||||||
|
static std::string header(Device const & device);
|
||||||
|
|
||||||
|
public:
|
||||||
|
Module(Context const & context, std::string const & source);
|
||||||
|
Context const & context() const;
|
||||||
|
Handle<CUmodule> const & cu() const;
|
||||||
|
Buffer symbol(const char * name) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
Handle<CUmodule> cu_;
|
||||||
|
Context context_;
|
||||||
|
std::string source_;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
54
include/driver/platform.h
Executable file
54
include/driver/platform.h
Executable file
@@ -0,0 +1,54 @@
|
|||||||
|
/* Copyright 2015-2017 Philippe Tillet
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files
|
||||||
|
* (the "Software"), to deal in the Software without restriction,
|
||||||
|
* including without limitation the rights to use, copy, modify, merge,
|
||||||
|
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so,
|
||||||
|
* subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef TDL_INCLUDE_DRIVER_PLATFORM_H
|
||||||
|
#define TDL_INCLUDE_DRIVER_PLATFORM_H
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "driver/handle.h"
|
||||||
|
|
||||||
|
namespace tdl
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace driver
|
||||||
|
{
|
||||||
|
|
||||||
|
class Device;
|
||||||
|
|
||||||
|
class Platform
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
//Accessors
|
||||||
|
std::string name() const;
|
||||||
|
std::string version() const;
|
||||||
|
std::vector<Device> devices() const;
|
||||||
|
private:
|
||||||
|
Handle<cu_platform> cu_;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
82
include/driver/stream.h
Executable file
82
include/driver/stream.h
Executable file
@@ -0,0 +1,82 @@
|
|||||||
|
/* Copyright 2015-2017 Philippe Tillet
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files
|
||||||
|
* (the "Software"), to deal in the Software without restriction,
|
||||||
|
* including without limitation the rights to use, copy, modify, merge,
|
||||||
|
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so,
|
||||||
|
* subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef TDL_INCLUDE_DRIVER_STREAM_H
|
||||||
|
#define TDL_INCLUDE_DRIVER_STREAM_H
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
#include "driver/context.h"
|
||||||
|
#include "driver/device.h"
|
||||||
|
#include "driver/handle.h"
|
||||||
|
#include "driver/buffer.h"
|
||||||
|
|
||||||
|
namespace tdl
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace driver
|
||||||
|
{
|
||||||
|
|
||||||
|
class Kernel;
|
||||||
|
class Event;
|
||||||
|
class Range;
|
||||||
|
class Buffer;
|
||||||
|
|
||||||
|
// Command Queue
|
||||||
|
class Stream: public HandleInterface<Stream, CUstream>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
//Constructors
|
||||||
|
Stream(CUstream stream, bool take_ownership);
|
||||||
|
Stream(Context const & context);
|
||||||
|
|
||||||
|
//Accessors
|
||||||
|
Handle<CUstream> const & cu() const;
|
||||||
|
Context const & context() const;
|
||||||
|
|
||||||
|
//Synchronize
|
||||||
|
void synchronize();
|
||||||
|
|
||||||
|
//Enqueue
|
||||||
|
void enqueue(Kernel const & kernel, std::array<size_t, 3> grid, std::array<size_t, 3> block, std::vector<Event> const * = NULL, Event *event = NULL);
|
||||||
|
|
||||||
|
// Write
|
||||||
|
void write(Buffer const & buffer, bool blocking, std::size_t offset, std::size_t size, void const* ptr);
|
||||||
|
|
||||||
|
template<class T> void write(Buffer const & buffer, bool blocking, std::size_t offset, std::vector<T> const & x)
|
||||||
|
{ write(buffer, blocking, offset, x.size()*sizeof(T), x.data()); }
|
||||||
|
|
||||||
|
// Read
|
||||||
|
void read(Buffer const & buffer, bool blocking, std::size_t offset, std::size_t size, void* ptr);
|
||||||
|
|
||||||
|
template<class T> void read(Buffer const & buffer, bool blocking, std::size_t offset, std::vector<T>& x)
|
||||||
|
{ read(buffer, blocking, offset, x.size()*sizeof(T), x.data()); }
|
||||||
|
private:
|
||||||
|
Context context_;
|
||||||
|
Handle<CUstream> cu_;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
64
include/external/CUDA/builtin_types.h
vendored
Executable file
64
include/external/CUDA/builtin_types.h
vendored
Executable file
@@ -0,0 +1,64 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
|
||||||
|
*
|
||||||
|
* NOTICE TO LICENSEE:
|
||||||
|
*
|
||||||
|
* This source code and/or documentation ("Licensed Deliverables") are
|
||||||
|
* subject to NVIDIA intellectual property rights under U.S. and
|
||||||
|
* international Copyright laws.
|
||||||
|
*
|
||||||
|
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||||
|
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||||
|
* conditions of a form of NVIDIA software license agreement by and
|
||||||
|
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||||
|
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||||
|
* the contrary in the License Agreement, reproduction or disclosure
|
||||||
|
* of the Licensed Deliverables to any third party without the express
|
||||||
|
* written consent of NVIDIA is prohibited.
|
||||||
|
*
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||||
|
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||||
|
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||||
|
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||||
|
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||||
|
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||||
|
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||||
|
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||||
|
* OF THESE LICENSED DELIVERABLES.
|
||||||
|
*
|
||||||
|
* U.S. Government End Users. These Licensed Deliverables are a
|
||||||
|
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||||
|
* 1995), consisting of "commercial computer software" and "commercial
|
||||||
|
* computer software documentation" as such terms are used in 48
|
||||||
|
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||||
|
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||||
|
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||||
|
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||||
|
* only those rights set forth herein.
|
||||||
|
*
|
||||||
|
* Any use of the Licensed Deliverables in individual and commercial
|
||||||
|
* software must include, in the user documentation and internal
|
||||||
|
* comments to the code, the above Disclaimer and U.S. Government End
|
||||||
|
* Users Notice.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*******************************************************************************
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
#include "device_types.h"
|
||||||
|
#if !defined(__CUDACC_RTC__)
|
||||||
|
#define EXCLUDE_FROM_RTC
|
||||||
|
#include "driver_types.h"
|
||||||
|
#undef EXCLUDE_FROM_RTC
|
||||||
|
#endif /* !__CUDACC_RTC__ */
|
||||||
|
#include "surface_types.h"
|
||||||
|
#include "texture_types.h"
|
||||||
|
#include "vector_types.h"
|
412
include/external/CUDA/channel_descriptor.h
vendored
Executable file
412
include/external/CUDA/channel_descriptor.h
vendored
Executable file
@@ -0,0 +1,412 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
|
||||||
|
*
|
||||||
|
* NOTICE TO LICENSEE:
|
||||||
|
*
|
||||||
|
* This source code and/or documentation ("Licensed Deliverables") are
|
||||||
|
* subject to NVIDIA intellectual property rights under U.S. and
|
||||||
|
* international Copyright laws.
|
||||||
|
*
|
||||||
|
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||||
|
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||||
|
* conditions of a form of NVIDIA software license agreement by and
|
||||||
|
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||||
|
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||||
|
* the contrary in the License Agreement, reproduction or disclosure
|
||||||
|
* of the Licensed Deliverables to any third party without the express
|
||||||
|
* written consent of NVIDIA is prohibited.
|
||||||
|
*
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||||
|
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||||
|
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||||
|
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||||
|
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||||
|
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||||
|
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||||
|
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||||
|
* OF THESE LICENSED DELIVERABLES.
|
||||||
|
*
|
||||||
|
* U.S. Government End Users. These Licensed Deliverables are a
|
||||||
|
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||||
|
* 1995), consisting of "commercial computer software" and "commercial
|
||||||
|
* computer software documentation" as such terms are used in 48
|
||||||
|
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||||
|
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||||
|
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||||
|
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||||
|
* only those rights set forth herein.
|
||||||
|
*
|
||||||
|
* Any use of the Licensed Deliverables in individual and commercial
|
||||||
|
* software must include, in the user documentation and internal
|
||||||
|
* comments to the code, the above Disclaimer and U.S. Government End
|
||||||
|
* Users Notice.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if !defined(__CHANNEL_DESCRIPTOR_H__)
|
||||||
|
#define __CHANNEL_DESCRIPTOR_H__
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
|
||||||
|
/*******************************************************************************
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
#include "driver_types.h"
|
||||||
|
#include "cuda_runtime_api.h"
|
||||||
|
#include "host_defines.h"
|
||||||
|
#include "vector_types.h"
|
||||||
|
|
||||||
|
/*******************************************************************************
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \addtogroup CUDART_HIGHLEVEL
|
||||||
|
*
|
||||||
|
* @{
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief \hl Returns a channel descriptor using the specified format
|
||||||
|
*
|
||||||
|
* Returns a channel descriptor with format \p f and number of bits of each
|
||||||
|
* component \p x, \p y, \p z, and \p w. The ::cudaChannelFormatDesc is
|
||||||
|
* defined as:
|
||||||
|
* \code
|
||||||
|
struct cudaChannelFormatDesc {
|
||||||
|
int x, y, z, w;
|
||||||
|
enum cudaChannelFormatKind f;
|
||||||
|
};
|
||||||
|
* \endcode
|
||||||
|
*
|
||||||
|
* where ::cudaChannelFormatKind is one of ::cudaChannelFormatKindSigned,
|
||||||
|
* ::cudaChannelFormatKindUnsigned, or ::cudaChannelFormatKindFloat.
|
||||||
|
*
|
||||||
|
* \return
|
||||||
|
* Channel descriptor with format \p f
|
||||||
|
*
|
||||||
|
* \sa \ref ::cudaCreateChannelDesc(int,int,int,int,cudaChannelFormatKind) "cudaCreateChannelDesc (Low level)",
|
||||||
|
* ::cudaGetChannelDesc, ::cudaGetTextureReference,
|
||||||
|
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture (High level)",
|
||||||
|
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>&, const void*, size_t) "cudaBindTexture (High level, inherited channel descriptor)",
|
||||||
|
* \ref ::cudaBindTexture2D(size_t*, const struct texture< T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t) "cudaBindTexture2D (High level)",
|
||||||
|
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, cudaArray_const_t, const struct cudaChannelFormatDesc&) "cudaBindTextureToArray (High level)",
|
||||||
|
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, cudaArray_const_t) "cudaBindTextureToArray (High level, inherited channel descriptor)",
|
||||||
|
* \ref ::cudaUnbindTexture(const struct texture< T, dim, readMode>&) "cudaUnbindTexture (High level)",
|
||||||
|
* \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture< T, dim, readMode>&) "cudaGetTextureAlignmentOffset (High level)"
|
||||||
|
*/
|
||||||
|
template<class T> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc(void)
|
||||||
|
{
|
||||||
|
return cudaCreateChannelDesc(0, 0, 0, 0, cudaChannelFormatKindNone);
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDescHalf(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(unsigned short) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindFloat);
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDescHalf1(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(unsigned short) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindFloat);
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDescHalf2(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(unsigned short) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, e, 0, 0, cudaChannelFormatKindFloat);
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDescHalf4(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(unsigned short) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, e, e, e, cudaChannelFormatKindFloat);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<char>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(char) * 8;
|
||||||
|
|
||||||
|
#if defined(_CHAR_UNSIGNED) || defined(__CHAR_UNSIGNED__)
|
||||||
|
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindUnsigned);
|
||||||
|
#else /* _CHAR_UNSIGNED || __CHAR_UNSIGNED__ */
|
||||||
|
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindSigned);
|
||||||
|
#endif /* _CHAR_UNSIGNED || __CHAR_UNSIGNED__ */
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<signed char>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(signed char) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindSigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<unsigned char>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(unsigned char) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindUnsigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<char1>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(signed char) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindSigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<uchar1>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(unsigned char) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindUnsigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<char2>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(signed char) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, e, 0, 0, cudaChannelFormatKindSigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<uchar2>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(unsigned char) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, e, 0, 0, cudaChannelFormatKindUnsigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<char4>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(signed char) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, e, e, e, cudaChannelFormatKindSigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<uchar4>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(unsigned char) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, e, e, e, cudaChannelFormatKindUnsigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<short>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(short) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindSigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<unsigned short>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(unsigned short) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindUnsigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<short1>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(short) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindSigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<ushort1>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(unsigned short) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindUnsigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<short2>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(short) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, e, 0, 0, cudaChannelFormatKindSigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<ushort2>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(unsigned short) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, e, 0, 0, cudaChannelFormatKindUnsigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<short4>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(short) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, e, e, e, cudaChannelFormatKindSigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<ushort4>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(unsigned short) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, e, e, e, cudaChannelFormatKindUnsigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<int>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(int) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindSigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<unsigned int>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(unsigned int) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindUnsigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<int1>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(int) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindSigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<uint1>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(unsigned int) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindUnsigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<int2>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(int) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, e, 0, 0, cudaChannelFormatKindSigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<uint2>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(unsigned int) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, e, 0, 0, cudaChannelFormatKindUnsigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<int4>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(int) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, e, e, e, cudaChannelFormatKindSigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<uint4>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(unsigned int) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, e, e, e, cudaChannelFormatKindUnsigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if !defined(__LP64__)
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<long>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(long) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindSigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<unsigned long>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(unsigned long) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindUnsigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<long1>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(long) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindSigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<ulong1>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(unsigned long) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindUnsigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<long2>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(long) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, e, 0, 0, cudaChannelFormatKindSigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<ulong2>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(unsigned long) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, e, 0, 0, cudaChannelFormatKindUnsigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<long4>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(long) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, e, e, e, cudaChannelFormatKindSigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<ulong4>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(unsigned long) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, e, e, e, cudaChannelFormatKindUnsigned);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* !__LP64__ */
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<float>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(float) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindFloat);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<float1>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(float) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindFloat);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<float2>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(float) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, e, 0, 0, cudaChannelFormatKindFloat);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<float4>(void)
|
||||||
|
{
|
||||||
|
int e = (int)sizeof(float) * 8;
|
||||||
|
|
||||||
|
return cudaCreateChannelDesc(e, e, e, e, cudaChannelFormatKindFloat);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
|
/** @} */
|
||||||
|
/** @} */ /* END CUDART_TEXTURE_HL */
|
||||||
|
|
||||||
|
#endif /* !__CHANNEL_DESCRIPTOR_H__ */
|
266
include/external/CUDA/crt/host_config.h
vendored
Normal file
266
include/external/CUDA/crt/host_config.h
vendored
Normal file
@@ -0,0 +1,266 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 1993-2016 NVIDIA Corporation. All rights reserved.
|
||||||
|
*
|
||||||
|
* NOTICE TO LICENSEE:
|
||||||
|
*
|
||||||
|
* This source code and/or documentation ("Licensed Deliverables") are
|
||||||
|
* subject to NVIDIA intellectual property rights under U.S. and
|
||||||
|
* international Copyright laws.
|
||||||
|
*
|
||||||
|
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||||
|
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||||
|
* conditions of a form of NVIDIA software license agreement by and
|
||||||
|
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||||
|
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||||
|
* the contrary in the License Agreement, reproduction or disclosure
|
||||||
|
* of the Licensed Deliverables to any third party without the express
|
||||||
|
* written consent of NVIDIA is prohibited.
|
||||||
|
*
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||||
|
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||||
|
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||||
|
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||||
|
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||||
|
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||||
|
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||||
|
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||||
|
* OF THESE LICENSED DELIVERABLES.
|
||||||
|
*
|
||||||
|
* U.S. Government End Users. These Licensed Deliverables are a
|
||||||
|
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||||
|
* 1995), consisting of "commercial computer software" and "commercial
|
||||||
|
* computer software documentation" as such terms are used in 48
|
||||||
|
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||||
|
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||||
|
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||||
|
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||||
|
* only those rights set forth herein.
|
||||||
|
*
|
||||||
|
* Any use of the Licensed Deliverables in individual and commercial
|
||||||
|
* software must include, in the user documentation and internal
|
||||||
|
* comments to the code, the above Disclaimer and U.S. Government End
|
||||||
|
* Users Notice.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if !defined(__HOST_CONFIG_H__)
|
||||||
|
#define __HOST_CONFIG_H__
|
||||||
|
|
||||||
|
/*******************************************************************************
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
#if defined(__CUDACC__)
|
||||||
|
|
||||||
|
#if defined(__CUDACC_RTC__)
|
||||||
|
|
||||||
|
#define _CRTIMP
|
||||||
|
#define __THROW
|
||||||
|
|
||||||
|
#else /* __CUDACC_RTC__ */
|
||||||
|
|
||||||
|
/* check for host compilers that are compatible with nvcc */
|
||||||
|
#if !defined(__GNUC__) && !defined(_WIN32)
|
||||||
|
|
||||||
|
#error --- !!! UNSUPPORTED COMPILER !!! ---
|
||||||
|
|
||||||
|
#endif /* !__GNUC__ && !_WIN32 */
|
||||||
|
|
||||||
|
#if defined(__ICC)
|
||||||
|
|
||||||
|
#if (__ICC != 1500 && __ICC != 1600 && __ICC != 1700) || !defined(__GNUC__) || !defined(__LP64__)
|
||||||
|
|
||||||
|
#error -- unsupported ICC configuration! Only ICC 15.0, ICC 16.0, and ICC 17.0 on Linux x86_64 are supported!
|
||||||
|
|
||||||
|
#endif /* (__ICC != 1500 && __ICC != 1600 && __ICC != 17.0) || !__GNUC__ || !__LP64__ */
|
||||||
|
|
||||||
|
#endif /* __ICC */
|
||||||
|
|
||||||
|
#if defined(__PGIC__)
|
||||||
|
|
||||||
|
#if (!(__PGIC__ == 17) && \
|
||||||
|
!(__PGIC__ == 99 && __PGIC_MINOR__ == 99)) || \
|
||||||
|
!defined(__GNUC__) || !defined(__LP64__)
|
||||||
|
|
||||||
|
#error -- unsupported pgc++ configuration! Only pgc++ 17 on Linux x86_64 is supported!
|
||||||
|
|
||||||
|
#endif /* (!(__PGIC__ == 17) &&
|
||||||
|
!(__PGIC__ == 99 && __PGIC_MINOR__ == 99 )) ||
|
||||||
|
!__GNUC__ || !__LP64__ */
|
||||||
|
|
||||||
|
#endif /* __PGIC__ */
|
||||||
|
|
||||||
|
#if defined(__powerpc__)
|
||||||
|
|
||||||
|
#if !defined(__powerpc64__) || !defined(__LITTLE_ENDIAN__)
|
||||||
|
|
||||||
|
#error -- unsupported PPC platform! Only 64-bit little endian PPC is supported!
|
||||||
|
|
||||||
|
#endif /* !__powerpc64__ || !__LITTLE_ENDIAN__ */
|
||||||
|
|
||||||
|
#if defined(__ibmxl_vrm__) && (__ibmxl_vrm__ < 0x0d010000 && __ibmxl_vrm__ >= 0x0d020000)
|
||||||
|
|
||||||
|
#error -- unsupported xlC version! only xlC 13.1 is supported
|
||||||
|
|
||||||
|
#endif /* __ibmxl_vrm__ && (__ibmxl_vrm__ < 0x0d010000 && __ibmxl_vrm__ >= 0x0d020000) */
|
||||||
|
|
||||||
|
#endif /* __powerpc__ */
|
||||||
|
|
||||||
|
#if defined(__GNUC__)
|
||||||
|
|
||||||
|
#if __GNUC__ > 6
|
||||||
|
|
||||||
|
#error -- unsupported GNU version! gcc versions later than 6 are not supported!
|
||||||
|
|
||||||
|
#endif /* __GNUC__ > 6 */
|
||||||
|
|
||||||
|
#if defined(__APPLE__) && defined(__MACH__) && !defined(__clang__)
|
||||||
|
#error -- clang and clang++ are the only supported host compilers on Mac OS X!
|
||||||
|
#endif /* __APPLE__ && __MACH__ && !__clang__ */
|
||||||
|
|
||||||
|
#endif /* __GNUC__ */
|
||||||
|
|
||||||
|
#if defined(_WIN32)
|
||||||
|
|
||||||
|
#if _MSC_VER < 1600 || _MSC_VER > 1911
|
||||||
|
|
||||||
|
#error -- unsupported Microsoft Visual Studio version! Only the versions 2012, 2013, 2015 and 2017 are supported!
|
||||||
|
|
||||||
|
#elif _MSC_VER == 1600 /* _MSC_VERION == 1600 */
|
||||||
|
|
||||||
|
#pragma message("support for Microsoft Visual Studio 2010 has been deprecated!")
|
||||||
|
|
||||||
|
#endif /* _MSC_VER < 1600 || _MSC_VER > 1800 || _MSC_VERSION == 1600 */
|
||||||
|
|
||||||
|
#endif /* _WIN32 */
|
||||||
|
|
||||||
|
/* configure host compiler */
|
||||||
|
#if defined(__APPLE__)
|
||||||
|
|
||||||
|
#define _CRTIMP
|
||||||
|
#define _ACRTIMP
|
||||||
|
#define __THROW
|
||||||
|
|
||||||
|
#if defined(__BLOCKS__) /* nvcc does not support closures */
|
||||||
|
|
||||||
|
#undef __BLOCKS__
|
||||||
|
|
||||||
|
#endif /* __BLOCKS__ */
|
||||||
|
|
||||||
|
#elif defined(__ANDROID__)
|
||||||
|
|
||||||
|
#define _CRTIMP
|
||||||
|
#define _ACRTIMP
|
||||||
|
#define __THROW
|
||||||
|
|
||||||
|
#elif defined(__QNX__)
|
||||||
|
|
||||||
|
#define _CRTIMP
|
||||||
|
#define _ACRTIMP
|
||||||
|
#define __THROW
|
||||||
|
|
||||||
|
#elif defined(__HORIZON__)
|
||||||
|
|
||||||
|
#define _CRTIMP
|
||||||
|
#define _ACRTIMP
|
||||||
|
#define __THROW
|
||||||
|
|
||||||
|
#elif defined(__GNUC__)
|
||||||
|
|
||||||
|
#define _CRTIMP
|
||||||
|
#define _ACRTIMP
|
||||||
|
|
||||||
|
#include <features.h> /* for __THROW */
|
||||||
|
|
||||||
|
#elif defined(_WIN32)
|
||||||
|
|
||||||
|
#if _MSC_VER >= 1500
|
||||||
|
|
||||||
|
#undef _USE_DECLSPECS_FOR_SAL
|
||||||
|
#define _USE_DECLSPECS_FOR_SAL \
|
||||||
|
1
|
||||||
|
|
||||||
|
#endif /* _MSC_VER >= 1500 */
|
||||||
|
|
||||||
|
#if !defined(_CRT_NONSTDC_NO_WARNINGS)
|
||||||
|
|
||||||
|
#define _CRT_NONSTDC_NO_WARNINGS /* to suppress warnings */
|
||||||
|
|
||||||
|
#endif /* !_CRT_NONSTDC_NO_WARNINGS */
|
||||||
|
|
||||||
|
#if !defined(_CRT_SECURE_NO_WARNINGS)
|
||||||
|
|
||||||
|
#define _CRT_SECURE_NO_WARNINGS /* to suppress warnings */
|
||||||
|
|
||||||
|
#endif /* !_CRT_SECURE_NO_WARNINGS */
|
||||||
|
|
||||||
|
#if !defined(NOMINMAX)
|
||||||
|
|
||||||
|
#define NOMINMAX /* min and max are part of cuda runtime */
|
||||||
|
|
||||||
|
#endif /* !NOMINMAX */
|
||||||
|
|
||||||
|
#include <crtdefs.h> /* for _CRTIMP */
|
||||||
|
#if _MSC_VER >= 1900
|
||||||
|
#include <corecrt.h> /* for _ACRTIMP */
|
||||||
|
#endif /* _MSC_VER >= 1900 */
|
||||||
|
|
||||||
|
#define __THROW
|
||||||
|
|
||||||
|
#endif /* __APPLE__ */
|
||||||
|
|
||||||
|
#endif /* __CUDACC_RTC__ */
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(__cplusplus) && defined(__CUDA_ARCH__) && (defined(__PGIC__) || defined(__CUDACC_RTC__) || (defined(_WIN32) && defined(_MSC_VER)))
|
||||||
|
|
||||||
|
#if __CUDACC_RTC__
|
||||||
|
typedef char *va_list;
|
||||||
|
#else /* !__CUDACC_RTC__ */
|
||||||
|
#include <cstdarg>
|
||||||
|
#endif /* __CUDACC_RTC__ */
|
||||||
|
|
||||||
|
|
||||||
|
#undef va_start
|
||||||
|
#undef va_end
|
||||||
|
#undef va_arg
|
||||||
|
|
||||||
|
#ifdef __PGIC__
|
||||||
|
|
||||||
|
#undef __builtin_va_end
|
||||||
|
|
||||||
|
#define va_start(v,l) __builtin_alt_va_start(v,l)
|
||||||
|
#define va_end(v) __builtin_va_end(v)
|
||||||
|
#define va_arg(v,l) __builtin_alt_va_arg(v,l)
|
||||||
|
|
||||||
|
#if (__cplusplus >= 201103L)
|
||||||
|
#undef va_copy
|
||||||
|
#define va_copy(d,s) __builtin_va_copy(d,s)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#else /* !__PGIC__ */
|
||||||
|
|
||||||
|
|
||||||
|
#define va_start(ap, x) (__cu_va_start(&ap, x))
|
||||||
|
#define va_end(ap) (__cu_va_end(&ap))
|
||||||
|
#define va_arg(ap, t) (*((t *)__cu_va_arg(&ap, (t *)0)))
|
||||||
|
|
||||||
|
#if (_MSC_VER >= 1800) || (defined(__CUDACC_RTC__) && (__cplusplus >= 201103L))
|
||||||
|
#undef va_copy
|
||||||
|
#define va_copy(apd, aps) (__cu_va_copy(&(apd), &(aps)))
|
||||||
|
#endif /* (_MSC_VER >= 1800) || (defined(__CUDACC_RTC__) && (__cplusplus >= 201103L)) */
|
||||||
|
#endif /* __PGIC__ */
|
||||||
|
|
||||||
|
#endif /* defined(__cplusplus) && (defined(__CUDACC_RTC__) || (defined(_WIN32) && defined(_MSC_VER))) */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* __CUDACC__ */
|
||||||
|
|
||||||
|
#endif /* !__HOST_CONFIG_H__ */
|
216
include/external/CUDA/crt/host_defines.h
vendored
Normal file
216
include/external/CUDA/crt/host_defines.h
vendored
Normal file
@@ -0,0 +1,216 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 1993-2017 NVIDIA Corporation. All rights reserved.
|
||||||
|
*
|
||||||
|
* NOTICE TO LICENSEE:
|
||||||
|
*
|
||||||
|
* This source code and/or documentation ("Licensed Deliverables") are
|
||||||
|
* subject to NVIDIA intellectual property rights under U.S. and
|
||||||
|
* international Copyright laws.
|
||||||
|
*
|
||||||
|
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||||
|
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||||
|
* conditions of a form of NVIDIA software license agreement by and
|
||||||
|
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||||
|
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||||
|
* the contrary in the License Agreement, reproduction or disclosure
|
||||||
|
* of the Licensed Deliverables to any third party without the express
|
||||||
|
* written consent of NVIDIA is prohibited.
|
||||||
|
*
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||||
|
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||||
|
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||||
|
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||||
|
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||||
|
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||||
|
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||||
|
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||||
|
* OF THESE LICENSED DELIVERABLES.
|
||||||
|
*
|
||||||
|
* U.S. Government End Users. These Licensed Deliverables are a
|
||||||
|
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||||
|
* 1995), consisting of "commercial computer software" and "commercial
|
||||||
|
* computer software documentation" as such terms are used in 48
|
||||||
|
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||||
|
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||||
|
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||||
|
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||||
|
* only those rights set forth herein.
|
||||||
|
*
|
||||||
|
* Any use of the Licensed Deliverables in individual and commercial
|
||||||
|
* software must include, in the user documentation and internal
|
||||||
|
* comments to the code, the above Disclaimer and U.S. Government End
|
||||||
|
* Users Notice.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if !defined(__HOST_DEFINES_H__)
|
||||||
|
#define __HOST_DEFINES_H__
|
||||||
|
|
||||||
|
/* CUDA JIT mode (__CUDACC_RTC__) also uses GNU style attributes */
|
||||||
|
#if defined(__GNUC__) || defined(__CUDA_LIBDEVICE__) || defined(__CUDACC_RTC__)
|
||||||
|
|
||||||
|
#if defined(__CUDACC_RTC__)
|
||||||
|
#define __volatile__ volatile
|
||||||
|
#endif /* __CUDACC_RTC__ */
|
||||||
|
|
||||||
|
#define __no_return__ \
|
||||||
|
__attribute__((noreturn))
|
||||||
|
|
||||||
|
#if defined(__CUDACC__) || defined(__CUDA_ARCH__) || defined(__CUDA_LIBDEVICE__)
|
||||||
|
/* gcc allows users to define attributes with underscores,
|
||||||
|
e.g., __attribute__((__noinline__)).
|
||||||
|
Consider a non-CUDA source file (e.g. .cpp) that has the
|
||||||
|
above attribute specification, and includes this header file. In that case,
|
||||||
|
defining __noinline__ as below would cause a gcc compilation error.
|
||||||
|
Hence, only define __noinline__ when the code is being processed
|
||||||
|
by a CUDA compiler component.
|
||||||
|
*/
|
||||||
|
#define __noinline__ \
|
||||||
|
__attribute__((noinline))
|
||||||
|
#endif /* __CUDACC__ || __CUDA_ARCH__ || __CUDA_LIBDEVICE__ */
|
||||||
|
|
||||||
|
#define __forceinline__ \
|
||||||
|
__inline__ __attribute__((always_inline))
|
||||||
|
#define __align__(n) \
|
||||||
|
__attribute__((aligned(n)))
|
||||||
|
#define __thread__ \
|
||||||
|
__thread
|
||||||
|
#define __import__
|
||||||
|
#define __export__
|
||||||
|
#define __cdecl
|
||||||
|
#define __annotate__(a) \
|
||||||
|
__attribute__((a))
|
||||||
|
#define __location__(a) \
|
||||||
|
__annotate__(a)
|
||||||
|
#define CUDARTAPI
|
||||||
|
|
||||||
|
#elif defined(_MSC_VER)
|
||||||
|
|
||||||
|
#if _MSC_VER >= 1400
|
||||||
|
|
||||||
|
#define __restrict__ \
|
||||||
|
__restrict
|
||||||
|
|
||||||
|
#else /* _MSC_VER >= 1400 */
|
||||||
|
|
||||||
|
#define __restrict__
|
||||||
|
|
||||||
|
#endif /* _MSC_VER >= 1400 */
|
||||||
|
|
||||||
|
#define __inline__ \
|
||||||
|
__inline
|
||||||
|
#define __no_return__ \
|
||||||
|
__declspec(noreturn)
|
||||||
|
#define __noinline__ \
|
||||||
|
__declspec(noinline)
|
||||||
|
#define __forceinline__ \
|
||||||
|
__forceinline
|
||||||
|
#define __align__(n) \
|
||||||
|
__declspec(align(n))
|
||||||
|
#define __thread__ \
|
||||||
|
__declspec(thread)
|
||||||
|
#define __import__ \
|
||||||
|
__declspec(dllimport)
|
||||||
|
#define __export__ \
|
||||||
|
__declspec(dllexport)
|
||||||
|
#define __annotate__(a) \
|
||||||
|
__declspec(a)
|
||||||
|
#define __location__(a) \
|
||||||
|
__annotate__(__##a##__)
|
||||||
|
#define CUDARTAPI \
|
||||||
|
__stdcall
|
||||||
|
|
||||||
|
#else /* __GNUC__ || __CUDA_LIBDEVICE__ || __CUDACC_RTC__ */
|
||||||
|
|
||||||
|
#define __inline__
|
||||||
|
|
||||||
|
#if !defined(__align__)
|
||||||
|
|
||||||
|
#error --- !!! UNKNOWN COMPILER: please provide a CUDA compatible definition for '__align__' !!! ---
|
||||||
|
|
||||||
|
#endif /* !__align__ */
|
||||||
|
|
||||||
|
#if !defined(CUDARTAPI)
|
||||||
|
|
||||||
|
#error --- !!! UNKNOWN COMPILER: please provide a CUDA compatible definition for 'CUDARTAPI' !!! ---
|
||||||
|
|
||||||
|
#endif /* !CUDARTAPI */
|
||||||
|
|
||||||
|
#endif /* __GNUC__ || __CUDA_LIBDEVICE__ || __CUDACC_RTC__ */
|
||||||
|
|
||||||
|
#if (defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 3 && !defined(__clang__)))) || \
|
||||||
|
(defined(_MSC_VER) && _MSC_VER < 1900) || \
|
||||||
|
(!defined(__GNUC__) && !defined(_MSC_VER))
|
||||||
|
|
||||||
|
#define __specialization_static \
|
||||||
|
static
|
||||||
|
|
||||||
|
#else /* (__GNUC__ && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 3 && !__clang__))) ||
|
||||||
|
(_MSC_VER && _MSC_VER < 1900) ||
|
||||||
|
(!__GNUC__ && !_MSC_VER) */
|
||||||
|
|
||||||
|
#define __specialization_static
|
||||||
|
|
||||||
|
#endif /* (__GNUC__ && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 3 && !__clang__))) ||
|
||||||
|
(_MSC_VER && _MSC_VER < 1900) ||
|
||||||
|
(!__GNUC__ && !_MSC_VER) */
|
||||||
|
|
||||||
|
#if !defined(__CUDACC__) && !defined(__CUDA_LIBDEVICE__)
|
||||||
|
|
||||||
|
#undef __annotate__
|
||||||
|
#define __annotate__(a)
|
||||||
|
|
||||||
|
#else /* !__CUDACC__ && !__CUDA_LIBDEVICE__ */
|
||||||
|
|
||||||
|
#define __launch_bounds__(...) \
|
||||||
|
__annotate__(launch_bounds(__VA_ARGS__))
|
||||||
|
|
||||||
|
#endif /* !__CUDACC__ && !__CUDA_LIBDEVICE__ */
|
||||||
|
|
||||||
|
#if defined(__CUDACC__) || defined(__CUDA_LIBDEVICE__) || \
|
||||||
|
defined(__GNUC__) || defined(_WIN64)
|
||||||
|
|
||||||
|
#define __builtin_align__(a) \
|
||||||
|
__align__(a)
|
||||||
|
|
||||||
|
#else /* __CUDACC__ || __CUDA_LIBDEVICE__ || __GNUC__ || _WIN64 */
|
||||||
|
|
||||||
|
#define __builtin_align__(a)
|
||||||
|
|
||||||
|
#endif /* __CUDACC__ || __CUDA_LIBDEVICE__ || __GNUC__ || _WIN64 */
|
||||||
|
|
||||||
|
#define __host__ \
|
||||||
|
__location__(host)
|
||||||
|
#define __device__ \
|
||||||
|
__location__(device)
|
||||||
|
#define __global__ \
|
||||||
|
__location__(global)
|
||||||
|
#define __shared__ \
|
||||||
|
__location__(shared)
|
||||||
|
#define __constant__ \
|
||||||
|
__location__(constant)
|
||||||
|
#define __managed__ \
|
||||||
|
__location__(managed)
|
||||||
|
|
||||||
|
#if !defined(__CUDACC__)
|
||||||
|
#define __device_builtin__
|
||||||
|
#define __device_builtin_texture_type__
|
||||||
|
#define __device_builtin_surface_type__
|
||||||
|
#define __cudart_builtin__
|
||||||
|
#else /* defined(__CUDACC__) */
|
||||||
|
#define __device_builtin__ \
|
||||||
|
__location__(device_builtin)
|
||||||
|
#define __device_builtin_texture_type__ \
|
||||||
|
__location__(device_builtin_texture_type)
|
||||||
|
#define __device_builtin_surface_type__ \
|
||||||
|
__location__(device_builtin_surface_type)
|
||||||
|
#define __cudart_builtin__ \
|
||||||
|
__location__(cudart_builtin)
|
||||||
|
#endif /* !defined(__CUDACC__) */
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* !__HOST_DEFINES_H__ */
|
338
include/external/CUDA/cuComplex.h
vendored
Executable file
338
include/external/CUDA/cuComplex.h
vendored
Executable file
@@ -0,0 +1,338 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
|
||||||
|
*
|
||||||
|
* NOTICE TO LICENSEE:
|
||||||
|
*
|
||||||
|
* This source code and/or documentation ("Licensed Deliverables") are
|
||||||
|
* subject to NVIDIA intellectual property rights under U.S. and
|
||||||
|
* international Copyright laws.
|
||||||
|
*
|
||||||
|
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||||
|
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||||
|
* conditions of a form of NVIDIA software license agreement by and
|
||||||
|
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||||
|
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||||
|
* the contrary in the License Agreement, reproduction or disclosure
|
||||||
|
* of the Licensed Deliverables to any third party without the express
|
||||||
|
* written consent of NVIDIA is prohibited.
|
||||||
|
*
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||||
|
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||||
|
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||||
|
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||||
|
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||||
|
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||||
|
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||||
|
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||||
|
* OF THESE LICENSED DELIVERABLES.
|
||||||
|
*
|
||||||
|
* U.S. Government End Users. These Licensed Deliverables are a
|
||||||
|
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||||
|
* 1995), consisting of "commercial computer software" and "commercial
|
||||||
|
* computer software documentation" as such terms are used in 48
|
||||||
|
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||||
|
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||||
|
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||||
|
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||||
|
* only those rights set forth herein.
|
||||||
|
*
|
||||||
|
* Any use of the Licensed Deliverables in individual and commercial
|
||||||
|
* software must include, in the user documentation and internal
|
||||||
|
* comments to the code, the above Disclaimer and U.S. Government End
|
||||||
|
* Users Notice.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if !defined(CU_COMPLEX_H_)
|
||||||
|
#define CU_COMPLEX_H_
|
||||||
|
|
||||||
|
/* When trying to include C header file in C++ Code extern "C" is required
|
||||||
|
* But the Standard QNX headers already have ifdef extern in them when compiling C++ Code
|
||||||
|
* extern "C" cannot be nested
|
||||||
|
* Hence keep the header out of extern "C" block
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <math.h> /* import fabsf, sqrt */
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
extern "C" {
|
||||||
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
|
#include "vector_types.h"
|
||||||
|
|
||||||
|
typedef float2 cuFloatComplex;
|
||||||
|
|
||||||
|
__host__ __device__ static __inline__ float cuCrealf (cuFloatComplex x)
|
||||||
|
{
|
||||||
|
return x.x;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ static __inline__ float cuCimagf (cuFloatComplex x)
|
||||||
|
{
|
||||||
|
return x.y;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ static __inline__ cuFloatComplex make_cuFloatComplex
|
||||||
|
(float r, float i)
|
||||||
|
{
|
||||||
|
cuFloatComplex res;
|
||||||
|
res.x = r;
|
||||||
|
res.y = i;
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ static __inline__ cuFloatComplex cuConjf (cuFloatComplex x)
|
||||||
|
{
|
||||||
|
return make_cuFloatComplex (cuCrealf(x), -cuCimagf(x));
|
||||||
|
}
|
||||||
|
__host__ __device__ static __inline__ cuFloatComplex cuCaddf (cuFloatComplex x,
|
||||||
|
cuFloatComplex y)
|
||||||
|
{
|
||||||
|
return make_cuFloatComplex (cuCrealf(x) + cuCrealf(y),
|
||||||
|
cuCimagf(x) + cuCimagf(y));
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ static __inline__ cuFloatComplex cuCsubf (cuFloatComplex x,
|
||||||
|
cuFloatComplex y)
|
||||||
|
{
|
||||||
|
return make_cuFloatComplex (cuCrealf(x) - cuCrealf(y),
|
||||||
|
cuCimagf(x) - cuCimagf(y));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This implementation could suffer from intermediate overflow even though
|
||||||
|
* the final result would be in range. However, various implementations do
|
||||||
|
* not guard against this (presumably to avoid losing performance), so we
|
||||||
|
* don't do it either to stay competitive.
|
||||||
|
*/
|
||||||
|
__host__ __device__ static __inline__ cuFloatComplex cuCmulf (cuFloatComplex x,
|
||||||
|
cuFloatComplex y)
|
||||||
|
{
|
||||||
|
cuFloatComplex prod;
|
||||||
|
prod = make_cuFloatComplex ((cuCrealf(x) * cuCrealf(y)) -
|
||||||
|
(cuCimagf(x) * cuCimagf(y)),
|
||||||
|
(cuCrealf(x) * cuCimagf(y)) +
|
||||||
|
(cuCimagf(x) * cuCrealf(y)));
|
||||||
|
return prod;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This implementation guards against intermediate underflow and overflow
|
||||||
|
* by scaling. Such guarded implementations are usually the default for
|
||||||
|
* complex library implementations, with some also offering an unguarded,
|
||||||
|
* faster version.
|
||||||
|
*/
|
||||||
|
__host__ __device__ static __inline__ cuFloatComplex cuCdivf (cuFloatComplex x,
|
||||||
|
cuFloatComplex y)
|
||||||
|
{
|
||||||
|
cuFloatComplex quot;
|
||||||
|
float s = fabsf(cuCrealf(y)) + fabsf(cuCimagf(y));
|
||||||
|
float oos = 1.0f / s;
|
||||||
|
float ars = cuCrealf(x) * oos;
|
||||||
|
float ais = cuCimagf(x) * oos;
|
||||||
|
float brs = cuCrealf(y) * oos;
|
||||||
|
float bis = cuCimagf(y) * oos;
|
||||||
|
s = (brs * brs) + (bis * bis);
|
||||||
|
oos = 1.0f / s;
|
||||||
|
quot = make_cuFloatComplex (((ars * brs) + (ais * bis)) * oos,
|
||||||
|
((ais * brs) - (ars * bis)) * oos);
|
||||||
|
return quot;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We would like to call hypotf(), but it's not available on all platforms.
|
||||||
|
* This discrete implementation guards against intermediate underflow and
|
||||||
|
* overflow by scaling. Otherwise we would lose half the exponent range.
|
||||||
|
* There are various ways of doing guarded computation. For now chose the
|
||||||
|
* simplest and fastest solution, however this may suffer from inaccuracies
|
||||||
|
* if sqrt and division are not IEEE compliant.
|
||||||
|
*/
|
||||||
|
__host__ __device__ static __inline__ float cuCabsf (cuFloatComplex x)
|
||||||
|
{
|
||||||
|
float a = cuCrealf(x);
|
||||||
|
float b = cuCimagf(x);
|
||||||
|
float v, w, t;
|
||||||
|
a = fabsf(a);
|
||||||
|
b = fabsf(b);
|
||||||
|
if (a > b) {
|
||||||
|
v = a;
|
||||||
|
w = b;
|
||||||
|
} else {
|
||||||
|
v = b;
|
||||||
|
w = a;
|
||||||
|
}
|
||||||
|
t = w / v;
|
||||||
|
t = 1.0f + t * t;
|
||||||
|
t = v * sqrtf(t);
|
||||||
|
if ((v == 0.0f) || (v > 3.402823466e38f) || (w > 3.402823466e38f)) {
|
||||||
|
t = v + w;
|
||||||
|
}
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Double precision */
|
||||||
|
typedef double2 cuDoubleComplex;
|
||||||
|
|
||||||
|
__host__ __device__ static __inline__ double cuCreal (cuDoubleComplex x)
|
||||||
|
{
|
||||||
|
return x.x;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ static __inline__ double cuCimag (cuDoubleComplex x)
|
||||||
|
{
|
||||||
|
return x.y;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ static __inline__ cuDoubleComplex make_cuDoubleComplex
|
||||||
|
(double r, double i)
|
||||||
|
{
|
||||||
|
cuDoubleComplex res;
|
||||||
|
res.x = r;
|
||||||
|
res.y = i;
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ static __inline__ cuDoubleComplex cuConj(cuDoubleComplex x)
|
||||||
|
{
|
||||||
|
return make_cuDoubleComplex (cuCreal(x), -cuCimag(x));
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ static __inline__ cuDoubleComplex cuCadd(cuDoubleComplex x,
|
||||||
|
cuDoubleComplex y)
|
||||||
|
{
|
||||||
|
return make_cuDoubleComplex (cuCreal(x) + cuCreal(y),
|
||||||
|
cuCimag(x) + cuCimag(y));
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ static __inline__ cuDoubleComplex cuCsub(cuDoubleComplex x,
|
||||||
|
cuDoubleComplex y)
|
||||||
|
{
|
||||||
|
return make_cuDoubleComplex (cuCreal(x) - cuCreal(y),
|
||||||
|
cuCimag(x) - cuCimag(y));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This implementation could suffer from intermediate overflow even though
|
||||||
|
* the final result would be in range. However, various implementations do
|
||||||
|
* not guard against this (presumably to avoid losing performance), so we
|
||||||
|
* don't do it either to stay competitive.
|
||||||
|
*/
|
||||||
|
__host__ __device__ static __inline__ cuDoubleComplex cuCmul(cuDoubleComplex x,
|
||||||
|
cuDoubleComplex y)
|
||||||
|
{
|
||||||
|
cuDoubleComplex prod;
|
||||||
|
prod = make_cuDoubleComplex ((cuCreal(x) * cuCreal(y)) -
|
||||||
|
(cuCimag(x) * cuCimag(y)),
|
||||||
|
(cuCreal(x) * cuCimag(y)) +
|
||||||
|
(cuCimag(x) * cuCreal(y)));
|
||||||
|
return prod;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This implementation guards against intermediate underflow and overflow
|
||||||
|
* by scaling. Such guarded implementations are usually the default for
|
||||||
|
* complex library implementations, with some also offering an unguarded,
|
||||||
|
* faster version.
|
||||||
|
*/
|
||||||
|
__host__ __device__ static __inline__ cuDoubleComplex cuCdiv(cuDoubleComplex x,
|
||||||
|
cuDoubleComplex y)
|
||||||
|
{
|
||||||
|
cuDoubleComplex quot;
|
||||||
|
double s = (fabs(cuCreal(y))) + (fabs(cuCimag(y)));
|
||||||
|
double oos = 1.0 / s;
|
||||||
|
double ars = cuCreal(x) * oos;
|
||||||
|
double ais = cuCimag(x) * oos;
|
||||||
|
double brs = cuCreal(y) * oos;
|
||||||
|
double bis = cuCimag(y) * oos;
|
||||||
|
s = (brs * brs) + (bis * bis);
|
||||||
|
oos = 1.0 / s;
|
||||||
|
quot = make_cuDoubleComplex (((ars * brs) + (ais * bis)) * oos,
|
||||||
|
((ais * brs) - (ars * bis)) * oos);
|
||||||
|
return quot;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This implementation guards against intermediate underflow and overflow
|
||||||
|
* by scaling. Otherwise we would lose half the exponent range. There are
|
||||||
|
* various ways of doing guarded computation. For now chose the simplest
|
||||||
|
* and fastest solution, however this may suffer from inaccuracies if sqrt
|
||||||
|
* and division are not IEEE compliant.
|
||||||
|
*/
|
||||||
|
__host__ __device__ static __inline__ double cuCabs (cuDoubleComplex x)
|
||||||
|
{
|
||||||
|
double a = cuCreal(x);
|
||||||
|
double b = cuCimag(x);
|
||||||
|
double v, w, t;
|
||||||
|
a = fabs(a);
|
||||||
|
b = fabs(b);
|
||||||
|
if (a > b) {
|
||||||
|
v = a;
|
||||||
|
w = b;
|
||||||
|
} else {
|
||||||
|
v = b;
|
||||||
|
w = a;
|
||||||
|
}
|
||||||
|
t = w / v;
|
||||||
|
t = 1.0 + t * t;
|
||||||
|
t = v * sqrt(t);
|
||||||
|
if ((v == 0.0) ||
|
||||||
|
(v > 1.79769313486231570e+308) || (w > 1.79769313486231570e+308)) {
|
||||||
|
t = v + w;
|
||||||
|
}
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
}
|
||||||
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
|
/* aliases */
|
||||||
|
typedef cuFloatComplex cuComplex;
|
||||||
|
__host__ __device__ static __inline__ cuComplex make_cuComplex (float x,
|
||||||
|
float y)
|
||||||
|
{
|
||||||
|
return make_cuFloatComplex (x, y);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* float-to-double promotion */
|
||||||
|
__host__ __device__ static __inline__ cuDoubleComplex cuComplexFloatToDouble
|
||||||
|
(cuFloatComplex c)
|
||||||
|
{
|
||||||
|
return make_cuDoubleComplex ((double)cuCrealf(c), (double)cuCimagf(c));
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ static __inline__ cuFloatComplex cuComplexDoubleToFloat
|
||||||
|
(cuDoubleComplex c)
|
||||||
|
{
|
||||||
|
return make_cuFloatComplex ((float)cuCreal(c), (float)cuCimag(c));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
__host__ __device__ static __inline__ cuComplex cuCfmaf( cuComplex x, cuComplex y, cuComplex d)
|
||||||
|
{
|
||||||
|
float real_res;
|
||||||
|
float imag_res;
|
||||||
|
|
||||||
|
real_res = (cuCrealf(x) * cuCrealf(y)) + cuCrealf(d);
|
||||||
|
imag_res = (cuCrealf(x) * cuCimagf(y)) + cuCimagf(d);
|
||||||
|
|
||||||
|
real_res = -(cuCimagf(x) * cuCimagf(y)) + real_res;
|
||||||
|
imag_res = (cuCimagf(x) * cuCrealf(y)) + imag_res;
|
||||||
|
|
||||||
|
return make_cuComplex(real_res, imag_res);
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ static __inline__ cuDoubleComplex cuCfma( cuDoubleComplex x, cuDoubleComplex y, cuDoubleComplex d)
|
||||||
|
{
|
||||||
|
double real_res;
|
||||||
|
double imag_res;
|
||||||
|
|
||||||
|
real_res = (cuCreal(x) * cuCreal(y)) + cuCreal(d);
|
||||||
|
imag_res = (cuCreal(x) * cuCimag(y)) + cuCimag(d);
|
||||||
|
|
||||||
|
real_res = -(cuCimag(x) * cuCimag(y)) + real_res;
|
||||||
|
imag_res = (cuCimag(x) * cuCreal(y)) + imag_res;
|
||||||
|
|
||||||
|
return make_cuDoubleComplex(real_res, imag_res);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* !defined(CU_COMPLEX_H_) */
|
565
include/external/CUDA/cublas.h
vendored
Executable file
565
include/external/CUDA/cublas.h
vendored
Executable file
@@ -0,0 +1,565 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
|
||||||
|
*
|
||||||
|
* NOTICE TO LICENSEE:
|
||||||
|
*
|
||||||
|
* This source code and/or documentation ("Licensed Deliverables") are
|
||||||
|
* subject to NVIDIA intellectual property rights under U.S. and
|
||||||
|
* international Copyright laws.
|
||||||
|
*
|
||||||
|
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||||
|
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||||
|
* conditions of a form of NVIDIA software license agreement by and
|
||||||
|
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||||
|
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||||
|
* the contrary in the License Agreement, reproduction or disclosure
|
||||||
|
* of the Licensed Deliverables to any third party without the express
|
||||||
|
* written consent of NVIDIA is prohibited.
|
||||||
|
*
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||||
|
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||||
|
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||||
|
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||||
|
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||||
|
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||||
|
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||||
|
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||||
|
* OF THESE LICENSED DELIVERABLES.
|
||||||
|
*
|
||||||
|
* U.S. Government End Users. These Licensed Deliverables are a
|
||||||
|
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||||
|
* 1995), consisting of "commercial computer software" and "commercial
|
||||||
|
* computer software documentation" as such terms are used in 48
|
||||||
|
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||||
|
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||||
|
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||||
|
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||||
|
* only those rights set forth herein.
|
||||||
|
*
|
||||||
|
* Any use of the Licensed Deliverables in individual and commercial
|
||||||
|
* software must include, in the user documentation and internal
|
||||||
|
* comments to the code, the above Disclaimer and U.S. Government End
|
||||||
|
* Users Notice.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is the public header file for the CUBLAS library, defining the API
|
||||||
|
*
|
||||||
|
* CUBLAS is an implementation of BLAS (Basic Linear Algebra Subroutines)
|
||||||
|
* on top of the CUDA runtime.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if !defined(CUBLAS_H_)
|
||||||
|
#define CUBLAS_H_
|
||||||
|
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
|
||||||
|
#ifndef CUBLASWINAPI
|
||||||
|
#ifdef _WIN32
|
||||||
|
#define CUBLASWINAPI __stdcall
|
||||||
|
#else
|
||||||
|
#define CUBLASWINAPI
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#undef CUBLASAPI
|
||||||
|
#ifdef __CUDACC__
|
||||||
|
#define CUBLASAPI __host__
|
||||||
|
#else
|
||||||
|
#define CUBLASAPI
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "cublas_api.h"
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* CUBLAS data types */
|
||||||
|
#define cublasStatus cublasStatus_t
|
||||||
|
|
||||||
|
cublasStatus CUBLASWINAPI cublasInit (void);
|
||||||
|
cublasStatus CUBLASWINAPI cublasShutdown (void);
|
||||||
|
cublasStatus CUBLASWINAPI cublasGetError (void);
|
||||||
|
|
||||||
|
cublasStatus CUBLASWINAPI cublasGetVersion(int *version);
|
||||||
|
cublasStatus CUBLASWINAPI cublasAlloc (int n, int elemSize, void **devicePtr);
|
||||||
|
|
||||||
|
cublasStatus CUBLASWINAPI cublasFree (void *devicePtr);
|
||||||
|
|
||||||
|
|
||||||
|
cublasStatus CUBLASWINAPI cublasSetKernelStream (cudaStream_t stream);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* ---------------- CUBLAS BLAS1 functions ---------------- */
|
||||||
|
/* NRM2 */
|
||||||
|
float CUBLASWINAPI cublasSnrm2 (int n, const float *x, int incx);
|
||||||
|
double CUBLASWINAPI cublasDnrm2 (int n, const double *x, int incx);
|
||||||
|
float CUBLASWINAPI cublasScnrm2 (int n, const cuComplex *x, int incx);
|
||||||
|
double CUBLASWINAPI cublasDznrm2 (int n, const cuDoubleComplex *x, int incx);
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* DOT */
|
||||||
|
float CUBLASWINAPI cublasSdot (int n, const float *x, int incx, const float *y,
|
||||||
|
int incy);
|
||||||
|
double CUBLASWINAPI cublasDdot (int n, const double *x, int incx, const double *y,
|
||||||
|
int incy);
|
||||||
|
cuComplex CUBLASWINAPI cublasCdotu (int n, const cuComplex *x, int incx, const cuComplex *y,
|
||||||
|
int incy);
|
||||||
|
cuComplex CUBLASWINAPI cublasCdotc (int n, const cuComplex *x, int incx, const cuComplex *y,
|
||||||
|
int incy);
|
||||||
|
cuDoubleComplex CUBLASWINAPI cublasZdotu (int n, const cuDoubleComplex *x, int incx, const cuDoubleComplex *y,
|
||||||
|
int incy);
|
||||||
|
cuDoubleComplex CUBLASWINAPI cublasZdotc (int n, const cuDoubleComplex *x, int incx, const cuDoubleComplex *y,
|
||||||
|
int incy);
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* SCAL */
|
||||||
|
void CUBLASWINAPI cublasSscal (int n, float alpha, float *x, int incx);
|
||||||
|
void CUBLASWINAPI cublasDscal (int n, double alpha, double *x, int incx);
|
||||||
|
void CUBLASWINAPI cublasCscal (int n, cuComplex alpha, cuComplex *x, int incx);
|
||||||
|
void CUBLASWINAPI cublasZscal (int n, cuDoubleComplex alpha, cuDoubleComplex *x, int incx);
|
||||||
|
|
||||||
|
void CUBLASWINAPI cublasCsscal (int n, float alpha, cuComplex *x, int incx);
|
||||||
|
void CUBLASWINAPI cublasZdscal (int n, double alpha, cuDoubleComplex *x, int incx);
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* AXPY */
|
||||||
|
void CUBLASWINAPI cublasSaxpy (int n, float alpha, const float *x, int incx,
|
||||||
|
float *y, int incy);
|
||||||
|
void CUBLASWINAPI cublasDaxpy (int n, double alpha, const double *x,
|
||||||
|
int incx, double *y, int incy);
|
||||||
|
void CUBLASWINAPI cublasCaxpy (int n, cuComplex alpha, const cuComplex *x,
|
||||||
|
int incx, cuComplex *y, int incy);
|
||||||
|
void CUBLASWINAPI cublasZaxpy (int n, cuDoubleComplex alpha, const cuDoubleComplex *x,
|
||||||
|
int incx, cuDoubleComplex *y, int incy);
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* COPY */
|
||||||
|
void CUBLASWINAPI cublasScopy (int n, const float *x, int incx, float *y,
|
||||||
|
int incy);
|
||||||
|
void CUBLASWINAPI cublasDcopy (int n, const double *x, int incx, double *y,
|
||||||
|
int incy);
|
||||||
|
void CUBLASWINAPI cublasCcopy (int n, const cuComplex *x, int incx, cuComplex *y,
|
||||||
|
int incy);
|
||||||
|
void CUBLASWINAPI cublasZcopy (int n, const cuDoubleComplex *x, int incx, cuDoubleComplex *y,
|
||||||
|
int incy);
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* SWAP */
|
||||||
|
void CUBLASWINAPI cublasSswap (int n, float *x, int incx, float *y, int incy);
|
||||||
|
void CUBLASWINAPI cublasDswap (int n, double *x, int incx, double *y, int incy);
|
||||||
|
void CUBLASWINAPI cublasCswap (int n, cuComplex *x, int incx, cuComplex *y, int incy);
|
||||||
|
void CUBLASWINAPI cublasZswap (int n, cuDoubleComplex *x, int incx, cuDoubleComplex *y, int incy);
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* AMAX */
|
||||||
|
int CUBLASWINAPI cublasIsamax (int n, const float *x, int incx);
|
||||||
|
int CUBLASWINAPI cublasIdamax (int n, const double *x, int incx);
|
||||||
|
int CUBLASWINAPI cublasIcamax (int n, const cuComplex *x, int incx);
|
||||||
|
int CUBLASWINAPI cublasIzamax (int n, const cuDoubleComplex *x, int incx);
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* AMIN */
|
||||||
|
int CUBLASWINAPI cublasIsamin (int n, const float *x, int incx);
|
||||||
|
int CUBLASWINAPI cublasIdamin (int n, const double *x, int incx);
|
||||||
|
|
||||||
|
int CUBLASWINAPI cublasIcamin (int n, const cuComplex *x, int incx);
|
||||||
|
int CUBLASWINAPI cublasIzamin (int n, const cuDoubleComplex *x, int incx);
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* ASUM */
|
||||||
|
float CUBLASWINAPI cublasSasum (int n, const float *x, int incx);
|
||||||
|
double CUBLASWINAPI cublasDasum (int n, const double *x, int incx);
|
||||||
|
float CUBLASWINAPI cublasScasum (int n, const cuComplex *x, int incx);
|
||||||
|
double CUBLASWINAPI cublasDzasum (int n, const cuDoubleComplex *x, int incx);
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* ROT */
|
||||||
|
void CUBLASWINAPI cublasSrot (int n, float *x, int incx, float *y, int incy,
|
||||||
|
float sc, float ss);
|
||||||
|
void CUBLASWINAPI cublasDrot (int n, double *x, int incx, double *y, int incy,
|
||||||
|
double sc, double ss);
|
||||||
|
void CUBLASWINAPI cublasCrot (int n, cuComplex *x, int incx, cuComplex *y,
|
||||||
|
int incy, float c, cuComplex s);
|
||||||
|
void CUBLASWINAPI cublasZrot (int n, cuDoubleComplex *x, int incx,
|
||||||
|
cuDoubleComplex *y, int incy, double sc,
|
||||||
|
cuDoubleComplex cs);
|
||||||
|
void CUBLASWINAPI cublasCsrot (int n, cuComplex *x, int incx, cuComplex *y,
|
||||||
|
int incy, float c, float s);
|
||||||
|
void CUBLASWINAPI cublasZdrot (int n, cuDoubleComplex *x, int incx,
|
||||||
|
cuDoubleComplex *y, int incy, double c, double s);
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* ROTG */
|
||||||
|
void CUBLASWINAPI cublasSrotg (float *sa, float *sb, float *sc, float *ss);
|
||||||
|
void CUBLASWINAPI cublasDrotg (double *sa, double *sb, double *sc, double *ss);
|
||||||
|
void CUBLASWINAPI cublasCrotg (cuComplex *ca, cuComplex cb, float *sc,
|
||||||
|
cuComplex *cs);
|
||||||
|
void CUBLASWINAPI cublasZrotg (cuDoubleComplex *ca, cuDoubleComplex cb, double *sc,
|
||||||
|
cuDoubleComplex *cs);
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* ROTM */
|
||||||
|
void CUBLASWINAPI cublasSrotm(int n, float *x, int incx, float *y, int incy,
|
||||||
|
const float* sparam);
|
||||||
|
void CUBLASWINAPI cublasDrotm(int n, double *x, int incx, double *y, int incy,
|
||||||
|
const double* sparam);
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* ROTMG */
|
||||||
|
void CUBLASWINAPI cublasSrotmg (float *sd1, float *sd2, float *sx1,
|
||||||
|
const float *sy1, float* sparam);
|
||||||
|
void CUBLASWINAPI cublasDrotmg (double *sd1, double *sd2, double *sx1,
|
||||||
|
const double *sy1, double* sparam);
|
||||||
|
|
||||||
|
/* --------------- CUBLAS BLAS2 functions ---------------- */
|
||||||
|
/* GEMV */
|
||||||
|
void CUBLASWINAPI cublasSgemv (char trans, int m, int n, float alpha,
|
||||||
|
const float *A, int lda, const float *x, int incx,
|
||||||
|
float beta, float *y, int incy);
|
||||||
|
void CUBLASWINAPI cublasDgemv (char trans, int m, int n, double alpha,
|
||||||
|
const double *A, int lda, const double *x, int incx,
|
||||||
|
double beta, double *y, int incy);
|
||||||
|
void CUBLASWINAPI cublasCgemv (char trans, int m, int n, cuComplex alpha,
|
||||||
|
const cuComplex *A, int lda, const cuComplex *x, int incx,
|
||||||
|
cuComplex beta, cuComplex *y, int incy);
|
||||||
|
void CUBLASWINAPI cublasZgemv (char trans, int m, int n, cuDoubleComplex alpha,
|
||||||
|
const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx,
|
||||||
|
cuDoubleComplex beta, cuDoubleComplex *y, int incy);
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* GBMV */
|
||||||
|
void CUBLASWINAPI cublasSgbmv (char trans, int m, int n, int kl, int ku,
|
||||||
|
float alpha, const float *A, int lda,
|
||||||
|
const float *x, int incx, float beta, float *y,
|
||||||
|
int incy);
|
||||||
|
void CUBLASWINAPI cublasDgbmv (char trans, int m, int n, int kl, int ku,
|
||||||
|
double alpha, const double *A, int lda,
|
||||||
|
const double *x, int incx, double beta, double *y,
|
||||||
|
int incy);
|
||||||
|
void CUBLASWINAPI cublasCgbmv (char trans, int m, int n, int kl, int ku,
|
||||||
|
cuComplex alpha, const cuComplex *A, int lda,
|
||||||
|
const cuComplex *x, int incx, cuComplex beta, cuComplex *y,
|
||||||
|
int incy);
|
||||||
|
void CUBLASWINAPI cublasZgbmv (char trans, int m, int n, int kl, int ku,
|
||||||
|
cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
|
||||||
|
const cuDoubleComplex *x, int incx, cuDoubleComplex beta, cuDoubleComplex *y,
|
||||||
|
int incy);
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* TRMV */
|
||||||
|
void CUBLASWINAPI cublasStrmv (char uplo, char trans, char diag, int n,
|
||||||
|
const float *A, int lda, float *x, int incx);
|
||||||
|
void CUBLASWINAPI cublasDtrmv (char uplo, char trans, char diag, int n,
|
||||||
|
const double *A, int lda, double *x, int incx);
|
||||||
|
void CUBLASWINAPI cublasCtrmv (char uplo, char trans, char diag, int n,
|
||||||
|
const cuComplex *A, int lda, cuComplex *x, int incx);
|
||||||
|
void CUBLASWINAPI cublasZtrmv (char uplo, char trans, char diag, int n,
|
||||||
|
const cuDoubleComplex *A, int lda, cuDoubleComplex *x, int incx);
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* TBMV */
|
||||||
|
void CUBLASWINAPI cublasStbmv (char uplo, char trans, char diag, int n, int k,
|
||||||
|
const float *A, int lda, float *x, int incx);
|
||||||
|
void CUBLASWINAPI cublasDtbmv (char uplo, char trans, char diag, int n, int k,
|
||||||
|
const double *A, int lda, double *x, int incx);
|
||||||
|
void CUBLASWINAPI cublasCtbmv (char uplo, char trans, char diag, int n, int k,
|
||||||
|
const cuComplex *A, int lda, cuComplex *x, int incx);
|
||||||
|
void CUBLASWINAPI cublasZtbmv (char uplo, char trans, char diag, int n, int k,
|
||||||
|
const cuDoubleComplex *A, int lda, cuDoubleComplex *x, int incx);
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* TPMV */
|
||||||
|
void CUBLASWINAPI cublasStpmv(char uplo, char trans, char diag, int n, const float *AP, float *x, int incx);
|
||||||
|
|
||||||
|
void CUBLASWINAPI cublasDtpmv(char uplo, char trans, char diag, int n, const double *AP, double *x, int incx);
|
||||||
|
|
||||||
|
void CUBLASWINAPI cublasCtpmv(char uplo, char trans, char diag, int n, const cuComplex *AP, cuComplex *x, int incx);
|
||||||
|
|
||||||
|
void CUBLASWINAPI cublasZtpmv(char uplo, char trans, char diag, int n, const cuDoubleComplex *AP, cuDoubleComplex *x, int incx);
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* TRSV */
|
||||||
|
void CUBLASWINAPI cublasStrsv(char uplo, char trans, char diag, int n, const float *A, int lda, float *x, int incx);
|
||||||
|
|
||||||
|
void CUBLASWINAPI cublasDtrsv(char uplo, char trans, char diag, int n, const double *A, int lda, double *x, int incx);
|
||||||
|
|
||||||
|
void CUBLASWINAPI cublasCtrsv(char uplo, char trans, char diag, int n, const cuComplex *A, int lda, cuComplex *x, int incx);
|
||||||
|
|
||||||
|
void CUBLASWINAPI cublasZtrsv(char uplo, char trans, char diag, int n, const cuDoubleComplex *A, int lda,
|
||||||
|
cuDoubleComplex *x, int incx);
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* TPSV */
|
||||||
|
void CUBLASWINAPI cublasStpsv(char uplo, char trans, char diag, int n, const float *AP,
|
||||||
|
float *x, int incx);
|
||||||
|
|
||||||
|
void CUBLASWINAPI cublasDtpsv(char uplo, char trans, char diag, int n, const double *AP, double *x, int incx);
|
||||||
|
|
||||||
|
void CUBLASWINAPI cublasCtpsv(char uplo, char trans, char diag, int n, const cuComplex *AP, cuComplex *x, int incx);
|
||||||
|
|
||||||
|
void CUBLASWINAPI cublasZtpsv(char uplo, char trans, char diag, int n, const cuDoubleComplex *AP,
|
||||||
|
cuDoubleComplex *x, int incx);
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* TBSV */
|
||||||
|
void CUBLASWINAPI cublasStbsv(char uplo, char trans,
|
||||||
|
char diag, int n, int k, const float *A,
|
||||||
|
int lda, float *x, int incx);
|
||||||
|
|
||||||
|
void CUBLASWINAPI cublasDtbsv(char uplo, char trans,
|
||||||
|
char diag, int n, int k, const double *A,
|
||||||
|
int lda, double *x, int incx);
|
||||||
|
void CUBLASWINAPI cublasCtbsv(char uplo, char trans,
|
||||||
|
char diag, int n, int k, const cuComplex *A,
|
||||||
|
int lda, cuComplex *x, int incx);
|
||||||
|
|
||||||
|
void CUBLASWINAPI cublasZtbsv(char uplo, char trans,
|
||||||
|
char diag, int n, int k, const cuDoubleComplex *A,
|
||||||
|
int lda, cuDoubleComplex *x, int incx);
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* SYMV/HEMV */
|
||||||
|
void CUBLASWINAPI cublasSsymv (char uplo, int n, float alpha, const float *A,
|
||||||
|
int lda, const float *x, int incx, float beta,
|
||||||
|
float *y, int incy);
|
||||||
|
void CUBLASWINAPI cublasDsymv (char uplo, int n, double alpha, const double *A,
|
||||||
|
int lda, const double *x, int incx, double beta,
|
||||||
|
double *y, int incy);
|
||||||
|
void CUBLASWINAPI cublasChemv (char uplo, int n, cuComplex alpha, const cuComplex *A,
|
||||||
|
int lda, const cuComplex *x, int incx, cuComplex beta,
|
||||||
|
cuComplex *y, int incy);
|
||||||
|
void CUBLASWINAPI cublasZhemv (char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex *A,
|
||||||
|
int lda, const cuDoubleComplex *x, int incx, cuDoubleComplex beta,
|
||||||
|
cuDoubleComplex *y, int incy);
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* SBMV/HBMV */
|
||||||
|
void CUBLASWINAPI cublasSsbmv (char uplo, int n, int k, float alpha,
|
||||||
|
const float *A, int lda, const float *x, int incx,
|
||||||
|
float beta, float *y, int incy);
|
||||||
|
void CUBLASWINAPI cublasDsbmv (char uplo, int n, int k, double alpha,
|
||||||
|
const double *A, int lda, const double *x, int incx,
|
||||||
|
double beta, double *y, int incy);
|
||||||
|
void CUBLASWINAPI cublasChbmv (char uplo, int n, int k, cuComplex alpha,
|
||||||
|
const cuComplex *A, int lda, const cuComplex *x, int incx,
|
||||||
|
cuComplex beta, cuComplex *y, int incy);
|
||||||
|
void CUBLASWINAPI cublasZhbmv (char uplo, int n, int k, cuDoubleComplex alpha,
|
||||||
|
const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx,
|
||||||
|
cuDoubleComplex beta, cuDoubleComplex *y, int incy);
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* SPMV/HPMV */
|
||||||
|
void CUBLASWINAPI cublasSspmv(char uplo, int n, float alpha,
|
||||||
|
const float *AP, const float *x,
|
||||||
|
int incx, float beta, float *y, int incy);
|
||||||
|
void CUBLASWINAPI cublasDspmv(char uplo, int n, double alpha,
|
||||||
|
const double *AP, const double *x,
|
||||||
|
int incx, double beta, double *y, int incy);
|
||||||
|
void CUBLASWINAPI cublasChpmv(char uplo, int n, cuComplex alpha,
|
||||||
|
const cuComplex *AP, const cuComplex *x,
|
||||||
|
int incx, cuComplex beta, cuComplex *y, int incy);
|
||||||
|
void CUBLASWINAPI cublasZhpmv(char uplo, int n, cuDoubleComplex alpha,
|
||||||
|
const cuDoubleComplex *AP, const cuDoubleComplex *x,
|
||||||
|
int incx, cuDoubleComplex beta, cuDoubleComplex *y, int incy);
|
||||||
|
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* GER */
|
||||||
|
void CUBLASWINAPI cublasSger (int m, int n, float alpha, const float *x, int incx,
|
||||||
|
const float *y, int incy, float *A, int lda);
|
||||||
|
void CUBLASWINAPI cublasDger (int m, int n, double alpha, const double *x, int incx,
|
||||||
|
const double *y, int incy, double *A, int lda);
|
||||||
|
|
||||||
|
void CUBLASWINAPI cublasCgeru (int m, int n, cuComplex alpha, const cuComplex *x,
|
||||||
|
int incx, const cuComplex *y, int incy,
|
||||||
|
cuComplex *A, int lda);
|
||||||
|
void CUBLASWINAPI cublasCgerc (int m, int n, cuComplex alpha, const cuComplex *x,
|
||||||
|
int incx, const cuComplex *y, int incy,
|
||||||
|
cuComplex *A, int lda);
|
||||||
|
void CUBLASWINAPI cublasZgeru (int m, int n, cuDoubleComplex alpha, const cuDoubleComplex *x,
|
||||||
|
int incx, const cuDoubleComplex *y, int incy,
|
||||||
|
cuDoubleComplex *A, int lda);
|
||||||
|
void CUBLASWINAPI cublasZgerc (int m, int n, cuDoubleComplex alpha, const cuDoubleComplex *x,
|
||||||
|
int incx, const cuDoubleComplex *y, int incy,
|
||||||
|
cuDoubleComplex *A, int lda);
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* SYR/HER */
|
||||||
|
void CUBLASWINAPI cublasSsyr (char uplo, int n, float alpha, const float *x,
|
||||||
|
int incx, float *A, int lda);
|
||||||
|
void CUBLASWINAPI cublasDsyr (char uplo, int n, double alpha, const double *x,
|
||||||
|
int incx, double *A, int lda);
|
||||||
|
|
||||||
|
void CUBLASWINAPI cublasCher (char uplo, int n, float alpha,
|
||||||
|
const cuComplex *x, int incx, cuComplex *A, int lda);
|
||||||
|
void CUBLASWINAPI cublasZher (char uplo, int n, double alpha,
|
||||||
|
const cuDoubleComplex *x, int incx, cuDoubleComplex *A, int lda);
|
||||||
|
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* SPR/HPR */
|
||||||
|
void CUBLASWINAPI cublasSspr (char uplo, int n, float alpha, const float *x,
|
||||||
|
int incx, float *AP);
|
||||||
|
void CUBLASWINAPI cublasDspr (char uplo, int n, double alpha, const double *x,
|
||||||
|
int incx, double *AP);
|
||||||
|
void CUBLASWINAPI cublasChpr (char uplo, int n, float alpha, const cuComplex *x,
|
||||||
|
int incx, cuComplex *AP);
|
||||||
|
void CUBLASWINAPI cublasZhpr (char uplo, int n, double alpha, const cuDoubleComplex *x,
|
||||||
|
int incx, cuDoubleComplex *AP);
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* SYR2/HER2 */
|
||||||
|
void CUBLASWINAPI cublasSsyr2 (char uplo, int n, float alpha, const float *x,
|
||||||
|
int incx, const float *y, int incy, float *A,
|
||||||
|
int lda);
|
||||||
|
void CUBLASWINAPI cublasDsyr2 (char uplo, int n, double alpha, const double *x,
|
||||||
|
int incx, const double *y, int incy, double *A,
|
||||||
|
int lda);
|
||||||
|
void CUBLASWINAPI cublasCher2 (char uplo, int n, cuComplex alpha, const cuComplex *x,
|
||||||
|
int incx, const cuComplex *y, int incy, cuComplex *A,
|
||||||
|
int lda);
|
||||||
|
void CUBLASWINAPI cublasZher2 (char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex *x,
|
||||||
|
int incx, const cuDoubleComplex *y, int incy, cuDoubleComplex *A,
|
||||||
|
int lda);
|
||||||
|
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* SPR2/HPR2 */
|
||||||
|
void CUBLASWINAPI cublasSspr2 (char uplo, int n, float alpha, const float *x,
|
||||||
|
int incx, const float *y, int incy, float *AP);
|
||||||
|
void CUBLASWINAPI cublasDspr2 (char uplo, int n, double alpha,
|
||||||
|
const double *x, int incx, const double *y,
|
||||||
|
int incy, double *AP);
|
||||||
|
void CUBLASWINAPI cublasChpr2 (char uplo, int n, cuComplex alpha,
|
||||||
|
const cuComplex *x, int incx, const cuComplex *y,
|
||||||
|
int incy, cuComplex *AP);
|
||||||
|
void CUBLASWINAPI cublasZhpr2 (char uplo, int n, cuDoubleComplex alpha,
|
||||||
|
const cuDoubleComplex *x, int incx, const cuDoubleComplex *y,
|
||||||
|
int incy, cuDoubleComplex *AP);
|
||||||
|
/* ------------------------BLAS3 Functions ------------------------------- */
|
||||||
|
/* GEMM */
|
||||||
|
void CUBLASWINAPI cublasSgemm (char transa, char transb, int m, int n, int k,
|
||||||
|
float alpha, const float *A, int lda,
|
||||||
|
const float *B, int ldb, float beta, float *C,
|
||||||
|
int ldc);
|
||||||
|
void CUBLASWINAPI cublasDgemm (char transa, char transb, int m, int n, int k,
|
||||||
|
double alpha, const double *A, int lda,
|
||||||
|
const double *B, int ldb, double beta, double *C,
|
||||||
|
int ldc);
|
||||||
|
void CUBLASWINAPI cublasCgemm (char transa, char transb, int m, int n, int k,
|
||||||
|
cuComplex alpha, const cuComplex *A, int lda,
|
||||||
|
const cuComplex *B, int ldb, cuComplex beta,
|
||||||
|
cuComplex *C, int ldc);
|
||||||
|
void CUBLASWINAPI cublasZgemm (char transa, char transb, int m, int n,
|
||||||
|
int k, cuDoubleComplex alpha,
|
||||||
|
const cuDoubleComplex *A, int lda,
|
||||||
|
const cuDoubleComplex *B, int ldb,
|
||||||
|
cuDoubleComplex beta, cuDoubleComplex *C,
|
||||||
|
int ldc);
|
||||||
|
/* -------------------------------------------------------*/
|
||||||
|
/* SYRK */
|
||||||
|
void CUBLASWINAPI cublasSsyrk (char uplo, char trans, int n, int k, float alpha,
|
||||||
|
const float *A, int lda, float beta, float *C,
|
||||||
|
int ldc);
|
||||||
|
void CUBLASWINAPI cublasDsyrk (char uplo, char trans, int n, int k,
|
||||||
|
double alpha, const double *A, int lda,
|
||||||
|
double beta, double *C, int ldc);
|
||||||
|
|
||||||
|
void CUBLASWINAPI cublasCsyrk (char uplo, char trans, int n, int k,
|
||||||
|
cuComplex alpha, const cuComplex *A, int lda,
|
||||||
|
cuComplex beta, cuComplex *C, int ldc);
|
||||||
|
void CUBLASWINAPI cublasZsyrk (char uplo, char trans, int n, int k,
|
||||||
|
cuDoubleComplex alpha,
|
||||||
|
const cuDoubleComplex *A, int lda,
|
||||||
|
cuDoubleComplex beta,
|
||||||
|
cuDoubleComplex *C, int ldc);
|
||||||
|
/* ------------------------------------------------------- */
|
||||||
|
/* HERK */
|
||||||
|
void CUBLASWINAPI cublasCherk (char uplo, char trans, int n, int k,
|
||||||
|
float alpha, const cuComplex *A, int lda,
|
||||||
|
float beta, cuComplex *C, int ldc);
|
||||||
|
void CUBLASWINAPI cublasZherk (char uplo, char trans, int n, int k,
|
||||||
|
double alpha,
|
||||||
|
const cuDoubleComplex *A, int lda,
|
||||||
|
double beta,
|
||||||
|
cuDoubleComplex *C, int ldc);
|
||||||
|
/* ------------------------------------------------------- */
|
||||||
|
/* SYR2K */
|
||||||
|
void CUBLASWINAPI cublasSsyr2k (char uplo, char trans, int n, int k, float alpha,
|
||||||
|
const float *A, int lda, const float *B, int ldb,
|
||||||
|
float beta, float *C, int ldc);
|
||||||
|
|
||||||
|
void CUBLASWINAPI cublasDsyr2k (char uplo, char trans, int n, int k,
|
||||||
|
double alpha, const double *A, int lda,
|
||||||
|
const double *B, int ldb, double beta,
|
||||||
|
double *C, int ldc);
|
||||||
|
void CUBLASWINAPI cublasCsyr2k (char uplo, char trans, int n, int k,
|
||||||
|
cuComplex alpha, const cuComplex *A, int lda,
|
||||||
|
const cuComplex *B, int ldb, cuComplex beta,
|
||||||
|
cuComplex *C, int ldc);
|
||||||
|
|
||||||
|
void CUBLASWINAPI cublasZsyr2k (char uplo, char trans, int n, int k,
|
||||||
|
cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
|
||||||
|
const cuDoubleComplex *B, int ldb, cuDoubleComplex beta,
|
||||||
|
cuDoubleComplex *C, int ldc);
|
||||||
|
/* ------------------------------------------------------- */
|
||||||
|
/* HER2K */
|
||||||
|
void CUBLASWINAPI cublasCher2k (char uplo, char trans, int n, int k,
|
||||||
|
cuComplex alpha, const cuComplex *A, int lda,
|
||||||
|
const cuComplex *B, int ldb, float beta,
|
||||||
|
cuComplex *C, int ldc);
|
||||||
|
|
||||||
|
void CUBLASWINAPI cublasZher2k (char uplo, char trans, int n, int k,
|
||||||
|
cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
|
||||||
|
const cuDoubleComplex *B, int ldb, double beta,
|
||||||
|
cuDoubleComplex *C, int ldc);
|
||||||
|
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* SYMM*/
|
||||||
|
void CUBLASWINAPI cublasSsymm (char side, char uplo, int m, int n, float alpha,
|
||||||
|
const float *A, int lda, const float *B, int ldb,
|
||||||
|
float beta, float *C, int ldc);
|
||||||
|
void CUBLASWINAPI cublasDsymm (char side, char uplo, int m, int n, double alpha,
|
||||||
|
const double *A, int lda, const double *B, int ldb,
|
||||||
|
double beta, double *C, int ldc);
|
||||||
|
|
||||||
|
void CUBLASWINAPI cublasCsymm (char side, char uplo, int m, int n, cuComplex alpha,
|
||||||
|
const cuComplex *A, int lda, const cuComplex *B, int ldb,
|
||||||
|
cuComplex beta, cuComplex *C, int ldc);
|
||||||
|
|
||||||
|
void CUBLASWINAPI cublasZsymm (char side, char uplo, int m, int n, cuDoubleComplex alpha,
|
||||||
|
const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
|
||||||
|
cuDoubleComplex beta, cuDoubleComplex *C, int ldc);
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* HEMM*/
|
||||||
|
void CUBLASWINAPI cublasChemm (char side, char uplo, int m, int n,
|
||||||
|
cuComplex alpha, const cuComplex *A, int lda,
|
||||||
|
const cuComplex *B, int ldb, cuComplex beta,
|
||||||
|
cuComplex *C, int ldc);
|
||||||
|
void CUBLASWINAPI cublasZhemm (char side, char uplo, int m, int n,
|
||||||
|
cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
|
||||||
|
const cuDoubleComplex *B, int ldb, cuDoubleComplex beta,
|
||||||
|
cuDoubleComplex *C, int ldc);
|
||||||
|
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* TRSM*/
|
||||||
|
void CUBLASWINAPI cublasStrsm (char side, char uplo, char transa, char diag,
|
||||||
|
int m, int n, float alpha, const float *A, int lda,
|
||||||
|
float *B, int ldb);
|
||||||
|
|
||||||
|
void CUBLASWINAPI cublasDtrsm (char side, char uplo, char transa,
|
||||||
|
char diag, int m, int n, double alpha,
|
||||||
|
const double *A, int lda, double *B,
|
||||||
|
int ldb);
|
||||||
|
|
||||||
|
void CUBLASWINAPI cublasCtrsm (char side, char uplo, char transa, char diag,
|
||||||
|
int m, int n, cuComplex alpha, const cuComplex *A,
|
||||||
|
int lda, cuComplex *B, int ldb);
|
||||||
|
|
||||||
|
void CUBLASWINAPI cublasZtrsm (char side, char uplo, char transa,
|
||||||
|
char diag, int m, int n, cuDoubleComplex alpha,
|
||||||
|
const cuDoubleComplex *A, int lda,
|
||||||
|
cuDoubleComplex *B, int ldb);
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/* TRMM*/
|
||||||
|
void CUBLASWINAPI cublasStrmm (char side, char uplo, char transa, char diag,
|
||||||
|
int m, int n, float alpha, const float *A, int lda,
|
||||||
|
float *B, int ldb);
|
||||||
|
void CUBLASWINAPI cublasDtrmm (char side, char uplo, char transa,
|
||||||
|
char diag, int m, int n, double alpha,
|
||||||
|
const double *A, int lda, double *B,
|
||||||
|
int ldb);
|
||||||
|
void CUBLASWINAPI cublasCtrmm (char side, char uplo, char transa, char diag,
|
||||||
|
int m, int n, cuComplex alpha, const cuComplex *A,
|
||||||
|
int lda, cuComplex *B, int ldb);
|
||||||
|
void CUBLASWINAPI cublasZtrmm (char side, char uplo, char transa,
|
||||||
|
char diag, int m, int n, cuDoubleComplex alpha,
|
||||||
|
const cuDoubleComplex *A, int lda, cuDoubleComplex *B,
|
||||||
|
int ldb);
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
}
|
||||||
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
|
#endif /* !defined(CUBLAS_H_) */
|
2977
include/external/CUDA/cublas_api.h
vendored
Executable file
2977
include/external/CUDA/cublas_api.h
vendored
Executable file
File diff suppressed because it is too large
Load Diff
274
include/external/CUDA/cublas_v2.h
vendored
Normal file
274
include/external/CUDA/cublas_v2.h
vendored
Normal file
@@ -0,0 +1,274 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
|
||||||
|
*
|
||||||
|
* NOTICE TO LICENSEE:
|
||||||
|
*
|
||||||
|
* This source code and/or documentation ("Licensed Deliverables") are
|
||||||
|
* subject to NVIDIA intellectual property rights under U.S. and
|
||||||
|
* international Copyright laws.
|
||||||
|
*
|
||||||
|
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||||
|
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||||
|
* conditions of a form of NVIDIA software license agreement by and
|
||||||
|
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||||
|
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||||
|
* the contrary in the License Agreement, reproduction or disclosure
|
||||||
|
* of the Licensed Deliverables to any third party without the express
|
||||||
|
* written consent of NVIDIA is prohibited.
|
||||||
|
*
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||||
|
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||||
|
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||||
|
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||||
|
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||||
|
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||||
|
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||||
|
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||||
|
* OF THESE LICENSED DELIVERABLES.
|
||||||
|
*
|
||||||
|
* U.S. Government End Users. These Licensed Deliverables are a
|
||||||
|
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||||
|
* 1995), consisting of "commercial computer software" and "commercial
|
||||||
|
* computer software documentation" as such terms are used in 48
|
||||||
|
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||||
|
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||||
|
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||||
|
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||||
|
* only those rights set forth herein.
|
||||||
|
*
|
||||||
|
* Any use of the Licensed Deliverables in individual and commercial
|
||||||
|
* software must include, in the user documentation and internal
|
||||||
|
* comments to the code, the above Disclaimer and U.S. Government End
|
||||||
|
* Users Notice.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is the public header file for the new CUBLAS library API, it mapped the generic
|
||||||
|
* Cublas name functions to the actual _v2 implementations.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if !defined(CUBLAS_V2_H_)
|
||||||
|
#define CUBLAS_V2_H_
|
||||||
|
|
||||||
|
#undef CUBLASAPI
|
||||||
|
#ifdef __CUDACC__
|
||||||
|
#define CUBLASAPI __host__ __device__
|
||||||
|
#else
|
||||||
|
#define CUBLASAPI
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "cublas_api.h"
|
||||||
|
|
||||||
|
#define cublasCreate cublasCreate_v2
|
||||||
|
#define cublasDestroy cublasDestroy_v2
|
||||||
|
#define cublasGetVersion cublasGetVersion_v2
|
||||||
|
#define cublasSetStream cublasSetStream_v2
|
||||||
|
#define cublasGetStream cublasGetStream_v2
|
||||||
|
#define cublasGetPointerMode cublasGetPointerMode_v2
|
||||||
|
#define cublasSetPointerMode cublasSetPointerMode_v2
|
||||||
|
|
||||||
|
/* Blas3 Routines */
|
||||||
|
|
||||||
|
#define cublasSnrm2 cublasSnrm2_v2
|
||||||
|
#define cublasDnrm2 cublasDnrm2_v2
|
||||||
|
#define cublasScnrm2 cublasScnrm2_v2
|
||||||
|
#define cublasDznrm2 cublasDznrm2_v2
|
||||||
|
|
||||||
|
#define cublasSdot cublasSdot_v2
|
||||||
|
#define cublasDdot cublasDdot_v2
|
||||||
|
#define cublasCdotu cublasCdotu_v2
|
||||||
|
#define cublasCdotc cublasCdotc_v2
|
||||||
|
#define cublasZdotu cublasZdotu_v2
|
||||||
|
#define cublasZdotc cublasZdotc_v2
|
||||||
|
|
||||||
|
#define cublasSscal cublasSscal_v2
|
||||||
|
#define cublasDscal cublasDscal_v2
|
||||||
|
#define cublasCscal cublasCscal_v2
|
||||||
|
#define cublasCsscal cublasCsscal_v2
|
||||||
|
#define cublasZscal cublasZscal_v2
|
||||||
|
#define cublasZdscal cublasZdscal_v2
|
||||||
|
|
||||||
|
#define cublasSaxpy cublasSaxpy_v2
|
||||||
|
#define cublasDaxpy cublasDaxpy_v2
|
||||||
|
#define cublasCaxpy cublasCaxpy_v2
|
||||||
|
#define cublasZaxpy cublasZaxpy_v2
|
||||||
|
|
||||||
|
#define cublasScopy cublasScopy_v2
|
||||||
|
#define cublasDcopy cublasDcopy_v2
|
||||||
|
#define cublasCcopy cublasCcopy_v2
|
||||||
|
#define cublasZcopy cublasZcopy_v2
|
||||||
|
|
||||||
|
#define cublasSswap cublasSswap_v2
|
||||||
|
#define cublasDswap cublasDswap_v2
|
||||||
|
#define cublasCswap cublasCswap_v2
|
||||||
|
#define cublasZswap cublasZswap_v2
|
||||||
|
|
||||||
|
#define cublasIsamax cublasIsamax_v2
|
||||||
|
#define cublasIdamax cublasIdamax_v2
|
||||||
|
#define cublasIcamax cublasIcamax_v2
|
||||||
|
#define cublasIzamax cublasIzamax_v2
|
||||||
|
|
||||||
|
#define cublasIsamin cublasIsamin_v2
|
||||||
|
#define cublasIdamin cublasIdamin_v2
|
||||||
|
#define cublasIcamin cublasIcamin_v2
|
||||||
|
#define cublasIzamin cublasIzamin_v2
|
||||||
|
|
||||||
|
#define cublasSasum cublasSasum_v2
|
||||||
|
#define cublasDasum cublasDasum_v2
|
||||||
|
#define cublasScasum cublasScasum_v2
|
||||||
|
#define cublasDzasum cublasDzasum_v2
|
||||||
|
|
||||||
|
#define cublasSrot cublasSrot_v2
|
||||||
|
#define cublasDrot cublasDrot_v2
|
||||||
|
#define cublasCrot cublasCrot_v2
|
||||||
|
#define cublasCsrot cublasCsrot_v2
|
||||||
|
#define cublasZrot cublasZrot_v2
|
||||||
|
#define cublasZdrot cublasZdrot_v2
|
||||||
|
|
||||||
|
#define cublasSrotg cublasSrotg_v2
|
||||||
|
#define cublasDrotg cublasDrotg_v2
|
||||||
|
#define cublasCrotg cublasCrotg_v2
|
||||||
|
#define cublasZrotg cublasZrotg_v2
|
||||||
|
|
||||||
|
#define cublasSrotm cublasSrotm_v2
|
||||||
|
#define cublasDrotm cublasDrotm_v2
|
||||||
|
|
||||||
|
#define cublasSrotmg cublasSrotmg_v2
|
||||||
|
#define cublasDrotmg cublasDrotmg_v2
|
||||||
|
|
||||||
|
|
||||||
|
/* Blas2 Routines */
|
||||||
|
|
||||||
|
#define cublasSgemv cublasSgemv_v2
|
||||||
|
#define cublasDgemv cublasDgemv_v2
|
||||||
|
#define cublasCgemv cublasCgemv_v2
|
||||||
|
#define cublasZgemv cublasZgemv_v2
|
||||||
|
|
||||||
|
#define cublasSgbmv cublasSgbmv_v2
|
||||||
|
#define cublasDgbmv cublasDgbmv_v2
|
||||||
|
#define cublasCgbmv cublasCgbmv_v2
|
||||||
|
#define cublasZgbmv cublasZgbmv_v2
|
||||||
|
|
||||||
|
#define cublasStrmv cublasStrmv_v2
|
||||||
|
#define cublasDtrmv cublasDtrmv_v2
|
||||||
|
#define cublasCtrmv cublasCtrmv_v2
|
||||||
|
#define cublasZtrmv cublasZtrmv_v2
|
||||||
|
|
||||||
|
#define cublasStbmv cublasStbmv_v2
|
||||||
|
#define cublasDtbmv cublasDtbmv_v2
|
||||||
|
#define cublasCtbmv cublasCtbmv_v2
|
||||||
|
#define cublasZtbmv cublasZtbmv_v2
|
||||||
|
|
||||||
|
#define cublasStpmv cublasStpmv_v2
|
||||||
|
#define cublasDtpmv cublasDtpmv_v2
|
||||||
|
#define cublasCtpmv cublasCtpmv_v2
|
||||||
|
#define cublasZtpmv cublasZtpmv_v2
|
||||||
|
|
||||||
|
#define cublasStrsv cublasStrsv_v2
|
||||||
|
#define cublasDtrsv cublasDtrsv_v2
|
||||||
|
#define cublasCtrsv cublasCtrsv_v2
|
||||||
|
#define cublasZtrsv cublasZtrsv_v2
|
||||||
|
|
||||||
|
#define cublasStpsv cublasStpsv_v2
|
||||||
|
#define cublasDtpsv cublasDtpsv_v2
|
||||||
|
#define cublasCtpsv cublasCtpsv_v2
|
||||||
|
#define cublasZtpsv cublasZtpsv_v2
|
||||||
|
|
||||||
|
#define cublasStbsv cublasStbsv_v2
|
||||||
|
#define cublasDtbsv cublasDtbsv_v2
|
||||||
|
#define cublasCtbsv cublasCtbsv_v2
|
||||||
|
#define cublasZtbsv cublasZtbsv_v2
|
||||||
|
|
||||||
|
#define cublasSsymv cublasSsymv_v2
|
||||||
|
#define cublasDsymv cublasDsymv_v2
|
||||||
|
#define cublasCsymv cublasCsymv_v2
|
||||||
|
#define cublasZsymv cublasZsymv_v2
|
||||||
|
#define cublasChemv cublasChemv_v2
|
||||||
|
#define cublasZhemv cublasZhemv_v2
|
||||||
|
|
||||||
|
#define cublasSsbmv cublasSsbmv_v2
|
||||||
|
#define cublasDsbmv cublasDsbmv_v2
|
||||||
|
#define cublasChbmv cublasChbmv_v2
|
||||||
|
#define cublasZhbmv cublasZhbmv_v2
|
||||||
|
|
||||||
|
#define cublasSspmv cublasSspmv_v2
|
||||||
|
#define cublasDspmv cublasDspmv_v2
|
||||||
|
#define cublasChpmv cublasChpmv_v2
|
||||||
|
#define cublasZhpmv cublasZhpmv_v2
|
||||||
|
|
||||||
|
|
||||||
|
#define cublasSger cublasSger_v2
|
||||||
|
#define cublasDger cublasDger_v2
|
||||||
|
#define cublasCgeru cublasCgeru_v2
|
||||||
|
#define cublasCgerc cublasCgerc_v2
|
||||||
|
#define cublasZgeru cublasZgeru_v2
|
||||||
|
#define cublasZgerc cublasZgerc_v2
|
||||||
|
|
||||||
|
#define cublasSsyr cublasSsyr_v2
|
||||||
|
#define cublasDsyr cublasDsyr_v2
|
||||||
|
#define cublasCsyr cublasCsyr_v2
|
||||||
|
#define cublasZsyr cublasZsyr_v2
|
||||||
|
#define cublasCher cublasCher_v2
|
||||||
|
#define cublasZher cublasZher_v2
|
||||||
|
|
||||||
|
#define cublasSspr cublasSspr_v2
|
||||||
|
#define cublasDspr cublasDspr_v2
|
||||||
|
#define cublasChpr cublasChpr_v2
|
||||||
|
#define cublasZhpr cublasZhpr_v2
|
||||||
|
|
||||||
|
#define cublasSsyr2 cublasSsyr2_v2
|
||||||
|
#define cublasDsyr2 cublasDsyr2_v2
|
||||||
|
#define cublasCsyr2 cublasCsyr2_v2
|
||||||
|
#define cublasZsyr2 cublasZsyr2_v2
|
||||||
|
#define cublasCher2 cublasCher2_v2
|
||||||
|
#define cublasZher2 cublasZher2_v2
|
||||||
|
|
||||||
|
#define cublasSspr2 cublasSspr2_v2
|
||||||
|
#define cublasDspr2 cublasDspr2_v2
|
||||||
|
#define cublasChpr2 cublasChpr2_v2
|
||||||
|
#define cublasZhpr2 cublasZhpr2_v2
|
||||||
|
|
||||||
|
/* Blas3 Routines */
|
||||||
|
|
||||||
|
#define cublasSgemm cublasSgemm_v2
|
||||||
|
#define cublasDgemm cublasDgemm_v2
|
||||||
|
#define cublasCgemm cublasCgemm_v2
|
||||||
|
#define cublasZgemm cublasZgemm_v2
|
||||||
|
|
||||||
|
#define cublasSsyrk cublasSsyrk_v2
|
||||||
|
#define cublasDsyrk cublasDsyrk_v2
|
||||||
|
#define cublasCsyrk cublasCsyrk_v2
|
||||||
|
#define cublasZsyrk cublasZsyrk_v2
|
||||||
|
#define cublasCherk cublasCherk_v2
|
||||||
|
#define cublasZherk cublasZherk_v2
|
||||||
|
|
||||||
|
#define cublasSsyr2k cublasSsyr2k_v2
|
||||||
|
#define cublasDsyr2k cublasDsyr2k_v2
|
||||||
|
#define cublasCsyr2k cublasCsyr2k_v2
|
||||||
|
#define cublasZsyr2k cublasZsyr2k_v2
|
||||||
|
#define cublasCher2k cublasCher2k_v2
|
||||||
|
#define cublasZher2k cublasZher2k_v2
|
||||||
|
|
||||||
|
#define cublasSsymm cublasSsymm_v2
|
||||||
|
#define cublasDsymm cublasDsymm_v2
|
||||||
|
#define cublasCsymm cublasCsymm_v2
|
||||||
|
#define cublasZsymm cublasZsymm_v2
|
||||||
|
#define cublasChemm cublasChemm_v2
|
||||||
|
#define cublasZhemm cublasZhemm_v2
|
||||||
|
|
||||||
|
#define cublasStrsm cublasStrsm_v2
|
||||||
|
#define cublasDtrsm cublasDtrsm_v2
|
||||||
|
#define cublasCtrsm cublasCtrsm_v2
|
||||||
|
#define cublasZtrsm cublasZtrsm_v2
|
||||||
|
|
||||||
|
#define cublasStrmm cublasStrmm_v2
|
||||||
|
#define cublasDtrmm cublasDtrmm_v2
|
||||||
|
#define cublasCtrmm cublasCtrmm_v2
|
||||||
|
#define cublasZtrmm cublasZtrmm_v2
|
||||||
|
|
||||||
|
#endif /* !defined(CUBLAS_V2_H_) */
|
12185
include/external/CUDA/cuda.h
vendored
Executable file
12185
include/external/CUDA/cuda.h
vendored
Executable file
File diff suppressed because it is too large
Load Diff
248
include/external/CUDA/cuda_device_runtime_api.h
vendored
Executable file
248
include/external/CUDA/cuda_device_runtime_api.h
vendored
Executable file
@@ -0,0 +1,248 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
|
||||||
|
*
|
||||||
|
* NOTICE TO LICENSEE:
|
||||||
|
*
|
||||||
|
* This source code and/or documentation ("Licensed Deliverables") are
|
||||||
|
* subject to NVIDIA intellectual property rights under U.S. and
|
||||||
|
* international Copyright laws.
|
||||||
|
*
|
||||||
|
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||||
|
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||||
|
* conditions of a form of NVIDIA software license agreement by and
|
||||||
|
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||||
|
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||||
|
* the contrary in the License Agreement, reproduction or disclosure
|
||||||
|
* of the Licensed Deliverables to any third party without the express
|
||||||
|
* written consent of NVIDIA is prohibited.
|
||||||
|
*
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||||
|
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||||
|
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||||
|
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||||
|
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||||
|
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||||
|
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||||
|
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||||
|
* OF THESE LICENSED DELIVERABLES.
|
||||||
|
*
|
||||||
|
* U.S. Government End Users. These Licensed Deliverables are a
|
||||||
|
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||||
|
* 1995), consisting of "commercial computer software" and "commercial
|
||||||
|
* computer software documentation" as such terms are used in 48
|
||||||
|
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||||
|
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||||
|
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||||
|
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||||
|
* only those rights set forth herein.
|
||||||
|
*
|
||||||
|
* Any use of the Licensed Deliverables in individual and commercial
|
||||||
|
* software must include, in the user documentation and internal
|
||||||
|
* comments to the code, the above Disclaimer and U.S. Government End
|
||||||
|
* Users Notice.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if !defined(__CUDA_DEVICE_RUNTIME_API_H__)
|
||||||
|
#define __CUDA_DEVICE_RUNTIME_API_H__
|
||||||
|
|
||||||
|
/*******************************************************************************
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
#if !defined(__CUDACC_RTC__)
|
||||||
|
|
||||||
|
#if (__CUDA_ARCH__ >= 350) && !defined(__CUDADEVRT_INTERNAL__)
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
struct cudaFuncAttributes;
|
||||||
|
|
||||||
|
#if defined(_WIN32)
|
||||||
|
#define __NV_WEAK__ __declspec(nv_weak)
|
||||||
|
#else
|
||||||
|
#define __NV_WEAK__ __attribute__((nv_weak))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
__device__ __NV_WEAK__ cudaError_t CUDARTAPI cudaMalloc(void **p, size_t s)
|
||||||
|
{
|
||||||
|
return cudaErrorUnknown;
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __NV_WEAK__ cudaError_t CUDARTAPI cudaFuncGetAttributes(struct cudaFuncAttributes *p, const void *c)
|
||||||
|
{
|
||||||
|
return cudaErrorUnknown;
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __NV_WEAK__ cudaError_t CUDARTAPI cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device)
|
||||||
|
{
|
||||||
|
return cudaErrorUnknown;
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __NV_WEAK__ cudaError_t CUDARTAPI cudaGetDevice(int *device)
|
||||||
|
{
|
||||||
|
return cudaErrorUnknown;
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __NV_WEAK__ cudaError_t CUDARTAPI cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, int blockSize, size_t dynamicSmemSize)
|
||||||
|
{
|
||||||
|
return cudaErrorUnknown;
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __NV_WEAK__ cudaError_t CUDARTAPI cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, const void *func, int blockSize, size_t dynamicSmemSize, unsigned int flags)
|
||||||
|
{
|
||||||
|
return cudaErrorUnknown;
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef __NV_WEAK__
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* (__CUDA_ARCH__ >= 350) && !defined(__CUDADEVRT_INTERNAL__) */
|
||||||
|
|
||||||
|
#endif /* !defined(__CUDACC_RTC__) */
|
||||||
|
|
||||||
|
#if defined(__cplusplus) && defined(__CUDACC__) /* Visible to nvcc front-end only */
|
||||||
|
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 350) // Visible to SM>=3.5 and "__host__ __device__" only
|
||||||
|
|
||||||
|
#include "driver_types.h"
|
||||||
|
#include "host_defines.h"
|
||||||
|
|
||||||
|
extern "C"
|
||||||
|
{
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceGetLimit(size_t *pValue, enum cudaLimit limit);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceGetCacheConfig(enum cudaFuncCache *pCacheConfig);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceGetSharedMemConfig(enum cudaSharedMemConfig *pConfig);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceSynchronize(void);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetLastError(void);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaPeekAtLastError(void);
|
||||||
|
extern __device__ __cudart_builtin__ const char* CUDARTAPI cudaGetErrorString(cudaError_t error);
|
||||||
|
extern __device__ __cudart_builtin__ const char* CUDARTAPI cudaGetErrorName(cudaError_t error);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDeviceCount(int *count);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDevice(int *device);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamCreateWithFlags(cudaStream_t *pStream, unsigned int flags);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamDestroy(cudaStream_t stream);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent(cudaStream_t stream, cudaEvent_t event, unsigned int flags);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent_ptsz(cudaStream_t stream, cudaEvent_t event, unsigned int flags);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaEventCreateWithFlags(cudaEvent_t *event, unsigned int flags);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaEventRecord(cudaEvent_t event, cudaStream_t stream);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaEventRecord_ptsz(cudaEvent_t event, cudaStream_t stream);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaEventDestroy(cudaEvent_t event);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaFuncGetAttributes(struct cudaFuncAttributes *attr, const void *func);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaFree(void *devPtr);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMalloc(void **devPtr, size_t size);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpyAsync(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpyAsync_ptsz(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy2DAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, cudaStream_t stream);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy2DAsync_ptsz(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, cudaStream_t stream);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync(const struct cudaMemcpy3DParms *p, cudaStream_t stream);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync_ptsz(const struct cudaMemcpy3DParms *p, cudaStream_t stream);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync(void *devPtr, int value, size_t count, cudaStream_t stream);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync_ptsz(void *devPtr, int value, size_t count, cudaStream_t stream);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemset2DAsync(void *devPtr, size_t pitch, int value, size_t width, size_t height, cudaStream_t stream);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemset2DAsync_ptsz(void *devPtr, size_t pitch, int value, size_t width, size_t height, cudaStream_t stream);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemset3DAsync(struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent, cudaStream_t stream);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemset3DAsync_ptsz(struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent, cudaStream_t stream);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaRuntimeGetVersion(int *runtimeVersion);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \ingroup CUDART_EXECUTION
|
||||||
|
* \brief Obtains a parameter buffer
|
||||||
|
*
|
||||||
|
* Obtains a parameter buffer which can be filled with parameters for a kernel launch.
|
||||||
|
* Parameters passed to ::cudaLaunchDevice must be allocated via this function.
|
||||||
|
*
|
||||||
|
* This is a low level API and can only be accessed from Parallel Thread Execution (PTX).
|
||||||
|
* CUDA user code should use <<< >>> to launch kernels.
|
||||||
|
*
|
||||||
|
* \param alignment - Specifies alignment requirement of the parameter buffer
|
||||||
|
* \param size - Specifies size requirement in bytes
|
||||||
|
*
|
||||||
|
* \return
|
||||||
|
* Returns pointer to the allocated parameterBuffer
|
||||||
|
* \notefnerr
|
||||||
|
*
|
||||||
|
* \sa cudaLaunchDevice
|
||||||
|
*/
|
||||||
|
extern __device__ __cudart_builtin__ void * CUDARTAPI cudaGetParameterBuffer(size_t alignment, size_t size);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \ingroup CUDART_EXECUTION
|
||||||
|
* \brief Launches a specified kernel
|
||||||
|
*
|
||||||
|
* Launches a specified kernel with the specified parameter buffer. A parameter buffer can be obtained
|
||||||
|
* by calling ::cudaGetParameterBuffer().
|
||||||
|
*
|
||||||
|
* This is a low level API and can only be accessed from Parallel Thread Execution (PTX).
|
||||||
|
* CUDA user code should use <<< >>> to launch the kernels.
|
||||||
|
*
|
||||||
|
* \param func - Pointer to the kernel to be launched
|
||||||
|
* \param parameterBuffer - Holds the parameters to the launched kernel. parameterBuffer can be NULL. (Optional)
|
||||||
|
* \param gridDimension - Specifies grid dimensions
|
||||||
|
* \param blockDimension - Specifies block dimensions
|
||||||
|
* \param sharedMemSize - Specifies size of shared memory
|
||||||
|
* \param stream - Specifies the stream to be used
|
||||||
|
*
|
||||||
|
* \return
|
||||||
|
* ::cudaSuccess, ::cudaErrorInvalidDevice, ::cudaErrorLaunchMaxDepthExceeded, ::cudaErrorInvalidConfiguration,
|
||||||
|
* ::cudaErrorStartupFailure, ::cudaErrorLaunchPendingCountExceeded, ::cudaErrorLaunchOutOfResources
|
||||||
|
* \notefnerr
|
||||||
|
* \n Please refer to Execution Configuration and Parameter Buffer Layout from the CUDA Programming
|
||||||
|
* Guide for the detailed descriptions of launch configuration and parameter layout respectively.
|
||||||
|
*
|
||||||
|
* \sa cudaGetParameterBuffer
|
||||||
|
*/
|
||||||
|
extern __device__ __cudart_builtin__ void * CUDARTAPI cudaGetParameterBufferV2(void *func, dim3 gridDimension, dim3 blockDimension, unsigned int sharedMemSize);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaLaunchDevice_ptsz(void *func, void *parameterBuffer, dim3 gridDimension, dim3 blockDimension, unsigned int sharedMemSize, cudaStream_t stream);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaLaunchDeviceV2_ptsz(void *parameterBuffer, cudaStream_t stream);
|
||||||
|
|
||||||
|
#if defined(CUDA_API_PER_THREAD_DEFAULT_STREAM) && defined(__CUDA_ARCH__)
|
||||||
|
// When compiling for the device and per thread default stream is enabled, add
|
||||||
|
// a static inline redirect to the per thread stream entry points.
|
||||||
|
|
||||||
|
static __inline__ __device__ __cudart_builtin__ cudaError_t CUDARTAPI
|
||||||
|
cudaLaunchDevice(void *func, void *parameterBuffer, dim3 gridDimension, dim3 blockDimension, unsigned int sharedMemSize, cudaStream_t stream)
|
||||||
|
{
|
||||||
|
return cudaLaunchDevice_ptsz(func, parameterBuffer, gridDimension, blockDimension, sharedMemSize, stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline__ __device__ __cudart_builtin__ cudaError_t CUDARTAPI
|
||||||
|
cudaLaunchDeviceV2(void *parameterBuffer, cudaStream_t stream)
|
||||||
|
{
|
||||||
|
return cudaLaunchDeviceV2_ptsz(parameterBuffer, stream);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaLaunchDevice(void *func, void *parameterBuffer, dim3 gridDimension, dim3 blockDimension, unsigned int sharedMemSize, cudaStream_t stream);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaLaunchDeviceV2(void *parameterBuffer, cudaStream_t stream);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, int blockSize, size_t dynamicSmemSize);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, const void *func, int blockSize, size_t dynamicSmemSize, unsigned int flags);
|
||||||
|
|
||||||
|
extern __device__ __cudart_builtin__ unsigned long long CUDARTAPI cudaCGGetIntrinsicHandle(enum cudaCGScope scope);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaCGSynchronize(unsigned long long handle, unsigned int flags);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaCGGetSize(unsigned int *numThreads, unsigned int *numGrids, unsigned long long handle);
|
||||||
|
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaCGGetRank(unsigned int *threadRank, unsigned int *gridRank, unsigned long long handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T> static __inline__ __device__ __cudart_builtin__ cudaError_t cudaMalloc(T **devPtr, size_t size);
|
||||||
|
template <typename T> static __inline__ __device__ __cudart_builtin__ cudaError_t cudaFuncGetAttributes(struct cudaFuncAttributes *attr, T *entry);
|
||||||
|
template <typename T> static __inline__ __device__ __cudart_builtin__ cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, T func, int blockSize, size_t dynamicSmemSize);
|
||||||
|
template <typename T> static __inline__ __device__ __cudart_builtin__ cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, T func, int blockSize, size_t dynamicSmemSize, unsigned int flags);
|
||||||
|
|
||||||
|
|
||||||
|
#endif // !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 350)
|
||||||
|
#endif /* defined(__cplusplus) && defined(__CUDACC__) */
|
||||||
|
|
||||||
|
#endif /* !__CUDA_DEVICE_RUNTIME_API_H__ */
|
1969
include/external/CUDA/cuda_fp16.h
vendored
Executable file
1969
include/external/CUDA/cuda_fp16.h
vendored
Executable file
File diff suppressed because it is too large
Load Diff
1797
include/external/CUDA/cuda_fp16.hpp
vendored
Executable file
1797
include/external/CUDA/cuda_fp16.hpp
vendored
Executable file
File diff suppressed because it is too large
Load Diff
2040
include/external/CUDA/cuda_runtime.h
vendored
Executable file
2040
include/external/CUDA/cuda_runtime.h
vendored
Executable file
File diff suppressed because it is too large
Load Diff
7422
include/external/CUDA/cuda_runtime_api.h
vendored
Executable file
7422
include/external/CUDA/cuda_runtime_api.h
vendored
Executable file
File diff suppressed because it is too large
Load Diff
1805
include/external/CUDA/cudnn.h
vendored
Executable file
1805
include/external/CUDA/cudnn.h
vendored
Executable file
File diff suppressed because it is too large
Load Diff
6257
include/external/CUDA/cusparse.h
vendored
Normal file
6257
include/external/CUDA/cusparse.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
69
include/external/CUDA/device_types.h
vendored
Executable file
69
include/external/CUDA/device_types.h
vendored
Executable file
@@ -0,0 +1,69 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
|
||||||
|
*
|
||||||
|
* NOTICE TO LICENSEE:
|
||||||
|
*
|
||||||
|
* This source code and/or documentation ("Licensed Deliverables") are
|
||||||
|
* subject to NVIDIA intellectual property rights under U.S. and
|
||||||
|
* international Copyright laws.
|
||||||
|
*
|
||||||
|
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||||
|
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||||
|
* conditions of a form of NVIDIA software license agreement by and
|
||||||
|
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||||
|
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||||
|
* the contrary in the License Agreement, reproduction or disclosure
|
||||||
|
* of the Licensed Deliverables to any third party without the express
|
||||||
|
* written consent of NVIDIA is prohibited.
|
||||||
|
*
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||||
|
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||||
|
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||||
|
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||||
|
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||||
|
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||||
|
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||||
|
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||||
|
* OF THESE LICENSED DELIVERABLES.
|
||||||
|
*
|
||||||
|
* U.S. Government End Users. These Licensed Deliverables are a
|
||||||
|
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||||
|
* 1995), consisting of "commercial computer software" and "commercial
|
||||||
|
* computer software documentation" as such terms are used in 48
|
||||||
|
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||||
|
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||||
|
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||||
|
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||||
|
* only those rights set forth herein.
|
||||||
|
*
|
||||||
|
* Any use of the Licensed Deliverables in individual and commercial
|
||||||
|
* software must include, in the user documentation and internal
|
||||||
|
* comments to the code, the above Disclaimer and U.S. Government End
|
||||||
|
* Users Notice.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if !defined(__DEVICE_TYPES_H__)
|
||||||
|
#define __DEVICE_TYPES_H__
|
||||||
|
|
||||||
|
#include "host_defines.h"
|
||||||
|
|
||||||
|
/*******************************************************************************
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
enum __device_builtin__ cudaRoundMode
|
||||||
|
{
|
||||||
|
cudaRoundNearest,
|
||||||
|
cudaRoundZero,
|
||||||
|
cudaRoundPosInf,
|
||||||
|
cudaRoundMinInf
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* !__DEVICE_TYPES_H__ */
|
145
include/external/CUDA/driver_functions.h
vendored
Executable file
145
include/external/CUDA/driver_functions.h
vendored
Executable file
@@ -0,0 +1,145 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
|
||||||
|
*
|
||||||
|
* NOTICE TO LICENSEE:
|
||||||
|
*
|
||||||
|
* This source code and/or documentation ("Licensed Deliverables") are
|
||||||
|
* subject to NVIDIA intellectual property rights under U.S. and
|
||||||
|
* international Copyright laws.
|
||||||
|
*
|
||||||
|
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||||
|
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||||
|
* conditions of a form of NVIDIA software license agreement by and
|
||||||
|
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||||
|
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||||
|
* the contrary in the License Agreement, reproduction or disclosure
|
||||||
|
* of the Licensed Deliverables to any third party without the express
|
||||||
|
* written consent of NVIDIA is prohibited.
|
||||||
|
*
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||||
|
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||||
|
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||||
|
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||||
|
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||||
|
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||||
|
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||||
|
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||||
|
* OF THESE LICENSED DELIVERABLES.
|
||||||
|
*
|
||||||
|
* U.S. Government End Users. These Licensed Deliverables are a
|
||||||
|
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||||
|
* 1995), consisting of "commercial computer software" and "commercial
|
||||||
|
* computer software documentation" as such terms are used in 48
|
||||||
|
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||||
|
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||||
|
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||||
|
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||||
|
* only those rights set forth herein.
|
||||||
|
*
|
||||||
|
* Any use of the Licensed Deliverables in individual and commercial
|
||||||
|
* software must include, in the user documentation and internal
|
||||||
|
* comments to the code, the above Disclaimer and U.S. Government End
|
||||||
|
* Users Notice.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if !defined(__DRIVER_FUNCTIONS_H__)
|
||||||
|
#define __DRIVER_FUNCTIONS_H__
|
||||||
|
|
||||||
|
#include "builtin_types.h"
|
||||||
|
#include "host_defines.h"
|
||||||
|
#include "driver_types.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \addtogroup CUDART_MEMORY
|
||||||
|
*
|
||||||
|
* @{
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Returns a cudaPitchedPtr based on input parameters
|
||||||
|
*
|
||||||
|
* Returns a ::cudaPitchedPtr based on the specified input parameters \p d,
|
||||||
|
* \p p, \p xsz, and \p ysz.
|
||||||
|
*
|
||||||
|
* \param d - Pointer to allocated memory
|
||||||
|
* \param p - Pitch of allocated memory in bytes
|
||||||
|
* \param xsz - Logical width of allocation in elements
|
||||||
|
* \param ysz - Logical height of allocation in elements
|
||||||
|
*
|
||||||
|
* \return
|
||||||
|
* ::cudaPitchedPtr specified by \p d, \p p, \p xsz, and \p ysz
|
||||||
|
*
|
||||||
|
* \sa make_cudaExtent, make_cudaPos
|
||||||
|
*/
|
||||||
|
static __inline__ __host__ struct cudaPitchedPtr make_cudaPitchedPtr(void *d, size_t p, size_t xsz, size_t ysz)
|
||||||
|
{
|
||||||
|
struct cudaPitchedPtr s;
|
||||||
|
|
||||||
|
s.ptr = d;
|
||||||
|
s.pitch = p;
|
||||||
|
s.xsize = xsz;
|
||||||
|
s.ysize = ysz;
|
||||||
|
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Returns a cudaPos based on input parameters
|
||||||
|
*
|
||||||
|
* Returns a ::cudaPos based on the specified input parameters \p x,
|
||||||
|
* \p y, and \p z.
|
||||||
|
*
|
||||||
|
* \param x - X position
|
||||||
|
* \param y - Y position
|
||||||
|
* \param z - Z position
|
||||||
|
*
|
||||||
|
* \return
|
||||||
|
* ::cudaPos specified by \p x, \p y, and \p z
|
||||||
|
*
|
||||||
|
* \sa make_cudaExtent, make_cudaPitchedPtr
|
||||||
|
*/
|
||||||
|
static __inline__ __host__ struct cudaPos make_cudaPos(size_t x, size_t y, size_t z)
|
||||||
|
{
|
||||||
|
struct cudaPos p;
|
||||||
|
|
||||||
|
p.x = x;
|
||||||
|
p.y = y;
|
||||||
|
p.z = z;
|
||||||
|
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Returns a cudaExtent based on input parameters
|
||||||
|
*
|
||||||
|
* Returns a ::cudaExtent based on the specified input parameters \p w,
|
||||||
|
* \p h, and \p d.
|
||||||
|
*
|
||||||
|
* \param w - Width in elements when referring to array memory, in bytes when referring to linear memory
|
||||||
|
* \param h - Height in elements
|
||||||
|
* \param d - Depth in elements
|
||||||
|
*
|
||||||
|
* \return
|
||||||
|
* ::cudaExtent specified by \p w, \p h, and \p d
|
||||||
|
*
|
||||||
|
* \sa make_cudaPitchedPtr, make_cudaPos
|
||||||
|
*/
|
||||||
|
static __inline__ __host__ struct cudaExtent make_cudaExtent(size_t w, size_t h, size_t d)
|
||||||
|
{
|
||||||
|
struct cudaExtent e;
|
||||||
|
|
||||||
|
e.width = w;
|
||||||
|
e.height = h;
|
||||||
|
e.depth = d;
|
||||||
|
|
||||||
|
return e;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @} */ /* END CUDART_MEMORY */
|
||||||
|
|
||||||
|
#endif /* !__DRIVER_FUNCTIONS_H__ */
|
1610
include/external/CUDA/driver_types.h
vendored
Executable file
1610
include/external/CUDA/driver_types.h
vendored
Executable file
File diff suppressed because it is too large
Load Diff
50
include/external/CUDA/host_config.h
vendored
Executable file
50
include/external/CUDA/host_config.h
vendored
Executable file
@@ -0,0 +1,50 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 1993-2017 NVIDIA Corporation. All rights reserved.
|
||||||
|
*
|
||||||
|
* NOTICE TO LICENSEE:
|
||||||
|
*
|
||||||
|
* This source code and/or documentation ("Licensed Deliverables") are
|
||||||
|
* subject to NVIDIA intellectual property rights under U.S. and
|
||||||
|
* international Copyright laws.
|
||||||
|
*
|
||||||
|
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||||
|
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||||
|
* conditions of a form of NVIDIA software license agreement by and
|
||||||
|
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||||
|
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||||
|
* the contrary in the License Agreement, reproduction or disclosure
|
||||||
|
* of the Licensed Deliverables to any third party without the express
|
||||||
|
* written consent of NVIDIA is prohibited.
|
||||||
|
*
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||||
|
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||||
|
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||||
|
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||||
|
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||||
|
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||||
|
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||||
|
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||||
|
* OF THESE LICENSED DELIVERABLES.
|
||||||
|
*
|
||||||
|
* U.S. Government End Users. These Licensed Deliverables are a
|
||||||
|
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||||
|
* 1995), consisting of "commercial computer software" and "commercial
|
||||||
|
* computer software documentation" as such terms are used in 48
|
||||||
|
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||||
|
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||||
|
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||||
|
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||||
|
* only those rights set forth herein.
|
||||||
|
*
|
||||||
|
* Any use of the Licensed Deliverables in individual and commercial
|
||||||
|
* software must include, in the user documentation and internal
|
||||||
|
* comments to the code, the above Disclaimer and U.S. Government End
|
||||||
|
* Users Notice.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "crt/host_config.h"
|
50
include/external/CUDA/host_defines.h
vendored
Executable file
50
include/external/CUDA/host_defines.h
vendored
Executable file
@@ -0,0 +1,50 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 1993-2017 NVIDIA Corporation. All rights reserved.
|
||||||
|
*
|
||||||
|
* NOTICE TO LICENSEE:
|
||||||
|
*
|
||||||
|
* This source code and/or documentation ("Licensed Deliverables") are
|
||||||
|
* subject to NVIDIA intellectual property rights under U.S. and
|
||||||
|
* international Copyright laws.
|
||||||
|
*
|
||||||
|
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||||
|
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||||
|
* conditions of a form of NVIDIA software license agreement by and
|
||||||
|
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||||
|
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||||
|
* the contrary in the License Agreement, reproduction or disclosure
|
||||||
|
* of the Licensed Deliverables to any third party without the express
|
||||||
|
* written consent of NVIDIA is prohibited.
|
||||||
|
*
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||||
|
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||||
|
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||||
|
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||||
|
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||||
|
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||||
|
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||||
|
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||||
|
* OF THESE LICENSED DELIVERABLES.
|
||||||
|
*
|
||||||
|
* U.S. Government End Users. These Licensed Deliverables are a
|
||||||
|
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||||
|
* 1995), consisting of "commercial computer software" and "commercial
|
||||||
|
* computer software documentation" as such terms are used in 48
|
||||||
|
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||||
|
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||||
|
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||||
|
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||||
|
* only those rights set forth herein.
|
||||||
|
*
|
||||||
|
* Any use of the Licensed Deliverables in individual and commercial
|
||||||
|
* software must include, in the user documentation and internal
|
||||||
|
* comments to the code, the above Disclaimer and U.S. Government End
|
||||||
|
* Users Notice.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "crt/host_defines.h"
|
80
include/external/CUDA/library_types.h
vendored
Executable file
80
include/external/CUDA/library_types.h
vendored
Executable file
@@ -0,0 +1,80 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 1993-2015 NVIDIA Corporation. All rights reserved.
|
||||||
|
*
|
||||||
|
* NOTICE TO LICENSEE:
|
||||||
|
*
|
||||||
|
* This source code and/or documentation ("Licensed Deliverables") are
|
||||||
|
* subject to NVIDIA intellectual property rights under U.S. and
|
||||||
|
* international Copyright laws.
|
||||||
|
*
|
||||||
|
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||||
|
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||||
|
* conditions of a form of NVIDIA software license agreement by and
|
||||||
|
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||||
|
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||||
|
* the contrary in the License Agreement, reproduction or disclosure
|
||||||
|
* of the Licensed Deliverables to any third party without the express
|
||||||
|
* written consent of NVIDIA is prohibited.
|
||||||
|
*
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||||
|
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||||
|
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||||
|
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||||
|
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||||
|
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||||
|
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||||
|
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||||
|
* OF THESE LICENSED DELIVERABLES.
|
||||||
|
*
|
||||||
|
* U.S. Government End Users. These Licensed Deliverables are a
|
||||||
|
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||||
|
* 1995), consisting of "commercial computer software" and "commercial
|
||||||
|
* computer software documentation" as such terms are used in 48
|
||||||
|
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||||
|
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||||
|
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||||
|
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||||
|
* only those rights set forth herein.
|
||||||
|
*
|
||||||
|
* Any use of the Licensed Deliverables in individual and commercial
|
||||||
|
* software must include, in the user documentation and internal
|
||||||
|
* comments to the code, the above Disclaimer and U.S. Government End
|
||||||
|
* Users Notice.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if !defined(__LIBRARY_TYPES_H__)
|
||||||
|
#define __LIBRARY_TYPES_H__
|
||||||
|
|
||||||
|
|
||||||
|
typedef enum cudaDataType_t
|
||||||
|
{
|
||||||
|
CUDA_R_16F= 2, /* real as a half */
|
||||||
|
CUDA_C_16F= 6, /* complex as a pair of half numbers */
|
||||||
|
CUDA_R_32F= 0, /* real as a float */
|
||||||
|
CUDA_C_32F= 4, /* complex as a pair of float numbers */
|
||||||
|
CUDA_R_64F= 1, /* real as a double */
|
||||||
|
CUDA_C_64F= 5, /* complex as a pair of double numbers */
|
||||||
|
CUDA_R_8I = 3, /* real as a signed char */
|
||||||
|
CUDA_C_8I = 7, /* complex as a pair of signed char numbers */
|
||||||
|
CUDA_R_8U = 8, /* real as a unsigned char */
|
||||||
|
CUDA_C_8U = 9, /* complex as a pair of unsigned char numbers */
|
||||||
|
CUDA_R_32I= 10, /* real as a signed int */
|
||||||
|
CUDA_C_32I= 11, /* complex as a pair of signed int numbers */
|
||||||
|
CUDA_R_32U= 12, /* real as a unsigned int */
|
||||||
|
CUDA_C_32U= 13 /* complex as a pair of unsigned int numbers */
|
||||||
|
} cudaDataType;
|
||||||
|
|
||||||
|
|
||||||
|
typedef enum libraryPropertyType_t
|
||||||
|
{
|
||||||
|
MAJOR_VERSION,
|
||||||
|
MINOR_VERSION,
|
||||||
|
PATCH_LEVEL
|
||||||
|
} libraryPropertyType;
|
||||||
|
|
||||||
|
#endif /* !__LIBRARY_TYPES_H__ */
|
5628
include/external/CUDA/nvml.h
vendored
Executable file
5628
include/external/CUDA/nvml.h
vendored
Executable file
File diff suppressed because it is too large
Load Diff
525
include/external/CUDA/nvrtc.h
vendored
Executable file
525
include/external/CUDA/nvrtc.h
vendored
Executable file
@@ -0,0 +1,525 @@
|
|||||||
|
//
|
||||||
|
// NVIDIA_COPYRIGHT_BEGIN
|
||||||
|
//
|
||||||
|
// Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
//
|
||||||
|
// NVIDIA CORPORATION and its licensors retain all intellectual property
|
||||||
|
// and proprietary rights in and to this software, related documentation
|
||||||
|
// and any modifications thereto. Any use, reproduction, disclosure or
|
||||||
|
// distribution of this software and related documentation without an express
|
||||||
|
// license agreement from NVIDIA CORPORATION is strictly prohibited.
|
||||||
|
//
|
||||||
|
// NVIDIA_COPYRIGHT_END
|
||||||
|
//
|
||||||
|
|
||||||
|
#ifndef __NVRTC_H__
|
||||||
|
#define __NVRTC_H__
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************************************//**
|
||||||
|
*
|
||||||
|
* \defgroup error Error Handling
|
||||||
|
*
|
||||||
|
* NVRTC defines the following enumeration type and function for API call
|
||||||
|
* error handling.
|
||||||
|
*
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \ingroup error
|
||||||
|
* \brief The enumerated type nvrtcResult defines API call result codes.
|
||||||
|
* NVRTC API functions return nvrtcResult to indicate the call
|
||||||
|
* result.
|
||||||
|
*/
|
||||||
|
typedef enum {
|
||||||
|
NVRTC_SUCCESS = 0,
|
||||||
|
NVRTC_ERROR_OUT_OF_MEMORY = 1,
|
||||||
|
NVRTC_ERROR_PROGRAM_CREATION_FAILURE = 2,
|
||||||
|
NVRTC_ERROR_INVALID_INPUT = 3,
|
||||||
|
NVRTC_ERROR_INVALID_PROGRAM = 4,
|
||||||
|
NVRTC_ERROR_INVALID_OPTION = 5,
|
||||||
|
NVRTC_ERROR_COMPILATION = 6,
|
||||||
|
NVRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7,
|
||||||
|
NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8,
|
||||||
|
NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9,
|
||||||
|
NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10,
|
||||||
|
NVRTC_ERROR_INTERNAL_ERROR = 11
|
||||||
|
} nvrtcResult;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \ingroup error
|
||||||
|
* \brief nvrtcGetErrorString is a helper function that returns a string
|
||||||
|
* describing the given nvrtcResult code, e.g., NVRTC_SUCCESS to
|
||||||
|
* \c "NVRTC_SUCCESS".
|
||||||
|
* For unrecognized enumeration values, it returns
|
||||||
|
* \c "NVRTC_ERROR unknown".
|
||||||
|
*
|
||||||
|
* \param [in] result CUDA Runtime Compilation API result code.
|
||||||
|
* \return Message string for the given #nvrtcResult code.
|
||||||
|
*/
|
||||||
|
const char *nvrtcGetErrorString(nvrtcResult result);
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************************************//**
|
||||||
|
*
|
||||||
|
* \defgroup query General Information Query
|
||||||
|
*
|
||||||
|
* NVRTC defines the following function for general information query.
|
||||||
|
*
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \ingroup query
|
||||||
|
* \brief nvrtcVersion sets the output parameters \p major and \p minor
|
||||||
|
* with the CUDA Runtime Compilation version number.
|
||||||
|
*
|
||||||
|
* \param [out] major CUDA Runtime Compilation major version number.
|
||||||
|
* \param [out] minor CUDA Runtime Compilation minor version number.
|
||||||
|
* \return
|
||||||
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
||||||
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
nvrtcResult nvrtcVersion(int *major, int *minor);
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************************************//**
|
||||||
|
*
|
||||||
|
* \defgroup compilation Compilation
|
||||||
|
*
|
||||||
|
* NVRTC defines the following type and functions for actual compilation.
|
||||||
|
*
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \ingroup compilation
|
||||||
|
* \brief nvrtcProgram is the unit of compilation, and an opaque handle for
|
||||||
|
* a program.
|
||||||
|
*
|
||||||
|
* To compile a CUDA program string, an instance of nvrtcProgram must be
|
||||||
|
* created first with ::nvrtcCreateProgram, then compiled with
|
||||||
|
* ::nvrtcCompileProgram.
|
||||||
|
*/
|
||||||
|
typedef struct _nvrtcProgram *nvrtcProgram;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \ingroup compilation
|
||||||
|
* \brief nvrtcCreateProgram creates an instance of nvrtcProgram with the
|
||||||
|
* given input parameters, and sets the output parameter \p prog with
|
||||||
|
* it.
|
||||||
|
*
|
||||||
|
* \param [out] prog CUDA Runtime Compilation program.
|
||||||
|
* \param [in] src CUDA program source.
|
||||||
|
* \param [in] name CUDA program name.\n
|
||||||
|
* \p name can be \c NULL; \c "default_program" is
|
||||||
|
* used when \p name is \c NULL.
|
||||||
|
* \param [in] numHeaders Number of headers used.\n
|
||||||
|
* \p numHeaders must be greater than or equal to 0.
|
||||||
|
* \param [in] headers Sources of the headers.\n
|
||||||
|
* \p headers can be \c NULL when \p numHeaders is
|
||||||
|
* 0.
|
||||||
|
* \param [in] includeNames Name of each header by which they can be
|
||||||
|
* included in the CUDA program source.\n
|
||||||
|
* \p includeNames can be \c NULL when \p numHeaders
|
||||||
|
* is 0.
|
||||||
|
* \return
|
||||||
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
||||||
|
* - \link #nvrtcResult NVRTC_ERROR_OUT_OF_MEMORY \endlink
|
||||||
|
* - \link #nvrtcResult NVRTC_ERROR_PROGRAM_CREATION_FAILURE \endlink
|
||||||
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
||||||
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
||||||
|
*
|
||||||
|
* \see ::nvrtcDestroyProgram
|
||||||
|
*/
|
||||||
|
nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog,
|
||||||
|
const char *src,
|
||||||
|
const char *name,
|
||||||
|
int numHeaders,
|
||||||
|
const char * const *headers,
|
||||||
|
const char * const *includeNames);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \ingroup compilation
|
||||||
|
* \brief nvrtcDestroyProgram destroys the given program.
|
||||||
|
*
|
||||||
|
* \param [in] prog CUDA Runtime Compilation program.
|
||||||
|
* \return
|
||||||
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
||||||
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
||||||
|
*
|
||||||
|
* \see ::nvrtcCreateProgram
|
||||||
|
*/
|
||||||
|
nvrtcResult nvrtcDestroyProgram(nvrtcProgram *prog);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \ingroup compilation
|
||||||
|
* \brief nvrtcCompileProgram compiles the given program.
|
||||||
|
*
|
||||||
|
* It supports compile options listed in \ref options.
|
||||||
|
*/
|
||||||
|
nvrtcResult nvrtcCompileProgram(nvrtcProgram prog,
|
||||||
|
int numOptions, const char * const *options);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \ingroup compilation
|
||||||
|
* \brief nvrtcGetPTXSize sets \p ptxSizeRet with the size of the PTX
|
||||||
|
* generated by the previous compilation of \p prog (including the
|
||||||
|
* trailing \c NULL).
|
||||||
|
*
|
||||||
|
* \param [in] prog CUDA Runtime Compilation program.
|
||||||
|
* \param [out] ptxSizeRet Size of the generated PTX (including the trailing
|
||||||
|
* \c NULL).
|
||||||
|
* \return
|
||||||
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
||||||
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
||||||
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
||||||
|
*
|
||||||
|
* \see ::nvrtcGetPTX
|
||||||
|
*/
|
||||||
|
nvrtcResult nvrtcGetPTXSize(nvrtcProgram prog, size_t *ptxSizeRet);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \ingroup compilation
|
||||||
|
* \brief nvrtcGetPTX stores the PTX generated by the previous compilation
|
||||||
|
* of \p prog in the memory pointed by \p ptx.
|
||||||
|
*
|
||||||
|
* \param [in] prog CUDA Runtime Compilation program.
|
||||||
|
* \param [out] ptx Compiled result.
|
||||||
|
* \return
|
||||||
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
||||||
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
||||||
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
||||||
|
*
|
||||||
|
* \see ::nvrtcGetPTXSize
|
||||||
|
*/
|
||||||
|
nvrtcResult nvrtcGetPTX(nvrtcProgram prog, char *ptx);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \ingroup compilation
|
||||||
|
* \brief nvrtcGetProgramLogSize sets \p logSizeRet with the size of the
|
||||||
|
* log generated by the previous compilation of \p prog (including the
|
||||||
|
* trailing \c NULL).
|
||||||
|
*
|
||||||
|
* Note that compilation log may be generated with warnings and informative
|
||||||
|
* messages, even when the compilation of \p prog succeeds.
|
||||||
|
*
|
||||||
|
* \param [in] prog CUDA Runtime Compilation program.
|
||||||
|
* \param [out] logSizeRet Size of the compilation log
|
||||||
|
* (including the trailing \c NULL).
|
||||||
|
* \return
|
||||||
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
||||||
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
||||||
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
||||||
|
*
|
||||||
|
* \see ::nvrtcGetProgramLog
|
||||||
|
*/
|
||||||
|
nvrtcResult nvrtcGetProgramLogSize(nvrtcProgram prog, size_t *logSizeRet);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \ingroup compilation
|
||||||
|
* \brief nvrtcGetProgramLog stores the log generated by the previous
|
||||||
|
* compilation of \p prog in the memory pointed by \p log.
|
||||||
|
*
|
||||||
|
* \param [in] prog CUDA Runtime Compilation program.
|
||||||
|
* \param [out] log Compilation log.
|
||||||
|
* \return
|
||||||
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
||||||
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
||||||
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
||||||
|
*
|
||||||
|
* \see ::nvrtcGetProgramLogSize
|
||||||
|
*/
|
||||||
|
nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char *log);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \ingroup compilation
|
||||||
|
* \brief nvrtcAddNameExpression notes the given name expression
|
||||||
|
* denoting a __global__ function or function template
|
||||||
|
* instantiation.
|
||||||
|
*
|
||||||
|
* The identical name expression string must be provided on a subsequent
|
||||||
|
* call to nvrtcGetLoweredName to extract the lowered name.
|
||||||
|
* \param [in] prog CUDA Runtime Compilation program.
|
||||||
|
* \param [in] name_expression constant expression denoting a __global__
|
||||||
|
* function or function template instantiation.
|
||||||
|
* \return
|
||||||
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
||||||
|
* - \link #nvrtcResult NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION \endlink
|
||||||
|
*
|
||||||
|
* \see ::nvrtcGetLoweredName
|
||||||
|
*/
|
||||||
|
nvrtcResult nvrtcAddNameExpression(nvrtcProgram prog,
|
||||||
|
const char * const name_expression);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \ingroup compilation
|
||||||
|
* \brief nvrtcGetLoweredName extracts the lowered (mangled) name
|
||||||
|
* for a __global__ function or function template instantiation,
|
||||||
|
* and updates *lowered_name to point to it. The memory containing
|
||||||
|
* the name is released when the NVRTC program is destroyed by
|
||||||
|
* nvrtcDestroyProgram.
|
||||||
|
* The identical name expression must have been previously
|
||||||
|
* provided to nvrtcAddNameExpression.
|
||||||
|
*
|
||||||
|
* \param [in] prog CUDA Runtime Compilation program.
|
||||||
|
* \param [in] name_expression constant expression denoting a __global__
|
||||||
|
* function or function template instantiation.
|
||||||
|
* \param [out] lowered_name initialized by the function to point to a
|
||||||
|
* C string containing the lowered (mangled)
|
||||||
|
* name corresponding to the provided name expression.
|
||||||
|
* \return
|
||||||
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
||||||
|
* - \link #nvrtcResult NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION \endlink
|
||||||
|
* - \link #nvrtcResult NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID \endlink
|
||||||
|
*
|
||||||
|
* \see ::nvrtcAddNameExpression
|
||||||
|
*/
|
||||||
|
nvrtcResult nvrtcGetLoweredName(nvrtcProgram prog,
|
||||||
|
const char *const name_expression,
|
||||||
|
const char** lowered_name);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \defgroup options Supported Compile Options
|
||||||
|
*
|
||||||
|
* NVRTC supports the compile options below.
|
||||||
|
* Option names with two preceding dashs (\c --) are long option names and
|
||||||
|
* option names with one preceding dash (\c -) are short option names.
|
||||||
|
* Short option names can be used instead of long option names.
|
||||||
|
* When a compile option takes an argument, an assignment operator (\c =)
|
||||||
|
* is used to separate the compile option argument from the compile option
|
||||||
|
* name, e.g., \c "--gpu-architecture=compute_30".
|
||||||
|
* Alternatively, the compile option name and the argument can be specified in
|
||||||
|
* separate strings without an assignment operator, .e.g,
|
||||||
|
* \c "--gpu-architecture" \c "compute_30".
|
||||||
|
* Single-character short option names, such as \c -D, \c -U, and \c -I, do
|
||||||
|
* not require an assignment operator, and the compile option name and the
|
||||||
|
* argument can be present in the same string with or without spaces between
|
||||||
|
* them.
|
||||||
|
* For instance, \c "-D=<def>", \c "-D<def>", and \c "-D <def>" are all
|
||||||
|
* supported.
|
||||||
|
*
|
||||||
|
* The valid compiler options are:
|
||||||
|
*
|
||||||
|
* - Compilation targets
|
||||||
|
* - \c --gpu-architecture=\<arch\> (\c -arch)\n
|
||||||
|
* Specify the name of the class of GPU architectures for which the
|
||||||
|
* input must be compiled.\n
|
||||||
|
* - Valid <c>\<arch\></c>s:
|
||||||
|
* - \c compute_30
|
||||||
|
* - \c compute_32
|
||||||
|
* - \c compute_35
|
||||||
|
* - \c compute_37
|
||||||
|
* - \c compute_50
|
||||||
|
* - \c compute_52
|
||||||
|
* - \c compute_53
|
||||||
|
* - \c compute_60
|
||||||
|
* - \c compute_61
|
||||||
|
* - \c compute_62
|
||||||
|
* - \c compute_70
|
||||||
|
* - \c compute_72
|
||||||
|
* - Default: \c compute_30
|
||||||
|
* - Separate compilation / whole-program compilation
|
||||||
|
* - \c --device-c (\c -dc)\n
|
||||||
|
* Generate relocatable code that can be linked with other relocatable
|
||||||
|
* device code. It is equivalent to --relocatable-device-code=true.
|
||||||
|
* - \c --device-w (\c -dw)\n
|
||||||
|
* Generate non-relocatable code. It is equivalent to
|
||||||
|
* \c --relocatable-device-code=false.
|
||||||
|
* - \c --relocatable-device-code={true|false} (\c -rdc)\n
|
||||||
|
* Enable (disable) the generation of relocatable device code.
|
||||||
|
* - Default: \c false
|
||||||
|
* - Debugging support
|
||||||
|
* - \c --device-debug (\c -G)\n
|
||||||
|
* Generate debug information.
|
||||||
|
* - \c --generate-line-info (\c -lineinfo)\n
|
||||||
|
* Generate line-number information.
|
||||||
|
* - Code generation
|
||||||
|
* - \c --maxrregcount=\<N\> (\c -maxrregcount)\n
|
||||||
|
* Specify the maximum amount of registers that GPU functions can use.
|
||||||
|
* Until a function-specific limit, a higher value will generally
|
||||||
|
* increase the performance of individual GPU threads that execute this
|
||||||
|
* function. However, because thread registers are allocated from a
|
||||||
|
* global register pool on each GPU, a higher value of this option will
|
||||||
|
* also reduce the maximum thread block size, thereby reducing the amount
|
||||||
|
* of thread parallelism. Hence, a good maxrregcount value is the result
|
||||||
|
* of a trade-off. If this option is not specified, then no maximum is
|
||||||
|
* assumed. Value less than the minimum registers required by ABI will
|
||||||
|
* be bumped up by the compiler to ABI minimum limit.
|
||||||
|
* - \c --ftz={true|false} (\c -ftz)\n
|
||||||
|
* When performing single-precision floating-point operations, flush
|
||||||
|
* denormal values to zero or preserve denormal values.
|
||||||
|
* \c --use_fast_math implies \c --ftz=true.
|
||||||
|
* - Default: \c false
|
||||||
|
* - \c --prec-sqrt={true|false} (\c -prec-sqrt)\n
|
||||||
|
* For single-precision floating-point square root, use IEEE
|
||||||
|
* round-to-nearest mode or use a faster approximation.
|
||||||
|
* \c --use_fast_math implies \c --prec-sqrt=false.
|
||||||
|
* - Default: \c true
|
||||||
|
* - \c --prec-div={true|false} (\c -prec-div)\n
|
||||||
|
* For single-precision floating-point division and reciprocals, use IEEE
|
||||||
|
* round-to-nearest mode or use a faster approximation.
|
||||||
|
* \c --use_fast_math implies \c --prec-div=false.
|
||||||
|
* - Default: \c true
|
||||||
|
* - \c --fmad={true|false} (\c -fmad)\n
|
||||||
|
* Enables (disables) the contraction of floating-point multiplies and
|
||||||
|
* adds/subtracts into floating-point multiply-add operations (FMAD,
|
||||||
|
* FFMA, or DFMA). \c --use_fast_math implies \c --fmad=true.
|
||||||
|
* - Default: \c true
|
||||||
|
* - \c --use_fast_math (\c -use_fast_math)\n
|
||||||
|
* Make use of fast math operations.
|
||||||
|
* \c --use_fast_math implies \c --ftz=true \c --prec-div=false
|
||||||
|
* \c --prec-sqrt=false \c --fmad=true.
|
||||||
|
* - Preprocessing
|
||||||
|
* - \c --define-macro=\<def\> (\c -D)\n
|
||||||
|
* \c \<def\> can be either \c \<name\> or \c \<name=definitions\>.
|
||||||
|
* - \c \<name\> \n
|
||||||
|
* Predefine \c \<name\> as a macro with definition \c 1.
|
||||||
|
* - \c \<name\>=\<definition\> \n
|
||||||
|
* The contents of \c \<definition\> are tokenized and preprocessed
|
||||||
|
* as if they appeared during translation phase three in a \c \#define
|
||||||
|
* directive. In particular, the definition will be truncated by
|
||||||
|
* embedded new line characters.
|
||||||
|
* - \c --undefine-macro=\<def\> (\c -U)\n
|
||||||
|
* Cancel any previous definition of \c \<def\>.
|
||||||
|
* - \c --include-path=\<dir\> (\c -I)\n
|
||||||
|
* Add the directory \c \<dir\> to the list of directories to be
|
||||||
|
* searched for headers. These paths are searched after the list of
|
||||||
|
* headers given to ::nvrtcCreateProgram.
|
||||||
|
* - \c --pre-include=\<header\> (\c -include)\n
|
||||||
|
* Preinclude \c \<header\> during preprocessing.
|
||||||
|
* - Language Dialect
|
||||||
|
* - \c --std={c++11|c++14} (\c -std={c++11|c++14})\n
|
||||||
|
* Set language dialect to C++11 or C++14.
|
||||||
|
* - \c --builtin-move-forward={true|false} (\c -builtin-move-forward)\n
|
||||||
|
* Provide builtin definitions of \c std::move and \c std::forward,
|
||||||
|
* when C++11 language dialect is selected.
|
||||||
|
* - Default: \c true
|
||||||
|
* - \c --builtin-initializer-list={true|false}
|
||||||
|
* (\c -builtin-initializer-list)\n
|
||||||
|
* Provide builtin definitions of \c std::initializer_list class and
|
||||||
|
* member functions when C++11 language dialect is selected.
|
||||||
|
* - Default: \c true
|
||||||
|
* - Misc.
|
||||||
|
* - \c --disable-warnings (\c -w)\n
|
||||||
|
* Inhibit all warning messages.
|
||||||
|
* - \c --restrict (\c -restrict)\n
|
||||||
|
* Programmer assertion that all kernel pointer parameters are restrict
|
||||||
|
* pointers.
|
||||||
|
* - \c --device-as-default-execution-space
|
||||||
|
* (\c -default-device)\n
|
||||||
|
* Treat entities with no execution space annotation as \c __device__
|
||||||
|
* entities.
|
||||||
|
*
|
||||||
|
* \param [in] prog CUDA Runtime Compilation program.
|
||||||
|
* \param [in] numOptions Number of compiler options passed.
|
||||||
|
* \param [in] options Compiler options in the form of C string array.\n
|
||||||
|
* \p options can be \c NULL when \p numOptions is 0.
|
||||||
|
*
|
||||||
|
* \return
|
||||||
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
||||||
|
* - \link #nvrtcResult NVRTC_ERROR_OUT_OF_MEMORY \endlink
|
||||||
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
|
||||||
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
|
||||||
|
* - \link #nvrtcResult NVRTC_ERROR_INVALID_OPTION \endlink
|
||||||
|
* - \link #nvrtcResult NVRTC_ERROR_COMPILATION \endlink
|
||||||
|
* - \link #nvrtcResult NVRTC_ERROR_BUILTIN_OPERATION_FAILURE \endlink
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
|
|
||||||
|
/* The utility function 'nvrtcGetTypeName' is not available by default. Define
|
||||||
|
the macro 'NVRTC_GET_TYPE_NAME' to a non-zero value to make it available.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if NVRTC_GET_TYPE_NAME || __DOXYGEN_ONLY__
|
||||||
|
|
||||||
|
#if NVRTC_USE_CXXABI || __clang__ || __GNUC__ || __DOXYGEN_ONLY__
|
||||||
|
#include <cxxabi.h>
|
||||||
|
#include <cstdlib>
|
||||||
|
|
||||||
|
#elif defined(_WIN32)
|
||||||
|
#include <Windows.h>
|
||||||
|
#include <DbgHelp.h>
|
||||||
|
#endif /* NVRTC_USE_CXXABI || __clang__ || __GNUC__ */
|
||||||
|
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <typeinfo>
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************************************//**
|
||||||
|
*
|
||||||
|
* \defgroup hosthelper Host Helper
|
||||||
|
*
|
||||||
|
* NVRTC defines the following functions for easier interaction with host code.
|
||||||
|
*
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \ingroup hosthelper
|
||||||
|
* \brief nvrtcGetTypeName stores the source level name of the template type argument
|
||||||
|
* T in the given std::string location.
|
||||||
|
*
|
||||||
|
* This function is only provided when the macro NVRTC_GET_TYPE_NAME is
|
||||||
|
* defined with a non-zero value. It uses abi::__cxa_demangle or UnDecorateSymbolName
|
||||||
|
* function calls to extract the type name, when using gcc/clang or cl.exe compilers,
|
||||||
|
* respectively. If the name extraction fails, it will return NVRTC_INTERNAL_ERROR,
|
||||||
|
* otherwise *result is initialized with the extracted name.
|
||||||
|
*
|
||||||
|
* \param [in] result: pointer to std::string in which to store the type name.
|
||||||
|
* \return
|
||||||
|
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
|
||||||
|
* - \link #nvrtcResult NVRTC_ERROR_INTERNAL_ERROR \endlink
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
nvrtcResult nvrtcGetTypeName(std::string *result)
|
||||||
|
{
|
||||||
|
const char *name = typeid(T).name();
|
||||||
|
|
||||||
|
#if USE_CXXABI || __clang__ || __GNUC__
|
||||||
|
int status;
|
||||||
|
char *undecorated_name = abi::__cxa_demangle(name, 0, 0, &status);
|
||||||
|
if (status == 0) {
|
||||||
|
*result = undecorated_name;
|
||||||
|
free(undecorated_name);
|
||||||
|
return NVRTC_SUCCESS;
|
||||||
|
}
|
||||||
|
#elif defined(_WIN32)
|
||||||
|
char undecorated_name[4096];
|
||||||
|
if(UnDecorateSymbolName(name, undecorated_name,
|
||||||
|
sizeof(undecorated_name) / sizeof(*undecorated_name),
|
||||||
|
UNDNAME_COMPLETE) ) {
|
||||||
|
*result = undecorated_name;
|
||||||
|
return NVRTC_SUCCESS;
|
||||||
|
}
|
||||||
|
#endif /* USE_CXXABI || __clang__ || __GNUC__ */
|
||||||
|
return NVRTC_ERROR_INTERNAL_ERROR;
|
||||||
|
}
|
||||||
|
#endif /* NVRTC_GET_TYPE_NAME */
|
||||||
|
|
||||||
|
#endif /* __NVRTC_H__ */
|
119
include/external/CUDA/surface_types.h
vendored
Executable file
119
include/external/CUDA/surface_types.h
vendored
Executable file
@@ -0,0 +1,119 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
|
||||||
|
*
|
||||||
|
* NOTICE TO LICENSEE:
|
||||||
|
*
|
||||||
|
* This source code and/or documentation ("Licensed Deliverables") are
|
||||||
|
* subject to NVIDIA intellectual property rights under U.S. and
|
||||||
|
* international Copyright laws.
|
||||||
|
*
|
||||||
|
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||||
|
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||||
|
* conditions of a form of NVIDIA software license agreement by and
|
||||||
|
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||||
|
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||||
|
* the contrary in the License Agreement, reproduction or disclosure
|
||||||
|
* of the Licensed Deliverables to any third party without the express
|
||||||
|
* written consent of NVIDIA is prohibited.
|
||||||
|
*
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||||
|
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||||
|
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||||
|
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||||
|
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||||
|
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||||
|
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||||
|
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||||
|
* OF THESE LICENSED DELIVERABLES.
|
||||||
|
*
|
||||||
|
* U.S. Government End Users. These Licensed Deliverables are a
|
||||||
|
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||||
|
* 1995), consisting of "commercial computer software" and "commercial
|
||||||
|
* computer software documentation" as such terms are used in 48
|
||||||
|
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||||
|
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||||
|
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||||
|
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||||
|
* only those rights set forth herein.
|
||||||
|
*
|
||||||
|
* Any use of the Licensed Deliverables in individual and commercial
|
||||||
|
* software must include, in the user documentation and internal
|
||||||
|
* comments to the code, the above Disclaimer and U.S. Government End
|
||||||
|
* Users Notice.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if !defined(__SURFACE_TYPES_H__)
|
||||||
|
#define __SURFACE_TYPES_H__
|
||||||
|
|
||||||
|
/*******************************************************************************
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
#include "driver_types.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \addtogroup CUDART_TYPES
|
||||||
|
*
|
||||||
|
* @{
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*******************************************************************************
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
#define cudaSurfaceType1D 0x01
|
||||||
|
#define cudaSurfaceType2D 0x02
|
||||||
|
#define cudaSurfaceType3D 0x03
|
||||||
|
#define cudaSurfaceTypeCubemap 0x0C
|
||||||
|
#define cudaSurfaceType1DLayered 0xF1
|
||||||
|
#define cudaSurfaceType2DLayered 0xF2
|
||||||
|
#define cudaSurfaceTypeCubemapLayered 0xFC
|
||||||
|
|
||||||
|
/**
|
||||||
|
* CUDA Surface boundary modes
|
||||||
|
*/
|
||||||
|
enum __device_builtin__ cudaSurfaceBoundaryMode
|
||||||
|
{
|
||||||
|
cudaBoundaryModeZero = 0, /**< Zero boundary mode */
|
||||||
|
cudaBoundaryModeClamp = 1, /**< Clamp boundary mode */
|
||||||
|
cudaBoundaryModeTrap = 2 /**< Trap boundary mode */
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* CUDA Surface format modes
|
||||||
|
*/
|
||||||
|
enum __device_builtin__ cudaSurfaceFormatMode
|
||||||
|
{
|
||||||
|
cudaFormatModeForced = 0, /**< Forced format mode */
|
||||||
|
cudaFormatModeAuto = 1 /**< Auto format mode */
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* CUDA Surface reference
|
||||||
|
*/
|
||||||
|
struct __device_builtin__ surfaceReference
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* Channel descriptor for surface reference
|
||||||
|
*/
|
||||||
|
struct cudaChannelFormatDesc channelDesc;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An opaque value that represents a CUDA Surface object
|
||||||
|
*/
|
||||||
|
typedef __device_builtin__ unsigned long long cudaSurfaceObject_t;
|
||||||
|
|
||||||
|
/** @} */
|
||||||
|
/** @} */ /* END CUDART_TYPES */
|
||||||
|
|
||||||
|
#endif /* !__SURFACE_TYPES_H__ */
|
217
include/external/CUDA/texture_types.h
vendored
Executable file
217
include/external/CUDA/texture_types.h
vendored
Executable file
@@ -0,0 +1,217 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
|
||||||
|
*
|
||||||
|
* NOTICE TO LICENSEE:
|
||||||
|
*
|
||||||
|
* This source code and/or documentation ("Licensed Deliverables") are
|
||||||
|
* subject to NVIDIA intellectual property rights under U.S. and
|
||||||
|
* international Copyright laws.
|
||||||
|
*
|
||||||
|
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||||
|
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||||
|
* conditions of a form of NVIDIA software license agreement by and
|
||||||
|
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||||
|
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||||
|
* the contrary in the License Agreement, reproduction or disclosure
|
||||||
|
* of the Licensed Deliverables to any third party without the express
|
||||||
|
* written consent of NVIDIA is prohibited.
|
||||||
|
*
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||||
|
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||||
|
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||||
|
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||||
|
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||||
|
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||||
|
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||||
|
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||||
|
* OF THESE LICENSED DELIVERABLES.
|
||||||
|
*
|
||||||
|
* U.S. Government End Users. These Licensed Deliverables are a
|
||||||
|
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||||
|
* 1995), consisting of "commercial computer software" and "commercial
|
||||||
|
* computer software documentation" as such terms are used in 48
|
||||||
|
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||||
|
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||||
|
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||||
|
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||||
|
* only those rights set forth herein.
|
||||||
|
*
|
||||||
|
* Any use of the Licensed Deliverables in individual and commercial
|
||||||
|
* software must include, in the user documentation and internal
|
||||||
|
* comments to the code, the above Disclaimer and U.S. Government End
|
||||||
|
* Users Notice.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if !defined(__TEXTURE_TYPES_H__)
|
||||||
|
#define __TEXTURE_TYPES_H__
|
||||||
|
|
||||||
|
/*******************************************************************************
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
#include "driver_types.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \addtogroup CUDART_TYPES
|
||||||
|
*
|
||||||
|
* @{
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*******************************************************************************
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
#define cudaTextureType1D 0x01
|
||||||
|
#define cudaTextureType2D 0x02
|
||||||
|
#define cudaTextureType3D 0x03
|
||||||
|
#define cudaTextureTypeCubemap 0x0C
|
||||||
|
#define cudaTextureType1DLayered 0xF1
|
||||||
|
#define cudaTextureType2DLayered 0xF2
|
||||||
|
#define cudaTextureTypeCubemapLayered 0xFC
|
||||||
|
|
||||||
|
/**
|
||||||
|
* CUDA texture address modes
|
||||||
|
*/
|
||||||
|
enum __device_builtin__ cudaTextureAddressMode
|
||||||
|
{
|
||||||
|
cudaAddressModeWrap = 0, /**< Wrapping address mode */
|
||||||
|
cudaAddressModeClamp = 1, /**< Clamp to edge address mode */
|
||||||
|
cudaAddressModeMirror = 2, /**< Mirror address mode */
|
||||||
|
cudaAddressModeBorder = 3 /**< Border address mode */
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* CUDA texture filter modes
|
||||||
|
*/
|
||||||
|
enum __device_builtin__ cudaTextureFilterMode
|
||||||
|
{
|
||||||
|
cudaFilterModePoint = 0, /**< Point filter mode */
|
||||||
|
cudaFilterModeLinear = 1 /**< Linear filter mode */
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* CUDA texture read modes
|
||||||
|
*/
|
||||||
|
enum __device_builtin__ cudaTextureReadMode
|
||||||
|
{
|
||||||
|
cudaReadModeElementType = 0, /**< Read texture as specified element type */
|
||||||
|
cudaReadModeNormalizedFloat = 1 /**< Read texture as normalized float */
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* CUDA texture reference
|
||||||
|
*/
|
||||||
|
struct __device_builtin__ textureReference
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* Indicates whether texture reads are normalized or not
|
||||||
|
*/
|
||||||
|
int normalized;
|
||||||
|
/**
|
||||||
|
* Texture filter mode
|
||||||
|
*/
|
||||||
|
enum cudaTextureFilterMode filterMode;
|
||||||
|
/**
|
||||||
|
* Texture address mode for up to 3 dimensions
|
||||||
|
*/
|
||||||
|
enum cudaTextureAddressMode addressMode[3];
|
||||||
|
/**
|
||||||
|
* Channel descriptor for the texture reference
|
||||||
|
*/
|
||||||
|
struct cudaChannelFormatDesc channelDesc;
|
||||||
|
/**
|
||||||
|
* Perform sRGB->linear conversion during texture read
|
||||||
|
*/
|
||||||
|
int sRGB;
|
||||||
|
/**
|
||||||
|
* Limit to the anisotropy ratio
|
||||||
|
*/
|
||||||
|
unsigned int maxAnisotropy;
|
||||||
|
/**
|
||||||
|
* Mipmap filter mode
|
||||||
|
*/
|
||||||
|
enum cudaTextureFilterMode mipmapFilterMode;
|
||||||
|
/**
|
||||||
|
* Offset applied to the supplied mipmap level
|
||||||
|
*/
|
||||||
|
float mipmapLevelBias;
|
||||||
|
/**
|
||||||
|
* Lower end of the mipmap level range to clamp access to
|
||||||
|
*/
|
||||||
|
float minMipmapLevelClamp;
|
||||||
|
/**
|
||||||
|
* Upper end of the mipmap level range to clamp access to
|
||||||
|
*/
|
||||||
|
float maxMipmapLevelClamp;
|
||||||
|
int __cudaReserved[15];
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* CUDA texture descriptor
|
||||||
|
*/
|
||||||
|
struct __device_builtin__ cudaTextureDesc
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* Texture address mode for up to 3 dimensions
|
||||||
|
*/
|
||||||
|
enum cudaTextureAddressMode addressMode[3];
|
||||||
|
/**
|
||||||
|
* Texture filter mode
|
||||||
|
*/
|
||||||
|
enum cudaTextureFilterMode filterMode;
|
||||||
|
/**
|
||||||
|
* Texture read mode
|
||||||
|
*/
|
||||||
|
enum cudaTextureReadMode readMode;
|
||||||
|
/**
|
||||||
|
* Perform sRGB->linear conversion during texture read
|
||||||
|
*/
|
||||||
|
int sRGB;
|
||||||
|
/**
|
||||||
|
* Texture Border Color
|
||||||
|
*/
|
||||||
|
float borderColor[4];
|
||||||
|
/**
|
||||||
|
* Indicates whether texture reads are normalized or not
|
||||||
|
*/
|
||||||
|
int normalizedCoords;
|
||||||
|
/**
|
||||||
|
* Limit to the anisotropy ratio
|
||||||
|
*/
|
||||||
|
unsigned int maxAnisotropy;
|
||||||
|
/**
|
||||||
|
* Mipmap filter mode
|
||||||
|
*/
|
||||||
|
enum cudaTextureFilterMode mipmapFilterMode;
|
||||||
|
/**
|
||||||
|
* Offset applied to the supplied mipmap level
|
||||||
|
*/
|
||||||
|
float mipmapLevelBias;
|
||||||
|
/**
|
||||||
|
* Lower end of the mipmap level range to clamp access to
|
||||||
|
*/
|
||||||
|
float minMipmapLevelClamp;
|
||||||
|
/**
|
||||||
|
* Upper end of the mipmap level range to clamp access to
|
||||||
|
*/
|
||||||
|
float maxMipmapLevelClamp;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An opaque value that represents a CUDA texture object
|
||||||
|
*/
|
||||||
|
typedef __device_builtin__ unsigned long long cudaTextureObject_t;
|
||||||
|
|
||||||
|
/** @} */
|
||||||
|
/** @} */ /* END CUDART_TYPES */
|
||||||
|
|
||||||
|
#endif /* !__TEXTURE_TYPES_H__ */
|
177
include/external/CUDA/vector_functions.h
vendored
Executable file
177
include/external/CUDA/vector_functions.h
vendored
Executable file
@@ -0,0 +1,177 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
|
||||||
|
*
|
||||||
|
* NOTICE TO LICENSEE:
|
||||||
|
*
|
||||||
|
* This source code and/or documentation ("Licensed Deliverables") are
|
||||||
|
* subject to NVIDIA intellectual property rights under U.S. and
|
||||||
|
* international Copyright laws.
|
||||||
|
*
|
||||||
|
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||||
|
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||||
|
* conditions of a form of NVIDIA software license agreement by and
|
||||||
|
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||||
|
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||||
|
* the contrary in the License Agreement, reproduction or disclosure
|
||||||
|
* of the Licensed Deliverables to any third party without the express
|
||||||
|
* written consent of NVIDIA is prohibited.
|
||||||
|
*
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||||
|
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||||
|
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||||
|
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||||
|
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||||
|
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||||
|
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||||
|
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||||
|
* OF THESE LICENSED DELIVERABLES.
|
||||||
|
*
|
||||||
|
* U.S. Government End Users. These Licensed Deliverables are a
|
||||||
|
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||||
|
* 1995), consisting of "commercial computer software" and "commercial
|
||||||
|
* computer software documentation" as such terms are used in 48
|
||||||
|
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||||
|
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||||
|
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||||
|
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||||
|
* only those rights set forth herein.
|
||||||
|
*
|
||||||
|
* Any use of the Licensed Deliverables in individual and commercial
|
||||||
|
* software must include, in the user documentation and internal
|
||||||
|
* comments to the code, the above Disclaimer and U.S. Government End
|
||||||
|
* Users Notice.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if !defined(__VECTOR_FUNCTIONS_H__)
|
||||||
|
#define __VECTOR_FUNCTIONS_H__
|
||||||
|
|
||||||
|
/*******************************************************************************
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
#include "builtin_types.h"
|
||||||
|
#include "host_defines.h"
|
||||||
|
#include "vector_types.h"
|
||||||
|
|
||||||
|
#if defined(__CUDACC_RTC__)
|
||||||
|
#define __VECTOR_FUNCTIONS_DECL__ __host__ __device__
|
||||||
|
#else /* !__CUDACC_RTC__ */
|
||||||
|
#define __VECTOR_FUNCTIONS_DECL__ static __inline__ __host__ __device__
|
||||||
|
#endif /* __CUDACC_RTC__ */
|
||||||
|
|
||||||
|
/*******************************************************************************
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ char1 make_char1(signed char x);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ uchar1 make_uchar1(unsigned char x);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ char2 make_char2(signed char x, signed char y);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ uchar2 make_uchar2(unsigned char x, unsigned char y);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ char3 make_char3(signed char x, signed char y, signed char z);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ uchar3 make_uchar3(unsigned char x, unsigned char y, unsigned char z);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ char4 make_char4(signed char x, signed char y, signed char z, signed char w);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ uchar4 make_uchar4(unsigned char x, unsigned char y, unsigned char z, unsigned char w);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ short1 make_short1(short x);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ ushort1 make_ushort1(unsigned short x);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ short2 make_short2(short x, short y);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ ushort2 make_ushort2(unsigned short x, unsigned short y);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ short3 make_short3(short x,short y, short z);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ ushort3 make_ushort3(unsigned short x, unsigned short y, unsigned short z);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ short4 make_short4(short x, short y, short z, short w);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ ushort4 make_ushort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ int1 make_int1(int x);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ uint1 make_uint1(unsigned int x);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ int2 make_int2(int x, int y);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ uint2 make_uint2(unsigned int x, unsigned int y);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ int3 make_int3(int x, int y, int z);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ uint3 make_uint3(unsigned int x, unsigned int y, unsigned int z);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ int4 make_int4(int x, int y, int z, int w);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ uint4 make_uint4(unsigned int x, unsigned int y, unsigned int z, unsigned int w);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ long1 make_long1(long int x);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ ulong1 make_ulong1(unsigned long int x);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ long2 make_long2(long int x, long int y);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ ulong2 make_ulong2(unsigned long int x, unsigned long int y);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ long3 make_long3(long int x, long int y, long int z);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ ulong3 make_ulong3(unsigned long int x, unsigned long int y, unsigned long int z);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ long4 make_long4(long int x, long int y, long int z, long int w);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ ulong4 make_ulong4(unsigned long int x, unsigned long int y, unsigned long int z, unsigned long int w);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ float1 make_float1(float x);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ float2 make_float2(float x, float y);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ float3 make_float3(float x, float y, float z);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ float4 make_float4(float x, float y, float z, float w);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ longlong1 make_longlong1(long long int x);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ ulonglong1 make_ulonglong1(unsigned long long int x);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ longlong2 make_longlong2(long long int x, long long int y);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ ulonglong2 make_ulonglong2(unsigned long long int x, unsigned long long int y);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ longlong3 make_longlong3(long long int x, long long int y, long long int z);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ ulonglong3 make_ulonglong3(unsigned long long int x, unsigned long long int y, unsigned long long int z);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ longlong4 make_longlong4(long long int x, long long int y, long long int z, long long int w);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ ulonglong4 make_ulonglong4(unsigned long long int x, unsigned long long int y, unsigned long long int z, unsigned long long int w);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ double1 make_double1(double x);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ double2 make_double2(double x, double y);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ double3 make_double3(double x, double y, double z);
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ double4 make_double4(double x, double y, double z, double w);
|
||||||
|
|
||||||
|
#undef __VECTOR_FUNCTIONS_DECL__
|
||||||
|
|
||||||
|
#if !defined(__CUDACC_RTC__)
|
||||||
|
#include "vector_functions.hpp"
|
||||||
|
#endif /* !__CUDACC_RTC__ */
|
||||||
|
|
||||||
|
#endif /* !__VECTOR_FUNCTIONS_H__ */
|
318
include/external/CUDA/vector_functions.hpp
vendored
Executable file
318
include/external/CUDA/vector_functions.hpp
vendored
Executable file
@@ -0,0 +1,318 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
|
||||||
|
*
|
||||||
|
* NOTICE TO LICENSEE:
|
||||||
|
*
|
||||||
|
* This source code and/or documentation ("Licensed Deliverables") are
|
||||||
|
* subject to NVIDIA intellectual property rights under U.S. and
|
||||||
|
* international Copyright laws.
|
||||||
|
*
|
||||||
|
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||||
|
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||||
|
* conditions of a form of NVIDIA software license agreement by and
|
||||||
|
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||||
|
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||||
|
* the contrary in the License Agreement, reproduction or disclosure
|
||||||
|
* of the Licensed Deliverables to any third party without the express
|
||||||
|
* written consent of NVIDIA is prohibited.
|
||||||
|
*
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||||
|
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||||
|
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||||
|
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||||
|
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||||
|
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||||
|
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||||
|
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||||
|
* OF THESE LICENSED DELIVERABLES.
|
||||||
|
*
|
||||||
|
* U.S. Government End Users. These Licensed Deliverables are a
|
||||||
|
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||||
|
* 1995), consisting of "commercial computer software" and "commercial
|
||||||
|
* computer software documentation" as such terms are used in 48
|
||||||
|
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||||
|
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||||
|
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||||
|
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||||
|
* only those rights set forth herein.
|
||||||
|
*
|
||||||
|
* Any use of the Licensed Deliverables in individual and commercial
|
||||||
|
* software must include, in the user documentation and internal
|
||||||
|
* comments to the code, the above Disclaimer and U.S. Government End
|
||||||
|
* Users Notice.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if !defined(__VECTOR_FUNCTIONS_HPP__)
|
||||||
|
#define __VECTOR_FUNCTIONS_HPP__
|
||||||
|
|
||||||
|
/*******************************************************************************
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
#include "builtin_types.h"
|
||||||
|
#include "host_defines.h"
|
||||||
|
#include "vector_types.h"
|
||||||
|
|
||||||
|
#if defined(__CUDACC_RTC__)
|
||||||
|
#define __VECTOR_FUNCTIONS_DECL__ __host__ __device__
|
||||||
|
#else /* !__CUDACC_RTC__ */
|
||||||
|
#define __VECTOR_FUNCTIONS_DECL__ static __inline__ __host__ __device__
|
||||||
|
#endif /* __CUDACC_RTC__ */
|
||||||
|
|
||||||
|
/*******************************************************************************
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ char1 make_char1(signed char x)
|
||||||
|
{
|
||||||
|
char1 t; t.x = x; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ uchar1 make_uchar1(unsigned char x)
|
||||||
|
{
|
||||||
|
uchar1 t; t.x = x; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ char2 make_char2(signed char x, signed char y)
|
||||||
|
{
|
||||||
|
char2 t; t.x = x; t.y = y; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ uchar2 make_uchar2(unsigned char x, unsigned char y)
|
||||||
|
{
|
||||||
|
uchar2 t; t.x = x; t.y = y; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ char3 make_char3(signed char x, signed char y, signed char z)
|
||||||
|
{
|
||||||
|
char3 t; t.x = x; t.y = y; t.z = z; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ uchar3 make_uchar3(unsigned char x, unsigned char y, unsigned char z)
|
||||||
|
{
|
||||||
|
uchar3 t; t.x = x; t.y = y; t.z = z; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ char4 make_char4(signed char x, signed char y, signed char z, signed char w)
|
||||||
|
{
|
||||||
|
char4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ uchar4 make_uchar4(unsigned char x, unsigned char y, unsigned char z, unsigned char w)
|
||||||
|
{
|
||||||
|
uchar4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ short1 make_short1(short x)
|
||||||
|
{
|
||||||
|
short1 t; t.x = x; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ ushort1 make_ushort1(unsigned short x)
|
||||||
|
{
|
||||||
|
ushort1 t; t.x = x; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ short2 make_short2(short x, short y)
|
||||||
|
{
|
||||||
|
short2 t; t.x = x; t.y = y; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ ushort2 make_ushort2(unsigned short x, unsigned short y)
|
||||||
|
{
|
||||||
|
ushort2 t; t.x = x; t.y = y; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ short3 make_short3(short x,short y, short z)
|
||||||
|
{
|
||||||
|
short3 t; t.x = x; t.y = y; t.z = z; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ ushort3 make_ushort3(unsigned short x, unsigned short y, unsigned short z)
|
||||||
|
{
|
||||||
|
ushort3 t; t.x = x; t.y = y; t.z = z; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ short4 make_short4(short x, short y, short z, short w)
|
||||||
|
{
|
||||||
|
short4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ ushort4 make_ushort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
|
||||||
|
{
|
||||||
|
ushort4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ int1 make_int1(int x)
|
||||||
|
{
|
||||||
|
int1 t; t.x = x; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ uint1 make_uint1(unsigned int x)
|
||||||
|
{
|
||||||
|
uint1 t; t.x = x; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ int2 make_int2(int x, int y)
|
||||||
|
{
|
||||||
|
int2 t; t.x = x; t.y = y; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ uint2 make_uint2(unsigned int x, unsigned int y)
|
||||||
|
{
|
||||||
|
uint2 t; t.x = x; t.y = y; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ int3 make_int3(int x, int y, int z)
|
||||||
|
{
|
||||||
|
int3 t; t.x = x; t.y = y; t.z = z; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ uint3 make_uint3(unsigned int x, unsigned int y, unsigned int z)
|
||||||
|
{
|
||||||
|
uint3 t; t.x = x; t.y = y; t.z = z; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ int4 make_int4(int x, int y, int z, int w)
|
||||||
|
{
|
||||||
|
int4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ uint4 make_uint4(unsigned int x, unsigned int y, unsigned int z, unsigned int w)
|
||||||
|
{
|
||||||
|
uint4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ long1 make_long1(long int x)
|
||||||
|
{
|
||||||
|
long1 t; t.x = x; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ ulong1 make_ulong1(unsigned long int x)
|
||||||
|
{
|
||||||
|
ulong1 t; t.x = x; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ long2 make_long2(long int x, long int y)
|
||||||
|
{
|
||||||
|
long2 t; t.x = x; t.y = y; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ ulong2 make_ulong2(unsigned long int x, unsigned long int y)
|
||||||
|
{
|
||||||
|
ulong2 t; t.x = x; t.y = y; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ long3 make_long3(long int x, long int y, long int z)
|
||||||
|
{
|
||||||
|
long3 t; t.x = x; t.y = y; t.z = z; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ ulong3 make_ulong3(unsigned long int x, unsigned long int y, unsigned long int z)
|
||||||
|
{
|
||||||
|
ulong3 t; t.x = x; t.y = y; t.z = z; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ long4 make_long4(long int x, long int y, long int z, long int w)
|
||||||
|
{
|
||||||
|
long4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ ulong4 make_ulong4(unsigned long int x, unsigned long int y, unsigned long int z, unsigned long int w)
|
||||||
|
{
|
||||||
|
ulong4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ float1 make_float1(float x)
|
||||||
|
{
|
||||||
|
float1 t; t.x = x; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ float2 make_float2(float x, float y)
|
||||||
|
{
|
||||||
|
float2 t; t.x = x; t.y = y; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ float3 make_float3(float x, float y, float z)
|
||||||
|
{
|
||||||
|
float3 t; t.x = x; t.y = y; t.z = z; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ float4 make_float4(float x, float y, float z, float w)
|
||||||
|
{
|
||||||
|
float4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ longlong1 make_longlong1(long long int x)
|
||||||
|
{
|
||||||
|
longlong1 t; t.x = x; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ ulonglong1 make_ulonglong1(unsigned long long int x)
|
||||||
|
{
|
||||||
|
ulonglong1 t; t.x = x; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ longlong2 make_longlong2(long long int x, long long int y)
|
||||||
|
{
|
||||||
|
longlong2 t; t.x = x; t.y = y; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ ulonglong2 make_ulonglong2(unsigned long long int x, unsigned long long int y)
|
||||||
|
{
|
||||||
|
ulonglong2 t; t.x = x; t.y = y; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ longlong3 make_longlong3(long long int x, long long int y, long long int z)
|
||||||
|
{
|
||||||
|
longlong3 t; t.x = x; t.y = y; t.z = z; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ ulonglong3 make_ulonglong3(unsigned long long int x, unsigned long long int y, unsigned long long int z)
|
||||||
|
{
|
||||||
|
ulonglong3 t; t.x = x; t.y = y; t.z = z; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ longlong4 make_longlong4(long long int x, long long int y, long long int z, long long int w)
|
||||||
|
{
|
||||||
|
longlong4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ ulonglong4 make_ulonglong4(unsigned long long int x, unsigned long long int y, unsigned long long int z, unsigned long long int w)
|
||||||
|
{
|
||||||
|
ulonglong4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ double1 make_double1(double x)
|
||||||
|
{
|
||||||
|
double1 t; t.x = x; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ double2 make_double2(double x, double y)
|
||||||
|
{
|
||||||
|
double2 t; t.x = x; t.y = y; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ double3 make_double3(double x, double y, double z)
|
||||||
|
{
|
||||||
|
double3 t; t.x = x; t.y = y; t.z = z; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
__VECTOR_FUNCTIONS_DECL__ double4 make_double4(double x, double y, double z, double w)
|
||||||
|
{
|
||||||
|
double4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef __VECTOR_FUNCTIONS_DECL__
|
||||||
|
|
||||||
|
#endif /* !__VECTOR_FUNCTIONS_HPP__ */
|
||||||
|
|
425
include/external/CUDA/vector_types.h
vendored
Executable file
425
include/external/CUDA/vector_types.h
vendored
Executable file
@@ -0,0 +1,425 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
|
||||||
|
*
|
||||||
|
* NOTICE TO LICENSEE:
|
||||||
|
*
|
||||||
|
* This source code and/or documentation ("Licensed Deliverables") are
|
||||||
|
* subject to NVIDIA intellectual property rights under U.S. and
|
||||||
|
* international Copyright laws.
|
||||||
|
*
|
||||||
|
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||||
|
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||||
|
* conditions of a form of NVIDIA software license agreement by and
|
||||||
|
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||||
|
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||||
|
* the contrary in the License Agreement, reproduction or disclosure
|
||||||
|
* of the Licensed Deliverables to any third party without the express
|
||||||
|
* written consent of NVIDIA is prohibited.
|
||||||
|
*
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||||
|
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||||
|
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||||
|
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||||
|
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||||
|
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||||
|
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||||
|
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||||
|
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||||
|
* OF THESE LICENSED DELIVERABLES.
|
||||||
|
*
|
||||||
|
* U.S. Government End Users. These Licensed Deliverables are a
|
||||||
|
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||||
|
* 1995), consisting of "commercial computer software" and "commercial
|
||||||
|
* computer software documentation" as such terms are used in 48
|
||||||
|
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||||
|
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||||
|
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||||
|
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||||
|
* only those rights set forth herein.
|
||||||
|
*
|
||||||
|
* Any use of the Licensed Deliverables in individual and commercial
|
||||||
|
* software must include, in the user documentation and internal
|
||||||
|
* comments to the code, the above Disclaimer and U.S. Government End
|
||||||
|
* Users Notice.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if !defined(__VECTOR_TYPES_H__)
|
||||||
|
#define __VECTOR_TYPES_H__
|
||||||
|
|
||||||
|
/*******************************************************************************
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
#include "host_defines.h"
|
||||||
|
|
||||||
|
/*******************************************************************************
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
#if !defined(__CUDACC__) && !defined(__CUDACC_RTC__) && \
|
||||||
|
defined(_WIN32) && !defined(_WIN64)
|
||||||
|
|
||||||
|
#pragma warning(push)
|
||||||
|
#pragma warning(disable: 4201 4408)
|
||||||
|
|
||||||
|
#define __cuda_builtin_vector_align8(tag, members) \
|
||||||
|
struct __device_builtin__ tag \
|
||||||
|
{ \
|
||||||
|
union \
|
||||||
|
{ \
|
||||||
|
struct { members }; \
|
||||||
|
struct { long long int :1,:0; }; \
|
||||||
|
}; \
|
||||||
|
}
|
||||||
|
|
||||||
|
#else /* !__CUDACC__ && !__CUDACC_RTC__ && _WIN32 && !_WIN64 */
|
||||||
|
|
||||||
|
#define __cuda_builtin_vector_align8(tag, members) \
|
||||||
|
struct __device_builtin__ __align__(8) tag \
|
||||||
|
{ \
|
||||||
|
members \
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* !__CUDACC__ && !__CUDACC_RTC__ && _WIN32 && !_WIN64 */
|
||||||
|
|
||||||
|
struct __device_builtin__ char1
|
||||||
|
{
|
||||||
|
signed char x;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ uchar1
|
||||||
|
{
|
||||||
|
unsigned char x;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct __device_builtin__ __align__(2) char2
|
||||||
|
{
|
||||||
|
signed char x, y;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ __align__(2) uchar2
|
||||||
|
{
|
||||||
|
unsigned char x, y;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ char3
|
||||||
|
{
|
||||||
|
signed char x, y, z;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ uchar3
|
||||||
|
{
|
||||||
|
unsigned char x, y, z;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ __align__(4) char4
|
||||||
|
{
|
||||||
|
signed char x, y, z, w;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ __align__(4) uchar4
|
||||||
|
{
|
||||||
|
unsigned char x, y, z, w;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ short1
|
||||||
|
{
|
||||||
|
short x;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ ushort1
|
||||||
|
{
|
||||||
|
unsigned short x;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ __align__(4) short2
|
||||||
|
{
|
||||||
|
short x, y;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ __align__(4) ushort2
|
||||||
|
{
|
||||||
|
unsigned short x, y;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ short3
|
||||||
|
{
|
||||||
|
short x, y, z;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ ushort3
|
||||||
|
{
|
||||||
|
unsigned short x, y, z;
|
||||||
|
};
|
||||||
|
|
||||||
|
__cuda_builtin_vector_align8(short4, short x; short y; short z; short w;);
|
||||||
|
__cuda_builtin_vector_align8(ushort4, unsigned short x; unsigned short y; unsigned short z; unsigned short w;);
|
||||||
|
|
||||||
|
struct __device_builtin__ int1
|
||||||
|
{
|
||||||
|
int x;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ uint1
|
||||||
|
{
|
||||||
|
unsigned int x;
|
||||||
|
};
|
||||||
|
|
||||||
|
__cuda_builtin_vector_align8(int2, int x; int y;);
|
||||||
|
__cuda_builtin_vector_align8(uint2, unsigned int x; unsigned int y;);
|
||||||
|
|
||||||
|
struct __device_builtin__ int3
|
||||||
|
{
|
||||||
|
int x, y, z;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ uint3
|
||||||
|
{
|
||||||
|
unsigned int x, y, z;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ __builtin_align__(16) int4
|
||||||
|
{
|
||||||
|
int x, y, z, w;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ __builtin_align__(16) uint4
|
||||||
|
{
|
||||||
|
unsigned int x, y, z, w;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ long1
|
||||||
|
{
|
||||||
|
long int x;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ ulong1
|
||||||
|
{
|
||||||
|
unsigned long x;
|
||||||
|
};
|
||||||
|
|
||||||
|
#if defined(_WIN32)
|
||||||
|
__cuda_builtin_vector_align8(long2, long int x; long int y;);
|
||||||
|
__cuda_builtin_vector_align8(ulong2, unsigned long int x; unsigned long int y;);
|
||||||
|
#else /* !_WIN32 */
|
||||||
|
|
||||||
|
struct __device_builtin__ __align__(2*sizeof(long int)) long2
|
||||||
|
{
|
||||||
|
long int x, y;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ __align__(2*sizeof(unsigned long int)) ulong2
|
||||||
|
{
|
||||||
|
unsigned long int x, y;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* _WIN32 */
|
||||||
|
|
||||||
|
struct __device_builtin__ long3
|
||||||
|
{
|
||||||
|
long int x, y, z;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ ulong3
|
||||||
|
{
|
||||||
|
unsigned long int x, y, z;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ __builtin_align__(16) long4
|
||||||
|
{
|
||||||
|
long int x, y, z, w;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ __builtin_align__(16) ulong4
|
||||||
|
{
|
||||||
|
unsigned long int x, y, z, w;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ float1
|
||||||
|
{
|
||||||
|
float x;
|
||||||
|
};
|
||||||
|
|
||||||
|
#if !defined(__CUDACC__) && defined(__arm__) && \
|
||||||
|
defined(__ARM_PCS_VFP) && __GNUC__ == 4 && __GNUC_MINOR__ == 6
|
||||||
|
|
||||||
|
#pragma GCC diagnostic push
|
||||||
|
#pragma GCC diagnostic ignored "-pedantic"
|
||||||
|
|
||||||
|
struct __device_builtin__ __attribute__((aligned(8))) float2
|
||||||
|
{
|
||||||
|
float x; float y; float __cuda_gnu_arm_ice_workaround[0];
|
||||||
|
};
|
||||||
|
|
||||||
|
#pragma GCC poison __cuda_gnu_arm_ice_workaround
|
||||||
|
#pragma GCC diagnostic pop
|
||||||
|
|
||||||
|
#else /* !__CUDACC__ && __arm__ && __ARM_PCS_VFP &&
|
||||||
|
__GNUC__ == 4&& __GNUC_MINOR__ == 6 */
|
||||||
|
|
||||||
|
__cuda_builtin_vector_align8(float2, float x; float y;);
|
||||||
|
|
||||||
|
#endif /* !__CUDACC__ && __arm__ && __ARM_PCS_VFP &&
|
||||||
|
__GNUC__ == 4&& __GNUC_MINOR__ == 6 */
|
||||||
|
|
||||||
|
struct __device_builtin__ float3
|
||||||
|
{
|
||||||
|
float x, y, z;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ __builtin_align__(16) float4
|
||||||
|
{
|
||||||
|
float x, y, z, w;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ longlong1
|
||||||
|
{
|
||||||
|
long long int x;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ ulonglong1
|
||||||
|
{
|
||||||
|
unsigned long long int x;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ __builtin_align__(16) longlong2
|
||||||
|
{
|
||||||
|
long long int x, y;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ __builtin_align__(16) ulonglong2
|
||||||
|
{
|
||||||
|
unsigned long long int x, y;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ longlong3
|
||||||
|
{
|
||||||
|
long long int x, y, z;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ ulonglong3
|
||||||
|
{
|
||||||
|
unsigned long long int x, y, z;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ __builtin_align__(16) longlong4
|
||||||
|
{
|
||||||
|
long long int x, y, z ,w;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ __builtin_align__(16) ulonglong4
|
||||||
|
{
|
||||||
|
unsigned long long int x, y, z, w;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ double1
|
||||||
|
{
|
||||||
|
double x;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ __builtin_align__(16) double2
|
||||||
|
{
|
||||||
|
double x, y;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ double3
|
||||||
|
{
|
||||||
|
double x, y, z;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct __device_builtin__ __builtin_align__(16) double4
|
||||||
|
{
|
||||||
|
double x, y, z, w;
|
||||||
|
};
|
||||||
|
|
||||||
|
#if !defined(__CUDACC__) && defined(_WIN32) && !defined(_WIN64)
|
||||||
|
|
||||||
|
#pragma warning(pop)
|
||||||
|
|
||||||
|
#endif /* !__CUDACC__ && _WIN32 && !_WIN64 */
|
||||||
|
|
||||||
|
/*******************************************************************************
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
typedef __device_builtin__ struct char1 char1;
|
||||||
|
typedef __device_builtin__ struct uchar1 uchar1;
|
||||||
|
typedef __device_builtin__ struct char2 char2;
|
||||||
|
typedef __device_builtin__ struct uchar2 uchar2;
|
||||||
|
typedef __device_builtin__ struct char3 char3;
|
||||||
|
typedef __device_builtin__ struct uchar3 uchar3;
|
||||||
|
typedef __device_builtin__ struct char4 char4;
|
||||||
|
typedef __device_builtin__ struct uchar4 uchar4;
|
||||||
|
typedef __device_builtin__ struct short1 short1;
|
||||||
|
typedef __device_builtin__ struct ushort1 ushort1;
|
||||||
|
typedef __device_builtin__ struct short2 short2;
|
||||||
|
typedef __device_builtin__ struct ushort2 ushort2;
|
||||||
|
typedef __device_builtin__ struct short3 short3;
|
||||||
|
typedef __device_builtin__ struct ushort3 ushort3;
|
||||||
|
typedef __device_builtin__ struct short4 short4;
|
||||||
|
typedef __device_builtin__ struct ushort4 ushort4;
|
||||||
|
typedef __device_builtin__ struct int1 int1;
|
||||||
|
typedef __device_builtin__ struct uint1 uint1;
|
||||||
|
typedef __device_builtin__ struct int2 int2;
|
||||||
|
typedef __device_builtin__ struct uint2 uint2;
|
||||||
|
typedef __device_builtin__ struct int3 int3;
|
||||||
|
typedef __device_builtin__ struct uint3 uint3;
|
||||||
|
typedef __device_builtin__ struct int4 int4;
|
||||||
|
typedef __device_builtin__ struct uint4 uint4;
|
||||||
|
typedef __device_builtin__ struct long1 long1;
|
||||||
|
typedef __device_builtin__ struct ulong1 ulong1;
|
||||||
|
typedef __device_builtin__ struct long2 long2;
|
||||||
|
typedef __device_builtin__ struct ulong2 ulong2;
|
||||||
|
typedef __device_builtin__ struct long3 long3;
|
||||||
|
typedef __device_builtin__ struct ulong3 ulong3;
|
||||||
|
typedef __device_builtin__ struct long4 long4;
|
||||||
|
typedef __device_builtin__ struct ulong4 ulong4;
|
||||||
|
typedef __device_builtin__ struct float1 float1;
|
||||||
|
typedef __device_builtin__ struct float2 float2;
|
||||||
|
typedef __device_builtin__ struct float3 float3;
|
||||||
|
typedef __device_builtin__ struct float4 float4;
|
||||||
|
typedef __device_builtin__ struct longlong1 longlong1;
|
||||||
|
typedef __device_builtin__ struct ulonglong1 ulonglong1;
|
||||||
|
typedef __device_builtin__ struct longlong2 longlong2;
|
||||||
|
typedef __device_builtin__ struct ulonglong2 ulonglong2;
|
||||||
|
typedef __device_builtin__ struct longlong3 longlong3;
|
||||||
|
typedef __device_builtin__ struct ulonglong3 ulonglong3;
|
||||||
|
typedef __device_builtin__ struct longlong4 longlong4;
|
||||||
|
typedef __device_builtin__ struct ulonglong4 ulonglong4;
|
||||||
|
typedef __device_builtin__ struct double1 double1;
|
||||||
|
typedef __device_builtin__ struct double2 double2;
|
||||||
|
typedef __device_builtin__ struct double3 double3;
|
||||||
|
typedef __device_builtin__ struct double4 double4;
|
||||||
|
|
||||||
|
/*******************************************************************************
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
struct __device_builtin__ dim3
|
||||||
|
{
|
||||||
|
unsigned int x, y, z;
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
__host__ __device__ dim3(unsigned int vx = 1, unsigned int vy = 1, unsigned int vz = 1) : x(vx), y(vy), z(vz) {}
|
||||||
|
__host__ __device__ dim3(uint3 v) : x(v.x), y(v.y), z(v.z) {}
|
||||||
|
__host__ __device__ operator uint3(void) { uint3 t; t.x = x; t.y = y; t.z = z; return t; }
|
||||||
|
#endif /* __cplusplus */
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef __device_builtin__ struct dim3 dim3;
|
||||||
|
|
||||||
|
#undef __cuda_builtin_vector_align8
|
||||||
|
|
||||||
|
#endif /* !__VECTOR_TYPES_H__ */
|
56
include/tools/sys/getenv.hpp
Executable file
56
include/tools/sys/getenv.hpp
Executable file
@@ -0,0 +1,56 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015, PHILIPPE TILLET. All rights reserved.
|
||||||
|
*
|
||||||
|
* This file is part of ISAAC.
|
||||||
|
*
|
||||||
|
* ISAAC is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This library is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this library; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||||
|
* MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef TDL_TOOLS_SYS_GETENV_HPP
|
||||||
|
#define TDL_TOOLS_SYS_GETENV_HPP
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <cstdlib>
|
||||||
|
|
||||||
|
namespace tdl
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace tools
|
||||||
|
{
|
||||||
|
|
||||||
|
inline std::string getenv(const char * name)
|
||||||
|
{
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
char* cache_path = 0;
|
||||||
|
std::size_t sz = 0;
|
||||||
|
_dupenv_s(&cache_path, &sz, name);
|
||||||
|
#else
|
||||||
|
const char * cache_path = std::getenv(name);
|
||||||
|
#endif
|
||||||
|
if(!cache_path)
|
||||||
|
return "";
|
||||||
|
std::string result(cache_path);
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
free(cache_path);
|
||||||
|
#endif
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
68
include/tools/sys/mkdir.hpp
Executable file
68
include/tools/sys/mkdir.hpp
Executable file
@@ -0,0 +1,68 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015, PHILIPPE TILLET. All rights reserved.
|
||||||
|
*
|
||||||
|
* This file is part of ISAAC.
|
||||||
|
*
|
||||||
|
* ISAAC is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This library is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with this library; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||||
|
* MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef TDL_TOOLS_SYS_MKDIR_HPP
|
||||||
|
#define TDL_TOOLS_SYS_MKDIR_HPP
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
#include <string>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#if defined(_WIN32)
|
||||||
|
#include <direct.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace tdl
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace tools
|
||||||
|
{
|
||||||
|
|
||||||
|
inline int mkdir(std::string const & path)
|
||||||
|
{
|
||||||
|
#if defined(_WIN32)
|
||||||
|
return _mkdir(path.c_str());
|
||||||
|
#else
|
||||||
|
return ::mkdir(path.c_str(), 0777);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
inline int mkpath(std::string const & path)
|
||||||
|
{
|
||||||
|
int status = 0;
|
||||||
|
size_t pp = 0;
|
||||||
|
size_t sp;
|
||||||
|
while ((sp = path.find('/', pp)) != std::string::npos)
|
||||||
|
{
|
||||||
|
if (sp != pp){
|
||||||
|
status = mkdir(path.substr(0, sp));
|
||||||
|
}
|
||||||
|
pp = sp + 1;
|
||||||
|
}
|
||||||
|
return (status==0 || errno==EEXIST)?0:-1;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
196
lib/driver/backend.cpp
Executable file
196
lib/driver/backend.cpp
Executable file
@@ -0,0 +1,196 @@
|
|||||||
|
/* Copyright 2015-2017 Philippe Tillet
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files
|
||||||
|
* (the "Software"), to deal in the Software without restriction,
|
||||||
|
* including without limitation the rights to use, copy, modify, merge,
|
||||||
|
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so,
|
||||||
|
* subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "driver/dispatch.h"
|
||||||
|
#include "driver/backend.h"
|
||||||
|
#include "driver/buffer.h"
|
||||||
|
#include "driver/context.h"
|
||||||
|
#include "driver/stream.h"
|
||||||
|
#include "driver/kernel.h"
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace tdl
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace driver
|
||||||
|
{
|
||||||
|
|
||||||
|
/*-----------------------------------*/
|
||||||
|
//---------- Modules ----------------*/
|
||||||
|
/*-----------------------------------*/
|
||||||
|
|
||||||
|
void backend::modules::release(){
|
||||||
|
for(auto & x: cache_)
|
||||||
|
delete x.second;
|
||||||
|
cache_.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
Module& backend::modules::get(Stream const & stream, std::string const & name, std::string const & src){
|
||||||
|
std::tuple<Stream, std::string> key(stream, name);
|
||||||
|
if(cache_.find(key)==cache_.end())
|
||||||
|
return *cache_.insert(std::make_pair(key, new Module(stream.context(), src))).first->second;
|
||||||
|
return *cache_.at(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::map<std::tuple<Stream, std::string>, Module * > backend::modules::cache_;
|
||||||
|
|
||||||
|
/*-----------------------------------*/
|
||||||
|
//----------- Kernels --------------*/
|
||||||
|
/*-----------------------------------*/
|
||||||
|
|
||||||
|
void backend::kernels::release(){
|
||||||
|
for(auto & x: cache_)
|
||||||
|
delete x.second;
|
||||||
|
cache_.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
Kernel & backend::kernels::get(Module const & program, std::string const & name){
|
||||||
|
std::tuple<Module, std::string> key(program, name);
|
||||||
|
if(cache_.find(key)==cache_.end())
|
||||||
|
return *cache_.insert(std::make_pair(key, new Kernel(program, name.c_str()))).first->second;
|
||||||
|
return *cache_.at(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::map<std::tuple<Module, std::string>, Kernel * > backend::kernels::cache_;
|
||||||
|
|
||||||
|
/*-----------------------------------*/
|
||||||
|
//------------ Queues --------------*/
|
||||||
|
/*-----------------------------------*/
|
||||||
|
|
||||||
|
void backend::streams::init(std::list<const Context *> const & contexts){
|
||||||
|
for(Context const * ctx : contexts)
|
||||||
|
if(cache_.find(*ctx)==cache_.end())
|
||||||
|
cache_.insert(std::make_pair(*ctx, std::vector<Stream*>{new Stream(*ctx)}));
|
||||||
|
}
|
||||||
|
|
||||||
|
void backend::streams::release(){
|
||||||
|
for(auto & x: cache_)
|
||||||
|
for(auto & y: x.second)
|
||||||
|
delete y;
|
||||||
|
cache_.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
Stream & backend::streams::get_default()
|
||||||
|
{ return get(contexts::get_default(), 0); }
|
||||||
|
|
||||||
|
Stream & backend::streams::get(Context const & context, unsigned int id){
|
||||||
|
init(std::list<Context const *>(1,&context));
|
||||||
|
for(auto & x : cache_)
|
||||||
|
if(x.first==context)
|
||||||
|
return *x.second[id];
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
|
||||||
|
void backend::streams::get(Context const & context, std::vector<Stream*> & queues){
|
||||||
|
init(std::list<Context const *>(1,&context));
|
||||||
|
queues = cache_.at(context);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::map<Context, std::vector<Stream*> > backend::streams::cache_;
|
||||||
|
|
||||||
|
/*-----------------------------------*/
|
||||||
|
//------------ Contexts ------------*/
|
||||||
|
/*-----------------------------------*/
|
||||||
|
|
||||||
|
void backend::contexts::init(std::vector<Platform> const & platforms){
|
||||||
|
for(Platform const & platform: platforms){
|
||||||
|
for(Device const & device: platform.devices())
|
||||||
|
cache_.push_back(new Context(device));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void backend::contexts::release(){
|
||||||
|
for(auto & x: cache_)
|
||||||
|
delete x;
|
||||||
|
cache_.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
Context const & backend::contexts::get_default(){
|
||||||
|
backend::init();
|
||||||
|
std::list<Context const *>::const_iterator it = cache_.begin();
|
||||||
|
std::advance(it, default_device);
|
||||||
|
return **it;
|
||||||
|
}
|
||||||
|
|
||||||
|
void backend::contexts::get(std::list<Context const *> & contexts){
|
||||||
|
backend::init();
|
||||||
|
contexts = cache_;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::list<Context const *> backend::contexts::cache_;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*-----------------------------------*/
|
||||||
|
//------------ General -------------*/
|
||||||
|
/*-----------------------------------*/
|
||||||
|
|
||||||
|
std::vector<Device> backend::devices(){
|
||||||
|
std::vector<Platform> platforms = backend::platforms();
|
||||||
|
std::vector<Device> result;
|
||||||
|
for(Platform const & platform: platforms){
|
||||||
|
auto devices = platform.devices();
|
||||||
|
result.insert(result.end(), devices.begin(), devices.end());
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<Platform> backend::platforms(){
|
||||||
|
std::vector<Platform> platforms;
|
||||||
|
//if CUDA is here
|
||||||
|
if(dispatch::cuinit())
|
||||||
|
platforms.push_back(Platform());
|
||||||
|
if(platforms.empty())
|
||||||
|
throw std::runtime_error("ISAAC: No backend available. Make sure CUDA is available in your library path");
|
||||||
|
return platforms;
|
||||||
|
}
|
||||||
|
|
||||||
|
void backend::synchronize(Context const & context){
|
||||||
|
for(Stream * queue: streams::cache_.at(context))
|
||||||
|
queue->synchronize();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void backend::release(){
|
||||||
|
backend::kernels::release();
|
||||||
|
// backend::programs::release();
|
||||||
|
backend::streams::release();
|
||||||
|
backend::contexts::release();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void backend::init(){
|
||||||
|
if(!contexts::cache_.empty())
|
||||||
|
return;
|
||||||
|
std::vector<Platform> platforms = backend::platforms();
|
||||||
|
contexts::init(platforms);
|
||||||
|
streams::init(contexts::cache_);
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int backend::default_device = 0;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
60
lib/driver/buffer.cpp
Executable file
60
lib/driver/buffer.cpp
Executable file
@@ -0,0 +1,60 @@
|
|||||||
|
/* Copyright 2015-2017 Philippe Tillet
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files
|
||||||
|
* (the "Software"), to deal in the Software without restriction,
|
||||||
|
* including without limitation the rights to use, copy, modify, merge,
|
||||||
|
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so,
|
||||||
|
* subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include "driver/stream.h"
|
||||||
|
#include "driver/buffer.h"
|
||||||
|
#include "driver/context.h"
|
||||||
|
#include "driver/dispatch.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace tdl
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace driver
|
||||||
|
{
|
||||||
|
|
||||||
|
Buffer::Buffer(Context const & context, size_t size) : context_(context)
|
||||||
|
{
|
||||||
|
ContextSwitcher ctx_switch(context_);
|
||||||
|
dispatch::cuMemAlloc(&*cu_, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
Buffer::Buffer(Context const & context, CUdeviceptr cu, bool take_ownership):
|
||||||
|
context_(context), cu_(cu, take_ownership)
|
||||||
|
{ }
|
||||||
|
|
||||||
|
void Buffer::set_zero(Stream const & queue, size_t size)
|
||||||
|
{
|
||||||
|
ContextSwitcher ctx_switch(context_);
|
||||||
|
dispatch::cuMemsetD8Async(*cu_, 0, size, queue);
|
||||||
|
}
|
||||||
|
|
||||||
|
Handle<CUdeviceptr> const & Buffer::cu() const
|
||||||
|
{ return cu_; }
|
||||||
|
|
||||||
|
Handle<CUdeviceptr> & Buffer::cu()
|
||||||
|
{ return cu_; }
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
99
lib/driver/context.cpp
Executable file
99
lib/driver/context.cpp
Executable file
@@ -0,0 +1,99 @@
|
|||||||
|
/* Copyright 2015-2017 Philippe Tillet
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files
|
||||||
|
* (the "Software"), to deal in the Software without restriction,
|
||||||
|
* including without limitation the rights to use, copy, modify, merge,
|
||||||
|
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so,
|
||||||
|
* subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
|
#include "driver/context.h"
|
||||||
|
#include "driver/module.h"
|
||||||
|
|
||||||
|
#include "tools/sys/getenv.hpp"
|
||||||
|
#include "tools/sys/mkdir.hpp"
|
||||||
|
|
||||||
|
namespace tdl
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace driver
|
||||||
|
{
|
||||||
|
|
||||||
|
std::string Context::get_cache_path(){
|
||||||
|
//user-specified cache path
|
||||||
|
std::string result = tools::getenv("ISAAC_CACHE_PATH");
|
||||||
|
if(!result.empty()){
|
||||||
|
if(tools::mkpath(result)==0)
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
//create in home
|
||||||
|
result = tools::getenv("HOME");
|
||||||
|
if(!result.empty())
|
||||||
|
{
|
||||||
|
result = result + "/.isaac/cache/";
|
||||||
|
if(tools::mkpath(result)==0)
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
//couldn't find a directory
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
CUdevice Context::device(CUcontext context){
|
||||||
|
dispatch::cuCtxPushCurrent_v2(context);
|
||||||
|
CUdevice res;
|
||||||
|
dispatch::cuCtxGetDevice(&res);
|
||||||
|
dispatch::cuCtxPopCurrent_v2(NULL);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
Context::Context(CUcontext context, bool take_ownership): cu_(context, take_ownership), device_(device(context), false), cache_path_(get_cache_path())
|
||||||
|
{ }
|
||||||
|
|
||||||
|
Context::Context(Device const & device): device_(device), cache_path_(get_cache_path())
|
||||||
|
{
|
||||||
|
dispatch::cuCtxCreate(&*cu_, CU_CTX_SCHED_AUTO, (CUdevice)device);
|
||||||
|
dispatch::cuCtxPopCurrent_v2(NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
Device const & Context::device() const
|
||||||
|
{ return device_; }
|
||||||
|
|
||||||
|
std::string const & Context::cache_path() const
|
||||||
|
{ return cache_path_; }
|
||||||
|
|
||||||
|
Handle<CUcontext> const & Context::cu() const
|
||||||
|
{ return cu_; }
|
||||||
|
|
||||||
|
/* Context Switcher */
|
||||||
|
ContextSwitcher::ContextSwitcher(Context const & ctx): ctx_(ctx)
|
||||||
|
{
|
||||||
|
dispatch::cuCtxPushCurrent_v2(ctx_);
|
||||||
|
}
|
||||||
|
|
||||||
|
ContextSwitcher::~ContextSwitcher()
|
||||||
|
{
|
||||||
|
CUcontext tmp;
|
||||||
|
dispatch::cuCtxPopCurrent_v2(&tmp);
|
||||||
|
assert(tmp==(CUcontext)ctx_ && "Switching back to invalid context!");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
197
lib/driver/device.cpp
Executable file
197
lib/driver/device.cpp
Executable file
@@ -0,0 +1,197 @@
|
|||||||
|
/* Copyright 2015-2017 Philippe Tillet
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files
|
||||||
|
* (the "Software"), to deal in the Software without restriction,
|
||||||
|
* including without limitation the rights to use, copy, modify, merge,
|
||||||
|
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so,
|
||||||
|
* subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <sstream>
|
||||||
|
#include <cstring>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#include "driver/device.h"
|
||||||
|
|
||||||
|
namespace tdl
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace driver
|
||||||
|
{
|
||||||
|
|
||||||
|
/* Architecture [NVidia] */
|
||||||
|
Device::Architecture Device::nv_arch(std::pair<unsigned int, unsigned int> sm) const{
|
||||||
|
switch(sm.first)
|
||||||
|
{
|
||||||
|
case 7:
|
||||||
|
switch(sm.second)
|
||||||
|
{
|
||||||
|
case 0: return Architecture::SM_7_0;
|
||||||
|
}
|
||||||
|
|
||||||
|
case 6:
|
||||||
|
switch(sm.second)
|
||||||
|
{
|
||||||
|
case 0: return Architecture::SM_6_0;
|
||||||
|
case 1: return Architecture::SM_6_1;
|
||||||
|
}
|
||||||
|
|
||||||
|
case 5:
|
||||||
|
switch(sm.second)
|
||||||
|
{
|
||||||
|
case 0: return Architecture::SM_5_0;
|
||||||
|
case 2: return Architecture::SM_5_2;
|
||||||
|
default: return Architecture::UNKNOWN;
|
||||||
|
}
|
||||||
|
|
||||||
|
case 3:
|
||||||
|
switch(sm.second)
|
||||||
|
{
|
||||||
|
case 0: return Architecture::SM_3_0;
|
||||||
|
case 5: return Architecture::SM_3_5;
|
||||||
|
case 7: return Architecture::SM_3_7;
|
||||||
|
default: return Architecture::UNKNOWN;
|
||||||
|
}
|
||||||
|
|
||||||
|
case 2:
|
||||||
|
switch(sm.second)
|
||||||
|
{
|
||||||
|
case 0: return Architecture::SM_2_0;
|
||||||
|
case 1: return Architecture::SM_2_1;
|
||||||
|
default: return Architecture::UNKNOWN;
|
||||||
|
}
|
||||||
|
|
||||||
|
default: return Architecture::UNKNOWN;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<CUdevice_attribute attr>
|
||||||
|
int Device::cuGetInfo() const{
|
||||||
|
int res;
|
||||||
|
dispatch::cuDeviceGetAttribute(&res, attr, *cu_);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
nvmlDevice_t Device::nvml_device() const{
|
||||||
|
std::map<std::string, nvmlDevice_t> map;
|
||||||
|
std::string key = pci_bus_id();
|
||||||
|
if(map.find(key)==map.end()){
|
||||||
|
nvmlDevice_t device;
|
||||||
|
dispatch::nvmlDeviceGetHandleByPciBusId_v2(key.c_str(), &device);
|
||||||
|
return map.insert(std::make_pair(key, device)).first->second;
|
||||||
|
}
|
||||||
|
return map.at(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Architecture */
|
||||||
|
Device::Architecture Device::architecture() const
|
||||||
|
{ return nv_arch(compute_capability()); }
|
||||||
|
|
||||||
|
/* Attributes */
|
||||||
|
size_t Device::address_bits() const
|
||||||
|
{ return sizeof(size_t)*8; }
|
||||||
|
|
||||||
|
driver::Platform Device::platform() const
|
||||||
|
{ return Platform(); }
|
||||||
|
|
||||||
|
std::string Device::name() const{
|
||||||
|
char tmp[128];
|
||||||
|
dispatch::cuDeviceGetName(tmp, 128, *cu_);
|
||||||
|
return std::string(tmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string Device::pci_bus_id() const{
|
||||||
|
char tmp[128];
|
||||||
|
dispatch::cuDeviceGetPCIBusId(tmp, 128, *cu_);
|
||||||
|
return std::string(tmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Device::interpret_as(std::pair<size_t, size_t> cc){
|
||||||
|
interpreted_as_ = std::make_shared<std::pair<size_t, size_t>>(cc);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::pair<size_t, size_t> Device::compute_capability() const{
|
||||||
|
if(interpreted_as_)
|
||||||
|
return *interpreted_as_;
|
||||||
|
size_t _major = cuGetInfo<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR>();
|
||||||
|
size_t _minor = cuGetInfo<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR>();
|
||||||
|
return std::make_pair(_major, _minor);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t Device::max_threads_per_block() const
|
||||||
|
{ return cuGetInfo<CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK>(); }
|
||||||
|
|
||||||
|
size_t Device::max_shared_memory() const
|
||||||
|
{ return cuGetInfo<CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK>(); }
|
||||||
|
|
||||||
|
size_t Device::warp_size() const
|
||||||
|
{ return cuGetInfo<CU_DEVICE_ATTRIBUTE_WARP_SIZE>(); }
|
||||||
|
|
||||||
|
|
||||||
|
std::vector<size_t> Device::max_block_dim() const{
|
||||||
|
std::vector<size_t> result(3);
|
||||||
|
result[0] = cuGetInfo<CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X>();
|
||||||
|
result[1] = cuGetInfo<CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y>();
|
||||||
|
result[2] = cuGetInfo<CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z>();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t Device::current_sm_clock() const{
|
||||||
|
unsigned int result;
|
||||||
|
dispatch::nvmlDeviceGetClockInfo(nvml_device(), NVML_CLOCK_SM, &result);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t Device::max_sm_clock() const{
|
||||||
|
unsigned int result;
|
||||||
|
dispatch::nvmlDeviceGetMaxClockInfo(nvml_device(), NVML_CLOCK_SM, &result);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
size_t Device::current_mem_clock() const{
|
||||||
|
unsigned int result;
|
||||||
|
dispatch::nvmlDeviceGetClockInfo(nvml_device(), NVML_CLOCK_MEM, &result);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t Device::max_mem_clock() const{
|
||||||
|
unsigned int result;
|
||||||
|
dispatch::nvmlDeviceGetMaxClockInfo(nvml_device(), NVML_CLOCK_MEM, &result);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Infos */
|
||||||
|
std::string Device::infos() const{
|
||||||
|
std::ostringstream oss;
|
||||||
|
std::vector<size_t> max_wi_sizes = max_block_dim();
|
||||||
|
oss << "Platform: " << platform().name() << std::endl;
|
||||||
|
oss << "Name: " << name() << std::endl;
|
||||||
|
oss << "Maximum total work-group size: " << max_threads_per_block() << std::endl;
|
||||||
|
oss << "Maximum individual work-group sizes: " << max_wi_sizes[0] << ", " << max_wi_sizes[1] << ", " << max_wi_sizes[2] << std::endl;
|
||||||
|
oss << "Local memory size: " << max_shared_memory() << std::endl;
|
||||||
|
return oss.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
Handle<CUdevice> const & Device::cu() const
|
||||||
|
{ return cu_; }
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
363
lib/driver/dispatch.cpp
Executable file
363
lib/driver/dispatch.cpp
Executable file
@@ -0,0 +1,363 @@
|
|||||||
|
/* Copyright 2015-2017 Philippe Tillet
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files
|
||||||
|
* (the "Software"), to deal in the Software without restriction,
|
||||||
|
* including without limitation the rights to use, copy, modify, merge,
|
||||||
|
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so,
|
||||||
|
* subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
#include "driver/dispatch.h"
|
||||||
|
#include "driver/context.h"
|
||||||
|
|
||||||
|
namespace tdl
|
||||||
|
{
|
||||||
|
namespace driver
|
||||||
|
{
|
||||||
|
|
||||||
|
//Helpers for function definition
|
||||||
|
#define DEFINE0(init, hlib, ret, fname) ret dispatch::fname()\
|
||||||
|
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname); }
|
||||||
|
|
||||||
|
#define DEFINE1(init, hlib, ret, fname, t1) ret dispatch::fname(t1 a)\
|
||||||
|
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a); }
|
||||||
|
|
||||||
|
#define DEFINE2(init, hlib, ret, fname, t1, t2) ret dispatch::fname(t1 a, t2 b)\
|
||||||
|
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b); }
|
||||||
|
|
||||||
|
#define DEFINE3(init, hlib, ret, fname, t1, t2, t3) ret dispatch::fname(t1 a, t2 b, t3 c)\
|
||||||
|
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c); }
|
||||||
|
|
||||||
|
#define DEFINE4(init, hlib, ret, fname, t1, t2, t3, t4) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d)\
|
||||||
|
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d); }
|
||||||
|
|
||||||
|
#define DEFINE5(init, hlib, ret, fname, t1, t2, t3, t4, t5) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e)\
|
||||||
|
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e); }
|
||||||
|
|
||||||
|
#define DEFINE6(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f)\
|
||||||
|
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f); }
|
||||||
|
|
||||||
|
#define DEFINE7(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g)\
|
||||||
|
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g); }
|
||||||
|
|
||||||
|
#define DEFINE8(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h)\
|
||||||
|
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h); }
|
||||||
|
|
||||||
|
#define DEFINE9(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i)\
|
||||||
|
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h, i); }
|
||||||
|
|
||||||
|
#define DEFINE10(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i, t10 j)\
|
||||||
|
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h, i, j); }
|
||||||
|
|
||||||
|
#define DEFINE11(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i, t10 j, t11 k)\
|
||||||
|
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h, i, j, k); }
|
||||||
|
|
||||||
|
#define DEFINE13(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i, t10 j, t11 k, t12 l, t13 m)\
|
||||||
|
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h, i, j, k, l, m); }
|
||||||
|
|
||||||
|
#define DEFINE19(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15, t16, t17, t18, t19) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i, t10 j, t11 k, t12 l, t13 m, t14 n, t15 o, t16 p, t17 q, t18 r, t19 s)\
|
||||||
|
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s); }
|
||||||
|
|
||||||
|
//Specialized helpers for CUDA
|
||||||
|
#define CUDA_DEFINE1(ret, fname, t1) DEFINE1(cuinit, cuda_, ret, fname, t1)
|
||||||
|
#define CUDA_DEFINE2(ret, fname, t1, t2) DEFINE2(cuinit, cuda_, ret, fname, t1, t2)
|
||||||
|
#define CUDA_DEFINE3(ret, fname, t1, t2, t3) DEFINE3(cuinit, cuda_, ret, fname, t1, t2, t3)
|
||||||
|
#define CUDA_DEFINE4(ret, fname, t1, t2, t3, t4) DEFINE4(cuinit, cuda_, ret, fname, t1, t2, t3, t4)
|
||||||
|
#define CUDA_DEFINE5(ret, fname, t1, t2, t3, t4, t5) DEFINE5(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5)
|
||||||
|
#define CUDA_DEFINE6(ret, fname, t1, t2, t3, t4, t5, t6) DEFINE6(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6)
|
||||||
|
#define CUDA_DEFINE7(ret, fname, t1, t2, t3, t4, t5, t6, t7) DEFINE7(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7)
|
||||||
|
#define CUDA_DEFINE8(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) DEFINE8(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8)
|
||||||
|
#define CUDA_DEFINE9(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) DEFINE9(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9)
|
||||||
|
#define CUDA_DEFINE10(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) DEFINE10(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10)
|
||||||
|
#define CUDA_DEFINE11(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) DEFINE11(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11)
|
||||||
|
|
||||||
|
#define NVRTC_DEFINE1(ret, fname, t1) DEFINE1(nvrtcinit, nvrtc_, ret, fname, t1)
|
||||||
|
#define NVRTC_DEFINE2(ret, fname, t1, t2) DEFINE2(nvrtcinit, nvrtc_, ret, fname, t1, t2)
|
||||||
|
#define NVRTC_DEFINE3(ret, fname, t1, t2, t3) DEFINE3(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3)
|
||||||
|
#define NVRTC_DEFINE4(ret, fname, t1, t2, t3, t4) DEFINE4(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4)
|
||||||
|
#define NVRTC_DEFINE5(ret, fname, t1, t2, t3, t4, t5) DEFINE5(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5)
|
||||||
|
#define NVRTC_DEFINE6(ret, fname, t1, t2, t3, t4, t5, t6) DEFINE6(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6)
|
||||||
|
#define NVRTC_DEFINE7(ret, fname, t1, t2, t3, t4, t5, t6, t7) DEFINE7(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7)
|
||||||
|
#define NVRTC_DEFINE8(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) DEFINE8(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8)
|
||||||
|
#define NVRTC_DEFINE9(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) DEFINE9(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9)
|
||||||
|
#define NVRTC_DEFINE10(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) DEFINE10(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10)
|
||||||
|
#define NVRTC_DEFINE11(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) DEFINE11(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11)
|
||||||
|
|
||||||
|
#define NVML_DEFINE0(ret, fname) DEFINE0(nvmlinit, nvml_, ret, fname)
|
||||||
|
#define NVML_DEFINE1(ret, fname, t1) DEFINE1(nvmlinit, nvml_, ret, fname, t1)
|
||||||
|
#define NVML_DEFINE2(ret, fname, t1, t2) DEFINE2(nvmlinit, nvml_, ret, fname, t1, t2)
|
||||||
|
#define NVML_DEFINE3(ret, fname, t1, t2, t3) DEFINE3(nvmlinit, nvml_, ret, fname, t1, t2, t3)
|
||||||
|
|
||||||
|
#define CUBLAS_DEFINE1(ret, fname, t1) DEFINE1(cublasinit, cublas_, ret, fname, t1)
|
||||||
|
#define CUBLAS_DEFINE13(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13) DEFINE13(cublasinit, cublas_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13)
|
||||||
|
#define CUBLAS_DEFINE19(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15, t16, t17, t18, t19) DEFINE19(cublasinit, cublas_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15, t16, t17, t18, t19)
|
||||||
|
|
||||||
|
#define CUDNN_DEFINE1(ret, fname, t1) DEFINE1(cudnninit, cudnn_, ret, fname, t1)
|
||||||
|
#define CUDNN_DEFINE2(ret, fname, t1, t2) DEFINE2(cudnninit, cudnn_, ret, fname, t1, t2)
|
||||||
|
#define CUDNN_DEFINE3(ret, fname, t1, t2, t3) DEFINE3(cudnninit, cudnn_, ret, fname, t1, t2, t3)
|
||||||
|
#define CUDNN_DEFINE5(ret, fname, t1, t2, t3, t4, t5) DEFINE5(cudnninit, cudnn_, ret, fname, t1, t2, t3, t4, t5)
|
||||||
|
#define CUDNN_DEFINE6(ret, fname, t1, t2, t3, t4, t5, t6) DEFINE6(cudnninit, cudnn_, ret, fname, t1, t2, t3, t4, t5, t6)
|
||||||
|
#define CUDNN_DEFINE7(ret, fname, t1, t2, t3, t4, t5, t6, t7) DEFINE7(cudnninit, cudnn_, ret, fname, t1, t2, t3, t4, t5, t6, t7)
|
||||||
|
#define CUDNN_DEFINE8(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) DEFINE8(cudnninit, cudnn_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8)
|
||||||
|
#define CUDNN_DEFINE13(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13) DEFINE13(cudnninit, cudnn_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13)
|
||||||
|
|
||||||
|
|
||||||
|
bool dispatch::cuinit(){
|
||||||
|
if(cuda_==nullptr)
|
||||||
|
cuda_ = dlopen("libcuda.so", RTLD_LAZY);
|
||||||
|
CUresult (*fptr)(unsigned int);
|
||||||
|
cuInit_ = dlsym(cuda_, "cuInit");
|
||||||
|
*reinterpret_cast<void **>(&fptr) = cuInit_;
|
||||||
|
CUresult res = (*fptr)(0);
|
||||||
|
check(res);
|
||||||
|
return cuda_ != nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool dispatch::nvrtcinit(){
|
||||||
|
if(nvrtc_==nullptr)
|
||||||
|
nvrtc_ = dlopen("libnvrtc.so", RTLD_LAZY);
|
||||||
|
return nvrtc_ != nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool dispatch::nvmlinit(){
|
||||||
|
if(nvml_==nullptr)
|
||||||
|
nvml_ = dlopen("libnvidia-ml.so", RTLD_LAZY);
|
||||||
|
nvmlReturn_t (*fptr)();
|
||||||
|
nvmlInit_v2_ = dlsym(nvml_, "nvmlInit_v2");
|
||||||
|
*reinterpret_cast<void **>(&fptr) = nvmlInit_v2_;
|
||||||
|
nvmlReturn_t res = (*fptr)();
|
||||||
|
check(res);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool dispatch::cublasinit(){
|
||||||
|
if(cublas_==nullptr)
|
||||||
|
cublas_ = dlopen("libcublas.so", RTLD_LAZY);
|
||||||
|
return cublas_ != nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool dispatch::cudnninit(){
|
||||||
|
if(cudnn_==nullptr)
|
||||||
|
cudnn_ = dlopen("libcudnn.so", RTLD_LAZY);
|
||||||
|
return cudnn_ != nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
//CUDA
|
||||||
|
CUDA_DEFINE1(CUresult, cuCtxDestroy_v2, CUcontext)
|
||||||
|
CUDA_DEFINE2(CUresult, cuEventCreate, CUevent *, unsigned int)
|
||||||
|
CUDA_DEFINE2(CUresult, cuDeviceGet, CUdevice *, int)
|
||||||
|
CUDA_DEFINE3(CUresult, cuMemcpyDtoH_v2, void *, CUdeviceptr, size_t)
|
||||||
|
CUDA_DEFINE2(CUresult, cuStreamCreate, CUstream *, unsigned int)
|
||||||
|
CUDA_DEFINE3(CUresult, cuEventElapsedTime, float *, CUevent, CUevent)
|
||||||
|
CUDA_DEFINE1(CUresult, cuMemFree_v2, CUdeviceptr)
|
||||||
|
CUDA_DEFINE4(CUresult, cuMemcpyDtoHAsync_v2, void *, CUdeviceptr, size_t, CUstream)
|
||||||
|
CUDA_DEFINE1(CUresult, cuDriverGetVersion, int *)
|
||||||
|
CUDA_DEFINE3(CUresult, cuDeviceGetName, char *, int, CUdevice)
|
||||||
|
CUDA_DEFINE3(CUresult, cuDeviceGetPCIBusId, char *, int, CUdevice)
|
||||||
|
CUDA_DEFINE4(CUresult, cuModuleGetGlobal_v2, CUdeviceptr*, size_t*, CUmodule, const char*)
|
||||||
|
|
||||||
|
CUDA_DEFINE4(CUresult, cuMemcpyHtoDAsync_v2, CUdeviceptr, const void *, size_t, CUstream)
|
||||||
|
CUDA_DEFINE2(CUresult, cuModuleLoad, CUmodule *, const char *)
|
||||||
|
CUDA_DEFINE11(CUresult, cuLaunchKernel, CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, CUstream, void **, void **)
|
||||||
|
CUDA_DEFINE1(CUresult, cuModuleUnload, CUmodule)
|
||||||
|
CUDA_DEFINE5(CUresult, cuModuleLoadDataEx, CUmodule *, const void *, unsigned int, CUjit_option *, void **)
|
||||||
|
CUDA_DEFINE3(CUresult, cuDeviceGetAttribute, int *, CUdevice_attribute, CUdevice)
|
||||||
|
CUDA_DEFINE1(CUresult, cuDeviceGetCount, int *)
|
||||||
|
CUDA_DEFINE3(CUresult, cuMemcpyHtoD_v2, CUdeviceptr, const void *, size_t )
|
||||||
|
CUDA_DEFINE1(CUresult, cuInit, unsigned int)
|
||||||
|
CUDA_DEFINE2(CUresult, cuEventRecord, CUevent, CUstream)
|
||||||
|
CUDA_DEFINE3(CUresult, cuCtxCreate_v2, CUcontext *, unsigned int, CUdevice)
|
||||||
|
CUDA_DEFINE3(CUresult, cuModuleGetFunction, CUfunction *, CUmodule, const char *)
|
||||||
|
CUDA_DEFINE1(CUresult, cuStreamSynchronize, CUstream)
|
||||||
|
CUDA_DEFINE1(CUresult, cuStreamDestroy_v2, CUstream)
|
||||||
|
CUDA_DEFINE1(CUresult, cuEventDestroy_v2, CUevent)
|
||||||
|
CUDA_DEFINE2(CUresult, cuMemAlloc_v2, CUdeviceptr*, size_t)
|
||||||
|
CUDA_DEFINE3(CUresult, cuPointerGetAttribute, void*, CUpointer_attribute, CUdeviceptr)
|
||||||
|
CUDA_DEFINE1(CUresult, cuCtxGetDevice, CUdevice*)
|
||||||
|
CUDA_DEFINE1(CUresult, cuCtxGetCurrent, CUcontext*)
|
||||||
|
CUDA_DEFINE1(CUresult, cuCtxSetCurrent, CUcontext)
|
||||||
|
CUDA_DEFINE4(CUresult, cuMemsetD8Async, CUdeviceptr, unsigned char, size_t, CUstream)
|
||||||
|
CUDA_DEFINE1(CUresult, cuCtxPushCurrent_v2, CUcontext)
|
||||||
|
CUDA_DEFINE1(CUresult, cuCtxPopCurrent_v2, CUcontext*)
|
||||||
|
|
||||||
|
NVRTC_DEFINE3(nvrtcResult, nvrtcCompileProgram, nvrtcProgram, int, const char **)
|
||||||
|
NVRTC_DEFINE2(nvrtcResult, nvrtcGetProgramLogSize, nvrtcProgram, size_t *)
|
||||||
|
NVRTC_DEFINE2(nvrtcResult, nvrtcGetPTX, nvrtcProgram, char *)
|
||||||
|
NVRTC_DEFINE2(nvrtcResult, nvrtcGetPTXSize, nvrtcProgram, size_t *)
|
||||||
|
NVRTC_DEFINE6(nvrtcResult, nvrtcCreateProgram, nvrtcProgram *, const char *, const char *, int, const char **, const char **)
|
||||||
|
NVRTC_DEFINE2(nvrtcResult, nvrtcGetProgramLog, nvrtcProgram, char *)
|
||||||
|
|
||||||
|
NVML_DEFINE2(nvmlReturn_t, nvmlDeviceGetHandleByPciBusId_v2, const char *, nvmlDevice_t*)
|
||||||
|
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetClockInfo, nvmlDevice_t, nvmlClockType_t, unsigned int*)
|
||||||
|
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetMaxClockInfo, nvmlDevice_t, nvmlClockType_t, unsigned int*)
|
||||||
|
|
||||||
|
cublasHandle_t dispatch::cublasHandle(Context const & ctx){
|
||||||
|
static std::map<Context, cublasHandle_t> handles;
|
||||||
|
auto pr = handles.insert({ctx, cublasHandle_t()});
|
||||||
|
if(pr.second)
|
||||||
|
cublasCreate_v2(&pr.first->second);
|
||||||
|
return pr.first->second;
|
||||||
|
}
|
||||||
|
|
||||||
|
cudnnHandle_t dispatch::cudnnHandle(Context const & ctx){
|
||||||
|
static std::map<Context, cudnnHandle_t> handles;
|
||||||
|
auto pr = handles.insert({ctx, cudnnHandle_t()});
|
||||||
|
if(pr.second)
|
||||||
|
cudnnCreate(&pr.first->second);
|
||||||
|
return pr.first->second;
|
||||||
|
}
|
||||||
|
|
||||||
|
CUBLAS_DEFINE1(cublasStatus_t, cublasCreate_v2, cublasHandle_t*)
|
||||||
|
cublasStatus_t dispatch::cublasGetStream_v2(cublasHandle_t h, cudaStream_t *a)
|
||||||
|
{ return f_impl<dispatch::cublasinit>(cublas_, cublasGetStream_v2, cublasGetStream_v2_, "cublasGetStream_v2", h, a); }
|
||||||
|
cublasStatus_t dispatch::cublasSetStream_v2(cublasHandle_t h, cudaStream_t a)
|
||||||
|
{ return f_impl<dispatch::cublasinit>(cublas_, cublasSetStream_v2, cublasSetStream_v2_, "cublasSetStream_v2", h, a); }
|
||||||
|
cublasStatus_t dispatch::cublasSgemm_v2(cublasHandle_t h, cublasOperation_t at, cublasOperation_t bt, int m, int n, int k, float* alpha, const float *A, int lda, const float *B, int ldb, float* beta, float *C, int ldc)
|
||||||
|
{ return f_impl<dispatch::cublasinit>(cublas_, cublasSgemm_v2, cublasSgemm_v2_, "cublasSgemm_v2", h, at, bt, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);}
|
||||||
|
cublasStatus_t dispatch::cublasDgemm_v2(cublasHandle_t h, cublasOperation_t at, cublasOperation_t bt, int m, int n, int k, double* alpha, const double *A, int lda, const double *B, int ldb, double* beta, double *C, int ldc)
|
||||||
|
{ return f_impl<dispatch::cublasinit>(cublas_, cublasDgemm_v2, cublasDgemm_v2_, "cublasDgemm_v2", h, at, bt, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);}
|
||||||
|
cublasStatus_t dispatch::cublasHgemm(cublasHandle_t h, cublasOperation_t at, cublasOperation_t bt, int m, int n, int k, half* alpha, const half *A, int lda, const half *B, int ldb, half* beta, half *C, int ldc)
|
||||||
|
{ return f_impl<dispatch::cublasinit>(cublas_, cublasHgemm, cublasHgemm_, "cublasHgemm", h, at, bt, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);}
|
||||||
|
CUBLAS_DEFINE19(cublasStatus_t, cublasGemmEx, cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const void*, const void*, cudaDataType, int, const void*, cudaDataType, int, const void*, void*, cudaDataType, int, cudaDataType, cublasGemmAlgo_t)
|
||||||
|
|
||||||
|
//cuDNN
|
||||||
|
CUDNN_DEFINE1(cudnnStatus_t, cudnnCreateConvolutionDescriptor, cudnnConvolutionDescriptor_t*)
|
||||||
|
CUDNN_DEFINE1(cudnnStatus_t, cudnnCreateTensorDescriptor, cudnnTensorDescriptor_t*)
|
||||||
|
CUDNN_DEFINE1(cudnnStatus_t, cudnnCreateFilterDescriptor, cudnnFilterDescriptor_t*)
|
||||||
|
CUDNN_DEFINE1(cudnnStatus_t, cudnnCreate, cudnnHandle_t*)
|
||||||
|
CUDNN_DEFINE7(cudnnStatus_t, cudnnSetTensor4dDescriptor, cudnnTensorDescriptor_t, cudnnTensorFormat_t, cudnnDataType_t, int, int, int, int)
|
||||||
|
CUDNN_DEFINE7(cudnnStatus_t, cudnnSetFilter4dDescriptor, cudnnFilterDescriptor_t, cudnnDataType_t, cudnnTensorFormat_t, int, int, int, int)
|
||||||
|
CUDNN_DEFINE5(cudnnStatus_t, cudnnSetTensorNdDescriptorEx, cudnnTensorDescriptor_t, cudnnTensorFormat_t, cudnnDataType_t, int, const int*)
|
||||||
|
CUDNN_DEFINE5(cudnnStatus_t, cudnnSetFilterNdDescriptor, cudnnFilterDescriptor_t, cudnnDataType_t, cudnnTensorFormat_t, int, const int*)
|
||||||
|
CUDNN_DEFINE1(cudnnStatus_t, cudnnCreatePoolingDescriptor, cudnnPoolingDescriptor_t*)
|
||||||
|
CUDNN_DEFINE7(cudnnStatus_t, cudnnSetPoolingNdDescriptor, cudnnPoolingDescriptor_t, const cudnnPoolingMode_t, const cudnnNanPropagation_t, int, const int*, const int*, const int*)
|
||||||
|
CUDNN_DEFINE8(cudnnStatus_t, cudnnPoolingForward, cudnnHandle_t, const cudnnPoolingDescriptor_t, const void*, const cudnnTensorDescriptor_t, const void*, const void*, const cudnnTensorDescriptor_t, void*)
|
||||||
|
|
||||||
|
|
||||||
|
CUDNN_DEFINE8(cudnnStatus_t, cudnnSetConvolution2dDescriptor, cudnnConvolutionDescriptor_t, int, int, int, int, int, int, cudnnConvolutionMode_t)
|
||||||
|
CUDNN_DEFINE7(cudnnStatus_t, cudnnSetConvolutionNdDescriptor, cudnnConvolutionDescriptor_t, int, const int*, const int*, const int*, cudnnConvolutionMode_t, cudnnDataType_t)
|
||||||
|
CUDNN_DEFINE8(cudnnStatus_t, cudnnGetConvolutionForwardAlgorithm, cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionFwdPreference_t, size_t, cudnnConvolutionFwdAlgo_t *)
|
||||||
|
CUDNN_DEFINE7(cudnnStatus_t, cudnnGetConvolutionForwardWorkspaceSize, cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionFwdAlgo_t, size_t*)
|
||||||
|
CUDNN_DEFINE13(cudnnStatus_t, cudnnConvolutionForward, cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, size_t, const void *, const cudnnTensorDescriptor_t, void *)
|
||||||
|
CUDNN_DEFINE2(cudnnStatus_t, cudnnSetStream, cudnnHandle_t, cudaStream_t)
|
||||||
|
CUDNN_DEFINE7(cudnnStatus_t, cudnnTransformTensor, cudnnHandle_t, const void*, const cudnnTensorDescriptor_t, const void*, const void*, const cudnnTensorDescriptor_t, void*)
|
||||||
|
|
||||||
|
|
||||||
|
void dispatch::release(){
|
||||||
|
if(cuda_){
|
||||||
|
dlclose(cuda_);
|
||||||
|
cuda_ = nullptr;
|
||||||
|
}
|
||||||
|
if(nvrtc_){
|
||||||
|
dlclose(nvrtc_);
|
||||||
|
nvrtc_ = nullptr;
|
||||||
|
}
|
||||||
|
if(cublas_){
|
||||||
|
dlclose(cublas_);
|
||||||
|
cublas_ = nullptr;
|
||||||
|
}
|
||||||
|
if(cudnn_){
|
||||||
|
dlclose(cudnn_);
|
||||||
|
cudnn_ = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void* dispatch::cuda_;
|
||||||
|
void* dispatch::nvrtc_;
|
||||||
|
void* dispatch::nvml_;
|
||||||
|
void* dispatch::cublas_;
|
||||||
|
void* dispatch::cudnn_;
|
||||||
|
|
||||||
|
//CUDA
|
||||||
|
void* dispatch::cuCtxGetCurrent_;
|
||||||
|
void* dispatch::cuCtxSetCurrent_;
|
||||||
|
void* dispatch::cuCtxDestroy_v2_;
|
||||||
|
void* dispatch::cuEventCreate_;
|
||||||
|
void* dispatch::cuDeviceGet_;
|
||||||
|
void* dispatch::cuMemcpyDtoH_v2_;
|
||||||
|
void* dispatch::cuStreamCreate_;
|
||||||
|
void* dispatch::cuEventElapsedTime_;
|
||||||
|
void* dispatch::cuMemFree_v2_;
|
||||||
|
void* dispatch::cuMemcpyDtoHAsync_v2_;
|
||||||
|
void* dispatch::cuDriverGetVersion_;
|
||||||
|
void* dispatch::cuDeviceGetName_;
|
||||||
|
void* dispatch::cuDeviceGetPCIBusId_;
|
||||||
|
void* dispatch::cuModuleGetGlobal_v2_;
|
||||||
|
|
||||||
|
void* dispatch::cuMemcpyHtoDAsync_v2_;
|
||||||
|
void* dispatch::cuModuleLoad_;
|
||||||
|
void* dispatch::cuLaunchKernel_;
|
||||||
|
void* dispatch::cuModuleUnload_;
|
||||||
|
void* dispatch::cuModuleLoadDataEx_;
|
||||||
|
void* dispatch::cuDeviceGetAttribute_;
|
||||||
|
void* dispatch::cuDeviceGetCount_;
|
||||||
|
void* dispatch::cuMemcpyHtoD_v2_;
|
||||||
|
void* dispatch::cuInit_;
|
||||||
|
void* dispatch::cuEventRecord_;
|
||||||
|
void* dispatch::cuCtxCreate_v2_;
|
||||||
|
void* dispatch::cuModuleGetFunction_;
|
||||||
|
void* dispatch::cuStreamSynchronize_;
|
||||||
|
void* dispatch::cuStreamDestroy_v2_;
|
||||||
|
void* dispatch::cuEventDestroy_v2_;
|
||||||
|
void* dispatch::cuMemAlloc_v2_;
|
||||||
|
void* dispatch::cuPointerGetAttribute_;
|
||||||
|
void* dispatch::cuCtxGetDevice_;
|
||||||
|
void* dispatch::cuMemsetD8Async_;
|
||||||
|
void* dispatch::cuCtxPushCurrent_v2_;
|
||||||
|
void* dispatch::cuCtxPopCurrent_v2_;
|
||||||
|
|
||||||
|
void* dispatch::nvrtcCompileProgram_;
|
||||||
|
void* dispatch::nvrtcGetProgramLogSize_;
|
||||||
|
void* dispatch::nvrtcGetPTX_;
|
||||||
|
void* dispatch::nvrtcGetPTXSize_;
|
||||||
|
void* dispatch::nvrtcCreateProgram_;
|
||||||
|
void* dispatch::nvrtcGetProgramLog_;
|
||||||
|
|
||||||
|
void* dispatch::nvmlInit_v2_;
|
||||||
|
void* dispatch::nvmlDeviceGetHandleByPciBusId_v2_;
|
||||||
|
void* dispatch::nvmlDeviceGetClockInfo_;
|
||||||
|
void* dispatch::nvmlDeviceGetMaxClockInfo_;
|
||||||
|
|
||||||
|
void* dispatch::cublasCreate_v2_;
|
||||||
|
void* dispatch::cublasGetStream_v2_;
|
||||||
|
void* dispatch::cublasSetStream_v2_;
|
||||||
|
void* dispatch::cublasHgemm_;
|
||||||
|
void* dispatch::cublasSgemm_v2_;
|
||||||
|
void* dispatch::cublasDgemm_v2_;
|
||||||
|
void* dispatch::cublasGemmEx_;
|
||||||
|
|
||||||
|
void* dispatch::cudnnCreateConvolutionDescriptor_;
|
||||||
|
void* dispatch::cudnnCreatePoolingDescriptor_;
|
||||||
|
void* dispatch::cudnnCreateTensorDescriptor_;
|
||||||
|
void* dispatch::cudnnCreateFilterDescriptor_;
|
||||||
|
void* dispatch::cudnnCreate_;
|
||||||
|
void* dispatch::cudnnSetTensor4dDescriptor_;
|
||||||
|
void* dispatch::cudnnSetFilter4dDescriptor_;
|
||||||
|
void* dispatch::cudnnSetTensorNdDescriptorEx_;
|
||||||
|
void* dispatch::cudnnSetFilterNdDescriptor_;
|
||||||
|
void* dispatch::cudnnSetPoolingNdDescriptor_;
|
||||||
|
void* dispatch::cudnnSetConvolution2dDescriptor_;
|
||||||
|
void* dispatch::cudnnSetConvolutionNdDescriptor_;
|
||||||
|
void* dispatch::cudnnGetConvolutionForwardAlgorithm_;
|
||||||
|
void* dispatch::cudnnGetConvolutionForwardWorkspaceSize_;
|
||||||
|
void* dispatch::cudnnConvolutionForward_;
|
||||||
|
void* dispatch::cudnnPoolingForward_;
|
||||||
|
void* dispatch::cudnnSetStream_;
|
||||||
|
void* dispatch::cudnnTransformTensor_;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
155
lib/driver/error.cpp
Executable file
155
lib/driver/error.cpp
Executable file
@@ -0,0 +1,155 @@
|
|||||||
|
/* Copyright 2015-2017 Philippe Tillet
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files
|
||||||
|
* (the "Software"), to deal in the Software without restriction,
|
||||||
|
* including without limitation the rights to use, copy, modify, merge,
|
||||||
|
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so,
|
||||||
|
* subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "driver/error.h"
|
||||||
|
|
||||||
|
namespace tdl
|
||||||
|
{
|
||||||
|
namespace driver
|
||||||
|
{
|
||||||
|
|
||||||
|
void check(CUresult err)
|
||||||
|
{
|
||||||
|
using namespace exception::cuda;
|
||||||
|
switch(err)
|
||||||
|
{
|
||||||
|
case CUDA_SUCCESS : break;
|
||||||
|
case CUDA_ERROR_INVALID_VALUE : throw invalid_value();
|
||||||
|
case CUDA_ERROR_OUT_OF_MEMORY : throw out_of_memory();
|
||||||
|
case CUDA_ERROR_NOT_INITIALIZED : throw not_initialized();
|
||||||
|
case CUDA_ERROR_DEINITIALIZED : throw deinitialized();
|
||||||
|
case CUDA_ERROR_PROFILER_DISABLED : throw profiler_disabled();
|
||||||
|
case CUDA_ERROR_PROFILER_NOT_INITIALIZED : throw profiler_not_initialized();
|
||||||
|
case CUDA_ERROR_PROFILER_ALREADY_STARTED : throw profiler_already_started();
|
||||||
|
case CUDA_ERROR_PROFILER_ALREADY_STOPPED : throw profiler_already_stopped();
|
||||||
|
case CUDA_ERROR_NO_DEVICE : throw no_device();
|
||||||
|
case CUDA_ERROR_INVALID_DEVICE : throw invalid_device();
|
||||||
|
case CUDA_ERROR_INVALID_IMAGE : throw invalid_image();
|
||||||
|
case CUDA_ERROR_INVALID_CONTEXT : throw invalid_context();
|
||||||
|
case CUDA_ERROR_CONTEXT_ALREADY_CURRENT : throw context_already_current();
|
||||||
|
case CUDA_ERROR_MAP_FAILED : throw map_failed();
|
||||||
|
case CUDA_ERROR_UNMAP_FAILED : throw unmap_failed();
|
||||||
|
case CUDA_ERROR_ARRAY_IS_MAPPED : throw array_is_mapped();
|
||||||
|
case CUDA_ERROR_ALREADY_MAPPED : throw already_mapped();
|
||||||
|
case CUDA_ERROR_NO_BINARY_FOR_GPU : throw no_binary_for_gpu();
|
||||||
|
case CUDA_ERROR_ALREADY_ACQUIRED : throw already_acquired();
|
||||||
|
case CUDA_ERROR_NOT_MAPPED : throw not_mapped();
|
||||||
|
case CUDA_ERROR_NOT_MAPPED_AS_ARRAY : throw not_mapped_as_array();
|
||||||
|
case CUDA_ERROR_NOT_MAPPED_AS_POINTER : throw not_mapped_as_pointer();
|
||||||
|
case CUDA_ERROR_ECC_UNCORRECTABLE : throw ecc_uncorrectable();
|
||||||
|
case CUDA_ERROR_UNSUPPORTED_LIMIT : throw unsupported_limit();
|
||||||
|
case CUDA_ERROR_CONTEXT_ALREADY_IN_USE : throw context_already_in_use();
|
||||||
|
case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED : throw peer_access_unsupported();
|
||||||
|
case CUDA_ERROR_INVALID_PTX : throw invalid_ptx();
|
||||||
|
case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT : throw invalid_graphics_context();
|
||||||
|
case CUDA_ERROR_INVALID_SOURCE : throw invalid_source();
|
||||||
|
case CUDA_ERROR_FILE_NOT_FOUND : throw file_not_found();
|
||||||
|
case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND : throw shared_object_symbol_not_found();
|
||||||
|
case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED : throw shared_object_init_failed();
|
||||||
|
case CUDA_ERROR_OPERATING_SYSTEM : throw operating_system();
|
||||||
|
case CUDA_ERROR_INVALID_HANDLE : throw invalid_handle();
|
||||||
|
case CUDA_ERROR_NOT_FOUND : throw not_found();
|
||||||
|
case CUDA_ERROR_NOT_READY : throw not_ready();
|
||||||
|
case CUDA_ERROR_ILLEGAL_ADDRESS : throw illegal_address();
|
||||||
|
case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES : throw launch_out_of_resources();
|
||||||
|
case CUDA_ERROR_LAUNCH_TIMEOUT : throw launch_timeout();
|
||||||
|
case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING : throw launch_incompatible_texturing();
|
||||||
|
case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED : throw peer_access_already_enabled();
|
||||||
|
case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED : throw peer_access_not_enabled();
|
||||||
|
case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE : throw primary_context_active();
|
||||||
|
case CUDA_ERROR_CONTEXT_IS_DESTROYED : throw context_is_destroyed();
|
||||||
|
case CUDA_ERROR_ASSERT : throw assert_error();
|
||||||
|
case CUDA_ERROR_TOO_MANY_PEERS : throw too_many_peers();
|
||||||
|
case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED : throw host_memory_already_registered();
|
||||||
|
case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED : throw host_memory_not_registered();
|
||||||
|
case CUDA_ERROR_HARDWARE_STACK_ERROR : throw hardware_stack_error();
|
||||||
|
case CUDA_ERROR_ILLEGAL_INSTRUCTION : throw illegal_instruction();
|
||||||
|
case CUDA_ERROR_MISALIGNED_ADDRESS : throw misaligned_address();
|
||||||
|
case CUDA_ERROR_INVALID_ADDRESS_SPACE : throw invalid_address_space();
|
||||||
|
case CUDA_ERROR_INVALID_PC : throw invalid_pc();
|
||||||
|
case CUDA_ERROR_LAUNCH_FAILED : throw launch_failed();
|
||||||
|
case CUDA_ERROR_NOT_PERMITTED : throw not_permitted();
|
||||||
|
case CUDA_ERROR_NOT_SUPPORTED : throw not_supported();
|
||||||
|
case CUDA_ERROR_UNKNOWN : throw unknown();
|
||||||
|
default : throw unknown();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void check(nvrtcResult err){
|
||||||
|
using namespace exception::nvrtc;
|
||||||
|
|
||||||
|
switch(err)
|
||||||
|
{
|
||||||
|
case NVRTC_SUCCESS: break;
|
||||||
|
case NVRTC_ERROR_OUT_OF_MEMORY: throw out_of_memory();
|
||||||
|
case NVRTC_ERROR_PROGRAM_CREATION_FAILURE: throw program_creation_failure();
|
||||||
|
case NVRTC_ERROR_INVALID_INPUT: throw invalid_input();
|
||||||
|
case NVRTC_ERROR_INVALID_PROGRAM: throw invalid_program();
|
||||||
|
case NVRTC_ERROR_INVALID_OPTION: throw invalid_option();
|
||||||
|
case NVRTC_ERROR_COMPILATION: throw compilation();
|
||||||
|
case NVRTC_ERROR_BUILTIN_OPERATION_FAILURE: throw builtin_operation_failure();
|
||||||
|
default: throw unknown_error();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void check(cublasStatus_t err){
|
||||||
|
using namespace exception::cublas;
|
||||||
|
switch(err)
|
||||||
|
{
|
||||||
|
case CUBLAS_STATUS_SUCCESS : break;
|
||||||
|
case CUBLAS_STATUS_NOT_INITIALIZED : throw not_initialized();
|
||||||
|
case CUBLAS_STATUS_ALLOC_FAILED : throw alloc_failed();
|
||||||
|
case CUBLAS_STATUS_INVALID_VALUE : throw invalid_value();
|
||||||
|
case CUBLAS_STATUS_ARCH_MISMATCH : throw arch_mismatch();
|
||||||
|
case CUBLAS_STATUS_MAPPING_ERROR : throw mapping_error();
|
||||||
|
case CUBLAS_STATUS_EXECUTION_FAILED: throw execution_failed();
|
||||||
|
case CUBLAS_STATUS_INTERNAL_ERROR : throw internal_error();
|
||||||
|
case CUBLAS_STATUS_NOT_SUPPORTED : throw not_supported();
|
||||||
|
case CUBLAS_STATUS_LICENSE_ERROR : throw license_error();
|
||||||
|
default : throw unknown();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void check(cudnnStatus_t err){
|
||||||
|
using namespace exception::cudnn;
|
||||||
|
switch(err)
|
||||||
|
{
|
||||||
|
case CUDNN_STATUS_SUCCESS: break;
|
||||||
|
case CUDNN_STATUS_NOT_INITIALIZED: throw not_initialized();
|
||||||
|
case CUDNN_STATUS_ALLOC_FAILED: throw alloc_failed();
|
||||||
|
case CUDNN_STATUS_BAD_PARAM: throw bad_param();
|
||||||
|
case CUDNN_STATUS_INTERNAL_ERROR: throw internal_error();
|
||||||
|
case CUDNN_STATUS_INVALID_VALUE: throw invalid_value();
|
||||||
|
case CUDNN_STATUS_ARCH_MISMATCH: throw arch_mismatch();
|
||||||
|
case CUDNN_STATUS_MAPPING_ERROR: throw mapping_error();
|
||||||
|
case CUDNN_STATUS_EXECUTION_FAILED: throw execution_failed();
|
||||||
|
case CUDNN_STATUS_NOT_SUPPORTED: throw not_supported();
|
||||||
|
case CUDNN_STATUS_LICENSE_ERROR: throw license_error();
|
||||||
|
case CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING: throw runtime_prerequisite_missing();
|
||||||
|
case CUDNN_STATUS_RUNTIME_IN_PROGRESS: throw runtime_in_progress();
|
||||||
|
case CUDNN_STATUS_RUNTIME_FP_OVERFLOW: throw runtime_fp_overflow();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
40
lib/driver/event.cpp
Executable file
40
lib/driver/event.cpp
Executable file
@@ -0,0 +1,40 @@
|
|||||||
|
/* Copyright 2015-2017 Philippe Tillet
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files
|
||||||
|
* (the "Software"), to deal in the Software without restriction,
|
||||||
|
* including without limitation the rights to use, copy, modify, merge,
|
||||||
|
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so,
|
||||||
|
* subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "driver/event.h"
|
||||||
|
|
||||||
|
namespace tdl
|
||||||
|
{
|
||||||
|
namespace driver
|
||||||
|
{
|
||||||
|
|
||||||
|
float Event::elapsed_time() const{
|
||||||
|
float time;
|
||||||
|
dispatch::cuEventElapsedTime(&time, cu_->first, cu_->second);
|
||||||
|
return time;
|
||||||
|
}
|
||||||
|
|
||||||
|
Handle<cu_event_t> const & Event::cu() const
|
||||||
|
{ return cu_; }
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
66
lib/driver/handle.cpp
Executable file
66
lib/driver/handle.cpp
Executable file
@@ -0,0 +1,66 @@
|
|||||||
|
/* Copyright 2015-2017 Philippe Tillet
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files
|
||||||
|
* (the "Software"), to deal in the Software without restriction,
|
||||||
|
* including without limitation the rights to use, copy, modify, merge,
|
||||||
|
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so,
|
||||||
|
* subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
|
#include <memory>
|
||||||
|
#include "driver/handle.h"
|
||||||
|
|
||||||
|
namespace tdl
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace driver
|
||||||
|
{
|
||||||
|
|
||||||
|
//CUDA
|
||||||
|
inline void _delete(CUcontext x) { dispatch::cuCtxDestroy(x); }
|
||||||
|
inline void _delete(CUdeviceptr x) { dispatch::cuMemFree(x); }
|
||||||
|
inline void _delete(CUstream x) { dispatch::cuStreamDestroy(x); }
|
||||||
|
inline void _delete(CUdevice) { }
|
||||||
|
inline void _delete(CUevent x) { dispatch::cuEventDestroy(x); }
|
||||||
|
inline void _delete(CUfunction) { }
|
||||||
|
inline void _delete(CUmodule x) { dispatch::cuModuleUnload(x); }
|
||||||
|
inline void _delete(cu_event_t x) { _delete(x.first); _delete(x.second); }
|
||||||
|
inline void _delete(cu_platform){}
|
||||||
|
|
||||||
|
//Constructor
|
||||||
|
template<class CUType>
|
||||||
|
Handle<CUType>::Handle(CUType cu, bool take_ownership): h_(new CUType(cu)), has_ownership_(take_ownership)
|
||||||
|
{ }
|
||||||
|
|
||||||
|
|
||||||
|
template<class CUType>
|
||||||
|
Handle<CUType>::~Handle(){
|
||||||
|
if(has_ownership_ && h_ && h_.unique() && *h_)
|
||||||
|
_delete(*h_);
|
||||||
|
}
|
||||||
|
|
||||||
|
template class Handle<CUdeviceptr>;
|
||||||
|
template class Handle<CUstream>;
|
||||||
|
template class Handle<CUcontext>;
|
||||||
|
template class Handle<CUdevice>;
|
||||||
|
template class Handle<cu_event_t>;
|
||||||
|
template class Handle<CUfunction>;
|
||||||
|
template class Handle<CUmodule>;
|
||||||
|
template class Handle<cu_platform>;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
67
lib/driver/kernel.cpp
Executable file
67
lib/driver/kernel.cpp
Executable file
@@ -0,0 +1,67 @@
|
|||||||
|
/* Copyright 2015-2017 Philippe Tillet
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files
|
||||||
|
* (the "Software"), to deal in the Software without restriction,
|
||||||
|
* including without limitation the rights to use, copy, modify, merge,
|
||||||
|
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so,
|
||||||
|
* subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
#include "driver/kernel.h"
|
||||||
|
#include "driver/buffer.h"
|
||||||
|
|
||||||
|
namespace tdl
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace driver
|
||||||
|
{
|
||||||
|
|
||||||
|
Kernel::Kernel(Module const & program, const char * name) : program_(program), address_bits_(program.context().device().address_bits()){
|
||||||
|
cu_params_store_.reserve(64);
|
||||||
|
cu_params_.reserve(64);
|
||||||
|
dispatch::cuModuleGetFunction(&*cu_, program, name);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Kernel::setArg(unsigned int index, std::size_t size, void* ptr){
|
||||||
|
if(index + 1> cu_params_store_.size()){
|
||||||
|
cu_params_store_.resize(index+1);
|
||||||
|
cu_params_.resize(index+1);
|
||||||
|
}
|
||||||
|
cu_params_store_[index].reset(malloc(size), free);
|
||||||
|
memcpy(cu_params_store_[index].get(), ptr, size);
|
||||||
|
cu_params_[index] = cu_params_store_[index].get();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Kernel::setArg(unsigned int index, Buffer const & data)
|
||||||
|
{ return setArg(index, (CUdeviceptr)data);}
|
||||||
|
|
||||||
|
void* const* Kernel::cu_params() const
|
||||||
|
{ return cu_params_.data(); }
|
||||||
|
|
||||||
|
Handle<CUfunction> const & Kernel::cu() const
|
||||||
|
{ return cu_; }
|
||||||
|
|
||||||
|
Module const & Kernel::module() const
|
||||||
|
{ return program_; }
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
118
lib/driver/module.cpp
Executable file
118
lib/driver/module.cpp
Executable file
@@ -0,0 +1,118 @@
|
|||||||
|
/* Copyright 2015-2017 Philippe Tillet
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files
|
||||||
|
* (the "Software"), to deal in the Software without restriction,
|
||||||
|
* including without limitation the rights to use, copy, modify, merge,
|
||||||
|
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so,
|
||||||
|
* subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <fstream>
|
||||||
|
|
||||||
|
#include "driver/module.h"
|
||||||
|
#include "driver/context.h"
|
||||||
|
#include "driver/error.h"
|
||||||
|
|
||||||
|
#include "tools/sys/getenv.hpp"
|
||||||
|
|
||||||
|
namespace tdl
|
||||||
|
{
|
||||||
|
namespace driver
|
||||||
|
{
|
||||||
|
|
||||||
|
CUjit_target_enum cutarget(Device::Architecture arch){
|
||||||
|
switch(arch){
|
||||||
|
case Device::Architecture::SM_2_0: return CU_TARGET_COMPUTE_20;
|
||||||
|
case Device::Architecture::SM_2_1: return CU_TARGET_COMPUTE_21;
|
||||||
|
case Device::Architecture::SM_3_0: return CU_TARGET_COMPUTE_30;
|
||||||
|
case Device::Architecture::SM_3_5: return CU_TARGET_COMPUTE_35;
|
||||||
|
case Device::Architecture::SM_3_7: return CU_TARGET_COMPUTE_37;
|
||||||
|
case Device::Architecture::SM_5_0: return CU_TARGET_COMPUTE_50;
|
||||||
|
case Device::Architecture::SM_5_2: return CU_TARGET_COMPUTE_52;
|
||||||
|
case Device::Architecture::SM_6_0: return CU_TARGET_COMPUTE_60;
|
||||||
|
case Device::Architecture::SM_6_1: return CU_TARGET_COMPUTE_61;
|
||||||
|
default: throw;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline std::pair<int, int> ptx(std::pair<int, int> sm){
|
||||||
|
if(sm.first == 7) return {6, 0};
|
||||||
|
if(sm.first == 6) return {5, 0};
|
||||||
|
if(sm.first == 5) return {4, 3};
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string Module::header(Device const & device){
|
||||||
|
auto cc = device.compute_capability();
|
||||||
|
auto vptx = ptx(cc);
|
||||||
|
std::string header;
|
||||||
|
header += ".version " + std::to_string(vptx.first) + "." + std::to_string(vptx.second) + "\n";
|
||||||
|
header += ".target sm_" + std::to_string(cc.first) + std::to_string(cc.second) + "\n";
|
||||||
|
header += ".address_size 64\n";
|
||||||
|
return header;
|
||||||
|
}
|
||||||
|
|
||||||
|
Module::Module(Context const & context, std::string const & source) : context_(context), source_(header(context.device()) + source){
|
||||||
|
ContextSwitcher ctx_switch(context_);
|
||||||
|
|
||||||
|
//Path to custom PTX compiler
|
||||||
|
std::string compiler = tools::getenv("ISAAC_PTXAS");
|
||||||
|
if(compiler.size()){
|
||||||
|
auto cc = context.device().compute_capability();
|
||||||
|
std::string out = context.cache_path() + "tmp.o";
|
||||||
|
std::string opt = " --gpu-name sm_" + std::to_string(cc.first) + std::to_string(cc.second)
|
||||||
|
+ " -o " + out
|
||||||
|
+ " -ias \"" + source_ + "\"";
|
||||||
|
std::string cmd = compiler + opt;
|
||||||
|
if(std::system(cmd.c_str()) != 0)
|
||||||
|
throw;
|
||||||
|
dispatch::cuModuleLoad(&*cu_, out.c_str());
|
||||||
|
}
|
||||||
|
//JIT Compilation
|
||||||
|
else{
|
||||||
|
CUjit_option opt[] = {CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, CU_JIT_ERROR_LOG_BUFFER};
|
||||||
|
unsigned int errbufsize = 8096;
|
||||||
|
std::string errbuf(errbufsize, 0);
|
||||||
|
//CUjit_target_enum target = cutarget(context.device().architecture());
|
||||||
|
void* optval[] = {(void*)(uintptr_t)errbufsize, (void*)errbuf.data()};
|
||||||
|
try{
|
||||||
|
dispatch::cuModuleLoadDataEx(&*cu_, source_.data(), 2, opt, optval);
|
||||||
|
}catch(exception::cuda::base const &){
|
||||||
|
std::cerr << "Compilation Failed! Log: " << std::endl;
|
||||||
|
std::cerr << errbuf << std::endl;
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Context const & Module::context() const
|
||||||
|
{ return context_; }
|
||||||
|
|
||||||
|
Handle<CUmodule> const & Module::cu() const
|
||||||
|
{ return cu_; }
|
||||||
|
|
||||||
|
Buffer Module::symbol(const char *name) const{
|
||||||
|
CUdeviceptr handle;
|
||||||
|
size_t size;
|
||||||
|
dispatch::cuModuleGetGlobal_v2(&handle, &size, *cu_, name);
|
||||||
|
return Buffer(context_, handle, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
56
lib/driver/platform.cpp
Executable file
56
lib/driver/platform.cpp
Executable file
@@ -0,0 +1,56 @@
|
|||||||
|
/* Copyright 2015-2017 Philippe Tillet
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files
|
||||||
|
* (the "Software"), to deal in the Software without restriction,
|
||||||
|
* including without limitation the rights to use, copy, modify, merge,
|
||||||
|
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so,
|
||||||
|
* subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#include "driver/platform.h"
|
||||||
|
#include "driver/device.h"
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace tdl
|
||||||
|
{
|
||||||
|
namespace driver
|
||||||
|
{
|
||||||
|
|
||||||
|
std::string Platform::version() const{
|
||||||
|
int version;
|
||||||
|
dispatch::cuDriverGetVersion(&version);
|
||||||
|
return std::to_string(version);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string Platform::name() const
|
||||||
|
{ return (std::string)"CUDA"; }
|
||||||
|
|
||||||
|
std::vector<Device> Platform::devices() const{
|
||||||
|
std::vector<Device> devices;
|
||||||
|
int N;
|
||||||
|
dispatch::cuDeviceGetCount(&N);
|
||||||
|
for(int i = 0 ; i < N ; ++i){
|
||||||
|
CUdevice device;
|
||||||
|
dispatch::cuDeviceGet(&device, i);
|
||||||
|
devices.push_back(Device(device));
|
||||||
|
}
|
||||||
|
return devices;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
95
lib/driver/stream.cpp
Executable file
95
lib/driver/stream.cpp
Executable file
@@ -0,0 +1,95 @@
|
|||||||
|
/* Copyright 2015-2017 Philippe Tillet
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files
|
||||||
|
* (the "Software"), to deal in the Software without restriction,
|
||||||
|
* including without limitation the rights to use, copy, modify, merge,
|
||||||
|
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so,
|
||||||
|
* subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <cassert>
|
||||||
|
#include <array>
|
||||||
|
|
||||||
|
#include "driver/backend.h"
|
||||||
|
#include "driver/stream.h"
|
||||||
|
#include "driver/context.h"
|
||||||
|
#include "driver/device.h"
|
||||||
|
#include "driver/event.h"
|
||||||
|
#include "driver/kernel.h"
|
||||||
|
#include "driver/buffer.h"
|
||||||
|
|
||||||
|
namespace tdl
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace driver
|
||||||
|
{
|
||||||
|
|
||||||
|
inline CUcontext cucontext(){
|
||||||
|
CUcontext result;
|
||||||
|
dispatch::cuCtxGetCurrent(&result);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
Stream::Stream(CUstream stream, bool take_ownership): context_(cucontext(), take_ownership), cu_(stream, take_ownership)
|
||||||
|
{}
|
||||||
|
|
||||||
|
Stream::Stream(Context const & context): context_(context), cu_(CUstream(), true)
|
||||||
|
{
|
||||||
|
ContextSwitcher ctx_switch(context_);
|
||||||
|
dispatch::cuStreamCreate(&*cu_, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Stream::synchronize()
|
||||||
|
{
|
||||||
|
ContextSwitcher ctx_switch(context_);
|
||||||
|
dispatch::cuStreamSynchronize(*cu_);
|
||||||
|
}
|
||||||
|
|
||||||
|
Context const & Stream::context() const
|
||||||
|
{ return context_; }
|
||||||
|
|
||||||
|
void Stream::enqueue(Kernel const & kernel, std::array<size_t, 3> grid, std::array<size_t, 3> block, std::vector<Event> const *, Event* event){
|
||||||
|
ContextSwitcher ctx_switch(context_);
|
||||||
|
if(event)
|
||||||
|
dispatch::cuEventRecord(((cu_event_t)*event).first, *cu_);
|
||||||
|
dispatch::cuLaunchKernel(kernel, grid[0], grid[1], grid[2], block[0], block[1], block[2], 0, *cu_,(void**)kernel.cu_params(), NULL);
|
||||||
|
if(event)
|
||||||
|
dispatch::cuEventRecord(((cu_event_t)*event).second, *cu_);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Stream::write(Buffer const & buffer, bool blocking, std::size_t offset, std::size_t size, void const* ptr){
|
||||||
|
ContextSwitcher ctx_switch(context_);
|
||||||
|
if(blocking)
|
||||||
|
dispatch::cuMemcpyHtoD(buffer + offset, ptr, size);
|
||||||
|
else
|
||||||
|
dispatch::cuMemcpyHtoDAsync(buffer + offset, ptr, size, *cu_);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Stream::read(Buffer const & buffer, bool blocking, std::size_t offset, std::size_t size, void* ptr){
|
||||||
|
ContextSwitcher ctx_switch(context_);
|
||||||
|
if(blocking)
|
||||||
|
dispatch::cuMemcpyDtoH(ptr, buffer + offset, size);
|
||||||
|
else
|
||||||
|
dispatch::cuMemcpyDtoHAsync(ptr, buffer + offset, size, *cu_);
|
||||||
|
}
|
||||||
|
|
||||||
|
Handle<CUstream> const & Stream::cu() const
|
||||||
|
{ return cu_; }
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Reference in New Issue
Block a user