[GENERAL] Deleted ISAAC Files

Philippe Tillet
2020-02-06 00:48:45 -05:00
parent 77c6b750bc
commit fa29e63838
158 changed files with 0 additions and 110955 deletions


@@ -1,29 +0,0 @@
cmake_minimum_required(VERSION 2.8.7)
project(isaac)
include(CTest)
#Default build type
if(NOT CMAKE_BUILD_TYPE)
message(STATUS "Default build type: Release")
set(CMAKE_BUILD_TYPE "Release")
endif()
#QtCreator: add visibility of headers
file( GLOB_RECURSE ALL_SRC *.cpp *.hpp *.h *.py)
add_custom_target( ALL SOURCES ${ALL_SRC} )
#Compiler flags
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include/isaac/external/CUDA)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Wextra -pedantic -Wno-strict-aliasing")
#Source
file(GLOB_RECURSE LIBISAAC_SRC lib/*.cpp)
add_library(isaac SHARED ${LIBISAAC_SRC})
target_link_libraries(isaac "dl")
#Examples
add_subdirectory(examples)
#Tests
add_subdirectory(tests)

LICENSE

@@ -1,22 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/


@@ -1,76 +0,0 @@
# ISAAC
This is the development repository for ISAAC, an input-aware auto-tuning framework and code generator for HPC/DL. This version is only compatible with NVIDIA hardware (it generates PTX source code). For OpenCL/CUDA compatibility, see the Intel fork (https://github.com/intel/isaac) or the deprecated v1.0 branch.
### License
ISAAC is distributed under the MIT/X11 license.
### Getting started - Deep Learning Inference
Execute the following commands in a Python environment that contains a recent version of PyTorch:
```
git clone https://github.com/ptillet/isaac.git
cd isaac/python;
python setup.py build;
python setup.py install;
cd examples/pytorch;
python imagenet.py --arch resnet152 /path/to/imagenet/;
```
This should give you 78.1% accuracy and roughly a 4x speed-up over PyTorch.
### Getting started - C++ API
In order to compile and use the ISAAC C++ API, only a proprietary NVIDIA driver is necessary. No CUDA SDK is required (except for testing and benchmarking against cuBLAS/cuDNN):
```
git clone https://github.com/ptillet/isaac.git
cd isaac;
mkdir build;
cd build;
cmake ../ ; make -j8;
./examples/isaac-tools --gemm --bench --suite deepbench --dtype float32
./examples/isaac-tools --conv --bench --suite deepbench --dtype float32
```
If you want, you can also dump the PTX source code generated by ISAAC for some shapes:
```
./examples/isaac-tools --gemm --dump --format ptx --shape 2048,2048,2048 --layout NT --dtype float32
```
If you really know what you're doing, you can also capture the tiling parameters found by ISAAC:
```
./examples/isaac-tools --gemm --dump --format params --shape 2048,2048,2048 --layout NT --dtype float32
```
You will get the following output:
```
Tuning parameters: 4, 16, 8, 8, 8, 8, 16, 8, 16, 8, 1, 1, 1
```
The parameters mean, respectively:
(1) shared-memory loads have a width of **4**;
(2) each block comprises **16**x**8** threads;
(3) each thread computes a tile of **8**x**8** elements;
(4) each loop iteration processes **8** elements along the K axis;
(5) threads are rearranged as a **16**x**8** block for loading A, and a **16**x**8** block for loading B;
(6) the reduction is split across **1**, **1** and **1** independent batches within each thread, thread-block and grid respectively, and the results are accumulated after the inner loop.
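If you want to consume this output programmatically, here is a minimal C++ sketch that maps the 13 dumped values onto named fields. The struct and field names are illustrative only; they follow the description above, not ISAAC's internal identifiers.
```
#include <array>
#include <cstddef>
#include <sstream>
#include <string>

// Illustrative names for the 13 values printed by
// `isaac-tools --gemm --dump --format params`; the grouping follows the
// numbered description above, not ISAAC's internal naming.
struct GemmParams {
  std::size_t load_width;                             // (1) width of shared-memory loads
  std::size_t block_x, block_y;                       // (2) threads per block
  std::size_t tile_x, tile_y;                         // (3) elements computed per thread
  std::size_t k_unroll;                               // (4) K elements per loop iteration
  std::size_t load_a_x, load_a_y;                     // (5) thread layout for loading A...
  std::size_t load_b_x, load_b_y;                     //     ...and for loading B
  std::size_t split_thread, split_block, split_grid;  // (6) reduction splits
};

// Parses a comma-separated list such as "4, 16, 8, 8, 8, 8, 16, 8, 16, 8, 1, 1, 1".
GemmParams parse_gemm_params(const std::string& line) {
  std::array<std::size_t, 13> v{};
  std::istringstream iss(line);
  std::string token;
  for (std::size_t i = 0; i < v.size() && std::getline(iss, token, ','); ++i)
    v[i] = std::stoul(token);
  return {v[0], v[1], v[2], v[3], v[4], v[5], v[6],
          v[7], v[8], v[9], v[10], v[11], v[12]};
}
```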
### Benchmarks - C++ API
ISAAC often provides performance competitive with vendor libraries (cuBLAS, cuDNN), as illustrated by the benchmarks below.
Tesla P100 - SGEMM:
![sgemm-gv100](https://github.com/ptillet/isaac/blob/master/documentation/bench/gv100/sgemm.png?raw=true)
Tesla P100 - DGEMM:
![dgemm-gv100](https://github.com/ptillet/isaac/blob/master/documentation/bench/gv100/dgemm.png?raw=true)
Tesla P100 - SCONV (vs cuDNN's IMPLICIT_PRECOMP_GEMM):
![sconv-gv100](https://github.com/ptillet/isaac/blob/master/documentation/bench/gv100/sconv.png?raw=true)
### Acknowledgments
This work was partially supported by the National Science Foundation (IIS 1409097) and by IARPA (contract D16PC00002).


@@ -1,7 +0,0 @@
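# Prepend the LICENSE header (../LICENSE) to every tracked source file that does not already contain a Copyright notice.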
for i in $(find ../lib/ ../include/isaac/ ../python/src/bind -name '*.cpp' -or -name '*.hpp' -or -name '*.h' | grep -v "../lib/external" | grep -v "../include/isaac/driver/external/");
do
if ! grep -q Copyright $i
then
cat ../LICENSE $i >$i.new && mv $i.new $i
fi
done

Binary files not shown (deleted binary files, including benchmark images of 55-62 KiB).

@@ -1,6 +0,0 @@
foreach(PROG isaac-tools)
add_executable(${PROG} ${PROG}.cpp)
set_target_properties(${PROG} PROPERTIES OUTPUT_NAME ${PROG})
include_directories(/usr/local/cuda/include/)
target_link_libraries(${PROG} isaac)
endforeach(PROG)


@@ -1,649 +0,0 @@
#include "opts.hpp"
#include "isaac/scalar.h"
#include "isaac/api.h"
#include "isaac/driver/cublas.h"
#include "isaac/driver/backend.h"
#include "isaac/driver/context.h"
#include "isaac/driver/stream.h"
#include "isaac/runtime/predict.h"
#include "isaac/templates/gemm.h"
#include "isaac/templates/error.hpp"
#include "isaac/tools/bench.hpp"
namespace sc = isaac;
namespace drv = sc::driver;
using sc::param_t;
enum Code {
RESET = 0,
BOLD = 1,
ITALIC = 3,
FG_RED = 31,
FG_GREEN = 32,
FG_YELLOW = 33,
FG_BLUE = 34,
FG_MAGENTA = 35,
FG_CYAN = 36,
FG_LIGHT_GRAY = 37,
FG_DARK_GRAY = 90,
FG_LIGHT_RED = 91,
FG_LIGHT_GREEN = 92,
FG_LIGHT_YELLOW = 93,
FG_LIGHT_BLUE = 94,
FG_LIGHT_MAGENTA = 95,
FG_LIGHT_CYAN = 96,
FG_WHITE = 97
};
class color_stream {
Code code;
public:
color_stream(Code pCode) : code(pCode) {}
friend std::ostream&
operator<<(std::ostream& os, const color_stream& mod) {
return os << "\033[" << mod.code << "m";
}
};
/* Helpers for benchmarking */
typedef std::tuple<sc::DType, sc::IsaacOperation_t, sc::IsaacOperation_t, sc::param_t, sc::param_t, sc::param_t> gemm_params_t;
typedef std::tuple<sc::DType, param_t, param_t, param_t, param_t, param_t, param_t, param_t, param_t, param_t, param_t, param_t, param_t, param_t, param_t, param_t> conv_params_t;
typedef std::tuple<sc::DType, param_t, param_t, param_t, param_t, param_t, param_t, param_t, param_t, param_t, param_t, param_t, param_t, param_t, param_t> pool_params_t;
struct SC17{
// GEMM
static std::vector<gemm_params_t> gemm(sc::DType dtype){
std::vector<gemm_params_t> shapes;
// LinPack
for(param_t N: std::vector<param_t>{512, 1024, 2048})
shapes.push_back(std::make_tuple(dtype, sc::ISAAC_OP_N, sc::ISAAC_OP_T, N, N, N));
// DeepBench
for(sc::IsaacOperation_t AT: std::vector<sc::IsaacOperation_t>{sc::ISAAC_OP_N, sc::ISAAC_OP_T})
for(param_t M: std::vector<param_t>{1760})
for(param_t N: std::vector<param_t>{16, 32, 64, 128})
shapes.push_back(std::make_tuple(dtype, AT, sc::ISAAC_OP_N, M, N, M));
// PCA/ICA
for(param_t N: std::vector<param_t>{16, 64, 256})
for(param_t K: std::vector<param_t>{64000})
shapes.push_back(std::make_tuple(dtype, sc::ISAAC_OP_N, sc::ISAAC_OP_T, N, N, K));
// LaPACK
for(param_t N: std::vector<param_t>{1024, 2048, 4096})
for(param_t K: std::vector<param_t>{32})
shapes.push_back(std::make_tuple(dtype, sc::ISAAC_OP_N, sc::ISAAC_OP_T, N, N, K));
return shapes;
}
// CONV
static std::vector<conv_params_t> conv(sc::DType dtype){
// Vector of (dtype, D, W, H, C, N, K, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w)
std::vector<conv_params_t> shapes;
// // DeepSpeech
// for(size_t N: std::vector<size_t>{8})
// shapes.push_back(std::make_tuple(dtype, 1, 700, 161, 1, N, 32, 1, 5, 20, 0, 0, 0, 1, 1, 1));
// for(size_t N: std::vector<size_t>{8})
// shapes.push_back(std::make_tuple(dtype, 1, 341, 79, 32, N, 32, 1, 5, 10, 0, 0, 0, 1, 1, 1));
// // OCR
// shapes.push_back(std::make_tuple(dtype, 1, 480, 48, 1, 16, 16, 1, 3, 3, 0, 1, 1, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 240, 24, 16, 16, 32, 1, 3, 3, 0, 0, 0, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 120, 12, 32, 16, 64, 1, 3, 3, 0, 0, 0, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 60, 6, 64, 16, 128, 1, 3, 3, 0, 1, 1, 1, 1, 1));
// // Face Recognition
// shapes.push_back(std::make_tuple(dtype, 1, 108, 108, 3, 8, 64, 1, 3, 3, 0, 1, 1, 1, 2, 2));
// shapes.push_back(std::make_tuple(dtype, 1, 54, 54, 64, 8, 64, 1, 3, 3, 0, 1, 1, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 27, 27, 128, 8, 128, 1, 3, 3, 0, 1, 1, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 14, 14, 128, 8, 256, 1, 3, 3, 0, 1, 1, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 7, 7, 256, 8, 512, 1, 3, 3, 0, 1, 1, 1, 1, 1));
// // Vision
// for(size_t N: std::vector<size_t>{8}){
// shapes.push_back(std::make_tuple(dtype, 1, 224, 224, 3, N, 64, 1, 3, 3, 0, 1, 1, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 112, 112, 64, N, 128, 1, 3, 3, 0, 1, 1, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 56, 56, 128, N, 256, 1, 3, 3, 0, 1, 1, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 28, 28, 256, N, 512, 1, 3, 3, 0, 1, 1, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 14, 14, 512, N, 512, 1, 3, 3, 0, 1, 1, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 7, 7, 512, N, 512, 1, 3, 3, 0, 1, 1, 1, 1, 1));
// }
// shapes.push_back(std::make_tuple(dtype, 1, 224, 224, 3, 16, 64, 1, 7, 7, 0, 3, 3, 1, 2, 2));
// shapes.push_back(std::make_tuple(dtype, 1, 28, 28, 192, 16, 32, 1, 5, 5, 0, 2, 2, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 28, 28, 192, 16, 64, 1, 1, 1, 0, 0, 0, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 14, 14, 512, 16, 48, 1, 5, 5, 0, 2, 2, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 14, 14, 512, 16, 192, 1, 1, 1, 0, 0, 0, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 7, 7, 832, 16, 256, 1, 1, 1, 0, 0, 0, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 7, 7, 832, 16, 128, 1, 5, 5, 0, 2, 2, 1, 1, 1));
// // Speaker ID
// shapes.push_back(std::make_tuple(dtype, 1, 350, 80, 64, 16, 128, 1, 5, 5, 0, 1, 1, 1, 2, 2));
// shapes.push_back(std::make_tuple(dtype, 1, 175, 40, 128, 16, 256, 1, 5, 5, 0, 1, 1, 1, 2, 2));
// // ResNET
// for(size_t N: std::vector<size_t>{8}){
// shapes.push_back(std::make_tuple(dtype, 1, 112, 112, 64, N, 64, 1, 1, 1, 0, 0, 0, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 56, 56, 64, N, 256, 1, 1, 1, 0, 0, 0, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 56, 56, 256, N, 64, 1, 1, 1, 0, 0, 0, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 56, 56, 256, N, 128, 1, 1, 1, 0, 0, 0, 1, 2, 2));
// shapes.push_back(std::make_tuple(dtype, 1, 28, 28, 128, N, 512, 1, 1, 1, 0, 0, 0, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 28, 28, 512, N, 128, 1, 1, 1, 0, 0, 0, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 28, 28, 512, N, 256, 1, 1, 1, 0, 0, 0, 1, 2, 2));
// shapes.push_back(std::make_tuple(dtype, 1, 14, 14, 256, N, 1024, 1, 1, 1, 0, 0, 0, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 28, 28, 512, N, 1024, 1, 1, 1, 0, 0, 0, 1, 2, 2));
// shapes.push_back(std::make_tuple(dtype, 1, 14, 14, 1024, N, 2048, 1, 1, 1, 0, 0, 0, 1, 2, 2));
// shapes.push_back(std::make_tuple(dtype, 1, 7, 7, 512, N, 512, 1, 3, 3, 0, 1, 1, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 7, 7, 512, N, 2048, 1, 1, 1, 0, 1, 1, 1, 1, 1));
// shapes.push_back(std::make_tuple(dtype, 1, 14, 14, 1024, N, 2048, 1, 1, 1, 0, 1, 1, 1, 2, 2));
// }
// 3D-Unet
shapes.push_back(std::make_tuple(dtype, 31, 204, 204, 4, 1, 24, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 29, 202, 202, 24, 1, 24, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 27, 100, 100, 24, 1, 72, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 25, 98, 98, 72, 1, 72, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 23, 48, 48, 72, 1, 216, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 21, 46, 46, 216, 1, 216, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 19, 22, 22, 216, 1, 648, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 17, 20, 20, 648, 1, 648, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 15, 36, 36, 648, 1, 432, 1, 1, 1, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 13, 36, 36, 432, 1, 216, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 11, 34, 34, 216, 1, 216, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 11, 64, 64, 216, 1, 144, 1, 1, 1, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 11, 64, 64, 144, 1, 72, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 9 , 62, 62, 72, 1, 72, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 7 , 120, 120, 72, 1, 48, 1, 1, 1, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 5 , 120, 120, 48, 1, 24, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 3 , 118, 118, 24, 1, 24, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 1 , 116, 116, 4 , 1, 24, 1, 1, 1, 0, 0, 0, 1, 1, 1));
return shapes;
}
// POOL
static std::vector<pool_params_t> pool(sc::DType dtype){
std::vector<pool_params_t> shapes;
// 3D-Unet
shapes.push_back(std::make_tuple(dtype, 31, 204, 204, 1, 24, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 29, 202, 202, 1, 24, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 27, 100, 100, 1, 72, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 25, 98, 98, 1, 72, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 23, 48, 48, 1, 216, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 21, 46, 46, 1, 216, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 19, 22, 22, 1, 648, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 17, 20, 20, 1, 648, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 15, 36, 36, 1, 432, 1, 1, 1, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 13, 36, 36, 1, 216, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 11, 34, 34, 1, 216, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 11, 64, 64, 1, 144, 1, 1, 1, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 11, 64, 64, 1, 72, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 9 , 62, 62, 1, 72, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 7 , 120, 120, 1, 48, 1, 1, 1, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 5 , 120, 120, 1, 24, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 3 , 118, 118, 1, 24, 3, 3, 3, 0, 0, 0, 1, 1, 1));
shapes.push_back(std::make_tuple(dtype, 1 , 116, 116, 1, 24, 1, 1, 1, 0, 0, 0, 1, 1, 1));
return shapes;
}
};
/* Metrics for benchmarking */
struct Metric{
virtual std::function<bool(double, double)> cmp() const = 0;
virtual double conv(param_t P, param_t Q, param_t M, param_t K, param_t N, param_t C, param_t R, param_t S, param_t T, double tsec) const = 0;
virtual double gemm(param_t M, param_t N, param_t K, double tsec) const = 0;
virtual double pool(param_t P, param_t Q, param_t M, param_t K, param_t N, param_t, param_t, param_t, double tsec) const = 0;
};
class FLOPS: public Metric{
public:
FLOPS(double scale): scale_(scale){}
std::function<bool(double, double)> cmp() const { return std::greater<double>(); }
double conv(param_t P, param_t Q, param_t M, param_t K, param_t N, param_t C, param_t R, param_t S, param_t T, double tsec) const
{ return sc::templates::Conv::tflops(P,Q,M,K,N,C,R,S,T,tsec) * 1e12 / scale_; }
double gemm(param_t M, param_t N, param_t K, double tsec) const
{ return sc::templates::GEMM::tflops(M, N, K, tsec) * 1e12 / scale_; }
double pool(param_t P, param_t Q, param_t M, param_t K, param_t N, param_t T, param_t R, param_t S, double tsec) const
{ return sc::templates::Pool::tflops(P, Q, M, K, N, T, R, S, tsec) * 1e12 / scale_;}
private:
double scale_;
};
class Time: public Metric{
public:
Time(double scale): scale_(scale){}
std::function<bool(double, double)> cmp() const { return std::less<double>(); }
double conv(param_t, param_t, param_t, param_t, param_t, param_t, param_t, param_t, param_t, double tsec) const { return tsec*1e-9/scale_; }
double gemm(param_t, param_t, param_t, double tsec) const { return tsec*1e-9/scale_; }
double pool(param_t, param_t, param_t, param_t, param_t, param_t, param_t, param_t, double tsec) const { return tsec*1e-9/scale_; }
private:
double scale_;
};
void print_results_header(std::vector<std::string> sections){
std::cout << color_stream(ITALIC) << color_stream(BOLD) ;
std::copy(sections.begin(), sections.end(), std::ostream_iterator<std::string>(std::cout, "\t"));
std::cout << color_stream(RESET) << std::endl;
}
void print_results(std::vector<double> const & times, std::vector<std::string> const & prefix, std::function<bool(double, double)> cmp, std::function<double(double)> fn){
std::copy(prefix.begin(), prefix.end(), std::ostream_iterator<std::string>(std::cout, "\t"));
std::vector<double> perf;
std::transform(times.begin(), times.end(), std::back_inserter(perf), fn);
auto fastest = perf;
std::sort(fastest.begin(), fastest.end(), cmp);
for(auto x: perf){
if(x == fastest[0] && x / fastest[1] > 1.05)
std::cout << color_stream(FG_LIGHT_BLUE) << x << color_stream(RESET);
else
std::cout << x;
std::cout << "\t";
}
std::cout << std::endl;
}
void benchmark_gemm(Metric const & metric, sc::driver::Context& ctx, sc::driver::Device& device, sc::driver::Stream& stream,
sc::DType dtype, sc::IsaacOperation_t AT, sc::IsaacOperation_t BT, size_t M, size_t N, size_t K,
sc::templates::Generator* generator){
size_t ldc = M;
size_t lda = (AT==sc::ISAAC_OP_N)?M:K;
size_t ldb = (BT==sc::ISAAC_OP_N)?K:N;
size_t dtsize = sc::size_of(dtype);
sc::scalar alpha(1., dtype);
sc::scalar beta(0., dtype);
char cuAT = (AT==sc::ISAAC_OP_T)?'T':'N';
char cuBT = (BT==sc::ISAAC_OP_T)?'T':'N';
sc::driver::Buffer C(ctx, M*N*dtsize);
sc::driver::Buffer A(ctx, M*K*dtsize);
sc::driver::Buffer B(ctx, K*N*dtsize);
std::vector<double> times;
times.push_back(bench([&](){ sc::GEMM(device, stream, dtype, dtype, AT, BT, M, N, K, 0, lda, 0, ldb, 0, ldc, alpha, A, B, beta, C, 1., 1., 1., NULL, (sc::templates::GEMM*)generator, 10); }, [&](){ stream.synchronize(); }, device));
if(sc::driver::dispatch::cublasinit()){
cublasGemmAlgo_t fastest;
sc::driver::cublasGemm(dtype, stream, cuAT, cuBT, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, &fastest);
times.push_back(bench([&](){ sc::driver::cublasGemm(dtype, stream, cuAT, cuBT, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, NULL, fastest); }, [&](){ stream.synchronize(); }, device));
//times.push_back(bench([&](){ sc::driver::cublasGemm(dtype, stream, cuAT, cuBT, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc); }, [&](){ stream.synchronize(); }, device));
}
print_results(times, {str(AT), str(BT), str(M), str(N), str(K)}, metric.cmp(), [&](double tsec){ return metric.gemm(M, N, K, tsec);});
}
void benchmark_conv(Metric const & metric, sc::driver::Context& ctx, sc::driver::Device& device, sc::driver::Stream& stream,
sc::DType in_dtype, sc::DType out_dtype, size_t D, size_t H, size_t W, size_t C, size_t N, size_t K, size_t T, size_t R, size_t S,
size_t pad_d, size_t pad_h, size_t pad_w,
size_t stride_d, size_t stride_h, size_t stride_w,
size_t upsample_d, size_t upsample_h, size_t upsample_w,
sc::templates::Generator* generator){
param_t Zk = 0, crop_z_m0 = 0, crop_z_m1 = 0, crop_z_p0 = 0, crop_z_p1 = 0, crop_z_q0 = 0, crop_z_q1 = 0;
param_t M, P, Q;
sc::templates::Conv::output_shapes(D, H, W, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w, upsample_d, upsample_h, upsample_w, M, P, Q);
sc::ActivationType activation = sc::Linear;
size_t vect_c = (in_dtype==sc::INT8X4_TYPE)?4:1;
size_t vect_k = (out_dtype==sc::INT8X4_TYPE)?4:1;
sc::DType ab_dtype = (out_dtype==sc::INT8X4_TYPE)?sc::FLOAT_TYPE:out_dtype;
sc::scalar alpha(1., ab_dtype);
sc::scalar beta(0., ab_dtype);
sc::driver::Buffer O(ctx, N*K/vect_k*M*P*Q*sc::size_of(out_dtype));
sc::driver::Buffer I(ctx, C/vect_c*D*H*W*N*sc::size_of(in_dtype));
sc::driver::Buffer F(ctx, K*C/vect_c*T*R*S*sc::size_of(in_dtype));
std::vector<double> times;
times.push_back(bench([&](){ sc::CONV(device, stream, in_dtype, out_dtype, N, K, M, P, Q, C, T, R, S, D, H, W, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w, upsample_d, upsample_h, upsample_w, I, F, &O, 1, NULL, activation, 0., 1., 1., {1.}, 1., sc::NoResidual, Zk, crop_z_m0, crop_z_m1, crop_z_p0, crop_z_p1, crop_z_q0, crop_z_q1, NULL, (sc::templates::Conv*)generator, 10); }, [&](){ stream.synchronize(); }, device));
// if(sc::driver::dispatch::cudnninit())
// times.push_back(bench([&](){ sc::driver::cudnnConv(out_dtype, stream, D, H, W, N, K, M, P, Q, C, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w, alpha, I, F, beta, O); }, [&](){ stream.synchronize(); }, device));
print_results(times, {str(N), str(K), str(M), str(P), str(Q), str(C), str(T), str(R), str(S)}, metric.cmp(), [&](double tsec){ return metric.conv(M, P, Q, K, N, C, T, R, S, tsec);});
}
void benchmark_pool(Metric const & metric, sc::driver::Context& ctx, sc::driver::Device& device, sc::driver::Stream& stream,
sc::DType dtype, size_t D, size_t H, size_t W, size_t N, size_t K, size_t T, size_t R, size_t S, size_t pad_d, size_t pad_h, size_t pad_w, size_t stride_d, size_t stride_h, size_t stride_w,
sc::templates::Generator* generator){
param_t M, P, Q;
sc::templates::Conv::output_shapes(D, H, W, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w, 1, 1, 1, M, P, Q);
size_t dtsize = sc::size_of(dtype);
sc::scalar alpha(1., dtype);
sc::scalar beta(0., dtype);
sc::driver::Buffer O(ctx, N*K*M*P*Q*dtsize);
sc::driver::Buffer I(ctx, K*D*H*W*N*dtsize);
std::vector<double> times;
times.push_back(bench([&](){ sc::POOL(device, stream, dtype, dtype, sc::MaxPool, K, M, P, Q, N, T, R, S, D, H, W, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w, I, O, 1., 1., (sc::templates::Pool*)generator); }, [&](){ stream.synchronize(); }, device));
if(sc::driver::dispatch::cudnninit())
times.push_back(bench([&](){ sc::driver::cudnnPool(dtype, stream, D, H, W, N, K, M, P, Q, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w, alpha, I, beta, O); }, [&](){ stream.synchronize(); }, device));
print_results(times, {str(N), str(K), str(M), str(P), str(Q), str(T), str(R), str(S)}, metric.cmp(), [&](double tsec){ return metric.pool(M, P, Q, K, N, T, R, S, tsec);});
}
/* ------------------------------- */
void loop_nest(std::vector<size_t> const & ranges, std::function<void(std::vector<size_t> const &)> const & f){
size_t D = ranges.size();
std::vector<size_t> values(D, 0);
// Start with innermost loop
size_t i = D - 1;
while(true){
//Execute function
f(values);
//Increment counters
while(values[i]++ == ranges[i] - 1){
if(i == 0)
return;
values[i--] = 0;
}
i = D - 1;
}
}
template<class T>
void loop_nest(std::vector<std::vector<T>> const & iterates, std::function<void(std::vector<T>)> const & f){
//Ranges to iterate over
std::vector<size_t> ranges;
for(auto const & x: iterates)
ranges.push_back(x.size());
//Proxy function
auto proxy = [&](std::vector<size_t> const & idx){
std::vector<T> x(iterates.size());
for(size_t i = 0; i < x.size(); ++i)
x[i] = iterates[i][idx[i]];
f(x);
};
//Iterate
loop_nest(ranges, proxy);
}
void search_conv(int32_t D, int32_t H, int32_t W,
int32_t C, int32_t N, int32_t K,
int32_t T, int32_t R, int32_t S,
int32_t pad_d, int32_t pad_h, int32_t pad_w,
int32_t stride_d, int32_t stride_h, int32_t stride_w,
int32_t upsample_d, int32_t upsample_h, int32_t upsample_w,
sc::ActivationType activation, sc::DType in_dtype, sc::DType out_dtype)
{
auto ctx = drv::backend::contexts::get_default();
size_t P = (H - R + 1 + 2*pad_h + stride_h - 1)/stride_h;
size_t Q = (W - S + 1 + 2*pad_w + stride_w - 1)/stride_w;
size_t M = (D - T + 1 + 2*pad_d + stride_d - 1)/stride_d;
size_t Zk = 0, crop_z_m0 = 0, crop_z_m1 = 0, crop_z_p0 = 0, crop_z_p1 = 0, crop_z_q0 = 0, crop_z_q1 = 0;
//Setup
drv::Buffer O(ctx, K*P*Q*M*N*sc::size_of(out_dtype));
drv::Buffer I(ctx, C*H*W*D*N*sc::size_of(in_dtype));
drv::Buffer F(ctx, C*R*S*T*K*sc::size_of(in_dtype));
drv::Stream stream(ctx);
//Exhaustive search
std::vector<sc::param_t> r1 = {1};
std::vector<sc::param_t> rv = {4};
std::vector<sc::param_t> rr = {1, 2, 4, 8};
std::vector<sc::param_t> rl = {4, 8, 16, 32};
std::vector<sc::param_t> rs = {4, 8, 16};
double best = 0;
loop_nest<sc::param_t>({rv, rl, rl, rs, rs, rl, rl, r1, rr, rr}, [&](std::vector<sc::param_t> const & x){
sc::templates::Conv generator(in_dtype, out_dtype,
C, D, H, W, N, K, M, P, Q, T, R, S,
pad_d, pad_h, pad_w,
stride_d, stride_h, stride_w,
upsample_d, upsample_h, upsample_w,
activation, 1,
sc::NoResidual, Zk, crop_z_m0, crop_z_m1, crop_z_p0, crop_z_p1, crop_z_q0, crop_z_q1,
x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8]);
//Compile
try{
std::string src = generator.dump(ctx.device(), "conv");
drv::Module program(ctx, src);
drv::Kernel kernel(program, "conv");
double tsec = bench([&](){ generator.enqueue(kernel, stream, I, F, &O); }, [&](){ stream.synchronize(); }, ctx.device());
double tflops = sc::templates::Conv::tflops(P,Q,M,K,N,C,R,S,T,tsec);
best = std::max(tflops, best);
std::cout << "//";
std::copy(x.begin(), x.end(), std::ostream_iterator<int>(std::cout, " "));
std::cout << ": " << tflops << " TFLOPS [BEST: " << best << "]" << std::endl;
}catch(isaac::templates::invalid_parameters const &){
return;
}catch(drv::exception::cuda::launch_out_of_resources const &){
return;
}
});
std::cout << "ISAAC: " << best << std::endl;
}
void search_gemm(int32_t M, int32_t N, int32_t K, sc::IsaacOperation_t AT, sc::IsaacOperation_t BT, sc::DType dtype){
auto ctx = drv::backend::contexts::get_default();
size_t dtsize = sc::size_of(dtype);
// Setup
size_t ldc = M;
size_t lda = (AT==sc::ISAAC_OP_N)?M:K;
size_t ldb = (BT==sc::ISAAC_OP_N)?K:N;
int32_t offc = 0, offa = 0, offb = 0;
drv::Buffer C(ctx, M*N*dtsize);
drv::Buffer A(ctx, M*K*dtsize);
drv::Buffer B(ctx, K*N*dtsize);
drv::Stream stream(ctx);
sc::scalar alpha(1., dtype), beta(0., dtype);
// Exhaustive search
std::vector<sc::param_t> r1 = {1};
std::vector<sc::param_t> rv = {4};
std::vector<sc::param_t> rr = {1, 2, 4, 8};
std::vector<sc::param_t> rl = {4, 8, 16, 32};
std::vector<sc::param_t> rs = {4, 8, 16};
double best = 0;
loop_nest<sc::param_t>({rv, rl, rl, rl, rs, r1, rs, rl, rl, rl, rl, r1, rr, rr}, [&](std::vector<sc::param_t> const & x){
isaac::templates::GEMM generator(dtype, dtype, AT, BT, M, N, K, offa, lda, offb, ldb, offc, ldc, x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11], x[12], x[13]);
// Compile
try{
std::string src = generator.dump(ctx.device(), "gemm");
drv::Module program(ctx, src);
drv::Kernel kernel(program, "gemm");
double time = bench([&](){ generator.enqueue(kernel, stream, alpha, A, B, beta, C); }, [&](){ stream.synchronize(); }, ctx.device());
double tflops = 2*1e-3*M*N*K/time;
best = std::max(tflops, best);
std::cout << "//";
std::copy(x.begin(), x.end(), std::ostream_iterator<int>(std::cout, " "));
std::cout << ": " << tflops << " TFLOPS [BEST: " << best << "]" << std::endl;
}catch(isaac::templates::invalid_parameters const &){
return;
}catch(drv::exception::cuda::launch_out_of_resources const &){
return;
}
});
std::cout << "ISAAC: " << best << std::endl;
}
/* Helpers for dumping source code */
void dump_source(sc::driver::Device const & device, sc::templates::Generator& generator, opts::Options* options, std::string const & name){
if(options->get<std::string>("format") == "ptx")
std::cout << generator.dump(device, name) << std::endl;
else{
auto x = generator.tuning_params();
std::cout << "Tuning parameters: " << std::flush;
for(size_t i = 0; i < x.size(); ++i)
std::cout << ((i>0)?", ":"") << x[i] << std::flush;
std::cout << std::endl;
}
}
/* Application code */
int main(int argc, char* argv[]){
opts::Application program("isaac-tools", "Command-line interface for ISAAC");
// Options
opts::Options* options = program.options();
options->add<size_t>("device", "Device to run on", 0);
options->add<sc::DType>("dtype", "Data-type to use for computations", "float32", {{"int8x4", sc::INT8X4_TYPE}, {"float32", sc::FLOAT_TYPE}, {"float64", sc::DOUBLE_TYPE}});
options->add<std::string>("name", "Name to give to the generated kernel", "kernel");
options->add_group("search", "Exhaustively search for best tuning parameters");
opts::Options* dump = options->add_group("dump", "Dump source-code generated by ISAAC");
dump->add("format", "Format to generate", "ptx", {"ptx", "params"});
dump->add("target", "Target GPU (sm_xx)", {"sm_50", "sm_52", "sm_60", "sm_61", "sm_70"});
opts::Options* bench = options->add_group("bench", "Benchmark source code generated by ISAAC");
bench->add("suite", "Benchmarking suite to run", "custom", {"custom", "deepbench"});
bench->add<std::shared_ptr<Metric>>("metric", "performance metric for the results", "tflops", {{"tflops", std::make_shared<FLOPS>(1e12)}, {"ms", std::make_shared<Time>(1e-3)}, {"us", std::make_shared<Time>(1e-6)}});
// Constraints
options->add_constraint(opts::OneOf({"bench", "dump", "search"}));
options->add_constraint(opts::OneOf({"gemm", "conv", "pool"}));
// GEMM
opts::Options* gemm = options->add_group("gemm", "Use matrix-multiplication");
gemm->add("layout", "Transposition layout for A and B", "NT", {"NN", "NT", "TN", "TT"});
gemm->add<std::vector<size_t>>("shape", "Matrix shapes (M,N,K)", {2048, 2048, 2048}, opts::SizeConstraint(3));
gemm->add<std::vector<size_t>>("kernel", "Bypass predictive model to use given tuning parameters", opts::SizeConstraint(14));
// CONV
opts::Options* conv = options->add_group("conv", "Use convolutions");
conv->add<std::vector<size_t>>("shape", "Tensor shapes (D, H, W, C, N, K, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w)", {1, 70, 14, 512, 128, 64, 1, 7, 7, 0, 0, 0, 1, 1, 1}, opts::SizeConstraint(15));
conv->add<std::vector<size_t>>("kernel", "Bypass predictive model to use given tuning parameters", opts::SizeConstraint(9));
// POOL
opts::Options* pool = options->add_group("pool", "Use pooling");
pool->add<std::vector<size_t>>("shape", "Tensor shapes (D, H, W, N, K, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w)", {1, 70, 14, 128, 64, 1, 7, 7, 0, 0, 0, 1, 1, 1}, opts::SizeConstraint(14));
pool->add<std::vector<size_t>>("kernel", "Bypass predictive model to use given tuning parameters", opts::SizeConstraint(4));
program.parse(argc, argv);
if(options->has("bench"))
std::cout << std::fixed << std::setprecision(2);
//Device
sc::driver::Device device = sc::driver::backend::devices()[options->get<size_t>("device")];
if(options->has("dump") && dump->has("target")){
std::string target = dump->get<std::string>("target");
char major = target[3];
char minor = target[4];
device.interpret_as(std::make_pair((size_t)std::atoi(&major), (size_t)std::atoi(&minor)));
}
static sc::driver::Context context(device);
sc::driver::Stream stream(context);
// Data-Type
sc::DType dtype = options->get<sc::DType>("dtype");
// Kernel name
std::string name = options->get<std::string>("name");
/* Get optimized kernel generator */
std::unique_ptr<sc::templates::Generator> generator;
// GEMM
if(options->has("gemm")){
std::string layout = gemm->get<std::string>("layout");
sc::IsaacOperation_t AT = layout[0]=='T'?sc::ISAAC_OP_T:sc::ISAAC_OP_N;
sc::IsaacOperation_t BT = layout[1]=='T'?sc::ISAAC_OP_T:sc::ISAAC_OP_N;
auto shape = gemm->get<std::vector<size_t>>("shape");
size_t M = shape[0], N = shape[1], K = shape[2];
//Get Source
size_t ldc = M;
size_t lda = (AT==sc::ISAAC_OP_N)?M:K;
size_t ldb = (BT==sc::ISAAC_OP_N)?K:N;
if(options->has("search")){
search_gemm(M, N, K, AT, BT, dtype);
}
if(gemm->has("kernel")){
auto x = gemm->get<std::vector<size_t>>("kernel");
generator.reset(new sc::templates::GEMM(dtype, dtype, AT, BT, M, N, K, 0, lda, 0, ldb, 0, ldc, x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11], x[12], x[13]));
}
else{
sc::runtime::GEMMProfile* profile = (sc::runtime::GEMMProfile*)sc::runtime::database.at({device.architecture(), sc::runtime::GEMM}).get();
generator.reset(new sc::templates::GEMM(profile->predict(stream, dtype, dtype, AT, BT, M, N, K, 0, lda, 0, ldb, 0, ldc)));
}
if(options->has("dump"))
dump_source(device, *generator, dump, name);
if(options->has("bench")){
auto metric = bench->get<std::shared_ptr<Metric>>("metric");
print_results_header({"AT", "BT", "M", "N", "K", "ISAAC", "cuBLAS"});
std::vector<gemm_params_t> shapes;
//User provided shapes
if(bench->get<std::string>("suite")=="custom")
shapes = {std::make_tuple(dtype, AT, BT, M, N, K)};
//SC17 paper shapes
if(bench->get<std::string>("suite")=="deepbench")
shapes = SC17::gemm(dtype);
//Print results
for(auto x: shapes){
std::tie(dtype, AT, BT, M, N, K) = x;
benchmark_gemm(*metric, context, device, stream, dtype, AT, BT, M, N, K, gemm->has("kernel")?generator.get():NULL);
}
}
}
// CONV
if(options->has("conv")){
sc::DType in_dtype = dtype;
sc::DType out_dtype = dtype;
auto x = conv->get<std::vector<size_t>>("shape");
param_t D = x[0], H = x[1], W = x[2], C = x[3], N = x[4], K = x[5], T = x[6], R = x[7], S = x[8], pad_d = x[9], pad_h = x[10], pad_w = x[11], stride_d = x[12], stride_h = x[13], stride_w = x[14];
param_t M, P, Q;
param_t upsample_d = 1, upsample_h = 1, upsample_w = 1;
param_t Zk = 0, crop_z_m0 = 0, crop_z_m1 = 0, crop_z_p0 = 0, crop_z_p1 = 0, crop_z_q0 = 0, crop_z_q1 = 0;
sc::templates::Conv::output_shapes(D, H, W, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w, upsample_d, upsample_h, upsample_w, M, P, Q);
sc::ActivationType activation = sc::Linear;
if(options->has("search"))
search_conv(D, H, W, C, N, K, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w, upsample_d, upsample_h, upsample_w, activation, in_dtype, out_dtype);
if(conv->has("kernel")){
auto x = conv->get<std::vector<size_t>>("kernel");
generator.reset(new sc::templates::Conv(in_dtype, out_dtype, C, D, H, W, N, K, M, P, Q, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w, upsample_d, upsample_h, upsample_w, activation, 1, sc::NoResidual, Zk, crop_z_m0, crop_z_m1, crop_z_p0, crop_z_p1, crop_z_q0, crop_z_q1, x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8]));
}
else{
sc::runtime::ConvProfile* profile = (sc::runtime::ConvProfile*)sc::runtime::database.at({device.architecture(), sc::runtime::CONV}).get();
generator.reset(new sc::templates::Conv(profile->predict(stream, in_dtype, out_dtype, C, D, H, W, N, K, M, P, Q, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w, upsample_d, upsample_h, upsample_w, activation, 1, sc::NoResidual, Zk, crop_z_m0, crop_z_m1, crop_z_p0, crop_z_p1, crop_z_q0, crop_z_q1)));
}
if(options->has("dump"))
dump_source(device, *generator, dump, name);
if(options->has("bench")){
auto metric = bench->get<std::shared_ptr<Metric>>("metric");
print_results_header({"N", "K", "M", "P", "Q", "C", "T", "R", "S", "ISAAC", "cuDNN"});
std::vector<conv_params_t> shapes;
//User provided shapes
if(bench->get<std::string>("suite")=="custom")
shapes = {std::make_tuple(dtype, D, W, H, C, N, K, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w)};
//SuperComputing17 shapes
if(bench->get<std::string>("suite")=="deepbench")
shapes = SC17::conv(dtype);
//Print results
for(auto x: shapes){
std::tie(dtype, D, W, H, C, N, K, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w) = x;
benchmark_conv(*metric, context, device, stream, in_dtype, out_dtype, D, H, W, C, N, K, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w, upsample_d, upsample_h, upsample_w, conv->has("kernel")?generator.get():NULL);
}
}
}
// POOL
if(options->has("pool")){
auto x = pool->get<std::vector<size_t>>("shape");
param_t D = x[0], W = x[1], H = x[2], N = x[3], K = x[4], T = x[5], R = x[6], S = x[7], pad_d = x[8], pad_h = x[9], pad_w = x[10], stride_d = x[11], stride_h = x[12], stride_w = x[13];
param_t M, P, Q;
sc::templates::Conv::output_shapes(D, H, W, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w, 1, 1, 1, M, P, Q);
if(pool->has("kernel")){
auto x = pool->get<std::vector<size_t>>("kernel");
generator.reset(new sc::templates::Pool(dtype, dtype, sc::MaxPool, K, D, H, W, N, M, P, Q, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w, x[0], x[1], x[2], x[3]));
}
else{
generator.reset(new sc::templates::Pool(dtype, dtype, sc::MaxPool, K, D, H, W, N, M, P, Q, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w));
}
if(options->has("dump"))
dump_source(device, *generator, dump, name);
if(options->has("bench")){
auto metric = bench->get<std::shared_ptr<Metric>>("metric");
print_results_header({"N", "K", "M", "P", "Q", "T", "R", "S", "ISAAC", "cuDNN"});
std::vector<pool_params_t> shapes;
//User provided shapes
if(bench->get<std::string>("suite")=="custom")
shapes = {std::make_tuple(dtype, D, W, H, N, K, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w)};
//SuperComputing17 shapes
if(bench->get<std::string>("suite")=="deepbench")
shapes = SC17::pool(dtype);
//Print results
for(auto x: shapes){
std::tie(dtype, D, W, H, N, K, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w) = x;
benchmark_pool(*metric, context, device, stream, dtype, D, H, W, N, K, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w, pool->has("kernel")?generator.get():NULL);
}
}
}
}


@@ -1,395 +0,0 @@
#ifndef OPTS_HPP
#define OPTS_HPP
#include <string>
#include <set>
#include <vector>
#include <iostream>
#include <sstream>
#include <memory>
#include <map>
#include <algorithm>
#include <type_traits>
#include <functional>
#include <sstream>
#include <stdexcept>
namespace opts{
class InvalidOptions: public std::exception{
public:
InvalidOptions(std::string const & msg): msg_("Invalid options: " + msg){}
const char* what() const throw(){ return msg_.c_str();}
private:
std::string msg_;
};
/**
* @class OptionBase
* @brief Base class for command-line options
*/
class OptionBase{
protected:
template<class ItType>
std::vector<std::string>::const_iterator get_option(ItType const & begin, ItType const & end){
auto it = std::find(begin, end, "--" + name_);
if(it==end && required_)
throw InvalidOptions("parameter '" + name_ + "' is mandatory");
if(parent_ && parent_->parent_ && parent_->get_option(begin, it)==it)
throw InvalidOptions("parameter '" + name_ + "' needs to be nested in group '" + parent_->name_ + "'");
return it;
}
public:
OptionBase(std::string const & name, std::string const & desc, bool required = false, OptionBase* parent = NULL): name_(name), desc_(desc), required_(required), parent_(parent)
{}
virtual std::ostream& usage(std::ostream& os, size_t indent) const{
if(!desc_.empty())
os << std::string(indent, ' ') << "--" << "\033[1m" << name_ << "\033[0m" << ": " << desc_ << std::endl;
return os;
}
virtual void parse(std::vector<std::string> const & args, std::map<std::string, void*>& values) = 0;
std::string const & name() const
{ return name_; }
protected:
const std::string name_;
const std::string desc_;
bool required_;
OptionBase* parent_;
};
/**
* @class OptionHelp
* @brief Automatically added --help option
*/
class OptionHelp: public OptionBase{
public:
OptionHelp() : OptionBase("help", "Display this message", false){}
void parse(std::vector<std::string> const & args, std::map<std::string, void*>& values){
if(get_option(args.begin(), args.end()) != args.end())
values[name_] = (void*)this;
}
};
/**
* @class Option
* @brief Standard, typed option
*/
template<class T>
class Option: public OptionBase{
public:
typedef std::function<T(std::string const &)> converter_t;
typedef std::function<void(T const &)> constraint_t;
public:
Option(std::string const & name, std::string const & desc, T dft, converter_t convert, constraint_t constraint, OptionBase* parent):
OptionBase(name, desc, false, parent), default_(new T(dft)), convert_(convert), constraint_(constraint){}
Option(std::string const & name, std::string const & desc, bool required, converter_t convert, constraint_t constraint, OptionBase* parent):
OptionBase(name, desc, required, parent), convert_(convert), constraint_(constraint){}
void parse(std::vector<std::string> const & args, std::map<std::string, void*>& values){
value_ = default_;
auto it = get_option(args.begin(), args.end());
if(it!=args.end()){
auto next = it + 1;
if(next==args.end() || next->compare(0, 2, "--")==0)
throw InvalidOptions("parameter " + name_ + " requires an argument");
else{
value_.reset(new T(convert_(*next)));
constraint_(*value_);
}
}
values[name_] = (void*)value_.get();
}
std::ostream& usage(std::ostream& os, size_t indent) const{
OptionBase::usage(os, indent);
return os;
}
private:
std::shared_ptr<T> default_;
std::shared_ptr<T> value_;
converter_t convert_;
constraint_t constraint_;
};
/**
* @class SwitchOption
* @brief Boolean option activated with --flag or --no-flag
*/
class SwitchOption: public OptionBase{
public:
SwitchOption(std::string const & name, std::string const & desc, bool dft, OptionBase* parent):
OptionBase(name, desc, false, parent), default_(dft)
{}
void parse(std::vector<std::string> const & args, std::map<std::string, void*>& values){
auto it_true = std::find(args.begin(), args.end(), "--" + name_);
auto it_false = std::find(args.begin(), args.end(), "--no-" + name_);
value_.reset(new bool(default_));
if(it_true != args.end()) value_.reset(new bool(true));
if(it_false != args.end()) value_.reset(new bool(false));
values[name_] = (void*)value_.get();
}
private:
bool default_;
std::shared_ptr<bool> value_;
};
/* Pre-defined converters */
template<class T>
class MapConverter{
public:
MapConverter(std::map<std::string, T> const & values): values_(values){}
inline T operator()(std::string const & str){
if(values_.find(str) == values_.end())
throw InvalidOptions("value " + str + " is invalid");
return values_.at(str);
}
private:
std::map<std::string, T> values_;
};
//Read type from stream
template<class T>
class StreamConverter{
public:
T operator()(std::string const & str){
T value;
std::istringstream iss(str);
iss >> value;
return value;
}
};
//Read vector from stream
template<class T>
class StreamConverter<std::vector<T>>{
public:
std::vector<T> operator()(std::string const & str){
std::vector<T> result;
std::istringstream iss(str);
std::string token;
while(std::getline(iss, token, ','))
result.push_back(StreamConverter<T>()(token));
return result;
}
};
//Read tuple from stream
template<class... Args>
class StreamConverter<std::tuple<Args...>>{
template<size_t I, class T, class... U>
struct TupleReader{
static std::tuple<T, U...> get(std::istringstream& iss){
auto x = TupleReader<0,T>::get(iss);
auto y = TupleReader<I-1, U...>::get(iss);
return std::tuple_cat(x, y);
}
};
template<class T>
struct TupleReader<0, T>{
static std::tuple<T> get(std::istringstream& iss){
std::string token;
std::getline(iss, token, ',');
return std::make_tuple(StreamConverter<T>()(token));
}
};
public:
inline std::tuple<Args...> operator()(std::string const & str){
std::istringstream iss(str);
return TupleReader<sizeof...(Args) - 1, Args...>::get(iss);
}
};
/* Pre-defined constraints */
struct NoOp {
template<class T>
void operator()(T const &) {}
};
class SizeConstraint{
public:
SizeConstraint(size_t size): size_(size){}
template<class T>
void operator()(std::vector<T> const & x) const {
if(x.size()!=size_)
throw InvalidOptions("parameter must have size " + std::to_string(size_));
}
private:
size_t size_;
};
class OneOf{
public:
OneOf(std::vector<std::string> keys): keys_(keys){}
void operator()(std::map<std::string, void*> values){
std::vector<std::string> keys;
for(auto& x: values)
keys.push_back(x.first);
size_t found = 0;
for(auto& x: keys_)
if(std::find(keys.begin(), keys.end(), x) != keys.end())
found++;
std::string msg;
for(size_t i = 0; i < keys_.size(); ++i)
msg += (i>0?", ":"") + keys_[i];
if(found != 1)
throw InvalidOptions(std::string(found<1?"At least":"Only") + " one of the following flags must be specified: " + msg);
}
private:
std::vector<std::string> keys_;
};
/**
* @class Options
* @brief Container for multiple options
*/
class Options: public OptionBase{
public:
typedef std::function<void(std::map<std::string, void*> const &)> constraint_t;
std::map<std::string, std::string> set_to_map(std::set<std::string> const & set){
std::map<std::string, std::string> tmp;
for(std::string x: set)
tmp.insert(std::make_pair(x, x));
return tmp;
}
public:
Options(std::string const & name, std::string const & desc, OptionBase* parent): OptionBase(name, desc, false, parent)
{}
std::ostream& usage(std::ostream& os, size_t indent) const{
OptionBase::usage(os, indent);
for(auto& opt: opts_)
opt->usage(os, indent + ((parent_==NULL)?0:2));
return os;
}
void parse(std::vector<std::string> const & args, std::map<std::string, void*>& values){
if(parent_==NULL || get_option(args.begin(), args.end()) != args.end()){
for(auto& opt: opts_)
opt->parse(args, values_);
for(auto& constraint: constraints_)
constraint(values_);
values[name_] = (void*)&values_;
}
}
void parse(int argc, char* argv[]){
std::vector<std::string> args(argv, argv + argc);
parse(args, values_);
}
template<class T>
void add(std::string const & name, std::string const & desc, T dft, typename Option<T>::constraint_t constraint = NoOp())
{ opts_.push_back(std::make_shared<Option<T>>(name, desc, dft, StreamConverter<T>(), constraint, this));}
template<class T>
void add(std::string const & name, std::string const & desc, typename Option<T>::constraint_t constraint = NoOp())
{ opts_.push_back(std::make_shared<Option<T>>(name, desc, false, StreamConverter<T>(), constraint, this));}
void add(std::string const & name, std::string const & desc, std::string dft, std::set<std::string> values)
{ add<std::string>(name, desc, dft, set_to_map(values)); }
void add(std::string const & name, std::string const & desc, std::set<std::string> values)
{ add<std::string>(name, desc, set_to_map(values)); }
template<class T>
void add(std::string const & name, std::string const & desc, std::string dft, std::map<std::string, T> values, typename Option<T>::constraint_t constraint = NoOp())
{ opts_.push_back(std::make_shared<Option<T>>(name, desc, values.at(dft), MapConverter<T>(values), constraint, this)); }
template<class T>
void add(std::string const & name, std::string const & desc, std::map<std::string, T> values, typename Option<T>::constraint_t constraint = NoOp())
{ opts_.push_back(std::make_shared<Option<T>>(name, desc, false, MapConverter<T>(values), constraint, this)); }
void add_switch(std::string const & name, std::string const & desc, bool dft = true)
{ opts_.push_back(std::make_shared<SwitchOption>(name, desc, dft, this)); }
void add(OptionBase* opt)
{ opts_.push_back(std::shared_ptr<OptionBase>(opt)); }
Options* add_group(std::string const & name, std::string const & desc){
opts_.push_back(std::make_shared<Options>(name, desc, this));
return (Options*)opts_.back().get();
}
void add_constraint(constraint_t const & constraint){
constraints_.push_back(constraint);
}
bool has(std::string const & name)
{ return values_.find(name) != values_.end() && values_.at(name)!=NULL; }
template<class T>
T get(std::string const & name)
{ return *((T*)values_[name]); }
private:
std::vector<std::shared_ptr<OptionBase>> opts_;
std::map<std::string, void*> values_;
std::vector<constraint_t> constraints_;
};
/* Application */
class Application{
private:
void show_help() const{
std::cerr << "Usage: " << name_ << " [OPTS]" << std::endl;
std::cerr << "Description: " << desc_ << std::endl;
opts_.usage(std::cerr, 0);
}
public:
Application(std::string const & name, std::string const & desc): name_(name), desc_(desc), opts_("root","",NULL)
{ opts_.add(new OptionHelp()); }
void parse(int argc, char* argv[]){
try{
opts_.parse(argc, argv);
}catch(InvalidOptions const & e){
std::cerr << e.what() << std::endl;
show_help();
exit(EXIT_FAILURE);
}
if(opts_.has("help")){
show_help();
exit(EXIT_FAILURE);
}
}
Options* options()
{ return &opts_; }
private:
std::string name_;
std::string desc_;
Options opts_;
};
}
#endif
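
For orientation, here is a minimal usage sketch of this options API, assembled from the calls that isaac-tools.cpp (above) actually makes; the program name and flags below are placeholders, not part of ISAAC.
```
#include <cstdlib>
#include <iostream>
#include <vector>
#include "opts.hpp"

int main(int argc, char* argv[]){
  opts::Application program("demo-tool", "Minimal demonstration of the opts API");
  opts::Options* options = program.options();
  // Typed option with a default value.
  options->add<size_t>("device", "Device to run on", 0);
  // String option restricted to a fixed set of values.
  options->add("mode", "What to do", "bench", {"bench", "dump"});
  // Vector option parsed from a comma-separated argument, with a size constraint.
  options->add<std::vector<size_t>>("shape", "Problem shape (M,N,K)",
                                    {1024, 1024, 1024}, opts::SizeConstraint(3));
  program.parse(argc, argv);   // prints usage and exits on --help or invalid input
  std::cout << "device = " << options->get<size_t>("device") << "\n"
            << "mode   = " << options->get<std::string>("mode") << std::endl;
  return EXIT_SUCCESS;
}
```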


@@ -1,69 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <tuple>
#include "isaac/runtime/predict.h"
#include "isaac/driver/backend.h"
#include "isaac/driver/cublas.h"
#include "isaac/driver/context.h"
#include "isaac/driver/kernel.h"
#include "isaac/driver/buffer.h"
#include "isaac/driver/stream.h"
#include "isaac/tools/bench.hpp"
#include "isaac/tools/collections.hpp"
#include "isaac/templates/conv.h"
#include "isaac/templates/gemm.h"
#include "isaac/templates/pool.h"
namespace isaac{
void GEMM(driver::Device const & device, driver::Stream & stream,
DType in_dtype, DType out_dtype, IsaacOperation_t AT, IsaacOperation_t BT, param_t M, param_t N, param_t K,
param_t offa, param_t lda, param_t offb, param_t ldb, param_t offc, param_t ldc,
scalar const & alpha, driver::Buffer const & A, driver::Buffer const & B, scalar const & beta, driver::Buffer& C,
float a_scale, float b_scale, float c_scale,
driver::Buffer const *bias = NULL,
templates::GEMM* generator = NULL, size_t optimization_level = 1);
void CONV(driver::Device const &, driver::Stream & stream,
DType in_dtype, DType out_dtype, param_t N, param_t K, param_t M, param_t P, param_t Q, param_t C, param_t T, param_t R, param_t S,
param_t D, param_t H, param_t W, param_t pad_d, param_t pad_h, param_t pad_w,
param_t stride_d, param_t stride_h, param_t stride_w,
param_t upsample_d, param_t upsample_h, param_t upsample_w,
driver::Buffer const & I, driver::Buffer const & F, driver::Buffer *O, param_t num_outputs,
driver::Buffer const *bias = NULL, ActivationType activation = Linear, float alpha = 0, float iscale = 1, float fscale = 1, std::vector<float> const & oscale = {1}, float z_scale = 1,
ResidualType residual = NoResidual, param_t Zk = 0, param_t crop_z_m0 = 0, param_t crop_z_m1 = 0, param_t crop_z_p0 = 0, param_t crop_z_p1 = 0, param_t crop_z_q0 = 0, param_t crop_z_q1 = 0, driver::Buffer const *Z = NULL,
templates::Conv* generator = NULL, size_t optimization_level = 1);
void POOL(driver::Device const & device, driver::Stream & stream,
DType in_dtype, DType out_dtype, PoolType pool_type, param_t C, param_t M, param_t P, param_t Q, param_t N, param_t T, param_t R, param_t S,
param_t D, param_t H, param_t W, param_t pad_d, param_t pad_h, param_t pad_w, param_t stride_d, param_t stride_h, param_t stride_w,
driver::Buffer const & I, driver::Buffer& O,
float iscale, float oscale,
templates::Pool* generator = NULL, size_t optimization_level = 1);
void TRANSFORM(driver::Stream & stream,
DType in_dtype, DType out_dtype, param_t N, param_t C, param_t D, param_t H, param_t W,
driver::Buffer const & I, driver::Buffer& O);
}


@@ -1,116 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ISAAC_CL_QUEUES_H
#define ISAAC_CL_QUEUES_H
#include <map>
#include <list>
#include <vector>
namespace isaac
{
namespace driver
{
class Buffer;
class Stream;
class Device;
class Context;
class Platform;
class Module;
class Kernel;
struct backend
{
class modules
{
friend class backend;
public:
static void release();
static Module& get(Stream const & stream, std::string const & name, std::string const &src);
private:
static std::map<std::tuple<Stream, std::string>, Module * > cache_;
};
class kernels
{
friend class backend;
public:
static void release();
static Kernel & get(Module const & program, std::string const & name);
private:
static std::map<std::tuple<Module, std::string>, Kernel * > cache_;
};
class contexts
{
friend class backend;
private:
static void init(std::vector<Platform> const &);
static void release();
public:
static Context const & get_default();
template<class T>
static Context const & import(T context)
{
for(driver::Context const * x: cache_)
if((T)*x==context)
return *x;
cache_.emplace_back(new Context(context, false));
return *cache_.back();
}
static void get(std::list<Context const *> &);
private:
static std::list<Context const *> cache_;
};
class streams
{
friend class backend;
private:
static void init(std::list<Context const *> const &);
static void release();
public:
static void get(Context const &, std::vector<Stream *> &streams);
static Stream & get(Context const &, unsigned int id = 0);
static Stream & get_default();
private:
static std::map< Context, std::vector<Stream*> > cache_;
};
static void init();
static void release();
static std::vector<Device> devices();
static std::vector<Platform> platforms();
static void synchronize(Context const &);
static unsigned int default_device;
};
}
}
#endif


@@ -1,54 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ISAAC_DRIVER_BUFFER_H
#define ISAAC_DRIVER_BUFFER_H
#include "isaac/driver/handle.h"
#include "isaac/driver/context.h"
namespace isaac
{
namespace driver
{
class Stream;
// Buffer
class Buffer: public HandleInterface<Buffer, CUdeviceptr>
{
public:
Buffer(Context const & context, size_t size);
Buffer(Context const & context, CUdeviceptr cu, bool take_ownership);
void set_zero(Stream const & queue, size_t size);
Handle<CUdeviceptr> const & cu() const;
Handle<CUdeviceptr> & cu();
private:
Context context_;
Handle<CUdeviceptr> cu_;
};
}
}
#endif
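
A short sketch of the two constructors above: a Buffer either allocates its own device memory or adopts an existing CUdeviceptr, with take_ownership deciding whether it will release it. The stream used by set_zero is assumed to come from the backend shown earlier.

```
#include "isaac/driver/backend.h"
#include "isaac/driver/buffer.h"
#include "isaac/driver/stream.h"

void buffer_example(CUdeviceptr existing){
  namespace drv = isaac::driver;
  drv::Context const & ctx = drv::backend::contexts::get_default();
  drv::Stream & stream = drv::backend::streams::get(ctx, 0);
  drv::Buffer owned(ctx, 1024);               // allocates 1024 bytes of device memory
  owned.set_zero(stream, 1024);               // memset on the given stream
  drv::Buffer borrowed(ctx, existing, false); // wraps a pointer it will not free
  (void) borrowed;
}
```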

View File

@@ -1,66 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ISAAC_DRIVER_CONTEXT_H
#define ISAAC_DRIVER_CONTEXT_H
#include "isaac/driver/device.h"
#include "isaac/driver/handle.h"
namespace isaac
{
namespace driver
{
class Context: public HandleInterface<Context, CUcontext>
{
private:
static std::string get_cache_path();
static CUdevice device(CUcontext);
public:
//Constructors
explicit Context(CUcontext context, bool take_ownership = true);
explicit Context(Device const & device);
//Accessors
Device const & device() const;
std::string const & cache_path() const;
Handle<CUcontext> const & cu() const;
private:
Handle<CUcontext> cu_;
Device device_;
std::string cache_path_;
};
class ContextSwitcher{
public:
ContextSwitcher(Context const & ctx);
~ContextSwitcher();
private:
Context const & ctx_;
};
}
}
#endif
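
ContextSwitcher is the RAII guard used by the cuBLAS/cuDNN wrappers later in this commit: it makes ctx current in its constructor and restores the previous context in its destructor. A minimal sketch, assuming a device obtained from the backend:

```
#include "isaac/driver/backend.h"
#include "isaac/driver/context.h"

void with_context(){
  namespace drv = isaac::driver;
  drv::Device device = drv::backend::devices()[0];
  drv::Context ctx(device);            // owns a fresh CUcontext for this device
  {
    drv::ContextSwitcher guard(ctx);   // ctx is current inside this scope
    // ... issue driver API calls here ...
  }                                    // previous context restored here
}
```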

View File

@@ -1,229 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ISAAC_DRIVER_CUBLAS_H
#define ISAAC_DRIVER_CUBLAS_H
#include "isaac/templates/common.hpp"
#include "isaac/driver/dispatch.h"
#include "isaac/driver/buffer.h"
#include "isaac/driver/stream.h"
#include "isaac/driver/backend.h"
#include "isaac/driver/error.h"
#include "isaac/tools/bench.hpp"
#include "isaac/tools/collections.hpp"
namespace isaac
{
namespace driver
{
enum cublasStrategy_t{
CUBLAS_PREFER_FASTEST,
CUBLAS_HEURISTICS
};
static const std::vector<cublasGemmAlgo_t> cublasAlgorithms = {
CUBLAS_GEMM_DFALT, CUBLAS_GEMM_ALGO0, CUBLAS_GEMM_ALGO1, CUBLAS_GEMM_ALGO2, CUBLAS_GEMM_ALGO3,
CUBLAS_GEMM_ALGO4, CUBLAS_GEMM_ALGO5, CUBLAS_GEMM_ALGO6, CUBLAS_GEMM_ALGO7
};
static const std::map<DType, cudaDataType> cudtype = {{FLOAT_TYPE, CUDA_R_32F}, {DOUBLE_TYPE,CUDA_R_64F}};
static const std::map<char, cublasOperation_t> cuop = {{'N', CUBLAS_OP_N}, {'T', CUBLAS_OP_T}};
inline cublasGemmAlgo_t cublasGemmFastest(Stream& stream, cublasHandle_t handle, cudaDataType cudt, cublasOperation_t AT, cublasOperation_t BT, int32_t M, int32_t N, int32_t K,
void* alpha, CUdeviceptr A, int32_t lda, CUdeviceptr B, int32_t ldb,
void* beta, CUdeviceptr C, int32_t ldc){
typedef std::tuple<cudaDataType_t, cublasOperation_t, cublasOperation_t, int32_t, int32_t, int32_t> key_t;
// Benchmark fastest algorithm in cublasGemmEx
auto benchmark_fastest = [&](key_t const &){
std::vector<double> times;
for(cublasGemmAlgo_t a: cublasAlgorithms){
try{
times.push_back(bench([&](){ dispatch::cublasGemmEx(handle, AT, BT, M, N, K, alpha, (const void*)A, cudt, lda, (const void*)B, cudt, ldb, beta, (void*)C, cudt, ldc, cudt, a); },
[&](){ stream.synchronize(); },
stream.context().device()));
}catch(driver::exception::cublas::base const &){
times.push_back(INFINITY);
}
}
size_t argmin = std::min_element(times.begin(), times.end()) - times.begin();
return cublasAlgorithms[argmin];
};
// Cache result
static cpp::CachedMap<key_t, cublasGemmAlgo_t> cache(benchmark_fastest);
return cache.get(std::make_tuple(cudt, AT, BT, M, N, K));
}
/* Wrapper for cublasGemmEx */
inline void cublasGemmEx(cublasHandle_t handle, cudaDataType cudt, cublasOperation_t AT, cublasOperation_t BT, int32_t M, int32_t N, int32_t K,
void* alpha, CUdeviceptr A, int32_t lda, CUdeviceptr B, int32_t ldb,
void* beta, CUdeviceptr C, int32_t ldc, cublasGemmAlgo_t algo)
{ dispatch::cublasGemmEx(handle, AT, BT, M, N, K, alpha, (const void*)A, cudt, lda, (const void*)B, cudt, ldb, beta, (void*)C, cudt, ldc, cudt, algo); }
/* Simplified API for default GEMM */
inline void cublasGemm(DType dtype, Stream& stream, char cAT, char cBT, int32_t M, int32_t N, int32_t K, scalar alpha, Buffer const & A, int32_t lda, Buffer const & B, int32_t ldb, scalar beta, Buffer& C, int32_t ldc, cublasGemmAlgo_t* fastest = NULL, cublasGemmAlgo_t algo = CUBLAS_GEMM_DFALT){
ContextSwitcher ctx_switch(stream.context());
cublasHandle_t handle = dispatch::cublasHandle(stream.context());
dispatch::cublasSetStream_v2(handle, (CUstream)stream);
if(fastest)
*fastest = cublasGemmFastest(stream, handle, cudtype.at(dtype), cuop.at(cAT), cuop.at(cBT), M, N, K, alpha.data(), A, lda, B, ldb, beta.data(), C, ldc);
else
cublasGemmEx(handle, cudtype.at(dtype), cuop.at(cAT), cuop.at(cBT), M, N, K, alpha.data(), A, lda, B, ldb, beta.data(), C, ldc, algo);
}
inline cudnnDataType_t cudnnDtype(DType dtype){
switch(dtype){
case INT8X4_TYPE: return CUDNN_DATA_INT8x4;
case INT32_TYPE: return CUDNN_DATA_INT32;
case FLOAT_TYPE: return CUDNN_DATA_FLOAT;
case DOUBLE_TYPE: return CUDNN_DATA_DOUBLE;
}
throw;
}
inline cudnnTensorFormat_t format(cudnnDataType_t cutype){
switch(cutype){
case CUDNN_DATA_INT8x4: return CUDNN_TENSOR_NCHW_VECT_C;
default: return CUDNN_TENSOR_NCHW;
}
}
inline void cudnnConv(DType dtype, Stream& stream, int32_t D, int32_t H, int32_t W, int32_t N, int32_t K, int32_t M, int32_t P, int32_t Q, int32_t C, int32_t T, int32_t R, int32_t S,
int32_t pad_d, int32_t pad_h, int32_t pad_w, int32_t stride_d, int32_t stride_h, int32_t stride_w, scalar alpha, Buffer const & I, Buffer const & F, scalar beta, Buffer const & O){
driver::Context const & ctx = stream.context();
ContextSwitcher switch_ctx(ctx);
std::vector<int> pad = {pad_d, pad_h, pad_w};
std::vector<int> stride = {stride_d, stride_h, stride_w};
std::vector<int> upscale = {1, 1, 1};
std::vector<int> Oshapes = {N, K, M, P, Q};
std::vector<int> Fshapes = {K, C, T, R, S};
std::vector<int> Ishapes = {N, C, D, H, W};
if(M == 1 && T == 1 && D == 1){
pad.erase(pad.begin());
stride.erase(stride.begin());
upscale.erase(upscale.begin());
Oshapes.erase(Oshapes.begin() + 2);
Ishapes.erase(Ishapes.begin() + 2);
Fshapes.erase(Fshapes.begin() + 2);
}
cudnnHandle_t handle = dispatch::cudnnHandle(ctx);
cudnnDataType_t in_cutype = cudnnDtype(dtype);
cudnnDataType_t conv_cutype = (dtype == INT8X4_TYPE)?CUDNN_DATA_INT32:in_cutype;
dispatch::cudnnSetStream(handle, (CUstream)stream);
cudnnTensorDescriptor_t tO, tI;
cudnnFilterDescriptor_t tF;
cudnnConvolutionDescriptor_t conv;
cudnnConvolutionFwdAlgo_t algo;
dispatch::cudnnCreateTensorDescriptor(&tO);
dispatch::cudnnCreateTensorDescriptor(&tI);
dispatch::cudnnCreateFilterDescriptor(&tF);
dispatch::cudnnSetTensorNdDescriptorEx(tO, format(in_cutype), in_cutype, Oshapes.size(), Oshapes.data());
dispatch::cudnnSetFilterNdDescriptor(tF, in_cutype, format(in_cutype), Fshapes.size(), Fshapes.data());
dispatch::cudnnSetTensorNdDescriptorEx(tI, format(in_cutype), in_cutype, Ishapes.size(), Ishapes.data());
dispatch::cudnnCreateConvolutionDescriptor(&conv);
dispatch::cudnnSetConvolutionNdDescriptor(conv, pad.size(), pad.data(), stride.data(), upscale.data(), CUDNN_CROSS_CORRELATION, conv_cutype);
dispatch::cudnnGetConvolutionForwardAlgorithm(handle, tI, tF, conv, tO, CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, 1024*1024*64, &algo);
size_t workspace_size;
dispatch::cudnnGetConvolutionForwardWorkspaceSize(handle, tI, tF, conv, tO, algo, &workspace_size);
static Buffer work(ctx, 1024*1024*64);
CUdeviceptr twork = work;
CUdeviceptr pI = I, pF = F, pO = O;
dispatch::cudnnConvolutionForward(handle, alpha.data(), tI, (void*)pI, tF, (void*)pF, conv, algo, (void*)twork, workspace_size, beta.data(), tO, (void*)pO);
}
inline void cudnnPool(DType dtype, Stream& stream, int32_t D, int32_t H, int32_t W, int32_t N, int32_t K, int32_t M, int32_t P, int32_t Q, int32_t T, int32_t R, int32_t S,
int32_t pad_d, int32_t pad_h, int32_t pad_w, int32_t stride_d, int32_t stride_h, int32_t stride_w, scalar alpha, Buffer const & I, scalar beta, Buffer const & O){
driver::Context const & ctx = stream.context();
ContextSwitcher switch_ctx(ctx);
std::vector<int> pad = {pad_d, pad_h, pad_w};
std::vector<int> stride = {stride_d, stride_h, stride_w};
std::vector<int> upscale = {1, 1, 1};
std::vector<int> Oshapes = {N, K, M, P, Q};
std::vector<int> Ishapes = {N, K, D, H, W};
std::vector<int> window = {T, R, S};
if(M == 1 && T == 1 && D == 1){
window.erase(window.begin());
pad.erase(pad.begin());
stride.erase(stride.begin());
upscale.erase(upscale.begin());
Oshapes.erase(Oshapes.begin() + 2);
Ishapes.erase(Ishapes.begin() + 2);
}
cudnnHandle_t handle = dispatch::cudnnHandle(ctx);
cudnnDataType_t cutype = cudnnDtype(dtype);
dispatch::cudnnSetStream(handle, (CUstream)stream);
cudnnTensorDescriptor_t tO, tI;
cudnnPoolingDescriptor_t desc;
dispatch::cudnnCreateTensorDescriptor(&tO);
dispatch::cudnnCreateTensorDescriptor(&tI);
dispatch::cudnnSetTensorNdDescriptorEx(tO, CUDNN_TENSOR_NCHW, cutype, Oshapes.size(), Oshapes.data());
dispatch::cudnnSetTensorNdDescriptorEx(tI, CUDNN_TENSOR_NCHW, cutype, Ishapes.size(), Ishapes.data());
dispatch::cudnnCreatePoolingDescriptor(&desc);
dispatch::cudnnSetPoolingNdDescriptor(desc, CUDNN_POOLING_MAX, CUDNN_NOT_PROPAGATE_NAN, window.size(), window.data(), pad.data(), stride.data());
CUdeviceptr pI = I, pO = O;
dispatch::cudnnPoolingForward(handle, desc, alpha.data(), tI, (void*)pI, beta.data(), tO, (void*)pO);
}
inline void cudnnTransformTensor(driver::Stream & stream,
DType in_dtype, DType out_dtype,
cudnnTensorFormat_t in_layout, cudnnTensorFormat_t out_layout,
int32_t N, int32_t C, int32_t D, int32_t H, int32_t W,
scalar alpha, driver::Buffer const & I, scalar beta, driver::Buffer& O)
{
cudnnHandle_t handle = dispatch::cudnnHandle(stream.context());
dispatch::cudnnSetStream(handle, (CUstream)stream);
cudnnTensorDescriptor_t tO, tI;
std::vector<int> shapes = {N, C, D, H, W};
dispatch::cudnnCreateTensorDescriptor(&tI);
dispatch::cudnnSetTensorNdDescriptorEx(tI, in_layout, cudnnDtype(in_dtype), shapes.size(), shapes.data());
dispatch::cudnnCreateTensorDescriptor(&tO);
dispatch::cudnnSetTensorNdDescriptorEx(tO, out_layout, cudnnDtype(out_dtype), shapes.size(), shapes.data());
CUdeviceptr pI = I, pO = O;
dispatch::cudnnTransformTensor(handle, alpha.data(), tI, (void*)pI, beta.data(), tO, (void*)pO);
}
}
}
#endif
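
Note the two-phase contract of cublasGemm above: when fastest is non-NULL the call only benchmarks the candidate cublasGemmEx algorithms (via cublasGemmFastest) and stores the winner, and only a call with fastest == NULL actually executes the GEMM with the chosen algo. A hedged sketch of that flow; the scalar constructors taking a float, the isaac::FLOAT_TYPE qualification and the "isaac/driver/cublas.h" path (inferred from the include guard) are assumptions about code outside this excerpt.

```
#include <cstdint>
#include "isaac/driver/backend.h"
#include "isaac/driver/buffer.h"
#include "isaac/driver/stream.h"
#include "isaac/driver/cublas.h"

void gemm_example(int32_t M, int32_t N, int32_t K){
  namespace drv = isaac::driver;
  drv::Context const & ctx = drv::backend::contexts::get_default();
  drv::Stream & stream = drv::backend::streams::get(ctx, 0);
  drv::Buffer A(ctx, (size_t)M*K*4), B(ctx, (size_t)K*N*4), C(ctx, (size_t)M*N*4);
  isaac::scalar alpha(1.f), beta(0.f);  // assumed constructors
  cublasGemmAlgo_t fastest;
  // Pass 1: benchmark all candidate algorithms and remember the fastest
  drv::cublasGemm(isaac::FLOAT_TYPE, stream, 'N', 'T', M, N, K,
                  alpha, A, M, B, N, beta, C, M, &fastest);
  // Pass 2: run the GEMM with the algorithm found above
  drv::cublasGemm(isaac::FLOAT_TYPE, stream, 'N', 'T', M, N, K,
                  alpha, A, M, B, N, beta, C, M, NULL, fastest);
  stream.synchronize();
}
```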

View File

@@ -1,98 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ISAAC_DRIVER_DEVICE_H
#define ISAAC_DRIVER_DEVICE_H
#include "isaac/driver/platform.h"
#include "isaac/driver/handle.h"
namespace isaac
{
namespace driver
{
// Device
class Device: public HandleInterface<Device, CUdevice>
{
public:
//Supported architectures
enum class Architecture{
//NVidia
SM_2_0,
SM_2_1,
SM_3_0,
SM_3_5,
SM_3_7,
SM_5_0,
SM_5_2,
SM_6_0,
SM_6_1,
SM_7_0,
UNKNOWN
};
private:
//Metaprogramming helper to get CUDA info from attribute
template<CUdevice_attribute attr>
int cuGetInfo() const;
inline Architecture nv_arch(std::pair<unsigned int, unsigned int> sm) const;
inline nvmlDevice_t nvml_device() const;
public:
Device(CUdevice cu = CUdevice(), bool take_ownership = true): cu_(cu, take_ownership){}
//Accessors
Architecture architecture() const;
Handle<CUdevice> const & cu() const;
//Information
std::string infos() const;
size_t address_bits() const;
driver::Platform platform() const;
std::vector<size_t> max_block_dim() const;
size_t max_threads_per_block() const;
size_t max_shared_memory() const;
size_t warp_size() const;
//Compute Capability
void interpret_as(std::pair<size_t, size_t> cc);
std::pair<size_t, size_t> compute_capability() const;
//Identifier
std::string name() const;
std::string pci_bus_id() const;
//Clocks
size_t current_sm_clock() const;
size_t current_mem_clock() const;
size_t max_sm_clock() const;
size_t max_mem_clock() const;
private:
Handle<CUdevice> cu_;
std::shared_ptr<std::pair<size_t, size_t>> interpreted_as_;
};
}
}
#endif
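
A small sketch that enumerates the devices discovered by the backend and prints some of the identifiers and limits declared above:

```
#include <iostream>
#include "isaac/driver/backend.h"
#include "isaac/driver/device.h"

int main(){
  namespace drv = isaac::driver;
  for(drv::Device const & device: drv::backend::devices()){
    std::cout << device.name() << " [" << device.pci_bus_id() << "]\n"
              << "  max threads/block: " << device.max_threads_per_block() << "\n"
              << "  shared memory:     " << device.max_shared_memory() << " bytes\n"
              << "  warp size:         " << device.warp_size() << std::endl;
  }
  return 0;
}
```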

View File

@@ -1,258 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ISAAC_DRIVER_DISPATCHER_H
#define ISAAC_DRIVER_DISPATCHER_H
#include <type_traits>
#include <dlfcn.h>
//CUDA Backend
#include "isaac/external/CUDA/cuda.h"
#include "isaac/external/CUDA/nvrtc.h"
#include "isaac/external/CUDA/cublas_v2.h"
#include "isaac/external/CUDA/cudnn.h"
#include "isaac/external/CUDA/nvml.h"
//Exceptions
#include <iostream>
#include <stdexcept>
namespace isaac
{
namespace driver
{
class Context;
template<class T> void check(T){}
void check(nvrtcResult err);
void check(CUresult err);
void check(cublasStatus_t err);
void check(cudnnStatus_t err);
class dispatch
{
private:
template <class F>
struct return_type;
template <class R, class... A>
struct return_type<R (*)(A...)>
{ typedef R type; };
typedef bool (*f_init_t)();
template<f_init_t initializer, typename FunPtrT, typename... Args>
static typename return_type<FunPtrT>::type f_impl(void*& lib_h, FunPtrT, void*& cache, const char * name, Args... args)
{
initializer();
if(cache == nullptr){
cache = dlsym(lib_h, name);
if(cache == 0)
throw std::runtime_error("dlsym unable to load function");
}
FunPtrT fptr;
*reinterpret_cast<void **>(&fptr) = cache;
typename return_type<FunPtrT>::type res = (*fptr)(args...);
check(res);
return res;
}
public:
static bool nvrtcinit();
static bool nvmlinit();
static bool cuinit();
static bool cublasinit();
static bool cudnninit();
static void release();
//CUDA
static CUresult cuCtxGetCurrent(CUcontext *pctx);
static CUresult cuCtxSetCurrent(CUcontext ctx);
static CUresult cuCtxDestroy_v2(CUcontext ctx);
static CUresult cuEventCreate(CUevent *phEvent, unsigned int Flags);
static CUresult cuDeviceGet(CUdevice *device, int ordinal);
static CUresult cuMemcpyDtoH_v2(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount);
static CUresult cuStreamCreate(CUstream *phStream, unsigned int Flags);
static CUresult cuEventElapsedTime(float *pMilliseconds, CUevent hStart, CUevent hEnd);
static CUresult cuMemFree_v2(CUdeviceptr dptr);
static CUresult cuMemcpyDtoHAsync_v2(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
static CUresult cuDriverGetVersion(int *driverVersion);
static CUresult cuDeviceGetName(char *name, int len, CUdevice dev);
static CUresult cuDeviceGetPCIBusId(char *id, int len, CUdevice dev);
static CUresult cuModuleGetGlobal_v2(CUdeviceptr *dptr, size_t* bytes, CUmodule hmod, const char *name);
static CUresult cuMemcpyHtoDAsync_v2(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream);
static CUresult cuModuleLoad(CUmodule *module, const char *fname);
static CUresult cuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra);
static CUresult cuModuleUnload(CUmodule hmod);
static CUresult cuModuleLoadDataEx(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
static CUresult cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
static CUresult cuDeviceGetCount(int *count);
static CUresult cuMemcpyHtoD_v2(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
static CUresult cuInit(unsigned int Flags);
static CUresult cuEventRecord(CUevent hEvent, CUstream hStream);
static CUresult cuCtxCreate_v2(CUcontext *pctx, unsigned int flags, CUdevice dev);
static CUresult cuCtxPushCurrent_v2(CUcontext ctx);
static CUresult cuCtxPopCurrent_v2(CUcontext *pctx);
static CUresult cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, const char *name);
static CUresult cuStreamSynchronize(CUstream hStream);
static CUresult cuStreamDestroy_v2(CUstream hStream);
static CUresult cuEventDestroy_v2(CUevent hEvent);
static CUresult cuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize);
static CUresult cuPointerGetAttribute(void * data, CUpointer_attribute attribute, CUdeviceptr ptr);
static CUresult cuCtxGetDevice(CUdevice* result);
static CUresult cuMemsetD8Async(CUdeviceptr dst, unsigned char x, size_t N, CUstream stream);
static nvmlReturn_t nvmlDeviceGetHandleByPciBusId_v2( const char* pciBusId, nvmlDevice_t* device);
static nvmlReturn_t nvmlDeviceGetClockInfo(nvmlDevice_t device, nvmlClockType_t type, unsigned int *clock);
static nvmlReturn_t nvmlDeviceGetMaxClockInfo(nvmlDevice_t device, nvmlClockType_t type, unsigned int *clock);
static nvrtcResult nvrtcCompileProgram(nvrtcProgram prog, int numOptions, const char **options);
static nvrtcResult nvrtcGetProgramLogSize(nvrtcProgram prog, size_t *logSizeRet);
static nvrtcResult nvrtcGetPTX(nvrtcProgram prog, char *ptx);
static nvrtcResult nvrtcGetPTXSize(nvrtcProgram prog, size_t *ptxSizeRet);
static nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char **headers, const char **includeNames);
static nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char *log);
static cublasHandle_t cublasHandle(Context const & ctx);
static cublasStatus_t cublasCreate_v2(cublasHandle_t* h);
static cublasStatus_t cublasGetStream_v2(cublasHandle_t h, cudaStream_t *streamId);
static cublasStatus_t cublasSetStream_v2(cublasHandle_t h, cudaStream_t streamId);
static cublasStatus_t cublasSgemm_v2 (cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, float* alpha, const float *A, int lda, const float *B, int ldb, float* beta, float *C, int ldc);
static cublasStatus_t cublasDgemm_v2 (cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, double* alpha, const double *A, int lda, const double *B, int ldb, double* beta, double *C, int ldc);
static cublasStatus_t cublasHgemm (cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, half* alpha, const half *A, int lda, const half *B, int ldb, half* beta, half *C, int ldc);
static cublasStatus_t cublasGemmEx(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const void *alpha, const void *A, cudaDataType Atype, int lda, const void *B, cudaDataType Btype, int ldb, const void *beta, void *C, cudaDataType Ctype, int ldc, cudaDataType computeType, cublasGemmAlgo_t algo);
static cudnnHandle_t cudnnHandle(Context const & ctx);
static cudnnStatus_t cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc);
static cudnnStatus_t cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t* convDesc);
static cudnnStatus_t cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc);
static cudnnStatus_t cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc);
static cudnnStatus_t cudnnCreate(cudnnHandle_t *handle);
static cudnnStatus_t cudnnSetTensor4dDescriptor(cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, cudnnDataType_t dataType, int n, int c, int h, int w);
static cudnnStatus_t cudnnSetFilter4dDescriptor(cudnnFilterDescriptor_t filterDesc, cudnnDataType_t dataType, cudnnTensorFormat_t format, int k, int c, int h, int w);
static cudnnStatus_t cudnnSetTensorNdDescriptorEx(cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, cudnnDataType_t dataType, int nbDims, const int dimA[]);
static cudnnStatus_t cudnnSetFilterNdDescriptor(cudnnFilterDescriptor_t filterDesc, cudnnDataType_t dataType, cudnnTensorFormat_t format, int nbDims, const int filterDimA[]);
static cudnnStatus_t cudnnSetConvolution2dDescriptor(cudnnConvolutionDescriptor_t convDesc, int pad_h, int pad_w, int u, int v, int upscalex, int upscaley, cudnnConvolutionMode_t mode);
static cudnnStatus_t cudnnSetConvolutionNdDescriptor(cudnnConvolutionDescriptor_t convDesc, int arrayLength, const int padA[], const int filterStrideA[], const int upscaleA[], cudnnConvolutionMode_t mode, cudnnDataType_t dataType);
static cudnnStatus_t cudnnSetPoolingNdDescriptor(cudnnPoolingDescriptor_t poolingDesc, const cudnnPoolingMode_t mode, const cudnnNanPropagation_t maxpoolingNanOpt, int nbDims, const int windowDimA[], const int paddingA[], const int strideA[]);
static cudnnStatus_t cudnnGetConvolutionForwardAlgorithm(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const cudnnFilterDescriptor_t wDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t yDesc, cudnnConvolutionFwdPreference_t preference, size_t memoryLimitInBytes, cudnnConvolutionFwdAlgo_t *algo);
static cudnnStatus_t cudnnGetConvolutionForwardWorkspaceSize(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const cudnnFilterDescriptor_t wDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t yDesc, cudnnConvolutionFwdAlgo_t algo, size_t *sizeInBytes);
static cudnnStatus_t cudnnConvolutionForward(cudnnHandle_t handle, const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, const cudnnFilterDescriptor_t wDesc, const void *w, const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo, void *workSpace, size_t workSpaceSizeInBytes, const void *beta, const cudnnTensorDescriptor_t yDesc, void *y);
static cudnnStatus_t cudnnPoolingForward(cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc, const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, const cudnnTensorDescriptor_t yDesc, void *y);
static cudnnStatus_t cudnnSetStream(cudnnHandle_t handle, cudaStream_t streamId);
static cudnnStatus_t cudnnTransformTensor(cudnnHandle_t handle, const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, const cudnnTensorDescriptor_t yDesc, void *y);
private:
static void* cuda_;
static void* nvrtc_;
static void* nvml_;
static void* cublas_;
static void* cudnn_;
//CUDA
static void* cuCtxGetCurrent_;
static void* cuCtxSetCurrent_;
static void* cuCtxDestroy_v2_;
static void* cuEventCreate_;
static void* cuDeviceGet_;
static void* cuMemcpyDtoH_v2_;
static void* cuStreamCreate_;
static void* cuEventElapsedTime_;
static void* cuMemFree_v2_;
static void* cuMemcpyDtoHAsync_v2_;
static void* cuDriverGetVersion_;
static void* cuDeviceGetName_;
static void* cuDeviceGetPCIBusId_;
static void* cuModuleGetGlobal_v2_;
static void* cuMemcpyHtoDAsync_v2_;
static void* cuModuleLoad_;
static void* cuLaunchKernel_;
static void* cuModuleUnload_;
static void* cuModuleLoadDataEx_;
static void* cuDeviceGetAttribute_;
static void* cuDeviceGetCount_;
static void* cuMemcpyHtoD_v2_;
static void* cuInit_;
static void* cuEventRecord_;
static void* cuCtxCreate_v2_;
static void* cuModuleGetFunction_;
static void* cuStreamSynchronize_;
static void* cuStreamDestroy_v2_;
static void* cuEventDestroy_v2_;
static void* cuMemAlloc_v2_;
static void* cuPointerGetAttribute_;
static void* cuCtxGetDevice_;
static void* cuMemsetD8Async_;
static void* cuCtxPushCurrent_v2_;
static void* cuCtxPopCurrent_v2_;
static void* nvmlInit_v2_;
static void* nvmlDeviceGetHandleByPciBusId_v2_;
static void* nvmlDeviceGetClockInfo_;
static void* nvmlDeviceGetMaxClockInfo_;
static void* nvrtcCompileProgram_;
static void* nvrtcGetProgramLogSize_;
static void* nvrtcGetPTX_;
static void* nvrtcGetPTXSize_;
static void* nvrtcCreateProgram_;
static void* nvrtcGetProgramLog_;
static void* cublasCreate_v2_;
static void* cublasGetStream_v2_;
static void* cublasSetStream_v2_;
static void* cublasHgemm_;
static void* cublasSgemm_v2_;
static void* cublasDgemm_v2_;
static void* cublasGemmEx_;
static void* cudnnCreateConvolutionDescriptor_;
static void* cudnnCreatePoolingDescriptor_;
static void* cudnnCreateTensorDescriptor_;
static void* cudnnCreateFilterDescriptor_;
static void* cudnnCreate_;
static void* cudnnSetTensor4dDescriptor_;
static void* cudnnSetFilter4dDescriptor_;
static void* cudnnSetTensorNdDescriptorEx_;
static void* cudnnSetFilterNdDescriptor_;
static void* cudnnSetConvolution2dDescriptor_;
static void* cudnnSetConvolutionNdDescriptor_;
static void* cudnnSetPoolingNdDescriptor_;
static void* cudnnGetConvolutionForwardAlgorithm_;
static void* cudnnGetConvolutionForwardWorkspaceSize_;
static void* cudnnConvolutionForward_;
static void* cudnnPoolingForward_;
static void* cudnnSetStream_;
static void* cudnnTransformTensor_;
};
}
}
#endif
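
The dispatch class above is why no CUDA SDK is required at build time: every CUDA/NVRTC/cuBLAS/cuDNN/NVML entry point is declared as a static wrapper, the corresponding shared library is dlopen'ed on first use by the *init() functions, and each symbol is resolved once with dlsym and cached in the matching void* member (see f_impl). The self-contained snippet below illustrates that lazy-binding pattern with libm's cos standing in for a driver symbol; it is an illustration of the technique, not ISAAC code.

```
#include <dlfcn.h>
#include <iostream>
#include <stdexcept>

// One cached library handle and one cached symbol, mirroring dispatch::cuda_
// and, e.g., dispatch::cuMemAlloc_v2_ above.
static void* libm_handle = nullptr;
static void* cos_sym = nullptr;

double lazy_cos(double x){
  if(!libm_handle)
    libm_handle = dlopen("libm.so.6", RTLD_LAZY);  // loaded on first call only
  if(!libm_handle)
    throw std::runtime_error("could not dlopen libm");
  if(!cos_sym){
    cos_sym = dlsym(libm_handle, "cos");           // resolved once, then cached
    if(!cos_sym)
      throw std::runtime_error("dlsym unable to load function");
  }
  typedef double (*fptr_t)(double);
  fptr_t fptr;
  *reinterpret_cast<void**>(&fptr) = cos_sym;      // same cast trick as f_impl
  return (*fptr)(x);
}

int main(){
  std::cout << lazy_cos(0.0) << std::endl;         // prints 1
  return 0;
}
```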

View File

@@ -1,228 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ISAAC_EXCEPTION_DRIVER_H
#define ISAAC_EXCEPTION_DRIVER_H
#include <exception>
#include "isaac/driver/dispatch.h"
namespace isaac
{
namespace driver
{
namespace exception
{
namespace nvrtc
{
#define ISAAC_CREATE_NVRTC_EXCEPTION(name, msg) class name: public std::exception { public: const char * what() const throw(){ return "NVRTC: Error- " msg; } }
ISAAC_CREATE_NVRTC_EXCEPTION(out_of_memory ,"out of memory");
ISAAC_CREATE_NVRTC_EXCEPTION(program_creation_failure ,"program creation failure");
ISAAC_CREATE_NVRTC_EXCEPTION(invalid_input ,"invalid input");
ISAAC_CREATE_NVRTC_EXCEPTION(invalid_program ,"invalid program");
ISAAC_CREATE_NVRTC_EXCEPTION(invalid_option ,"invalid option");
ISAAC_CREATE_NVRTC_EXCEPTION(compilation ,"compilation");
ISAAC_CREATE_NVRTC_EXCEPTION(builtin_operation_failure ,"builtin operation failure");
ISAAC_CREATE_NVRTC_EXCEPTION(unknown_error ,"unknown error");
#undef ISAAC_CREATE_NVRTC_EXCEPTION
}
namespace cuda
{
class base: public std::exception{};
#define ISAAC_CREATE_CUDA_EXCEPTION(name, msg) class name: public base { public:const char * what() const throw(){ return "CUDA: Error- " msg; } }
ISAAC_CREATE_CUDA_EXCEPTION(invalid_value ,"invalid value");
ISAAC_CREATE_CUDA_EXCEPTION(out_of_memory ,"out of memory");
ISAAC_CREATE_CUDA_EXCEPTION(not_initialized ,"not initialized");
ISAAC_CREATE_CUDA_EXCEPTION(deinitialized ,"deinitialized");
ISAAC_CREATE_CUDA_EXCEPTION(profiler_disabled ,"profiler disabled");
ISAAC_CREATE_CUDA_EXCEPTION(profiler_not_initialized ,"profiler not initialized");
ISAAC_CREATE_CUDA_EXCEPTION(profiler_already_started ,"profiler already started");
ISAAC_CREATE_CUDA_EXCEPTION(profiler_already_stopped ,"profiler already stopped");
ISAAC_CREATE_CUDA_EXCEPTION(no_device ,"no device");
ISAAC_CREATE_CUDA_EXCEPTION(invalid_device ,"invalid device");
ISAAC_CREATE_CUDA_EXCEPTION(invalid_image ,"invalid image");
ISAAC_CREATE_CUDA_EXCEPTION(invalid_context ,"invalid context");
ISAAC_CREATE_CUDA_EXCEPTION(context_already_current ,"context already current");
ISAAC_CREATE_CUDA_EXCEPTION(map_failed ,"map failed");
ISAAC_CREATE_CUDA_EXCEPTION(unmap_failed ,"unmap failed");
ISAAC_CREATE_CUDA_EXCEPTION(array_is_mapped ,"array is mapped");
ISAAC_CREATE_CUDA_EXCEPTION(already_mapped ,"already mapped");
ISAAC_CREATE_CUDA_EXCEPTION(no_binary_for_gpu ,"no binary for gpu");
ISAAC_CREATE_CUDA_EXCEPTION(already_acquired ,"already acquired");
ISAAC_CREATE_CUDA_EXCEPTION(not_mapped ,"not mapped");
ISAAC_CREATE_CUDA_EXCEPTION(not_mapped_as_array ,"not mapped as array");
ISAAC_CREATE_CUDA_EXCEPTION(not_mapped_as_pointer ,"not mapped as pointer");
ISAAC_CREATE_CUDA_EXCEPTION(ecc_uncorrectable ,"ecc uncorrectable");
ISAAC_CREATE_CUDA_EXCEPTION(unsupported_limit ,"unsupported limit");
ISAAC_CREATE_CUDA_EXCEPTION(context_already_in_use ,"context already in use");
ISAAC_CREATE_CUDA_EXCEPTION(peer_access_unsupported ,"peer access unsupported");
ISAAC_CREATE_CUDA_EXCEPTION(invalid_ptx ,"invalid ptx");
ISAAC_CREATE_CUDA_EXCEPTION(invalid_graphics_context ,"invalid graphics context");
ISAAC_CREATE_CUDA_EXCEPTION(invalid_source ,"invalid source");
ISAAC_CREATE_CUDA_EXCEPTION(file_not_found ,"file not found");
ISAAC_CREATE_CUDA_EXCEPTION(shared_object_symbol_not_found ,"shared object symbol not found");
ISAAC_CREATE_CUDA_EXCEPTION(shared_object_init_failed ,"shared object init failed");
ISAAC_CREATE_CUDA_EXCEPTION(operating_system ,"operating system");
ISAAC_CREATE_CUDA_EXCEPTION(invalid_handle ,"invalid handle");
ISAAC_CREATE_CUDA_EXCEPTION(not_found ,"not found");
ISAAC_CREATE_CUDA_EXCEPTION(not_ready ,"not ready");
ISAAC_CREATE_CUDA_EXCEPTION(illegal_address ,"illegal address");
ISAAC_CREATE_CUDA_EXCEPTION(launch_out_of_resources ,"launch out of resources");
ISAAC_CREATE_CUDA_EXCEPTION(launch_timeout ,"launch timeout");
ISAAC_CREATE_CUDA_EXCEPTION(launch_incompatible_texturing ,"launch incompatible texturing");
ISAAC_CREATE_CUDA_EXCEPTION(peer_access_already_enabled ,"peer access already enabled");
ISAAC_CREATE_CUDA_EXCEPTION(peer_access_not_enabled ,"peer access not enabled");
ISAAC_CREATE_CUDA_EXCEPTION(primary_context_active ,"primary context active");
ISAAC_CREATE_CUDA_EXCEPTION(context_is_destroyed ,"context is destroyed");
ISAAC_CREATE_CUDA_EXCEPTION(assert_error ,"assert");
ISAAC_CREATE_CUDA_EXCEPTION(too_many_peers ,"too many peers");
ISAAC_CREATE_CUDA_EXCEPTION(host_memory_already_registered ,"host memory already registered");
ISAAC_CREATE_CUDA_EXCEPTION(host_memory_not_registered ,"host memory not registered");
ISAAC_CREATE_CUDA_EXCEPTION(hardware_stack_error ,"hardware stack error");
ISAAC_CREATE_CUDA_EXCEPTION(illegal_instruction ,"illegal instruction");
ISAAC_CREATE_CUDA_EXCEPTION(misaligned_address ,"misaligned address");
ISAAC_CREATE_CUDA_EXCEPTION(invalid_address_space ,"invalid address space");
ISAAC_CREATE_CUDA_EXCEPTION(invalid_pc ,"invalid pc");
ISAAC_CREATE_CUDA_EXCEPTION(launch_failed ,"launch failed");
ISAAC_CREATE_CUDA_EXCEPTION(not_permitted ,"not permitted");
ISAAC_CREATE_CUDA_EXCEPTION(not_supported ,"not supported");
ISAAC_CREATE_CUDA_EXCEPTION(unknown ,"unknown");
#undef ISAAC_CREATE_CUDA_EXCEPTION
}
namespace cublas
{
class base: public std::exception{};
#define ISAAC_CREATE_CUBLAS_EXCEPTION(name, msg) class name: public base { public: const char * what() const throw(){ return "CUBLAS: Error- " msg; } }
ISAAC_CREATE_CUBLAS_EXCEPTION(not_initialized ,"not initialized");
ISAAC_CREATE_CUBLAS_EXCEPTION(alloc_failed ,"alloc failed");
ISAAC_CREATE_CUBLAS_EXCEPTION(invalid_value ,"invalid value");
ISAAC_CREATE_CUBLAS_EXCEPTION(arch_mismatch ,"arch mismatch");
ISAAC_CREATE_CUBLAS_EXCEPTION(mapping_error ,"mapping error");
ISAAC_CREATE_CUBLAS_EXCEPTION(execution_failed ,"execution failed");
ISAAC_CREATE_CUBLAS_EXCEPTION(internal_error ,"internal error");
ISAAC_CREATE_CUBLAS_EXCEPTION(not_supported ,"not supported");
ISAAC_CREATE_CUBLAS_EXCEPTION(license_error ,"license error");
ISAAC_CREATE_CUBLAS_EXCEPTION(unknown ,"unknown");
#undef ISAAC_CREATE_CUBLAS_EXCEPTION
}
namespace cudnn
{
#define ISAAC_CREATE_CUDNN_EXCEPTION(name, msg) class name: public std::exception { public: const char * what() const throw(){ return "CUDNN: Error- " msg; } }
ISAAC_CREATE_CUDNN_EXCEPTION(not_initialized ,"not initialized");
ISAAC_CREATE_CUDNN_EXCEPTION(alloc_failed ,"allocation failed");
ISAAC_CREATE_CUDNN_EXCEPTION(bad_param ,"bad param");
ISAAC_CREATE_CUDNN_EXCEPTION(internal_error ,"internal error");
ISAAC_CREATE_CUDNN_EXCEPTION(invalid_value ,"invalid value");
ISAAC_CREATE_CUDNN_EXCEPTION(arch_mismatch ,"arch mismatch");
ISAAC_CREATE_CUDNN_EXCEPTION(mapping_error ,"mapping error");
ISAAC_CREATE_CUDNN_EXCEPTION(execution_failed ,"execution failed");
ISAAC_CREATE_CUDNN_EXCEPTION(not_supported ,"not supported");
ISAAC_CREATE_CUDNN_EXCEPTION(license_error ,"license error");
ISAAC_CREATE_CUDNN_EXCEPTION(runtime_prerequisite_missing ,"prerequisite missing");
ISAAC_CREATE_CUDNN_EXCEPTION(runtime_in_progress ,"runtime in progress");
ISAAC_CREATE_CUDNN_EXCEPTION(runtime_fp_overflow ,"runtime fp overflow");
}
namespace ocl
{
class base: public std::exception{};
#define ISAAC_CREATE_CL_EXCEPTION(name, msg) class name: public base { public: const char * what() const throw(){ return "OpenCL: Error- " msg; } }
ISAAC_CREATE_CL_EXCEPTION(device_not_found, "device not found");
ISAAC_CREATE_CL_EXCEPTION(device_not_available, "device not available");
ISAAC_CREATE_CL_EXCEPTION(compiler_not_available, "compiler not available");
ISAAC_CREATE_CL_EXCEPTION(mem_object_allocation_failure, "object allocation failure");
ISAAC_CREATE_CL_EXCEPTION(out_of_resources, "launch out of resources");
ISAAC_CREATE_CL_EXCEPTION(out_of_host_memory, "out of host memory");
ISAAC_CREATE_CL_EXCEPTION(profiling_info_not_available, "profiling info not available");
ISAAC_CREATE_CL_EXCEPTION(mem_copy_overlap, "mem copy overlap");
ISAAC_CREATE_CL_EXCEPTION(image_format_mismatch, "image format mismatch");
ISAAC_CREATE_CL_EXCEPTION(image_format_not_supported, "image format not supported");
ISAAC_CREATE_CL_EXCEPTION(build_program_failure, "build program failure");
ISAAC_CREATE_CL_EXCEPTION(map_failure, "map failure");
ISAAC_CREATE_CL_EXCEPTION(invalid_value, "invalid value");
ISAAC_CREATE_CL_EXCEPTION(invalid_device_type, "invalid device type");
ISAAC_CREATE_CL_EXCEPTION(invalid_platform, "invalid platform");
ISAAC_CREATE_CL_EXCEPTION(invalid_device, "invalid device");
ISAAC_CREATE_CL_EXCEPTION(invalid_context, "invalid context");
ISAAC_CREATE_CL_EXCEPTION(invalid_queue_properties, "invalid queue properties");
ISAAC_CREATE_CL_EXCEPTION(invalid_command_queue, "invalid command queue");
ISAAC_CREATE_CL_EXCEPTION(invalid_host_ptr, "invalid host pointer");
ISAAC_CREATE_CL_EXCEPTION(invalid_mem_object, "invalid mem object");
ISAAC_CREATE_CL_EXCEPTION(invalid_image_format_descriptor, "invalid image format descriptor");
ISAAC_CREATE_CL_EXCEPTION(invalid_image_size, "invalid image size");
ISAAC_CREATE_CL_EXCEPTION(invalid_sampler, "invalid sampler");
ISAAC_CREATE_CL_EXCEPTION(invalid_binary, "invalid binary");
ISAAC_CREATE_CL_EXCEPTION(invalid_build_options, "invalid build options");
ISAAC_CREATE_CL_EXCEPTION(invalid_program, "invalid program");
ISAAC_CREATE_CL_EXCEPTION(invalid_program_executable, "invalid program executable");
ISAAC_CREATE_CL_EXCEPTION(invalid_kernel_name, "invalid kernel name");
ISAAC_CREATE_CL_EXCEPTION(invalid_kernel_definition, "invalid kernel definition");
ISAAC_CREATE_CL_EXCEPTION(invalid_kernel, "invalid kernel");
ISAAC_CREATE_CL_EXCEPTION(invalid_arg_index, "invalid arg index");
ISAAC_CREATE_CL_EXCEPTION(invalid_arg_value, "invalid arg value");
ISAAC_CREATE_CL_EXCEPTION(invalid_arg_size, "invalid arg size");
ISAAC_CREATE_CL_EXCEPTION(invalid_kernel_args, "invalid kernel args");
ISAAC_CREATE_CL_EXCEPTION(invalid_work_dimension, "invalid work dimension");
ISAAC_CREATE_CL_EXCEPTION(invalid_work_group_size, "invalid work group size");
ISAAC_CREATE_CL_EXCEPTION(invalid_work_item_size, "invalid work item size");
ISAAC_CREATE_CL_EXCEPTION(invalid_global_offset, "invalid global offset");
ISAAC_CREATE_CL_EXCEPTION(invalid_event_wait_list, "invalid event wait list");
ISAAC_CREATE_CL_EXCEPTION(invalid_event, "invalid event");
ISAAC_CREATE_CL_EXCEPTION(invalid_operation, "invalid operation");
ISAAC_CREATE_CL_EXCEPTION(invalid_gl_object, "invalid GL object");
ISAAC_CREATE_CL_EXCEPTION(invalid_buffer_size, "invalid buffer size");
ISAAC_CREATE_CL_EXCEPTION(invalid_mip_level, "invalid MIP level");
ISAAC_CREATE_CL_EXCEPTION(invalid_global_work_size, "invalid global work size");
#ifdef CL_INVALID_PROPERTY
ISAAC_CREATE_CL_EXCEPTION(invalid_property, "invalid property");
#endif
}
}
}
}
#endif
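
All of these classes are generated by the ISAAC_CREATE_*_EXCEPTION macros, and the CUDA, cuBLAS and OpenCL families share a per-backend base class. That base is what makes the catch in cublasGemmFastest earlier in this commit possible: a failing algorithm is caught as exception::cublas::base and simply scored as INFINITY. A minimal sketch of the same pattern:

```
#include <iostream>
#include "isaac/driver/error.h"

void guarded_call(){
  namespace exc = isaac::driver::exception;
  try{
    // ... some dispatch::cublas* / dispatch::cu* call that may fail ...
  }
  catch(exc::cublas::base const & e){
    std::cerr << e.what() << std::endl;  // e.g. "CUBLAS: Error- not supported"
  }
  catch(exc::cuda::base const & e){
    std::cerr << e.what() << std::endl;  // any of the CUDA errors above
  }
}
```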

View File

@@ -1,49 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ISAAC_DRIVER_EVENT_H
#define ISAAC_DRIVER_EVENT_H
#include "isaac/driver/handle.h"
namespace isaac
{
namespace driver
{
// Event
class Event: public HandleInterface<Event, cu_event_t>
{
public:
float elapsed_time() const;
Handle<cu_event_t> const & cu() const;
private:
Handle<cu_event_t> cu_;
};
}
}
#endif

View File

@@ -1,82 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ISAAC_DRIVER_HANDLE_H
#define ISAAC_DRIVER_HANDLE_H
#include <memory>
#include <iostream>
#include <functional>
#include <type_traits>
#include "isaac/driver/dispatch.h"
namespace isaac
{
namespace driver
{
struct cu_event_t{
operator bool() const { return first && second; }
CUevent first;
CUevent second;
};
struct cu_platform{
cu_platform() : status_(dispatch::cuInit(0)) { }
operator bool() const { return status_; }
private:
CUresult status_;
};
template<class T, class CUType>
class HandleInterface{
public:
//Accessors
operator CUType() const { return *(((T*)this)->cu().h_); }
//Comparison
bool operator==(HandleInterface const & y) { return (CUType)(*this) == (CUType)(y); }
bool operator!=(HandleInterface const & y) { return (CUType)(*this) != (CUType)(y); }
bool operator<(HandleInterface const & y) { return (CUType)(*this) < (CUType)(y); }
};
template<class CUType>
class Handle{
public:
template<class, class> friend class HandleInterface;
public:
//Constructors
Handle(CUType cu = CUType(), bool take_ownership = true);
~Handle();
CUType& operator*() { return *h_; }
CUType const & operator*() const { return *h_; }
CUType* operator->() const { return h_.get(); }
protected:
std::shared_ptr<CUType> h_;
bool has_ownership_;
};
}
}
#endif
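
Handle<CUType> is a reference-counted wrapper (a std::shared_ptr<CUType> plus an ownership flag), and HandleInterface supplies the implicit conversion to the raw CUDA handle and the comparison operators that let driver objects serve as map keys in the backend caches. A short sketch of what that buys a caller; how exactly the destructor releases the CUDA object is not shown in this excerpt.

```
#include "isaac/driver/backend.h"
#include "isaac/driver/buffer.h"

void handle_example(){
  namespace drv = isaac::driver;
  drv::Context const & ctx = drv::backend::contexts::get_default();
  drv::Buffer A(ctx, 256);
  drv::Buffer alias = A;      // copies share the same Handle<CUdeviceptr>
  CUdeviceptr raw = A;        // HandleInterface::operator CUdeviceptr
  bool same = (alias == A);   // compares the raw handles -> true
  (void) raw; (void) same;
}                             // the device allocation is released by its last owner
```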

View File

@@ -1,68 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ISAAC_DRIVER_KERNEL_H
#define ISAAC_DRIVER_KERNEL_H
#include "isaac/driver/module.h"
#include "isaac/driver/handle.h"
#include <memory>
namespace isaac
{
namespace driver
{
class Buffer;
// Kernel
class Kernel: public HandleInterface<Kernel, CUfunction>
{
public:
//Constructors
Kernel(Module const & program, const char * name);
//Accessors
Handle<CUfunction> const & cu() const;
Module const & module() const;
//Arguments setters
void setArg(unsigned int index, std::size_t size, void* ptr);
void setArg(unsigned int index, Buffer const &);
template<class T> void setArg(unsigned int index, T value) { setArg(index, sizeof(T), (void*)&value); }
//Arguments getters
void* const* cu_params() const;
private:
Handle<CUfunction> cu_;
Module program_;
unsigned int address_bits_;
std::vector<std::shared_ptr<void> > cu_params_store_;
std::vector<void*> cu_params_;
};
}
}
#endif

View File

@@ -1,61 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ISAAC_DRIVER_MODULE_H
#define ISAAC_DRIVER_MODULE_H
#include <map>
#include "isaac/driver/handle.h"
#include "isaac/driver/context.h"
#include "isaac/driver/buffer.h"
namespace isaac
{
namespace driver
{
class Context;
class Device;
class Module: public HandleInterface<Module, CUmodule>
{
static std::string header(Device const & device);
public:
Module(Context const & context, std::string const & source);
Context const & context() const;
Handle<CUmodule> const & cu() const;
Buffer symbol(const char * name) const;
private:
Handle<CUmodule> cu_;
Context context_;
std::string source_;
};
}
}
#endif

View File

@@ -1,54 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ISAAC_DRIVER_PLATFORM_H
#define ISAAC_DRIVER_PLATFORM_H
#include <vector>
#include <string>
#include "isaac/driver/handle.h"
namespace isaac
{
namespace driver
{
class Device;
class Platform
{
public:
//Accessors
std::string name() const;
std::string version() const;
std::vector<Device> devices() const;
private:
Handle<cu_platform> cu_;
};
}
}
#endif

View File

@@ -1,82 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ISAAC_DRIVER_STREAM_H
#define ISAAC_DRIVER_STREAM_H
#include <map>
#include "isaac/driver/context.h"
#include "isaac/driver/device.h"
#include "isaac/driver/handle.h"
#include "isaac/driver/buffer.h"
namespace isaac
{
namespace driver
{
class Kernel;
class Event;
class Range;
class Buffer;
// Command Queue
class Stream: public HandleInterface<Stream, CUstream>
{
public:
//Constructors
Stream(CUstream stream, bool take_ownership);
Stream(Context const & context);
//Accessors
Handle<CUstream> const & cu() const;
Context const & context() const;
//Synchronize
void synchronize();
//Enqueue
void enqueue(Kernel const & kernel, std::array<size_t, 3> grid, std::array<size_t, 3> block, std::vector<Event> const * = NULL, Event *event = NULL);
// Write
void write(Buffer const & buffer, bool blocking, std::size_t offset, std::size_t size, void const* ptr);
template<class T> void write(Buffer const & buffer, bool blocking, std::size_t offset, std::vector<T> const & x)
{ write(buffer, blocking, offset, x.size()*sizeof(T), x.data()); }
// Read
void read(Buffer const & buffer, bool blocking, std::size_t offset, std::size_t size, void* ptr);
template<class T> void read(Buffer const & buffer, bool blocking, std::size_t offset, std::vector<T>& x)
{ read(buffer, blocking, offset, x.size()*sizeof(T), x.data()); }
private:
Context context_;
Handle<CUstream> cu_;
};
}
}
#endif
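
Stream ties this driver layer together: it owns the CUstream, launches kernels with an explicit grid/block, and moves data in and out of Buffers. A hedged end-to-end sketch using the declarations from module.h, kernel.h, buffer.h and stream.h above; the kernel source and entry-point name are placeholders, and the argument layout (pointer, then element count) is only an example.

```
#include <array>
#include <cstdint>
#include <string>
#include <vector>
#include "isaac/driver/backend.h"
#include "isaac/driver/module.h"
#include "isaac/driver/kernel.h"
#include "isaac/driver/buffer.h"
#include "isaac/driver/stream.h"

void launch_example(std::string const & src /* PTX or CUDA C (placeholder) */){
  namespace drv = isaac::driver;
  drv::Context const & ctx = drv::backend::contexts::get_default();
  drv::Stream stream(ctx);

  // Compile the source and fetch its entry point
  drv::Module module(ctx, src);
  drv::Kernel kernel(module, "scale");            // placeholder name

  // Upload input data
  std::vector<float> host(1024, 1.f);
  drv::Buffer x(ctx, host.size()*sizeof(float));
  stream.write(x, true, 0, host);

  // Bind arguments and launch 4 blocks of 256 threads
  kernel.setArg(0, x);
  kernel.setArg(1, (int32_t)host.size());
  stream.enqueue(kernel, {4, 1, 1}, {256, 1, 1});

  // Read the result back and wait for completion
  stream.read(x, true, 0, host);
  stream.synchronize();
}
```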

View File

@@ -1,64 +0,0 @@
/*
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO LICENSEE:
*
* This source code and/or documentation ("Licensed Deliverables") are
* subject to NVIDIA intellectual property rights under U.S. and
* international Copyright laws.
*
* These Licensed Deliverables contained herein is PROPRIETARY and
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
* conditions of a form of NVIDIA software license agreement by and
* between NVIDIA and Licensee ("License Agreement") or electronically
* accepted by Licensee. Notwithstanding any terms or conditions to
* the contrary in the License Agreement, reproduction or disclosure
* of the Licensed Deliverables to any third party without the express
* written consent of NVIDIA is prohibited.
*
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THESE LICENSED DELIVERABLES.
*
* U.S. Government End Users. These Licensed Deliverables are a
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
* 1995), consisting of "commercial computer software" and "commercial
* computer software documentation" as such terms are used in 48
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
* U.S. Government End Users acquire the Licensed Deliverables with
* only those rights set forth herein.
*
* Any use of the Licensed Deliverables in individual and commercial
* software must include, in the user documentation and internal
* comments to the code, the above Disclaimer and U.S. Government End
* Users Notice.
*/
/*******************************************************************************
* *
* *
* *
*******************************************************************************/
#include "device_types.h"
#if !defined(__CUDACC_RTC__)
#define EXCLUDE_FROM_RTC
#include "driver_types.h"
#undef EXCLUDE_FROM_RTC
#endif /* !__CUDACC_RTC__ */
#include "surface_types.h"
#include "texture_types.h"
#include "vector_types.h"

View File

@@ -1,412 +0,0 @@
/*
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO LICENSEE:
*
* This source code and/or documentation ("Licensed Deliverables") are
* subject to NVIDIA intellectual property rights under U.S. and
* international Copyright laws.
*
* These Licensed Deliverables contained herein is PROPRIETARY and
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
* conditions of a form of NVIDIA software license agreement by and
* between NVIDIA and Licensee ("License Agreement") or electronically
* accepted by Licensee. Notwithstanding any terms or conditions to
* the contrary in the License Agreement, reproduction or disclosure
* of the Licensed Deliverables to any third party without the express
* written consent of NVIDIA is prohibited.
*
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THESE LICENSED DELIVERABLES.
*
* U.S. Government End Users. These Licensed Deliverables are a
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
* 1995), consisting of "commercial computer software" and "commercial
* computer software documentation" as such terms are used in 48
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
* U.S. Government End Users acquire the Licensed Deliverables with
* only those rights set forth herein.
*
* Any use of the Licensed Deliverables in individual and commercial
* software must include, in the user documentation and internal
* comments to the code, the above Disclaimer and U.S. Government End
* Users Notice.
*/
#if !defined(__CHANNEL_DESCRIPTOR_H__)
#define __CHANNEL_DESCRIPTOR_H__
#if defined(__cplusplus)
/*******************************************************************************
* *
* *
* *
*******************************************************************************/
#include "driver_types.h"
#include "cuda_runtime_api.h"
#include "host_defines.h"
#include "vector_types.h"
/*******************************************************************************
* *
* *
* *
*******************************************************************************/
/**
* \addtogroup CUDART_HIGHLEVEL
*
* @{
*/
/**
* \brief \hl Returns a channel descriptor using the specified format
*
* Returns a channel descriptor with format \p f and number of bits of each
* component \p x, \p y, \p z, and \p w. The ::cudaChannelFormatDesc is
* defined as:
* \code
struct cudaChannelFormatDesc {
int x, y, z, w;
enum cudaChannelFormatKind f;
};
* \endcode
*
* where ::cudaChannelFormatKind is one of ::cudaChannelFormatKindSigned,
* ::cudaChannelFormatKindUnsigned, or ::cudaChannelFormatKindFloat.
*
* \return
* Channel descriptor with format \p f
*
* \sa \ref ::cudaCreateChannelDesc(int,int,int,int,cudaChannelFormatKind) "cudaCreateChannelDesc (Low level)",
* ::cudaGetChannelDesc, ::cudaGetTextureReference,
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture (High level)",
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>&, const void*, size_t) "cudaBindTexture (High level, inherited channel descriptor)",
* \ref ::cudaBindTexture2D(size_t*, const struct texture< T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t) "cudaBindTexture2D (High level)",
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, cudaArray_const_t, const struct cudaChannelFormatDesc&) "cudaBindTextureToArray (High level)",
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, cudaArray_const_t) "cudaBindTextureToArray (High level, inherited channel descriptor)",
* \ref ::cudaUnbindTexture(const struct texture< T, dim, readMode>&) "cudaUnbindTexture (High level)",
* \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture< T, dim, readMode>&) "cudaGetTextureAlignmentOffset (High level)"
*/
template<class T> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc(void)
{
return cudaCreateChannelDesc(0, 0, 0, 0, cudaChannelFormatKindNone);
}
static __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDescHalf(void)
{
int e = (int)sizeof(unsigned short) * 8;
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindFloat);
}
static __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDescHalf1(void)
{
int e = (int)sizeof(unsigned short) * 8;
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindFloat);
}
static __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDescHalf2(void)
{
int e = (int)sizeof(unsigned short) * 8;
return cudaCreateChannelDesc(e, e, 0, 0, cudaChannelFormatKindFloat);
}
static __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDescHalf4(void)
{
int e = (int)sizeof(unsigned short) * 8;
return cudaCreateChannelDesc(e, e, e, e, cudaChannelFormatKindFloat);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<char>(void)
{
int e = (int)sizeof(char) * 8;
#if defined(_CHAR_UNSIGNED) || defined(__CHAR_UNSIGNED__)
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindUnsigned);
#else /* _CHAR_UNSIGNED || __CHAR_UNSIGNED__ */
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindSigned);
#endif /* _CHAR_UNSIGNED || __CHAR_UNSIGNED__ */
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<signed char>(void)
{
int e = (int)sizeof(signed char) * 8;
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindSigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<unsigned char>(void)
{
int e = (int)sizeof(unsigned char) * 8;
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindUnsigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<char1>(void)
{
int e = (int)sizeof(signed char) * 8;
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindSigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<uchar1>(void)
{
int e = (int)sizeof(unsigned char) * 8;
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindUnsigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<char2>(void)
{
int e = (int)sizeof(signed char) * 8;
return cudaCreateChannelDesc(e, e, 0, 0, cudaChannelFormatKindSigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<uchar2>(void)
{
int e = (int)sizeof(unsigned char) * 8;
return cudaCreateChannelDesc(e, e, 0, 0, cudaChannelFormatKindUnsigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<char4>(void)
{
int e = (int)sizeof(signed char) * 8;
return cudaCreateChannelDesc(e, e, e, e, cudaChannelFormatKindSigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<uchar4>(void)
{
int e = (int)sizeof(unsigned char) * 8;
return cudaCreateChannelDesc(e, e, e, e, cudaChannelFormatKindUnsigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<short>(void)
{
int e = (int)sizeof(short) * 8;
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindSigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<unsigned short>(void)
{
int e = (int)sizeof(unsigned short) * 8;
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindUnsigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<short1>(void)
{
int e = (int)sizeof(short) * 8;
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindSigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<ushort1>(void)
{
int e = (int)sizeof(unsigned short) * 8;
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindUnsigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<short2>(void)
{
int e = (int)sizeof(short) * 8;
return cudaCreateChannelDesc(e, e, 0, 0, cudaChannelFormatKindSigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<ushort2>(void)
{
int e = (int)sizeof(unsigned short) * 8;
return cudaCreateChannelDesc(e, e, 0, 0, cudaChannelFormatKindUnsigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<short4>(void)
{
int e = (int)sizeof(short) * 8;
return cudaCreateChannelDesc(e, e, e, e, cudaChannelFormatKindSigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<ushort4>(void)
{
int e = (int)sizeof(unsigned short) * 8;
return cudaCreateChannelDesc(e, e, e, e, cudaChannelFormatKindUnsigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<int>(void)
{
int e = (int)sizeof(int) * 8;
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindSigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<unsigned int>(void)
{
int e = (int)sizeof(unsigned int) * 8;
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindUnsigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<int1>(void)
{
int e = (int)sizeof(int) * 8;
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindSigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<uint1>(void)
{
int e = (int)sizeof(unsigned int) * 8;
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindUnsigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<int2>(void)
{
int e = (int)sizeof(int) * 8;
return cudaCreateChannelDesc(e, e, 0, 0, cudaChannelFormatKindSigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<uint2>(void)
{
int e = (int)sizeof(unsigned int) * 8;
return cudaCreateChannelDesc(e, e, 0, 0, cudaChannelFormatKindUnsigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<int4>(void)
{
int e = (int)sizeof(int) * 8;
return cudaCreateChannelDesc(e, e, e, e, cudaChannelFormatKindSigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<uint4>(void)
{
int e = (int)sizeof(unsigned int) * 8;
return cudaCreateChannelDesc(e, e, e, e, cudaChannelFormatKindUnsigned);
}
#if !defined(__LP64__)
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<long>(void)
{
int e = (int)sizeof(long) * 8;
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindSigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<unsigned long>(void)
{
int e = (int)sizeof(unsigned long) * 8;
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindUnsigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<long1>(void)
{
int e = (int)sizeof(long) * 8;
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindSigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<ulong1>(void)
{
int e = (int)sizeof(unsigned long) * 8;
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindUnsigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<long2>(void)
{
int e = (int)sizeof(long) * 8;
return cudaCreateChannelDesc(e, e, 0, 0, cudaChannelFormatKindSigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<ulong2>(void)
{
int e = (int)sizeof(unsigned long) * 8;
return cudaCreateChannelDesc(e, e, 0, 0, cudaChannelFormatKindUnsigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<long4>(void)
{
int e = (int)sizeof(long) * 8;
return cudaCreateChannelDesc(e, e, e, e, cudaChannelFormatKindSigned);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<ulong4>(void)
{
int e = (int)sizeof(unsigned long) * 8;
return cudaCreateChannelDesc(e, e, e, e, cudaChannelFormatKindUnsigned);
}
#endif /* !__LP64__ */
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<float>(void)
{
int e = (int)sizeof(float) * 8;
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindFloat);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<float1>(void)
{
int e = (int)sizeof(float) * 8;
return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindFloat);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<float2>(void)
{
int e = (int)sizeof(float) * 8;
return cudaCreateChannelDesc(e, e, 0, 0, cudaChannelFormatKindFloat);
}
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<float4>(void)
{
int e = (int)sizeof(float) * 8;
return cudaCreateChannelDesc(e, e, e, e, cudaChannelFormatKindFloat);
}
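/* Illustrative sketch, not part of the original header: the specialization
 * above makes cudaCreateChannelDesc<float4>() return a four-component 32-bit
 * float descriptor (32, 32, 32, 32, cudaChannelFormatKindFloat).  A typical
 * host-side use, relying on cuda_runtime_api.h included above, allocates a
 * CUDA array with that descriptor; the helper name below is hypothetical. */
static __inline__ __host__ cudaError_t exampleAllocFloat4Array(cudaArray_t *arr, size_t width, size_t height)
{
  cudaChannelFormatDesc desc = cudaCreateChannelDesc<float4>();
  return cudaMallocArray(arr, &desc, width, height);
}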
#endif /* __cplusplus */
/** @} */
/** @} */ /* END CUDART_TEXTURE_HL */
#endif /* !__CHANNEL_DESCRIPTOR_H__ */

View File

@@ -1,266 +0,0 @@
/*
* Copyright 1993-2016 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO LICENSEE:
*
* This source code and/or documentation ("Licensed Deliverables") are
* subject to NVIDIA intellectual property rights under U.S. and
* international Copyright laws.
*
* These Licensed Deliverables contained herein is PROPRIETARY and
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
* conditions of a form of NVIDIA software license agreement by and
* between NVIDIA and Licensee ("License Agreement") or electronically
* accepted by Licensee. Notwithstanding any terms or conditions to
* the contrary in the License Agreement, reproduction or disclosure
* of the Licensed Deliverables to any third party without the express
* written consent of NVIDIA is prohibited.
*
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THESE LICENSED DELIVERABLES.
*
* U.S. Government End Users. These Licensed Deliverables are a
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
* 1995), consisting of "commercial computer software" and "commercial
* computer software documentation" as such terms are used in 48
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
* U.S. Government End Users acquire the Licensed Deliverables with
* only those rights set forth herein.
*
* Any use of the Licensed Deliverables in individual and commercial
* software must include, in the user documentation and internal
* comments to the code, the above Disclaimer and U.S. Government End
* Users Notice.
*/
#if !defined(__HOST_CONFIG_H__)
#define __HOST_CONFIG_H__
/*******************************************************************************
* *
* *
* *
*******************************************************************************/
#if defined(__CUDACC__)
#if defined(__CUDACC_RTC__)
#define _CRTIMP
#define __THROW
#else /* __CUDACC_RTC__ */
/* check for host compilers that are compatible with nvcc */
#if !defined(__GNUC__) && !defined(_WIN32)
#error --- !!! UNSUPPORTED COMPILER !!! ---
#endif /* !__GNUC__ && !_WIN32 */
#if defined(__ICC)
#if (__ICC != 1500 && __ICC != 1600 && __ICC != 1700) || !defined(__GNUC__) || !defined(__LP64__)
#error -- unsupported ICC configuration! Only ICC 15.0, ICC 16.0, and ICC 17.0 on Linux x86_64 are supported!
#endif /* (__ICC != 1500 && __ICC != 1600 && __ICC != 1700) || !__GNUC__ || !__LP64__ */
#endif /* __ICC */
#if defined(__PGIC__)
#if (!(__PGIC__ == 17) && \
!(__PGIC__ == 99 && __PGIC_MINOR__ == 99)) || \
!defined(__GNUC__) || !defined(__LP64__)
#error -- unsupported pgc++ configuration! Only pgc++ 17 on Linux x86_64 is supported!
#endif /* (!(__PGIC__ == 17) &&
!(__PGIC__ == 99 && __PGIC_MINOR__ == 99 )) ||
!__GNUC__ || !__LP64__ */
#endif /* __PGIC__ */
#if defined(__powerpc__)
#if !defined(__powerpc64__) || !defined(__LITTLE_ENDIAN__)
#error -- unsupported PPC platform! Only 64-bit little endian PPC is supported!
#endif /* !__powerpc64__ || !__LITTLE_ENDIAN__ */
#if defined(__ibmxl_vrm__) && (__ibmxl_vrm__ < 0x0d010000 || __ibmxl_vrm__ >= 0x0d020000)
#error -- unsupported xlC version! only xlC 13.1 is supported
#endif /* __ibmxl_vrm__ && (__ibmxl_vrm__ < 0x0d010000 || __ibmxl_vrm__ >= 0x0d020000) */
#endif /* __powerpc__ */
#if defined(__GNUC__)
#if __GNUC__ > 6
#error -- unsupported GNU version! gcc versions later than 6 are not supported!
#endif /* __GNUC__ > 6 */
#if defined(__APPLE__) && defined(__MACH__) && !defined(__clang__)
#error -- clang and clang++ are the only supported host compilers on Mac OS X!
#endif /* __APPLE__ && __MACH__ && !__clang__ */
#endif /* __GNUC__ */
#if defined(_WIN32)
#if _MSC_VER < 1600 || _MSC_VER > 1911
#error -- unsupported Microsoft Visual Studio version! Only the versions 2012, 2013, 2015 and 2017 are supported!
#elif _MSC_VER == 1600 /* _MSC_VER == 1600 */
#pragma message("support for Microsoft Visual Studio 2010 has been deprecated!")
#endif /* _MSC_VER < 1600 || _MSC_VER > 1911 || _MSC_VER == 1600 */
#endif /* _WIN32 */
/* configure host compiler */
#if defined(__APPLE__)
#define _CRTIMP
#define _ACRTIMP
#define __THROW
#if defined(__BLOCKS__) /* nvcc does not support closures */
#undef __BLOCKS__
#endif /* __BLOCKS__ */
#elif defined(__ANDROID__)
#define _CRTIMP
#define _ACRTIMP
#define __THROW
#elif defined(__QNX__)
#define _CRTIMP
#define _ACRTIMP
#define __THROW
#elif defined(__HORIZON__)
#define _CRTIMP
#define _ACRTIMP
#define __THROW
#elif defined(__GNUC__)
#define _CRTIMP
#define _ACRTIMP
#include <features.h> /* for __THROW */
#elif defined(_WIN32)
#if _MSC_VER >= 1500
#undef _USE_DECLSPECS_FOR_SAL
#define _USE_DECLSPECS_FOR_SAL \
1
#endif /* _MSC_VER >= 1500 */
#if !defined(_CRT_NONSTDC_NO_WARNINGS)
#define _CRT_NONSTDC_NO_WARNINGS /* to suppress warnings */
#endif /* !_CRT_NONSTDC_NO_WARNINGS */
#if !defined(_CRT_SECURE_NO_WARNINGS)
#define _CRT_SECURE_NO_WARNINGS /* to suppress warnings */
#endif /* !_CRT_SECURE_NO_WARNINGS */
#if !defined(NOMINMAX)
#define NOMINMAX /* min and max are part of cuda runtime */
#endif /* !NOMINMAX */
#include <crtdefs.h> /* for _CRTIMP */
#if _MSC_VER >= 1900
#include <corecrt.h> /* for _ACRTIMP */
#endif /* _MSC_VER >= 1900 */
#define __THROW
#endif /* __APPLE__ */
#endif /* __CUDACC_RTC__ */
#if defined(__cplusplus) && defined(__CUDA_ARCH__) && (defined(__PGIC__) || defined(__CUDACC_RTC__) || (defined(_WIN32) && defined(_MSC_VER)))
#if __CUDACC_RTC__
typedef char *va_list;
#else /* !__CUDACC_RTC__ */
#include <cstdarg>
#endif /* __CUDACC_RTC__ */
#undef va_start
#undef va_end
#undef va_arg
#ifdef __PGIC__
#undef __builtin_va_end
#define va_start(v,l) __builtin_alt_va_start(v,l)
#define va_end(v) __builtin_va_end(v)
#define va_arg(v,l) __builtin_alt_va_arg(v,l)
#if (__cplusplus >= 201103L)
#undef va_copy
#define va_copy(d,s) __builtin_va_copy(d,s)
#endif
#else /* !__PGIC__ */
#define va_start(ap, x) (__cu_va_start(&ap, x))
#define va_end(ap) (__cu_va_end(&ap))
#define va_arg(ap, t) (*((t *)__cu_va_arg(&ap, (t *)0)))
#if (_MSC_VER >= 1800) || (defined(__CUDACC_RTC__) && (__cplusplus >= 201103L))
#undef va_copy
#define va_copy(apd, aps) (__cu_va_copy(&(apd), &(aps)))
#endif /* (_MSC_VER >= 1800) || (defined(__CUDACC_RTC__) && (__cplusplus >= 201103L)) */
#endif /* __PGIC__ */
#endif /* defined(__cplusplus) && (defined(__CUDACC_RTC__) || (defined(_WIN32) && defined(_MSC_VER))) */
#endif /* __CUDACC__ */
#endif /* !__HOST_CONFIG_H__ */

View File

@@ -1,216 +0,0 @@
/*
* Copyright 1993-2017 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO LICENSEE:
*
* This source code and/or documentation ("Licensed Deliverables") are
* subject to NVIDIA intellectual property rights under U.S. and
* international Copyright laws.
*
* These Licensed Deliverables contained herein is PROPRIETARY and
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
* conditions of a form of NVIDIA software license agreement by and
* between NVIDIA and Licensee ("License Agreement") or electronically
* accepted by Licensee. Notwithstanding any terms or conditions to
* the contrary in the License Agreement, reproduction or disclosure
* of the Licensed Deliverables to any third party without the express
* written consent of NVIDIA is prohibited.
*
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THESE LICENSED DELIVERABLES.
*
* U.S. Government End Users. These Licensed Deliverables are a
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
* 1995), consisting of "commercial computer software" and "commercial
* computer software documentation" as such terms are used in 48
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
* U.S. Government End Users acquire the Licensed Deliverables with
* only those rights set forth herein.
*
* Any use of the Licensed Deliverables in individual and commercial
* software must include, in the user documentation and internal
* comments to the code, the above Disclaimer and U.S. Government End
* Users Notice.
*/
#if !defined(__HOST_DEFINES_H__)
#define __HOST_DEFINES_H__
/* CUDA JIT mode (__CUDACC_RTC__) also uses GNU style attributes */
#if defined(__GNUC__) || defined(__CUDA_LIBDEVICE__) || defined(__CUDACC_RTC__)
#if defined(__CUDACC_RTC__)
#define __volatile__ volatile
#endif /* __CUDACC_RTC__ */
#define __no_return__ \
__attribute__((noreturn))
#if defined(__CUDACC__) || defined(__CUDA_ARCH__) || defined(__CUDA_LIBDEVICE__)
/* gcc allows users to define attributes with underscores,
e.g., __attribute__((__noinline__)).
Consider a non-CUDA source file (e.g. .cpp) that has the
above attribute specification, and includes this header file. In that case,
defining __noinline__ as below would cause a gcc compilation error.
Hence, only define __noinline__ when the code is being processed
by a CUDA compiler component.
*/
#define __noinline__ \
__attribute__((noinline))
#endif /* __CUDACC__ || __CUDA_ARCH__ || __CUDA_LIBDEVICE__ */
#define __forceinline__ \
__inline__ __attribute__((always_inline))
#define __align__(n) \
__attribute__((aligned(n)))
#define __thread__ \
__thread
#define __import__
#define __export__
#define __cdecl
#define __annotate__(a) \
__attribute__((a))
#define __location__(a) \
__annotate__(a)
#define CUDARTAPI
#elif defined(_MSC_VER)
#if _MSC_VER >= 1400
#define __restrict__ \
__restrict
#else /* _MSC_VER >= 1400 */
#define __restrict__
#endif /* _MSC_VER >= 1400 */
#define __inline__ \
__inline
#define __no_return__ \
__declspec(noreturn)
#define __noinline__ \
__declspec(noinline)
#define __forceinline__ \
__forceinline
#define __align__(n) \
__declspec(align(n))
#define __thread__ \
__declspec(thread)
#define __import__ \
__declspec(dllimport)
#define __export__ \
__declspec(dllexport)
#define __annotate__(a) \
__declspec(a)
#define __location__(a) \
__annotate__(__##a##__)
#define CUDARTAPI \
__stdcall
#else /* __GNUC__ || __CUDA_LIBDEVICE__ || __CUDACC_RTC__ */
#define __inline__
#if !defined(__align__)
#error --- !!! UNKNOWN COMPILER: please provide a CUDA compatible definition for '__align__' !!! ---
#endif /* !__align__ */
#if !defined(CUDARTAPI)
#error --- !!! UNKNOWN COMPILER: please provide a CUDA compatible definition for 'CUDARTAPI' !!! ---
#endif /* !CUDARTAPI */
#endif /* __GNUC__ || __CUDA_LIBDEVICE__ || __CUDACC_RTC__ */
#if (defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 3 && !defined(__clang__)))) || \
(defined(_MSC_VER) && _MSC_VER < 1900) || \
(!defined(__GNUC__) && !defined(_MSC_VER))
#define __specialization_static \
static
#else /* (__GNUC__ && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 3 && !__clang__))) ||
(_MSC_VER && _MSC_VER < 1900) ||
(!__GNUC__ && !_MSC_VER) */
#define __specialization_static
#endif /* (__GNUC__ && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 3 && !__clang__))) ||
(_MSC_VER && _MSC_VER < 1900) ||
(!__GNUC__ && !_MSC_VER) */
#if !defined(__CUDACC__) && !defined(__CUDA_LIBDEVICE__)
#undef __annotate__
#define __annotate__(a)
#else /* !__CUDACC__ && !__CUDA_LIBDEVICE__ */
#define __launch_bounds__(...) \
__annotate__(launch_bounds(__VA_ARGS__))
#endif /* !__CUDACC__ && !__CUDA_LIBDEVICE__ */
#if defined(__CUDACC__) || defined(__CUDA_LIBDEVICE__) || \
defined(__GNUC__) || defined(_WIN64)
#define __builtin_align__(a) \
__align__(a)
#else /* __CUDACC__ || __CUDA_LIBDEVICE__ || __GNUC__ || _WIN64 */
#define __builtin_align__(a)
#endif /* __CUDACC__ || __CUDA_LIBDEVICE__ || __GNUC__ || _WIN64 */
#define __host__ \
__location__(host)
#define __device__ \
__location__(device)
#define __global__ \
__location__(global)
#define __shared__ \
__location__(shared)
#define __constant__ \
__location__(constant)
#define __managed__ \
__location__(managed)
#if !defined(__CUDACC__)
#define __device_builtin__
#define __device_builtin_texture_type__
#define __device_builtin_surface_type__
#define __cudart_builtin__
#else /* defined(__CUDACC__) */
#define __device_builtin__ \
__location__(device_builtin)
#define __device_builtin_texture_type__ \
__location__(device_builtin_texture_type)
#define __device_builtin_surface_type__ \
__location__(device_builtin_surface_type)
#define __cudart_builtin__ \
__location__(cudart_builtin)
#endif /* !defined(__CUDACC__) */
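/* Worked example (added for clarity, not part of the original header): with
 * nvcc and a GNU host compiler, a kernel declaration such as
 *
 *     __global__ void __launch_bounds__(256) scale(float *x, float a);
 *
 * expands through __location__/__annotate__ into
 *
 *     __attribute__((global)) void __attribute__((launch_bounds(256))) scale(float *x, float a);
 *
 * while in a plain host compilation (__CUDACC__ undefined) __annotate__ is
 * redefined to nothing, so __global__ vanishes and only an ordinary host
 * prototype remains. */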
#endif /* !__HOST_DEFINES_H__ */

View File

@@ -1,338 +0,0 @@
/*
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO LICENSEE:
*
* This source code and/or documentation ("Licensed Deliverables") are
* subject to NVIDIA intellectual property rights under U.S. and
* international Copyright laws.
*
* These Licensed Deliverables contained herein is PROPRIETARY and
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
* conditions of a form of NVIDIA software license agreement by and
* between NVIDIA and Licensee ("License Agreement") or electronically
* accepted by Licensee. Notwithstanding any terms or conditions to
* the contrary in the License Agreement, reproduction or disclosure
* of the Licensed Deliverables to any third party without the express
* written consent of NVIDIA is prohibited.
*
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THESE LICENSED DELIVERABLES.
*
* U.S. Government End Users. These Licensed Deliverables are a
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
* 1995), consisting of "commercial computer software" and "commercial
* computer software documentation" as such terms are used in 48
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
* U.S. Government End Users acquire the Licensed Deliverables with
* only those rights set forth herein.
*
* Any use of the Licensed Deliverables in individual and commercial
* software must include, in the user documentation and internal
* comments to the code, the above Disclaimer and U.S. Government End
* Users Notice.
*/
#if !defined(CU_COMPLEX_H_)
#define CU_COMPLEX_H_
/* When including a C header file in C++ code, extern "C" is required.
 * But the standard QNX headers already carry their own ifdef'd extern "C"
 * when compiled as C++, and extern "C" cannot be nested.
 * Hence keep this header outside of the extern "C" block.
 */
#include <math.h> /* import fabsf, sqrt */
#if defined(__cplusplus)
extern "C" {
#endif /* __cplusplus */
#include "vector_types.h"
typedef float2 cuFloatComplex;
__host__ __device__ static __inline__ float cuCrealf (cuFloatComplex x)
{
return x.x;
}
__host__ __device__ static __inline__ float cuCimagf (cuFloatComplex x)
{
return x.y;
}
__host__ __device__ static __inline__ cuFloatComplex make_cuFloatComplex
(float r, float i)
{
cuFloatComplex res;
res.x = r;
res.y = i;
return res;
}
__host__ __device__ static __inline__ cuFloatComplex cuConjf (cuFloatComplex x)
{
return make_cuFloatComplex (cuCrealf(x), -cuCimagf(x));
}
__host__ __device__ static __inline__ cuFloatComplex cuCaddf (cuFloatComplex x,
cuFloatComplex y)
{
return make_cuFloatComplex (cuCrealf(x) + cuCrealf(y),
cuCimagf(x) + cuCimagf(y));
}
__host__ __device__ static __inline__ cuFloatComplex cuCsubf (cuFloatComplex x,
cuFloatComplex y)
{
return make_cuFloatComplex (cuCrealf(x) - cuCrealf(y),
cuCimagf(x) - cuCimagf(y));
}
/* This implementation could suffer from intermediate overflow even though
* the final result would be in range. However, various implementations do
* not guard against this (presumably to avoid losing performance), so we
* don't do it either to stay competitive.
*/
__host__ __device__ static __inline__ cuFloatComplex cuCmulf (cuFloatComplex x,
cuFloatComplex y)
{
cuFloatComplex prod;
prod = make_cuFloatComplex ((cuCrealf(x) * cuCrealf(y)) -
(cuCimagf(x) * cuCimagf(y)),
(cuCrealf(x) * cuCimagf(y)) +
(cuCimagf(x) * cuCrealf(y)));
return prod;
}
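/* Numerical example of the overflow noted above (added for clarity, not part
 * of the original header): for x = y = 3e20f + 3e20f*i the exact real part is
 * 3e20*3e20 - 3e20*3e20 = 0, but each partial product overflows float to +Inf,
 * so the unguarded formula yields Inf - Inf = NaN in the real component. */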
/* This implementation guards against intermediate underflow and overflow
* by scaling. Such guarded implementations are usually the default for
* complex library implementations, with some also offering an unguarded,
* faster version.
*/
__host__ __device__ static __inline__ cuFloatComplex cuCdivf (cuFloatComplex x,
cuFloatComplex y)
{
cuFloatComplex quot;
float s = fabsf(cuCrealf(y)) + fabsf(cuCimagf(y));
float oos = 1.0f / s;
float ars = cuCrealf(x) * oos;
float ais = cuCimagf(x) * oos;
float brs = cuCrealf(y) * oos;
float bis = cuCimagf(y) * oos;
s = (brs * brs) + (bis * bis);
oos = 1.0f / s;
quot = make_cuFloatComplex (((ars * brs) + (ais * bis)) * oos,
((ais * brs) - (ars * bis)) * oos);
return quot;
}
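/* Worked example (added for clarity, not part of the original header): with
 * s = |Re(y)| + |Im(y)|, both operands are first scaled by 1/s, which leaves
 * the quotient unchanged since (x/s) / (y/s) = x / y, but keeps brs and bis
 * in [-1, 1] so that brs*brs + bis*bis cannot overflow.  The result is then
 * the usual formula x * conj(y) / |y|^2 applied to the scaled operands:
 *
 *     Re(q) = (ars*brs + ais*bis) / (brs*brs + bis*bis)
 *     Im(q) = (ais*brs - ars*bis) / (brs*brs + bis*bis)
 */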
/*
* We would like to call hypotf(), but it's not available on all platforms.
* This discrete implementation guards against intermediate underflow and
* overflow by scaling. Otherwise we would lose half the exponent range.
* There are various ways of doing guarded computation. For now we chose the
* simplest and fastest solution; however, this may suffer from inaccuracies
* if sqrt and division are not IEEE compliant.
*/
__host__ __device__ static __inline__ float cuCabsf (cuFloatComplex x)
{
float a = cuCrealf(x);
float b = cuCimagf(x);
float v, w, t;
a = fabsf(a);
b = fabsf(b);
if (a > b) {
v = a;
w = b;
} else {
v = b;
w = a;
}
t = w / v;
t = 1.0f + t * t;
t = v * sqrtf(t);
if ((v == 0.0f) || (v > 3.402823466e38f) || (w > 3.402823466e38f)) {
t = v + w;
}
return t;
}
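/* Worked example (added for clarity, not part of the original header): with
 * v = max(|a|, |b|) and w = min(|a|, |b|), the code computes
 *
 *     |x| = v * sqrt(1 + (w/v)^2)
 *
 * which equals sqrt(a*a + b*b) but keeps the squared term (w/v)^2 <= 1, so no
 * intermediate overflow can occur.  The final test repairs the special cases:
 * v == 0 would otherwise produce 0 * sqrt(NaN), and an infinite input should
 * yield +Inf, so both fall back to t = v + w. */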
/* Double precision */
typedef double2 cuDoubleComplex;
__host__ __device__ static __inline__ double cuCreal (cuDoubleComplex x)
{
return x.x;
}
__host__ __device__ static __inline__ double cuCimag (cuDoubleComplex x)
{
return x.y;
}
__host__ __device__ static __inline__ cuDoubleComplex make_cuDoubleComplex
(double r, double i)
{
cuDoubleComplex res;
res.x = r;
res.y = i;
return res;
}
__host__ __device__ static __inline__ cuDoubleComplex cuConj(cuDoubleComplex x)
{
return make_cuDoubleComplex (cuCreal(x), -cuCimag(x));
}
__host__ __device__ static __inline__ cuDoubleComplex cuCadd(cuDoubleComplex x,
cuDoubleComplex y)
{
return make_cuDoubleComplex (cuCreal(x) + cuCreal(y),
cuCimag(x) + cuCimag(y));
}
__host__ __device__ static __inline__ cuDoubleComplex cuCsub(cuDoubleComplex x,
cuDoubleComplex y)
{
return make_cuDoubleComplex (cuCreal(x) - cuCreal(y),
cuCimag(x) - cuCimag(y));
}
/* This implementation could suffer from intermediate overflow even though
* the final result would be in range. However, various implementations do
* not guard against this (presumably to avoid losing performance), so we
* don't do it either to stay competitive.
*/
__host__ __device__ static __inline__ cuDoubleComplex cuCmul(cuDoubleComplex x,
cuDoubleComplex y)
{
cuDoubleComplex prod;
prod = make_cuDoubleComplex ((cuCreal(x) * cuCreal(y)) -
(cuCimag(x) * cuCimag(y)),
(cuCreal(x) * cuCimag(y)) +
(cuCimag(x) * cuCreal(y)));
return prod;
}
/* This implementation guards against intermediate underflow and overflow
* by scaling. Such guarded implementations are usually the default for
* complex library implementations, with some also offering an unguarded,
* faster version.
*/
__host__ __device__ static __inline__ cuDoubleComplex cuCdiv(cuDoubleComplex x,
cuDoubleComplex y)
{
cuDoubleComplex quot;
double s = (fabs(cuCreal(y))) + (fabs(cuCimag(y)));
double oos = 1.0 / s;
double ars = cuCreal(x) * oos;
double ais = cuCimag(x) * oos;
double brs = cuCreal(y) * oos;
double bis = cuCimag(y) * oos;
s = (brs * brs) + (bis * bis);
oos = 1.0 / s;
quot = make_cuDoubleComplex (((ars * brs) + (ais * bis)) * oos,
((ais * brs) - (ars * bis)) * oos);
return quot;
}
/* This implementation guards against intermediate underflow and overflow
* by scaling. Otherwise we would lose half the exponent range. There are
* various ways of doing guarded computation. For now we chose the simplest
* and fastest solution; however, this may suffer from inaccuracies if sqrt
* and division are not IEEE compliant.
*/
__host__ __device__ static __inline__ double cuCabs (cuDoubleComplex x)
{
double a = cuCreal(x);
double b = cuCimag(x);
double v, w, t;
a = fabs(a);
b = fabs(b);
if (a > b) {
v = a;
w = b;
} else {
v = b;
w = a;
}
t = w / v;
t = 1.0 + t * t;
t = v * sqrt(t);
if ((v == 0.0) ||
(v > 1.79769313486231570e+308) || (w > 1.79769313486231570e+308)) {
t = v + w;
}
return t;
}
#if defined(__cplusplus)
}
#endif /* __cplusplus */
/* aliases */
typedef cuFloatComplex cuComplex;
__host__ __device__ static __inline__ cuComplex make_cuComplex (float x,
float y)
{
return make_cuFloatComplex (x, y);
}
/* float-to-double promotion */
__host__ __device__ static __inline__ cuDoubleComplex cuComplexFloatToDouble
(cuFloatComplex c)
{
return make_cuDoubleComplex ((double)cuCrealf(c), (double)cuCimagf(c));
}
__host__ __device__ static __inline__ cuFloatComplex cuComplexDoubleToFloat
(cuDoubleComplex c)
{
return make_cuFloatComplex ((float)cuCreal(c), (float)cuCimag(c));
}
__host__ __device__ static __inline__ cuComplex cuCfmaf( cuComplex x, cuComplex y, cuComplex d)
{
float real_res;
float imag_res;
real_res = (cuCrealf(x) * cuCrealf(y)) + cuCrealf(d);
imag_res = (cuCrealf(x) * cuCimagf(y)) + cuCimagf(d);
real_res = -(cuCimagf(x) * cuCimagf(y)) + real_res;
imag_res = (cuCimagf(x) * cuCrealf(y)) + imag_res;
return make_cuComplex(real_res, imag_res);
}
__host__ __device__ static __inline__ cuDoubleComplex cuCfma( cuDoubleComplex x, cuDoubleComplex y, cuDoubleComplex d)
{
double real_res;
double imag_res;
real_res = (cuCreal(x) * cuCreal(y)) + cuCreal(d);
imag_res = (cuCreal(x) * cuCimag(y)) + cuCimag(d);
real_res = -(cuCimag(x) * cuCimag(y)) + real_res;
imag_res = (cuCimag(x) * cuCreal(y)) + imag_res;
return make_cuDoubleComplex(real_res, imag_res);
}
#endif /* !defined(CU_COMPLEX_H_) */

View File

@@ -1,565 +0,0 @@
/*
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO LICENSEE:
*
* This source code and/or documentation ("Licensed Deliverables") are
* subject to NVIDIA intellectual property rights under U.S. and
* international Copyright laws.
*
* These Licensed Deliverables contained herein is PROPRIETARY and
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
* conditions of a form of NVIDIA software license agreement by and
* between NVIDIA and Licensee ("License Agreement") or electronically
* accepted by Licensee. Notwithstanding any terms or conditions to
* the contrary in the License Agreement, reproduction or disclosure
* of the Licensed Deliverables to any third party without the express
* written consent of NVIDIA is prohibited.
*
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THESE LICENSED DELIVERABLES.
*
* U.S. Government End Users. These Licensed Deliverables are a
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
* 1995), consisting of "commercial computer software" and "commercial
* computer software documentation" as such terms are used in 48
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
* U.S. Government End Users acquire the Licensed Deliverables with
* only those rights set forth herein.
*
* Any use of the Licensed Deliverables in individual and commercial
* software must include, in the user documentation and internal
* comments to the code, the above Disclaimer and U.S. Government End
* Users Notice.
*/
/*
* This is the public header file for the CUBLAS library, defining the API
*
* CUBLAS is an implementation of BLAS (Basic Linear Algebra Subroutines)
* on top of the CUDA runtime.
*/
#if !defined(CUBLAS_H_)
#define CUBLAS_H_
#include <cuda_runtime.h>
#ifndef CUBLASWINAPI
#ifdef _WIN32
#define CUBLASWINAPI __stdcall
#else
#define CUBLASWINAPI
#endif
#endif
#undef CUBLASAPI
#ifdef __CUDACC__
#define CUBLASAPI __host__
#else
#define CUBLASAPI
#endif
#include "cublas_api.h"
#if defined(__cplusplus)
extern "C" {
#endif
/* CUBLAS data types */
#define cublasStatus cublasStatus_t
cublasStatus CUBLASWINAPI cublasInit (void);
cublasStatus CUBLASWINAPI cublasShutdown (void);
cublasStatus CUBLASWINAPI cublasGetError (void);
cublasStatus CUBLASWINAPI cublasGetVersion(int *version);
cublasStatus CUBLASWINAPI cublasAlloc (int n, int elemSize, void **devicePtr);
cublasStatus CUBLASWINAPI cublasFree (void *devicePtr);
cublasStatus CUBLASWINAPI cublasSetKernelStream (cudaStream_t stream);
/* ---------------- CUBLAS BLAS1 functions ---------------- */
/* NRM2 */
float CUBLASWINAPI cublasSnrm2 (int n, const float *x, int incx);
double CUBLASWINAPI cublasDnrm2 (int n, const double *x, int incx);
float CUBLASWINAPI cublasScnrm2 (int n, const cuComplex *x, int incx);
double CUBLASWINAPI cublasDznrm2 (int n, const cuDoubleComplex *x, int incx);
/*------------------------------------------------------------------------*/
/* DOT */
float CUBLASWINAPI cublasSdot (int n, const float *x, int incx, const float *y,
int incy);
double CUBLASWINAPI cublasDdot (int n, const double *x, int incx, const double *y,
int incy);
cuComplex CUBLASWINAPI cublasCdotu (int n, const cuComplex *x, int incx, const cuComplex *y,
int incy);
cuComplex CUBLASWINAPI cublasCdotc (int n, const cuComplex *x, int incx, const cuComplex *y,
int incy);
cuDoubleComplex CUBLASWINAPI cublasZdotu (int n, const cuDoubleComplex *x, int incx, const cuDoubleComplex *y,
int incy);
cuDoubleComplex CUBLASWINAPI cublasZdotc (int n, const cuDoubleComplex *x, int incx, const cuDoubleComplex *y,
int incy);
/*------------------------------------------------------------------------*/
/* SCAL */
void CUBLASWINAPI cublasSscal (int n, float alpha, float *x, int incx);
void CUBLASWINAPI cublasDscal (int n, double alpha, double *x, int incx);
void CUBLASWINAPI cublasCscal (int n, cuComplex alpha, cuComplex *x, int incx);
void CUBLASWINAPI cublasZscal (int n, cuDoubleComplex alpha, cuDoubleComplex *x, int incx);
void CUBLASWINAPI cublasCsscal (int n, float alpha, cuComplex *x, int incx);
void CUBLASWINAPI cublasZdscal (int n, double alpha, cuDoubleComplex *x, int incx);
/*------------------------------------------------------------------------*/
/* AXPY */
void CUBLASWINAPI cublasSaxpy (int n, float alpha, const float *x, int incx,
float *y, int incy);
void CUBLASWINAPI cublasDaxpy (int n, double alpha, const double *x,
int incx, double *y, int incy);
void CUBLASWINAPI cublasCaxpy (int n, cuComplex alpha, const cuComplex *x,
int incx, cuComplex *y, int incy);
void CUBLASWINAPI cublasZaxpy (int n, cuDoubleComplex alpha, const cuDoubleComplex *x,
int incx, cuDoubleComplex *y, int incy);
/*------------------------------------------------------------------------*/
/* COPY */
void CUBLASWINAPI cublasScopy (int n, const float *x, int incx, float *y,
int incy);
void CUBLASWINAPI cublasDcopy (int n, const double *x, int incx, double *y,
int incy);
void CUBLASWINAPI cublasCcopy (int n, const cuComplex *x, int incx, cuComplex *y,
int incy);
void CUBLASWINAPI cublasZcopy (int n, const cuDoubleComplex *x, int incx, cuDoubleComplex *y,
int incy);
/*------------------------------------------------------------------------*/
/* SWAP */
void CUBLASWINAPI cublasSswap (int n, float *x, int incx, float *y, int incy);
void CUBLASWINAPI cublasDswap (int n, double *x, int incx, double *y, int incy);
void CUBLASWINAPI cublasCswap (int n, cuComplex *x, int incx, cuComplex *y, int incy);
void CUBLASWINAPI cublasZswap (int n, cuDoubleComplex *x, int incx, cuDoubleComplex *y, int incy);
/*------------------------------------------------------------------------*/
/* AMAX */
int CUBLASWINAPI cublasIsamax (int n, const float *x, int incx);
int CUBLASWINAPI cublasIdamax (int n, const double *x, int incx);
int CUBLASWINAPI cublasIcamax (int n, const cuComplex *x, int incx);
int CUBLASWINAPI cublasIzamax (int n, const cuDoubleComplex *x, int incx);
/*------------------------------------------------------------------------*/
/* AMIN */
int CUBLASWINAPI cublasIsamin (int n, const float *x, int incx);
int CUBLASWINAPI cublasIdamin (int n, const double *x, int incx);
int CUBLASWINAPI cublasIcamin (int n, const cuComplex *x, int incx);
int CUBLASWINAPI cublasIzamin (int n, const cuDoubleComplex *x, int incx);
/*------------------------------------------------------------------------*/
/* ASUM */
float CUBLASWINAPI cublasSasum (int n, const float *x, int incx);
double CUBLASWINAPI cublasDasum (int n, const double *x, int incx);
float CUBLASWINAPI cublasScasum (int n, const cuComplex *x, int incx);
double CUBLASWINAPI cublasDzasum (int n, const cuDoubleComplex *x, int incx);
/*------------------------------------------------------------------------*/
/* ROT */
void CUBLASWINAPI cublasSrot (int n, float *x, int incx, float *y, int incy,
float sc, float ss);
void CUBLASWINAPI cublasDrot (int n, double *x, int incx, double *y, int incy,
double sc, double ss);
void CUBLASWINAPI cublasCrot (int n, cuComplex *x, int incx, cuComplex *y,
int incy, float c, cuComplex s);
void CUBLASWINAPI cublasZrot (int n, cuDoubleComplex *x, int incx,
cuDoubleComplex *y, int incy, double sc,
cuDoubleComplex cs);
void CUBLASWINAPI cublasCsrot (int n, cuComplex *x, int incx, cuComplex *y,
int incy, float c, float s);
void CUBLASWINAPI cublasZdrot (int n, cuDoubleComplex *x, int incx,
cuDoubleComplex *y, int incy, double c, double s);
/*------------------------------------------------------------------------*/
/* ROTG */
void CUBLASWINAPI cublasSrotg (float *sa, float *sb, float *sc, float *ss);
void CUBLASWINAPI cublasDrotg (double *sa, double *sb, double *sc, double *ss);
void CUBLASWINAPI cublasCrotg (cuComplex *ca, cuComplex cb, float *sc,
cuComplex *cs);
void CUBLASWINAPI cublasZrotg (cuDoubleComplex *ca, cuDoubleComplex cb, double *sc,
cuDoubleComplex *cs);
/*------------------------------------------------------------------------*/
/* ROTM */
void CUBLASWINAPI cublasSrotm(int n, float *x, int incx, float *y, int incy,
const float* sparam);
void CUBLASWINAPI cublasDrotm(int n, double *x, int incx, double *y, int incy,
const double* sparam);
/*------------------------------------------------------------------------*/
/* ROTMG */
void CUBLASWINAPI cublasSrotmg (float *sd1, float *sd2, float *sx1,
const float *sy1, float* sparam);
void CUBLASWINAPI cublasDrotmg (double *sd1, double *sd2, double *sx1,
const double *sy1, double* sparam);
/* --------------- CUBLAS BLAS2 functions ---------------- */
/* GEMV */
void CUBLASWINAPI cublasSgemv (char trans, int m, int n, float alpha,
const float *A, int lda, const float *x, int incx,
float beta, float *y, int incy);
void CUBLASWINAPI cublasDgemv (char trans, int m, int n, double alpha,
const double *A, int lda, const double *x, int incx,
double beta, double *y, int incy);
void CUBLASWINAPI cublasCgemv (char trans, int m, int n, cuComplex alpha,
const cuComplex *A, int lda, const cuComplex *x, int incx,
cuComplex beta, cuComplex *y, int incy);
void CUBLASWINAPI cublasZgemv (char trans, int m, int n, cuDoubleComplex alpha,
const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx,
cuDoubleComplex beta, cuDoubleComplex *y, int incy);
/*------------------------------------------------------------------------*/
/* GBMV */
void CUBLASWINAPI cublasSgbmv (char trans, int m, int n, int kl, int ku,
float alpha, const float *A, int lda,
const float *x, int incx, float beta, float *y,
int incy);
void CUBLASWINAPI cublasDgbmv (char trans, int m, int n, int kl, int ku,
double alpha, const double *A, int lda,
const double *x, int incx, double beta, double *y,
int incy);
void CUBLASWINAPI cublasCgbmv (char trans, int m, int n, int kl, int ku,
cuComplex alpha, const cuComplex *A, int lda,
const cuComplex *x, int incx, cuComplex beta, cuComplex *y,
int incy);
void CUBLASWINAPI cublasZgbmv (char trans, int m, int n, int kl, int ku,
cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
const cuDoubleComplex *x, int incx, cuDoubleComplex beta, cuDoubleComplex *y,
int incy);
/*------------------------------------------------------------------------*/
/* TRMV */
void CUBLASWINAPI cublasStrmv (char uplo, char trans, char diag, int n,
const float *A, int lda, float *x, int incx);
void CUBLASWINAPI cublasDtrmv (char uplo, char trans, char diag, int n,
const double *A, int lda, double *x, int incx);
void CUBLASWINAPI cublasCtrmv (char uplo, char trans, char diag, int n,
const cuComplex *A, int lda, cuComplex *x, int incx);
void CUBLASWINAPI cublasZtrmv (char uplo, char trans, char diag, int n,
const cuDoubleComplex *A, int lda, cuDoubleComplex *x, int incx);
/*------------------------------------------------------------------------*/
/* TBMV */
void CUBLASWINAPI cublasStbmv (char uplo, char trans, char diag, int n, int k,
const float *A, int lda, float *x, int incx);
void CUBLASWINAPI cublasDtbmv (char uplo, char trans, char diag, int n, int k,
const double *A, int lda, double *x, int incx);
void CUBLASWINAPI cublasCtbmv (char uplo, char trans, char diag, int n, int k,
const cuComplex *A, int lda, cuComplex *x, int incx);
void CUBLASWINAPI cublasZtbmv (char uplo, char trans, char diag, int n, int k,
const cuDoubleComplex *A, int lda, cuDoubleComplex *x, int incx);
/*------------------------------------------------------------------------*/
/* TPMV */
void CUBLASWINAPI cublasStpmv(char uplo, char trans, char diag, int n, const float *AP, float *x, int incx);
void CUBLASWINAPI cublasDtpmv(char uplo, char trans, char diag, int n, const double *AP, double *x, int incx);
void CUBLASWINAPI cublasCtpmv(char uplo, char trans, char diag, int n, const cuComplex *AP, cuComplex *x, int incx);
void CUBLASWINAPI cublasZtpmv(char uplo, char trans, char diag, int n, const cuDoubleComplex *AP, cuDoubleComplex *x, int incx);
/*------------------------------------------------------------------------*/
/* TRSV */
void CUBLASWINAPI cublasStrsv(char uplo, char trans, char diag, int n, const float *A, int lda, float *x, int incx);
void CUBLASWINAPI cublasDtrsv(char uplo, char trans, char diag, int n, const double *A, int lda, double *x, int incx);
void CUBLASWINAPI cublasCtrsv(char uplo, char trans, char diag, int n, const cuComplex *A, int lda, cuComplex *x, int incx);
void CUBLASWINAPI cublasZtrsv(char uplo, char trans, char diag, int n, const cuDoubleComplex *A, int lda,
cuDoubleComplex *x, int incx);
/*------------------------------------------------------------------------*/
/* TPSV */
void CUBLASWINAPI cublasStpsv(char uplo, char trans, char diag, int n, const float *AP,
float *x, int incx);
void CUBLASWINAPI cublasDtpsv(char uplo, char trans, char diag, int n, const double *AP, double *x, int incx);
void CUBLASWINAPI cublasCtpsv(char uplo, char trans, char diag, int n, const cuComplex *AP, cuComplex *x, int incx);
void CUBLASWINAPI cublasZtpsv(char uplo, char trans, char diag, int n, const cuDoubleComplex *AP,
cuDoubleComplex *x, int incx);
/*------------------------------------------------------------------------*/
/* TBSV */
void CUBLASWINAPI cublasStbsv(char uplo, char trans,
char diag, int n, int k, const float *A,
int lda, float *x, int incx);
void CUBLASWINAPI cublasDtbsv(char uplo, char trans,
char diag, int n, int k, const double *A,
int lda, double *x, int incx);
void CUBLASWINAPI cublasCtbsv(char uplo, char trans,
char diag, int n, int k, const cuComplex *A,
int lda, cuComplex *x, int incx);
void CUBLASWINAPI cublasZtbsv(char uplo, char trans,
char diag, int n, int k, const cuDoubleComplex *A,
int lda, cuDoubleComplex *x, int incx);
/*------------------------------------------------------------------------*/
/* SYMV/HEMV */
void CUBLASWINAPI cublasSsymv (char uplo, int n, float alpha, const float *A,
int lda, const float *x, int incx, float beta,
float *y, int incy);
void CUBLASWINAPI cublasDsymv (char uplo, int n, double alpha, const double *A,
int lda, const double *x, int incx, double beta,
double *y, int incy);
void CUBLASWINAPI cublasChemv (char uplo, int n, cuComplex alpha, const cuComplex *A,
int lda, const cuComplex *x, int incx, cuComplex beta,
cuComplex *y, int incy);
void CUBLASWINAPI cublasZhemv (char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex *A,
int lda, const cuDoubleComplex *x, int incx, cuDoubleComplex beta,
cuDoubleComplex *y, int incy);
/*------------------------------------------------------------------------*/
/* SBMV/HBMV */
void CUBLASWINAPI cublasSsbmv (char uplo, int n, int k, float alpha,
const float *A, int lda, const float *x, int incx,
float beta, float *y, int incy);
void CUBLASWINAPI cublasDsbmv (char uplo, int n, int k, double alpha,
const double *A, int lda, const double *x, int incx,
double beta, double *y, int incy);
void CUBLASWINAPI cublasChbmv (char uplo, int n, int k, cuComplex alpha,
const cuComplex *A, int lda, const cuComplex *x, int incx,
cuComplex beta, cuComplex *y, int incy);
void CUBLASWINAPI cublasZhbmv (char uplo, int n, int k, cuDoubleComplex alpha,
const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx,
cuDoubleComplex beta, cuDoubleComplex *y, int incy);
/*------------------------------------------------------------------------*/
/* SPMV/HPMV */
void CUBLASWINAPI cublasSspmv(char uplo, int n, float alpha,
const float *AP, const float *x,
int incx, float beta, float *y, int incy);
void CUBLASWINAPI cublasDspmv(char uplo, int n, double alpha,
const double *AP, const double *x,
int incx, double beta, double *y, int incy);
void CUBLASWINAPI cublasChpmv(char uplo, int n, cuComplex alpha,
const cuComplex *AP, const cuComplex *x,
int incx, cuComplex beta, cuComplex *y, int incy);
void CUBLASWINAPI cublasZhpmv(char uplo, int n, cuDoubleComplex alpha,
const cuDoubleComplex *AP, const cuDoubleComplex *x,
int incx, cuDoubleComplex beta, cuDoubleComplex *y, int incy);
/*------------------------------------------------------------------------*/
/* GER */
void CUBLASWINAPI cublasSger (int m, int n, float alpha, const float *x, int incx,
const float *y, int incy, float *A, int lda);
void CUBLASWINAPI cublasDger (int m, int n, double alpha, const double *x, int incx,
const double *y, int incy, double *A, int lda);
void CUBLASWINAPI cublasCgeru (int m, int n, cuComplex alpha, const cuComplex *x,
int incx, const cuComplex *y, int incy,
cuComplex *A, int lda);
void CUBLASWINAPI cublasCgerc (int m, int n, cuComplex alpha, const cuComplex *x,
int incx, const cuComplex *y, int incy,
cuComplex *A, int lda);
void CUBLASWINAPI cublasZgeru (int m, int n, cuDoubleComplex alpha, const cuDoubleComplex *x,
int incx, const cuDoubleComplex *y, int incy,
cuDoubleComplex *A, int lda);
void CUBLASWINAPI cublasZgerc (int m, int n, cuDoubleComplex alpha, const cuDoubleComplex *x,
int incx, const cuDoubleComplex *y, int incy,
cuDoubleComplex *A, int lda);
/*------------------------------------------------------------------------*/
/* SYR/HER */
void CUBLASWINAPI cublasSsyr (char uplo, int n, float alpha, const float *x,
int incx, float *A, int lda);
void CUBLASWINAPI cublasDsyr (char uplo, int n, double alpha, const double *x,
int incx, double *A, int lda);
void CUBLASWINAPI cublasCher (char uplo, int n, float alpha,
const cuComplex *x, int incx, cuComplex *A, int lda);
void CUBLASWINAPI cublasZher (char uplo, int n, double alpha,
const cuDoubleComplex *x, int incx, cuDoubleComplex *A, int lda);
/*------------------------------------------------------------------------*/
/* SPR/HPR */
void CUBLASWINAPI cublasSspr (char uplo, int n, float alpha, const float *x,
int incx, float *AP);
void CUBLASWINAPI cublasDspr (char uplo, int n, double alpha, const double *x,
int incx, double *AP);
void CUBLASWINAPI cublasChpr (char uplo, int n, float alpha, const cuComplex *x,
int incx, cuComplex *AP);
void CUBLASWINAPI cublasZhpr (char uplo, int n, double alpha, const cuDoubleComplex *x,
int incx, cuDoubleComplex *AP);
/*------------------------------------------------------------------------*/
/* SYR2/HER2 */
void CUBLASWINAPI cublasSsyr2 (char uplo, int n, float alpha, const float *x,
int incx, const float *y, int incy, float *A,
int lda);
void CUBLASWINAPI cublasDsyr2 (char uplo, int n, double alpha, const double *x,
int incx, const double *y, int incy, double *A,
int lda);
void CUBLASWINAPI cublasCher2 (char uplo, int n, cuComplex alpha, const cuComplex *x,
int incx, const cuComplex *y, int incy, cuComplex *A,
int lda);
void CUBLASWINAPI cublasZher2 (char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex *x,
int incx, const cuDoubleComplex *y, int incy, cuDoubleComplex *A,
int lda);
/*------------------------------------------------------------------------*/
/* SPR2/HPR2 */
void CUBLASWINAPI cublasSspr2 (char uplo, int n, float alpha, const float *x,
int incx, const float *y, int incy, float *AP);
void CUBLASWINAPI cublasDspr2 (char uplo, int n, double alpha,
const double *x, int incx, const double *y,
int incy, double *AP);
void CUBLASWINAPI cublasChpr2 (char uplo, int n, cuComplex alpha,
const cuComplex *x, int incx, const cuComplex *y,
int incy, cuComplex *AP);
void CUBLASWINAPI cublasZhpr2 (char uplo, int n, cuDoubleComplex alpha,
const cuDoubleComplex *x, int incx, const cuDoubleComplex *y,
int incy, cuDoubleComplex *AP);
/* ------------------------BLAS3 Functions ------------------------------- */
/* GEMM */
void CUBLASWINAPI cublasSgemm (char transa, char transb, int m, int n, int k,
float alpha, const float *A, int lda,
const float *B, int ldb, float beta, float *C,
int ldc);
void CUBLASWINAPI cublasDgemm (char transa, char transb, int m, int n, int k,
double alpha, const double *A, int lda,
const double *B, int ldb, double beta, double *C,
int ldc);
void CUBLASWINAPI cublasCgemm (char transa, char transb, int m, int n, int k,
cuComplex alpha, const cuComplex *A, int lda,
const cuComplex *B, int ldb, cuComplex beta,
cuComplex *C, int ldc);
void CUBLASWINAPI cublasZgemm (char transa, char transb, int m, int n,
int k, cuDoubleComplex alpha,
const cuDoubleComplex *A, int lda,
const cuDoubleComplex *B, int ldb,
cuDoubleComplex beta, cuDoubleComplex *C,
int ldc);
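/* Illustrative sketch, not part of the original header: a minimal use of the
 * legacy column-major GEMM declared above.  Matrices are n x n and already in
 * host arrays hA, hB, hC; the helper name exampleSgemm is hypothetical and
 * error checking is elided. */
static __inline__ void exampleSgemm(const float *hA, const float *hB, float *hC, int n)
{
    void *dA, *dB, *dC;
    cublasInit();
    cublasAlloc(n * n, (int)sizeof(float), &dA);
    cublasAlloc(n * n, (int)sizeof(float), &dB);
    cublasAlloc(n * n, (int)sizeof(float), &dC);
    cudaMemcpy(dA, hA, (size_t)n * n * sizeof(float), cudaMemcpyHostToDevice);
    cudaMemcpy(dB, hB, (size_t)n * n * sizeof(float), cudaMemcpyHostToDevice);
    /* C = 1.0f * A * B + 0.0f * C, no transposition, leading dimensions n */
    cublasSgemm('N', 'N', n, n, n, 1.0f, (const float *)dA, n,
                (const float *)dB, n, 0.0f, (float *)dC, n);
    cudaMemcpy(hC, dC, (size_t)n * n * sizeof(float), cudaMemcpyDeviceToHost);
    cublasFree(dA); cublasFree(dB); cublasFree(dC);
    cublasShutdown();
}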
/* -------------------------------------------------------*/
/* SYRK */
void CUBLASWINAPI cublasSsyrk (char uplo, char trans, int n, int k, float alpha,
const float *A, int lda, float beta, float *C,
int ldc);
void CUBLASWINAPI cublasDsyrk (char uplo, char trans, int n, int k,
double alpha, const double *A, int lda,
double beta, double *C, int ldc);
void CUBLASWINAPI cublasCsyrk (char uplo, char trans, int n, int k,
cuComplex alpha, const cuComplex *A, int lda,
cuComplex beta, cuComplex *C, int ldc);
void CUBLASWINAPI cublasZsyrk (char uplo, char trans, int n, int k,
cuDoubleComplex alpha,
const cuDoubleComplex *A, int lda,
cuDoubleComplex beta,
cuDoubleComplex *C, int ldc);
/* ------------------------------------------------------- */
/* HERK */
void CUBLASWINAPI cublasCherk (char uplo, char trans, int n, int k,
float alpha, const cuComplex *A, int lda,
float beta, cuComplex *C, int ldc);
void CUBLASWINAPI cublasZherk (char uplo, char trans, int n, int k,
double alpha,
const cuDoubleComplex *A, int lda,
double beta,
cuDoubleComplex *C, int ldc);
/* ------------------------------------------------------- */
/* SYR2K */
void CUBLASWINAPI cublasSsyr2k (char uplo, char trans, int n, int k, float alpha,
const float *A, int lda, const float *B, int ldb,
float beta, float *C, int ldc);
void CUBLASWINAPI cublasDsyr2k (char uplo, char trans, int n, int k,
double alpha, const double *A, int lda,
const double *B, int ldb, double beta,
double *C, int ldc);
void CUBLASWINAPI cublasCsyr2k (char uplo, char trans, int n, int k,
cuComplex alpha, const cuComplex *A, int lda,
const cuComplex *B, int ldb, cuComplex beta,
cuComplex *C, int ldc);
void CUBLASWINAPI cublasZsyr2k (char uplo, char trans, int n, int k,
cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
const cuDoubleComplex *B, int ldb, cuDoubleComplex beta,
cuDoubleComplex *C, int ldc);
/* ------------------------------------------------------- */
/* HER2K */
void CUBLASWINAPI cublasCher2k (char uplo, char trans, int n, int k,
cuComplex alpha, const cuComplex *A, int lda,
const cuComplex *B, int ldb, float beta,
cuComplex *C, int ldc);
void CUBLASWINAPI cublasZher2k (char uplo, char trans, int n, int k,
cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
const cuDoubleComplex *B, int ldb, double beta,
cuDoubleComplex *C, int ldc);
/*------------------------------------------------------------------------*/
/* SYMM*/
void CUBLASWINAPI cublasSsymm (char side, char uplo, int m, int n, float alpha,
const float *A, int lda, const float *B, int ldb,
float beta, float *C, int ldc);
void CUBLASWINAPI cublasDsymm (char side, char uplo, int m, int n, double alpha,
const double *A, int lda, const double *B, int ldb,
double beta, double *C, int ldc);
void CUBLASWINAPI cublasCsymm (char side, char uplo, int m, int n, cuComplex alpha,
const cuComplex *A, int lda, const cuComplex *B, int ldb,
cuComplex beta, cuComplex *C, int ldc);
void CUBLASWINAPI cublasZsymm (char side, char uplo, int m, int n, cuDoubleComplex alpha,
const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
cuDoubleComplex beta, cuDoubleComplex *C, int ldc);
/*------------------------------------------------------------------------*/
/* HEMM*/
void CUBLASWINAPI cublasChemm (char side, char uplo, int m, int n,
cuComplex alpha, const cuComplex *A, int lda,
const cuComplex *B, int ldb, cuComplex beta,
cuComplex *C, int ldc);
void CUBLASWINAPI cublasZhemm (char side, char uplo, int m, int n,
cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
const cuDoubleComplex *B, int ldb, cuDoubleComplex beta,
cuDoubleComplex *C, int ldc);
/*------------------------------------------------------------------------*/
/* TRSM*/
void CUBLASWINAPI cublasStrsm (char side, char uplo, char transa, char diag,
int m, int n, float alpha, const float *A, int lda,
float *B, int ldb);
void CUBLASWINAPI cublasDtrsm (char side, char uplo, char transa,
char diag, int m, int n, double alpha,
const double *A, int lda, double *B,
int ldb);
void CUBLASWINAPI cublasCtrsm (char side, char uplo, char transa, char diag,
int m, int n, cuComplex alpha, const cuComplex *A,
int lda, cuComplex *B, int ldb);
void CUBLASWINAPI cublasZtrsm (char side, char uplo, char transa,
char diag, int m, int n, cuDoubleComplex alpha,
const cuDoubleComplex *A, int lda,
cuDoubleComplex *B, int ldb);
/*------------------------------------------------------------------------*/
/* TRMM*/
void CUBLASWINAPI cublasStrmm (char side, char uplo, char transa, char diag,
int m, int n, float alpha, const float *A, int lda,
float *B, int ldb);
void CUBLASWINAPI cublasDtrmm (char side, char uplo, char transa,
char diag, int m, int n, double alpha,
const double *A, int lda, double *B,
int ldb);
void CUBLASWINAPI cublasCtrmm (char side, char uplo, char transa, char diag,
int m, int n, cuComplex alpha, const cuComplex *A,
int lda, cuComplex *B, int ldb);
void CUBLASWINAPI cublasZtrmm (char side, char uplo, char transa,
char diag, int m, int n, cuDoubleComplex alpha,
const cuDoubleComplex *A, int lda, cuDoubleComplex *B,
int ldb);
#if defined(__cplusplus)
}
#endif /* __cplusplus */
#endif /* !defined(CUBLAS_H_) */
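
The declarations above belong to the legacy, handle-less cuBLAS interface: matrices are column-major, `alpha`/`beta` are passed by value, and the routines return `void` (errors are queried separately). A minimal sketch of how a call to the `cublasSgemm` declared above might look is given below; it assumes the CUDA runtime plus the legacy `cublasInit`/`cublasShutdown` entry points declared elsewhere in this header, and it is illustrative only, not code from this repository.

```
/* Minimal sketch (not from this repository): multiply two column-major
   matrices with the legacy, handle-less cublasSgemm declared above.
   Assumes cublasInit/cublasShutdown and the CUDA runtime are available;
   error checking (cublasGetError) is omitted for brevity. */
#include <cuda_runtime.h>
#include <cublas.h>
#include <vector>

int main() {
  const int m = 64, n = 64, k = 64;
  std::vector<float> hA(m * k, 1.0f), hB(k * n, 1.0f), hC(m * n, 0.0f);

  cublasInit();                                   /* legacy API: one global context */
  float *dA, *dB, *dC;
  cudaMalloc((void**)&dA, m * k * sizeof(float));
  cudaMalloc((void**)&dB, k * n * sizeof(float));
  cudaMalloc((void**)&dC, m * n * sizeof(float));
  cudaMemcpy(dA, hA.data(), m * k * sizeof(float), cudaMemcpyHostToDevice);
  cudaMemcpy(dB, hB.data(), k * n * sizeof(float), cudaMemcpyHostToDevice);

  /* C = 1.0 * A * B + 0.0 * C; alpha/beta are passed by value, matrices are
     column-major, and no status is returned by the call itself. */
  cublasSgemm('N', 'N', m, n, k, 1.0f, dA, m, dB, k, 0.0f, dC, m);

  cudaMemcpy(hC.data(), dC, m * n * sizeof(float), cudaMemcpyDeviceToHost);
  cudaFree(dA); cudaFree(dB); cudaFree(dC);
  cublasShutdown();
  return 0;
}
```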

File diff suppressed because it is too large Load Diff

View File

@@ -1,274 +0,0 @@
/*
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO LICENSEE:
*
* This source code and/or documentation ("Licensed Deliverables") are
* subject to NVIDIA intellectual property rights under U.S. and
* international Copyright laws.
*
* These Licensed Deliverables contained herein is PROPRIETARY and
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
* conditions of a form of NVIDIA software license agreement by and
* between NVIDIA and Licensee ("License Agreement") or electronically
* accepted by Licensee. Notwithstanding any terms or conditions to
* the contrary in the License Agreement, reproduction or disclosure
* of the Licensed Deliverables to any third party without the express
* written consent of NVIDIA is prohibited.
*
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THESE LICENSED DELIVERABLES.
*
* U.S. Government End Users. These Licensed Deliverables are a
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
* 1995), consisting of "commercial computer software" and "commercial
* computer software documentation" as such terms are used in 48
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
* U.S. Government End Users acquire the Licensed Deliverables with
* only those rights set forth herein.
*
* Any use of the Licensed Deliverables in individual and commercial
* software must include, in the user documentation and internal
* comments to the code, the above Disclaimer and U.S. Government End
* Users Notice.
*/
/*
 * This is the public header file for the new cuBLAS library API; it maps the generic
 * cuBLAS function names to the actual _v2 implementations.
*/
#if !defined(CUBLAS_V2_H_)
#define CUBLAS_V2_H_
#undef CUBLASAPI
#ifdef __CUDACC__
#define CUBLASAPI __host__ __device__
#else
#define CUBLASAPI
#endif
#include "cublas_api.h"
#define cublasCreate cublasCreate_v2
#define cublasDestroy cublasDestroy_v2
#define cublasGetVersion cublasGetVersion_v2
#define cublasSetStream cublasSetStream_v2
#define cublasGetStream cublasGetStream_v2
#define cublasGetPointerMode cublasGetPointerMode_v2
#define cublasSetPointerMode cublasSetPointerMode_v2
/* Blas1 Routines */
#define cublasSnrm2 cublasSnrm2_v2
#define cublasDnrm2 cublasDnrm2_v2
#define cublasScnrm2 cublasScnrm2_v2
#define cublasDznrm2 cublasDznrm2_v2
#define cublasSdot cublasSdot_v2
#define cublasDdot cublasDdot_v2
#define cublasCdotu cublasCdotu_v2
#define cublasCdotc cublasCdotc_v2
#define cublasZdotu cublasZdotu_v2
#define cublasZdotc cublasZdotc_v2
#define cublasSscal cublasSscal_v2
#define cublasDscal cublasDscal_v2
#define cublasCscal cublasCscal_v2
#define cublasCsscal cublasCsscal_v2
#define cublasZscal cublasZscal_v2
#define cublasZdscal cublasZdscal_v2
#define cublasSaxpy cublasSaxpy_v2
#define cublasDaxpy cublasDaxpy_v2
#define cublasCaxpy cublasCaxpy_v2
#define cublasZaxpy cublasZaxpy_v2
#define cublasScopy cublasScopy_v2
#define cublasDcopy cublasDcopy_v2
#define cublasCcopy cublasCcopy_v2
#define cublasZcopy cublasZcopy_v2
#define cublasSswap cublasSswap_v2
#define cublasDswap cublasDswap_v2
#define cublasCswap cublasCswap_v2
#define cublasZswap cublasZswap_v2
#define cublasIsamax cublasIsamax_v2
#define cublasIdamax cublasIdamax_v2
#define cublasIcamax cublasIcamax_v2
#define cublasIzamax cublasIzamax_v2
#define cublasIsamin cublasIsamin_v2
#define cublasIdamin cublasIdamin_v2
#define cublasIcamin cublasIcamin_v2
#define cublasIzamin cublasIzamin_v2
#define cublasSasum cublasSasum_v2
#define cublasDasum cublasDasum_v2
#define cublasScasum cublasScasum_v2
#define cublasDzasum cublasDzasum_v2
#define cublasSrot cublasSrot_v2
#define cublasDrot cublasDrot_v2
#define cublasCrot cublasCrot_v2
#define cublasCsrot cublasCsrot_v2
#define cublasZrot cublasZrot_v2
#define cublasZdrot cublasZdrot_v2
#define cublasSrotg cublasSrotg_v2
#define cublasDrotg cublasDrotg_v2
#define cublasCrotg cublasCrotg_v2
#define cublasZrotg cublasZrotg_v2
#define cublasSrotm cublasSrotm_v2
#define cublasDrotm cublasDrotm_v2
#define cublasSrotmg cublasSrotmg_v2
#define cublasDrotmg cublasDrotmg_v2
/* Blas2 Routines */
#define cublasSgemv cublasSgemv_v2
#define cublasDgemv cublasDgemv_v2
#define cublasCgemv cublasCgemv_v2
#define cublasZgemv cublasZgemv_v2
#define cublasSgbmv cublasSgbmv_v2
#define cublasDgbmv cublasDgbmv_v2
#define cublasCgbmv cublasCgbmv_v2
#define cublasZgbmv cublasZgbmv_v2
#define cublasStrmv cublasStrmv_v2
#define cublasDtrmv cublasDtrmv_v2
#define cublasCtrmv cublasCtrmv_v2
#define cublasZtrmv cublasZtrmv_v2
#define cublasStbmv cublasStbmv_v2
#define cublasDtbmv cublasDtbmv_v2
#define cublasCtbmv cublasCtbmv_v2
#define cublasZtbmv cublasZtbmv_v2
#define cublasStpmv cublasStpmv_v2
#define cublasDtpmv cublasDtpmv_v2
#define cublasCtpmv cublasCtpmv_v2
#define cublasZtpmv cublasZtpmv_v2
#define cublasStrsv cublasStrsv_v2
#define cublasDtrsv cublasDtrsv_v2
#define cublasCtrsv cublasCtrsv_v2
#define cublasZtrsv cublasZtrsv_v2
#define cublasStpsv cublasStpsv_v2
#define cublasDtpsv cublasDtpsv_v2
#define cublasCtpsv cublasCtpsv_v2
#define cublasZtpsv cublasZtpsv_v2
#define cublasStbsv cublasStbsv_v2
#define cublasDtbsv cublasDtbsv_v2
#define cublasCtbsv cublasCtbsv_v2
#define cublasZtbsv cublasZtbsv_v2
#define cublasSsymv cublasSsymv_v2
#define cublasDsymv cublasDsymv_v2
#define cublasCsymv cublasCsymv_v2
#define cublasZsymv cublasZsymv_v2
#define cublasChemv cublasChemv_v2
#define cublasZhemv cublasZhemv_v2
#define cublasSsbmv cublasSsbmv_v2
#define cublasDsbmv cublasDsbmv_v2
#define cublasChbmv cublasChbmv_v2
#define cublasZhbmv cublasZhbmv_v2
#define cublasSspmv cublasSspmv_v2
#define cublasDspmv cublasDspmv_v2
#define cublasChpmv cublasChpmv_v2
#define cublasZhpmv cublasZhpmv_v2
#define cublasSger cublasSger_v2
#define cublasDger cublasDger_v2
#define cublasCgeru cublasCgeru_v2
#define cublasCgerc cublasCgerc_v2
#define cublasZgeru cublasZgeru_v2
#define cublasZgerc cublasZgerc_v2
#define cublasSsyr cublasSsyr_v2
#define cublasDsyr cublasDsyr_v2
#define cublasCsyr cublasCsyr_v2
#define cublasZsyr cublasZsyr_v2
#define cublasCher cublasCher_v2
#define cublasZher cublasZher_v2
#define cublasSspr cublasSspr_v2
#define cublasDspr cublasDspr_v2
#define cublasChpr cublasChpr_v2
#define cublasZhpr cublasZhpr_v2
#define cublasSsyr2 cublasSsyr2_v2
#define cublasDsyr2 cublasDsyr2_v2
#define cublasCsyr2 cublasCsyr2_v2
#define cublasZsyr2 cublasZsyr2_v2
#define cublasCher2 cublasCher2_v2
#define cublasZher2 cublasZher2_v2
#define cublasSspr2 cublasSspr2_v2
#define cublasDspr2 cublasDspr2_v2
#define cublasChpr2 cublasChpr2_v2
#define cublasZhpr2 cublasZhpr2_v2
/* Blas3 Routines */
#define cublasSgemm cublasSgemm_v2
#define cublasDgemm cublasDgemm_v2
#define cublasCgemm cublasCgemm_v2
#define cublasZgemm cublasZgemm_v2
#define cublasSsyrk cublasSsyrk_v2
#define cublasDsyrk cublasDsyrk_v2
#define cublasCsyrk cublasCsyrk_v2
#define cublasZsyrk cublasZsyrk_v2
#define cublasCherk cublasCherk_v2
#define cublasZherk cublasZherk_v2
#define cublasSsyr2k cublasSsyr2k_v2
#define cublasDsyr2k cublasDsyr2k_v2
#define cublasCsyr2k cublasCsyr2k_v2
#define cublasZsyr2k cublasZsyr2k_v2
#define cublasCher2k cublasCher2k_v2
#define cublasZher2k cublasZher2k_v2
#define cublasSsymm cublasSsymm_v2
#define cublasDsymm cublasDsymm_v2
#define cublasCsymm cublasCsymm_v2
#define cublasZsymm cublasZsymm_v2
#define cublasChemm cublasChemm_v2
#define cublasZhemm cublasZhemm_v2
#define cublasStrsm cublasStrsm_v2
#define cublasDtrsm cublasDtrsm_v2
#define cublasCtrsm cublasCtrsm_v2
#define cublasZtrsm cublasZtrsm_v2
#define cublasStrmm cublasStrmm_v2
#define cublasDtrmm cublasDtrmm_v2
#define cublasCtrmm cublasCtrmm_v2
#define cublasZtrmm cublasZtrmm_v2
#endif /* !defined(CUBLAS_V2_H_) */
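
The macros above simply redirect the generic cuBLAS names to the handle-based `_v2` entry points declared in `cublas_api.h` (whose diff is not shown here). As a rough sketch of what that means for a caller, assuming the standard v2 API shipped with CUDA: a `cublasHandle_t` is created explicitly, scalars are passed by address, and every routine returns a `cublasStatus_t`.

```
/* Minimal sketch (assumes the v2 API from cublas_api.h as shipped with CUDA):
   the same SGEMM through the handle-based interface that the macros above
   redirect to. */
#include <cublas_v2.h>

cublasStatus_t sgemm_v2(const float* dA, const float* dB, float* dC,
                        int m, int n, int k) {
  cublasHandle_t handle;
  cublasStatus_t status = cublasCreate(&handle);   /* expands to cublasCreate_v2 */
  if (status != CUBLAS_STATUS_SUCCESS) return status;

  const float alpha = 1.0f, beta = 0.0f;            /* now passed by address */
  status = cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N,
                       m, n, k, &alpha, dA, m, dB, k, &beta, dC, m);

  cublasDestroy(handle);                            /* expands to cublasDestroy_v2 */
  return status;
}
```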

File diff suppressed because it is too large Load Diff

View File

@@ -1,248 +0,0 @@
/*
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO LICENSEE:
*
* This source code and/or documentation ("Licensed Deliverables") are
* subject to NVIDIA intellectual property rights under U.S. and
* international Copyright laws.
*
* These Licensed Deliverables contained herein is PROPRIETARY and
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
* conditions of a form of NVIDIA software license agreement by and
* between NVIDIA and Licensee ("License Agreement") or electronically
* accepted by Licensee. Notwithstanding any terms or conditions to
* the contrary in the License Agreement, reproduction or disclosure
* of the Licensed Deliverables to any third party without the express
* written consent of NVIDIA is prohibited.
*
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THESE LICENSED DELIVERABLES.
*
* U.S. Government End Users. These Licensed Deliverables are a
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
* 1995), consisting of "commercial computer software" and "commercial
* computer software documentation" as such terms are used in 48
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
* U.S. Government End Users acquire the Licensed Deliverables with
* only those rights set forth herein.
*
* Any use of the Licensed Deliverables in individual and commercial
* software must include, in the user documentation and internal
* comments to the code, the above Disclaimer and U.S. Government End
* Users Notice.
*/
#if !defined(__CUDA_DEVICE_RUNTIME_API_H__)
#define __CUDA_DEVICE_RUNTIME_API_H__
/*******************************************************************************
* *
* *
* *
*******************************************************************************/
#if !defined(__CUDACC_RTC__)
#if (__CUDA_ARCH__ >= 350) && !defined(__CUDADEVRT_INTERNAL__)
#if defined(__cplusplus)
extern "C" {
#endif
struct cudaFuncAttributes;
#if defined(_WIN32)
#define __NV_WEAK__ __declspec(nv_weak)
#else
#define __NV_WEAK__ __attribute__((nv_weak))
#endif
__device__ __NV_WEAK__ cudaError_t CUDARTAPI cudaMalloc(void **p, size_t s)
{
return cudaErrorUnknown;
}
__device__ __NV_WEAK__ cudaError_t CUDARTAPI cudaFuncGetAttributes(struct cudaFuncAttributes *p, const void *c)
{
return cudaErrorUnknown;
}
__device__ __NV_WEAK__ cudaError_t CUDARTAPI cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device)
{
return cudaErrorUnknown;
}
__device__ __NV_WEAK__ cudaError_t CUDARTAPI cudaGetDevice(int *device)
{
return cudaErrorUnknown;
}
__device__ __NV_WEAK__ cudaError_t CUDARTAPI cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, int blockSize, size_t dynamicSmemSize)
{
return cudaErrorUnknown;
}
__device__ __NV_WEAK__ cudaError_t CUDARTAPI cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, const void *func, int blockSize, size_t dynamicSmemSize, unsigned int flags)
{
return cudaErrorUnknown;
}
#undef __NV_WEAK__
#if defined(__cplusplus)
}
#endif
#endif /* (__CUDA_ARCH__ >= 350) && !defined(__CUDADEVRT_INTERNAL__) */
#endif /* !defined(__CUDACC_RTC__) */
#if defined(__cplusplus) && defined(__CUDACC__) /* Visible to nvcc front-end only */
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 350) // Visible to SM>=3.5 and "__host__ __device__" only
#include "driver_types.h"
#include "host_defines.h"
extern "C"
{
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceGetLimit(size_t *pValue, enum cudaLimit limit);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceGetCacheConfig(enum cudaFuncCache *pCacheConfig);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceGetSharedMemConfig(enum cudaSharedMemConfig *pConfig);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceSynchronize(void);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetLastError(void);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaPeekAtLastError(void);
extern __device__ __cudart_builtin__ const char* CUDARTAPI cudaGetErrorString(cudaError_t error);
extern __device__ __cudart_builtin__ const char* CUDARTAPI cudaGetErrorName(cudaError_t error);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDeviceCount(int *count);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDevice(int *device);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamCreateWithFlags(cudaStream_t *pStream, unsigned int flags);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamDestroy(cudaStream_t stream);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent(cudaStream_t stream, cudaEvent_t event, unsigned int flags);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent_ptsz(cudaStream_t stream, cudaEvent_t event, unsigned int flags);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaEventCreateWithFlags(cudaEvent_t *event, unsigned int flags);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaEventRecord(cudaEvent_t event, cudaStream_t stream);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaEventRecord_ptsz(cudaEvent_t event, cudaStream_t stream);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaEventDestroy(cudaEvent_t event);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaFuncGetAttributes(struct cudaFuncAttributes *attr, const void *func);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaFree(void *devPtr);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMalloc(void **devPtr, size_t size);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpyAsync(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpyAsync_ptsz(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy2DAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, cudaStream_t stream);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy2DAsync_ptsz(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, cudaStream_t stream);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync(const struct cudaMemcpy3DParms *p, cudaStream_t stream);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync_ptsz(const struct cudaMemcpy3DParms *p, cudaStream_t stream);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync(void *devPtr, int value, size_t count, cudaStream_t stream);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync_ptsz(void *devPtr, int value, size_t count, cudaStream_t stream);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemset2DAsync(void *devPtr, size_t pitch, int value, size_t width, size_t height, cudaStream_t stream);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemset2DAsync_ptsz(void *devPtr, size_t pitch, int value, size_t width, size_t height, cudaStream_t stream);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemset3DAsync(struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent, cudaStream_t stream);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemset3DAsync_ptsz(struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent, cudaStream_t stream);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaRuntimeGetVersion(int *runtimeVersion);
/**
* \ingroup CUDART_EXECUTION
* \brief Obtains a parameter buffer
*
* Obtains a parameter buffer which can be filled with parameters for a kernel launch.
* Parameters passed to ::cudaLaunchDevice must be allocated via this function.
*
* This is a low level API and can only be accessed from Parallel Thread Execution (PTX).
* CUDA user code should use <<< >>> to launch kernels.
*
* \param alignment - Specifies alignment requirement of the parameter buffer
* \param size - Specifies size requirement in bytes
*
* \return
* Returns pointer to the allocated parameterBuffer
* \notefnerr
*
* \sa cudaLaunchDevice
*/
extern __device__ __cudart_builtin__ void * CUDARTAPI cudaGetParameterBuffer(size_t alignment, size_t size);
/**
* \ingroup CUDART_EXECUTION
* \brief Launches a specified kernel
*
* Launches a specified kernel with the specified parameter buffer. A parameter buffer can be obtained
* by calling ::cudaGetParameterBuffer().
*
* This is a low level API and can only be accessed from Parallel Thread Execution (PTX).
* CUDA user code should use <<< >>> to launch the kernels.
*
* \param func - Pointer to the kernel to be launched
* \param parameterBuffer - Holds the parameters to the launched kernel. parameterBuffer can be NULL. (Optional)
* \param gridDimension - Specifies grid dimensions
* \param blockDimension - Specifies block dimensions
* \param sharedMemSize - Specifies size of shared memory
* \param stream - Specifies the stream to be used
*
* \return
* ::cudaSuccess, ::cudaErrorInvalidDevice, ::cudaErrorLaunchMaxDepthExceeded, ::cudaErrorInvalidConfiguration,
* ::cudaErrorStartupFailure, ::cudaErrorLaunchPendingCountExceeded, ::cudaErrorLaunchOutOfResources
* \notefnerr
* \n Please refer to Execution Configuration and Parameter Buffer Layout from the CUDA Programming
* Guide for the detailed descriptions of launch configuration and parameter layout respectively.
*
* \sa cudaGetParameterBuffer
*/
extern __device__ __cudart_builtin__ void * CUDARTAPI cudaGetParameterBufferV2(void *func, dim3 gridDimension, dim3 blockDimension, unsigned int sharedMemSize);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaLaunchDevice_ptsz(void *func, void *parameterBuffer, dim3 gridDimension, dim3 blockDimension, unsigned int sharedMemSize, cudaStream_t stream);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaLaunchDeviceV2_ptsz(void *parameterBuffer, cudaStream_t stream);
#if defined(CUDA_API_PER_THREAD_DEFAULT_STREAM) && defined(__CUDA_ARCH__)
// When compiling for the device and per thread default stream is enabled, add
// a static inline redirect to the per thread stream entry points.
static __inline__ __device__ __cudart_builtin__ cudaError_t CUDARTAPI
cudaLaunchDevice(void *func, void *parameterBuffer, dim3 gridDimension, dim3 blockDimension, unsigned int sharedMemSize, cudaStream_t stream)
{
return cudaLaunchDevice_ptsz(func, parameterBuffer, gridDimension, blockDimension, sharedMemSize, stream);
}
static __inline__ __device__ __cudart_builtin__ cudaError_t CUDARTAPI
cudaLaunchDeviceV2(void *parameterBuffer, cudaStream_t stream)
{
return cudaLaunchDeviceV2_ptsz(parameterBuffer, stream);
}
#else
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaLaunchDevice(void *func, void *parameterBuffer, dim3 gridDimension, dim3 blockDimension, unsigned int sharedMemSize, cudaStream_t stream);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaLaunchDeviceV2(void *parameterBuffer, cudaStream_t stream);
#endif
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, int blockSize, size_t dynamicSmemSize);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, const void *func, int blockSize, size_t dynamicSmemSize, unsigned int flags);
extern __device__ __cudart_builtin__ unsigned long long CUDARTAPI cudaCGGetIntrinsicHandle(enum cudaCGScope scope);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaCGSynchronize(unsigned long long handle, unsigned int flags);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaCGGetSize(unsigned int *numThreads, unsigned int *numGrids, unsigned long long handle);
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaCGGetRank(unsigned int *threadRank, unsigned int *gridRank, unsigned long long handle);
}
template <typename T> static __inline__ __device__ __cudart_builtin__ cudaError_t cudaMalloc(T **devPtr, size_t size);
template <typename T> static __inline__ __device__ __cudart_builtin__ cudaError_t cudaFuncGetAttributes(struct cudaFuncAttributes *attr, T *entry);
template <typename T> static __inline__ __device__ __cudart_builtin__ cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, T func, int blockSize, size_t dynamicSmemSize);
template <typename T> static __inline__ __device__ __cudart_builtin__ cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, T func, int blockSize, size_t dynamicSmemSize, unsigned int flags);
#endif // !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 350)
#endif /* defined(__cplusplus) && defined(__CUDACC__) */
#endif /* !__CUDA_DEVICE_RUNTIME_API_H__ */
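
The header above is the device-side runtime used by CUDA dynamic parallelism: as its comments note, `cudaGetParameterBuffer`/`cudaLaunchDevice` are the low-level PTX path, while user code launches child kernels with `<<< >>>`. A hedged sketch of that documented usage (not code from this repository) follows; it assumes an sm_35-or-newer device and compilation with `-rdc=true` plus linking against `cudadevrt`.

```
/* Minimal dynamic-parallelism sketch: a parent kernel launches a child kernel
   with <<< >>>, which the compiler lowers to the cudaGetParameterBuffer /
   cudaLaunchDevice calls declared above.
   Build (assumption): nvcc -arch=sm_35 -rdc=true parent.cu -lcudadevrt */
#include <cstdio>

__global__ void child(int depth) {
  printf("child kernel, depth %d, thread %d\n", depth, threadIdx.x);
}

__global__ void parent() {
  if (threadIdx.x == 0) {
    child<<<1, 4>>>(1);            /* device-side launch */
    cudaDeviceSynchronize();       /* device-side runtime call declared above */
  }
}

int main() {
  parent<<<1, 32>>>();
  cudaDeviceSynchronize();
  return 0;
}
```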

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,69 +0,0 @@
/*
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO LICENSEE:
*
* This source code and/or documentation ("Licensed Deliverables") are
* subject to NVIDIA intellectual property rights under U.S. and
* international Copyright laws.
*
* These Licensed Deliverables contained herein is PROPRIETARY and
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
* conditions of a form of NVIDIA software license agreement by and
* between NVIDIA and Licensee ("License Agreement") or electronically
* accepted by Licensee. Notwithstanding any terms or conditions to
* the contrary in the License Agreement, reproduction or disclosure
* of the Licensed Deliverables to any third party without the express
* written consent of NVIDIA is prohibited.
*
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THESE LICENSED DELIVERABLES.
*
* U.S. Government End Users. These Licensed Deliverables are a
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
* 1995), consisting of "commercial computer software" and "commercial
* computer software documentation" as such terms are used in 48
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
* U.S. Government End Users acquire the Licensed Deliverables with
* only those rights set forth herein.
*
* Any use of the Licensed Deliverables in individual and commercial
* software must include, in the user documentation and internal
* comments to the code, the above Disclaimer and U.S. Government End
* Users Notice.
*/
#if !defined(__DEVICE_TYPES_H__)
#define __DEVICE_TYPES_H__
#include "host_defines.h"
/*******************************************************************************
* *
* *
* *
*******************************************************************************/
enum __device_builtin__ cudaRoundMode
{
cudaRoundNearest,
cudaRoundZero,
cudaRoundPosInf,
cudaRoundMinInf
};
#endif /* !__DEVICE_TYPES_H__ */

View File

@@ -1,145 +0,0 @@
/*
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO LICENSEE:
*
* This source code and/or documentation ("Licensed Deliverables") are
* subject to NVIDIA intellectual property rights under U.S. and
* international Copyright laws.
*
* These Licensed Deliverables contained herein is PROPRIETARY and
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
* conditions of a form of NVIDIA software license agreement by and
* between NVIDIA and Licensee ("License Agreement") or electronically
* accepted by Licensee. Notwithstanding any terms or conditions to
* the contrary in the License Agreement, reproduction or disclosure
* of the Licensed Deliverables to any third party without the express
* written consent of NVIDIA is prohibited.
*
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THESE LICENSED DELIVERABLES.
*
* U.S. Government End Users. These Licensed Deliverables are a
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
* 1995), consisting of "commercial computer software" and "commercial
* computer software documentation" as such terms are used in 48
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
* U.S. Government End Users acquire the Licensed Deliverables with
* only those rights set forth herein.
*
* Any use of the Licensed Deliverables in individual and commercial
* software must include, in the user documentation and internal
* comments to the code, the above Disclaimer and U.S. Government End
* Users Notice.
*/
#if !defined(__DRIVER_FUNCTIONS_H__)
#define __DRIVER_FUNCTIONS_H__
#include "builtin_types.h"
#include "host_defines.h"
#include "driver_types.h"
/**
* \addtogroup CUDART_MEMORY
*
* @{
*/
/**
* \brief Returns a cudaPitchedPtr based on input parameters
*
* Returns a ::cudaPitchedPtr based on the specified input parameters \p d,
* \p p, \p xsz, and \p ysz.
*
* \param d - Pointer to allocated memory
* \param p - Pitch of allocated memory in bytes
* \param xsz - Logical width of allocation in elements
* \param ysz - Logical height of allocation in elements
*
* \return
* ::cudaPitchedPtr specified by \p d, \p p, \p xsz, and \p ysz
*
* \sa make_cudaExtent, make_cudaPos
*/
static __inline__ __host__ struct cudaPitchedPtr make_cudaPitchedPtr(void *d, size_t p, size_t xsz, size_t ysz)
{
struct cudaPitchedPtr s;
s.ptr = d;
s.pitch = p;
s.xsize = xsz;
s.ysize = ysz;
return s;
}
/**
* \brief Returns a cudaPos based on input parameters
*
* Returns a ::cudaPos based on the specified input parameters \p x,
* \p y, and \p z.
*
* \param x - X position
* \param y - Y position
* \param z - Z position
*
* \return
* ::cudaPos specified by \p x, \p y, and \p z
*
* \sa make_cudaExtent, make_cudaPitchedPtr
*/
static __inline__ __host__ struct cudaPos make_cudaPos(size_t x, size_t y, size_t z)
{
struct cudaPos p;
p.x = x;
p.y = y;
p.z = z;
return p;
}
/**
* \brief Returns a cudaExtent based on input parameters
*
* Returns a ::cudaExtent based on the specified input parameters \p w,
* \p h, and \p d.
*
* \param w - Width in elements when referring to array memory, in bytes when referring to linear memory
* \param h - Height in elements
* \param d - Depth in elements
*
* \return
* ::cudaExtent specified by \p w, \p h, and \p d
*
* \sa make_cudaPitchedPtr, make_cudaPos
*/
static __inline__ __host__ struct cudaExtent make_cudaExtent(size_t w, size_t h, size_t d)
{
struct cudaExtent e;
e.width = w;
e.height = h;
e.depth = d;
return e;
}
/** @} */ /* END CUDART_MEMORY */
#endif /* !__DRIVER_FUNCTIONS_H__ */
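
The `make_cudaExtent`/`make_cudaPitchedPtr`/`make_cudaPos` helpers above are normally paired with the 3D memory routines of the runtime API. A minimal sketch, assuming `cudaMalloc3D` and `cudaMemset3D` from the runtime headers (declared elsewhere, not in this file):

```
/* Minimal sketch (assumes cudaMalloc3D/cudaMemset3D from the CUDA runtime):
   allocate and clear a 3D pitched buffer using the helpers defined above. */
#include <cuda_runtime.h>

int alloc3d(size_t width, size_t height, size_t depth) {
  /* for pitched/linear allocations the extent width is given in bytes */
  cudaExtent extent = make_cudaExtent(width * sizeof(float), height, depth);

  cudaPitchedPtr buf;
  if (cudaMalloc3D(&buf, extent) != cudaSuccess) return -1;
  cudaMemset3D(buf, 0, extent);            /* zero the whole pitched volume */

  /* buf.pitch is the padded row size chosen by the allocator */
  cudaFree(buf.ptr);
  return 0;
}
```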

File diff suppressed because it is too large Load Diff

View File

@@ -1,50 +0,0 @@
/*
* Copyright 1993-2017 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO LICENSEE:
*
* This source code and/or documentation ("Licensed Deliverables") are
* subject to NVIDIA intellectual property rights under U.S. and
* international Copyright laws.
*
* These Licensed Deliverables contained herein is PROPRIETARY and
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
* conditions of a form of NVIDIA software license agreement by and
* between NVIDIA and Licensee ("License Agreement") or electronically
* accepted by Licensee. Notwithstanding any terms or conditions to
* the contrary in the License Agreement, reproduction or disclosure
* of the Licensed Deliverables to any third party without the express
* written consent of NVIDIA is prohibited.
*
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THESE LICENSED DELIVERABLES.
*
* U.S. Government End Users. These Licensed Deliverables are a
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
* 1995), consisting of "commercial computer software" and "commercial
* computer software documentation" as such terms are used in 48
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
* U.S. Government End Users acquire the Licensed Deliverables with
* only those rights set forth herein.
*
* Any use of the Licensed Deliverables in individual and commercial
* software must include, in the user documentation and internal
* comments to the code, the above Disclaimer and U.S. Government End
* Users Notice.
*/
#include "crt/host_config.h"

View File

@@ -1,50 +0,0 @@
/*
* Copyright 1993-2017 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO LICENSEE:
*
* This source code and/or documentation ("Licensed Deliverables") are
* subject to NVIDIA intellectual property rights under U.S. and
* international Copyright laws.
*
* These Licensed Deliverables contained herein is PROPRIETARY and
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
* conditions of a form of NVIDIA software license agreement by and
* between NVIDIA and Licensee ("License Agreement") or electronically
* accepted by Licensee. Notwithstanding any terms or conditions to
* the contrary in the License Agreement, reproduction or disclosure
* of the Licensed Deliverables to any third party without the express
* written consent of NVIDIA is prohibited.
*
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THESE LICENSED DELIVERABLES.
*
* U.S. Government End Users. These Licensed Deliverables are a
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
* 1995), consisting of "commercial computer software" and "commercial
* computer software documentation" as such terms are used in 48
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
* U.S. Government End Users acquire the Licensed Deliverables with
* only those rights set forth herein.
*
* Any use of the Licensed Deliverables in individual and commercial
* software must include, in the user documentation and internal
* comments to the code, the above Disclaimer and U.S. Government End
* Users Notice.
*/
#include "crt/host_defines.h"

View File

@@ -1,80 +0,0 @@
/*
* Copyright 1993-2015 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO LICENSEE:
*
* This source code and/or documentation ("Licensed Deliverables") are
* subject to NVIDIA intellectual property rights under U.S. and
* international Copyright laws.
*
* These Licensed Deliverables contained herein is PROPRIETARY and
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
* conditions of a form of NVIDIA software license agreement by and
* between NVIDIA and Licensee ("License Agreement") or electronically
* accepted by Licensee. Notwithstanding any terms or conditions to
* the contrary in the License Agreement, reproduction or disclosure
* of the Licensed Deliverables to any third party without the express
* written consent of NVIDIA is prohibited.
*
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THESE LICENSED DELIVERABLES.
*
* U.S. Government End Users. These Licensed Deliverables are a
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
* 1995), consisting of "commercial computer software" and "commercial
* computer software documentation" as such terms are used in 48
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
* U.S. Government End Users acquire the Licensed Deliverables with
* only those rights set forth herein.
*
* Any use of the Licensed Deliverables in individual and commercial
* software must include, in the user documentation and internal
* comments to the code, the above Disclaimer and U.S. Government End
* Users Notice.
*/
#if !defined(__LIBRARY_TYPES_H__)
#define __LIBRARY_TYPES_H__
typedef enum cudaDataType_t
{
CUDA_R_16F= 2, /* real as a half */
CUDA_C_16F= 6, /* complex as a pair of half numbers */
CUDA_R_32F= 0, /* real as a float */
CUDA_C_32F= 4, /* complex as a pair of float numbers */
CUDA_R_64F= 1, /* real as a double */
CUDA_C_64F= 5, /* complex as a pair of double numbers */
CUDA_R_8I = 3, /* real as a signed char */
CUDA_C_8I = 7, /* complex as a pair of signed char numbers */
CUDA_R_8U = 8, /* real as an unsigned char */
CUDA_C_8U = 9, /* complex as a pair of unsigned char numbers */
CUDA_R_32I= 10, /* real as a signed int */
CUDA_C_32I= 11, /* complex as a pair of signed int numbers */
CUDA_R_32U= 12, /* real as an unsigned int */
CUDA_C_32U= 13 /* complex as a pair of unsigned int numbers */
} cudaDataType;
typedef enum libraryPropertyType_t
{
MAJOR_VERSION,
MINOR_VERSION,
PATCH_LEVEL
} libraryPropertyType;
#endif /* !__LIBRARY_TYPES_H__ */
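
`cudaDataType` is how the CUDA libraries describe operand and compute precisions for mixed-precision routines. As one hedged illustration, assuming `cublasGemmEx` from `cublas_api.h` (not shown in this commit) with the pre-CUDA-11 signature that takes a `cudaDataType` compute type:

```
/* Sketch only (assumes cublasGemmEx from cublas_api.h, not shown here):
   cudaDataType tags describe operand and compute precisions of a
   mixed-precision GEMM -- half-precision inputs accumulated in float. */
#include <cublas_v2.h>
#include <cuda_fp16.h>

cublasStatus_t gemm_fp16(cublasHandle_t handle,
                         const __half* dA, const __half* dB, float* dC,
                         int m, int n, int k) {
  const float alpha = 1.0f, beta = 0.0f;
  return cublasGemmEx(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k,
                      &alpha,
                      dA, CUDA_R_16F, m,     /* A is half precision */
                      dB, CUDA_R_16F, k,     /* B is half precision */
                      &beta,
                      dC, CUDA_R_32F, m,     /* C / accumulator are float */
                      CUDA_R_32F,            /* compute type (pre-CUDA-11 form) */
                      CUBLAS_GEMM_DEFAULT);
}
```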

File diff suppressed because it is too large Load Diff

View File

@@ -1,525 +0,0 @@
//
// NVIDIA_COPYRIGHT_BEGIN
//
// Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
//
// NVIDIA CORPORATION and its licensors retain all intellectual property
// and proprietary rights in and to this software, related documentation
// and any modifications thereto. Any use, reproduction, disclosure or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA CORPORATION is strictly prohibited.
//
// NVIDIA_COPYRIGHT_END
//
#ifndef __NVRTC_H__
#define __NVRTC_H__
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
#include <stdlib.h>
/*************************************************************************//**
*
* \defgroup error Error Handling
*
* NVRTC defines the following enumeration type and function for API call
* error handling.
*
****************************************************************************/
/**
* \ingroup error
* \brief The enumerated type nvrtcResult defines API call result codes.
* NVRTC API functions return nvrtcResult to indicate the call
* result.
*/
typedef enum {
NVRTC_SUCCESS = 0,
NVRTC_ERROR_OUT_OF_MEMORY = 1,
NVRTC_ERROR_PROGRAM_CREATION_FAILURE = 2,
NVRTC_ERROR_INVALID_INPUT = 3,
NVRTC_ERROR_INVALID_PROGRAM = 4,
NVRTC_ERROR_INVALID_OPTION = 5,
NVRTC_ERROR_COMPILATION = 6,
NVRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7,
NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8,
NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9,
NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10,
NVRTC_ERROR_INTERNAL_ERROR = 11
} nvrtcResult;
/**
* \ingroup error
* \brief nvrtcGetErrorString is a helper function that returns a string
* describing the given nvrtcResult code, e.g., NVRTC_SUCCESS to
* \c "NVRTC_SUCCESS".
* For unrecognized enumeration values, it returns
* \c "NVRTC_ERROR unknown".
*
* \param [in] result CUDA Runtime Compilation API result code.
* \return Message string for the given #nvrtcResult code.
*/
const char *nvrtcGetErrorString(nvrtcResult result);
/*************************************************************************//**
*
* \defgroup query General Information Query
*
* NVRTC defines the following function for general information query.
*
****************************************************************************/
/**
* \ingroup query
* \brief nvrtcVersion sets the output parameters \p major and \p minor
* with the CUDA Runtime Compilation version number.
*
* \param [out] major CUDA Runtime Compilation major version number.
* \param [out] minor CUDA Runtime Compilation minor version number.
* \return
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
*
*/
nvrtcResult nvrtcVersion(int *major, int *minor);
/*************************************************************************//**
*
* \defgroup compilation Compilation
*
* NVRTC defines the following type and functions for actual compilation.
*
****************************************************************************/
/**
* \ingroup compilation
* \brief nvrtcProgram is the unit of compilation, and an opaque handle for
* a program.
*
* To compile a CUDA program string, an instance of nvrtcProgram must be
* created first with ::nvrtcCreateProgram, then compiled with
* ::nvrtcCompileProgram.
*/
typedef struct _nvrtcProgram *nvrtcProgram;
/**
* \ingroup compilation
* \brief nvrtcCreateProgram creates an instance of nvrtcProgram with the
* given input parameters, and sets the output parameter \p prog with
* it.
*
* \param [out] prog CUDA Runtime Compilation program.
* \param [in] src CUDA program source.
* \param [in] name CUDA program name.\n
* \p name can be \c NULL; \c "default_program" is
* used when \p name is \c NULL.
* \param [in] numHeaders Number of headers used.\n
* \p numHeaders must be greater than or equal to 0.
* \param [in] headers Sources of the headers.\n
* \p headers can be \c NULL when \p numHeaders is
* 0.
* \param [in] includeNames Name of each header by which they can be
* included in the CUDA program source.\n
* \p includeNames can be \c NULL when \p numHeaders
* is 0.
* \return
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
* - \link #nvrtcResult NVRTC_ERROR_OUT_OF_MEMORY \endlink
* - \link #nvrtcResult NVRTC_ERROR_PROGRAM_CREATION_FAILURE \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
*
* \see ::nvrtcDestroyProgram
*/
nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog,
const char *src,
const char *name,
int numHeaders,
const char * const *headers,
const char * const *includeNames);
/**
* \ingroup compilation
* \brief nvrtcDestroyProgram destroys the given program.
*
* \param [in] prog CUDA Runtime Compilation program.
* \return
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
*
* \see ::nvrtcCreateProgram
*/
nvrtcResult nvrtcDestroyProgram(nvrtcProgram *prog);
/**
* \ingroup compilation
* \brief nvrtcCompileProgram compiles the given program.
*
* It supports compile options listed in \ref options.
*/
nvrtcResult nvrtcCompileProgram(nvrtcProgram prog,
int numOptions, const char * const *options);
/**
* \ingroup compilation
* \brief nvrtcGetPTXSize sets \p ptxSizeRet with the size of the PTX
* generated by the previous compilation of \p prog (including the
* trailing \c NULL).
*
* \param [in] prog CUDA Runtime Compilation program.
* \param [out] ptxSizeRet Size of the generated PTX (including the trailing
* \c NULL).
* \return
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
*
* \see ::nvrtcGetPTX
*/
nvrtcResult nvrtcGetPTXSize(nvrtcProgram prog, size_t *ptxSizeRet);
/**
* \ingroup compilation
* \brief nvrtcGetPTX stores the PTX generated by the previous compilation
* of \p prog in the memory pointed by \p ptx.
*
* \param [in] prog CUDA Runtime Compilation program.
* \param [out] ptx Compiled result.
* \return
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
*
* \see ::nvrtcGetPTXSize
*/
nvrtcResult nvrtcGetPTX(nvrtcProgram prog, char *ptx);
/**
* \ingroup compilation
* \brief nvrtcGetProgramLogSize sets \p logSizeRet with the size of the
* log generated by the previous compilation of \p prog (including the
* trailing \c NULL).
*
 * Note that a compilation log may be generated with warnings and informative
* messages, even when the compilation of \p prog succeeds.
*
* \param [in] prog CUDA Runtime Compilation program.
* \param [out] logSizeRet Size of the compilation log
* (including the trailing \c NULL).
* \return
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
*
* \see ::nvrtcGetProgramLog
*/
nvrtcResult nvrtcGetProgramLogSize(nvrtcProgram prog, size_t *logSizeRet);
/**
* \ingroup compilation
* \brief nvrtcGetProgramLog stores the log generated by the previous
* compilation of \p prog in the memory pointed by \p log.
*
* \param [in] prog CUDA Runtime Compilation program.
* \param [out] log Compilation log.
* \return
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
*
* \see ::nvrtcGetProgramLogSize
*/
nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char *log);
/**
* \ingroup compilation
* \brief nvrtcAddNameExpression notes the given name expression
* denoting a __global__ function or function template
* instantiation.
*
* The identical name expression string must be provided on a subsequent
* call to nvrtcGetLoweredName to extract the lowered name.
* \param [in] prog CUDA Runtime Compilation program.
* \param [in] name_expression constant expression denoting a __global__
* function or function template instantiation.
* \return
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
* - \link #nvrtcResult NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION \endlink
*
* \see ::nvrtcGetLoweredName
*/
nvrtcResult nvrtcAddNameExpression(nvrtcProgram prog,
const char * const name_expression);
/**
* \ingroup compilation
* \brief nvrtcGetLoweredName extracts the lowered (mangled) name
* for a __global__ function or function template instantiation,
* and updates *lowered_name to point to it. The memory containing
* the name is released when the NVRTC program is destroyed by
* nvrtcDestroyProgram.
* The identical name expression must have been previously
* provided to nvrtcAddNameExpression.
*
* \param [in] prog CUDA Runtime Compilation program.
* \param [in] name_expression constant expression denoting a __global__
* function or function template instantiation.
* \param [out] lowered_name initialized by the function to point to a
* C string containing the lowered (mangled)
* name corresponding to the provided name expression.
* \return
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
* - \link #nvrtcResult NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION \endlink
* - \link #nvrtcResult NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID \endlink
*
* \see ::nvrtcAddNameExpression
*/
nvrtcResult nvrtcGetLoweredName(nvrtcProgram prog,
const char *const name_expression,
const char** lowered_name);
/**
* \defgroup options Supported Compile Options
*
* NVRTC supports the compile options below.
 * Option names with two preceding dashes (\c --) are long option names and
* option names with one preceding dash (\c -) are short option names.
* Short option names can be used instead of long option names.
* When a compile option takes an argument, an assignment operator (\c =)
* is used to separate the compile option argument from the compile option
* name, e.g., \c "--gpu-architecture=compute_30".
* Alternatively, the compile option name and the argument can be specified in
 * separate strings without an assignment operator, e.g.,
* \c "--gpu-architecture" \c "compute_30".
* Single-character short option names, such as \c -D, \c -U, and \c -I, do
* not require an assignment operator, and the compile option name and the
* argument can be present in the same string with or without spaces between
* them.
* For instance, \c "-D=<def>", \c "-D<def>", and \c "-D <def>" are all
* supported.
*
* The valid compiler options are:
*
* - Compilation targets
* - \c --gpu-architecture=\<arch\> (\c -arch)\n
* Specify the name of the class of GPU architectures for which the
* input must be compiled.\n
* - Valid <c>\<arch\></c>s:
* - \c compute_30
* - \c compute_32
* - \c compute_35
* - \c compute_37
* - \c compute_50
* - \c compute_52
* - \c compute_53
* - \c compute_60
* - \c compute_61
* - \c compute_62
* - \c compute_70
* - \c compute_72
* - Default: \c compute_30
* - Separate compilation / whole-program compilation
* - \c --device-c (\c -dc)\n
* Generate relocatable code that can be linked with other relocatable
* device code. It is equivalent to --relocatable-device-code=true.
* - \c --device-w (\c -dw)\n
* Generate non-relocatable code. It is equivalent to
* \c --relocatable-device-code=false.
* - \c --relocatable-device-code={true|false} (\c -rdc)\n
* Enable (disable) the generation of relocatable device code.
* - Default: \c false
* - Debugging support
* - \c --device-debug (\c -G)\n
* Generate debug information.
* - \c --generate-line-info (\c -lineinfo)\n
* Generate line-number information.
* - Code generation
* - \c --maxrregcount=\<N\> (\c -maxrregcount)\n
 *     Specify the maximum number of registers that GPU functions can use.
 *     Up to a function-specific limit, a higher value will generally
* increase the performance of individual GPU threads that execute this
* function. However, because thread registers are allocated from a
* global register pool on each GPU, a higher value of this option will
* also reduce the maximum thread block size, thereby reducing the amount
* of thread parallelism. Hence, a good maxrregcount value is the result
* of a trade-off. If this option is not specified, then no maximum is
 *     assumed. Values less than the minimum number of registers required by
 *     the ABI will be bumped up by the compiler to the ABI minimum.
* - \c --ftz={true|false} (\c -ftz)\n
* When performing single-precision floating-point operations, flush
* denormal values to zero or preserve denormal values.
* \c --use_fast_math implies \c --ftz=true.
* - Default: \c false
* - \c --prec-sqrt={true|false} (\c -prec-sqrt)\n
* For single-precision floating-point square root, use IEEE
* round-to-nearest mode or use a faster approximation.
* \c --use_fast_math implies \c --prec-sqrt=false.
* - Default: \c true
* - \c --prec-div={true|false} (\c -prec-div)\n
* For single-precision floating-point division and reciprocals, use IEEE
* round-to-nearest mode or use a faster approximation.
* \c --use_fast_math implies \c --prec-div=false.
* - Default: \c true
* - \c --fmad={true|false} (\c -fmad)\n
* Enables (disables) the contraction of floating-point multiplies and
* adds/subtracts into floating-point multiply-add operations (FMAD,
* FFMA, or DFMA). \c --use_fast_math implies \c --fmad=true.
* - Default: \c true
* - \c --use_fast_math (\c -use_fast_math)\n
* Make use of fast math operations.
* \c --use_fast_math implies \c --ftz=true \c --prec-div=false
* \c --prec-sqrt=false \c --fmad=true.
* - Preprocessing
* - \c --define-macro=\<def\> (\c -D)\n
* \c \<def\> can be either \c \<name\> or \c \<name\>=\<definition\>.
* - \c \<name\> \n
* Predefine \c \<name\> as a macro with definition \c 1.
* - \c \<name\>=\<definition\> \n
* The contents of \c \<definition\> are tokenized and preprocessed
* as if they appeared during translation phase three in a \c \#define
* directive. In particular, the definition will be truncated by
* embedded new line characters.
* - \c --undefine-macro=\<def\> (\c -U)\n
* Cancel any previous definition of \c \<def\>.
* - \c --include-path=\<dir\> (\c -I)\n
* Add the directory \c \<dir\> to the list of directories to be
* searched for headers. These paths are searched after the list of
* headers given to ::nvrtcCreateProgram.
* - \c --pre-include=\<header\> (\c -include)\n
* Preinclude \c \<header\> during preprocessing.
* - Language Dialect
* - \c --std={c++11|c++14} (\c -std={c++11|c++14})\n
* Set language dialect to C++11 or C++14.
* - \c --builtin-move-forward={true|false} (\c -builtin-move-forward)\n
* Provide builtin definitions of \c std::move and \c std::forward,
* when C++11 language dialect is selected.
* - Default: \c true
* - \c --builtin-initializer-list={true|false}
* (\c -builtin-initializer-list)\n
* Provide builtin definitions of \c std::initializer_list class and
* member functions when C++11 language dialect is selected.
* - Default: \c true
* - Misc.
* - \c --disable-warnings (\c -w)\n
* Inhibit all warning messages.
* - \c --restrict (\c -restrict)\n
* Programmer assertion that all kernel pointer parameters are restrict
* pointers.
* - \c --device-as-default-execution-space
* (\c -default-device)\n
* Treat entities with no execution space annotation as \c __device__
* entities.
*
* \param [in] prog CUDA Runtime Compilation program.
* \param [in] numOptions Number of compiler options passed.
* \param [in] options Compiler options in the form of C string array.\n
* \p options can be \c NULL when \p numOptions is 0.
*
* \return
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
* - \link #nvrtcResult NVRTC_ERROR_OUT_OF_MEMORY \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \endlink
* - \link #nvrtcResult NVRTC_ERROR_INVALID_OPTION \endlink
* - \link #nvrtcResult NVRTC_ERROR_COMPILATION \endlink
* - \link #nvrtcResult NVRTC_ERROR_BUILTIN_OPERATION_FAILURE \endlink
*/
#ifdef __cplusplus
}
#endif /* __cplusplus */
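For illustration of the option syntax documented above, here is a minimal host-side sketch that compiles a small kernel with NVRTC and passes a few of the listed options (architecture given as two separate strings, FMA contraction, a macro definition, a dialect). The kernel string, option values, and the NVRTC_CHECK macro are placeholders added for this example, not part of the original header.

```
#include <nvrtc.h>
#include <cstdio>
#include <cstdlib>
#include <string>
#include <vector>

// Abort on any NVRTC error (illustrative error handling only).
#define NVRTC_CHECK(x) do { nvrtcResult r = (x); if (r != NVRTC_SUCCESS) { \
  std::fprintf(stderr, "NVRTC error: %s\n", nvrtcGetErrorString(r)); std::exit(1); } } while (0)

int main() {
  const char* src =
      "extern \"C\" __global__ void axpy(float a, float* x, float* y, int n) {\n"
      "  int i = blockIdx.x * blockDim.x + threadIdx.x;\n"
      "  if (i < n) y[i] = a * x[i] + y[i];\n"
      "}\n";

  nvrtcProgram prog;
  NVRTC_CHECK(nvrtcCreateProgram(&prog, src, "axpy.cu", 0, nullptr, nullptr));

  // Long options take '=', a name and its argument may also be split across
  // two strings, and single-character short options such as -D need no '='.
  const char* opts[] = {
    "--gpu-architecture", "compute_50",  // name and argument as separate strings
    "--fmad=false",                      // disable FMA contraction
    "--std=c++11",
    "-DBLOCK=128"                        // short -D option, no assignment operator
  };
  nvrtcResult compile =
      nvrtcCompileProgram(prog, (int)(sizeof(opts) / sizeof(opts[0])), opts);

  // The log is worth printing whether or not compilation succeeded.
  size_t log_size;
  NVRTC_CHECK(nvrtcGetProgramLogSize(prog, &log_size));
  std::string log(log_size, '\0');
  NVRTC_CHECK(nvrtcGetProgramLog(prog, &log[0]));
  if (log_size > 1) std::fprintf(stderr, "%s\n", log.c_str());
  NVRTC_CHECK(compile);

  // Retrieve the generated PTX.
  size_t ptx_size;
  NVRTC_CHECK(nvrtcGetPTXSize(prog, &ptx_size));
  std::vector<char> ptx(ptx_size);
  NVRTC_CHECK(nvrtcGetPTX(prog, ptx.data()));
  NVRTC_CHECK(nvrtcDestroyProgram(&prog));
  return 0;
}
```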
/* The utility function 'nvrtcGetTypeName' is not available by default. Define
the macro 'NVRTC_GET_TYPE_NAME' to a non-zero value to make it available.
*/
#if NVRTC_GET_TYPE_NAME || __DOXYGEN_ONLY__
#if NVRTC_USE_CXXABI || __clang__ || __GNUC__ || __DOXYGEN_ONLY__
#include <cxxabi.h>
#include <cstdlib>
#elif defined(_WIN32)
#include <Windows.h>
#include <DbgHelp.h>
#endif /* NVRTC_USE_CXXABI || __clang__ || __GNUC__ */
#include <string>
#include <typeinfo>
/*************************************************************************//**
*
* \defgroup hosthelper Host Helper
*
* NVRTC defines the following functions for easier interaction with host code.
*
****************************************************************************/
/**
* \ingroup hosthelper
* \brief nvrtcGetTypeName stores the source level name of the template type argument
* T in the given std::string location.
*
* This function is only provided when the macro NVRTC_GET_TYPE_NAME is
* defined with a non-zero value. It uses abi::__cxa_demangle or UnDecorateSymbolName
* function calls to extract the type name, when using gcc/clang or cl.exe compilers,
* respectively. If the name extraction fails, it will return
* NVRTC_ERROR_INTERNAL_ERROR; otherwise, *result is initialized with the
* extracted name.
*
* \param [in] result: pointer to std::string in which to store the type name.
* \return
* - \link #nvrtcResult NVRTC_SUCCESS \endlink
* - \link #nvrtcResult NVRTC_ERROR_INTERNAL_ERROR \endlink
*
*/
template <typename T>
nvrtcResult nvrtcGetTypeName(std::string *result)
{
const char *name = typeid(T).name();
#if NVRTC_USE_CXXABI || __clang__ || __GNUC__
int status;
char *undecorated_name = abi::__cxa_demangle(name, 0, 0, &status);
if (status == 0) {
*result = undecorated_name;
free(undecorated_name);
return NVRTC_SUCCESS;
}
#elif defined(_WIN32)
char undecorated_name[4096];
if(UnDecorateSymbolName(name, undecorated_name,
sizeof(undecorated_name) / sizeof(*undecorated_name),
UNDNAME_COMPLETE) ) {
*result = undecorated_name;
return NVRTC_SUCCESS;
}
#endif /* NVRTC_USE_CXXABI || __clang__ || __GNUC__ */
return NVRTC_ERROR_INTERNAL_ERROR;
}
#endif /* NVRTC_GET_TYPE_NAME */
#endif /* __NVRTC_H__ */
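A short usage sketch for the helper above: define NVRTC_GET_TYPE_NAME to a non-zero value before including nvrtc.h, then call the template on any host-visible type, typically to build a name expression for nvrtcGetLoweredName. The demo namespace and Wrapper type below are hypothetical.

```
#define NVRTC_GET_TYPE_NAME 1  // must be non-zero before including the header
#include <nvrtc.h>
#include <iostream>
#include <string>

namespace demo { template <typename T> struct Wrapper {}; }  // hypothetical user type

int main() {
  std::string name;
  // Demangle the source-level name of the template instantiation on the host.
  if (nvrtcGetTypeName<demo::Wrapper<float>>(&name) == NVRTC_SUCCESS)
    std::cout << name << std::endl;   // e.g. "demo::Wrapper<float>"
  else
    std::cerr << "name extraction failed" << std::endl;
  return 0;
}
```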

View File

@@ -1,119 +0,0 @@
/*
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO LICENSEE:
*
* This source code and/or documentation ("Licensed Deliverables") are
* subject to NVIDIA intellectual property rights under U.S. and
* international Copyright laws.
*
* These Licensed Deliverables contained herein is PROPRIETARY and
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
* conditions of a form of NVIDIA software license agreement by and
* between NVIDIA and Licensee ("License Agreement") or electronically
* accepted by Licensee. Notwithstanding any terms or conditions to
* the contrary in the License Agreement, reproduction or disclosure
* of the Licensed Deliverables to any third party without the express
* written consent of NVIDIA is prohibited.
*
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THESE LICENSED DELIVERABLES.
*
* U.S. Government End Users. These Licensed Deliverables are a
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
* 1995), consisting of "commercial computer software" and "commercial
* computer software documentation" as such terms are used in 48
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
* U.S. Government End Users acquire the Licensed Deliverables with
* only those rights set forth herein.
*
* Any use of the Licensed Deliverables in individual and commercial
* software must include, in the user documentation and internal
* comments to the code, the above Disclaimer and U.S. Government End
* Users Notice.
*/
#if !defined(__SURFACE_TYPES_H__)
#define __SURFACE_TYPES_H__
/*******************************************************************************
* *
* *
* *
*******************************************************************************/
#include "driver_types.h"
/**
* \addtogroup CUDART_TYPES
*
* @{
*/
/*******************************************************************************
* *
* *
* *
*******************************************************************************/
#define cudaSurfaceType1D 0x01
#define cudaSurfaceType2D 0x02
#define cudaSurfaceType3D 0x03
#define cudaSurfaceTypeCubemap 0x0C
#define cudaSurfaceType1DLayered 0xF1
#define cudaSurfaceType2DLayered 0xF2
#define cudaSurfaceTypeCubemapLayered 0xFC
/**
* CUDA Surface boundary modes
*/
enum __device_builtin__ cudaSurfaceBoundaryMode
{
cudaBoundaryModeZero = 0, /**< Zero boundary mode */
cudaBoundaryModeClamp = 1, /**< Clamp boundary mode */
cudaBoundaryModeTrap = 2 /**< Trap boundary mode */
};
/**
* CUDA Surface format modes
*/
enum __device_builtin__ cudaSurfaceFormatMode
{
cudaFormatModeForced = 0, /**< Forced format mode */
cudaFormatModeAuto = 1 /**< Auto format mode */
};
/**
* CUDA Surface reference
*/
struct __device_builtin__ surfaceReference
{
/**
* Channel descriptor for surface reference
*/
struct cudaChannelFormatDesc channelDesc;
};
/**
* An opaque value that represents a CUDA Surface object
*/
typedef __device_builtin__ unsigned long long cudaSurfaceObject_t;
/** @} */
/** @} */ /* END CUDART_TYPES */
#endif /* !__SURFACE_TYPES_H__ */
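For context, the boundary modes above are consumed by the surface intrinsics at each read or write. A minimal device-side sketch, assuming a cudaSurfaceObject_t already bound to a 2D float array (the kernel name and clamp choice are illustrative):

```
#include <cuda_runtime.h>

// Scales every element of a 2D float surface in place.
// Out-of-range accesses are clamped rather than trapping (cudaBoundaryModeClamp).
__global__ void scale_surface(cudaSurfaceObject_t surf, int width, int height, float factor)
{
  int x = blockIdx.x * blockDim.x + threadIdx.x;
  int y = blockIdx.y * blockDim.y + threadIdx.y;
  if (x < width && y < height) {
    // Surface x-coordinates are expressed in bytes, hence the sizeof(float) scaling.
    float v = surf2Dread<float>(surf, x * (int)sizeof(float), y, cudaBoundaryModeClamp);
    surf2Dwrite(v * factor, surf, x * (int)sizeof(float), y, cudaBoundaryModeClamp);
  }
}
```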

View File

@@ -1,217 +0,0 @@
/*
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO LICENSEE:
*
* This source code and/or documentation ("Licensed Deliverables") are
* subject to NVIDIA intellectual property rights under U.S. and
* international Copyright laws.
*
* These Licensed Deliverables contained herein is PROPRIETARY and
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
* conditions of a form of NVIDIA software license agreement by and
* between NVIDIA and Licensee ("License Agreement") or electronically
* accepted by Licensee. Notwithstanding any terms or conditions to
* the contrary in the License Agreement, reproduction or disclosure
* of the Licensed Deliverables to any third party without the express
* written consent of NVIDIA is prohibited.
*
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THESE LICENSED DELIVERABLES.
*
* U.S. Government End Users. These Licensed Deliverables are a
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
* 1995), consisting of "commercial computer software" and "commercial
* computer software documentation" as such terms are used in 48
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
* U.S. Government End Users acquire the Licensed Deliverables with
* only those rights set forth herein.
*
* Any use of the Licensed Deliverables in individual and commercial
* software must include, in the user documentation and internal
* comments to the code, the above Disclaimer and U.S. Government End
* Users Notice.
*/
#if !defined(__TEXTURE_TYPES_H__)
#define __TEXTURE_TYPES_H__
/*******************************************************************************
* *
* *
* *
*******************************************************************************/
#include "driver_types.h"
/**
* \addtogroup CUDART_TYPES
*
* @{
*/
/*******************************************************************************
* *
* *
* *
*******************************************************************************/
#define cudaTextureType1D 0x01
#define cudaTextureType2D 0x02
#define cudaTextureType3D 0x03
#define cudaTextureTypeCubemap 0x0C
#define cudaTextureType1DLayered 0xF1
#define cudaTextureType2DLayered 0xF2
#define cudaTextureTypeCubemapLayered 0xFC
/**
* CUDA texture address modes
*/
enum __device_builtin__ cudaTextureAddressMode
{
cudaAddressModeWrap = 0, /**< Wrapping address mode */
cudaAddressModeClamp = 1, /**< Clamp to edge address mode */
cudaAddressModeMirror = 2, /**< Mirror address mode */
cudaAddressModeBorder = 3 /**< Border address mode */
};
/**
* CUDA texture filter modes
*/
enum __device_builtin__ cudaTextureFilterMode
{
cudaFilterModePoint = 0, /**< Point filter mode */
cudaFilterModeLinear = 1 /**< Linear filter mode */
};
/**
* CUDA texture read modes
*/
enum __device_builtin__ cudaTextureReadMode
{
cudaReadModeElementType = 0, /**< Read texture as specified element type */
cudaReadModeNormalizedFloat = 1 /**< Read texture as normalized float */
};
/**
* CUDA texture reference
*/
struct __device_builtin__ textureReference
{
/**
* Indicates whether texture reads are normalized or not
*/
int normalized;
/**
* Texture filter mode
*/
enum cudaTextureFilterMode filterMode;
/**
* Texture address mode for up to 3 dimensions
*/
enum cudaTextureAddressMode addressMode[3];
/**
* Channel descriptor for the texture reference
*/
struct cudaChannelFormatDesc channelDesc;
/**
* Perform sRGB->linear conversion during texture read
*/
int sRGB;
/**
* Limit to the anisotropy ratio
*/
unsigned int maxAnisotropy;
/**
* Mipmap filter mode
*/
enum cudaTextureFilterMode mipmapFilterMode;
/**
* Offset applied to the supplied mipmap level
*/
float mipmapLevelBias;
/**
* Lower end of the mipmap level range to clamp access to
*/
float minMipmapLevelClamp;
/**
* Upper end of the mipmap level range to clamp access to
*/
float maxMipmapLevelClamp;
int __cudaReserved[15];
};
/**
* CUDA texture descriptor
*/
struct __device_builtin__ cudaTextureDesc
{
/**
* Texture address mode for up to 3 dimensions
*/
enum cudaTextureAddressMode addressMode[3];
/**
* Texture filter mode
*/
enum cudaTextureFilterMode filterMode;
/**
* Texture read mode
*/
enum cudaTextureReadMode readMode;
/**
* Perform sRGB->linear conversion during texture read
*/
int sRGB;
/**
* Texture Border Color
*/
float borderColor[4];
/**
* Indicates whether texture reads are normalized or not
*/
int normalizedCoords;
/**
* Limit to the anisotropy ratio
*/
unsigned int maxAnisotropy;
/**
* Mipmap filter mode
*/
enum cudaTextureFilterMode mipmapFilterMode;
/**
* Offset applied to the supplied mipmap level
*/
float mipmapLevelBias;
/**
* Lower end of the mipmap level range to clamp access to
*/
float minMipmapLevelClamp;
/**
* Upper end of the mipmap level range to clamp access to
*/
float maxMipmapLevelClamp;
};
/**
* An opaque value that represents a CUDA texture object
*/
typedef __device_builtin__ unsigned long long cudaTextureObject_t;
/** @} */
/** @} */ /* END CUDART_TYPES */
#endif /* !__TEXTURE_TYPES_H__ */
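To show how the descriptor fields above fit together, here is a hedged host-side sketch that builds a texture object over a linear device buffer; the clamp/point/element-type settings are illustrative choices, not requirements.

```
#include <cuda_runtime.h>

// Creates a 1D texture object over `n` floats already resident at `d_data`.
// Error handling is omitted for brevity.
cudaTextureObject_t make_float_texture(float* d_data, size_t n)
{
  cudaResourceDesc res{};
  res.resType = cudaResourceTypeLinear;
  res.res.linear.devPtr = d_data;
  res.res.linear.desc = cudaCreateChannelDesc<float>();
  res.res.linear.sizeInBytes = n * sizeof(float);

  cudaTextureDesc tex{};
  tex.addressMode[0] = cudaAddressModeClamp;   // clamp out-of-range coordinates
  tex.filterMode = cudaFilterModePoint;        // no interpolation
  tex.readMode = cudaReadModeElementType;      // return the raw float
  tex.normalizedCoords = 0;                    // integer indexing

  cudaTextureObject_t obj = 0;
  cudaCreateTextureObject(&obj, &res, &tex, nullptr);
  return obj;
}
```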

View File

@@ -1,177 +0,0 @@
/*
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO LICENSEE:
*
* This source code and/or documentation ("Licensed Deliverables") are
* subject to NVIDIA intellectual property rights under U.S. and
* international Copyright laws.
*
* These Licensed Deliverables contained herein is PROPRIETARY and
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
* conditions of a form of NVIDIA software license agreement by and
* between NVIDIA and Licensee ("License Agreement") or electronically
* accepted by Licensee. Notwithstanding any terms or conditions to
* the contrary in the License Agreement, reproduction or disclosure
* of the Licensed Deliverables to any third party without the express
* written consent of NVIDIA is prohibited.
*
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THESE LICENSED DELIVERABLES.
*
* U.S. Government End Users. These Licensed Deliverables are a
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
* 1995), consisting of "commercial computer software" and "commercial
* computer software documentation" as such terms are used in 48
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
* U.S. Government End Users acquire the Licensed Deliverables with
* only those rights set forth herein.
*
* Any use of the Licensed Deliverables in individual and commercial
* software must include, in the user documentation and internal
* comments to the code, the above Disclaimer and U.S. Government End
* Users Notice.
*/
#if !defined(__VECTOR_FUNCTIONS_H__)
#define __VECTOR_FUNCTIONS_H__
/*******************************************************************************
* *
* *
* *
*******************************************************************************/
#include "builtin_types.h"
#include "host_defines.h"
#include "vector_types.h"
#if defined(__CUDACC_RTC__)
#define __VECTOR_FUNCTIONS_DECL__ __host__ __device__
#else /* !__CUDACC_RTC__ */
#define __VECTOR_FUNCTIONS_DECL__ static __inline__ __host__ __device__
#endif /* __CUDACC_RTC__ */
/*******************************************************************************
* *
* *
* *
*******************************************************************************/
__VECTOR_FUNCTIONS_DECL__ char1 make_char1(signed char x);
__VECTOR_FUNCTIONS_DECL__ uchar1 make_uchar1(unsigned char x);
__VECTOR_FUNCTIONS_DECL__ char2 make_char2(signed char x, signed char y);
__VECTOR_FUNCTIONS_DECL__ uchar2 make_uchar2(unsigned char x, unsigned char y);
__VECTOR_FUNCTIONS_DECL__ char3 make_char3(signed char x, signed char y, signed char z);
__VECTOR_FUNCTIONS_DECL__ uchar3 make_uchar3(unsigned char x, unsigned char y, unsigned char z);
__VECTOR_FUNCTIONS_DECL__ char4 make_char4(signed char x, signed char y, signed char z, signed char w);
__VECTOR_FUNCTIONS_DECL__ uchar4 make_uchar4(unsigned char x, unsigned char y, unsigned char z, unsigned char w);
__VECTOR_FUNCTIONS_DECL__ short1 make_short1(short x);
__VECTOR_FUNCTIONS_DECL__ ushort1 make_ushort1(unsigned short x);
__VECTOR_FUNCTIONS_DECL__ short2 make_short2(short x, short y);
__VECTOR_FUNCTIONS_DECL__ ushort2 make_ushort2(unsigned short x, unsigned short y);
__VECTOR_FUNCTIONS_DECL__ short3 make_short3(short x,short y, short z);
__VECTOR_FUNCTIONS_DECL__ ushort3 make_ushort3(unsigned short x, unsigned short y, unsigned short z);
__VECTOR_FUNCTIONS_DECL__ short4 make_short4(short x, short y, short z, short w);
__VECTOR_FUNCTIONS_DECL__ ushort4 make_ushort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w);
__VECTOR_FUNCTIONS_DECL__ int1 make_int1(int x);
__VECTOR_FUNCTIONS_DECL__ uint1 make_uint1(unsigned int x);
__VECTOR_FUNCTIONS_DECL__ int2 make_int2(int x, int y);
__VECTOR_FUNCTIONS_DECL__ uint2 make_uint2(unsigned int x, unsigned int y);
__VECTOR_FUNCTIONS_DECL__ int3 make_int3(int x, int y, int z);
__VECTOR_FUNCTIONS_DECL__ uint3 make_uint3(unsigned int x, unsigned int y, unsigned int z);
__VECTOR_FUNCTIONS_DECL__ int4 make_int4(int x, int y, int z, int w);
__VECTOR_FUNCTIONS_DECL__ uint4 make_uint4(unsigned int x, unsigned int y, unsigned int z, unsigned int w);
__VECTOR_FUNCTIONS_DECL__ long1 make_long1(long int x);
__VECTOR_FUNCTIONS_DECL__ ulong1 make_ulong1(unsigned long int x);
__VECTOR_FUNCTIONS_DECL__ long2 make_long2(long int x, long int y);
__VECTOR_FUNCTIONS_DECL__ ulong2 make_ulong2(unsigned long int x, unsigned long int y);
__VECTOR_FUNCTIONS_DECL__ long3 make_long3(long int x, long int y, long int z);
__VECTOR_FUNCTIONS_DECL__ ulong3 make_ulong3(unsigned long int x, unsigned long int y, unsigned long int z);
__VECTOR_FUNCTIONS_DECL__ long4 make_long4(long int x, long int y, long int z, long int w);
__VECTOR_FUNCTIONS_DECL__ ulong4 make_ulong4(unsigned long int x, unsigned long int y, unsigned long int z, unsigned long int w);
__VECTOR_FUNCTIONS_DECL__ float1 make_float1(float x);
__VECTOR_FUNCTIONS_DECL__ float2 make_float2(float x, float y);
__VECTOR_FUNCTIONS_DECL__ float3 make_float3(float x, float y, float z);
__VECTOR_FUNCTIONS_DECL__ float4 make_float4(float x, float y, float z, float w);
__VECTOR_FUNCTIONS_DECL__ longlong1 make_longlong1(long long int x);
__VECTOR_FUNCTIONS_DECL__ ulonglong1 make_ulonglong1(unsigned long long int x);
__VECTOR_FUNCTIONS_DECL__ longlong2 make_longlong2(long long int x, long long int y);
__VECTOR_FUNCTIONS_DECL__ ulonglong2 make_ulonglong2(unsigned long long int x, unsigned long long int y);
__VECTOR_FUNCTIONS_DECL__ longlong3 make_longlong3(long long int x, long long int y, long long int z);
__VECTOR_FUNCTIONS_DECL__ ulonglong3 make_ulonglong3(unsigned long long int x, unsigned long long int y, unsigned long long int z);
__VECTOR_FUNCTIONS_DECL__ longlong4 make_longlong4(long long int x, long long int y, long long int z, long long int w);
__VECTOR_FUNCTIONS_DECL__ ulonglong4 make_ulonglong4(unsigned long long int x, unsigned long long int y, unsigned long long int z, unsigned long long int w);
__VECTOR_FUNCTIONS_DECL__ double1 make_double1(double x);
__VECTOR_FUNCTIONS_DECL__ double2 make_double2(double x, double y);
__VECTOR_FUNCTIONS_DECL__ double3 make_double3(double x, double y, double z);
__VECTOR_FUNCTIONS_DECL__ double4 make_double4(double x, double y, double z, double w);
#undef __VECTOR_FUNCTIONS_DECL__
#if !defined(__CUDACC_RTC__)
#include "vector_functions.hpp"
#endif /* !__CUDACC_RTC__ */
#endif /* !__VECTOR_FUNCTIONS_H__ */

View File

@@ -1,318 +0,0 @@
/*
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO LICENSEE:
*
* This source code and/or documentation ("Licensed Deliverables") are
* subject to NVIDIA intellectual property rights under U.S. and
* international Copyright laws.
*
* These Licensed Deliverables contained herein is PROPRIETARY and
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
* conditions of a form of NVIDIA software license agreement by and
* between NVIDIA and Licensee ("License Agreement") or electronically
* accepted by Licensee. Notwithstanding any terms or conditions to
* the contrary in the License Agreement, reproduction or disclosure
* of the Licensed Deliverables to any third party without the express
* written consent of NVIDIA is prohibited.
*
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THESE LICENSED DELIVERABLES.
*
* U.S. Government End Users. These Licensed Deliverables are a
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
* 1995), consisting of "commercial computer software" and "commercial
* computer software documentation" as such terms are used in 48
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
* U.S. Government End Users acquire the Licensed Deliverables with
* only those rights set forth herein.
*
* Any use of the Licensed Deliverables in individual and commercial
* software must include, in the user documentation and internal
* comments to the code, the above Disclaimer and U.S. Government End
* Users Notice.
*/
#if !defined(__VECTOR_FUNCTIONS_HPP__)
#define __VECTOR_FUNCTIONS_HPP__
/*******************************************************************************
* *
* *
* *
*******************************************************************************/
#include "builtin_types.h"
#include "host_defines.h"
#include "vector_types.h"
#if defined(__CUDACC_RTC__)
#define __VECTOR_FUNCTIONS_DECL__ __host__ __device__
#else /* !__CUDACC_RTC__ */
#define __VECTOR_FUNCTIONS_DECL__ static __inline__ __host__ __device__
#endif /* __CUDACC_RTC__ */
/*******************************************************************************
* *
* *
* *
*******************************************************************************/
__VECTOR_FUNCTIONS_DECL__ char1 make_char1(signed char x)
{
char1 t; t.x = x; return t;
}
__VECTOR_FUNCTIONS_DECL__ uchar1 make_uchar1(unsigned char x)
{
uchar1 t; t.x = x; return t;
}
__VECTOR_FUNCTIONS_DECL__ char2 make_char2(signed char x, signed char y)
{
char2 t; t.x = x; t.y = y; return t;
}
__VECTOR_FUNCTIONS_DECL__ uchar2 make_uchar2(unsigned char x, unsigned char y)
{
uchar2 t; t.x = x; t.y = y; return t;
}
__VECTOR_FUNCTIONS_DECL__ char3 make_char3(signed char x, signed char y, signed char z)
{
char3 t; t.x = x; t.y = y; t.z = z; return t;
}
__VECTOR_FUNCTIONS_DECL__ uchar3 make_uchar3(unsigned char x, unsigned char y, unsigned char z)
{
uchar3 t; t.x = x; t.y = y; t.z = z; return t;
}
__VECTOR_FUNCTIONS_DECL__ char4 make_char4(signed char x, signed char y, signed char z, signed char w)
{
char4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t;
}
__VECTOR_FUNCTIONS_DECL__ uchar4 make_uchar4(unsigned char x, unsigned char y, unsigned char z, unsigned char w)
{
uchar4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t;
}
__VECTOR_FUNCTIONS_DECL__ short1 make_short1(short x)
{
short1 t; t.x = x; return t;
}
__VECTOR_FUNCTIONS_DECL__ ushort1 make_ushort1(unsigned short x)
{
ushort1 t; t.x = x; return t;
}
__VECTOR_FUNCTIONS_DECL__ short2 make_short2(short x, short y)
{
short2 t; t.x = x; t.y = y; return t;
}
__VECTOR_FUNCTIONS_DECL__ ushort2 make_ushort2(unsigned short x, unsigned short y)
{
ushort2 t; t.x = x; t.y = y; return t;
}
__VECTOR_FUNCTIONS_DECL__ short3 make_short3(short x,short y, short z)
{
short3 t; t.x = x; t.y = y; t.z = z; return t;
}
__VECTOR_FUNCTIONS_DECL__ ushort3 make_ushort3(unsigned short x, unsigned short y, unsigned short z)
{
ushort3 t; t.x = x; t.y = y; t.z = z; return t;
}
__VECTOR_FUNCTIONS_DECL__ short4 make_short4(short x, short y, short z, short w)
{
short4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t;
}
__VECTOR_FUNCTIONS_DECL__ ushort4 make_ushort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
{
ushort4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t;
}
__VECTOR_FUNCTIONS_DECL__ int1 make_int1(int x)
{
int1 t; t.x = x; return t;
}
__VECTOR_FUNCTIONS_DECL__ uint1 make_uint1(unsigned int x)
{
uint1 t; t.x = x; return t;
}
__VECTOR_FUNCTIONS_DECL__ int2 make_int2(int x, int y)
{
int2 t; t.x = x; t.y = y; return t;
}
__VECTOR_FUNCTIONS_DECL__ uint2 make_uint2(unsigned int x, unsigned int y)
{
uint2 t; t.x = x; t.y = y; return t;
}
__VECTOR_FUNCTIONS_DECL__ int3 make_int3(int x, int y, int z)
{
int3 t; t.x = x; t.y = y; t.z = z; return t;
}
__VECTOR_FUNCTIONS_DECL__ uint3 make_uint3(unsigned int x, unsigned int y, unsigned int z)
{
uint3 t; t.x = x; t.y = y; t.z = z; return t;
}
__VECTOR_FUNCTIONS_DECL__ int4 make_int4(int x, int y, int z, int w)
{
int4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t;
}
__VECTOR_FUNCTIONS_DECL__ uint4 make_uint4(unsigned int x, unsigned int y, unsigned int z, unsigned int w)
{
uint4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t;
}
__VECTOR_FUNCTIONS_DECL__ long1 make_long1(long int x)
{
long1 t; t.x = x; return t;
}
__VECTOR_FUNCTIONS_DECL__ ulong1 make_ulong1(unsigned long int x)
{
ulong1 t; t.x = x; return t;
}
__VECTOR_FUNCTIONS_DECL__ long2 make_long2(long int x, long int y)
{
long2 t; t.x = x; t.y = y; return t;
}
__VECTOR_FUNCTIONS_DECL__ ulong2 make_ulong2(unsigned long int x, unsigned long int y)
{
ulong2 t; t.x = x; t.y = y; return t;
}
__VECTOR_FUNCTIONS_DECL__ long3 make_long3(long int x, long int y, long int z)
{
long3 t; t.x = x; t.y = y; t.z = z; return t;
}
__VECTOR_FUNCTIONS_DECL__ ulong3 make_ulong3(unsigned long int x, unsigned long int y, unsigned long int z)
{
ulong3 t; t.x = x; t.y = y; t.z = z; return t;
}
__VECTOR_FUNCTIONS_DECL__ long4 make_long4(long int x, long int y, long int z, long int w)
{
long4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t;
}
__VECTOR_FUNCTIONS_DECL__ ulong4 make_ulong4(unsigned long int x, unsigned long int y, unsigned long int z, unsigned long int w)
{
ulong4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t;
}
__VECTOR_FUNCTIONS_DECL__ float1 make_float1(float x)
{
float1 t; t.x = x; return t;
}
__VECTOR_FUNCTIONS_DECL__ float2 make_float2(float x, float y)
{
float2 t; t.x = x; t.y = y; return t;
}
__VECTOR_FUNCTIONS_DECL__ float3 make_float3(float x, float y, float z)
{
float3 t; t.x = x; t.y = y; t.z = z; return t;
}
__VECTOR_FUNCTIONS_DECL__ float4 make_float4(float x, float y, float z, float w)
{
float4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t;
}
__VECTOR_FUNCTIONS_DECL__ longlong1 make_longlong1(long long int x)
{
longlong1 t; t.x = x; return t;
}
__VECTOR_FUNCTIONS_DECL__ ulonglong1 make_ulonglong1(unsigned long long int x)
{
ulonglong1 t; t.x = x; return t;
}
__VECTOR_FUNCTIONS_DECL__ longlong2 make_longlong2(long long int x, long long int y)
{
longlong2 t; t.x = x; t.y = y; return t;
}
__VECTOR_FUNCTIONS_DECL__ ulonglong2 make_ulonglong2(unsigned long long int x, unsigned long long int y)
{
ulonglong2 t; t.x = x; t.y = y; return t;
}
__VECTOR_FUNCTIONS_DECL__ longlong3 make_longlong3(long long int x, long long int y, long long int z)
{
longlong3 t; t.x = x; t.y = y; t.z = z; return t;
}
__VECTOR_FUNCTIONS_DECL__ ulonglong3 make_ulonglong3(unsigned long long int x, unsigned long long int y, unsigned long long int z)
{
ulonglong3 t; t.x = x; t.y = y; t.z = z; return t;
}
__VECTOR_FUNCTIONS_DECL__ longlong4 make_longlong4(long long int x, long long int y, long long int z, long long int w)
{
longlong4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t;
}
__VECTOR_FUNCTIONS_DECL__ ulonglong4 make_ulonglong4(unsigned long long int x, unsigned long long int y, unsigned long long int z, unsigned long long int w)
{
ulonglong4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t;
}
__VECTOR_FUNCTIONS_DECL__ double1 make_double1(double x)
{
double1 t; t.x = x; return t;
}
__VECTOR_FUNCTIONS_DECL__ double2 make_double2(double x, double y)
{
double2 t; t.x = x; t.y = y; return t;
}
__VECTOR_FUNCTIONS_DECL__ double3 make_double3(double x, double y, double z)
{
double3 t; t.x = x; t.y = y; t.z = z; return t;
}
__VECTOR_FUNCTIONS_DECL__ double4 make_double4(double x, double y, double z, double w)
{
double4 t; t.x = x; t.y = y; t.z = z; t.w = w; return t;
}
#undef __VECTOR_FUNCTIONS_DECL__
#endif /* !__VECTOR_FUNCTIONS_HPP__ */

View File

@@ -1,425 +0,0 @@
/*
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO LICENSEE:
*
* This source code and/or documentation ("Licensed Deliverables") are
* subject to NVIDIA intellectual property rights under U.S. and
* international Copyright laws.
*
* These Licensed Deliverables contained herein is PROPRIETARY and
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
* conditions of a form of NVIDIA software license agreement by and
* between NVIDIA and Licensee ("License Agreement") or electronically
* accepted by Licensee. Notwithstanding any terms or conditions to
* the contrary in the License Agreement, reproduction or disclosure
* of the Licensed Deliverables to any third party without the express
* written consent of NVIDIA is prohibited.
*
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THESE LICENSED DELIVERABLES.
*
* U.S. Government End Users. These Licensed Deliverables are a
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
* 1995), consisting of "commercial computer software" and "commercial
* computer software documentation" as such terms are used in 48
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
* U.S. Government End Users acquire the Licensed Deliverables with
* only those rights set forth herein.
*
* Any use of the Licensed Deliverables in individual and commercial
* software must include, in the user documentation and internal
* comments to the code, the above Disclaimer and U.S. Government End
* Users Notice.
*/
#if !defined(__VECTOR_TYPES_H__)
#define __VECTOR_TYPES_H__
/*******************************************************************************
* *
* *
* *
*******************************************************************************/
#include "host_defines.h"
/*******************************************************************************
* *
* *
* *
*******************************************************************************/
#if !defined(__CUDACC__) && !defined(__CUDACC_RTC__) && \
defined(_WIN32) && !defined(_WIN64)
#pragma warning(push)
#pragma warning(disable: 4201 4408)
#define __cuda_builtin_vector_align8(tag, members) \
struct __device_builtin__ tag \
{ \
union \
{ \
struct { members }; \
struct { long long int :1,:0; }; \
}; \
}
#else /* !__CUDACC__ && !__CUDACC_RTC__ && _WIN32 && !_WIN64 */
#define __cuda_builtin_vector_align8(tag, members) \
struct __device_builtin__ __align__(8) tag \
{ \
members \
}
#endif /* !__CUDACC__ && !__CUDACC_RTC__ && _WIN32 && !_WIN64 */
struct __device_builtin__ char1
{
signed char x;
};
struct __device_builtin__ uchar1
{
unsigned char x;
};
struct __device_builtin__ __align__(2) char2
{
signed char x, y;
};
struct __device_builtin__ __align__(2) uchar2
{
unsigned char x, y;
};
struct __device_builtin__ char3
{
signed char x, y, z;
};
struct __device_builtin__ uchar3
{
unsigned char x, y, z;
};
struct __device_builtin__ __align__(4) char4
{
signed char x, y, z, w;
};
struct __device_builtin__ __align__(4) uchar4
{
unsigned char x, y, z, w;
};
struct __device_builtin__ short1
{
short x;
};
struct __device_builtin__ ushort1
{
unsigned short x;
};
struct __device_builtin__ __align__(4) short2
{
short x, y;
};
struct __device_builtin__ __align__(4) ushort2
{
unsigned short x, y;
};
struct __device_builtin__ short3
{
short x, y, z;
};
struct __device_builtin__ ushort3
{
unsigned short x, y, z;
};
__cuda_builtin_vector_align8(short4, short x; short y; short z; short w;);
__cuda_builtin_vector_align8(ushort4, unsigned short x; unsigned short y; unsigned short z; unsigned short w;);
struct __device_builtin__ int1
{
int x;
};
struct __device_builtin__ uint1
{
unsigned int x;
};
__cuda_builtin_vector_align8(int2, int x; int y;);
__cuda_builtin_vector_align8(uint2, unsigned int x; unsigned int y;);
struct __device_builtin__ int3
{
int x, y, z;
};
struct __device_builtin__ uint3
{
unsigned int x, y, z;
};
struct __device_builtin__ __builtin_align__(16) int4
{
int x, y, z, w;
};
struct __device_builtin__ __builtin_align__(16) uint4
{
unsigned int x, y, z, w;
};
struct __device_builtin__ long1
{
long int x;
};
struct __device_builtin__ ulong1
{
unsigned long x;
};
#if defined(_WIN32)
__cuda_builtin_vector_align8(long2, long int x; long int y;);
__cuda_builtin_vector_align8(ulong2, unsigned long int x; unsigned long int y;);
#else /* !_WIN32 */
struct __device_builtin__ __align__(2*sizeof(long int)) long2
{
long int x, y;
};
struct __device_builtin__ __align__(2*sizeof(unsigned long int)) ulong2
{
unsigned long int x, y;
};
#endif /* _WIN32 */
struct __device_builtin__ long3
{
long int x, y, z;
};
struct __device_builtin__ ulong3
{
unsigned long int x, y, z;
};
struct __device_builtin__ __builtin_align__(16) long4
{
long int x, y, z, w;
};
struct __device_builtin__ __builtin_align__(16) ulong4
{
unsigned long int x, y, z, w;
};
struct __device_builtin__ float1
{
float x;
};
#if !defined(__CUDACC__) && defined(__arm__) && \
defined(__ARM_PCS_VFP) && __GNUC__ == 4 && __GNUC_MINOR__ == 6
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-pedantic"
struct __device_builtin__ __attribute__((aligned(8))) float2
{
float x; float y; float __cuda_gnu_arm_ice_workaround[0];
};
#pragma GCC poison __cuda_gnu_arm_ice_workaround
#pragma GCC diagnostic pop
#else /* !__CUDACC__ && __arm__ && __ARM_PCS_VFP &&
__GNUC__ == 4&& __GNUC_MINOR__ == 6 */
__cuda_builtin_vector_align8(float2, float x; float y;);
#endif /* !__CUDACC__ && __arm__ && __ARM_PCS_VFP &&
__GNUC__ == 4&& __GNUC_MINOR__ == 6 */
struct __device_builtin__ float3
{
float x, y, z;
};
struct __device_builtin__ __builtin_align__(16) float4
{
float x, y, z, w;
};
struct __device_builtin__ longlong1
{
long long int x;
};
struct __device_builtin__ ulonglong1
{
unsigned long long int x;
};
struct __device_builtin__ __builtin_align__(16) longlong2
{
long long int x, y;
};
struct __device_builtin__ __builtin_align__(16) ulonglong2
{
unsigned long long int x, y;
};
struct __device_builtin__ longlong3
{
long long int x, y, z;
};
struct __device_builtin__ ulonglong3
{
unsigned long long int x, y, z;
};
struct __device_builtin__ __builtin_align__(16) longlong4
{
long long int x, y, z, w;
};
struct __device_builtin__ __builtin_align__(16) ulonglong4
{
unsigned long long int x, y, z, w;
};
struct __device_builtin__ double1
{
double x;
};
struct __device_builtin__ __builtin_align__(16) double2
{
double x, y;
};
struct __device_builtin__ double3
{
double x, y, z;
};
struct __device_builtin__ __builtin_align__(16) double4
{
double x, y, z, w;
};
#if !defined(__CUDACC__) && defined(_WIN32) && !defined(_WIN64)
#pragma warning(pop)
#endif /* !__CUDACC__ && _WIN32 && !_WIN64 */
/*******************************************************************************
* *
* *
* *
*******************************************************************************/
typedef __device_builtin__ struct char1 char1;
typedef __device_builtin__ struct uchar1 uchar1;
typedef __device_builtin__ struct char2 char2;
typedef __device_builtin__ struct uchar2 uchar2;
typedef __device_builtin__ struct char3 char3;
typedef __device_builtin__ struct uchar3 uchar3;
typedef __device_builtin__ struct char4 char4;
typedef __device_builtin__ struct uchar4 uchar4;
typedef __device_builtin__ struct short1 short1;
typedef __device_builtin__ struct ushort1 ushort1;
typedef __device_builtin__ struct short2 short2;
typedef __device_builtin__ struct ushort2 ushort2;
typedef __device_builtin__ struct short3 short3;
typedef __device_builtin__ struct ushort3 ushort3;
typedef __device_builtin__ struct short4 short4;
typedef __device_builtin__ struct ushort4 ushort4;
typedef __device_builtin__ struct int1 int1;
typedef __device_builtin__ struct uint1 uint1;
typedef __device_builtin__ struct int2 int2;
typedef __device_builtin__ struct uint2 uint2;
typedef __device_builtin__ struct int3 int3;
typedef __device_builtin__ struct uint3 uint3;
typedef __device_builtin__ struct int4 int4;
typedef __device_builtin__ struct uint4 uint4;
typedef __device_builtin__ struct long1 long1;
typedef __device_builtin__ struct ulong1 ulong1;
typedef __device_builtin__ struct long2 long2;
typedef __device_builtin__ struct ulong2 ulong2;
typedef __device_builtin__ struct long3 long3;
typedef __device_builtin__ struct ulong3 ulong3;
typedef __device_builtin__ struct long4 long4;
typedef __device_builtin__ struct ulong4 ulong4;
typedef __device_builtin__ struct float1 float1;
typedef __device_builtin__ struct float2 float2;
typedef __device_builtin__ struct float3 float3;
typedef __device_builtin__ struct float4 float4;
typedef __device_builtin__ struct longlong1 longlong1;
typedef __device_builtin__ struct ulonglong1 ulonglong1;
typedef __device_builtin__ struct longlong2 longlong2;
typedef __device_builtin__ struct ulonglong2 ulonglong2;
typedef __device_builtin__ struct longlong3 longlong3;
typedef __device_builtin__ struct ulonglong3 ulonglong3;
typedef __device_builtin__ struct longlong4 longlong4;
typedef __device_builtin__ struct ulonglong4 ulonglong4;
typedef __device_builtin__ struct double1 double1;
typedef __device_builtin__ struct double2 double2;
typedef __device_builtin__ struct double3 double3;
typedef __device_builtin__ struct double4 double4;
/*******************************************************************************
* *
* *
* *
*******************************************************************************/
struct __device_builtin__ dim3
{
unsigned int x, y, z;
#if defined(__cplusplus)
__host__ __device__ dim3(unsigned int vx = 1, unsigned int vy = 1, unsigned int vz = 1) : x(vx), y(vy), z(vz) {}
__host__ __device__ dim3(uint3 v) : x(v.x), y(v.y), z(v.z) {}
__host__ __device__ operator uint3(void) { uint3 t; t.x = x; t.y = y; t.z = z; return t; }
#endif /* __cplusplus */
};
typedef __device_builtin__ struct dim3 dim3;
#undef __cuda_builtin_vector_align8
#endif /* !__VECTOR_TYPES_H__ */
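As a small usage note for the dim3 type declared above: its defaulted constructor arguments make launch configurations concise, and it converts to uint3 inside kernels. The fill kernel below is a placeholder added for illustration.

```
#include <cuda_runtime.h>

__global__ void fill(float* out, float value, int n)
{
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) out[i] = value;
}

void launch_fill(float* d_out, float value, int n)
{
  dim3 block(256);                              // y and z default to 1
  dim3 grid((n + block.x - 1) / block.x);       // one thread per element
  fill<<<grid, block>>>(d_out, value, n);
}
```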

File diff suppressed because it is too large

View File

@@ -1,148 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ISAAC_RUNTIME_PREDICT_H_
#define ISAAC_RUNTIME_PREDICT_H_
#include <fstream>
#include <vector>
#include <memory>
#include <iostream>
#include <cstring>
#include <algorithm>
#include "isaac/tools/matrix.hpp"
#include "isaac/driver/device.h"
#include "isaac/templates/common.hpp"
#include "isaac/templates/pool.h"
#include "isaac/templates/conv.h"
#include "isaac/templates/gemm.h"
#include <map>
namespace isaac{
namespace runtime{
// Layers
class Layer{
public:
static Layer* read(u_char*& current);
virtual void forward(matrix<float> const & X, matrix<float> & Y) = 0;
virtual size_t n_outs(size_t n_outs_prev) = 0;
};
class Activation: public Layer{
public:
static const int BINARY_CODE = 0;
size_t n_outs(size_t n_outs_prev);
private:
};
class ReLU: public Activation{
public:
static const int BINARY_CODE = 0;
void forward(matrix<float> const & X, matrix<float> & Y);
};
class Linear: public Activation{
public:
static const int BINARY_CODE = 1;
void forward(matrix<float> const & X, matrix<float> & Y);
};
// Dense
class Dense: public Layer{
public:
static const int BINARY_CODE = 1;
Dense(u_char*& data);
size_t n_outs(size_t);
void forward(matrix<float> const & X, matrix<float> & Y);
private:
matrix<float> W_;
std::vector<float> b_;
};
// Network
class Network{
public:
Network(u_char* data);
void predict(const matrix<float>& X, matrix<float>& Y);
private:
std::vector<std::shared_ptr<Layer>> layers_;
};
enum OperationType{
GEMM,
CONV,
POOL
};
//Profile
class Profile{
protected:
typedef void (&validator_t)(driver::Device const &, size_t, param_t*, uint8_t*);
typedef std::function<double(std::vector<param_t> const&)> benchmark_t;
public:
Profile(u_char* data, size_t nshapes);
std::vector<param_t> predict(driver::Device const & device, std::vector<param_t> const & shapes, validator_t const & validator, benchmark_t const & benchmark, size_t num_re_evaluate);
matrix<param_t> const & kernels() const;
private:
matrix<param_t> kernels_;
driver::Device device_;
Network predictor_;
};
class ConvProfile: public Profile{
public:
ConvProfile(u_char* data);
templates::Conv predict(driver::Stream& stream, DType in_dtype, DType out_dtype, param_t C, param_t D, param_t H, param_t W, param_t N, param_t K, param_t M, param_t P, param_t Q, param_t T, param_t R, param_t S,
param_t pad_d, param_t pad_h, param_t pad_w,
param_t stride_d, param_t stride_h, param_t stride_w,
param_t upsample_d, param_t upsample_h, param_t upsample_w,
ActivationType activation, size_t num_outputs,
ResidualType residual, param_t Zk, param_t crop_z_m0, param_t crop_z_m1, param_t crop_z_p0, param_t crop_z_p1, param_t crop_z_q0, param_t crop_z_q1, size_t num_re_evaluate = 1);
};
class PoolProfile: public Profile{
public:
PoolProfile(u_char* data);
templates::Pool predict(driver::Stream& stream, DType in_dtype, DType out_dtype, PoolType pool_type, param_t C, param_t D, param_t H, param_t W, param_t N, param_t M, param_t P, param_t Q, param_t T, param_t R, param_t S,
param_t pad_d, param_t pad_h, param_t pad_w, param_t stride_d, param_t stride_h, param_t stride_w, size_t num_re_evaluate = 1);
};
class GEMMProfile: public Profile{
public:
GEMMProfile(u_char* data);
templates::GEMM predict(driver::Stream& stream, DType in_dtype, DType out_dtype, IsaacOperation_t AT, IsaacOperation_t BT, param_t M, param_t N, param_t K,
param_t offa, param_t lda, param_t offb, param_t ldb, param_t offc, param_t ldc, size_t num_re_evaluate = 1);
};
//Database
extern const std::map<std::pair<driver::Device::Architecture, OperationType>, std::shared_ptr<Profile> > database;
}
}
#endif

View File

@@ -1,95 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ISAAC_SCALAR_H
#define ISAAC_SCALAR_H
#include "isaac/external/half.hpp"
namespace isaac{
enum DType{
INT8X4_TYPE = 1,
INT32_TYPE,
FLOAT_TYPE,
DOUBLE_TYPE,
};
inline size_t size_of(DType dtype){
switch (dtype) {
case INT8X4_TYPE: return 4;
case INT32_TYPE: return 4;
case FLOAT_TYPE: return 4;
case DOUBLE_TYPE: return 8;
default: throw;
}
}
template<class T> struct to_DType;
template<> struct to_DType<int32_t>{ static const DType value = INT8X4_TYPE; };
template<> struct to_DType<float>{ static const DType value = FLOAT_TYPE; };
template<> struct to_DType<double>{ static const DType value = DOUBLE_TYPE; };
class scalar{
private:
template<class T>
void init(T const & x){
switch(dtype_){
case INT32_TYPE: value_.int32 = (int32_t)x; break;
case FLOAT_TYPE: value_.float32 = (float)x; break;
case DOUBLE_TYPE: value_.float64 = (double)x; break;
default: throw;
}
}
public:
#define ISAAC_INSTANTIATE(TYPE) scalar(TYPE value, DType dtype = to_DType<TYPE>::value) : dtype_(dtype) { init(value); }
ISAAC_INSTANTIATE(float)
ISAAC_INSTANTIATE(double)
#undef ISAAC_INSTANTIATE
void* data() const{
switch(dtype_){
case INT32_TYPE: return (void*)&value_.int32;
case FLOAT_TYPE: return (void*)&value_.float32;
case DOUBLE_TYPE: return (void*)&value_.float64;
default: throw;
}
}
DType dtype() const{
return dtype_;
}
private:
DType dtype_;
union{
int32_t int32;
float float32;
double float64;
}value_;
};
}
#endif

View File

@@ -1,89 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ISAAC_TEMPLATES_COMMON_HPP_
#define ISAAC_TEMPLATES_COMMON_HPP_
#include <cstddef>
#include <cstdint>
#include <vector>
#include "isaac/scalar.h"
namespace isaac{
inline int32_t ceil(int32_t num, int32_t div){
return (num + div - 1)/div;
}
inline size_t log2(size_t x){
size_t res = 0;
while((x>>=1)>0) res++;
return res;
}
inline size_t next_pow2(size_t N){
size_t res = 1;
while(res < N)
res*=2;
return res;
}
inline std::string arith_str(DType dtype){
switch (dtype) {
case INT8X4_TYPE: return "s32";
case FLOAT_TYPE: return "f32";
case DOUBLE_TYPE: return "f64";
default: throw;
}
}
inline std::string io_str(DType dtype){
switch (dtype) {
case INT8X4_TYPE: return "b32";
case FLOAT_TYPE: return "b32";
case DOUBLE_TYPE: return "b64";
default: throw;
}
}
typedef uint32_t param_t;
namespace driver{
class Device;
class Stream;
class Kernel;
class Buffer;
}
namespace templates{
class Generator{
public:
Generator(){}
virtual std::string dump(driver::Device const & device, std::string const & name) = 0;
virtual std::vector<param_t> tuning_params() const = 0;
};
}
}
#endif

View File

@@ -1,155 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ISAAC_TEMPLATES_CONV_H_
#define ISAAC_TEMPLATES_CONV_H_
#include <cstddef>
#include <string>
#include "isaac/templates/common.hpp"
namespace isaac{
enum ActivationType{
Linear,
ReLU,
ELU,
Sigmoid
};
enum ResidualType{
NoResidual,
CatResidual,
AddResidual
};
namespace templates{
class Conv: public Generator{
public:
static const std::string id;
static const size_t Nshapes;
static const size_t Ntune;
static const size_t Nparams;
private:
void init_constant_memory(std::vector<int32_t>& delta, std::vector<uint32_t> &masks, size_t nlut, int32_t strideIc, int32_t strideIw, int32_t strideIh, int32_t strideId);
public:
Conv(DType in_dtype, DType out_dtype, param_t C, param_t D, param_t H, param_t W, param_t N, param_t K, param_t M, param_t P, param_t Q, param_t T, param_t R, param_t S,
param_t pad_h, param_t pad_w, param_t pad_d, param_t stride_h, param_t stride_w, param_t stride_d, param_t upsample_d, param_t upsample_h, param_t upsample_w,
ActivationType activation, size_t num_outputs,
ResidualType residual_type, param_t Zk, param_t z_crop_m0, param_t z_crop_m1, param_t z_crop_p0, param_t z_crop_p1, param_t z_crop_q0, param_t z_crop_q1,
param_t vec, param_t bpqn, param_t bk, param_t pqns, param_t ks, param_t crs_l, param_t cs, param_t bc, param_t gridc);
// Execution
std::string dump(driver::Device const & device, std::string const & name);
std::vector<param_t> tuning_params() const;
void enqueue(driver::Kernel& kernel, driver::Stream& queue, driver::Buffer const & I, driver::Buffer const & F, driver::Buffer *O, driver::Buffer const * bias = NULL, float alpha = 0, float iscale = 1, float fscale = 1, std::vector<float> oscale = {1}, float z_scale = 1, driver::Buffer const *Z = NULL);
// Validity
static void output_shapes(param_t D, param_t H, param_t W, param_t T, param_t R, param_t S, param_t pad_d,
param_t pad_h, param_t pad_w, param_t stride_d, param_t stride_h, param_t stride_w,
param_t upsample_d, param_t upsample_h, param_t upsample_w,
param_t& M, param_t& P, param_t& Q);
static void check_valid(driver::Device const & device, size_t M, param_t* params, uint8_t* valid);
// Benchmark
static double tflops(param_t P, param_t Q, param_t M, param_t K, param_t N, param_t C, param_t R, param_t S, param_t T, double time);
private:
// data types
DType in_dtype_;
DType out_dtype_;
// activation type
ActivationType activation_;
size_t num_outputs_;
// residual
ResidualType residual_type_;
param_t Zk_;
param_t z_crop_m0_;
param_t z_crop_m1_;
param_t z_crop_p0_;
param_t z_crop_p1_;
param_t z_crop_q0_;
param_t z_crop_q1_;
param_t Zm_;
param_t Zp_;
param_t Zq_;
//input shapes
param_t C_;
param_t N_;
param_t K_;
param_t Kout_;
// Input dimensions
param_t D_;
param_t H_;
param_t W_;
// Output Dimensions
param_t M_;
param_t P_;
param_t Q_;
// Filter Dimensions
param_t T_;
param_t R_;
param_t S_;
// Pad
param_t pad_d_;
param_t pad_h_;
param_t pad_w_;
// stride
param_t stride_d_;
param_t stride_h_;
param_t stride_w_;
// upsample
param_t upsample_d_;
param_t upsample_h_;
param_t upsample_w_;
//parameters
param_t vec_;
param_t bc0_;
param_t bc1_;
param_t cs0_;
param_t cs1_;
param_t bf_n_;
param_t u_;
param_t us_;
param_t zs_;
param_t bz_;
param_t gridz_;
// constant memory
std::vector<int32_t> cLUT;
std::vector<uint32_t> masks_;
};
}
}
#endif

View File

@@ -1,39 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ISAAC_TEMPLATES_ERROR_HPP_
#define ISAAC_TEMPLATES_ERROR_HPP_
#include <exception>
namespace isaac{
namespace templates{
class invalid_parameters: public std::exception {
public:
const char * what() const throw(){ return "Invalid parameters";}
};
}
}
#endif

View File

@@ -1,102 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ISAAC_TEMPLATES_GEMM_H_
#define ISAAC_TEMPLATES_GEMM_H_
#include <cstddef>
#include <string>
#include "isaac/templates/common.hpp"
#include "isaac/scalar.h"
namespace isaac{
namespace driver{
class Device;
class Stream;
class Kernel;
class Buffer;
}
enum IsaacOperation_t{
ISAAC_OP_N = 1,
ISAAC_OP_T = 2
};
namespace templates{
class GEMM: public Generator{
public:
static const std::string id;
static const size_t Nshapes;
static const size_t Ntune;
static const size_t Nparams;
public:
GEMM(DType in_dtype, DType out_dtype, IsaacOperation_t AT, IsaacOperation_t BT, param_t M, param_t N, param_t K, param_t offa, param_t lda, param_t offb, param_t ldb, param_t offc, param_t ldc,
param_t vec, param_t bm, param_t u, param_t bn, param_t ms, param_t us, param_t ns, param_t ba0, param_t ba1, param_t bb0, param_t bb1,
param_t ks, param_t bk, param_t kg);
std::string dump(driver::Device const & device, std::string const & name);
std::vector<param_t> tuning_params() const;
void enqueue(driver::Kernel& kernel, driver::Stream& queue, scalar const & alpha, driver::Buffer const & A, driver::Buffer const & B, scalar const & beta, driver::Buffer& C, float a_scale = 1, float b_scale = 1, float c_scale = 1, const driver::Buffer *bias = NULL);
static void check_valid(driver::Device const & device, size_t M, param_t* params, uint8_t* valid);
static double tflops(param_t M, param_t N, param_t K, double time);
private:
DType in_dtype_;
DType out_dtype_;
//transposition
IsaacOperation_t AT_;
IsaacOperation_t BT_;
//input shapes
param_t M_;
param_t N_;
param_t K_;
param_t offa_;
param_t lda_;
param_t offb_;
param_t ldb_;
param_t offc_;
param_t ldc_;
//parameters
param_t vec_;
param_t bc0_;
param_t bc1_;
param_t cs0_;
param_t cs1_;
param_t u_;
param_t us_;
param_t ba0_;
param_t ba1_;
param_t bb0_;
param_t bb1_;
param_t zs_;
param_t bz_;
param_t gridz_;
param_t stn_;
};
}
}
#endif
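A hedged sketch of how this template class is driven: shapes plus tuning parameters in, PTX out. The include path and the `FLOAT_TYPE` enumerator name are assumptions, and the fourteen trailing tuning values are illustrative placeholders only; real ones come from the auto-tuned profile and should be screened with `GEMM::check_valid`.
```
#include "isaac/templates/gemm.h"     // include path assumed
#include "isaac/driver/backend.h"
#include <iostream>

int main(){
  namespace drv = isaac::driver;
  drv::Device const & device = drv::backend::contexts::get_default().device();
  isaac::param_t M = 2048, N = 2048, K = 2048;
  // fp32 "NT" GEMM; FLOAT_TYPE is an assumed DType enumerator name, and the
  // tuning parameters (vec, bm, u, bn, ms, us, ns, ba0, ba1, bb0, bb1, ks, bk, kg)
  // are placeholders that would normally come from the auto-tuner.
  isaac::templates::GEMM gemm(isaac::FLOAT_TYPE, isaac::FLOAT_TYPE,
                              isaac::ISAAC_OP_N, isaac::ISAAC_OP_T,
                              M, N, K, 0, M, 0, N, 0, M,
                              4, 16, 8, 16, 4, 1, 4, 8, 4, 8, 4, 1, 1, 1);
  std::cout << gemm.dump(device, "gemm") << std::endl;   // generated PTX source
}
```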

View File

@@ -1,100 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ISAAC_TEMPLATES_POOL_H_
#define ISAAC_TEMPLATES_POOL_H_
#include <cstddef>
#include <string>
#include "isaac/templates/common.hpp"
namespace isaac{
enum PoolType{
MaxPool,
AvgPool
};
namespace templates{
class Pool: public Generator{
private:
void init_constant_memory(std::vector<int32_t>& delta, std::vector<uint32_t> &masks, size_t nlut, int32_t strideIc, int32_t strideIw, int32_t strideIh, int32_t strideId);
public:
static const std::string id;
static const size_t Nshapes;
static const size_t Ntune;
static const size_t Nparams;
public:
Pool(DType in_dtype, DType out_dtype, PoolType pool_type,
param_t C, param_t D, param_t H, param_t W, param_t N, param_t M, param_t P, param_t Q, param_t T, param_t R, param_t S,
param_t pad_d, param_t pad_h, param_t pad_w,
param_t stride_d, param_t stride_h, param_t stride_w,
param_t vec = 1, param_t bc0 = 32, param_t cs0 = 4, param_t u = 1);
// Execution
std::string dump(driver::Device const & device, std::string const & name);
static void check_valid(driver::Device const & device, size_t M, param_t* params, uint8_t* valid);
void enqueue(driver::Kernel& kernel, driver::Stream& queue, driver::Buffer const & I, driver::Buffer &O, float i_scale = 1, float o_scale = 1);
std::vector<unsigned int> tuning_params() const;
static double tflops(param_t P, param_t Q, param_t M, param_t K, param_t N, param_t T, param_t R, param_t S, double time);
private:
DType in_dtype_;
DType out_dtype_;
PoolType pool_type_;
// Shapes
param_t Cin_;
param_t Cout_;
param_t D_;
param_t H_;
param_t W_;
param_t N_;
param_t M_;
param_t P_;
param_t Q_;
param_t T_;
param_t R_;
param_t S_;
param_t pad_d_;
param_t pad_h_;
param_t pad_w_;
param_t stride_d_;
param_t stride_h_;
param_t stride_w_;
// Tuning params
param_t vec_;
param_t bc0_;
param_t cs0_;
param_t u_;
// Constant buffer
std::vector<int32_t> cLUT;
std::vector<uint32_t> masks_;
};
}
}
#endif

View File

@@ -1,80 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef BENCH_HPP
#define BENCH_HPP
#include <chrono>
#include <algorithm>
#include <cstddef>
#include <string>
#include <vector>
#include <isaac/driver/device.h>
#include <iostream>
#include <iomanip>
#include <iterator>
class Timer
{
typedef std::chrono::high_resolution_clock high_resolution_clock;
typedef std::chrono::nanoseconds nanoseconds;
public:
explicit Timer(bool run = false)
{ if (run) start(); }
void start()
{ _start = high_resolution_clock::now(); }
nanoseconds get() const
{ return std::chrono::duration_cast<nanoseconds>(high_resolution_clock::now() - _start); }
private:
high_resolution_clock::time_point _start;
};
template<class T>
T min(std::vector<T> x)
{ return *std::min_element(x.begin(), x.end()); }
template<class OP, class SYNC>
double bench(OP const & op, SYNC const & sync, isaac::driver::Device const & device)
{
Timer tmr;
std::vector<size_t> times;
double total_time = 0;
op();
sync();
while(total_time*1e-9 < 1e-1){
float norm = (float)device.current_sm_clock()/device.max_sm_clock();
tmr.start();
op();
sync();
times.push_back(norm*tmr.get().count());
total_time+=times.back();
}
return min(times);
}
template<class T>
std::string str(T const & x){ return std::to_string(x); }
#endif
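A usage sketch of `bench()`: the first callable launches the work, the second synchronizes, and the result is the clock-normalized minimum duration in nanoseconds. The local `bench.hpp` include path is assumed.
```
#include "bench.hpp"                    // this header; local include path assumed
#include "isaac/driver/backend.h"
#include "isaac/driver/buffer.h"
#include "isaac/driver/stream.h"
#include <iostream>

int main(){
  namespace drv = isaac::driver;
  drv::Context const & ctx = drv::backend::contexts::get_default();
  drv::Stream & stream = drv::backend::streams::get_default();
  drv::Buffer x(ctx, (size_t)1e8);
  // Warm up once, then repeat for ~0.1 s and report the clock-normalized
  // minimum time (ns) of a 100 MB asynchronous memset.
  double ns = bench([&](){ x.set_zero(stream, (size_t)1e8); },
                    [&](){ stream.synchronize(); },
                    ctx.device());
  std::cout << ns * 1e-9 << " s" << std::endl;
}
```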

View File

@@ -1,286 +0,0 @@
/*
* Copyright (c) 2015, PHILIPPE TILLET. All rights reserved.
*
* This file is part of ISAAC.
*
* ISAAC is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301 USA
*/
#ifndef ISAAC_CPP_COLLECTIONS_HPP
#define ISAAC_CPP_COLLECTIONS_HPP
#include <vector>
#include <iostream>
#include <sstream>
#include <iterator>
#include <algorithm>
#include <numeric>
#include <functional>
#include <limits>
#include <cstddef>
#include <memory>
#include <map>
#include <set>
#include <unordered_map>
#include <unordered_set>
#include <type_traits>
#include <deque>
namespace isaac
{
namespace cpp
{
/* ---- Cached Map ----- */
template<class K, class V>
class CachedMap{
public:
CachedMap(std::function<V(K const &)> value_maker) : value_maker_(value_maker)
{ }
V const & get(K const & key){
auto it = cache_.find(key);
if(it==cache_.end())
return cache_.insert(std::make_pair(key, value_maker_(key))).first->second;
return it->second;
}
private:
std::map<K, V> cache_;
std::function<V(K const &)> value_maker_;
};
/* ---- Cartesian ---- */
inline std::vector<std::vector<int>> cartesian(const std::vector<std::vector<int>>& v) {
std::vector<std::vector<int>> res = {{}};
for (const auto& u : v){
std::vector<std::vector<int>> current;
for (const auto& x : res)
for (const auto y : u){
current.push_back(x);
current.back().push_back(y);
}
res = std::move(current);
}
return res;
}
/* ---- Tuple ----- */
template<class T>
class tuple
{
template<class U>
friend std::ostream& operator<<(std::ostream & oss, tuple<U> const &);
public:
tuple() {}
tuple(std::vector<T> const & list): data_(list){}
tuple(std::initializer_list<T> const & list) : data_(list){}
tuple(T a) : data_{a} {}
tuple(T a, T b) : data_{a, b} {}
tuple(tuple const & other) = default;
tuple(tuple&& other) = default;
tuple& operator=(tuple const & other) = default;
tuple& operator=(tuple && other) = default;
typename std::vector<T>::iterator begin() { return data_.begin(); }
typename std::vector<T>::const_iterator begin() const { return data_.begin(); }
typename std::vector<T>::iterator end() { return data_.end(); }
typename std::vector<T>::const_iterator end() const { return data_.end(); }
size_t size() const { return data_.size(); }
T front() const { return data_.front(); }
T back() const { return data_.back(); }
void remove_index(size_t i) { data_.erase(std::next(data_.begin(), i)); }
T& operator[](size_t i) { return data_[i]; }
T operator[](size_t i) const { return data_[i]; }
bool operator==(tuple const & other) const { return data_==other.data_; }
operator std::vector<T>() const { return data_; }
private:
std::vector<T> data_;
};
template<class T>
inline std::ostream& operator<<(std::ostream & oss, tuple<T> const &tp)
{
oss << "(";
std::copy(tp.data_.begin(), tp.data_.end() - 1, std::ostream_iterator<T>(oss, ","));
oss << tp.data_.back();
if(tp.size()==1)
oss << ",";
oss << ")";
return oss;
}
template<class T>
inline std::string to_string(tuple<T> const & tp)
{
std::ostringstream oss;
oss << tp;
return oss.str();
}
template<class T>
inline void remove_index(std::vector<T>& tp, size_t i)
{ tp.erase(std::next(tp.begin(), i)); }
template<class T>
inline T max(std::vector<T> const & tp)
{ return std::accumulate(tp.begin(), tp.end(), std::numeric_limits<T>::min(), [](T a, T b){ return std::max(a, b); }); }
template<class T>
inline T min(std::vector<T> const & tp)
{ return std::accumulate(tp.begin(), tp.end(), std::numeric_limits<T>::max(), [](T a, T b){ return std::min(a, b); }); }
template<class T>
inline T prod(std::vector<T> const & tp)
{ return std::accumulate(tp.begin(), tp.end(), 1, std::multiplies<T>()); }
template<class T>
inline size_t numgt1(std::vector<T> const & tp)
{ return std::accumulate(tp.begin(), tp.end(), 0, [](size_t a, size_t b){ return a + (b>1); }); }
/* ----- Set/Map ----- */
template<class T>
struct deref_hash
{ size_t operator()(T const & x) const { return x.hash();} };
template<class T>
struct deref_hash<T*>
{ size_t operator()(T const * x) const { return x->hash();} };
template<class T>
struct deref_hash<std::shared_ptr<T>>
{ size_t operator()(std::shared_ptr<T> const & x) const { return x->hash();} };
template<class T>
struct deref_eq
{ size_t operator()(T const & x, T const & y) const { return x == y;} };
template<class T>
struct deref_eq<T*>
{ size_t operator()(T const * x, T const * y) const { return *x == *y;} };
template<class T>
struct deref_eq<std::shared_ptr<T>>
{ size_t operator()(std::shared_ptr<T> const & x, std::shared_ptr<T> const & y) const { return *x == *y;} };
template<class KEY>
using deref_unordered_set = std::unordered_set<KEY, deref_hash<KEY>, deref_eq<KEY>>;
template<class U>
using set_map = std::map<U, std::set<U>>;
template<class U, class H = std::hash<U>, class E = std::equal_to<U>>
using unordered_set_map = std::unordered_map<U, std::unordered_set<U,H,E>, H, E>;
template<class T>
struct is_set_map
{ static const bool value = false; };
template<class U>
struct is_set_map<set_map<U>> { static const bool value = true; };
template<class U, class H, class E>
struct is_set_map<unordered_set_map<U,H,E>> { static const bool value = true; };
/* ---- Transformations ---- */
//Pairs
template<class T, class Enable = typename std::enable_if<is_set_map<T>::value>::type>
std::deque<std::pair<typename T::key_type, typename T::key_type>> pairs(T const & map)
{
typedef typename T::key_type K;
std::deque<std::pair<K,K>> result;
for(auto const& x: map)
for(auto const & y: x.second)
result.push_back({x.first, y});
return result;
}
//Invert
template<class T, class Enable = typename std::enable_if<is_set_map<T>::value>::type>
static T invert(T const & in)
{
T result;
typedef typename T::key_type U;
typedef typename T::mapped_type V;
for(auto const & x: in){
U u = x.first;
result.insert({u, V()});
for(U v: x.second)
result[v].insert(u);
}
return result;
}
//Intersect
template<class T, class H, class E>
std::unordered_set<T,H,E> intersection(std::unordered_set<T,H,E> const & x,
std::unordered_set<T,H,E> const & y)
{
if(y.size() < x.size())
return intersection(y, x);
std::unordered_set<T,H,E> result;
for(auto const & u: x)
if(y.find(u)!=y.end())
result.insert(u);
return result;
}
//Merge
template<class T>
typename std::enable_if<!is_set_map<T>::value, T&>::type merge(T& x, T const & y)
{
std::merge(x.begin(), x.end(), y.begin(), y.end(), std::inserter(x, x.end()));
return x;
}
template<class T>
typename std::enable_if<is_set_map<T>::value, T&>::type merge(T& x, T const & y)
{
for(auto const & p: y) merge(x[p.first], p.second);
return x;
}
//Transfer
template<class T, class U, class Enable = typename std::enable_if<is_set_map<T>::value>::type>
void transfer(T& map, U u, U v, typename T::mapped_type const & exclude)
{
for(auto const & x: exclude)
map[v].erase(x);
merge(map[u], map[v]);
for(auto& x: map)
x.second.erase(v);
map.erase(v);
}
//subset
template<class T, class Enable = typename std::enable_if<is_set_map<T>::value>::type>
T subset(T& map, typename T::mapped_type const & include)
{
T result;
for(auto const & e: map)
if(include.find(e.first)!=include.end())
result[e.first] = cpp::intersection(e.second, include);
return result;
}
}
}
#endif
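Two small, self-contained examples of the utilities above (include path assumed): `cartesian()` expands a parameter grid, and `CachedMap` memoizes an arbitrary key-to-value construction.
```
#include "isaac/tools/collections.hpp"   // include path assumed
#include <cassert>
#include <vector>

int main(){
  // Cartesian product of {1,2} x {3,4}: four candidate configurations.
  std::vector<std::vector<int>> grid = isaac::cpp::cartesian({{1, 2}, {3, 4}});
  assert(grid.size() == 4);
  assert((grid[0] == std::vector<int>{1, 3}));

  // CachedMap memoizes a key -> value construction; the lambda runs once per key.
  isaac::cpp::CachedMap<int, int> squares([](int const & x){ return x * x; });
  assert(squares.get(7) == 49);
  return 0;
}
```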

View File

@@ -1,84 +0,0 @@
/*
* Copyright (c) 2015, PHILIPPE TILLET. All rights reserved.
*
* This file is part of ISAAC.
*
* ISAAC is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301 USA
*/
#ifndef ISAAC_CPP_FUNCTIONAL_HPP
#define ISAAC_CPP_FUNCTIONAL_HPP
#include <type_traits>
#include <tuple>
namespace isaac
{
namespace cpp
{
template <typename T>
struct function_traits
: public function_traits<decltype(&T::operator())>
{};
// For generic types, directly use the result of the signature of its 'operator()'
template <typename ClassType, typename ReturnType, typename... Args>
struct function_traits<ReturnType(ClassType::*)(Args...) const>
// we specialize for pointers to member function
{
enum { arity = sizeof...(Args) };
// arity is the number of arguments.
typedef ReturnType result_type;
template <size_t i>
struct arg
{
typedef typename std::tuple_element<i, std::tuple<Args...>>::type type;
// the i-th argument is equivalent to the i-th tuple element of a tuple
// composed of those arguments.
};
};
template<class U, class FN, class V>
V forward_dyncast(U const & x, FN const & fn, V const &backup)
{
typedef typename function_traits<FN>::template arg<0>::type RT;
typedef typename std::remove_reference<RT>::type T;
if(T const * p = dynamic_cast<T const *>(&x))
return fn(*p);
return backup;
}
template<class U, class FN>
void forward_dyncast(U const & x, FN const & fn)
{
typedef typename function_traits<FN>::template arg<0>::type RT;
typedef typename std::remove_reference<RT>::type T;
if(T const * p = dynamic_cast<T const *>(&x))
fn(*p);
}
template<class U, class FN>
bool compare_if_same(U const & base, FN const & f)
{ return cpp::forward_dyncast(base, f, false); }
}
}
#endif
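A minimal sketch of `forward_dyncast` (include path assumed): the lambda's first argument type is the `dynamic_cast` target, and the backup value is returned when the cast fails.
```
#include "isaac/tools/functional.hpp"   // include path assumed
#include <cassert>

struct Node { virtual ~Node() = default; };
struct Constant: Node { int value = 42; };

int main(){
  Constant c;
  Node const & node = c;
  // The lambda runs only if node is dynamically a Constant; otherwise -1 is returned.
  int v = isaac::cpp::forward_dyncast(node, [](Constant const & x){ return x.value; }, -1);
  assert(v == 42);
}
```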

View File

@@ -1,92 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ISAAC_TOOLS_MATRIX_HPP_
#define ISAAC_TOOLS_MATRIX_HPP_
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <array>
#include <vector>
#include <sys/types.h>   // u_char
inline void read_inc(void* dst, u_char*& data, size_t nbytes){
std::memcpy(dst, (void*)data, nbytes);
data += nbytes;
}
template<class T>
void gemm(uint32_t M, uint32_t N, uint32_t K, T alpha, T* A, uint32_t lda, T* B, uint32_t ldb, T /*beta, unused*/, T* C, uint32_t ldc, T* bias){
for(uint32_t i = 0; i < M ; ++i)
for(uint32_t j = 0; j < N ; ++j){
T acc = 0;
for(uint32_t k = 0; k < K; ++k)
acc += A[i*lda + k] * B[k*ldb + j];
C[i*ldc + j] = alpha*acc + bias[j];
}
}
template<class T>
class matrix{
typedef std::array<uint32_t, 2> shapes_t;
public:
matrix(u_char*& data){
read_inc((void*)shapes_.data(), data, 8);
values_.resize(shapes_[0]*shapes_[1]);
ld_ = shapes_[1];
read_inc((void*)values_.data(), data, values_.size()*4);
data_ = values_.data();
}
matrix(shapes_t const & shapes, size_t ld, T* data): shapes_(shapes), ld_(ld), data_(data){}
matrix(shapes_t const & shapes): shapes_(shapes), ld_(shapes.back()), values_(shapes[0]*shapes[1]), data_(values_.data()){}
shapes_t const & shapes() const
{ return shapes_; }
T const & operator()(size_t i, size_t j) const
{ return data_[i*ld_ + j]; }
T & operator ()(size_t i, size_t j)
{ return data_[i*ld_ + j]; }
T* data() const
{ return data_; }
T* data()
{ return data_; }
uint32_t ld() const
{ return ld_; }
private:
shapes_t shapes_;
size_t ld_;
std::vector<T> values_;
T* data_;
};
template<class T>
matrix<T> pad_left(matrix<T> const & in, uint32_t npad){
uint32_t M = in.shapes()[0], N = in.shapes()[1];
matrix<T> result({M, N + npad});
for(size_t i = 0; i < M; ++i)
for(size_t j = 0; j < N; ++j)
result(i, npad + j) = in(i, j);
return result;
}
#endif
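A worked example of the reference `gemm` above on a 2x3 by 3x2 product (the unnamed template parameter sits in beta's slot and is unused); the include path is assumed.
```
#include "isaac/tools/matrix.hpp"   // include path assumed
#include <cassert>
#include <vector>

int main(){
  uint32_t M = 2, N = 2, K = 3;
  std::vector<float> A = {1, 2, 3,
                          4, 5, 6};      // 2x3, row-major, lda = 3
  std::vector<float> B = {1, 0,
                          0, 1,
                          1, 1};         // 3x2, row-major, ldb = 2
  std::vector<float> C(M * N), bias(N, 0.f);
  gemm<float>(M, N, K, 1.f, A.data(), K, B.data(), N, 0.f, C.data(), N, bias.data());
  assert(C[0] == 4 && C[1] == 5 && C[2] == 10 && C[3] == 11);
}
```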

View File

@@ -1,56 +0,0 @@
/*
* Copyright (c) 2015, PHILIPPE TILLET. All rights reserved.
*
* This file is part of ISAAC.
*
* ISAAC is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301 USA
*/
#ifndef ISAAC_TOOLS_GETENV
#define ISAAC_TOOLS_GETENV
#include <string>
#include <cstdlib>
namespace isaac
{
namespace tools
{
inline std::string getenv(const char * name)
{
#ifdef _MSC_VER
char* cache_path = 0;
std::size_t sz = 0;
_dupenv_s(&cache_path, &sz, name);
#else
const char * cache_path = std::getenv(name);
#endif
if(!cache_path)
return "";
std::string result(cache_path);
#ifdef _MSC_VER
free(cache_path);
#endif
return result;
}
}
}
#endif

View File

@@ -1,67 +0,0 @@
/*
* Copyright (c) 2015, PHILIPPE TILLET. All rights reserved.
*
* This file is part of ISAAC.
*
* ISAAC is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301 USA
*/
#ifndef ISAAC_TOOLS_MKDIR
#define ISAAC_TOOLS_MKDIR
#include <cstring>
#include <string>
#include <cstdlib>
#include <sys/stat.h>
#include <errno.h>
#if defined(_WIN32)
#include <direct.h>
#endif
namespace isaac
{
namespace tools
{
inline int mkdir(std::string const & path)
{
#if defined(_WIN32)
return _mkdir(path.c_str());
#else
return ::mkdir(path.c_str(), 0777);
#endif
}
inline int mkpath(std::string const & path)
{
int status = 0;
size_t pp = 0;
size_t sp;
while ((sp = path.find('/', pp)) != std::string::npos)
{
if (sp != pp){
status = mkdir(path.substr(0, sp));
}
pp = sp + 1;
}
return (status==0 || errno==EEXIST)?0:-1;
}
}
}
#endif
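Usage sketch: `mkpath` creates each intermediate directory delimited by '/', so the path must end with a slash for the final component to be created, exactly as `Context::get_cache_path()` does further below.
```
#include "isaac/tools/sys/mkdir.hpp"    // paths as included by context.cpp below
#include "isaac/tools/sys/getenv.hpp"

int main(){
  // Only components up to the last '/' are created, hence the trailing slash.
  std::string cache = isaac::tools::getenv("HOME") + "/.isaac/cache/";
  return isaac::tools::mkpath(cache);   // 0 on success or if it already exists
}
```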

View File

@@ -1,142 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "isaac/api.h"
namespace isaac{
inline size_t num_re_evaluate(size_t optimization_level){
if(optimization_level <= 1)
return 1;
return 5*optimization_level;
}
void GEMM(driver::Device const &, driver::Stream & stream,
DType in_dtype, DType out_dtype, IsaacOperation_t AT, IsaacOperation_t BT, param_t M, param_t N, param_t K,
param_t offa, param_t lda, param_t offb, param_t ldb, param_t offc, param_t ldc,
scalar const & alpha, driver::Buffer const & A, driver::Buffer const & B, scalar const & beta, driver::Buffer& C,
float a_scale, float b_scale, float c_scale,
const driver::Buffer *bias,
templates::GEMM* generator, size_t optimization_level)
{
typedef std::tuple<driver::Stream, DType, DType, IsaacOperation_t, IsaacOperation_t, std::vector<param_t>> key_type;
// Build the generator if necessary
static cpp::CachedMap<key_type, std::shared_ptr<templates::GEMM>> inference([optimization_level](key_type const & key){
driver::Stream & stream = (driver::Stream&)std::get<0>(key);
DType in_dtype = std::get<1>(key);
DType out_dtype = std::get<2>(key);
IsaacOperation_t AT = std::get<3>(key), BT = std::get<4>(key);
runtime::GEMMProfile* profile = (runtime::GEMMProfile*)runtime::database.at({stream.context().device().architecture(), runtime::GEMM}).get();
std::vector<param_t> const & x = std::get<5>(key);
templates::GEMM result = profile->predict(stream, in_dtype, out_dtype, AT, BT, x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], num_re_evaluate(optimization_level));
return std::make_shared<templates::GEMM>(result);
});
// Build the kernel
static cpp::CachedMap<std::pair<driver::Stream, templates::GEMM*>, std::shared_ptr<driver::Kernel>> kernels([](std::pair<driver::Stream, templates::GEMM*> key){
driver::Context const & context = key.first.context();
driver::Module module(context, key.second->dump(context.device(), "gemm"));
return std::make_shared<driver::Kernel>(module, "gemm");
});
//Retrieve profile/kernel and execute
if(generator == NULL)
generator = inference.get(key_type(stream, in_dtype, out_dtype, AT, BT, {M, N, K, offa, lda, offb, ldb, offc, ldc})).get();
generator->enqueue(*kernels.get(std::make_pair(stream, generator)), stream, alpha, A, B, beta, C, a_scale, b_scale, c_scale, bias);
}
void CONV(driver::Device const &, driver::Stream & stream,
DType in_dtype, DType out_dtype, param_t N, param_t K, param_t M, param_t P, param_t Q, param_t C, param_t T, param_t R, param_t S,
param_t D, param_t H, param_t W,
param_t pad_d, param_t pad_h, param_t pad_w,
param_t stride_d, param_t stride_h, param_t stride_w,
param_t upsample_d, param_t upsample_h, param_t upsample_w,
driver::Buffer const & I, driver::Buffer const & F, driver::Buffer* O, param_t num_outputs,
driver::Buffer const * bias,
ActivationType activation, float alpha,
float iscale, float fscale, std::vector<float> const & oscale, float z_scale,
ResidualType residual, param_t Zk, param_t crop_z_m0, param_t crop_z_m1, param_t crop_z_p0, param_t crop_z_p1, param_t crop_z_q0, param_t crop_z_q1, driver::Buffer const *Z,
templates::Conv* generator, size_t optimization_level)
{
typedef std::tuple<driver::Stream, DType, DType, std::vector<param_t>> key_type;
// Build the generator if necessary
static cpp::CachedMap<key_type, std::shared_ptr<templates::Conv>> inference([optimization_level](key_type const & key){
driver::Stream & stream = (driver::Stream&)std::get<0>(key);
DType in_dtype = std::get<1>(key);
DType out_dtype = std::get<2>(key);
std::vector<param_t> const & x = std::get<3>(key);
runtime::ConvProfile* profile = (runtime::ConvProfile*)runtime::database.at({stream.context().device().architecture(), runtime::CONV}).get();
templates::Conv result = profile->predict(stream, in_dtype, out_dtype, x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15], x[16], x[17], x[18], x[19], x[20], (ActivationType)x[21], x[22], (ResidualType)x[23], x[24], x[25], x[26], x[27], x[28], x[29], x[30], num_re_evaluate(optimization_level));
return std::make_shared<templates::Conv>(result);
});
// Build the kernel
static cpp::CachedMap<std::pair<driver::Stream, templates::Conv*>, std::shared_ptr<driver::Kernel>> kernels([](std::pair<driver::Stream, templates::Conv*> const & key){
driver::Context const & context = key.first.context();
driver::Module module(context, key.second->dump(context.device(), "conv"));
return std::make_shared<driver::Kernel>(module, "conv");
});
//Retrieve profile/kernel and execute
if(generator == NULL)
generator = inference.get(key_type(stream, in_dtype, out_dtype, {C, D, H, W, N, K, M, P, Q, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w, upsample_d, upsample_h, upsample_w, activation, num_outputs, residual, Zk, crop_z_m0, crop_z_m1, crop_z_p0, crop_z_p1, crop_z_q0, crop_z_q1})).get();
generator->enqueue(*kernels.get(std::make_pair(stream, generator)), stream, I, F, O, bias, alpha, iscale, fscale, oscale, z_scale, Z);
}
void POOL(driver::Device const &, driver::Stream & stream,
DType in_dtype, DType out_dtype, PoolType pool_type, param_t C, param_t M, param_t P, param_t Q, param_t N, param_t T, param_t R, param_t S,
param_t D, param_t H, param_t W, param_t pad_d, param_t pad_h, param_t pad_w, param_t stride_d, param_t stride_h, param_t stride_w,
driver::Buffer const & I, driver::Buffer& O,
float iscale, float oscale,
templates::Pool* generator, size_t optimization_level)
{
typedef std::tuple<driver::Stream, DType, DType, std::vector<param_t>> key_type;
// Build the generator if necessary
static cpp::CachedMap<key_type, std::shared_ptr<templates::Pool>> inference([optimization_level](key_type const & key){
driver::Stream & stream = (driver::Stream&)std::get<0>(key);
runtime::PoolProfile* profile = (runtime::PoolProfile*)runtime::database.at({stream.context().device().architecture(), runtime::POOL}).get();
DType in_dtype = std::get<1>(key);
DType out_dtype = std::get<2>(key);
std::vector<param_t> const & x = std::get<3>(key);
templates::Pool result = profile->predict(stream, in_dtype, out_dtype, (PoolType)x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15], x[16], x[17], num_re_evaluate(optimization_level));
return std::make_shared<templates::Pool>(result);
});
// Build the kernel
static cpp::CachedMap<std::pair<driver::Stream, templates::Pool*>, std::shared_ptr<driver::Kernel>> kernels([](std::pair<driver::Stream, templates::Pool*> const & key){
driver::Context const & context = key.first.context();
driver::Module module(context, key.second->dump(context.device(), "pool"));
return std::make_shared<driver::Kernel>(module, "pool");
});
//Retrieve profile/kernel and execute
if(generator == NULL)
generator = inference.get(key_type(stream, in_dtype, out_dtype, {pool_type, C, D, H, W, N, M, P, Q, T, R, S, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w})).get();
generator->enqueue(*kernels.get(std::make_pair(stream, generator)), stream, I, O, iscale, oscale);
}
}

View File

@@ -1,196 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "isaac/driver/dispatch.h"
#include "isaac/driver/backend.h"
#include "isaac/driver/buffer.h"
#include "isaac/driver/context.h"
#include "isaac/driver/stream.h"
#include "isaac/driver/kernel.h"
#include <assert.h>
#include <stdexcept>
#include <vector>
namespace isaac
{
namespace driver
{
/*-----------------------------------*/
/*---------- Modules ----------------*/
/*-----------------------------------*/
void backend::modules::release(){
for(auto & x: cache_)
delete x.second;
cache_.clear();
}
Module& backend::modules::get(Stream const & stream, std::string const & name, std::string const & src){
std::tuple<Stream, std::string> key(stream, name);
if(cache_.find(key)==cache_.end())
return *cache_.insert(std::make_pair(key, new Module(stream.context(), src))).first->second;
return *cache_.at(key);
}
std::map<std::tuple<Stream, std::string>, Module * > backend::modules::cache_;
/*-----------------------------------*/
/*----------- Kernels --------------*/
/*-----------------------------------*/
void backend::kernels::release(){
for(auto & x: cache_)
delete x.second;
cache_.clear();
}
Kernel & backend::kernels::get(Module const & program, std::string const & name){
std::tuple<Module, std::string> key(program, name);
if(cache_.find(key)==cache_.end())
return *cache_.insert(std::make_pair(key, new Kernel(program, name.c_str()))).first->second;
return *cache_.at(key);
}
std::map<std::tuple<Module, std::string>, Kernel * > backend::kernels::cache_;
/*-----------------------------------*/
/*------------ Queues --------------*/
/*-----------------------------------*/
void backend::streams::init(std::list<const Context *> const & contexts){
for(Context const * ctx : contexts)
if(cache_.find(*ctx)==cache_.end())
cache_.insert(std::make_pair(*ctx, std::vector<Stream*>{new Stream(*ctx)}));
}
void backend::streams::release(){
for(auto & x: cache_)
for(auto & y: x.second)
delete y;
cache_.clear();
}
Stream & backend::streams::get_default()
{ return get(contexts::get_default(), 0); }
Stream & backend::streams::get(Context const & context, unsigned int id){
init(std::list<Context const *>(1,&context));
for(auto & x : cache_)
if(x.first==context)
return *x.second[id];
throw std::runtime_error("ISAAC: no stream registered for the requested context");
}
void backend::streams::get(Context const & context, std::vector<Stream*> & queues){
init(std::list<Context const *>(1,&context));
queues = cache_.at(context);
}
std::map<Context, std::vector<Stream*> > backend::streams::cache_;
/*-----------------------------------*/
/*------------ Contexts ------------*/
/*-----------------------------------*/
void backend::contexts::init(std::vector<Platform> const & platforms){
for(Platform const & platform: platforms){
for(Device const & device: platform.devices())
cache_.push_back(new Context(device));
}
}
void backend::contexts::release(){
for(auto & x: cache_)
delete x;
cache_.clear();
}
Context const & backend::contexts::get_default(){
backend::init();
std::list<Context const *>::const_iterator it = cache_.begin();
std::advance(it, default_device);
return **it;
}
void backend::contexts::get(std::list<Context const *> & contexts){
backend::init();
contexts = cache_;
}
std::list<Context const *> backend::contexts::cache_;
/*-----------------------------------*/
/*------------ General -------------*/
/*-----------------------------------*/
std::vector<Device> backend::devices(){
std::vector<Platform> platforms = backend::platforms();
std::vector<Device> result;
for(Platform const & platform: platforms){
auto devices = platform.devices();
result.insert(result.end(), devices.begin(), devices.end());
}
return result;
}
std::vector<Platform> backend::platforms(){
std::vector<Platform> platforms;
//if CUDA is here
if(dispatch::cuinit())
platforms.push_back(Platform());
if(platforms.empty())
throw std::runtime_error("ISAAC: No backend available. Make sure CUDA is available in your library path");
return platforms;
}
void backend::synchronize(Context const & context){
for(Stream * queue: streams::cache_.at(context))
queue->synchronize();
}
void backend::release(){
backend::kernels::release();
// backend::programs::release();
backend::streams::release();
backend::contexts::release();
}
void backend::init(){
if(!contexts::cache_.empty())
return;
std::vector<Platform> platforms = backend::platforms();
contexts::init(platforms);
streams::init(contexts::cache_);
}
unsigned int backend::default_device = 0;
}
}
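A short sketch of the backend singletons defined above: device enumeration, the lazily created default context, and context-wide synchronization.
```
#include "isaac/driver/backend.h"
#include <iostream>

int main(){
  namespace drv = isaac::driver;
  // Enumerate every CUDA device visible through the dispatcher.
  for(drv::Device const & device: drv::backend::devices())
    std::cout << device.name() << std::endl;
  // Default context and stream are created lazily on first use.
  drv::Context const & ctx = drv::backend::contexts::get_default();
  drv::Stream & stream = drv::backend::streams::get_default();
  (void)stream;
  drv::backend::synchronize(ctx);   // waits on every stream attached to ctx
}
```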

View File

@@ -1,60 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <iostream>
#include "isaac/driver/stream.h"
#include "isaac/driver/buffer.h"
#include "isaac/driver/context.h"
#include "isaac/driver/dispatch.h"
namespace isaac
{
namespace driver
{
Buffer::Buffer(Context const & context, size_t size) : context_(context)
{
ContextSwitcher ctx_switch(context_);
dispatch::cuMemAlloc(&*cu_, size);
}
Buffer::Buffer(Context const & context, CUdeviceptr cu, bool take_ownership):
context_(context), cu_(cu, take_ownership)
{ }
void Buffer::set_zero(Stream const & queue, size_t size)
{
ContextSwitcher ctx_switch(context_);
dispatch::cuMemsetD8Async(*cu_, 0, size, queue);
}
Handle<CUdeviceptr> const & Buffer::cu() const
{ return cu_; }
Handle<CUdeviceptr> & Buffer::cu()
{ return cu_; }
}
}
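A sketch of the `Buffer` API above: allocate device memory on the default context and zero it asynchronously on the default stream.
```
#include "isaac/driver/backend.h"
#include "isaac/driver/buffer.h"
#include "isaac/driver/stream.h"

int main(){
  namespace drv = isaac::driver;
  drv::Context const & ctx = drv::backend::contexts::get_default();
  drv::Stream & stream = drv::backend::streams::get_default();
  drv::Buffer x(ctx, 1024 * sizeof(float));   // cuMemAlloc of 4 KB
  x.set_zero(stream, 1024 * sizeof(float));   // asynchronous cuMemsetD8
  stream.synchronize();
}
```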

View File

@@ -1,99 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <iostream>
#include <cassert>
#include "isaac/driver/context.h"
#include "isaac/driver/module.h"
#include "isaac/tools/sys/getenv.hpp"
#include "isaac/tools/sys/mkdir.hpp"
namespace isaac
{
namespace driver
{
std::string Context::get_cache_path(){
//user-specified cache path
std::string result = tools::getenv("ISAAC_CACHE_PATH");
if(!result.empty()){
if(tools::mkpath(result)==0)
return result;
}
//create in home
result = tools::getenv("HOME");
if(!result.empty())
{
result = result + "/.isaac/cache/";
if(tools::mkpath(result)==0)
return result;
}
//couldn't find a directory
return "";
}
CUdevice Context::device(CUcontext context){
dispatch::cuCtxPushCurrent_v2(context);
CUdevice res;
dispatch::cuCtxGetDevice(&res);
dispatch::cuCtxPopCurrent_v2(NULL);
return res;
}
Context::Context(CUcontext context, bool take_ownership): cu_(context, take_ownership), device_(device(context), false), cache_path_(get_cache_path())
{ }
Context::Context(Device const & device): device_(device), cache_path_(get_cache_path())
{
dispatch::cuCtxCreate(&*cu_, CU_CTX_SCHED_AUTO, (CUdevice)device);
dispatch::cuCtxPopCurrent_v2(NULL);
}
Device const & Context::device() const
{ return device_; }
std::string const & Context::cache_path() const
{ return cache_path_; }
Handle<CUcontext> const & Context::cu() const
{ return cu_; }
/* Context Switcher */
ContextSwitcher::ContextSwitcher(Context const & ctx): ctx_(ctx)
{
dispatch::cuCtxPushCurrent_v2(ctx_);
}
ContextSwitcher::~ContextSwitcher()
{
CUcontext tmp;
dispatch::cuCtxPopCurrent_v2(&tmp);
assert(tmp==(CUcontext)ctx_ && "Switching back to invalid context!");
}
}
}

View File

@@ -1,197 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <map>
#include <algorithm>
#include <sstream>
#include <cstring>
#include <memory>
#include "isaac/driver/device.h"
namespace isaac
{
namespace driver
{
/* Architecture [NVidia] */
Device::Architecture Device::nv_arch(std::pair<unsigned int, unsigned int> sm) const{
switch(sm.first)
{
case 7:
switch(sm.second)
{
case 0: return Architecture::SM_7_0;
default: return Architecture::UNKNOWN;
}
case 6:
switch(sm.second)
{
case 0: return Architecture::SM_6_0;
case 1: return Architecture::SM_6_1;
default: return Architecture::UNKNOWN;
}
case 5:
switch(sm.second)
{
case 0: return Architecture::SM_5_0;
case 2: return Architecture::SM_5_2;
default: return Architecture::UNKNOWN;
}
case 3:
switch(sm.second)
{
case 0: return Architecture::SM_3_0;
case 5: return Architecture::SM_3_5;
case 7: return Architecture::SM_3_7;
default: return Architecture::UNKNOWN;
}
case 2:
switch(sm.second)
{
case 0: return Architecture::SM_2_0;
case 1: return Architecture::SM_2_1;
default: return Architecture::UNKNOWN;
}
default: return Architecture::UNKNOWN;
}
}
template<CUdevice_attribute attr>
int Device::cuGetInfo() const{
int res;
dispatch::cuDeviceGetAttribute(&res, attr, *cu_);
return res;
}
nvmlDevice_t Device::nvml_device() const{
// cache NVML handles across calls (static, otherwise the map is rebuilt every time)
static std::map<std::string, nvmlDevice_t> map;
std::string key = pci_bus_id();
if(map.find(key)==map.end()){
nvmlDevice_t device;
dispatch::nvmlDeviceGetHandleByPciBusId_v2(key.c_str(), &device);
return map.insert(std::make_pair(key, device)).first->second;
}
return map.at(key);
}
/* Architecture */
Device::Architecture Device::architecture() const
{ return nv_arch(compute_capability()); }
/* Attributes */
size_t Device::address_bits() const
{ return sizeof(size_t)*8; }
driver::Platform Device::platform() const
{ return Platform(); }
std::string Device::name() const{
char tmp[128];
dispatch::cuDeviceGetName(tmp, 128, *cu_);
return std::string(tmp);
}
std::string Device::pci_bus_id() const{
char tmp[128];
dispatch::cuDeviceGetPCIBusId(tmp, 128, *cu_);
return std::string(tmp);
}
void Device::interpret_as(std::pair<size_t, size_t> cc){
interpreted_as_ = std::make_shared<std::pair<size_t, size_t>>(cc);
}
std::pair<size_t, size_t> Device::compute_capability() const{
if(interpreted_as_)
return *interpreted_as_;
size_t _major = cuGetInfo<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR>();
size_t _minor = cuGetInfo<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR>();
return std::make_pair(_major, _minor);
}
size_t Device::max_threads_per_block() const
{ return cuGetInfo<CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK>(); }
size_t Device::max_shared_memory() const
{ return cuGetInfo<CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK>(); }
size_t Device::warp_size() const
{ return cuGetInfo<CU_DEVICE_ATTRIBUTE_WARP_SIZE>(); }
std::vector<size_t> Device::max_block_dim() const{
std::vector<size_t> result(3);
result[0] = cuGetInfo<CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X>();
result[1] = cuGetInfo<CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y>();
result[2] = cuGetInfo<CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z>();
return result;
}
size_t Device::current_sm_clock() const{
unsigned int result;
dispatch::nvmlDeviceGetClockInfo(nvml_device(), NVML_CLOCK_SM, &result);
return result;
}
size_t Device::max_sm_clock() const{
unsigned int result;
dispatch::nvmlDeviceGetMaxClockInfo(nvml_device(), NVML_CLOCK_SM, &result);
return result;
}
size_t Device::current_mem_clock() const{
unsigned int result;
dispatch::nvmlDeviceGetClockInfo(nvml_device(), NVML_CLOCK_MEM, &result);
return result;
}
size_t Device::max_mem_clock() const{
unsigned int result;
dispatch::nvmlDeviceGetMaxClockInfo(nvml_device(), NVML_CLOCK_MEM, &result);
return result;
}
/* Infos */
std::string Device::infos() const{
std::ostringstream oss;
std::vector<size_t> max_wi_sizes = max_block_dim();
oss << "Platform: " << platform().name() << std::endl;
oss << "Name: " << name() << std::endl;
oss << "Maximum total work-group size: " << max_threads_per_block() << std::endl;
oss << "Maximum individual work-group sizes: " << max_wi_sizes[0] << ", " << max_wi_sizes[1] << ", " << max_wi_sizes[2] << std::endl;
oss << "Local memory size: " << max_shared_memory() << std::endl;
return oss.str();
}
Handle<CUdevice> const & Device::cu() const
{ return cu_; }
}
}
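A sketch of the `Device` queries above; clock values come from NVML (reported in MHz), and `interpret_as` overrides the compute capability used for code generation.
```
#include "isaac/driver/backend.h"
#include "isaac/driver/device.h"
#include <iostream>

int main(){
  namespace drv = isaac::driver;
  drv::Device device = drv::backend::devices().front();
  std::cout << device.infos();
  std::cout << "SM clock: " << device.current_sm_clock() << " / "
            << device.max_sm_clock() << " MHz" << std::endl;
  // Pretend the device is an sm_52 part, e.g. to generate older PTX.
  device.interpret_as({5, 2});
}
```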

View File

@@ -1,363 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <map>
#include <dlfcn.h>
#include "isaac/driver/dispatch.h"
#include "isaac/driver/context.h"
namespace isaac
{
namespace driver
{
//Helpers for function definition
#define DEFINE0(init, hlib, ret, fname) ret dispatch::fname()\
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname); }
#define DEFINE1(init, hlib, ret, fname, t1) ret dispatch::fname(t1 a)\
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a); }
#define DEFINE2(init, hlib, ret, fname, t1, t2) ret dispatch::fname(t1 a, t2 b)\
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b); }
#define DEFINE3(init, hlib, ret, fname, t1, t2, t3) ret dispatch::fname(t1 a, t2 b, t3 c)\
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c); }
#define DEFINE4(init, hlib, ret, fname, t1, t2, t3, t4) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d)\
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d); }
#define DEFINE5(init, hlib, ret, fname, t1, t2, t3, t4, t5) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e)\
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e); }
#define DEFINE6(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f)\
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f); }
#define DEFINE7(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g)\
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g); }
#define DEFINE8(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h)\
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h); }
#define DEFINE9(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i)\
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h, i); }
#define DEFINE10(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i, t10 j)\
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h, i, j); }
#define DEFINE11(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i, t10 j, t11 k)\
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h, i, j, k); }
#define DEFINE13(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i, t10 j, t11 k, t12 l, t13 m)\
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h, i, j, k, l, m); }
#define DEFINE19(init, hlib, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15, t16, t17, t18, t19) ret dispatch::fname(t1 a, t2 b, t3 c, t4 d, t5 e, t6 f, t7 g, t8 h, t9 i, t10 j, t11 k, t12 l, t13 m, t14 n, t15 o, t16 p, t17 q, t18 r, t19 s)\
{return f_impl<dispatch::init>(hlib, fname, fname ## _, #fname, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s); }
//Specialized helpers for CUDA
#define CUDA_DEFINE1(ret, fname, t1) DEFINE1(cuinit, cuda_, ret, fname, t1)
#define CUDA_DEFINE2(ret, fname, t1, t2) DEFINE2(cuinit, cuda_, ret, fname, t1, t2)
#define CUDA_DEFINE3(ret, fname, t1, t2, t3) DEFINE3(cuinit, cuda_, ret, fname, t1, t2, t3)
#define CUDA_DEFINE4(ret, fname, t1, t2, t3, t4) DEFINE4(cuinit, cuda_, ret, fname, t1, t2, t3, t4)
#define CUDA_DEFINE5(ret, fname, t1, t2, t3, t4, t5) DEFINE5(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5)
#define CUDA_DEFINE6(ret, fname, t1, t2, t3, t4, t5, t6) DEFINE6(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6)
#define CUDA_DEFINE7(ret, fname, t1, t2, t3, t4, t5, t6, t7) DEFINE7(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7)
#define CUDA_DEFINE8(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) DEFINE8(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8)
#define CUDA_DEFINE9(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) DEFINE9(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9)
#define CUDA_DEFINE10(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) DEFINE10(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10)
#define CUDA_DEFINE11(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) DEFINE11(cuinit, cuda_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11)
#define NVRTC_DEFINE1(ret, fname, t1) DEFINE1(nvrtcinit, nvrtc_, ret, fname, t1)
#define NVRTC_DEFINE2(ret, fname, t1, t2) DEFINE2(nvrtcinit, nvrtc_, ret, fname, t1, t2)
#define NVRTC_DEFINE3(ret, fname, t1, t2, t3) DEFINE3(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3)
#define NVRTC_DEFINE4(ret, fname, t1, t2, t3, t4) DEFINE4(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4)
#define NVRTC_DEFINE5(ret, fname, t1, t2, t3, t4, t5) DEFINE5(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5)
#define NVRTC_DEFINE6(ret, fname, t1, t2, t3, t4, t5, t6) DEFINE6(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6)
#define NVRTC_DEFINE7(ret, fname, t1, t2, t3, t4, t5, t6, t7) DEFINE7(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7)
#define NVRTC_DEFINE8(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) DEFINE8(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8)
#define NVRTC_DEFINE9(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9) DEFINE9(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9)
#define NVRTC_DEFINE10(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) DEFINE10(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10)
#define NVRTC_DEFINE11(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) DEFINE11(nvrtcinit, nvrtc_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11)
#define NVML_DEFINE0(ret, fname) DEFINE0(nvmlinit, nvml_, ret, fname)
#define NVML_DEFINE1(ret, fname, t1) DEFINE1(nvmlinit, nvml_, ret, fname, t1)
#define NVML_DEFINE2(ret, fname, t1, t2) DEFINE2(nvmlinit, nvml_, ret, fname, t1, t2)
#define NVML_DEFINE3(ret, fname, t1, t2, t3) DEFINE3(nvmlinit, nvml_, ret, fname, t1, t2, t3)
#define CUBLAS_DEFINE1(ret, fname, t1) DEFINE1(cublasinit, cublas_, ret, fname, t1)
#define CUBLAS_DEFINE13(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13) DEFINE13(cublasinit, cublas_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13)
#define CUBLAS_DEFINE19(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15, t16, t17, t18, t19) DEFINE19(cublasinit, cublas_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15, t16, t17, t18, t19)
#define CUDNN_DEFINE1(ret, fname, t1) DEFINE1(cudnninit, cudnn_, ret, fname, t1)
#define CUDNN_DEFINE2(ret, fname, t1, t2) DEFINE2(cudnninit, cudnn_, ret, fname, t1, t2)
#define CUDNN_DEFINE3(ret, fname, t1, t2, t3) DEFINE3(cudnninit, cudnn_, ret, fname, t1, t2, t3)
#define CUDNN_DEFINE5(ret, fname, t1, t2, t3, t4, t5) DEFINE5(cudnninit, cudnn_, ret, fname, t1, t2, t3, t4, t5)
#define CUDNN_DEFINE6(ret, fname, t1, t2, t3, t4, t5, t6) DEFINE6(cudnninit, cudnn_, ret, fname, t1, t2, t3, t4, t5, t6)
#define CUDNN_DEFINE7(ret, fname, t1, t2, t3, t4, t5, t6, t7) DEFINE7(cudnninit, cudnn_, ret, fname, t1, t2, t3, t4, t5, t6, t7)
#define CUDNN_DEFINE8(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8) DEFINE8(cudnninit, cudnn_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8)
#define CUDNN_DEFINE13(ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13) DEFINE13(cudnninit, cudnn_, ret, fname, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13)
bool dispatch::cuinit(){
if(cuda_==nullptr)
cuda_ = dlopen("libcuda.so", RTLD_LAZY);
if(cuda_==nullptr)
return false;
//Resolve cuInit by hand and call it once so that later driver calls are legal
CUresult (*fptr)(unsigned int);
cuInit_ = dlsym(cuda_, "cuInit");
*reinterpret_cast<void **>(&fptr) = cuInit_;
CUresult res = (*fptr)(0);
check(res);
return true;
}
bool dispatch::nvrtcinit(){
if(nvrtc_==nullptr)
nvrtc_ = dlopen("libnvrtc.so", RTLD_LAZY);
return nvrtc_ != nullptr;
}
bool dispatch::nvmlinit(){
if(nvml_==nullptr)
nvml_ = dlopen("libnvidia-ml.so", RTLD_LAZY);
if(nvml_==nullptr)
return false;
//NVML requires explicit initialization before any query
nvmlReturn_t (*fptr)();
nvmlInit_v2_ = dlsym(nvml_, "nvmlInit_v2");
*reinterpret_cast<void **>(&fptr) = nvmlInit_v2_;
nvmlReturn_t res = (*fptr)();
check(res);
return nvml_ != nullptr;
}
bool dispatch::cublasinit(){
if(cublas_==nullptr)
cublas_ = dlopen("libcublas.so", RTLD_LAZY);
return cublas_ != nullptr;
}
bool dispatch::cudnninit(){
if(cudnn_==nullptr)
cudnn_ = dlopen("libcudnn.so", RTLD_LAZY);
return cudnn_ != nullptr;
}
//CUDA
CUDA_DEFINE1(CUresult, cuCtxDestroy_v2, CUcontext)
CUDA_DEFINE2(CUresult, cuEventCreate, CUevent *, unsigned int)
CUDA_DEFINE2(CUresult, cuDeviceGet, CUdevice *, int)
CUDA_DEFINE3(CUresult, cuMemcpyDtoH_v2, void *, CUdeviceptr, size_t)
CUDA_DEFINE2(CUresult, cuStreamCreate, CUstream *, unsigned int)
CUDA_DEFINE3(CUresult, cuEventElapsedTime, float *, CUevent, CUevent)
CUDA_DEFINE1(CUresult, cuMemFree_v2, CUdeviceptr)
CUDA_DEFINE4(CUresult, cuMemcpyDtoHAsync_v2, void *, CUdeviceptr, size_t, CUstream)
CUDA_DEFINE1(CUresult, cuDriverGetVersion, int *)
CUDA_DEFINE3(CUresult, cuDeviceGetName, char *, int, CUdevice)
CUDA_DEFINE3(CUresult, cuDeviceGetPCIBusId, char *, int, CUdevice)
CUDA_DEFINE4(CUresult, cuModuleGetGlobal_v2, CUdeviceptr*, size_t*, CUmodule, const char*)
CUDA_DEFINE4(CUresult, cuMemcpyHtoDAsync_v2, CUdeviceptr, const void *, size_t, CUstream)
CUDA_DEFINE2(CUresult, cuModuleLoad, CUmodule *, const char *)
CUDA_DEFINE11(CUresult, cuLaunchKernel, CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, CUstream, void **, void **)
CUDA_DEFINE1(CUresult, cuModuleUnload, CUmodule)
CUDA_DEFINE5(CUresult, cuModuleLoadDataEx, CUmodule *, const void *, unsigned int, CUjit_option *, void **)
CUDA_DEFINE3(CUresult, cuDeviceGetAttribute, int *, CUdevice_attribute, CUdevice)
CUDA_DEFINE1(CUresult, cuDeviceGetCount, int *)
CUDA_DEFINE3(CUresult, cuMemcpyHtoD_v2, CUdeviceptr, const void *, size_t )
CUDA_DEFINE1(CUresult, cuInit, unsigned int)
CUDA_DEFINE2(CUresult, cuEventRecord, CUevent, CUstream)
CUDA_DEFINE3(CUresult, cuCtxCreate_v2, CUcontext *, unsigned int, CUdevice)
CUDA_DEFINE3(CUresult, cuModuleGetFunction, CUfunction *, CUmodule, const char *)
CUDA_DEFINE1(CUresult, cuStreamSynchronize, CUstream)
CUDA_DEFINE1(CUresult, cuStreamDestroy_v2, CUstream)
CUDA_DEFINE1(CUresult, cuEventDestroy_v2, CUevent)
CUDA_DEFINE2(CUresult, cuMemAlloc_v2, CUdeviceptr*, size_t)
CUDA_DEFINE3(CUresult, cuPointerGetAttribute, void*, CUpointer_attribute, CUdeviceptr)
CUDA_DEFINE1(CUresult, cuCtxGetDevice, CUdevice*)
CUDA_DEFINE1(CUresult, cuCtxGetCurrent, CUcontext*)
CUDA_DEFINE1(CUresult, cuCtxSetCurrent, CUcontext)
CUDA_DEFINE4(CUresult, cuMemsetD8Async, CUdeviceptr, unsigned char, size_t, CUstream)
CUDA_DEFINE1(CUresult, cuCtxPushCurrent_v2, CUcontext)
CUDA_DEFINE1(CUresult, cuCtxPopCurrent_v2, CUcontext*)
NVRTC_DEFINE3(nvrtcResult, nvrtcCompileProgram, nvrtcProgram, int, const char **)
NVRTC_DEFINE2(nvrtcResult, nvrtcGetProgramLogSize, nvrtcProgram, size_t *)
NVRTC_DEFINE2(nvrtcResult, nvrtcGetPTX, nvrtcProgram, char *)
NVRTC_DEFINE2(nvrtcResult, nvrtcGetPTXSize, nvrtcProgram, size_t *)
NVRTC_DEFINE6(nvrtcResult, nvrtcCreateProgram, nvrtcProgram *, const char *, const char *, int, const char **, const char **)
NVRTC_DEFINE2(nvrtcResult, nvrtcGetProgramLog, nvrtcProgram, char *)
NVML_DEFINE2(nvmlReturn_t, nvmlDeviceGetHandleByPciBusId_v2, const char *, nvmlDevice_t*)
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetClockInfo, nvmlDevice_t, nvmlClockType_t, unsigned int*)
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetMaxClockInfo, nvmlDevice_t, nvmlClockType_t, unsigned int*)
//One cuBLAS/cuDNN handle per context, created lazily and kept for the lifetime of the process
cublasHandle_t dispatch::cublasHandle(Context const & ctx){
static std::map<Context, cublasHandle_t> handles;
auto pr = handles.insert({ctx, cublasHandle_t()});
if(pr.second)
cublasCreate_v2(&pr.first->second);
return pr.first->second;
}
cudnnHandle_t dispatch::cudnnHandle(Context const & ctx){
static std::map<Context, cudnnHandle_t> handles;
auto pr = handles.insert({ctx, cudnnHandle_t()});
if(pr.second)
cudnnCreate(&pr.first->second);
return pr.first->second;
}
CUBLAS_DEFINE1(cublasStatus_t, cublasCreate_v2, cublasHandle_t*)
cublasStatus_t dispatch::cublasGetStream_v2(cublasHandle_t h, cudaStream_t *a)
{ return f_impl<dispatch::cublasinit>(cublas_, cublasGetStream_v2, cublasGetStream_v2_, "cublasGetStream_v2", h, a); }
cublasStatus_t dispatch::cublasSetStream_v2(cublasHandle_t h, cudaStream_t a)
{ return f_impl<dispatch::cublasinit>(cublas_, cublasSetStream_v2, cublasSetStream_v2_, "cublasSetStream_v2", h, a); }
cublasStatus_t dispatch::cublasSgemm_v2(cublasHandle_t h, cublasOperation_t at, cublasOperation_t bt, int m, int n, int k, float* alpha, const float *A, int lda, const float *B, int ldb, float* beta, float *C, int ldc)
{ return f_impl<dispatch::cublasinit>(cublas_, cublasSgemm_v2, cublasSgemm_v2_, "cublasSgemm_v2", h, at, bt, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);}
cublasStatus_t dispatch::cublasDgemm_v2(cublasHandle_t h, cublasOperation_t at, cublasOperation_t bt, int m, int n, int k, double* alpha, const double *A, int lda, const double *B, int ldb, double* beta, double *C, int ldc)
{ return f_impl<dispatch::cublasinit>(cublas_, cublasDgemm_v2, cublasDgemm_v2_, "cublasDgemm_v2", h, at, bt, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);}
cublasStatus_t dispatch::cublasHgemm(cublasHandle_t h, cublasOperation_t at, cublasOperation_t bt, int m, int n, int k, half* alpha, const half *A, int lda, const half *B, int ldb, half* beta, half *C, int ldc)
{ return f_impl<dispatch::cublasinit>(cublas_, cublasHgemm, cublasHgemm_, "cublasHgemm", h, at, bt, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);}
CUBLAS_DEFINE19(cublasStatus_t, cublasGemmEx, cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const void*, const void*, cudaDataType, int, const void*, cudaDataType, int, const void*, void*, cudaDataType, int, cudaDataType, cublasGemmAlgo_t)
//cuDNN
CUDNN_DEFINE1(cudnnStatus_t, cudnnCreateConvolutionDescriptor, cudnnConvolutionDescriptor_t*)
CUDNN_DEFINE1(cudnnStatus_t, cudnnCreateTensorDescriptor, cudnnTensorDescriptor_t*)
CUDNN_DEFINE1(cudnnStatus_t, cudnnCreateFilterDescriptor, cudnnFilterDescriptor_t*)
CUDNN_DEFINE1(cudnnStatus_t, cudnnCreate, cudnnHandle_t*)
CUDNN_DEFINE7(cudnnStatus_t, cudnnSetTensor4dDescriptor, cudnnTensorDescriptor_t, cudnnTensorFormat_t, cudnnDataType_t, int, int, int, int)
CUDNN_DEFINE7(cudnnStatus_t, cudnnSetFilter4dDescriptor, cudnnFilterDescriptor_t, cudnnDataType_t, cudnnTensorFormat_t, int, int, int, int)
CUDNN_DEFINE5(cudnnStatus_t, cudnnSetTensorNdDescriptorEx, cudnnTensorDescriptor_t, cudnnTensorFormat_t, cudnnDataType_t, int, const int*)
CUDNN_DEFINE5(cudnnStatus_t, cudnnSetFilterNdDescriptor, cudnnFilterDescriptor_t, cudnnDataType_t, cudnnTensorFormat_t, int, const int*)
CUDNN_DEFINE1(cudnnStatus_t, cudnnCreatePoolingDescriptor, cudnnPoolingDescriptor_t*)
CUDNN_DEFINE7(cudnnStatus_t, cudnnSetPoolingNdDescriptor, cudnnPoolingDescriptor_t, const cudnnPoolingMode_t, const cudnnNanPropagation_t, int, const int*, const int*, const int*)
CUDNN_DEFINE8(cudnnStatus_t, cudnnPoolingForward, cudnnHandle_t, const cudnnPoolingDescriptor_t, const void*, const cudnnTensorDescriptor_t, const void*, const void*, const cudnnTensorDescriptor_t, void*)
CUDNN_DEFINE8(cudnnStatus_t, cudnnSetConvolution2dDescriptor, cudnnConvolutionDescriptor_t, int, int, int, int, int, int, cudnnConvolutionMode_t)
CUDNN_DEFINE7(cudnnStatus_t, cudnnSetConvolutionNdDescriptor, cudnnConvolutionDescriptor_t, int, const int*, const int*, const int*, cudnnConvolutionMode_t, cudnnDataType_t)
CUDNN_DEFINE8(cudnnStatus_t, cudnnGetConvolutionForwardAlgorithm, cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionFwdPreference_t, size_t, cudnnConvolutionFwdAlgo_t *)
CUDNN_DEFINE7(cudnnStatus_t, cudnnGetConvolutionForwardWorkspaceSize, cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionFwdAlgo_t, size_t*)
CUDNN_DEFINE13(cudnnStatus_t, cudnnConvolutionForward, cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, size_t, const void *, const cudnnTensorDescriptor_t, void *)
CUDNN_DEFINE2(cudnnStatus_t, cudnnSetStream, cudnnHandle_t, cudaStream_t)
CUDNN_DEFINE7(cudnnStatus_t, cudnnTransformTensor, cudnnHandle_t, const void*, const cudnnTensorDescriptor_t, const void*, const void*, const cudnnTensorDescriptor_t, void*)
void dispatch::release(){
if(cuda_){
dlclose(cuda_);
cuda_ = nullptr;
}
if(nvrtc_){
dlclose(nvrtc_);
nvrtc_ = nullptr;
}
if(nvml_){
dlclose(nvml_);
nvml_ = nullptr;
}
if(cublas_){
dlclose(cublas_);
cublas_ = nullptr;
}
if(cudnn_){
dlclose(cudnn_);
cudnn_ = nullptr;
}
}
void* dispatch::cuda_;
void* dispatch::nvrtc_;
void* dispatch::nvml_;
void* dispatch::cublas_;
void* dispatch::cudnn_;
//CUDA
void* dispatch::cuCtxGetCurrent_;
void* dispatch::cuCtxSetCurrent_;
void* dispatch::cuCtxDestroy_v2_;
void* dispatch::cuEventCreate_;
void* dispatch::cuDeviceGet_;
void* dispatch::cuMemcpyDtoH_v2_;
void* dispatch::cuStreamCreate_;
void* dispatch::cuEventElapsedTime_;
void* dispatch::cuMemFree_v2_;
void* dispatch::cuMemcpyDtoHAsync_v2_;
void* dispatch::cuDriverGetVersion_;
void* dispatch::cuDeviceGetName_;
void* dispatch::cuDeviceGetPCIBusId_;
void* dispatch::cuModuleGetGlobal_v2_;
void* dispatch::cuMemcpyHtoDAsync_v2_;
void* dispatch::cuModuleLoad_;
void* dispatch::cuLaunchKernel_;
void* dispatch::cuModuleUnload_;
void* dispatch::cuModuleLoadDataEx_;
void* dispatch::cuDeviceGetAttribute_;
void* dispatch::cuDeviceGetCount_;
void* dispatch::cuMemcpyHtoD_v2_;
void* dispatch::cuInit_;
void* dispatch::cuEventRecord_;
void* dispatch::cuCtxCreate_v2_;
void* dispatch::cuModuleGetFunction_;
void* dispatch::cuStreamSynchronize_;
void* dispatch::cuStreamDestroy_v2_;
void* dispatch::cuEventDestroy_v2_;
void* dispatch::cuMemAlloc_v2_;
void* dispatch::cuPointerGetAttribute_;
void* dispatch::cuCtxGetDevice_;
void* dispatch::cuMemsetD8Async_;
void* dispatch::cuCtxPushCurrent_v2_;
void* dispatch::cuCtxPopCurrent_v2_;
void* dispatch::nvrtcCompileProgram_;
void* dispatch::nvrtcGetProgramLogSize_;
void* dispatch::nvrtcGetPTX_;
void* dispatch::nvrtcGetPTXSize_;
void* dispatch::nvrtcCreateProgram_;
void* dispatch::nvrtcGetProgramLog_;
void* dispatch::nvmlInit_v2_;
void* dispatch::nvmlDeviceGetHandleByPciBusId_v2_;
void* dispatch::nvmlDeviceGetClockInfo_;
void* dispatch::nvmlDeviceGetMaxClockInfo_;
void* dispatch::cublasCreate_v2_;
void* dispatch::cublasGetStream_v2_;
void* dispatch::cublasSetStream_v2_;
void* dispatch::cublasHgemm_;
void* dispatch::cublasSgemm_v2_;
void* dispatch::cublasDgemm_v2_;
void* dispatch::cublasGemmEx_;
void* dispatch::cudnnCreateConvolutionDescriptor_;
void* dispatch::cudnnCreatePoolingDescriptor_;
void* dispatch::cudnnCreateTensorDescriptor_;
void* dispatch::cudnnCreateFilterDescriptor_;
void* dispatch::cudnnCreate_;
void* dispatch::cudnnSetTensor4dDescriptor_;
void* dispatch::cudnnSetFilter4dDescriptor_;
void* dispatch::cudnnSetTensorNdDescriptorEx_;
void* dispatch::cudnnSetFilterNdDescriptor_;
void* dispatch::cudnnSetPoolingNdDescriptor_;
void* dispatch::cudnnSetConvolution2dDescriptor_;
void* dispatch::cudnnSetConvolutionNdDescriptor_;
void* dispatch::cudnnGetConvolutionForwardAlgorithm_;
void* dispatch::cudnnGetConvolutionForwardWorkspaceSize_;
void* dispatch::cudnnConvolutionForward_;
void* dispatch::cudnnPoolingForward_;
void* dispatch::cudnnSetStream_;
void* dispatch::cudnnTransformTensor_;
}
}
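Every wrapper in this file follows the same lazy-binding pattern: `dlopen` the library on first use, `dlsym` the entry point once, cache the raw pointer in the corresponding `void* dispatch::<name>_` member, and forward the call. A minimal self-contained sketch of that pattern, with hypothetical names (`lazy_call`, `libm.so.6`, `cos`) standing in for the generated wrappers:

```
// Sketch of the lazy dlopen/dlsym dispatch pattern (build with: g++ sketch.cpp -ldl)
#include <dlfcn.h>
#include <cstdio>

template<class Ret, class... Args>
Ret lazy_call(void*& lib, const char* libname, void*& sym, const char* symname,
              Ret fallback, Args... args){
  if(!lib) lib = dlopen(libname, RTLD_LAZY);   // load the shared library on first use
  if(!lib) return fallback;                    // library not installed on this system
  if(!sym) sym = dlsym(lib, symname);          // resolve the entry point once and cache it
  if(!sym) return fallback;
  return reinterpret_cast<Ret(*)(Args...)>(sym)(args...);
}

int main(){
  static void* libm = nullptr;                 // plays the role of dispatch::cuda_
  static void* cos_sym = nullptr;              // plays the role of e.g. dispatch::cuInit_
  double c = lazy_call<double>(libm, "libm.so.6", cos_sym, "cos", -1.0, 0.0);
  std::printf("cos(0) = %f\n", c);             // prints 1.000000 when libm resolves
  return 0;
}
```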

View File

@@ -1,155 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "isaac/driver/error.h"
namespace isaac
{
namespace driver
{
void check(CUresult err)
{
using namespace exception::cuda;
switch(err)
{
case CUDA_SUCCESS : break;
case CUDA_ERROR_INVALID_VALUE : throw invalid_value();
case CUDA_ERROR_OUT_OF_MEMORY : throw out_of_memory();
case CUDA_ERROR_NOT_INITIALIZED : throw not_initialized();
case CUDA_ERROR_DEINITIALIZED : throw deinitialized();
case CUDA_ERROR_PROFILER_DISABLED : throw profiler_disabled();
case CUDA_ERROR_PROFILER_NOT_INITIALIZED : throw profiler_not_initialized();
case CUDA_ERROR_PROFILER_ALREADY_STARTED : throw profiler_already_started();
case CUDA_ERROR_PROFILER_ALREADY_STOPPED : throw profiler_already_stopped();
case CUDA_ERROR_NO_DEVICE : throw no_device();
case CUDA_ERROR_INVALID_DEVICE : throw invalid_device();
case CUDA_ERROR_INVALID_IMAGE : throw invalid_image();
case CUDA_ERROR_INVALID_CONTEXT : throw invalid_context();
case CUDA_ERROR_CONTEXT_ALREADY_CURRENT : throw context_already_current();
case CUDA_ERROR_MAP_FAILED : throw map_failed();
case CUDA_ERROR_UNMAP_FAILED : throw unmap_failed();
case CUDA_ERROR_ARRAY_IS_MAPPED : throw array_is_mapped();
case CUDA_ERROR_ALREADY_MAPPED : throw already_mapped();
case CUDA_ERROR_NO_BINARY_FOR_GPU : throw no_binary_for_gpu();
case CUDA_ERROR_ALREADY_ACQUIRED : throw already_acquired();
case CUDA_ERROR_NOT_MAPPED : throw not_mapped();
case CUDA_ERROR_NOT_MAPPED_AS_ARRAY : throw not_mapped_as_array();
case CUDA_ERROR_NOT_MAPPED_AS_POINTER : throw not_mapped_as_pointer();
case CUDA_ERROR_ECC_UNCORRECTABLE : throw ecc_uncorrectable();
case CUDA_ERROR_UNSUPPORTED_LIMIT : throw unsupported_limit();
case CUDA_ERROR_CONTEXT_ALREADY_IN_USE : throw context_already_in_use();
case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED : throw peer_access_unsupported();
case CUDA_ERROR_INVALID_PTX : throw invalid_ptx();
case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT : throw invalid_graphics_context();
case CUDA_ERROR_INVALID_SOURCE : throw invalid_source();
case CUDA_ERROR_FILE_NOT_FOUND : throw file_not_found();
case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND : throw shared_object_symbol_not_found();
case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED : throw shared_object_init_failed();
case CUDA_ERROR_OPERATING_SYSTEM : throw operating_system();
case CUDA_ERROR_INVALID_HANDLE : throw invalid_handle();
case CUDA_ERROR_NOT_FOUND : throw not_found();
case CUDA_ERROR_NOT_READY : throw not_ready();
case CUDA_ERROR_ILLEGAL_ADDRESS : throw illegal_address();
case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES : throw launch_out_of_resources();
case CUDA_ERROR_LAUNCH_TIMEOUT : throw launch_timeout();
case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING : throw launch_incompatible_texturing();
case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED : throw peer_access_already_enabled();
case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED : throw peer_access_not_enabled();
case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE : throw primary_context_active();
case CUDA_ERROR_CONTEXT_IS_DESTROYED : throw context_is_destroyed();
case CUDA_ERROR_ASSERT : throw assert_error();
case CUDA_ERROR_TOO_MANY_PEERS : throw too_many_peers();
case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED : throw host_memory_already_registered();
case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED : throw host_memory_not_registered();
case CUDA_ERROR_HARDWARE_STACK_ERROR : throw hardware_stack_error();
case CUDA_ERROR_ILLEGAL_INSTRUCTION : throw illegal_instruction();
case CUDA_ERROR_MISALIGNED_ADDRESS : throw misaligned_address();
case CUDA_ERROR_INVALID_ADDRESS_SPACE : throw invalid_address_space();
case CUDA_ERROR_INVALID_PC : throw invalid_pc();
case CUDA_ERROR_LAUNCH_FAILED : throw launch_failed();
case CUDA_ERROR_NOT_PERMITTED : throw not_permitted();
case CUDA_ERROR_NOT_SUPPORTED : throw not_supported();
case CUDA_ERROR_UNKNOWN : throw unknown();
default : throw unknown();
}
}
void check(nvrtcResult err){
using namespace exception::nvrtc;
switch(err)
{
case NVRTC_SUCCESS: break;
case NVRTC_ERROR_OUT_OF_MEMORY: throw out_of_memory();
case NVRTC_ERROR_PROGRAM_CREATION_FAILURE: throw program_creation_failure();
case NVRTC_ERROR_INVALID_INPUT: throw invalid_input();
case NVRTC_ERROR_INVALID_PROGRAM: throw invalid_program();
case NVRTC_ERROR_INVALID_OPTION: throw invalid_option();
case NVRTC_ERROR_COMPILATION: throw compilation();
case NVRTC_ERROR_BUILTIN_OPERATION_FAILURE: throw builtin_operation_failure();
default: throw unknown_error();
}
}
void check(cublasStatus_t err){
using namespace exception::cublas;
switch(err)
{
case CUBLAS_STATUS_SUCCESS : break;
case CUBLAS_STATUS_NOT_INITIALIZED : throw not_initialized();
case CUBLAS_STATUS_ALLOC_FAILED : throw alloc_failed();
case CUBLAS_STATUS_INVALID_VALUE : throw invalid_value();
case CUBLAS_STATUS_ARCH_MISMATCH : throw arch_mismatch();
case CUBLAS_STATUS_MAPPING_ERROR : throw mapping_error();
case CUBLAS_STATUS_EXECUTION_FAILED: throw execution_failed();
case CUBLAS_STATUS_INTERNAL_ERROR : throw internal_error();
case CUBLAS_STATUS_NOT_SUPPORTED : throw not_supported();
case CUBLAS_STATUS_LICENSE_ERROR : throw license_error();
default : throw unknown();
}
}
void check(cudnnStatus_t err){
using namespace exception::cudnn;
switch(err)
{
case CUDNN_STATUS_SUCCESS: break;
case CUDNN_STATUS_NOT_INITIALIZED: throw not_initialized();
case CUDNN_STATUS_ALLOC_FAILED: throw alloc_failed();
case CUDNN_STATUS_BAD_PARAM: throw bad_param();
case CUDNN_STATUS_INTERNAL_ERROR: throw internal_error();
case CUDNN_STATUS_INVALID_VALUE: throw invalid_value();
case CUDNN_STATUS_ARCH_MISMATCH: throw arch_mismatch();
case CUDNN_STATUS_MAPPING_ERROR: throw mapping_error();
case CUDNN_STATUS_EXECUTION_FAILED: throw execution_failed();
case CUDNN_STATUS_NOT_SUPPORTED: throw not_supported();
case CUDNN_STATUS_LICENSE_ERROR: throw license_error();
case CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING: throw runtime_prerequisite_missing();
case CUDNN_STATUS_RUNTIME_IN_PROGRESS: throw runtime_in_progress();
case CUDNN_STATUS_RUNTIME_FP_OVERFLOW: throw runtime_fp_overflow();
}
}
}
}
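`check()` turns raw status codes into typed exceptions, one per CUDA/NVRTC/cuBLAS/cuDNN error value, so callers can branch on the kind of failure instead of inspecting integers. A sketch of the intended use, assuming the isaac driver headers (the header path `isaac/driver/dispatch.h`, the allocation size, and the retry policy are illustrative):

```
#include <cstddef>
#include "isaac/driver/dispatch.h"
#include "isaac/driver/error.h"

// Allocate device memory, falling back to half the size when the driver reports OOM.
CUdeviceptr allocate_with_fallback(std::size_t bytes){
  using namespace isaac::driver;
  CUdeviceptr ptr = 0;
  try{
    check(dispatch::cuMemAlloc_v2(&ptr, bytes));        // throws on any CUDA_ERROR_*
  }
  catch(exception::cuda::out_of_memory const &){
    check(dispatch::cuMemAlloc_v2(&ptr, bytes / 2));    // retry with a smaller request
  }
  return ptr;
}
```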

View File

@@ -1,40 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "isaac/driver/event.h"
namespace isaac
{
namespace driver
{
float Event::elapsed_time() const{
float time;
dispatch::cuEventElapsedTime(&time, cu_->first, cu_->second);
return time;
}
Handle<cu_event_t> const & Event::cu() const
{ return cu_; }
}
}
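An `Event` wraps a (start, stop) pair of `CUevent`s; `Stream::enqueue` records the first before and the second after the kernel launch, so `elapsed_time()` reports the GPU time of that launch in milliseconds. A sketch, assuming the isaac driver headers and that `Event` default-constructs a fresh event pair (the launch geometry is hypothetical):

```
#include "isaac/driver/event.h"
#include "isaac/driver/kernel.h"
#include "isaac/driver/stream.h"

// Time a single kernel launch on the given stream.
float time_launch(isaac::driver::Stream & stream, isaac::driver::Kernel const & kernel){
  using namespace isaac::driver;
  Event evt;                                                   // (start, stop) CUevent pair
  stream.enqueue(kernel, {1024, 1, 1}, {128, 1, 1}, nullptr, &evt);
  stream.synchronize();                                        // wait for the stop event
  return evt.elapsed_time();                                   // milliseconds
}
```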

View File

@@ -1,66 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <cassert>
#include <memory>
#include "isaac/driver/handle.h"
namespace isaac
{
namespace driver
{
//CUDA
inline void _delete(CUcontext x) { dispatch::cuCtxDestroy(x); }
inline void _delete(CUdeviceptr x) { dispatch::cuMemFree(x); }
inline void _delete(CUstream x) { dispatch::cuStreamDestroy(x); }
inline void _delete(CUdevice) { }
inline void _delete(CUevent x) { dispatch::cuEventDestroy(x); }
inline void _delete(CUfunction) { }
inline void _delete(CUmodule x) { dispatch::cuModuleUnload(x); }
inline void _delete(cu_event_t x) { _delete(x.first); _delete(x.second); }
inline void _delete(cu_platform){}
//Constructor
template<class CUType>
Handle<CUType>::Handle(CUType cu, bool take_ownership): h_(new CUType(cu)), has_ownership_(take_ownership)
{ }
template<class CUType>
Handle<CUType>::~Handle(){
if(has_ownership_ && h_ && h_.unique() && *h_)
_delete(*h_);
}
template class Handle<CUdeviceptr>;
template class Handle<CUstream>;
template class Handle<CUcontext>;
template class Handle<CUdevice>;
template class Handle<cu_event_t>;
template class Handle<CUfunction>;
template class Handle<CUmodule>;
template class Handle<cu_platform>;
}
}
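`Handle<T>` reference-counts a raw CUDA object and only calls the matching `cuXxxDestroy`/`cuMemFree`/`cuModuleUnload` when it owns the object and drops the last reference. A sketch of the two ownership modes, assuming the isaac driver headers (the stream is created directly through `dispatch` purely for illustration):

```
#include "isaac/driver/dispatch.h"
#include "isaac/driver/handle.h"

void handle_ownership_example(){
  using namespace isaac::driver;
  CUstream raw;
  dispatch::cuStreamCreate(&raw, 0);
  {
    Handle<CUstream> borrowed(raw, false);   // take_ownership=false: never destroys the stream
  }                                          // raw is still valid here
  {
    Handle<CUstream> owned(raw, true);       // take_ownership=true: last copy destroys it
  }                                          // raw has been released by ~Handle
}
```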

View File

@@ -1,67 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <iostream>
#include <cstring>
#include "isaac/driver/kernel.h"
#include "isaac/driver/buffer.h"
namespace isaac
{
namespace driver
{
Kernel::Kernel(Module const & program, const char * name) : program_(program), address_bits_(program.context().device().address_bits()){
cu_params_store_.reserve(64);
cu_params_.reserve(64);
dispatch::cuModuleGetFunction(&*cu_, program, name);
}
void Kernel::setArg(unsigned int index, std::size_t size, void* ptr){
if(index + 1 > cu_params_store_.size()){
cu_params_store_.resize(index+1);
cu_params_.resize(index+1);
}
//Keep a private copy of the argument bytes; cu_params_ holds the raw pointers later passed to cuLaunchKernel
cu_params_store_[index].reset(malloc(size), free);
memcpy(cu_params_store_[index].get(), ptr, size);
cu_params_[index] = cu_params_store_[index].get();
}
void Kernel::setArg(unsigned int index, Buffer const & data)
{ return setArg(index, (CUdeviceptr)data);}
void* const* Kernel::cu_params() const
{ return cu_params_.data(); }
Handle<CUfunction> const & Kernel::cu() const
{ return cu_; }
Module const & Kernel::module() const
{ return program_; }
}
}
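Arguments are staged per index: scalars are copied byte-for-byte into `cu_params_store_`, buffers are forwarded as `CUdeviceptr`, and `cu_params()` exposes the pointer array that `Stream::enqueue` hands to `cuLaunchKernel`. A sketch of the calling sequence, assuming the isaac driver headers (the kernel name `saxpy`, its argument order, and the launch shape are hypothetical):

```
#include <cstddef>
#include "isaac/driver/buffer.h"
#include "isaac/driver/kernel.h"
#include "isaac/driver/module.h"
#include "isaac/driver/stream.h"

void launch_saxpy(isaac::driver::Stream & stream, isaac::driver::Module const & module,
                  isaac::driver::Buffer const & x, isaac::driver::Buffer const & y){
  using namespace isaac::driver;
  Kernel kernel(module, "saxpy");
  float alpha = 2.f;
  int n = 1 << 20;
  kernel.setArg(0, x);                        // Buffer overload: passed as a CUdeviceptr
  kernel.setArg(1, y);
  kernel.setArg(2, sizeof(alpha), &alpha);    // scalar: copied into the argument store
  kernel.setArg(3, sizeof(n), &n);
  stream.enqueue(kernel, {std::size_t((n + 127) / 128), 1, 1}, {128, 1, 1}, nullptr, nullptr);
}
```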

View File

@@ -1,118 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <iostream>
#include <fstream>
#include <stdexcept>
#include "isaac/driver/module.h"
#include "isaac/driver/context.h"
#include "isaac/driver/error.h"
#include "isaac/tools/sys/getenv.hpp"
namespace isaac
{
namespace driver
{
CUjit_target_enum cutarget(Device::Architecture arch){
switch(arch){
case Device::Architecture::SM_2_0: return CU_TARGET_COMPUTE_20;
case Device::Architecture::SM_2_1: return CU_TARGET_COMPUTE_21;
case Device::Architecture::SM_3_0: return CU_TARGET_COMPUTE_30;
case Device::Architecture::SM_3_5: return CU_TARGET_COMPUTE_35;
case Device::Architecture::SM_3_7: return CU_TARGET_COMPUTE_37;
case Device::Architecture::SM_5_0: return CU_TARGET_COMPUTE_50;
case Device::Architecture::SM_5_2: return CU_TARGET_COMPUTE_52;
case Device::Architecture::SM_6_0: return CU_TARGET_COMPUTE_60;
case Device::Architecture::SM_6_1: return CU_TARGET_COMPUTE_61;
default: throw std::runtime_error("unsupported device architecture");
}
}
inline std::pair<int, int> ptx(std::pair<int, int> sm){
//Highest PTX ISA version known to this code for a given compute capability
if(sm.first == 7) return {6, 0};
if(sm.first == 6) return {5, 0};
if(sm.first == 5) return {4, 3};
throw std::runtime_error("unsupported compute capability for PTX generation");
}
std::string Module::header(Device const & device){
auto cc = device.compute_capability();
auto vptx = ptx(cc);
std::string header;
header += ".version " + std::to_string(vptx.first) + "." + std::to_string(vptx.second) + "\n";
header += ".target sm_" + std::to_string(cc.first) + std::to_string(cc.second) + "\n";
header += ".address_size 64\n";
return header;
}
Module::Module(Context const & context, std::string const & source) : context_(context), source_(header(context.device()) + source){
ContextSwitcher ctx_switch(context_);
//Path to custom PTX compiler
std::string compiler = tools::getenv("ISAAC_PTXAS");
if(compiler.size()){
auto cc = context.device().compute_capability();
std::string out = context.cache_path() + "tmp.o";
std::string opt = " --gpu-name sm_" + std::to_string(cc.first) + std::to_string(cc.second)
+ " -o " + out
+ " -ias \"" + source_ + "\"";
std::string cmd = compiler + opt;
if(std::system(cmd.c_str()) != 0)
throw std::runtime_error("ISAAC_PTXAS: external ptxas invocation failed");
dispatch::cuModuleLoad(&*cu_, out.c_str());
}
//JIT Compilation
else{
CUjit_option opt[] = {CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, CU_JIT_ERROR_LOG_BUFFER};
unsigned int errbufsize = 8096;
std::string errbuf(errbufsize, 0);
//CUjit_target_enum target = cutarget(context.device().architecture());
void* optval[] = {(void*)(uintptr_t)errbufsize, (void*)errbuf.data()};
try{
dispatch::cuModuleLoadDataEx(&*cu_, source_.data(), 2, opt, optval);
}catch(exception::cuda::base const &){
std::cerr << "Compilation Failed! Log: " << std::endl;
std::cerr << errbuf << std::endl;
throw;
}
}
}
Context const & Module::context() const
{ return context_; }
Handle<CUmodule> const & Module::cu() const
{ return cu_; }
Buffer Module::symbol(const char *name) const{
CUdeviceptr handle;
size_t size;
dispatch::cuModuleGetGlobal_v2(&handle, &size, *cu_, name);
return Buffer(context_, handle, false);
}
}
}
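`Module` prepends the `.version`/`.target`/`.address_size` header derived from the device, then either JIT-compiles the PTX through `cuModuleLoadDataEx` or, when `ISAAC_PTXAS` points at an external `ptxas` binary, assembles it offline and loads the resulting object. A sketch, assuming the isaac driver headers (the PTX body is a minimal hypothetical no-op kernel):

```
#include <string>
#include "isaac/driver/context.h"
#include "isaac/driver/kernel.h"
#include "isaac/driver/module.h"
#include "isaac/driver/stream.h"

void run_noop(isaac::driver::Context const & ctx, isaac::driver::Stream & stream){
  using namespace isaac::driver;
  // Module::header() supplies .version/.target/.address_size, so only the body is needed.
  std::string src = ".visible .entry noop()\n{\n  ret;\n}\n";
  Module module(ctx, src);                    // JIT-compiled, or built via ISAAC_PTXAS if set
  Kernel kernel(module, "noop");
  stream.enqueue(kernel, {1, 1, 1}, {1, 1, 1}, nullptr, nullptr);
}
```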

View File

@@ -1,56 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "isaac/driver/platform.h"
#include "isaac/driver/device.h"
#include <string>
namespace isaac
{
namespace driver
{
std::string Platform::version() const{
int version;
dispatch::cuDriverGetVersion(&version);
return std::to_string(version);
}
std::string Platform::name() const
{ return (std::string)"CUDA"; }
std::vector<Device> Platform::devices() const{
std::vector<Device> devices;
int N;
dispatch::cuDeviceGetCount(&N);
for(int i = 0 ; i < N ; ++i){
CUdevice device;
dispatch::cuDeviceGet(&device, i);
devices.push_back(Device(device));
}
return devices;
}
}
}
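`Platform` is a thin façade over the driver API: `version()` wraps `cuDriverGetVersion` and `devices()` enumerates every `CUdevice` through `cuDeviceGetCount`/`cuDeviceGet`. A sketch, assuming the isaac driver headers and a default-constructible `Platform`:

```
#include <iostream>
#include <vector>
#include "isaac/driver/device.h"
#include "isaac/driver/platform.h"

void list_devices(){
  using namespace isaac::driver;
  Platform platform;                               // the single "CUDA" platform
  std::vector<Device> devices = platform.devices();
  std::cout << platform.name() << " " << platform.version()
            << ": " << devices.size() << " device(s)" << std::endl;
}
```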

View File

@@ -1,95 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <iostream>
#include <cassert>
#include <array>
#include "isaac/driver/backend.h"
#include "isaac/driver/stream.h"
#include "isaac/driver/context.h"
#include "isaac/driver/device.h"
#include "isaac/driver/event.h"
#include "isaac/driver/kernel.h"
#include "isaac/driver/buffer.h"
namespace isaac
{
namespace driver
{
inline CUcontext cucontext(){
CUcontext result;
dispatch::cuCtxGetCurrent(&result);
return result;
}
Stream::Stream(CUstream stream, bool take_ownership): context_(cucontext(), take_ownership), cu_(stream, take_ownership)
{}
Stream::Stream(Context const & context): context_(context), cu_(CUstream(), true)
{
ContextSwitcher ctx_switch(context_);
dispatch::cuStreamCreate(&*cu_, 0);
}
void Stream::synchronize()
{
ContextSwitcher ctx_switch(context_);
dispatch::cuStreamSynchronize(*cu_);
}
Context const & Stream::context() const
{ return context_; }
void Stream::enqueue(Kernel const & kernel, std::array<size_t, 3> grid, std::array<size_t, 3> block, std::vector<Event> const *, Event* event){
ContextSwitcher ctx_switch(context_);
if(event)
dispatch::cuEventRecord(((cu_event_t)*event).first, *cu_);
dispatch::cuLaunchKernel(kernel, grid[0], grid[1], grid[2], block[0], block[1], block[2], 0, *cu_,(void**)kernel.cu_params(), NULL);
if(event)
dispatch::cuEventRecord(((cu_event_t)*event).second, *cu_);
}
void Stream::write(Buffer const & buffer, bool blocking, std::size_t offset, std::size_t size, void const* ptr){
ContextSwitcher ctx_switch(context_);
if(blocking)
dispatch::cuMemcpyHtoD(buffer + offset, ptr, size);
else
dispatch::cuMemcpyHtoDAsync(buffer + offset, ptr, size, *cu_);
}
void Stream::read(Buffer const & buffer, bool blocking, std::size_t offset, std::size_t size, void* ptr){
ContextSwitcher ctx_switch(context_);
if(blocking)
dispatch::cuMemcpyDtoH(ptr, buffer + offset, size);
else
dispatch::cuMemcpyDtoHAsync(ptr, buffer + offset, size, *cu_);
}
Handle<CUstream> const & Stream::cu() const
{ return cu_; }
}
}
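`Stream::write` and `Stream::read` pick between the synchronous copies (`cuMemcpyHtoD`/`cuMemcpyDtoH`) and their `Async` counterparts depending on the `blocking` flag, always after switching to the stream's context. A sketch of a blocking round trip, assuming the isaac driver headers and a `Buffer(Context, size)` constructor (not shown in this file):

```
#include <cstddef>
#include <vector>
#include "isaac/driver/buffer.h"
#include "isaac/driver/context.h"
#include "isaac/driver/stream.h"

void roundtrip(isaac::driver::Context const & ctx, isaac::driver::Stream & stream){
  using namespace isaac::driver;
  std::vector<float> host(256, 1.f);
  std::size_t bytes = host.size() * sizeof(float);
  Buffer device(ctx, bytes);                           // device allocation (assumed constructor)
  stream.write(device, true, 0, bytes, host.data());   // blocking: cuMemcpyHtoD
  stream.read(device, true, 0, bytes, host.data());    // blocking: cuMemcpyDtoH
  // blocking=false would use cuMemcpyHtoDAsync / cuMemcpyDtoHAsync on this stream instead.
}
```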

View File

@@ -1,94 +0,0 @@
# Define the fmt library, its includes and the needed defines.
# format.cc is added to FMT_HEADERS for the header-only configuration.
set(FMT_HEADERS format.h format.cc ostream.h ostream.cc printf.h
string.h time.h)
if (HAVE_OPEN)
set(FMT_HEADERS ${FMT_HEADERS} posix.h)
set(FMT_SOURCES ${FMT_SOURCES} posix.cc)
endif ()
add_library(fmt ${FMT_SOURCES} ${FMT_HEADERS} ../README.rst ../ChangeLog.rst)
option(FMT_CPPFORMAT "Build cppformat library for backward compatibility." OFF)
if (FMT_CPPFORMAT)
message(WARNING "The cppformat library is deprecated, use fmt instead.")
add_library(cppformat ${FMT_SOURCES} ${FMT_HEADERS})
endif ()
# Starting with cmake 3.1 the CXX_STANDARD property can be used instead.
target_compile_options(fmt PUBLIC ${CPP11_FLAG})
if (FMT_PEDANTIC)
target_compile_options(fmt PRIVATE ${PEDANTIC_COMPILE_FLAGS})
endif ()
target_include_directories(fmt PUBLIC
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>
$<INSTALL_INTERFACE:include>)
set_target_properties(fmt PROPERTIES
VERSION ${FMT_VERSION} SOVERSION ${CPACK_PACKAGE_VERSION_MAJOR})
if (BUILD_SHARED_LIBS)
if (UNIX AND NOT APPLE)
# Fix rpmlint warning:
# unused-direct-shlib-dependency /usr/lib/libformat.so.1.1.0 /lib/libm.so.6.
target_link_libraries(fmt -Wl,--as-needed)
endif ()
target_compile_definitions(fmt PRIVATE FMT_EXPORT INTERFACE FMT_SHARED)
endif ()
#------------------------------------------------------------------------------
# additionally define a header only library when cmake is new enough
if (CMAKE_VERSION VERSION_GREATER 3.1.0 OR CMAKE_VERSION VERSION_EQUAL 3.1.0)
add_library(fmt-header-only INTERFACE)
target_compile_definitions(fmt-header-only INTERFACE FMT_HEADER_ONLY=1)
target_include_directories(fmt-header-only INTERFACE
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>
$<INSTALL_INTERFACE:include>)
endif ()
# Install targets.
if (FMT_INSTALL)
include(CMakePackageConfigHelpers)
set(FMT_CMAKE_DIR lib/cmake/fmt CACHE STRING
"Installation directory for cmake files, relative to ${CMAKE_INSTALL_PREFIX}.")
set(version_config ${PROJECT_BINARY_DIR}/fmt-config-version.cmake)
set(project_config ${PROJECT_BINARY_DIR}/fmt-config.cmake)
set(targets_export_name fmt-targets)
set (INSTALL_TARGETS fmt)
if (TARGET fmt-header-only)
set(INSTALL_TARGETS ${INSTALL_TARGETS} fmt-header-only)
endif ()
set(FMT_LIB_DIR lib CACHE STRING
"Installation directory for libraries, relative to ${CMAKE_INSTALL_PREFIX}.")
# Generate the version, config and target files into the build directory.
write_basic_package_version_file(
${version_config}
VERSION ${FMT_VERSION}
COMPATIBILITY AnyNewerVersion)
configure_package_config_file(
${PROJECT_SOURCE_DIR}/support/cmake/fmt-config.cmake.in
${project_config}
INSTALL_DESTINATION ${FMT_CMAKE_DIR})
export(TARGETS ${INSTALL_TARGETS}
FILE ${PROJECT_BINARY_DIR}/${targets_export_name}.cmake)
# Install version, config and target files.
install(
FILES ${project_config} ${version_config}
DESTINATION ${FMT_CMAKE_DIR})
install(EXPORT ${targets_export_name} DESTINATION ${FMT_CMAKE_DIR})
# Install the library and headers.
install(TARGETS ${INSTALL_TARGETS} EXPORT ${targets_export_name}
DESTINATION ${FMT_LIB_DIR})
install(FILES ${FMT_HEADERS} DESTINATION include/fmt)
if (FMT_CPPFORMAT)
install(TARGETS cppformat DESTINATION ${FMT_LIB_DIR})
endif ()
endif ()

View File

@@ -1,556 +0,0 @@
/*
Formatting library for C++
Copyright (c) 2012 - 2016, Victor Zverovich
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "format.h"
#include "printf.h"
#include <string.h>
#include <cctype>
#include <cerrno>
#include <climits>
#include <cmath>
#include <cstdarg>
#include <cstddef> // for std::ptrdiff_t
#if defined(_WIN32) && defined(__MINGW32__)
# include <cstring>
#endif
#if FMT_USE_WINDOWS_H
# if defined(NOMINMAX) || defined(FMT_WIN_MINMAX)
# include <windows.h>
# else
# define NOMINMAX
# include <windows.h>
# undef NOMINMAX
# endif
#endif
using fmt::internal::Arg;
#if FMT_EXCEPTIONS
# define FMT_TRY try
# define FMT_CATCH(x) catch (x)
#else
# define FMT_TRY if (true)
# define FMT_CATCH(x) if (false)
#endif
#ifdef _MSC_VER
# pragma warning(push)
# pragma warning(disable: 4127) // conditional expression is constant
# pragma warning(disable: 4702) // unreachable code
// Disable deprecation warning for strerror. The latter is not called but
// MSVC fails to detect it.
# pragma warning(disable: 4996)
#endif
// Dummy implementations of strerror_r and strerror_s called if corresponding
// system functions are not available.
static inline fmt::internal::Null<> strerror_r(int, char *, ...) {
return fmt::internal::Null<>();
}
static inline fmt::internal::Null<> strerror_s(char *, std::size_t, ...) {
return fmt::internal::Null<>();
}
namespace fmt {
FMT_FUNC internal::RuntimeError::~RuntimeError() throw() {}
FMT_FUNC FormatError::~FormatError() throw() {}
FMT_FUNC SystemError::~SystemError() throw() {}
namespace {
#ifndef _MSC_VER
# define FMT_SNPRINTF snprintf
#else // _MSC_VER
inline int fmt_snprintf(char *buffer, size_t size, const char *format, ...) {
va_list args;
va_start(args, format);
int result = vsnprintf_s(buffer, size, _TRUNCATE, format, args);
va_end(args);
return result;
}
# define FMT_SNPRINTF fmt_snprintf
#endif // _MSC_VER
#if defined(_WIN32) && defined(__MINGW32__) && !defined(__NO_ISOCEXT)
# define FMT_SWPRINTF snwprintf
#else
# define FMT_SWPRINTF swprintf
#endif // defined(_WIN32) && defined(__MINGW32__) && !defined(__NO_ISOCEXT)
const char RESET_COLOR[] = "\x1b[0m";
typedef void (*FormatFunc)(Writer &, int, StringRef);
// Portable thread-safe version of strerror.
// Sets buffer to point to a string describing the error code.
// This can be either a pointer to a string stored in buffer,
// or a pointer to some static immutable string.
// Returns one of the following values:
// 0 - success
// ERANGE - buffer is not large enough to store the error message
// other - failure
// Buffer should be at least of size 1.
int safe_strerror(
int error_code, char *&buffer, std::size_t buffer_size) FMT_NOEXCEPT {
FMT_ASSERT(buffer != 0 && buffer_size != 0, "invalid buffer");
class StrError {
private:
int error_code_;
char *&buffer_;
std::size_t buffer_size_;
// A noop assignment operator to avoid bogus warnings.
void operator=(const StrError &) {}
// Handle the result of XSI-compliant version of strerror_r.
int handle(int result) {
// glibc versions before 2.13 return result in errno.
return result == -1 ? errno : result;
}
// Handle the result of GNU-specific version of strerror_r.
int handle(char *message) {
// If the buffer is full then the message is probably truncated.
if (message == buffer_ && strlen(buffer_) == buffer_size_ - 1)
return ERANGE;
buffer_ = message;
return 0;
}
// Handle the case when strerror_r is not available.
int handle(internal::Null<>) {
return fallback(strerror_s(buffer_, buffer_size_, error_code_));
}
// Fallback to strerror_s when strerror_r is not available.
int fallback(int result) {
// If the buffer is full then the message is probably truncated.
return result == 0 && strlen(buffer_) == buffer_size_ - 1 ?
ERANGE : result;
}
// Fallback to strerror if strerror_r and strerror_s are not available.
int fallback(internal::Null<>) {
errno = 0;
buffer_ = strerror(error_code_);
return errno;
}
public:
StrError(int err_code, char *&buf, std::size_t buf_size)
: error_code_(err_code), buffer_(buf), buffer_size_(buf_size) {}
int run() {
strerror_r(0, 0, ""); // Suppress a warning about unused strerror_r.
return handle(strerror_r(error_code_, buffer_, buffer_size_));
}
};
return StrError(error_code, buffer, buffer_size).run();
}
void format_error_code(Writer &out, int error_code,
StringRef message) FMT_NOEXCEPT {
// Report error code making sure that the output fits into
// INLINE_BUFFER_SIZE to avoid dynamic memory allocation and potential
// bad_alloc.
out.clear();
static const char SEP[] = ": ";
static const char ERROR_STR[] = "error ";
// Subtract 2 to account for terminating null characters in SEP and ERROR_STR.
std::size_t error_code_size = sizeof(SEP) + sizeof(ERROR_STR) - 2;
typedef internal::IntTraits<int>::MainType MainType;
MainType abs_value = static_cast<MainType>(error_code);
if (internal::is_negative(error_code)) {
abs_value = 0 - abs_value;
++error_code_size;
}
error_code_size += internal::count_digits(abs_value);
if (message.size() <= internal::INLINE_BUFFER_SIZE - error_code_size)
out << message << SEP;
out << ERROR_STR << error_code;
assert(out.size() <= internal::INLINE_BUFFER_SIZE);
}
void report_error(FormatFunc func, int error_code,
StringRef message) FMT_NOEXCEPT {
MemoryWriter full_message;
func(full_message, error_code, message);
// Use Writer::data instead of Writer::c_str to avoid potential memory
// allocation.
std::fwrite(full_message.data(), full_message.size(), 1, stderr);
std::fputc('\n', stderr);
}
} // namespace
namespace internal {
// This method is used to preserve binary compatibility with fmt 3.0.
// It can be removed in 4.0.
FMT_FUNC void format_system_error(
Writer &out, int error_code, StringRef message) FMT_NOEXCEPT {
fmt::format_system_error(out, error_code, message);
}
} // namespace internal
FMT_FUNC void SystemError::init(
int err_code, CStringRef format_str, ArgList args) {
error_code_ = err_code;
MemoryWriter w;
format_system_error(w, err_code, format(format_str, args));
std::runtime_error &base = *this;
base = std::runtime_error(w.str());
}
template <typename T>
int internal::CharTraits<char>::format_float(
char *buffer, std::size_t size, const char *format,
unsigned width, int precision, T value) {
if (width == 0) {
return precision < 0 ?
FMT_SNPRINTF(buffer, size, format, value) :
FMT_SNPRINTF(buffer, size, format, precision, value);
}
return precision < 0 ?
FMT_SNPRINTF(buffer, size, format, width, value) :
FMT_SNPRINTF(buffer, size, format, width, precision, value);
}
template <typename T>
int internal::CharTraits<wchar_t>::format_float(
wchar_t *buffer, std::size_t size, const wchar_t *format,
unsigned width, int precision, T value) {
if (width == 0) {
return precision < 0 ?
FMT_SWPRINTF(buffer, size, format, value) :
FMT_SWPRINTF(buffer, size, format, precision, value);
}
return precision < 0 ?
FMT_SWPRINTF(buffer, size, format, width, value) :
FMT_SWPRINTF(buffer, size, format, width, precision, value);
}
template <typename T>
const char internal::BasicData<T>::DIGITS[] =
"0001020304050607080910111213141516171819"
"2021222324252627282930313233343536373839"
"4041424344454647484950515253545556575859"
"6061626364656667686970717273747576777879"
"8081828384858687888990919293949596979899";
#define FMT_POWERS_OF_10(factor) \
factor * 10, \
factor * 100, \
factor * 1000, \
factor * 10000, \
factor * 100000, \
factor * 1000000, \
factor * 10000000, \
factor * 100000000, \
factor * 1000000000
template <typename T>
const uint32_t internal::BasicData<T>::POWERS_OF_10_32[] = {
0, FMT_POWERS_OF_10(1)
};
template <typename T>
const uint64_t internal::BasicData<T>::POWERS_OF_10_64[] = {
0,
FMT_POWERS_OF_10(1),
FMT_POWERS_OF_10(ULongLong(1000000000)),
// Multiply several constants instead of using a single long long constant
// to avoid warnings about C++98 not supporting long long.
ULongLong(1000000000) * ULongLong(1000000000) * 10
};
FMT_FUNC void internal::report_unknown_type(char code, const char *type) {
(void)type;
if (std::isprint(static_cast<unsigned char>(code))) {
FMT_THROW(FormatError(
format("unknown format code '{}' for {}", code, type)));
}
FMT_THROW(FormatError(
format("unknown format code '\\x{:02x}' for {}",
static_cast<unsigned>(code), type)));
}
#if FMT_USE_WINDOWS_H
FMT_FUNC internal::UTF8ToUTF16::UTF8ToUTF16(StringRef s) {
static const char ERROR_MSG[] = "cannot convert string from UTF-8 to UTF-16";
if (s.size() > INT_MAX)
FMT_THROW(WindowsError(ERROR_INVALID_PARAMETER, ERROR_MSG));
int s_size = static_cast<int>(s.size());
int length = MultiByteToWideChar(
CP_UTF8, MB_ERR_INVALID_CHARS, s.data(), s_size, 0, 0);
if (length == 0)
FMT_THROW(WindowsError(GetLastError(), ERROR_MSG));
buffer_.resize(length + 1);
length = MultiByteToWideChar(
CP_UTF8, MB_ERR_INVALID_CHARS, s.data(), s_size, &buffer_[0], length);
if (length == 0)
FMT_THROW(WindowsError(GetLastError(), ERROR_MSG));
buffer_[length] = 0;
}
FMT_FUNC internal::UTF16ToUTF8::UTF16ToUTF8(WStringRef s) {
if (int error_code = convert(s)) {
FMT_THROW(WindowsError(error_code,
"cannot convert string from UTF-16 to UTF-8"));
}
}
FMT_FUNC int internal::UTF16ToUTF8::convert(WStringRef s) {
if (s.size() > INT_MAX)
return ERROR_INVALID_PARAMETER;
int s_size = static_cast<int>(s.size());
int length = WideCharToMultiByte(CP_UTF8, 0, s.data(), s_size, 0, 0, 0, 0);
if (length == 0)
return GetLastError();
buffer_.resize(length + 1);
length = WideCharToMultiByte(
CP_UTF8, 0, s.data(), s_size, &buffer_[0], length, 0, 0);
if (length == 0)
return GetLastError();
buffer_[length] = 0;
return 0;
}
FMT_FUNC void WindowsError::init(
int err_code, CStringRef format_str, ArgList args) {
error_code_ = err_code;
MemoryWriter w;
internal::format_windows_error(w, err_code, format(format_str, args));
std::runtime_error &base = *this;
base = std::runtime_error(w.str());
}
FMT_FUNC void internal::format_windows_error(
Writer &out, int error_code, StringRef message) FMT_NOEXCEPT {
FMT_TRY {
MemoryBuffer<wchar_t, INLINE_BUFFER_SIZE> buffer;
buffer.resize(INLINE_BUFFER_SIZE);
for (;;) {
wchar_t *system_message = &buffer[0];
int result = FormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
0, error_code, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
system_message, static_cast<uint32_t>(buffer.size()), 0);
if (result != 0) {
UTF16ToUTF8 utf8_message;
if (utf8_message.convert(system_message) == ERROR_SUCCESS) {
out << message << ": " << utf8_message;
return;
}
break;
}
if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
break; // Can't get error message, report error code instead.
buffer.resize(buffer.size() * 2);
}
} FMT_CATCH(...) {}
fmt::format_error_code(out, error_code, message); // 'fmt::' is for bcc32.
}
#endif // FMT_USE_WINDOWS_H
FMT_FUNC void format_system_error(
Writer &out, int error_code, StringRef message) FMT_NOEXCEPT {
FMT_TRY {
internal::MemoryBuffer<char, internal::INLINE_BUFFER_SIZE> buffer;
buffer.resize(internal::INLINE_BUFFER_SIZE);
for (;;) {
char *system_message = &buffer[0];
int result = safe_strerror(error_code, system_message, buffer.size());
if (result == 0) {
out << message << ": " << system_message;
return;
}
if (result != ERANGE)
break; // Can't get error message, report error code instead.
buffer.resize(buffer.size() * 2);
}
} FMT_CATCH(...) {}
fmt::format_error_code(out, error_code, message); // 'fmt::' is for bcc32.
}
template <typename Char>
void internal::ArgMap<Char>::init(const ArgList &args) {
if (!map_.empty())
return;
typedef internal::NamedArg<Char> NamedArg;
const NamedArg *named_arg = 0;
bool use_values =
args.type(ArgList::MAX_PACKED_ARGS - 1) == internal::Arg::NONE;
if (use_values) {
for (unsigned i = 0;/*nothing*/; ++i) {
internal::Arg::Type arg_type = args.type(i);
switch (arg_type) {
case internal::Arg::NONE:
return;
case internal::Arg::NAMED_ARG:
named_arg = static_cast<const NamedArg*>(args.values_[i].pointer);
map_.push_back(Pair(named_arg->name, *named_arg));
break;
default:
/*nothing*/;
}
}
return;
}
for (unsigned i = 0; i != ArgList::MAX_PACKED_ARGS; ++i) {
internal::Arg::Type arg_type = args.type(i);
if (arg_type == internal::Arg::NAMED_ARG) {
named_arg = static_cast<const NamedArg*>(args.args_[i].pointer);
map_.push_back(Pair(named_arg->name, *named_arg));
}
}
for (unsigned i = ArgList::MAX_PACKED_ARGS;/*nothing*/; ++i) {
switch (args.args_[i].type) {
case internal::Arg::NONE:
return;
case internal::Arg::NAMED_ARG:
named_arg = static_cast<const NamedArg*>(args.args_[i].pointer);
map_.push_back(Pair(named_arg->name, *named_arg));
break;
default:
/*nothing*/;
}
}
}
template <typename Char>
void internal::FixedBuffer<Char>::grow(std::size_t) {
FMT_THROW(std::runtime_error("buffer overflow"));
}
FMT_FUNC Arg internal::FormatterBase::do_get_arg(
unsigned arg_index, const char *&error) {
Arg arg = args_[arg_index];
switch (arg.type) {
case Arg::NONE:
error = "argument index out of range";
break;
case Arg::NAMED_ARG:
arg = *static_cast<const internal::Arg*>(arg.pointer);
break;
default:
/*nothing*/;
}
return arg;
}
FMT_FUNC void report_system_error(
int error_code, fmt::StringRef message) FMT_NOEXCEPT {
// 'fmt::' is for bcc32.
report_error(format_system_error, error_code, message);
}
#if FMT_USE_WINDOWS_H
FMT_FUNC void report_windows_error(
int error_code, fmt::StringRef message) FMT_NOEXCEPT {
// 'fmt::' is for bcc32.
report_error(internal::format_windows_error, error_code, message);
}
#endif
FMT_FUNC void print(std::FILE *f, CStringRef format_str, ArgList args) {
MemoryWriter w;
w.write(format_str, args);
std::fwrite(w.data(), 1, w.size(), f);
}
FMT_FUNC void print(CStringRef format_str, ArgList args) {
print(stdout, format_str, args);
}
FMT_FUNC void print_colored(Color c, CStringRef format, ArgList args) {
char escape[] = "\x1b[30m";
escape[3] = static_cast<char>('0' + c);
std::fputs(escape, stdout);
print(format, args);
std::fputs(RESET_COLOR, stdout);
}
template <typename Char>
void printf(BasicWriter<Char> &w, BasicCStringRef<Char> format, ArgList args);
FMT_FUNC int fprintf(std::FILE *f, CStringRef format, ArgList args) {
MemoryWriter w;
printf(w, format, args);
std::size_t size = w.size();
return std::fwrite(w.data(), 1, size, f) < size ? -1 : static_cast<int>(size);
}
#ifndef FMT_HEADER_ONLY
template struct internal::BasicData<void>;
// Explicit instantiations for char.
template void internal::FixedBuffer<char>::grow(std::size_t);
template void internal::ArgMap<char>::init(const ArgList &args);
template void PrintfFormatter<char>::format(CStringRef format);
template int internal::CharTraits<char>::format_float(
char *buffer, std::size_t size, const char *format,
unsigned width, int precision, double value);
template int internal::CharTraits<char>::format_float(
char *buffer, std::size_t size, const char *format,
unsigned width, int precision, long double value);
// Explicit instantiations for wchar_t.
template void internal::FixedBuffer<wchar_t>::grow(std::size_t);
template void internal::ArgMap<wchar_t>::init(const ArgList &args);
template void PrintfFormatter<wchar_t>::format(WCStringRef format);
template int internal::CharTraits<wchar_t>::format_float(
wchar_t *buffer, std::size_t size, const wchar_t *format,
unsigned width, int precision, double value);
template int internal::CharTraits<wchar_t>::format_float(
wchar_t *buffer, std::size_t size, const wchar_t *format,
unsigned width, int precision, long double value);
#endif // FMT_HEADER_ONLY
} // namespace fmt
#ifdef _MSC_VER
# pragma warning(pop)
#endif

File diff suppressed because it is too large

View File

@@ -1,35 +0,0 @@
/*
Formatting library for C++ - std::ostream support
Copyright (c) 2012 - 2016, Victor Zverovich
All rights reserved.
For the license information refer to format.h.
*/
#include "ostream.h"
namespace fmt {
namespace internal {
FMT_FUNC void write(std::ostream &os, Writer &w) {
const char *data = w.data();
typedef internal::MakeUnsigned<std::streamsize>::Type UnsignedStreamSize;
UnsignedStreamSize size = w.size();
UnsignedStreamSize max_size =
internal::to_unsigned((std::numeric_limits<std::streamsize>::max)());
do {
UnsignedStreamSize n = size <= max_size ? size : max_size;
os.write(data, static_cast<std::streamsize>(n));
data += n;
size -= n;
} while (size != 0);
}
}
FMT_FUNC void print(std::ostream &os, CStringRef format_str, ArgList args) {
MemoryWriter w;
w.write(format_str, args);
internal::write(os, w);
}
} // namespace fmt

View File

@@ -1,106 +0,0 @@
/*
Formatting library for C++ - std::ostream support
Copyright (c) 2012 - 2016, Victor Zverovich
All rights reserved.
For the license information refer to format.h.
*/
#ifndef FMT_OSTREAM_H_
#define FMT_OSTREAM_H_
#include "format.h"
#include <ostream>
namespace fmt {
namespace internal {
template <class Char>
class FormatBuf : public std::basic_streambuf<Char> {
private:
typedef typename std::basic_streambuf<Char>::int_type int_type;
typedef typename std::basic_streambuf<Char>::traits_type traits_type;
Buffer<Char> &buffer_;
Char *start_;
public:
FormatBuf(Buffer<Char> &buffer) : buffer_(buffer), start_(&buffer[0]) {
this->setp(start_, start_ + buffer_.capacity());
}
int_type overflow(int_type ch = traits_type::eof()) {
if (!traits_type::eq_int_type(ch, traits_type::eof())) {
size_t buf_size = size();
buffer_.resize(buf_size);
buffer_.reserve(buf_size * 2);
start_ = &buffer_[0];
start_[buf_size] = traits_type::to_char_type(ch);
this->setp(start_+ buf_size + 1, start_ + buf_size * 2);
}
return ch;
}
size_t size() const {
return to_unsigned(this->pptr() - start_);
}
};
Yes &convert(std::ostream &);
struct DummyStream : std::ostream {
DummyStream(); // Suppress a bogus warning in MSVC.
// Hide all operator<< overloads from std::ostream.
void operator<<(Null<>);
};
No &operator<<(std::ostream &, int);
template<typename T>
struct ConvertToIntImpl<T, true> {
// Convert to int only if T doesn't have an overloaded operator<<.
enum {
value = sizeof(convert(get<DummyStream>() << get<T>())) == sizeof(No)
};
};
// Write the content of w to os.
void write(std::ostream &os, Writer &w);
} // namespace internal
// Formats a value.
template <typename Char, typename ArgFormatter, typename T>
void format_arg(BasicFormatter<Char, ArgFormatter> &f,
const Char *&format_str, const T &value) {
internal::MemoryBuffer<Char, internal::INLINE_BUFFER_SIZE> buffer;
internal::FormatBuf<Char> format_buf(buffer);
std::basic_ostream<Char> output(&format_buf);
output << value;
BasicStringRef<Char> str(&buffer[0], format_buf.size());
typedef internal::MakeArg< BasicFormatter<Char> > MakeArg;
format_str = f.format(format_str, MakeArg(str));
}
/**
\rst
Prints formatted data to the stream *os*.
**Example**::
print(cerr, "Don't {}!", "panic");
\endrst
*/
FMT_API void print(std::ostream &os, CStringRef format_str, ArgList args);
FMT_VARIADIC(void, print, std::ostream &, CStringRef)
} // namespace fmt
#ifdef FMT_HEADER_ONLY
# include "ostream.cc"
#endif
#endif // FMT_OSTREAM_H_
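The `format_arg` overload above is what makes any type with an `operator<<` formattable: the value is streamed into a `FormatBuf`, and the resulting characters are handed back to the formatter as a string argument, while the `ConvertToIntImpl` specialization stops such types from being misinterpreted as integers. A minimal sketch, assuming the bundled fmt headers:
```
#include <ostream>
#include "format.h"
#include "ostream.h"  // include paths are an assumption

struct Point { int x, y; };

// Any type with an operator<< is routed through the format_arg overload above.
std::ostream &operator<<(std::ostream &os, const Point &p) {
  return os << "(" << p.x << ", " << p.y << ")";
}

int main() {
  Point p = {3, 4};
  fmt::print("p = {}\n", p);  // prints "p = (3, 4)"
  return 0;
}
```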

View File

@@ -1,238 +0,0 @@
/*
A C++ interface to POSIX functions.
Copyright (c) 2012 - 2016, Victor Zverovich
All rights reserved.
For the license information refer to format.h.
*/
// Disable bogus MSVC warnings.
#ifndef _CRT_SECURE_NO_WARNINGS
# define _CRT_SECURE_NO_WARNINGS
#endif
#include "posix.h"
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#ifndef _WIN32
# include <unistd.h>
#else
# include <windows.h>
# include <io.h>
# define O_CREAT _O_CREAT
# define O_TRUNC _O_TRUNC
# ifndef S_IRUSR
# define S_IRUSR _S_IREAD
# endif
# ifndef S_IWUSR
# define S_IWUSR _S_IWRITE
# endif
# ifdef __MINGW32__
# define _SH_DENYNO 0x40
# endif
#endif // _WIN32
#ifdef fileno
# undef fileno
#endif
namespace {
#ifdef _WIN32
// Return type of read and write functions.
typedef int RWResult;
// On Windows the count argument to read and write is unsigned, so convert
// it from size_t to prevent integer overflow.
inline unsigned convert_rwcount(std::size_t count) {
return count <= UINT_MAX ? static_cast<unsigned>(count) : UINT_MAX;
}
#else
// Return type of read and write functions.
typedef ssize_t RWResult;
inline std::size_t convert_rwcount(std::size_t count) { return count; }
#endif
}
fmt::BufferedFile::~BufferedFile() FMT_NOEXCEPT {
if (file_ && FMT_SYSTEM(fclose(file_)) != 0)
fmt::report_system_error(errno, "cannot close file");
}
fmt::BufferedFile::BufferedFile(
fmt::CStringRef filename, fmt::CStringRef mode) {
FMT_RETRY_VAL(file_, FMT_SYSTEM(fopen(filename.c_str(), mode.c_str())), 0);
if (!file_)
throw SystemError(errno, "cannot open file {}", filename);
}
void fmt::BufferedFile::close() {
if (!file_)
return;
int result = FMT_SYSTEM(fclose(file_));
file_ = 0;
if (result != 0)
throw SystemError(errno, "cannot close file");
}
// A macro used to prevent expansion of fileno on broken versions of MinGW.
#define FMT_ARGS
int fmt::BufferedFile::fileno() const {
int fd = FMT_POSIX_CALL(fileno FMT_ARGS(file_));
if (fd == -1)
throw SystemError(errno, "cannot get file descriptor");
return fd;
}
fmt::File::File(fmt::CStringRef path, int oflag) {
int mode = S_IRUSR | S_IWUSR;
#if defined(_WIN32) && !defined(__MINGW32__)
fd_ = -1;
FMT_POSIX_CALL(sopen_s(&fd_, path.c_str(), oflag, _SH_DENYNO, mode));
#else
FMT_RETRY(fd_, FMT_POSIX_CALL(open(path.c_str(), oflag, mode)));
#endif
if (fd_ == -1)
throw SystemError(errno, "cannot open file {}", path);
}
fmt::File::~File() FMT_NOEXCEPT {
// Don't retry close in case of EINTR!
// See http://linux.derkeiler.com/Mailing-Lists/Kernel/2005-09/3000.html
if (fd_ != -1 && FMT_POSIX_CALL(close(fd_)) != 0)
fmt::report_system_error(errno, "cannot close file");
}
void fmt::File::close() {
if (fd_ == -1)
return;
// Don't retry close in case of EINTR!
// See http://linux.derkeiler.com/Mailing-Lists/Kernel/2005-09/3000.html
int result = FMT_POSIX_CALL(close(fd_));
fd_ = -1;
if (result != 0)
throw SystemError(errno, "cannot close file");
}
fmt::LongLong fmt::File::size() const {
#ifdef _WIN32
// Use GetFileSize instead of GetFileSizeEx for the case when _WIN32_WINNT
// is less than 0x0500 as is the case with some default MinGW builds.
// Both functions support large file sizes.
DWORD size_upper = 0;
HANDLE handle = reinterpret_cast<HANDLE>(_get_osfhandle(fd_));
DWORD size_lower = FMT_SYSTEM(GetFileSize(handle, &size_upper));
if (size_lower == INVALID_FILE_SIZE) {
DWORD error = GetLastError();
if (error != NO_ERROR)
throw WindowsError(GetLastError(), "cannot get file size");
}
fmt::ULongLong long_size = size_upper;
return (long_size << sizeof(DWORD) * CHAR_BIT) | size_lower;
#else
typedef struct stat Stat;
Stat file_stat = Stat();
if (FMT_POSIX_CALL(fstat(fd_, &file_stat)) == -1)
throw SystemError(errno, "cannot get file attributes");
FMT_STATIC_ASSERT(sizeof(fmt::LongLong) >= sizeof(file_stat.st_size),
"return type of File::size is not large enough");
return file_stat.st_size;
#endif
}
std::size_t fmt::File::read(void *buffer, std::size_t count) {
RWResult result = 0;
FMT_RETRY(result, FMT_POSIX_CALL(read(fd_, buffer, convert_rwcount(count))));
if (result < 0)
throw SystemError(errno, "cannot read from file");
return internal::to_unsigned(result);
}
std::size_t fmt::File::write(const void *buffer, std::size_t count) {
RWResult result = 0;
FMT_RETRY(result, FMT_POSIX_CALL(write(fd_, buffer, convert_rwcount(count))));
if (result < 0)
throw SystemError(errno, "cannot write to file");
return internal::to_unsigned(result);
}
fmt::File fmt::File::dup(int fd) {
// Don't retry as dup doesn't return EINTR.
// http://pubs.opengroup.org/onlinepubs/009695399/functions/dup.html
int new_fd = FMT_POSIX_CALL(dup(fd));
if (new_fd == -1)
throw SystemError(errno, "cannot duplicate file descriptor {}", fd);
return File(new_fd);
}
void fmt::File::dup2(int fd) {
int result = 0;
FMT_RETRY(result, FMT_POSIX_CALL(dup2(fd_, fd)));
if (result == -1) {
throw SystemError(errno,
"cannot duplicate file descriptor {} to {}", fd_, fd);
}
}
void fmt::File::dup2(int fd, ErrorCode &ec) FMT_NOEXCEPT {
int result = 0;
FMT_RETRY(result, FMT_POSIX_CALL(dup2(fd_, fd)));
if (result == -1)
ec = ErrorCode(errno);
}
void fmt::File::pipe(File &read_end, File &write_end) {
// Close the descriptors first to make sure that assignments don't throw
// and there are no leaks.
read_end.close();
write_end.close();
int fds[2] = {};
#ifdef _WIN32
// Make the default pipe capacity the same as on Linux 2.6.11+.
enum { DEFAULT_CAPACITY = 65536 };
int result = FMT_POSIX_CALL(pipe(fds, DEFAULT_CAPACITY, _O_BINARY));
#else
// Don't retry as the pipe function doesn't return EINTR.
// http://pubs.opengroup.org/onlinepubs/009696799/functions/pipe.html
int result = FMT_POSIX_CALL(pipe(fds));
#endif
if (result != 0)
throw SystemError(errno, "cannot create pipe");
// The following assignments don't throw because read_end and write_end
// are closed.
read_end = File(fds[0]);
write_end = File(fds[1]);
}
fmt::BufferedFile fmt::File::fdopen(const char *mode) {
// Don't retry as fdopen doesn't return EINTR.
FILE *f = FMT_POSIX_CALL(fdopen(fd_, mode));
if (!f)
throw SystemError(errno, "cannot associate stream with file descriptor");
BufferedFile file(f);
fd_ = -1;
return file;
}
long fmt::getpagesize() {
#ifdef _WIN32
SYSTEM_INFO si;
GetSystemInfo(&si);
return si.dwPageSize;
#else
long size = FMT_POSIX_CALL(sysconf(_SC_PAGESIZE));
if (size < 0)
throw SystemError(errno, "cannot get memory page size");
return size;
#endif
}
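As a sketch of how these wrappers compose: `File::pipe` creates both ends (closing any previously held descriptors), `fdopen` detaches the write end into a stdio-backed `BufferedFile`, and the EINTR-retrying `read` pulls the data back. The include path and buffer size below are assumptions:
```
#include "posix.h"  // bundled fmt header; the include path is an assumption

int main() {
  fmt::File read_end, write_end;
  fmt::File::pipe(read_end, write_end);           // create both ends of a pipe

  fmt::BufferedFile out = write_end.fdopen("w");  // detach the fd into a FILE*
  out.print("answer = {}\n", 42);
  out.close();                                    // flush and close the write end

  char buf[64] = {0};
  read_end.read(buf, sizeof(buf) - 1);            // EINTR-safe read
  fmt::print("got: {}", buf);
  return 0;
}
```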

View File

@@ -1,367 +0,0 @@
/*
A C++ interface to POSIX functions.
Copyright (c) 2012 - 2016, Victor Zverovich
All rights reserved.
For the license information refer to format.h.
*/
#ifndef FMT_POSIX_H_
#define FMT_POSIX_H_
#if defined(__MINGW32__) || defined(__CYGWIN__)
// Work around MinGW bug https://sourceforge.net/p/mingw/bugs/2024/.
# undef __STRICT_ANSI__
#endif
#include <errno.h>
#include <fcntl.h> // for O_RDONLY
#include <locale.h> // for locale_t
#include <stdio.h>
#include <stdlib.h> // for strtod_l
#include <cstddef>
#if defined __APPLE__ || defined(__FreeBSD__)
# include <xlocale.h> // for LC_NUMERIC_MASK on OS X
#endif
#include "format.h"
#ifndef FMT_POSIX
# if defined(_WIN32) && !defined(__MINGW32__)
// Fix warnings about deprecated symbols.
# define FMT_POSIX(call) _##call
# else
# define FMT_POSIX(call) call
# endif
#endif
// Calls to system functions are wrapped in FMT_SYSTEM for testability.
#ifdef FMT_SYSTEM
# define FMT_POSIX_CALL(call) FMT_SYSTEM(call)
#else
# define FMT_SYSTEM(call) call
# ifdef _WIN32
// Fix warnings about deprecated symbols.
# define FMT_POSIX_CALL(call) ::_##call
# else
# define FMT_POSIX_CALL(call) ::call
# endif
#endif
// Retries the expression while it evaluates to error_result and errno
// equals EINTR.
#ifndef _WIN32
# define FMT_RETRY_VAL(result, expression, error_result) \
do { \
result = (expression); \
} while (result == error_result && errno == EINTR)
#else
# define FMT_RETRY_VAL(result, expression, error_result) result = (expression)
#endif
#define FMT_RETRY(result, expression) FMT_RETRY_VAL(result, expression, -1)
namespace fmt {
// An error code.
class ErrorCode {
private:
int value_;
public:
explicit ErrorCode(int value = 0) FMT_NOEXCEPT : value_(value) {}
int get() const FMT_NOEXCEPT { return value_; }
};
// A buffered file.
class BufferedFile {
private:
FILE *file_;
friend class File;
explicit BufferedFile(FILE *f) : file_(f) {}
public:
// Constructs a BufferedFile object which doesn't represent any file.
BufferedFile() FMT_NOEXCEPT : file_(0) {}
// Destroys the object closing the file it represents if any.
~BufferedFile() FMT_NOEXCEPT;
#if !FMT_USE_RVALUE_REFERENCES
// Emulate a move constructor and a move assignment operator if rvalue
// references are not supported.
private:
// A proxy object to emulate a move constructor.
// It is private to make it impossible to call operator Proxy directly.
struct Proxy {
FILE *file;
};
public:
// A "move constructor" for moving from a temporary.
BufferedFile(Proxy p) FMT_NOEXCEPT : file_(p.file) {}
// A "move constructor" for moving from an lvalue.
BufferedFile(BufferedFile &f) FMT_NOEXCEPT : file_(f.file_) {
f.file_ = 0;
}
// A "move assignment operator" for moving from a temporary.
BufferedFile &operator=(Proxy p) {
close();
file_ = p.file;
return *this;
}
// A "move assignment operator" for moving from an lvalue.
BufferedFile &operator=(BufferedFile &other) {
close();
file_ = other.file_;
other.file_ = 0;
return *this;
}
// Returns a proxy object for moving from a temporary:
// BufferedFile file = BufferedFile(...);
operator Proxy() FMT_NOEXCEPT {
Proxy p = {file_};
file_ = 0;
return p;
}
#else
private:
FMT_DISALLOW_COPY_AND_ASSIGN(BufferedFile);
public:
BufferedFile(BufferedFile &&other) FMT_NOEXCEPT : file_(other.file_) {
other.file_ = 0;
}
BufferedFile& operator=(BufferedFile &&other) {
close();
file_ = other.file_;
other.file_ = 0;
return *this;
}
#endif
// Opens a file.
BufferedFile(CStringRef filename, CStringRef mode);
// Closes the file.
void close();
// Returns the pointer to a FILE object representing this file.
FILE *get() const FMT_NOEXCEPT { return file_; }
// We place parentheses around fileno to work around a bug in some versions
// of MinGW that define fileno as a macro.
int (fileno)() const;
void print(CStringRef format_str, const ArgList &args) {
fmt::print(file_, format_str, args);
}
FMT_VARIADIC(void, print, CStringRef)
};
// A file. Closed file is represented by a File object with descriptor -1.
// Methods that are not declared with FMT_NOEXCEPT may throw
// fmt::SystemError in case of failure. Note that some errors such as
// closing the file multiple times will cause a crash on Windows rather
// than an exception. You can get standard behavior by overriding the
// invalid parameter handler with _set_invalid_parameter_handler.
class File {
private:
int fd_; // File descriptor.
// Constructs a File object with a given descriptor.
explicit File(int fd) : fd_(fd) {}
public:
// Possible values for the oflag argument to the constructor.
enum {
RDONLY = FMT_POSIX(O_RDONLY), // Open for reading only.
WRONLY = FMT_POSIX(O_WRONLY), // Open for writing only.
RDWR = FMT_POSIX(O_RDWR) // Open for reading and writing.
};
// Constructs a File object which doesn't represent any file.
File() FMT_NOEXCEPT : fd_(-1) {}
// Opens a file and constructs a File object representing this file.
File(CStringRef path, int oflag);
#if !FMT_USE_RVALUE_REFERENCES
// Emulate a move constructor and a move assignment operator if rvalue
// references are not supported.
private:
// A proxy object to emulate a move constructor.
// It is private to make it impossible to call operator Proxy directly.
struct Proxy {
int fd;
};
public:
// A "move constructor" for moving from a temporary.
File(Proxy p) FMT_NOEXCEPT : fd_(p.fd) {}
// A "move constructor" for moving from an lvalue.
File(File &other) FMT_NOEXCEPT : fd_(other.fd_) {
other.fd_ = -1;
}
// A "move assignment operator" for moving from a temporary.
File &operator=(Proxy p) {
close();
fd_ = p.fd;
return *this;
}
// A "move assignment operator" for moving from an lvalue.
File &operator=(File &other) {
close();
fd_ = other.fd_;
other.fd_ = -1;
return *this;
}
// Returns a proxy object for moving from a temporary:
// File file = File(...);
operator Proxy() FMT_NOEXCEPT {
Proxy p = {fd_};
fd_ = -1;
return p;
}
#else
private:
FMT_DISALLOW_COPY_AND_ASSIGN(File);
public:
File(File &&other) FMT_NOEXCEPT : fd_(other.fd_) {
other.fd_ = -1;
}
File& operator=(File &&other) {
close();
fd_ = other.fd_;
other.fd_ = -1;
return *this;
}
#endif
// Destroys the object closing the file it represents if any.
~File() FMT_NOEXCEPT;
// Returns the file descriptor.
int descriptor() const FMT_NOEXCEPT { return fd_; }
// Closes the file.
void close();
// Returns the file size. The size has signed type for consistency with
// stat::st_size.
LongLong size() const;
// Attempts to read count bytes from the file into the specified buffer.
std::size_t read(void *buffer, std::size_t count);
// Attempts to write count bytes from the specified buffer to the file.
std::size_t write(const void *buffer, std::size_t count);
// Duplicates a file descriptor with the dup function and returns
// the duplicate as a file object.
static File dup(int fd);
// Makes fd be a copy of this file descriptor, closing fd first if
// necessary.
void dup2(int fd);
// Makes fd be a copy of this file descriptor, closing fd first if
// necessary.
void dup2(int fd, ErrorCode &ec) FMT_NOEXCEPT;
// Creates a pipe setting up read_end and write_end file objects for reading
// and writing respectively.
static void pipe(File &read_end, File &write_end);
// Creates a BufferedFile object associated with this file and detaches
// this File object from the file.
BufferedFile fdopen(const char *mode);
};
// Returns the memory page size.
long getpagesize();
#if (defined(LC_NUMERIC_MASK) || defined(_MSC_VER)) && \
!defined(__ANDROID__) && !defined(__CYGWIN__)
# define FMT_LOCALE
#endif
#ifdef FMT_LOCALE
// A "C" numeric locale.
class Locale {
private:
# ifdef _MSC_VER
typedef _locale_t locale_t;
enum { LC_NUMERIC_MASK = LC_NUMERIC };
static locale_t newlocale(int category_mask, const char *locale, locale_t) {
return _create_locale(category_mask, locale);
}
static void freelocale(locale_t locale) {
_free_locale(locale);
}
static double strtod_l(const char *nptr, char **endptr, _locale_t locale) {
return _strtod_l(nptr, endptr, locale);
}
# endif
locale_t locale_;
FMT_DISALLOW_COPY_AND_ASSIGN(Locale);
public:
typedef locale_t Type;
Locale() : locale_(newlocale(LC_NUMERIC_MASK, "C", NULL)) {
if (!locale_)
throw fmt::SystemError(errno, "cannot create locale");
}
~Locale() { freelocale(locale_); }
Type get() const { return locale_; }
// Converts string to floating-point number and advances str past the end
// of the parsed input.
double strtod(const char *&str) const {
char *end = 0;
double result = strtod_l(str, &end, locale_);
str = end;
return result;
}
};
#endif // FMT_LOCALE
} // namespace fmt
#if !FMT_USE_RVALUE_REFERENCES
namespace std {
// For compatibility with C++98.
inline fmt::BufferedFile &move(fmt::BufferedFile &f) { return f; }
inline fmt::File &move(fmt::File &f) { return f; }
}
#endif
#endif // FMT_POSIX_H_
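The `Proxy` machinery above emulates move semantics in C++98, which is why a `BufferedFile` or `File` can be returned by value even without rvalue references (the same idiom `File::dup` relies on). A minimal sketch, assuming a writable working directory and the bundled header path:
```
#include "posix.h"  // bundled fmt header; the include path is an assumption

// Returning by value works even pre-C++11: the temporary converts to the
// private Proxy type, whose constructor transfers ownership of the FILE*.
fmt::BufferedFile open_log(const char *path) {
  return fmt::BufferedFile(path, "w");
}

int main() {
  fmt::BufferedFile log = open_log("test.log");
  log.print("page size is {}\n", fmt::getpagesize());
  return 0;  // ~BufferedFile closes the file
}
```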

View File

@@ -1,558 +0,0 @@
/*
Formatting library for C++
Copyright (c) 2012 - 2016, Victor Zverovich
All rights reserved.
For the license information refer to format.h.
*/
#ifndef FMT_PRINTF_H_
#define FMT_PRINTF_H_
#include <algorithm> // std::fill_n
#include <limits> // std::numeric_limits
#include "ostream.h"
namespace fmt {
namespace internal {
// Checks if a value fits in int - used to avoid warnings about comparing
// signed and unsigned integers.
template <bool IsSigned>
struct IntChecker {
template <typename T>
static bool fits_in_int(T value) {
unsigned max = std::numeric_limits<int>::max();
return value <= max;
}
static bool fits_in_int(bool) { return true; }
};
template <>
struct IntChecker<true> {
template <typename T>
static bool fits_in_int(T value) {
return value >= std::numeric_limits<int>::min() &&
value <= std::numeric_limits<int>::max();
}
static bool fits_in_int(int) { return true; }
};
class PrecisionHandler : public ArgVisitor<PrecisionHandler, int> {
public:
void report_unhandled_arg() {
FMT_THROW(FormatError("precision is not integer"));
}
template <typename T>
int visit_any_int(T value) {
if (!IntChecker<std::numeric_limits<T>::is_signed>::fits_in_int(value))
FMT_THROW(FormatError("number is too big"));
return static_cast<int>(value);
}
};
// IsZeroInt::visit(arg) returns true iff arg is a zero integer.
class IsZeroInt : public ArgVisitor<IsZeroInt, bool> {
public:
template <typename T>
bool visit_any_int(T value) { return value == 0; }
};
template <typename T, typename U>
struct is_same {
enum { value = 0 };
};
template <typename T>
struct is_same<T, T> {
enum { value = 1 };
};
// An argument visitor that converts an integer argument to T for printf,
// if T is an integral type. If T is void, the argument is converted to
// the corresponding signed or unsigned type depending on the type specifier:
// 'd' and 'i' - signed, other - unsigned.
template <typename T = void>
class ArgConverter : public ArgVisitor<ArgConverter<T>, void> {
private:
internal::Arg &arg_;
wchar_t type_;
FMT_DISALLOW_COPY_AND_ASSIGN(ArgConverter);
public:
ArgConverter(internal::Arg &arg, wchar_t type)
: arg_(arg), type_(type) {}
void visit_bool(bool value) {
if (type_ != 's')
visit_any_int(value);
}
template <typename U>
void visit_any_int(U value) {
bool is_signed = type_ == 'd' || type_ == 'i';
using internal::Arg;
typedef typename internal::Conditional<
is_same<T, void>::value, U, T>::type TargetType;
if (sizeof(TargetType) <= sizeof(int)) {
// Extra casts are used to silence warnings.
if (is_signed) {
arg_.type = Arg::INT;
arg_.int_value = static_cast<int>(static_cast<TargetType>(value));
} else {
arg_.type = Arg::UINT;
typedef typename internal::MakeUnsigned<TargetType>::Type Unsigned;
arg_.uint_value = static_cast<unsigned>(static_cast<Unsigned>(value));
}
} else {
if (is_signed) {
arg_.type = Arg::LONG_LONG;
// glibc's printf doesn't sign extend arguments of smaller types:
// std::printf("%lld", -42); // prints "4294967254"
// but we don't have to do the same because it's UB.
arg_.long_long_value = static_cast<LongLong>(value);
} else {
arg_.type = Arg::ULONG_LONG;
arg_.ulong_long_value =
static_cast<typename internal::MakeUnsigned<U>::Type>(value);
}
}
}
};
// Converts an integer argument to char for printf.
class CharConverter : public ArgVisitor<CharConverter, void> {
private:
internal::Arg &arg_;
FMT_DISALLOW_COPY_AND_ASSIGN(CharConverter);
public:
explicit CharConverter(internal::Arg &arg) : arg_(arg) {}
template <typename T>
void visit_any_int(T value) {
arg_.type = internal::Arg::CHAR;
arg_.int_value = static_cast<char>(value);
}
};
// Checks if an argument is a valid printf width specifier and sets
// left alignment if it is negative.
class WidthHandler : public ArgVisitor<WidthHandler, unsigned> {
private:
FormatSpec &spec_;
FMT_DISALLOW_COPY_AND_ASSIGN(WidthHandler);
public:
explicit WidthHandler(FormatSpec &spec) : spec_(spec) {}
void report_unhandled_arg() {
FMT_THROW(FormatError("width is not integer"));
}
template <typename T>
unsigned visit_any_int(T value) {
typedef typename internal::IntTraits<T>::MainType UnsignedType;
UnsignedType width = static_cast<UnsignedType>(value);
if (internal::is_negative(value)) {
spec_.align_ = ALIGN_LEFT;
width = 0 - width;
}
unsigned int_max = std::numeric_limits<int>::max();
if (width > int_max)
FMT_THROW(FormatError("number is too big"));
return static_cast<unsigned>(width);
}
};
} // namespace internal
/**
\rst
A ``printf`` argument formatter based on the `curiously recurring template
pattern <http://en.wikipedia.org/wiki/Curiously_recurring_template_pattern>`_.
To use `~fmt::BasicPrintfArgFormatter` define a subclass that implements some
or all of the visit methods with the same signatures as the methods in
`~fmt::ArgVisitor`, for example, `~fmt::ArgVisitor::visit_int()`.
Pass the subclass as the *Impl* template parameter. When a formatting
function processes an argument, it will dispatch to a visit method
specific to the argument type. For example, if the argument type is
``double`` then the `~fmt::ArgVisitor::visit_double()` method of a subclass
will be called. If the subclass doesn't contain a method with this signature,
then a corresponding method of `~fmt::BasicPrintfArgFormatter` or its
superclass will be called.
\endrst
*/
template <typename Impl, typename Char>
class BasicPrintfArgFormatter : public internal::ArgFormatterBase<Impl, Char> {
private:
void write_null_pointer() {
this->spec().type_ = 0;
this->write("(nil)");
}
typedef internal::ArgFormatterBase<Impl, Char> Base;
public:
/**
\rst
Constructs an argument formatter object.
*writer* is a reference to the output writer and *spec* contains format
specifier information for standard argument types.
\endrst
*/
BasicPrintfArgFormatter(BasicWriter<Char> &writer, FormatSpec &spec)
: internal::ArgFormatterBase<Impl, Char>(writer, spec) {}
/** Formats an argument of type ``bool``. */
void visit_bool(bool value) {
FormatSpec &fmt_spec = this->spec();
if (fmt_spec.type_ != 's')
return this->visit_any_int(value);
fmt_spec.type_ = 0;
this->write(value);
}
/** Formats a character. */
void visit_char(int value) {
const FormatSpec &fmt_spec = this->spec();
BasicWriter<Char> &w = this->writer();
if (fmt_spec.type_ && fmt_spec.type_ != 'c')
w.write_int(value, fmt_spec);
typedef typename BasicWriter<Char>::CharPtr CharPtr;
CharPtr out = CharPtr();
if (fmt_spec.width_ > 1) {
Char fill = ' ';
out = w.grow_buffer(fmt_spec.width_);
if (fmt_spec.align_ != ALIGN_LEFT) {
std::fill_n(out, fmt_spec.width_ - 1, fill);
out += fmt_spec.width_ - 1;
} else {
std::fill_n(out + 1, fmt_spec.width_ - 1, fill);
}
} else {
out = w.grow_buffer(1);
}
*out = static_cast<Char>(value);
}
/** Formats a null-terminated C string. */
void visit_cstring(const char *value) {
if (value)
Base::visit_cstring(value);
else if (this->spec().type_ == 'p')
write_null_pointer();
else
this->write("(null)");
}
/** Formats a pointer. */
void visit_pointer(const void *value) {
if (value)
return Base::visit_pointer(value);
this->spec().type_ = 0;
write_null_pointer();
}
/** Formats an argument of a custom (user-defined) type. */
void visit_custom(internal::Arg::CustomValue c) {
BasicFormatter<Char> formatter(ArgList(), this->writer());
const Char format_str[] = {'}', 0};
const Char *format = format_str;
c.format(&formatter, c.value, &format);
}
};
/** The default printf argument formatter. */
template <typename Char>
class PrintfArgFormatter
: public BasicPrintfArgFormatter<PrintfArgFormatter<Char>, Char> {
public:
/** Constructs an argument formatter object. */
PrintfArgFormatter(BasicWriter<Char> &w, FormatSpec &s)
: BasicPrintfArgFormatter<PrintfArgFormatter<Char>, Char>(w, s) {}
};
/** This template formats data and writes the output to a writer. */
template <typename Char, typename ArgFormatter = PrintfArgFormatter<Char> >
class PrintfFormatter : private internal::FormatterBase {
private:
BasicWriter<Char> &writer_;
void parse_flags(FormatSpec &spec, const Char *&s);
// Returns the argument with specified index or, if arg_index is equal
// to the maximum unsigned value, the next argument.
internal::Arg get_arg(
const Char *s,
unsigned arg_index = (std::numeric_limits<unsigned>::max)());
// Parses argument index, flags and width and returns the argument index.
unsigned parse_header(const Char *&s, FormatSpec &spec);
public:
/**
\rst
Constructs a ``PrintfFormatter`` object. References to the arguments and
the writer are stored in the formatter object so make sure they have
appropriate lifetimes.
\endrst
*/
explicit PrintfFormatter(const ArgList &args, BasicWriter<Char> &w)
: FormatterBase(args), writer_(w) {}
/** Formats stored arguments and writes the output to the writer. */
FMT_API void format(BasicCStringRef<Char> format_str);
};
template <typename Char, typename AF>
void PrintfFormatter<Char, AF>::parse_flags(FormatSpec &spec, const Char *&s) {
for (;;) {
switch (*s++) {
case '-':
spec.align_ = ALIGN_LEFT;
break;
case '+':
spec.flags_ |= SIGN_FLAG | PLUS_FLAG;
break;
case '0':
spec.fill_ = '0';
break;
case ' ':
spec.flags_ |= SIGN_FLAG;
break;
case '#':
spec.flags_ |= HASH_FLAG;
break;
default:
--s;
return;
}
}
}
template <typename Char, typename AF>
internal::Arg PrintfFormatter<Char, AF>::get_arg(const Char *s,
unsigned arg_index) {
(void)s;
const char *error = 0;
internal::Arg arg = arg_index == std::numeric_limits<unsigned>::max() ?
next_arg(error) : FormatterBase::get_arg(arg_index - 1, error);
if (error)
FMT_THROW(FormatError(!*s ? "invalid format string" : error));
return arg;
}
template <typename Char, typename AF>
unsigned PrintfFormatter<Char, AF>::parse_header(
const Char *&s, FormatSpec &spec) {
unsigned arg_index = std::numeric_limits<unsigned>::max();
Char c = *s;
if (c >= '0' && c <= '9') {
// Parse an argument index (if followed by '$') or a width possibly
// preceded with '0' flag(s).
unsigned value = internal::parse_nonnegative_int(s);
if (*s == '$') { // value is an argument index
++s;
arg_index = value;
} else {
if (c == '0')
spec.fill_ = '0';
if (value != 0) {
// Nonzero value means that we parsed width and don't need to
// parse it or flags again, so return now.
spec.width_ = value;
return arg_index;
}
}
}
parse_flags(spec, s);
// Parse width.
if (*s >= '0' && *s <= '9') {
spec.width_ = internal::parse_nonnegative_int(s);
} else if (*s == '*') {
++s;
spec.width_ = internal::WidthHandler(spec).visit(get_arg(s));
}
return arg_index;
}
template <typename Char, typename AF>
void PrintfFormatter<Char, AF>::format(BasicCStringRef<Char> format_str) {
const Char *start = format_str.c_str();
const Char *s = start;
while (*s) {
Char c = *s++;
if (c != '%') continue;
if (*s == c) {
write(writer_, start, s);
start = ++s;
continue;
}
write(writer_, start, s - 1);
FormatSpec spec;
spec.align_ = ALIGN_RIGHT;
// Parse argument index, flags and width.
unsigned arg_index = parse_header(s, spec);
// Parse precision.
if (*s == '.') {
++s;
if ('0' <= *s && *s <= '9') {
spec.precision_ = static_cast<int>(internal::parse_nonnegative_int(s));
} else if (*s == '*') {
++s;
spec.precision_ = internal::PrecisionHandler().visit(get_arg(s));
}
}
using internal::Arg;
Arg arg = get_arg(s, arg_index);
if (spec.flag(HASH_FLAG) && internal::IsZeroInt().visit(arg))
spec.flags_ &= ~internal::to_unsigned<int>(HASH_FLAG);
if (spec.fill_ == '0') {
if (arg.type <= Arg::LAST_NUMERIC_TYPE)
spec.align_ = ALIGN_NUMERIC;
else
spec.fill_ = ' '; // Ignore '0' flag for non-numeric types.
}
// Parse length and convert the argument to the required type.
using internal::ArgConverter;
switch (*s++) {
case 'h':
if (*s == 'h')
ArgConverter<signed char>(arg, *++s).visit(arg);
else
ArgConverter<short>(arg, *s).visit(arg);
break;
case 'l':
if (*s == 'l')
ArgConverter<fmt::LongLong>(arg, *++s).visit(arg);
else
ArgConverter<long>(arg, *s).visit(arg);
break;
case 'j':
ArgConverter<intmax_t>(arg, *s).visit(arg);
break;
case 'z':
ArgConverter<std::size_t>(arg, *s).visit(arg);
break;
case 't':
ArgConverter<std::ptrdiff_t>(arg, *s).visit(arg);
break;
case 'L':
// printf produces garbage when 'L' is omitted for long double, no
// need to do the same.
break;
default:
--s;
ArgConverter<void>(arg, *s).visit(arg);
}
// Parse type.
if (!*s)
FMT_THROW(FormatError("invalid format string"));
spec.type_ = static_cast<char>(*s++);
if (arg.type <= Arg::LAST_INTEGER_TYPE) {
// Normalize type.
switch (spec.type_) {
case 'i': case 'u':
spec.type_ = 'd';
break;
case 'c':
// TODO: handle wchar_t
internal::CharConverter(arg).visit(arg);
break;
}
}
start = s;
// Format argument.
AF(writer_, spec).visit(arg);
}
write(writer_, start, s);
}
template <typename Char>
void printf(BasicWriter<Char> &w, BasicCStringRef<Char> format, ArgList args) {
PrintfFormatter<Char>(args, w).format(format);
}
/**
\rst
Formats arguments and returns the result as a string.
**Example**::
std::string message = fmt::sprintf("The answer is %d", 42);
\endrst
*/
inline std::string sprintf(CStringRef format, ArgList args) {
MemoryWriter w;
printf(w, format, args);
return w.str();
}
FMT_VARIADIC(std::string, sprintf, CStringRef)
inline std::wstring sprintf(WCStringRef format, ArgList args) {
WMemoryWriter w;
printf(w, format, args);
return w.str();
}
FMT_VARIADIC_W(std::wstring, sprintf, WCStringRef)
/**
\rst
Prints formatted data to the file *f*.
**Example**::
fmt::fprintf(stderr, "Don't %s!", "panic");
\endrst
*/
FMT_API int fprintf(std::FILE *f, CStringRef format, ArgList args);
FMT_VARIADIC(int, fprintf, std::FILE *, CStringRef)
/**
\rst
Prints formatted data to ``stdout``.
**Example**::
fmt::printf("Elapsed time: %.2f seconds", 1.23);
\endrst
*/
inline int printf(CStringRef format, ArgList args) {
return fprintf(stdout, format, args);
}
FMT_VARIADIC(int, printf, CStringRef)
/**
\rst
Prints formatted data to the stream *os*.
**Example**::
fprintf(cerr, "Don't %s!", "panic");
\endrst
*/
inline int fprintf(std::ostream &os, CStringRef format_str, ArgList args) {
MemoryWriter w;
printf(w, format_str, args);
internal::write(os, w);
return static_cast<int>(w.size());
}
FMT_VARIADIC(int, fprintf, std::ostream &, CStringRef)
} // namespace fmt
#endif // FMT_PRINTF_H_
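The flag, width and precision handling above follows C `printf` semantics, including `*` arguments (handled by `WidthHandler`/`PrecisionHandler`) and the rule that `#` is ignored for zero integers (`IsZeroInt`). A short sketch of the resulting behaviour, assuming the bundled headers:
```
#include <string>
#include "printf.h"  // bundled fmt header; the include path is an assumption

int main() {
  // Width, precision and flags go through parse_header/parse_flags above.
  std::string a = fmt::sprintf("[%08.3f]", 3.14159);  // "[0003.142]"
  // '*' pulls the width from the argument list via WidthHandler.
  std::string b = fmt::sprintf("[%*d]", 6, 42);       // "[    42]"
  // The '#' flag is dropped for zero integers by the IsZeroInt check.
  std::string c = fmt::sprintf("%#x vs %#x", 0, 255); // "0 vs 0xff"
  fmt::printf("%s %s %s\n", a, b, c);
  return 0;
}
```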

View File

@@ -1,119 +0,0 @@
/*
Formatting library for C++ - string utilities
Copyright (c) 2012 - 2016, Victor Zverovich
All rights reserved.
For the license information refer to format.h.
*/
#ifndef FMT_STRING_H_
#define FMT_STRING_H_
#include "format.h"
namespace fmt {
namespace internal {
// A buffer that stores data in ``std::string``.
template <typename Char>
class StringBuffer : public Buffer<Char> {
private:
std::basic_string<Char> data_;
protected:
virtual void grow(std::size_t size) {
data_.resize(size);
this->ptr_ = &data_[0];
this->capacity_ = size;
}
public:
// Moves the data to ``str`` clearing the buffer.
void move_to(std::basic_string<Char> &str) {
data_.resize(this->size_);
str.swap(data_);
this->capacity_ = this->size_ = 0;
this->ptr_ = 0;
}
};
} // namespace internal
/**
\rst
This class template provides operations for formatting and writing data
into a character stream. The output is stored in ``std::string`` that grows
dynamically.
You can use one of the following typedefs for common character types
and the standard allocator:
+---------------+----------------------------+
| Type | Definition |
+===============+============================+
| StringWriter | BasicStringWriter<char> |
+---------------+----------------------------+
| WStringWriter | BasicStringWriter<wchar_t> |
+---------------+----------------------------+
**Example**::
StringWriter out;
out << "The answer is " << 42 << "\n";
This will write the following output to the ``out`` object:
.. code-block:: none
The answer is 42
The output can be moved to an ``std::string`` with ``out.move_to()``.
\endrst
*/
template <typename Char>
class BasicStringWriter : public BasicWriter<Char> {
private:
internal::StringBuffer<Char> buffer_;
public:
/**
\rst
Constructs a :class:`fmt::BasicStringWriter` object.
\endrst
*/
BasicStringWriter() : BasicWriter<Char>(buffer_) {}
/**
\rst
Moves the buffer content to *str* clearing the buffer.
\endrst
*/
void move_to(std::basic_string<Char> &str) {
buffer_.move_to(str);
}
};
typedef BasicStringWriter<char> StringWriter;
typedef BasicStringWriter<wchar_t> WStringWriter;
/**
\rst
Converts *value* to ``std::string`` using the default format for type *T*.
**Example**::
#include "fmt/string.h"
std::string answer = fmt::to_string(42);
\endrst
*/
template <typename T>
std::string to_string(const T &value) {
fmt::MemoryWriter w;
w << value;
return w.str();
}
}
#endif // FMT_STRING_H_
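A minimal sketch of the writer and `to_string` defined here, mirroring the docstring example above (the include path is an assumption):
```
#include <string>
#include "string.h"  // bundled fmt header; the include path is an assumption

int main() {
  fmt::StringWriter out;
  out << "The answer is " << 42 << "\n";
  std::string text;
  out.move_to(text);                   // the buffer is moved, not copied

  std::string n = fmt::to_string(42);  // "42", the default format for int
  return 0;
}
```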

View File

@@ -1,143 +0,0 @@
/*
Formatting library for C++ - time formatting
Copyright (c) 2012 - 2016, Victor Zverovich
All rights reserved.
For the license information refer to format.h.
*/
#ifndef FMT_TIME_H_
#define FMT_TIME_H_
#include "format.h"
#include <ctime>
#ifdef _MSC_VER
# pragma warning(push)
# pragma warning(disable: 4702) // unreachable code
# pragma warning(disable: 4996) // "deprecated" functions
#endif
namespace fmt {
template <typename ArgFormatter>
void format_arg(BasicFormatter<char, ArgFormatter> &f,
const char *&format_str, const std::tm &tm) {
if (*format_str == ':')
++format_str;
const char *end = format_str;
while (*end && *end != '}')
++end;
if (*end != '}')
FMT_THROW(FormatError("missing '}' in format string"));
internal::MemoryBuffer<char, internal::INLINE_BUFFER_SIZE> format;
format.append(format_str, end + 1);
format[format.size() - 1] = '\0';
Buffer<char> &buffer = f.writer().buffer();
std::size_t start = buffer.size();
for (;;) {
std::size_t size = buffer.capacity() - start;
std::size_t count = std::strftime(&buffer[start], size, &format[0], &tm);
if (count != 0) {
buffer.resize(start + count);
break;
}
if (size >= format.size() * 256) {
// If the buffer is 256 times larger than the format string, assume
// that `strftime` gives an empty result. There doesn't seem to be a
// better way to distinguish the two cases:
// https://github.com/fmtlib/fmt/issues/367
break;
}
const std::size_t MIN_GROWTH = 10;
buffer.reserve(buffer.capacity() + (size > MIN_GROWTH ? size : MIN_GROWTH));
}
format_str = end + 1;
}
namespace internal{
inline Null<> localtime_r(...) { return Null<>(); }
inline Null<> localtime_s(...) { return Null<>(); }
inline Null<> gmtime_r(...) { return Null<>(); }
inline Null<> gmtime_s(...) { return Null<>(); }
}
// Thread-safe replacement for std::localtime
inline std::tm localtime(std::time_t time) {
struct LocalTime {
std::time_t time_;
std::tm tm_;
LocalTime(std::time_t t): time_(t) {}
bool run() {
using namespace fmt::internal;
return handle(localtime_r(&time_, &tm_));
}
bool handle(std::tm* tm) { return tm != 0; }
bool handle(internal::Null<>) {
using namespace fmt::internal;
return fallback(localtime_s(&tm_, &time_));
}
bool fallback(int res) { return res == 0; }
bool fallback(internal::Null<>) {
using namespace fmt::internal;
std::tm* tm = std::localtime(&time_);
if (tm != 0) tm_ = *tm;
return tm != 0;
}
};
LocalTime lt(time);
if (lt.run())
return lt.tm_;
// Very large time values may be unsupported.
FMT_THROW(fmt::FormatError("time_t value out of range"));
return std::tm();
}
// Thread-safe replacement for std::gmtime
inline std::tm gmtime(std::time_t time) {
struct GMTime {
std::time_t time_;
std::tm tm_;
GMTime(std::time_t t): time_(t) {}
bool run() {
using namespace fmt::internal;
return handle(gmtime_r(&time_, &tm_));
}
bool handle(std::tm* tm) { return tm != 0; }
bool handle(internal::Null<>) {
using namespace fmt::internal;
return fallback(gmtime_s(&tm_, &time_));
}
bool fallback(int res) { return res == 0; }
bool fallback(internal::Null<>) {
std::tm* tm = std::gmtime(&time_);
if (tm != 0) tm_ = *tm;
return tm != 0;
}
};
GMTime gt(time);
if (gt.run())
return gt.tm_;
// Very large time values may be unsupported.
FMT_THROW(fmt::FormatError("time_t value out of range"));
return std::tm();
}
} //namespace fmt
#ifdef _MSC_VER
# pragma warning(pop)
#endif
#endif // FMT_TIME_H_
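A minimal sketch of the `std::tm` support defined here: everything after the `':'` in a replacement field is forwarded to `std::strftime`, and `fmt::localtime` is the thread-safe wrapper above (the include path is an assumption):
```
#include <ctime>
#include "time.h"  // bundled fmt header; the include path is an assumption

int main() {
  std::time_t now = std::time(0);
  // "{:%Y-%m-%d}" hands "%Y-%m-%d" to std::strftime via format_arg above.
  fmt::print("The date is {:%Y-%m-%d}.\n", fmt::localtime(now));
  return 0;
}
```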

View File

@@ -1,67 +0,0 @@
/* Copyright 2015-2017 Philippe Tillet
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "isaac/runtime/predict.h"
#include "database/sm_5_2/pool.hpp"
#include "database/sm_5_2/conv.hpp"
#include "database/sm_5_2/gemm.hpp"
#include "database/sm_6_0/conv.hpp"
#include "database/sm_6_0/gemm.hpp"
#include "database/sm_6_1/conv.hpp"
#include "database/sm_6_1/gemm.hpp"
#include "database/sm_7_0/gemm.hpp"
#include "database/sm_7_0/conv.hpp"
namespace isaac{
namespace runtime{
typedef driver::Device::Architecture Architecture;
const std::map<std::pair<driver::Device::Architecture, OperationType>, std::shared_ptr<Profile> > database =
{
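// Architectures without a dedicated training set reuse the closest available
// profiles (e.g. SM_5_0 and SM_6_0 fall back to the sm_5_2 data below).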
{{Architecture::SM_5_0, CONV}, std::make_shared<ConvProfile>((u_char*)sm_5_2::conv)},
{{Architecture::SM_5_0, GEMM}, std::make_shared<GEMMProfile>((u_char*)sm_5_2::gemm)},
{{Architecture::SM_5_2, POOL}, std::make_shared<PoolProfile>((u_char*)sm_5_2::pool)},
{{Architecture::SM_5_2, CONV}, std::make_shared<ConvProfile>((u_char*)sm_5_2::conv)},
{{Architecture::SM_5_2, GEMM}, std::make_shared<GEMMProfile>((u_char*)sm_5_2::gemm)},
{{Architecture::SM_6_0, POOL}, std::make_shared<PoolProfile>((u_char*)sm_5_2::pool)},
{{Architecture::SM_6_0, CONV}, std::make_shared<ConvProfile>((u_char*)sm_5_2::conv)},
{{Architecture::SM_6_0, GEMM}, std::make_shared<GEMMProfile>((u_char*)sm_5_2::gemm)},
{{Architecture::SM_6_1, POOL}, std::make_shared<PoolProfile>((u_char*)sm_5_2::pool)},
{{Architecture::SM_6_1, CONV}, std::make_shared<ConvProfile>((u_char*)sm_6_1::conv)},
{{Architecture::SM_6_1, GEMM}, std::make_shared<GEMMProfile>((u_char*)sm_6_1::gemm)},
{{Architecture::SM_7_0, POOL}, std::make_shared<PoolProfile>((u_char*)sm_5_2::pool)},
{{Architecture::SM_7_0, CONV}, std::make_shared<ConvProfile>((u_char*)sm_7_0::conv)},
{{Architecture::SM_7_0, GEMM}, std::make_shared<GEMMProfile>((u_char*)sm_7_0::gemm)}
};
}
}

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

Some files were not shown because too many files have changed in this diff