ISAAC-V2.0: INITIAL COMMIT
This commit is contained in:
@@ -1,84 +1,28 @@
|
||||
cmake_minimum_required(VERSION 2.8.7)
|
||||
project(isaac-research)
|
||||
include(CTest)
|
||||
|
||||
#QtCreator: add visibility of headers
|
||||
file( GLOB_RECURSE MAKE_HEADERS_VISIBLE_SRC *.cpp *.hpp *.h)
|
||||
add_custom_target( MAKE_HEADERS_VISIBLE SOURCES ${MAKE_HEADERS_VISIBLE_SRC} )
|
||||
|
||||
#Default build type
|
||||
if(NOT CMAKE_BUILD_TYPE)
|
||||
message(STATUS "Default build type: Release")
|
||||
set(CMAKE_BUILD_TYPE "Release")
|
||||
endif()
|
||||
|
||||
if(WIN32)
|
||||
SET(CMAKE_FIND_LIBRARY_PREFIXES "")
|
||||
SET(CMAKE_FIND_LIBRARY_SUFFIXES ".lib" ".dll")
|
||||
endif()
|
||||
# Add visibility of headers
|
||||
file( GLOB_RECURSE MAKE_HEADERS_VISIBLE_SRC *.cpp *.hpp *.h)
|
||||
add_custom_target( MAKE_HEADERS_VISIBLE SOURCES ${MAKE_HEADERS_VISIBLE_SRC} )
|
||||
|
||||
#Modules
|
||||
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}/lib/external/)
|
||||
|
||||
#Compiler flags
|
||||
add_definitions(${BACKEND_DEFINES})
|
||||
if(WIN32)
|
||||
add_definitions("-DNOMINMAX")
|
||||
else()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Wextra -pedantic")
|
||||
endif()
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Wextra -pedantic")
|
||||
|
||||
#Includes
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/lib/tools/ ${CMAKE_CURRENT_SOURCE_DIR}/include/external/ ${CMAKE_CURRENT_SOURCE_DIR}/include/external/cuda)
|
||||
|
||||
#Binary to convert .cu files to const char *
|
||||
if(NOT ANDROID)
|
||||
add_executable(bin2cpp ${CMAKE_MODULE_PATH}/helpers/bin2cpp.cpp)
|
||||
include("${CMAKE_MODULE_PATH}/helpers/CodeToH.cmake")
|
||||
endif()
|
||||
|
||||
#Source files
|
||||
#Source
|
||||
file(GLOB_RECURSE LIBISAAC_SRC lib/*.cpp)
|
||||
add_library(isaac SHARED ${LIBISAAC_SRC})
|
||||
target_link_libraries(isaac "dl")
|
||||
|
||||
#Python wrapper
|
||||
set(SETUP_PY_IN "${CMAKE_MODULE_PATH}/python/setup.py")
|
||||
set(SETUP_PY "${CMAKE_SOURCE_DIR}/python/setup.py")
|
||||
|
||||
set(LIBISAAC_SRC_STR)
|
||||
foreach(FILE ${LIBISAAC_SRC})
|
||||
string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}" "src" _TMP ${FILE})
|
||||
set(LIBISAAC_SRC_STR "${_TMP} ${LIBISAAC_SRC_STR}")
|
||||
endforeach()
|
||||
|
||||
#Include directories
|
||||
set(INCLUDE_DIRECTORIES_STR)
|
||||
get_property(INCLUDE_DIRECTORIES_LST DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES)
|
||||
set(INCLUDE_DIRECTORIES_STR)
|
||||
foreach(FILE ${INCLUDE_DIRECTORIES_LST})
|
||||
string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}" "src" _TMP ${FILE})
|
||||
set(INCLUDE_DIRECTORIES_STR "${INCLUDE_DIRECTORIES_STR} ${_TMP}")
|
||||
endforeach()
|
||||
|
||||
configure_file(${SETUP_PY_IN} ${SETUP_PY})
|
||||
|
||||
add_custom_command(OUTPUT "${CMAKE_BINARY_DIR}/build/timestamp"
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/python ${CMAKE_BINARY_DIR}/python
|
||||
COMMAND ${CMAKE_COMMAND} -E remove ${CMAKE_BINARY_DIR}/python/src/lib/CMakeLists.txt
|
||||
|
||||
COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_BINARY_DIR}/python/build
|
||||
COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_BINARY_DIR}/python/src/lib
|
||||
COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_BINARY_DIR}/python/src/include
|
||||
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/lib ${CMAKE_BINARY_DIR}/python/src/lib
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/include ${CMAKE_BINARY_DIR}/python/src/include
|
||||
|
||||
COMMAND ${CMAKE_COMMAND} -E tar czf isaac-1.0.tar.gz ${CMAKE_BINARY_DIR}/python
|
||||
)
|
||||
|
||||
add_custom_target(package-python DEPENDS "${CMAKE_BINARY_DIR}/build/timestamp")
|
||||
|
||||
|
||||
#Isaac
|
||||
include(CTest)
|
||||
|
||||
add_subdirectory(lib)
|
||||
add_subdirectory(tests)
|
||||
add_subdirectory(bench)
|
||||
#Examples
|
||||
add_subdirectory(examples)
|
||||
|
||||
#Tests
|
||||
add_subdirectory(tests)
|
||||
|
1
LICENSE
1
LICENSE
@@ -19,3 +19,4 @@
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
75
README.md
75
README.md
@@ -1,77 +1,40 @@
|
||||
# ISAAC
|
||||
|
||||
This is the developer repository for ISAAC, a library that uses machine learning to find input-aware kernels for element-wise operations, 1D/2D reductions and GEMM. It works with both cuBLAS and clBLAS, and falls back on those when appropriate (typically large square matrices).
|
||||
This is the development branch for ISAAC v2.0. This is a major rewrite more targeted at compute-bound applications, with major performance gains at the expense of portability.
|
||||
|
||||
### License
|
||||
|
||||
ISAAC is distributed under the MIT License.
|
||||
ISAAC is distributed under the MIT/X11 license.
|
||||
|
||||
### Installation
|
||||
|
||||
ISAAC is dependency-free, and will load either OpenCL and/or CUDA 7.0+ _dynamically_ depending on which GPUs are detected at runtime.
|
||||
|
||||
You only need CMake 2.8.7+ and a C++11 compliant compiler:
|
||||
ISAAC only requires an NVIDIA GPU with compute-capability > 5.0 and the corresponding proprietary driver.
|
||||
|
||||
The CUDA SDK is *not* required.
|
||||
|
||||
```
|
||||
git clone https://github.com/ptillet/isaac.git
|
||||
mkdir -p isaac/build && cd isaac/build
|
||||
cmake ../ && make -j4
|
||||
cmake ../ && make -j8
|
||||
./examples/bench
|
||||
```
|
||||
|
||||
Link against libisaac.so instead of libcublas.so or libclblas.so, and you're good to go!
|
||||
### Benchmarks
|
||||
Below is the TFLOPS you get for sGEMM on a Pascal Titan X vs cuBLAS 8.0.
|
||||

|
||||
|
||||
The C++ and Python APIs do some kernel fusion, but are not entirely stable. They work well to compose element-wise operations, though.
|
||||
Below is the TFLOPS you get for FCONV on a Pascal Titan X vs cuDNN v6.
|
||||

|
||||
|
||||
There's still room for improvement.
|
||||
|
||||
### Benchmark
|
||||
### APIs
|
||||
|
||||
```
|
||||
Usage : blas-bench [--op {axpy, dot, gemv, gemm}] [--dtype {float32, float64}] [--device DEVICE_IDX] [--help]
|
||||
--op: operation to benchmark (default = gemm)
|
||||
--dtype: data-type to benchmark (default = float32)
|
||||
--device: index of isaac device in [0, ..., ndevices - 1] (default = 0)
|
||||
--help: display this message
|
||||
```
|
||||
It detects clBLAS or cuBLAS and compares it against ISAAC for e.g., DeepBench, Covariance, LAPACK (packed rank1 updates), etc.
|
||||
ISAAC implements both GEMM and FCONV for fp16x2, fp32, and fp64. Half-precision with 32-bit accumulation and complex data-types are not yet supported.
|
||||
|
||||
Below is the TFLOPS you get for GEMM on a Pascal Titan X (cuBLAS 8.0). Numbers in bold represent speed-ups greater than 5%.
|
||||

|
||||
### Future Plans
|
||||
|
||||
For AMD Fury (clBLAS-2.10-Fiji):
|
||||

|
||||
|
||||
Same trend on Intel Broadwell iGPU
|
||||
|
||||
### BLAS routines supported
|
||||
|
||||
Currently supported functions are:
|
||||
|
||||
| BLAS1 | BLAS2 | BLAS3 |
|
||||
| --------------| --------------| --------------|
|
||||
| xAXPY | xGEMV | xGEMM |
|
||||
| xCOPY | xGER | |
|
||||
| xSCAL | | |
|
||||
| xDOT | | |
|
||||
| xASUM | | |
|
||||
|
||||
For x in {S, D}
|
||||
|
||||
### Contributing
|
||||
|
||||
You can contribute to further tuning isaac if you have one of the following architectures:
|
||||
- NVidia: SM 2.x ; SM 3.5 ; SM 5.0
|
||||
|
||||
If you have one of the following architectures you can contribute by running:
|
||||
|
||||
```
|
||||
git clone https://github.com/ptillet/isaac.git
|
||||
cd isaac/python ;
|
||||
python setup.py build;
|
||||
cd ../tune
|
||||
PYTHONPATH=../python/build/lib.linux-x86_64-2.7/ python main.py --float64 --float32 --elementwise_1d --elementwise_2d --reduce_1d --reduce_2d_rows --reduce_2d_cols --gemm_nn --gemm_nt --gemm_tn --gemm_tt
|
||||
```
|
||||
|
||||
This will output a .json file that you can submit for integration.
|
||||
|
||||
Bug reports are more than welcome!
|
||||
Future plans include (but are not limited to):
|
||||
* Transparent use over cuBLAS/cuDNN using LD_PRELOAD
|
||||
* Backward Convolution
|
||||
* Complex data-types for GEMM
|
||||
|
@@ -1,44 +0,0 @@
|
||||
# Build script for the BLAS benchmark executables.
#
# Vendor BLAS libraries (cuBLAS, clBLAS) are detected opportunistically; each
# one that is found contributes preprocessor definitions and link libraries so
# that the benchmark can compare ISAAC against it.

# Benchmarks should be optimized, but an explicit user choice
# (e.g. -DCMAKE_BUILD_TYPE=Debug) must not be silently overridden.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()

# Accumulators: compile definitions and link libraries of every BLAS found.
set(BLAS_DEF)
set(BLAS_LIBS)

#CUBLAS
find_package(CUDA QUIET)
if(CUDA_FOUND)
  list(APPEND BLAS_DEF "-DHAS_A_BLAS -DBENCH_CUBLAS")
  include_directories(${CUDA_INCLUDE_DIRS})
  list(APPEND BLAS_LIBS ${CUDA_LIBRARIES} ${CUDA_CUBLAS_LIBRARIES})
endif()

#CLBLAS
find_package(CLBLAS QUIET)
if(CLBLAS_FOUND)
  list(APPEND BLAS_DEF "-DHAS_A_BLAS -DBENCH_CLBLAS")
  include_directories(${CLBLAS_INCLUDE_DIR})
  list(APPEND BLAS_LIBS ${CLBLAS_LIBRARIES} OpenCL pthread)
endif()

##CBLAS
# CPU BLAS back-ends (MKL / OpenBLAS) are currently disabled.
#find_package(MKL QUIET)
#if(MKL_FOUND)
# set(BLAS_DEF ${BLAS_DEF} "-DHAS_A_BLAS -DBENCH_MKL")
# include_directories(${MKL_INCLUDE_DIR})
# set(BLAS_LIBS ${BLAS_LIBS} ${MKL_LIBRARIES} )
#else()
# find_package(OpenBlas)
# if(OPENBLAS_FOUND)
# set(BLAS_DEF ${BLAS_DEF} "-DHAS_A_BLAS -DBENCH_CBLAS")
# include_directories(${OPENBLAS_INCLUDE_DIR})
# set(BLAS_LIBS ${BLAS_LIBS} ${OPENBLAS_LIBRARIES} )
# endif()
#endif()

# COMPILE_FLAGS expects one space-separated string, not a ;-separated list.
string(REPLACE ";" " " BLAS_DEF_STR "${BLAS_DEF}")

include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/tests/common)
foreach(PROG blas)
  add_executable(bench-${PROG} ${PROG}.cpp)
  set_target_properties(bench-${PROG} PROPERTIES COMPILE_FLAGS "${BLAS_DEF_STR}")
  target_link_libraries(bench-${PROG} ${BLAS_LIBS} isaac)
endforeach()
|
412
bench/blas.cpp
412
bench/blas.cpp
@@ -1,412 +0,0 @@
|
||||
#include "isaac/array.h"
|
||||
#include "isaac/runtime/execute.h"
|
||||
#ifdef BENCH_CLBLAS
|
||||
#include "clBLAS.h"
|
||||
#endif
|
||||
#ifdef BENCH_MKL
|
||||
#include "mkl_cblas.h"
|
||||
#elif defined(BENCH_CBLAS)
|
||||
#include "cblas.h"
|
||||
#endif
|
||||
#ifdef BENCH_CUBLAS
|
||||
#include <cublas.h>
|
||||
#endif
|
||||
#include <iomanip>
|
||||
#include <stdlib.h>
|
||||
#include <cmath>
|
||||
#include <numeric>
|
||||
#include <regex>
|
||||
#include <string>
|
||||
#include "common.hpp"
|
||||
#include "half.hpp"
|
||||
|
||||
typedef sc::int_t int_t;
|
||||
|
||||
Timer tmr;
|
||||
|
||||
/* C++ wrapper for BLAS */
|
||||
// Type-dispatched front-ends for the vendor BLAS libraries.
// The first (unnamed) argument is a value of the element type and is used only
// to select the single- or double-precision overload; every remaining argument
// is passed through unchanged to the underlying routine.

// clBLAS
#ifdef BENCH_CLBLAS
template<typename... Fwd>
void clblasAxpy(float, Fwd... fwd)
{ clblasSaxpy(fwd...); }

template<typename... Fwd>
void clblasAxpy(double, Fwd... fwd)
{ clblasDaxpy(fwd...); }

template<typename... Fwd>
void clblasDot(float, Fwd... fwd)
{ clblasSdot(fwd...); }

template<typename... Fwd>
void clblasDot(double, Fwd... fwd)
{ clblasDdot(fwd...); }

template<typename... Fwd>
void clblasGemv(float, Fwd... fwd)
{ clblasSgemv(fwd...); }

template<typename... Fwd>
void clblasGemv(double, Fwd... fwd)
{ clblasDgemv(fwd...); }

template<typename... Fwd>
void clblasGemm(float, Fwd... fwd)
{ clblasSgemm(fwd...); }

template<typename... Fwd>
void clblasGemm(double, Fwd... fwd)
{ clblasDgemm(fwd...); }
#endif

// CBLAS (host)
#ifdef BENCH_CBLAS
template<typename... Fwd>
void cblasAxpy(float, Fwd... fwd)
{ cblas_saxpy(fwd...); }

template<typename... Fwd>
void cblasAxpy(double, Fwd... fwd)
{ cblas_daxpy(fwd...); }

template<typename... Fwd>
void cblasDot(float, Fwd... fwd)
{ cblas_sdot(fwd...); }

template<typename... Fwd>
void cblasDot(double, Fwd... fwd)
{ cblas_ddot(fwd...); }

template<typename... Fwd>
void cblasGemv(float, Fwd... fwd)
{ cblas_sgemv(fwd...); }

template<typename... Fwd>
void cblasGemv(double, Fwd... fwd)
{ cblas_dgemv(fwd...); }

template<typename... Fwd>
void cblasGemm(float, Fwd... fwd)
{ cblas_sgemm(fwd...); }

template<typename... Fwd>
void cblasGemm(double, Fwd... fwd)
{ cblas_dgemm(fwd...); }
#endif

// cuBLAS
#ifdef BENCH_CUBLAS
template<typename... Fwd>
void cublasAxpy(float, Fwd... fwd)
{ cublasSaxpy(fwd...); }

template<typename... Fwd>
void cublasAxpy(double, Fwd... fwd)
{ cublasDaxpy(fwd...); }

template<typename... Fwd>
void cublasDot(float, Fwd... fwd)
{ cublasSdot(fwd...); }

template<typename... Fwd>
void cublasDot(double, Fwd... fwd)
{ cublasDdot(fwd...); }

template<typename... Fwd>
void cublasGemv(float, Fwd... fwd)
{ cublasSgemv(fwd...); }

template<typename... Fwd>
void cublasGemv(double, Fwd... fwd)
{ cublasDgemv(fwd...); }

template<typename... Fwd>
void cublasGemm(float, Fwd... fwd)
{ cublasSgemm(fwd...); }

template<typename... Fwd>
void cublasGemm(double, Fwd... fwd)
{ cublasDgemm(fwd...); }
#endif
|
||||
|
||||
//
|
||||
template<class OP, class SYNC>
|
||||
double bench(OP const & op, SYNC const & sync)
|
||||
{
|
||||
std::vector<long> times;
|
||||
double total_time = 0;
|
||||
op();
|
||||
sync();
|
||||
while(total_time*1e-9 < 2e-1){
|
||||
tmr.start();
|
||||
op();
|
||||
sync();
|
||||
times.push_back(tmr.get().count());
|
||||
total_time+=times.back();
|
||||
}
|
||||
return min(times);
|
||||
}
|
||||
|
||||
// Prints the header row of a results table to std::cout, in bold italics.
//   sections : titles of the leading descriptive columns (tab-terminated)
//   on_cl    : adds a "clBLAS" column; only consulted when BENCH_CLBLAS is defined
//   on_cu    : adds a "cuBLAS" column; only consulted when BENCH_CUBLAS is defined
// An "ISAAC" column is always printed, and a "BLAS" column whenever
// BENCH_CBLAS is defined.
void print_results_header(std::vector<std::string> sections, bool on_cl, bool on_cu){
  // Silence unused-parameter warnings when a vendor library is compiled out
  (void)on_cl;
  (void)on_cu;

  std::ostream & out = std::cout;
  out << color_stream(ITALIC) << color_stream(BOLD);
  for(std::string const & title: sections)
    out << title << "\t";
  out << "ISAAC";
#ifdef BENCH_CLBLAS
  if(on_cl)
    out << "\tclBLAS";
#endif
#ifdef BENCH_CBLAS
  out << "\tBLAS";
#endif
#ifdef BENCH_CUBLAS
  if(on_cu)
    out << "\tcuBLAS";
#endif
  out << color_stream(RESET) << std::endl;
}
|
||||
|
||||
void print_results(std::vector<double> const & times, std::vector<std::string> const & prefix, std::function<double(double)> fn){
|
||||
std::copy(prefix.begin(), prefix.end(), std::ostream_iterator<std::string>(std::cout, "\t"));
|
||||
std::vector<double> perf;
|
||||
std::transform(times.begin(), times.end(), std::back_inserter(perf), fn);
|
||||
auto fastest = perf;
|
||||
std::sort(fastest.begin(), fastest.end(), std::greater<double>());
|
||||
for(auto x: perf){
|
||||
if(x/fastest[1] >= 1.05)
|
||||
std::cout << color_stream(FG_LIGHT_BLUE) << x << color_stream(RESET);
|
||||
else
|
||||
std::cout << x;
|
||||
std::cout << "\t";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
// Shorthand for std::to_string, used to build table cells.
template<class T>
std::string str(T const & x)
{
  std::string text = std::to_string(x);
  return text;
}
|
||||
|
||||
template<class T>
|
||||
void bench(sc::numeric_type dtype, std::string operation)
|
||||
{
|
||||
using std::get;
|
||||
using std::make_tuple;
|
||||
|
||||
//unsigned int dtsize = sc::size_of(dtype);
|
||||
sc::driver::CommandQueue & queue = sc::driver::backend::queues::get(sc::driver::backend::contexts::get_default(),0);
|
||||
auto sync = [&](){ queue.synchronize(); };
|
||||
#ifdef BENCH_CUBLAS
|
||||
auto cusync = [&](){ cudaDeviceSynchronize(); };
|
||||
#endif
|
||||
|
||||
bool on_cl = queue.backend()==sc::driver::OPENCL;
|
||||
bool on_cu = queue.backend()==sc::driver::CUDA;
|
||||
size_t dtsize = sc::size_of(dtype);
|
||||
/*---------*/
|
||||
/*--BLAS1--*/
|
||||
/*---------*/
|
||||
|
||||
if(operation=="axpy")
|
||||
{
|
||||
float alpha = 1;
|
||||
print_results_header({"N"}, on_cl, on_cu);
|
||||
for(int_t MB: std::vector<int_t>{1, 10, 100, 1000})
|
||||
{
|
||||
int_t N = MB*1e6/dtsize/3;
|
||||
std::vector<double> times;
|
||||
sc::array x(N, dtype), y(N, dtype);
|
||||
//Bench
|
||||
times.push_back(bench([&](){y = x + alpha*y;}, sync));
|
||||
#ifdef BENCH_CLBLAS
|
||||
if(on_cl)
|
||||
times.push_back(bench([&]() {clblasAxpy(T(), N, alpha, cl(x), 0, 1, cl(y), 0, 1, 1, &cl(queue), 0, nullptr, nullptr);}, sync));
|
||||
#endif
|
||||
#ifdef BENCH_CBLAS
|
||||
std::vector<float> cx(N), cy(N);
|
||||
sc::copy(x, cx);
|
||||
sc::copy(y, cy);
|
||||
times.push_back(bench([&](){cblasAxpy(T(), N, alpha, cx.data(), 1, cy.data(), 1);}, sync));
|
||||
#endif
|
||||
#ifdef BENCH_CUBLAS
|
||||
if(on_cu)
|
||||
times.push_back(bench([&](){cublasAxpy(T(), N, alpha, (T*)cu(x), 1, (T*)cu(y), 1);}, cusync));
|
||||
#endif
|
||||
print_results(times, {str(MB)}, [&](double t){return MB*1e6/t;});
|
||||
}
|
||||
}
|
||||
|
||||
if(operation=="dot")
|
||||
{
|
||||
print_results_header({"MB"}, on_cl, on_cu);
|
||||
for(int_t MB: std::vector<int_t>{1, 10, 100, 1000})
|
||||
{
|
||||
int_t N = MB*1e6/dtsize/2;
|
||||
std::vector<double> times;
|
||||
sc::array x(N, dtype), y(N, dtype);
|
||||
sc::array scratch(N, dtype);
|
||||
sc::scalar s(dtype);
|
||||
//Bench
|
||||
times.push_back(bench([&](){s = dot(x,y);}, sync));
|
||||
#ifdef BENCH_CLBLAS
|
||||
if(on_cl)
|
||||
times.push_back(bench([&]() {clblasDot(T(), N, cl(s), 0, cl(x), 0, 1, cl(y), 0, 1, cl(scratch), 1, &cl(queue), 0, nullptr, nullptr);}, sync));
|
||||
#endif
|
||||
#ifdef BENCH_CBLAS
|
||||
std::vector<float> cx(N), cy(N);
|
||||
sc::copy(x, cx);
|
||||
sc::copy(y, cy);
|
||||
times.push_back(bench([&](){cblasDot(T(), N, cx.data(), 1, cy.data(), 1);}, sync));
|
||||
#endif
|
||||
#ifdef BENCH_CUBLAS
|
||||
if(on_cu)
|
||||
times.push_back(bench([&](){cublasDot(T(), N, (T*)cu(x), 1, (T*)cu(y), 1);}, cusync));
|
||||
#endif
|
||||
print_results(times, {str(MB)}, [&](double t){return MB*1e6/t;});
|
||||
}
|
||||
}
|
||||
|
||||
if(operation.substr(0, 4)=="gemv")
|
||||
{
|
||||
std::vector<std::tuple<std::string, std::string,int_t, int_t> > MNs;
|
||||
//Linear System
|
||||
MNs.push_back(make_tuple("Square", "N",153,153));
|
||||
MNs.push_back(make_tuple("Square", "N",1024, 1024));
|
||||
MNs.push_back(make_tuple("Square", "N",2867,2867));
|
||||
MNs.push_back(make_tuple("Square", "T",153,153));
|
||||
MNs.push_back(make_tuple("Square", "T",1024,1024));
|
||||
MNs.push_back(make_tuple("Square", "T",2867,2867));
|
||||
//Normalization
|
||||
MNs.push_back(make_tuple("Short", "N", 64, 60000));
|
||||
MNs.push_back(make_tuple("Short", "N", 256, 60000));
|
||||
MNs.push_back(make_tuple("Short", "N", 1024, 60000));
|
||||
MNs.push_back(make_tuple("Short", "T", 64, 60000));
|
||||
MNs.push_back(make_tuple("Short", "T", 256, 60000));
|
||||
MNs.push_back(make_tuple("Short", "T", 1024, 60000));
|
||||
//Householder
|
||||
MNs.push_back(make_tuple("Tall", "N", 10, 60000));
|
||||
MNs.push_back(make_tuple("Tall", "N", 30, 60000));
|
||||
MNs.push_back(make_tuple("Tall", "T", 10, 60000));
|
||||
MNs.push_back(make_tuple("Tall", "T", 30, 60000));
|
||||
|
||||
/*---------*/
|
||||
/*--BLAS2--*/
|
||||
/*---------*/
|
||||
print_results_header({"BENCH", "M", "N", "AT"}, on_cl, on_cu);
|
||||
for(auto MN: MNs)
|
||||
{
|
||||
std::vector<double> times;
|
||||
std::string name = get<0>(MN);
|
||||
std::string cAT = get<1>(MN);
|
||||
int_t M = get<2>(MN);
|
||||
int_t N = get<3>(MN);
|
||||
int_t As1 = M, As2 = N;
|
||||
bool AT = (cAT == "T");
|
||||
if(AT) std::swap(As1, As2);
|
||||
sc::array A(As1, As2, dtype), y(M, dtype), x(N, dtype);
|
||||
#ifdef HAS_A_BLAS
|
||||
int_t lda = A.stride()[1];
|
||||
#endif
|
||||
//Bench
|
||||
times.push_back(bench([&](){y = AT?dot(A.T,x):dot(A,x);}, sync));
|
||||
#ifdef BENCH_CLBLAS
|
||||
if(on_cl)
|
||||
times.push_back(bench([&]() {clblasGemv(T(), clblasColumnMajor, AT?clblasTrans:clblasNoTrans, As1, As2, 1, cl(A), 0, lda, cl(x), 0, 1, 0, cl(y), 0, 1, 1, &cl(queue),0, nullptr, nullptr);}, sync));
|
||||
#endif
|
||||
#ifdef BENCH_CBLAS
|
||||
std::vector<float> cA(M*N), cx(N), cy(M);
|
||||
sc::copy(x, cx);
|
||||
sc::copy(y, cy);
|
||||
sc::copy(A, cA);
|
||||
times.push_back(bench([&](){cblasGemv(T(), CblasColMajor, AT?CblasTrans:CblasNoTrans, As1, As2, 1, cA.data(), lda, cx.data(), 1, 0, cy.data(), 1);}, sync));
|
||||
#endif
|
||||
#ifdef BENCH_CUBLAS
|
||||
if(on_cu)
|
||||
times.push_back(bench([&](){cublasGemv(T(), AT?'t':'n', As1, As2, 1, (T*)cu(A), lda, (T*)cu(x), 1, 0, (T*)cu(y), 1);}, cusync));
|
||||
#endif
|
||||
print_results(times, {name, str(M), str(N), cAT}, [&](double t){ return (M*N + M + N)*dtsize/t;});
|
||||
}
|
||||
}
|
||||
|
||||
if(operation.substr(0,4)=="gemm")
|
||||
{
|
||||
std::vector<std::tuple<std::string, int_t, int_t, int_t, std::string, std::string> > MNKs;
|
||||
//DeepBench
|
||||
for(size_t MK: std::vector<size_t>{1760, 2048, 2560})
|
||||
for(size_t N: std::vector<size_t>{16, 32, 64, 128, 7000})
|
||||
MNKs.push_back(make_tuple("Deep", MK, N, MK, "N", "N"));
|
||||
for(size_t MK: std::vector<size_t>{1760, 2048, 2560})
|
||||
for(size_t N: std::vector<size_t>{16, 32, 64, 128, 7000})
|
||||
MNKs.push_back(make_tuple("Deep", MK, N, MK, "T", "N"));
|
||||
for(size_t MK: std::vector<size_t>{1760, 4096})
|
||||
MNKs.push_back(make_tuple("Deep", MK, 7133, MK, "N", "T"));
|
||||
//Covariance (e.g., ICA, 10minutes/100Hz)
|
||||
MNKs.push_back(make_tuple("Cov",32,32,60000,"N","T"));
|
||||
MNKs.push_back(make_tuple("Cov",256,256,60000,"N","T"));
|
||||
//Bi-diagonalization
|
||||
MNKs.push_back(make_tuple("Lapack",4096,4096,32,"N","T"));
|
||||
MNKs.push_back(make_tuple("Lapack",3456,3456,32,"N","T"));
|
||||
MNKs.push_back(make_tuple("Lapack",896,896,32,"N","T"));
|
||||
|
||||
print_results_header({"BENCH", "M", "N", "K", "AT", "BT"}, on_cl, on_cu);
|
||||
/*---------*/
|
||||
/*--BLAS3--*/
|
||||
/*---------*/
|
||||
for(auto MNK: MNKs)
|
||||
{
|
||||
std::vector<double> times;
|
||||
std::vector<double> tflops;
|
||||
std::string name = get<0>(MNK);
|
||||
int_t M = get<1>(MNK);
|
||||
int_t N = get<2>(MNK);
|
||||
int_t K = get<3>(MNK);
|
||||
std::string cAT = get<4>(MNK);
|
||||
std::string cBT = get<5>(MNK);
|
||||
bool AT = cAT=="T";
|
||||
bool BT = cBT=="T";
|
||||
int_t As1 = M, As2 = K;
|
||||
if(AT) std::swap(As1, As2);
|
||||
int_t Bs1 = K, Bs2 = N;
|
||||
if(BT) std::swap(Bs1, Bs2);
|
||||
sc::array C(M, N, dtype), A(As1, As2, dtype), B(Bs1, Bs2, dtype);
|
||||
#ifdef HAS_A_BLAS
|
||||
int_t lda = A.stride()[1], ldb = B.stride()[1], ldc = C.stride()[1];
|
||||
#endif
|
||||
//bench
|
||||
times.push_back(bench([&](){C = AT?(BT?dot(A.T,B.T)
|
||||
:dot(A.T,B))
|
||||
:(BT?dot(A,B.T)
|
||||
:dot(A,B));}, sync));
|
||||
#ifdef BENCH_CLBLAS
|
||||
if(on_cl)
|
||||
times.push_back(bench([&]() {clblasGemm(T(), clblasColumnMajor, AT?clblasTrans:clblasNoTrans, BT?clblasTrans:clblasNoTrans,
|
||||
M, N, K, 1, cl(A), 0, lda, cl(B), 0, ldb,
|
||||
0, cl(C), 0, ldc, 1, &cl(queue),0, nullptr, nullptr);}, sync));
|
||||
#endif
|
||||
#ifdef BENCH_CBLAS
|
||||
std::vector<float> cC(M*N), cA(M*K), cB(N*K);
|
||||
sc::copy(C, cC);
|
||||
sc::copy(A, cA);
|
||||
sc::copy(B, cB);
|
||||
times.push_back(bench([&](){cblasGemm(T(), CblasColMajor, AT?CblasTrans:CblasNoTrans, BT?CblasTrans:CblasNoTrans, M, N, K, 1, cA.data(), lda, cB.data(), ldb, 1, cC.data(), ldc);}, sync));
|
||||
#endif
|
||||
#ifdef BENCH_CUBLAS
|
||||
if(on_cu)
|
||||
times.push_back(bench([&](){cublasGemm(T(), AT?'t':'n', BT?'t':'n', M, N, K, 1, (T*)cu(A), lda, (T*)cu(B), ldb, 0, (T*)cu(C), ldc);}, cusync));
|
||||
#endif
|
||||
print_results(times, {name, str(M), str(N), str(K), cAT, cBT}, [&](double t){ return 2*M*N*K/t*1e-3;});
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Prints the command-line usage text to std::cerr, then terminates the
// process with EXIT_FAILURE. Never returns.
void handle_misusage(){
  static const char * const usage[] = {
    "Usage : blas-bench [--op {axpy, dot, gemv, gemm}] [--dtype {float32, float64}] [--device DEVICE_IDX] [--help]",
    "--op: operation to benchmark (default = gemm)",
    "--dtype: data-type to benchmark (default = float32)",
    "--device: index of isaac device in [0, ..., ndevices - 1] (default = 0)",
    "--help: display this message"
  };
  for(const char * line: usage)
    std::cerr << line << std::endl;
  exit(EXIT_FAILURE);
}
|
||||
|
||||
std::string getopt(std::vector<std::string> const & args,
|
||||
std::string const & key,
|
||||
std::vector<std::string> const & set = {},
|
||||
std::string dft = "")
|
||||
{
|
||||
auto it = std::find(args.begin(), args.end(), key);
|
||||
if(it==args.end()){
|
||||
if(dft.empty())
|
||||
handle_misusage();
|
||||
return dft;
|
||||
}
|
||||
auto next = it + 1;
|
||||
if(next==args.end() || next->compare(0, 2, "--")==0)
|
||||
handle_misusage();
|
||||
if(set.size() && std::find(set.begin(), set.end(), *next)==set.end())
|
||||
handle_misusage();
|
||||
return *next;
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
std::vector<std::string> args(argv, argv + argc);
|
||||
#ifdef BENCH_CLBLAS
|
||||
clblasSetup();
|
||||
#endif
|
||||
sc::driver::backend::default_queue_properties = CL_QUEUE_PROFILING_ENABLE;
|
||||
|
||||
if(std::find(args.begin(), args.end(), "--help") != args.end())
|
||||
handle_misusage();
|
||||
|
||||
std::string operation = getopt(args, "--op", {"axpy", "dot", "gemv", "gemm"}, "gemm");
|
||||
std::string dtype = getopt(args, "--dtype", {"float16", "float32", "float64"}, "float32");
|
||||
int device;
|
||||
try{
|
||||
device = std::stoi(getopt(args, "--device", {}, "0"));
|
||||
}catch(...){ handle_misusage(); }
|
||||
sc::driver::backend::default_device = device;
|
||||
|
||||
/* List devices */
|
||||
std::cout << "Devices available:" << std::endl;
|
||||
std::cout << "------------------" << std::endl;
|
||||
size_t i = 0;
|
||||
std::vector<sc::driver::Platform> platforms;
|
||||
sc::driver::backend::platforms(platforms);
|
||||
for(sc::driver::Platform const & pf: platforms){
|
||||
std::vector<sc::driver::Device> devices;
|
||||
pf.devices(devices);
|
||||
for(sc::driver::Device const & device: devices)
|
||||
std::cout << "[" << (i++==sc::driver::backend::default_device?"x":" ") << "]"
|
||||
<< " - " << device.name()
|
||||
<< " on " << pf.name() << std::endl;
|
||||
}
|
||||
std::cout << "------------------" << std::endl;
|
||||
|
||||
std::cout << std::fixed << std::setprecision(2);
|
||||
//if(dtype=="float16")
|
||||
// bench<half_float::half>(sc::HALF_TYPE, operation);
|
||||
if(dtype=="float32")
|
||||
bench<float>(sc::FLOAT_TYPE, operation);
|
||||
if(dtype=="float64")
|
||||
bench<double>(sc::DOUBLE_TYPE, operation);
|
||||
|
||||
#ifdef BENCH_CLBLAS
|
||||
clblasTeardown();
|
||||
#endif
|
||||
}
|
152
bench/common.hpp
152
bench/common.hpp
@@ -1,152 +0,0 @@
|
||||
#ifndef ISAAC_BENCH_COMMON_HPP_
|
||||
#define ISAAC_BENCH_COMMON_HPP_
|
||||
|
||||
#include <chrono>
|
||||
#include <algorithm>
|
||||
#include "isaac/array.h"
|
||||
|
||||
namespace sc = isaac;
|
||||
|
||||
// Compile-time integer tag; print_tuple counts down on it to walk a tuple.
template<std::size_t> struct int_{};

// Recursive case: prints the element that is `Pos` positions from the end,
// followed by a comma, then recurses on the remaining elements.
template <class Tuple, size_t Pos>
std::ostream& print_tuple(std::ostream& out, const Tuple& t, int_<Pos> )
{
  out << std::get< std::tuple_size<Tuple>::value-Pos >(t) << ',';
  return print_tuple(out, t, int_<Pos-1>());
}

// Base case: the last element is printed without a trailing comma.
template <class Tuple>
std::ostream& print_tuple(std::ostream& out, const Tuple& t, int_<1> )
{
  return out << std::get<std::tuple_size<Tuple>::value-1>(t);
}

// Empty tuple: nothing to print. Without this overload, streaming a
// std::tuple<> would select the recursive case with Pos == 0 and fail to
// compile.
template <class Tuple>
std::ostream& print_tuple(std::ostream& out, const Tuple&, int_<0> )
{
  return out;
}

// Streams a tuple as its elements separated by commas, e.g. "1,a,2.5".
template <class... Args>
std::ostream& operator<<(std::ostream& out, const std::tuple<Args...>& t)
{
  print_tuple(out, t, int_<sizeof...(Args)>());
  return out;
}
|
||||
|
||||
// Rounds N up to the nearest multiple of pad; N is returned unchanged when it
// already is one. `pad` must be non-zero.
// Declared inline because this header defines the function: without it, every
// translation unit including the header would emit its own external definition.
inline int ceil(int N, int pad)
{
  return (N%pad==0)?N:(N+pad-1)/pad*pad;
}
|
||||
|
||||
std::vector<int> create_log_range(int min, int max, int N, int pad)
|
||||
{
|
||||
std::vector<int> res(N);
|
||||
for(int i = 0 ; i < N ; ++i)
|
||||
{
|
||||
res[i] = static_cast<int>(std::exp(std::log(min) + (float)(std::log(max) - std::log(min))*i/N));
|
||||
res[i] = ceil(res[i], pad);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
std::vector<int> create_full_range(int min, int max, int pad)
|
||||
{
|
||||
std::vector<int> N;
|
||||
for(int i = ceil(min, pad) ; i < ceil(max, pad) ; i+=pad)
|
||||
N.push_back(i);
|
||||
return N;
|
||||
}
|
||||
|
||||
|
||||
// Returns the median of x: the middle element of the sorted values, or the
// mean of the two middle elements when their count is even (computed in T, so
// integer types truncate).
// An empty vector yields a value-initialized T instead of reading out of
// bounds. The argument is taken by value, so the caller's vector is not
// reordered.
template<class T>
T median(std::vector<T> x)
{
  if(x.empty())
    return T();
  size_t size = x.size();
  std::sort(x.begin(), x.end());
  if (size % 2 == 0)
    return (x[size / 2 - 1] + x[size / 2]) / 2;
  else
    return x[size / 2];
}
|
||||
|
||||
// Returns the smallest element of x. x must not be empty.
template<class T>
T min(std::vector<T> x)
{
  typename std::vector<T>::iterator smallest = std::min_element(x.begin(), x.end());
  return *smallest;
}
|
||||
|
||||
// Returns the largest element of x. x must not be empty.
template<class T>
T max(std::vector<T> x)
{
  typename std::vector<T>::iterator largest = std::max_element(x.begin(), x.end());
  return *largest;
}
|
||||
|
||||
// Returns the arithmetic mean of x, computed in T (integer types truncate).
// x must not be empty.
template<class T>
T mean(std::vector<T> x)
{
  int count = x.size();
  T total = 0;
  for(typename std::vector<T>::const_iterator it = x.begin() ; it != x.end() ; ++it)
    total += *it;
  return total/count;
}
|
||||
|
||||
// Minimal stopwatch built on std::chrono::high_resolution_clock.
// start() records the current time; get() returns the time elapsed since the
// last start(), in nanoseconds.
class Timer
{
  using clock_type = std::chrono::high_resolution_clock;
  using nanoseconds = std::chrono::nanoseconds;

public:
  // When `run` is true the stopwatch is started immediately.
  explicit Timer(bool run = false)
  {
    if (run)
      start();
  }

  // (Re)starts the stopwatch.
  void start()
  {
    _start = clock_type::now();
  }

  // Time elapsed since the last call to start().
  nanoseconds get() const
  {
    clock_type::time_point const now = clock_type::now();
    return std::chrono::duration_cast<nanoseconds>(now - _start);
  }

private:
  clock_type::time_point _start;
};
|
||||
|
||||
cl_mem& cl(sc::array& x)
|
||||
{ return x.data().handle().cl(); }
|
||||
|
||||
cl_mem& cl(sc::scalar& x)
|
||||
{ return x.data().handle().cl(); }
|
||||
|
||||
cl_command_queue& cl(sc::driver::CommandQueue& x)
|
||||
{ return x.handle().cl(); }
|
||||
|
||||
CUdeviceptr& cu(sc::array& x)
|
||||
{ return x.data().handle().cu(); }
|
||||
|
||||
CUdeviceptr& cu(sc::scalar& x)
|
||||
{ return x.data().handle().cu(); }
|
||||
|
||||
CUstream& cu(sc::driver::CommandQueue& x)
|
||||
{ return x.handle().cu(); }
|
||||
|
||||
// Numeric codes emitted inside terminal escape sequences by color_stream.
enum Code {
  // Text attributes
  RESET            = 0,
  BOLD             = 1,
  ITALIC           = 3,
  // Foreground colors
  FG_RED           = 31,
  FG_GREEN         = 32,
  FG_YELLOW        = 33,
  FG_BLUE          = 34,
  FG_MAGENTA       = 35,
  FG_CYAN          = 36,
  FG_LIGHT_GRAY    = 37,
  // Light foreground colors
  FG_DARK_GRAY     = 90,
  FG_LIGHT_RED     = 91,
  FG_LIGHT_GREEN   = 92,
  FG_LIGHT_YELLOW  = 93,
  FG_LIGHT_BLUE    = 94,
  FG_LIGHT_MAGENTA = 95,
  FG_LIGHT_CYAN    = 96,
  FG_WHITE         = 97
};

// Stream manipulator: inserting color_stream(c) into an ostream writes the
// escape sequence "\033[<c>m".
class color_stream {
  Code code;

public:
  color_stream(Code pCode) : code(pCode) {}

  friend std::ostream&
  operator<<(std::ostream& os, const color_stream& mod) {
    os << "\033[";
    os << mod.code;
    os << "m";
    return os;
  }
};
|
||||
#endif
|
@@ -1,54 +0,0 @@
|
||||
#include "isaac/array.h"
|
||||
#include <vector>
|
||||
|
||||
namespace sc = isaac;
|
||||
|
||||
#ifdef BENCH_CUBLAS
|
||||
__global__ void dummy(){}
|
||||
#endif
|
||||
|
||||
|
||||
int main()
|
||||
{
|
||||
for(sc::driver::backend::data_type::const_iterator it = sc::driver::queues.data().begin() ; it != sc::driver::queues.data().end() ; ++it)
|
||||
{
|
||||
cl::CommandQueue queue = it->second[0];
|
||||
cl::Context context = it->first;
|
||||
cl::Device device = queue.getInfo<CL_QUEUE_DEVICE>();
|
||||
cl::Program program(context,"__kernel void dummy(){}");
|
||||
program.build();
|
||||
cl::Kernel kernel(program, "dummy");
|
||||
|
||||
cl::NDRange offset = cl::NullRange;
|
||||
cl::NDRange global(1);
|
||||
cl::NDRange local(1);
|
||||
|
||||
cl::Event event;
|
||||
std::cout << "Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
|
||||
std::cout << "-------------------------" << std::endl;
|
||||
|
||||
queue.enqueueNDRangeKernel(kernel, offset, global, local, NULL, &event);
|
||||
queue.flush();
|
||||
queue.finish();
|
||||
|
||||
{
|
||||
long time = event.getProfilingInfo<CL_PROFILING_COMMAND_END>() - event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
|
||||
std::cout << "Kernel launch overhead: " << time << std::endl;
|
||||
}
|
||||
|
||||
#ifdef BENCH_CUBLAS
|
||||
float time;
|
||||
cudaEvent_t start, stop;
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start);
|
||||
dummy<<<1, 1>>>();
|
||||
cudaEventRecord(stop);
|
||||
cudaEventSynchronize(stop);
|
||||
cudaEventElapsedTime(&time, start, stop);
|
||||
std::cout << "CUDA Kernel launch overhead: " << time << std::endl;
|
||||
#endif
|
||||
std::cout << "-------------------------" << std::endl;
|
||||
}
|
||||
|
||||
}
|
@@ -1,15 +0,0 @@
|
||||
# Locate clBLAS: the clBLAS.h header and the clBLAS library.
# Results: CLBLAS_INCLUDE_DIR, CLBLAS_LIBRARIES, and the FOUND flag set by
# find_package_handle_standard_args.

# The glob can match several installations (or none). Build one hint per
# match instead of pasting the whole list in front of a single suffix, which
# would only suffix the last entry.
file(GLOB CLBLAS_ROOT /opt/clBLAS*)

set(CLBLAS_INCLUDE_HINTS "")
set(CLBLAS_LIBRARIES_HINTS "")
foreach(_clblas_root ${CLBLAS_ROOT})
  list(APPEND CLBLAS_INCLUDE_HINTS "${_clblas_root}/include")
  list(APPEND CLBLAS_LIBRARIES_HINTS "${_clblas_root}/lib64")
endforeach()

find_path(CLBLAS_INCLUDE_DIR clBLAS.h HINTS ${CLBLAS_INCLUDE_HINTS})
find_library(CLBLAS_LIBRARIES NAMES clBLAS HINTS ${CLBLAS_LIBRARIES_HINTS})

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(CLBLAS DEFAULT_MSG CLBLAS_LIBRARIES CLBLAS_INCLUDE_DIR)

# Hide the actual cache entries; there is no cache variable named "CLBLAS".
mark_as_advanced(CLBLAS_INCLUDE_DIR CLBLAS_LIBRARIES)
|
@@ -1,19 +0,0 @@
|
||||
# Locate Intel MKL (mkl_blas.h, mkl_core) together with an OpenMP runtime.
# Results: MKL_INCLUDE_DIR, MKL_LIBRARIES, OMP_LIBRARIES, and the FOUND flag
# set by find_package_handle_standard_args.

# Candidate Intel installation roots. Several may match, so each root gets
# its own sub-directory hints; expanding the list in front of one suffix
# would only suffix the last root.
file(GLOB SYSTEM_STUDIO_ROOT /opt/intel/ /opt/intel/composerxe* /opt/intel/system_studio_*)

set(MKL_INCLUDE_HINTS "")
set(MKL_LIBRARY_HINTS "")
set(ICC_LIBRARY_HINTS "")
foreach(_mkl_root ${SYSTEM_STUDIO_ROOT})
  list(APPEND MKL_INCLUDE_HINTS "${_mkl_root}/mkl/include/")
  list(APPEND MKL_LIBRARY_HINTS "${_mkl_root}/mkl/lib/intel64/")
  list(APPEND ICC_LIBRARY_HINTS "${_mkl_root}/compiler/lib/intel64/")
endforeach()

find_path(MKL_INCLUDE_DIR mkl_blas.h HINTS ${MKL_INCLUDE_HINTS})
find_library(MKL_LIBRARIES NAMES mkl_core HINTS ${MKL_LIBRARY_HINTS})
find_library(ICC_LIBRARIES NAMES iomp5 HINTS ${ICC_LIBRARY_HINTS})

# Use Intel's OpenMP runtime when it was found, otherwise fall back to gomp.
if(ICC_LIBRARIES)
  set(OMP_LIBRARIES ${ICC_LIBRARIES})
else()
  set(OMP_LIBRARIES gomp)
endif()

# Expand the single library that was located into the full link line.
if(MKL_LIBRARIES AND OMP_LIBRARIES)
  set(MKL_LIBRARIES -lmkl_mc3 -lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core ${MKL_LIBRARIES} ${OMP_LIBRARIES} pthread)
endif()

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(MKL DEFAULT_MSG MKL_LIBRARIES MKL_INCLUDE_DIR)

# Hide the actual cache entries; there is no cache variable named "MKL".
mark_as_advanced(MKL_INCLUDE_DIR MKL_LIBRARIES ICC_LIBRARIES)
|
@@ -1,10 +0,0 @@
|
||||
# Locate OpenBLAS: the cblas.h header and the openblas library.
# Results: OPENBLAS_INCLUDE_DIR, OPENBLAS_LIBRARIES, and the FOUND flag set by
# find_package_handle_standard_args.
find_path(OPENBLAS_INCLUDE_DIR cblas.h)
find_library(OPENBLAS_LIBRARIES
  NAMES openblas
  PATHS /lib/ /lib64/ /usr/lib /usr/lib64 /usr/local/lib /usr/local/lib64 /opt/OpenBLAS/lib $ENV{OPENBLAS_HOME}/lib)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(OpenBlas DEFAULT_MSG OPENBLAS_LIBRARIES OPENBLAS_INCLUDE_DIR)

# Hide the actual cache entries; there is no cache variable named "OpenBlas".
mark_as_advanced(OPENBLAS_INCLUDE_DIR OPENBLAS_LIBRARIES)
|
@@ -1,30 +0,0 @@
|
||||
# Locate libOpenCL.
# Result: OPENCL_LIBRARIES, and the FOUND flag set by
# find_package_handle_standard_args.

# Intel OpenCL SDK: only hint at it when INTELOCLSDKROOT is actually set,
# otherwise the hint would degenerate to "/lib/x64" or "/lib/x86".
set(L_HINTS "")
if(NOT "$ENV{INTELOCLSDKROOT}" STREQUAL "")
  if(CMAKE_SIZEOF_VOID_P EQUAL 8)
    list(APPEND L_HINTS "$ENV{INTELOCLSDKROOT}/lib/x64")
  else()
    list(APPEND L_HINTS "$ENV{INTELOCLSDKROOT}/lib/x86")
  endif()
endif()

# Glob patterns for vendor driver/SDK installations.
set(ANDROID_CL_GLOB_HINTS /opt/adreno-driver*/lib)
set(X86_CL_GLOB_HINTS /opt/AMDAPPSDK*/lib/x86_64)

if(ANDROID)
  foreach(_cl_pattern ${ANDROID_CL_GLOB_HINTS})
    file(GLOB _TMP ${_cl_pattern})
    list(APPEND L_HINTS ${_TMP})
  endforeach()
  # NO_CMAKE_FIND_ROOT_PATH: look at the literal hint paths rather than
  # re-rooting them under the toolchain's find root.
  find_library(OPENCL_LIBRARIES NAMES OpenCL NO_CMAKE_FIND_ROOT_PATH HINTS ${L_HINTS})
else()
  foreach(_cl_pattern ${X86_CL_GLOB_HINTS})
    file(GLOB _TMP ${_cl_pattern})
    list(APPEND L_HINTS ${_TMP})
  endforeach()
  # The CUDA toolkit ships an OpenCL library; hint at it only when the
  # toolkit root is known.
  if(CUDA_TOOLKIT_ROOT_DIR)
    list(APPEND L_HINTS "${CUDA_TOOLKIT_ROOT_DIR}/targets/x86_64-linux/lib/")
  endif()
  find_library(OPENCL_LIBRARIES NAMES OpenCL HINTS ${L_HINTS})
endif()

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(OpenCL DEFAULT_MSG OPENCL_LIBRARIES)

# Hide the actual cache entry; there is no cache variable named "OpenCL".
mark_as_advanced(OPENCL_LIBRARIES)
|
@@ -1,138 +0,0 @@
|
||||
#*********************************************************#
|
||||
#* File: Apk.cmake *
|
||||
#* Android apk tools
|
||||
#*
|
||||
#* Copyright (C) 2002-2013 The PixelLight Team (http://www.pixellight.org/)
|
||||
#*
|
||||
#* This file is part of PixelLight.
|
||||
#*
|
||||
#* Permission is hereby granted, free of charge, to any person obtaining a copy of this software
|
||||
#* and associated documentation files (the "Software"), to deal in the Software without
|
||||
#* restriction, including without limitation the rights to use, copy, modify, merge, publish,
|
||||
#* distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
|
||||
#* Software is furnished to do so, subject to the following conditions:
|
||||
#*
|
||||
#* The above copyright notice and this permission notice shall be included in all copies or
|
||||
#* substantial portions of the Software.
|
||||
#*
|
||||
#* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
|
||||
#* BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
#* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||
#* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
#* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#*********************************************************#
|
||||
|
||||
|
||||
##################################################
|
||||
## Options
|
||||
##################################################
|
||||
set(ANDROID_APK_API_LEVEL "10" CACHE STRING "Android APK API level")
|
||||
set(ANDROID_APK_INSTALL "0" CACHE BOOL "Install created apk file on the device automatically?")
|
||||
set(ANDROID_APK_RUN "0" CACHE BOOL "Run created apk file on the device automatically? (installs it automatically as well, \"ANDROID_APK_INSTALL\"-option is ignored)")
|
||||
set(ANDROID_APK_SIGNER_KEYSTORE "~/my-release-key.keystore" CACHE STRING "Keystore for signing the apk file (only required for release apk)")
|
||||
set(ANDROID_APK_SIGNER_ALIAS "myalias" CACHE STRING "Alias for signing the apk file (only required for release apk)")
|
||||
|
||||
##################################################
|
||||
## Variables
|
||||
##################################################
|
||||
set(ANDROID_THIS_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}) # Directory this CMake file is in
|
||||
|
||||
##################################################
|
||||
## MACRO: android_create_apk
|
||||
##
|
||||
## Create/copy Android apk related files
|
||||
##
|
||||
## @param name
|
||||
## Name of the project (e.g. "MyProject"), this will also be the name of the created apk file
|
||||
## @param apk_package_name
|
||||
## Package name of the application
|
||||
## @param apk_directory
|
||||
## Directory in which to construct the apk file (e.g. "${CMAKE_BINARY_DIR}/apk")
|
||||
## @param libs_directory
|
||||
## Directory where the built android libraries will be POST_BUILD, e.g ${CMAKE_SOURCE_DIR}/libs
|
||||
## @param assets_directory
|
||||
## Directory where the assets for the application are located
|
||||
##
|
||||
## @remarks
|
||||
## Requires the following tools to be found automatically
|
||||
## - "android" (part of the Android SDK)
|
||||
## - "adb" (part of the Android SDK)
|
||||
## - "ant" (type e.g. "sudo apt-get install ant" on your Linux system to install Ant)
|
||||
## - "jarsigner" (part of the JDK)
|
||||
## - "zipalign" (part of the Android SDK)
|
||||
##################################################
|
||||
|
||||
|
||||
# Attaches the commands that assemble, build, optionally sign/align, install
# and run an Android apk for the existing target `name`.
#   name              target to package; also the apk project name
#   apk_package_name  package name used when launching the activity
#   apk_directory     directory in which the apk tree is assembled
#   libs_directory    accepted for interface compatibility; not used here
#   android_directory directory providing "res/" and "AndroidManifest.xml"
#   assets_directory  directory whose content is copied to "<apk>/assets"
macro(android_create_apk name apk_package_name apk_directory libs_directory android_directory assets_directory)
  # Set in the caller's scope (this is a macro) so configure_file() below can
  # substitute them.
  # NOTE(review): presumably referenced by the AndroidManifest.xml template - confirm.
  set(ANDROID_NAME ${name})
  set(ANDROID_APK_PACKAGE ${apk_package_name})

  # Create the directory for the libraries
  add_custom_command(TARGET ${ANDROID_NAME} PRE_BUILD COMMAND ${CMAKE_COMMAND} -E remove_directory "${apk_directory}/libs")
  add_custom_command(TARGET ${ANDROID_NAME} PRE_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory "${apk_directory}/libs/armeabi-v7a/")
  # $<TARGET_FILE:...> replaces reading the deprecated LOCATION target property.
  add_custom_command(TARGET ${ANDROID_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy "$<TARGET_FILE:${ANDROID_NAME}>" "${apk_directory}/libs/armeabi-v7a/")

  # Create "build.xml", "default.properties", "local.properties" and "proguard.cfg" files
  if(CMAKE_BUILD_TYPE MATCHES Release)
    set(ANDROID_APK_DEBUGGABLE "false")
  else()
    set(ANDROID_APK_DEBUGGABLE "true")
  endif()

  add_custom_command(TARGET ${ANDROID_NAME} PRE_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory "${apk_directory}/res")
  add_custom_command(TARGET ${ANDROID_NAME} PRE_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory "${android_directory}/res" "${apk_directory}/res/")

  configure_file("${android_directory}/AndroidManifest.xml" "${apk_directory}/AndroidManifest.xml")

  add_custom_command(TARGET ${ANDROID_NAME} POST_BUILD COMMAND android update project -t android-${ANDROID_APK_API_LEVEL} --name ${ANDROID_NAME} --path "${apk_directory}")

  # Copy assets from the directory the caller passed in (previously the
  # argument was ignored in favour of "${CMAKE_SOURCE_DIR}/assets").
  add_custom_command(TARGET ${ANDROID_NAME} PRE_BUILD COMMAND ${CMAKE_COMMAND} -E remove_directory "${apk_directory}/assets")
  add_custom_command(TARGET ${ANDROID_NAME} PRE_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory "${apk_directory}/assets/")
  add_custom_command(TARGET ${ANDROID_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory "${assets_directory}" "${apk_directory}/assets/")

  # Build the apk file
  if(CMAKE_BUILD_TYPE MATCHES Release)
    # Let Ant create the unsigned apk file
    add_custom_command(TARGET ${ANDROID_NAME} POST_BUILD
      COMMAND ant release
      WORKING_DIRECTORY "${apk_directory}")

    # Sign the apk file
    add_custom_command(TARGET ${ANDROID_NAME} POST_BUILD
      COMMAND jarsigner -verbose -keystore ${ANDROID_APK_SIGNER_KEYSTORE} bin/${ANDROID_NAME}-unsigned.apk ${ANDROID_APK_SIGNER_ALIAS}
      WORKING_DIRECTORY "${apk_directory}")

    # Align the apk file
    add_custom_command(TARGET ${ANDROID_NAME} POST_BUILD
      COMMAND zipalign -v -f 4 bin/${ANDROID_NAME}-unsigned.apk bin/${ANDROID_NAME}.apk
      WORKING_DIRECTORY "${apk_directory}")

    # Install current version on the device/emulator
    if(ANDROID_APK_INSTALL OR ANDROID_APK_RUN)
      add_custom_command(TARGET ${ANDROID_NAME} POST_BUILD
        COMMAND adb install -r bin/${ANDROID_NAME}.apk
        WORKING_DIRECTORY "${apk_directory}")
    endif()
  else()
    # Let Ant create the unsigned apk file
    add_custom_command(TARGET ${ANDROID_NAME} POST_BUILD
      COMMAND ant debug
      WORKING_DIRECTORY "${apk_directory}")

    # Install current version on the device/emulator
    if(ANDROID_APK_INSTALL OR ANDROID_APK_RUN)
      add_custom_command(TARGET ${ANDROID_NAME} POST_BUILD
        COMMAND adb install -r bin/${ANDROID_NAME}-debug.apk
        WORKING_DIRECTORY "${apk_directory}")
    endif()
  endif()

  # Start the application
  if(ANDROID_APK_RUN)
    add_custom_command(TARGET ${ANDROID_NAME} POST_BUILD
      COMMAND adb shell am start -n ${ANDROID_APK_PACKAGE}/android.app.NativeActivity)
  endif()
endmacro()
|
@@ -1 +0,0 @@
|
||||
# Configure an Android build. The ABI value contains spaces, so it must be
# quoted to reach cmake as a single -D argument.
cmake -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchain/android.cmake -DANDROID_NDK=/opt/android-ndk-r10d/ -DANDROID_ABI="armeabi-v7a with NEON" -DANDROID_NATIVE_API_LEVEL=19 -DANDROID_APK_API_LEVEL=19 -DANDROID_APK_RUN=1 ../
|
@@ -1,61 +0,0 @@
|
||||
#Copyright (c) 2014, ArrayFire
|
||||
#All rights reserved.
|
||||
|
||||
# Function to turn an OpenCL source file into a C string within a source file.
|
||||
# xxd uses its input's filename to name the string and its length, so we
|
||||
# need to move them to a name that depends only on the path output, not its
|
||||
# input. Otherwise, builds in different relative locations would put the
|
||||
# source into different variable names, and everything would fall over.
|
||||
# The actual name will be filename (.s replaced with underscores), and length
|
||||
# name_len.
|
||||
#
|
||||
# Usage example:
|
||||
#
|
||||
# set(KERNELS a.cl b/c.cl)
|
||||
# CODE_TO_H(
|
||||
# SOURCES ${KERNELS}
|
||||
# VARNAME OUTPUTS
|
||||
# )
|
||||
# add_executable(foo ${OUTPUTS})
|
||||
#
|
||||
# The namespace they are placed in is taken from filename.namespace.
|
||||
#
|
||||
# For example, if the input file is kernel.cl, the two variables will be
|
||||
# unsigned char ns::kernel_cl[];
|
||||
# unsigned int ns::kernel_cl_len;
|
||||
#
|
||||
# where ns is the contents of kernel.cl.namespace.
|
||||
|
||||
include(CMakeParseArguments)
|
||||
|
||||
set(BIN2CPP_PROGRAM "bin2cpp")
|
||||
|
||||
# Adds one build rule per input file that runs bin2cpp to turn the file into
# a C++ source/header, plus a target that depends on all generated files.
#
# Keyword arguments:
#   SOURCES <files...>  input files
#   OUTPUT_DIR <dir>    directory receiving the generated files
#   EXTENSION <ext>     extension of the generated files
#   NAMESPACE <ns>      forwarded to bin2cpp --namespace
#   EOF <value>         forwarded to bin2cpp --eof
#   TARGET <name>       name of the custom target (built by ALL)
#   VARNAME <var>       optional; variable in the caller's scope that
#                       receives the list of generated files
function(CODE_TO_H)
  cmake_parse_arguments(ARGS "" "VARNAME;EXTENSION;OUTPUT_DIR;TARGET;NAMESPACE;EOF" "SOURCES" ${ARGN})

  set(_output_files "")
  foreach(_input_file ${ARGS_SOURCES})
    get_filename_component(_path "${_input_file}" PATH)
    get_filename_component(_name "${_input_file}" NAME)
    get_filename_component(_name_we "${_input_file}" NAME_WE)

    # The C++ identifier is the extension-less file name with dots replaced.
    # Quoted so an empty name cannot drop the argument.
    string(REPLACE "." "_" var_name "${_name_we}")

    set(_namespace "${ARGS_NAMESPACE}")

    set(_output_path "${ARGS_OUTPUT_DIR}")
    set(_output_file "${_output_path}/${_name_we}.${ARGS_EXTENSION}")

    # NOTE(review): the echo appends an #include line before bin2cpp writes to
    # the same file; if bin2cpp truncates its --output, that line is lost - confirm.
    add_custom_command(
      OUTPUT ${_output_file}
      DEPENDS ${_input_file} ${BIN2CPP_PROGRAM}
      COMMAND ${CMAKE_COMMAND} -E make_directory "${_output_path}"
      COMMAND ${CMAKE_COMMAND} -E echo "\\#include \\<${_path}/${_name_we}.hpp\\>" >>"${_output_file}"
      COMMAND ${BIN2CPP_PROGRAM} --file ${_name} --namespace ${_namespace} --output ${_output_file} --name ${var_name} --eof ${ARGS_EOF} --extension ${ARGS_EXTENSION}
      WORKING_DIRECTORY "${_path}"
      COMMENT "Compiling ${_input_file} to C++ source"
    )
    list(APPEND _output_files ${_output_file})
  endforeach()
  add_custom_target(${ARGS_TARGET} ALL DEPENDS ${_output_files})

  # VARNAME was parsed but never honoured; hand the generated files back.
  if(ARGS_VARNAME)
    set(${ARGS_VARNAME} ${_output_files} PARENT_SCOPE)
  endif()
endfunction()
|
@@ -1,194 +0,0 @@
|
||||
// Copyright (c) 2014, ArrayFire
|
||||
// All rights reserved.
|
||||
// Umar Arshad
|
||||
// Copyright 2014
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <algorithm>
|
||||
|
||||
using namespace std;
|
||||
typedef map<string, string> opt_t;
|
||||
|
||||
// Prints the command-line help to stdout and terminates the process with
// exit status 0. Lists every option parse_options() recognises.
static
void print_usage() {
    cout << R"delimiter(BIN2CPP
Converts files from a binary file to C++ headers. It is similar to bin2c and
xxd but adds support for namespaces.

| --name       | name of the variable (default: var)                               |
| --file       | input file                                                        |
| --output     | output file (if no output is specified then it prints to stdout)  |
| --type       | Type of variable (default: char)                                  |
| --namespace  | A space separated list of namespaces                              |
| --extension  | Output extension; "#pragma once" is omitted when it is "cpp"      |
| --eof        | Append a trailing 0x0 element when the value starts with 1        |
| --formatted  | Tabs for formatting                                               |
| --verbose    | Prints the parsed options                                         |
| --version    | Prints my name                                                    |
| --help       | Prints usage info                                                 |

Example
-------
Command:
./bin2cpp --file blah.txt --namespace blah detail --formatted --name blah_var

Will produce:
#pragma once
#include <cstddef>
namespace blah {
    namespace detail {
        static const char blah_var[] = {
            0x2f,    0x2f,    0x20,    0x62,    0x6c,    0x61,    0x68,    0x2e,    0x74,    0x78,
            0x74,    0xa,    0x62,    0x6c,    0x61,    0x68,    0x20,    0x62,    0x6c,    0x61,
            0x68,    0x20,    0x62,    0x6c,    0x61,    0x68,    0xa,    };
        static const size_t blah_var_len = 27;
    }
})delimiter";
    exit(0);
}
|
||||
|
||||
static bool formatted;
|
||||
|
||||
static
|
||||
void add_tabs(const int level ){
|
||||
if(formatted) {
|
||||
for(int i =0; i < level; i++) {
|
||||
cout << "\t";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
opt_t
|
||||
parse_options(const vector<string>& args) {
|
||||
opt_t options;
|
||||
|
||||
options["--name"] = "";
|
||||
options["--type"] = "";
|
||||
options["--file"] = "";
|
||||
options["--output"] = "";
|
||||
options["--extension"] = "";
|
||||
options["--namespace"] = "";
|
||||
options["--eof"] = "";
|
||||
|
||||
//Parse Arguments
|
||||
string curr_opt;
|
||||
bool verbose = false;
|
||||
for(auto arg : args) {
|
||||
if(arg == "--verbose") {
|
||||
verbose = true;
|
||||
}
|
||||
else if(arg == "--formatted") {
|
||||
formatted = true;
|
||||
}
|
||||
else if(arg == "--version") {
|
||||
cout << args[0] << " By Umar Arshad" << endl;
|
||||
}
|
||||
else if(arg == "--help") {
|
||||
print_usage();
|
||||
}
|
||||
else if(options.find(arg) != options.end()) {
|
||||
curr_opt = arg;
|
||||
}
|
||||
else if(curr_opt.empty()) {
|
||||
//cerr << "Invalid Argument: " << arg << endl;
|
||||
}
|
||||
else {
|
||||
if(options[curr_opt] != "") {
|
||||
options[curr_opt] += " " + arg;
|
||||
}
|
||||
else {
|
||||
options[curr_opt] += arg;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(verbose) {
|
||||
for(auto opts : options) {
|
||||
cout << get<0>(opts) << " " << get<1>(opts) << endl;
|
||||
}
|
||||
}
|
||||
return options;
|
||||
}
|
||||
|
||||
int main(int argc, const char * const * const argv)
|
||||
{
|
||||
|
||||
vector<string> args(argv, argv+argc);
|
||||
|
||||
opt_t&& options = parse_options(args);
|
||||
|
||||
//Save default cout buffer. Need this to prevent crash.
|
||||
auto bak = cout.rdbuf();
|
||||
unique_ptr<ofstream> outfile;
|
||||
|
||||
// Set defaults
|
||||
if(options["--name"] == "") { options["--name"] = "var"; }
|
||||
if(options["--output"] != "") {
|
||||
//redirect stream if output file is specified
|
||||
outfile.reset(new ofstream(options["--output"]));
|
||||
cout.rdbuf(outfile->rdbuf());
|
||||
}
|
||||
|
||||
if(options["--extension"] != "cpp")
|
||||
cout << "#pragma once\n";
|
||||
cout << "\n";
|
||||
cout << "#include <cstddef>\n"; // defines size_t
|
||||
cout << "\n";
|
||||
int ns_cnt = 0;
|
||||
int level = 0;
|
||||
if(options["--namespace"] != "") {
|
||||
std::stringstream namespaces(options["--namespace"]);
|
||||
string name;
|
||||
namespaces >> name;
|
||||
do {
|
||||
add_tabs(level++);
|
||||
cout << "namespace " << name << "\n";
|
||||
cout << "{\n";
|
||||
ns_cnt++;
|
||||
namespaces >> name;
|
||||
} while(!namespaces.fail());
|
||||
}
|
||||
|
||||
if(options["--type"] == "") {
|
||||
options["--type"] = "char";
|
||||
}
|
||||
add_tabs(level);
|
||||
cout << "\n";
|
||||
cout << "static const " << options["--type"] << " " << options["--name"] << "[] = {\n";
|
||||
|
||||
|
||||
ifstream input(options["--file"]);
|
||||
size_t char_cnt = 0;
|
||||
add_tabs(++level);
|
||||
for(char i; input.get(i);) {
|
||||
cout << "0x" << std::hex << static_cast<int>(i) << ",\t";
|
||||
char_cnt++;
|
||||
if(!(char_cnt % 10)) {
|
||||
cout << endl;
|
||||
add_tabs(level);
|
||||
}
|
||||
}
|
||||
|
||||
if (options["--eof"].c_str()[0] == '1') {
|
||||
// Add end of file character
|
||||
cout << "0x0";
|
||||
char_cnt++;
|
||||
}
|
||||
|
||||
cout << "};\n";
|
||||
add_tabs(--level);
|
||||
cout << "\n";
|
||||
cout << "static const std::size_t " << options["--name"] << "_len" << " = " << std::dec << char_cnt << ";\n";
|
||||
cout << "\n";
|
||||
|
||||
while(ns_cnt--) {
|
||||
add_tabs(--level);
|
||||
cout << "}\n";
|
||||
}
|
||||
cout.rdbuf(bak);
|
||||
}
|
@@ -1,130 +0,0 @@
|
||||
#Thanks to Andreas Knoeckler for providing stand-alone boost.python
|
||||
#through PyOpenCL and PyCUDA
|
||||
|
||||
import os, sys
|
||||
from distutils.ccompiler import show_compilers,new_compiler
|
||||
from distutils.command.build_ext import build_ext
|
||||
from distutils.command.build_py import build_py
|
||||
from distutils.core import setup, Extension
|
||||
from distutils.sysconfig import get_python_inc
|
||||
from distutils import sysconfig
|
||||
from imp import find_module
|
||||
from glob import glob
|
||||
from os.path import dirname
|
||||
|
||||
platform_cflags = {}
|
||||
platform_ldflags = {}
|
||||
platform_libs = {}
|
||||
|
||||
class build_ext_subclass(build_ext):
    """build_ext that applies per-compiler settings before building.

    The module-level dicts platform_cflags, platform_ldflags and
    platform_libs are keyed by distutils compiler type. When the active
    compiler has an entry, the matching flags replace each extension's
    extra compile/link arguments and the libraries are appended to each
    extension's library list.
    """

    def build_extensions(self):
        c = self.compiler.compiler_type
        if c in platform_cflags:
            for e in self.extensions:
                # Copy so extensions never share (and mutate) the table entry.
                e.extra_compile_args = list(platform_cflags[c])
        if c in platform_ldflags:
            for e in self.extensions:
                e.extra_link_args = list(platform_ldflags[c])
        if c in platform_libs:
            for e in self.extensions:
                # An extension may have no usable `libraries` list yet; start
                # from an empty one instead of catching every exception.
                current = getattr(e, 'libraries', None) or []
                e.libraries = list(current) + list(platform_libs[c])
        build_ext.build_extensions(self)
|
||||
|
||||
def main():
    """Assemble compiler settings, sources and extensions, then call setup().

    The '${...}' strings are placeholders kept verbatim.
    NOTE(review): presumably substituted by the build system before this
    script runs - confirm.
    """

    def recursive_glob(rootdir='.', suffix=''):
        # Every file below rootdir whose name ends with suffix.
        matches = []
        for looproot, _, filenames in os.walk(rootdir):
            for filename in filenames:
                if filename.endswith(suffix):
                    matches.append(os.path.join(looproot, filename))
        return matches

    def remove_prefixes(optlist, bad_prefixes):
        # Drops, in place, the first flag matching each prefix.
        for bad_prefix in bad_prefixes:
            hit = next((i for i, flag in enumerate(optlist)
                        if flag.startswith(bad_prefix)), None)
            if hit is not None:
                optlist.pop(hit)
        return optlist

    #Tweaks warning, because boost-numpy and boost-python won't compile cleanly without these changes
    cvars = sysconfig.get_config_vars()
    opt_flags = remove_prefixes(cvars['OPT'].split(), ['-g', '-Wstrict-prototypes'])
    cvars['OPT'] = ' '.join(opt_flags)
    cvars["CFLAGS"] = cvars["BASECFLAGS"] + ' ' + cvars['OPT']
    cvars["LDFLAGS"] = '-Wl,--no-as-needed ' + cvars["LDFLAGS"]

    #Check Android
    for_android = '-mandroid' in cvars['PY_CFLAGS']

    #Dynamic load for backend switching
    libraries = ['dl']
    library_dirs = []

    #Include directories
    numpy_include = os.path.join(find_module("numpy")[1], "core", "include")
    include = '${INCLUDE_DIRECTORIES_STR}'.split()
    include += ['external/boost/', 'external/boost/boost/', numpy_include]

    #Android
    if for_android:
        ANDROID_ROOT = os.environ['ANDROIDNDK'] + '/sources/cxx-stl/gnu-libstdc++/' + os.environ['TOOLCHAIN_VERSION']
        library_dirs += [ANDROID_ROOT + '/libs/armeabi']
        include += [ANDROID_ROOT + '/include/', ANDROID_ROOT + '/libs/armeabi/include/']
        libraries += ['gnustl_shared']

    #Source files
    bindings = ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']
    src = '${LIBISAAC_SRC_STR}'.split()
    src += [os.path.join('src', 'bind', sf) for sf in bindings]
    boostsrc = 'external/boost/libs/'
    for s in ['numpy', 'python', 'smart_ptr', 'system', 'thread']:
        for x in recursive_glob(boostsrc + s + '/src/', '.cpp'):
            # Skip the win32- and pthread-specific boost sources.
            if 'win32' in x or 'pthread' in x:
                continue
            src.append(x)

    #isaac
    isaac_ext = Extension(
        '_isaac', src,
        extra_compile_args=['-std=c++11', '-Wno-unused-function', '-Wno-unused-local-typedefs', '-Wno-sign-compare', '-Wno-attributes', '-DBOOST_PYTHON_SOURCE '],
        extra_link_args=['-Wl,-soname=_isaac.so'],
        undef_macros=[],
        include_dirs=include,
        library_dirs=library_dirs,
        libraries=libraries)

    #External
    sklearn_ext = Extension('external.sklearn._tree',
                            ['external/sklearn/_tree.c'],
                            include_dirs=[numpy_include])

    extensions = [isaac_ext, sklearn_ext]

    #Setup
    # NOTE(review): `license` says MPL 2.0 while the classifiers list the MIT
    # license - confirm which one is intended.
    setup(
        name='isaac',
        version='1.0',
        description="Input-specific architecture-aware computations",
        author='Philippe Tillet',
        author_email='ptillet@g.harvard.edu',
        license='MPL 2.0',
        packages=['isaac', 'isaac.external', 'isaac.external.sklearn'],
        ext_package="isaac",
        ext_modules=extensions,
        cmdclass={'build_py': build_py, 'build_ext': build_ext_subclass},
        classifiers=[
            'Environment :: Console',
            'Development Status :: 1 - Experimental',
            'Intended Audience :: Developers',
            'Intended Audience :: Other Audience',
            'Intended Audience :: Science/Research',
            'License :: OSI Approved :: MIT License',
            'Natural Language :: English',
            'Programming Language :: C++',
            'Programming Language :: Python',
            'Programming Language :: Python :: 3',
            'Topic :: Scientific/Engineering',
            'Topic :: Scientific/Engineering :: Mathematics',
            'Topic :: Scientific/Engineering :: Physics',
            'Topic :: Scientific/Engineering :: Machine Learning',
        ]
    )
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
File diff suppressed because it is too large
Load Diff
@@ -1,12 +0,0 @@
|
||||
# Cross-compilation toolchain: Windows target, i686 MinGW-w64 tools.

# Target system
set(CMAKE_SYSTEM_NAME Windows)

# Cross compilers (C, C++ and the resource compiler)
set(CMAKE_C_COMPILER   /usr/bin/i686-w64-mingw32-gcc)
set(CMAKE_CXX_COMPILER /usr/bin/i686-w64-mingw32-g++)
set(CMAKE_RC_COMPILER  /usr/bin/i686-w64-mingw32-windres)

# Roots of the target environment: the "crossdeps" directory next to this
# file, then the MinGW sysroot.
set(CMAKE_FIND_ROOT_PATH "${CMAKE_CURRENT_LIST_DIR}/crossdeps" /usr/i686-w64-mingw32)

# Libraries and headers come only from the target roots; programs are looked
# up on the host.
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
|
@@ -1,12 +0,0 @@
|
||||
# Cross-compilation toolchain: Windows target, x86_64 MinGW-w64 tools.

# Target system
set(CMAKE_SYSTEM_NAME Windows)

# Cross compilers (C, C++ and the resource compiler)
set(CMAKE_C_COMPILER   /usr/bin/x86_64-w64-mingw32-gcc)
set(CMAKE_CXX_COMPILER /usr/bin/x86_64-w64-mingw32-g++)
set(CMAKE_RC_COMPILER  /usr/bin/x86_64-w64-mingw32-windres)

# Roots of the target environment: the "crossdeps" directory next to this
# file, then the MinGW sysroot.
set(CMAKE_FIND_ROOT_PATH "${CMAKE_CURRENT_LIST_DIR}/crossdeps" /usr/x86_64-w64-mingw32)

# Libraries and headers come only from the target roots; programs are looked
# up on the host.
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
|
@@ -2,6 +2,6 @@ for i in $(find ../lib/ ../include/isaac/ ../python/src/bind -name '*.cpp' -or -
|
||||
do
|
||||
if ! grep -q Copyright $i
|
||||
then
|
||||
cat license-header.txt $i >$i.new && mv $i.new $i
|
||||
cat ../LICENSE $i >$i.new && mv $i.new $i
|
||||
fi
|
||||
done
|
||||
|
BIN
documentation/bench/CONV.pdf
Normal file
BIN
documentation/bench/CONV.pdf
Normal file
Binary file not shown.
BIN
documentation/bench/CONV.png
Normal file
BIN
documentation/bench/CONV.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 9.2 KiB |
BIN
documentation/bench/GEMM.pdf
Normal file
BIN
documentation/bench/GEMM.pdf
Normal file
Binary file not shown.
BIN
documentation/bench/GEMM.png
Normal file
BIN
documentation/bench/GEMM.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 15 KiB |
Binary file not shown.
Before Width: | Height: | Size: 37 KiB |
Binary file not shown.
Before Width: | Height: | Size: 40 KiB |
@@ -1,69 +0,0 @@
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
|
||||
def add_line(ax, xpos, ypos, height=.1):
    """Add a black vertical segment at x=xpos running from ypos+height to ypos.

    Coordinates use ax.transAxes, and clipping is turned off for the segment.
    """
    xs = [xpos, xpos]
    ys = [ypos + height, ypos]
    segment = plt.Line2D(xs, ys, transform=ax.transAxes, color='black')
    segment.set_clip_on(False)
    ax.add_line(segment)
|
||||
|
||||
# One (group title, sub-label) pair per benchmark group, in plotting order.
bench = [('DeepBench-Forward\nM=K=1760', 'N'),
         ('DeepBench-Backward\nM=K=2560', 'N'),
         ('Covariance\nK=60000', 'M=N'),
         ('Blocked SVD\nK=32', 'M=N')]

# Tick labels of the bars inside each group (same order as `bench`).
labels = [[16, 32, 64, 128, 7000],
          [16, 32, 64, 128, 7000],
          [32, 256],
          [896, 3456, 4096]]

# Per device: the reference library, its results and bar colour ('lib*'),
# and the ISAAC results ('scperf'). Values are flattened across the groups.
configs = {
    'Pascal Titan X': {'lib': 'cuBLAS',
                       'libperf': [1.65, 1.88, 2.58, 4.83, 11.5,
                                   0.72, 1.72, 2.39, 2.86, 7.77,
                                   0.80, 3.61,
                                   1.37, 2.50, 2.57],
                       'libcol': 'green',
                       'scperf': [1.15, 2.43, 3.83, 5.53, 11.5,
                                  1.78, 3.06, 4.37, 5.52, 8.67,
                                  1.44, 6.43,
                                  1.14, 4.53, 4.91]},

    'R9 Fury': {'lib': 'clBLAS',
                'libperf': [0.22, 0.65, 1.35, 1.92, 3.35,
                            0.28, 0.64, 1.36, 1.91, 3.32,
                            0.02, 0.87,
                            0.43, 0.98, 1.95],
                'libcol': '#d30034',
                'scperf': [0.67, 0.94, 1.18, 2.12, 4.66,
                           0.63, 1.15, 1.43, 1.82, 4.22,
                           0.19, 2.82,
                           0.35, 1.82, 1.80]}
}

# dict.items() works on Python 2 and 3; iteritems() exists only on Python 2.
for device, conf in configs.items():
    width = 0.5
    sep = 1.3
    # Bar positions: evenly spaced, then shifted by one extra `sep` at the
    # start of every group so groups are visually separated.
    xx = sep*np.arange(len(conf['scperf'])) + width
    groups = [0] + [len(_) for _ in labels]
    for i in np.cumsum(groups)[:-1]:
        xx[i:] += sep
    xmax = xx[-1] + width + sep
    figure, ax = plt.subplots(figsize=(12,8))
    sc = ax.bar(xx - width, conf['scperf'], width, color='purple')
    cu = ax.bar(xx, conf['libperf'], width, color=conf['libcol'])
    # x positions of the separators between groups, plus both plot edges.
    linex = [(xx[i] - sep) for i in np.cumsum(groups)[1:-1]]
    linex = [0] + linex + [xmax]
    for i in range(len(linex)-1):
        group, sublabel = bench[i]
        add_line(ax, linex[i]/xmax, 0, -10)
        ax.text(.5*(linex[i] + linex[i+1])/xmax, -.12, group, ha='center', transform=ax.transAxes, fontsize = 10, color='darkblue')
        ax.text(.5*(linex[i] + linex[i+1])/xmax, -.07, sublabel, ha='center', transform=ax.transAxes, fontsize = 10)
    ax.set_xlim((0,xmax))
    ax.set_xticks(xx)
    ax.set_xticklabels([x for _ in labels for x in _ ], rotation=30, fontsize=10)
    ax.set_ylabel('TFLOPS')
    ax.legend((sc, cu), ('ISAAC', conf['lib']))
    ax.set_title('sGEMM - {}'.format(device))
    plt.savefig('bench-{}.png'.format(conf['lib']))
    plt.show()
|
@@ -1,21 +0,0 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
@@ -1,4 +1,6 @@
|
||||
foreach(PROG indexing)
|
||||
add_executable(example-${PROG} ${PROG}.cpp)
|
||||
target_link_libraries(example-${PROG} isaac)
|
||||
# Directory holding the CUDA headers needed by the examples. Defaults to the
# previously hard-coded location; override with -DISAAC_CUDA_INCLUDE_DIR=<dir>.
set(ISAAC_CUDA_INCLUDE_DIR "/usr/local/cuda/include/" CACHE PATH "Directory containing the CUDA headers")

# One executable per example source, each linked against isaac.
foreach(PROG bench)
  add_executable(${PROG} ${PROG}.cpp)
  set_target_properties(${PROG} PROPERTIES OUTPUT_NAME ${PROG})
  # Scoped to the target instead of the whole directory.
  target_include_directories(${PROG} PRIVATE "${ISAAC_CUDA_INCLUDE_DIR}")
  target_link_libraries(${PROG} PRIVATE isaac)
endforeach()
|
||||
|
181
examples/bench.cpp
Normal file
181
examples/bench.cpp
Normal file
@@ -0,0 +1,181 @@
|
||||
#include <tuple>
|
||||
#include "isaac/driver/backend.h"
|
||||
#include "isaac/driver/cublas.h"
|
||||
#include "isaac/driver/context.h"
|
||||
#include "isaac/driver/buffer.h"
|
||||
#include "isaac/driver/stream.h"
|
||||
#include "isaac/tools/bench.hpp"
|
||||
#include "isaac/api.h"
|
||||
|
||||
namespace sc = isaac;
|
||||
namespace drv = sc::driver;
|
||||
using sc::param_t;
|
||||
using std::make_tuple;
|
||||
|
||||
// Geometric mean of `data`, computed as exp(mean(log(x))): the logarithms are
// summed left to right and the sum is divided by the element count.
double geometric_mean(std::vector<double> const&data){
  double logsum = 0;
  for(std::vector<double>::const_iterator it = data.begin(); it != data.end(); ++it)
    logsum = logsum + std::log(*it);
  return std::exp(logsum/data.size());
}
|
||||
|
||||
// Writes the header row of a results table to std::cout: the ITALIC and BOLD
// color_stream attributes, every entry of `sections` followed by a tab, the two
// fixed column titles, then the RESET attribute and a newline.
void print_results_header(std::vector<std::string> sections){
  std::cout << color_stream(ITALIC) << color_stream(BOLD) ;
  for(std::string const & title: sections)
    std::cout << title << "\t";
  std::cout << "ISAAC\tcuDNN";
  std::cout << color_stream(RESET) << std::endl;
}
|
||||
|
||||
void print_results(std::vector<double> const & times, std::vector<std::string> const & prefix, std::function<double(double)> fn){
|
||||
std::copy(prefix.begin(), prefix.end(), std::ostream_iterator<std::string>(std::cout, "\t"));
|
||||
std::vector<double> perf;
|
||||
std::transform(times.begin(), times.end(), std::back_inserter(perf), fn);
|
||||
auto fastest = perf;
|
||||
std::sort(fastest.begin(), fastest.end(), std::greater<double>());
|
||||
for(auto x: perf){
|
||||
if(x/fastest[1] >= 1.05)
|
||||
std::cout << color_stream(FG_LIGHT_BLUE) << x << color_stream(RESET);
|
||||
else
|
||||
std::cout << x;
|
||||
std::cout << "\t";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
|
||||
int main(){
|
||||
std::cout << std::fixed << std::setprecision(2);
|
||||
auto ctx = drv::backend::contexts::get_default();
|
||||
drv::Stream stream(ctx);
|
||||
sc::DType dtype = sc::FLOAT_TYPE;
|
||||
int32_t dtsize = sc::size_of(dtype);
|
||||
drv::Device const & device = drv::backend::contexts::get_default().device();
|
||||
|
||||
{
|
||||
typedef std::tuple<param_t, param_t, param_t, param_t, param_t, param_t, param_t, param_t, param_t, param_t, param_t> conv_tuple;
|
||||
std::vector<conv_tuple> shapes;
|
||||
//Cluster 1
|
||||
for(size_t N: std::vector<size_t>{4, 8, 16, 32})
|
||||
shapes.push_back(std::make_tuple(700, 161, 1, N, 32, 5, 20, 0, 0, 2, 2));
|
||||
//Cluster 2
|
||||
for(size_t N: std::vector<size_t>{4, 8, 16, 32})
|
||||
shapes.push_back(std::make_tuple(341, 79, 32, N, 32, 5, 10, 0, 0, 2, 2));
|
||||
//Cluster 3
|
||||
shapes.push_back(std::make_tuple(480, 48, 1, 16, 16, 3, 3, 1, 1, 1, 1));
|
||||
shapes.push_back(std::make_tuple(240, 24, 16, 16, 32, 3, 3, 1, 1, 1, 1));
|
||||
shapes.push_back(std::make_tuple(120, 12, 32, 16, 64, 3, 3, 1, 1, 1, 1));
|
||||
shapes.push_back(std::make_tuple(60, 6, 64, 16, 128, 3, 3, 1, 1, 1, 1));
|
||||
//Cluster 4
|
||||
shapes.push_back(std::make_tuple(108, 108, 3, 8, 64, 3, 3, 1, 1, 2, 2));
|
||||
shapes.push_back(std::make_tuple(54, 54, 64, 8, 64, 3, 3, 1, 1, 1, 1));
|
||||
shapes.push_back(std::make_tuple(27, 27, 128, 8, 128, 3, 3, 1, 1, 1, 1));
|
||||
shapes.push_back(std::make_tuple(14, 14, 128, 8, 256, 3, 3, 1, 1, 1, 1));
|
||||
shapes.push_back(std::make_tuple(7, 7, 256, 8, 512, 3, 3, 1, 1, 1, 1));
|
||||
//Cluster 5-6
|
||||
for(size_t N: std::vector<size_t>{8, 16}){
|
||||
shapes.push_back(std::make_tuple(224, 224, 3, N, 64, 3, 3, 1, 1, 1, 1));
|
||||
shapes.push_back(std::make_tuple(112, 112, 64, N, 128, 3, 3, 1, 1, 1, 1));
|
||||
shapes.push_back(std::make_tuple(56, 56, 128, N, 256, 3, 3, 1, 1, 1, 1));
|
||||
shapes.push_back(std::make_tuple(28, 28, 256, N, 512, 3, 3, 1, 1, 1, 1));
|
||||
shapes.push_back(std::make_tuple(14, 14, 512, N, 512, 3, 3, 1, 1, 1, 1));
|
||||
shapes.push_back(std::make_tuple(7, 7, 512, N, 512, 3, 3, 1, 1, 1, 1));
|
||||
}
|
||||
//Cluster 7
|
||||
shapes.push_back(std::make_tuple(224, 224, 3, 16, 64, 7, 7, 3, 3, 2, 2));
|
||||
shapes.push_back(std::make_tuple(28, 28, 192, 16, 32, 5, 5, 2, 2, 1, 1));
|
||||
shapes.push_back(std::make_tuple(28, 28, 192, 16, 64, 1, 1, 0, 0, 1, 1));
|
||||
shapes.push_back(std::make_tuple(14, 14, 512, 16, 48, 5, 5, 2, 2, 1, 1));
|
||||
shapes.push_back(std::make_tuple(14, 14, 512, 16, 192, 1, 1, 0, 0, 1, 1));
|
||||
shapes.push_back(std::make_tuple(7, 7, 832, 16, 256, 1, 1, 0, 0, 1, 1));
|
||||
shapes.push_back(std::make_tuple(7, 7, 832, 16, 128, 5, 5, 2, 2, 1, 1));
|
||||
|
||||
param_t W, H, P, Q, C, N, K, R, S, pad_h, pad_w, stride_h, stride_w;
|
||||
std::cout << "======================================================================" << std::endl;
|
||||
std::cout << "FCONV" << std::endl;
|
||||
std::cout << "======================================================================" << std::endl;
|
||||
print_results_header({"N", "K", "P", "Q", "C", "R", "S"});
|
||||
std::vector<double> speedup;
|
||||
for(auto shape: shapes){
|
||||
std::tie(W, H, C, N, K, R, S, pad_h, pad_w, stride_h, stride_w) = shape;
|
||||
P = (H - R + 1 + 2*pad_h)/stride_h;
|
||||
Q = (W - S + 1 + 2*pad_w)/stride_w;
|
||||
|
||||
sc::scalar alpha(1., dtype);
|
||||
sc::scalar beta(0., dtype);
|
||||
|
||||
drv::Buffer O(ctx, N*K*P*Q*dtsize);
|
||||
drv::Buffer I(ctx, C*H*W*N*dtsize);
|
||||
drv::Buffer F(ctx, K*C*R*S*dtsize);
|
||||
|
||||
std::vector<double> times;
|
||||
times.push_back(bench([&](){ sc::CONV(device, stream, dtype, N, K, P, Q, C, R, S, H, W, pad_h, pad_w, stride_h, stride_w, alpha, I, F, beta, O); }, [&](){ stream.synchronize(); }, device));
|
||||
times.push_back(bench([&](){ sc::driver::cudnnConv(dtype, ctx, stream, H, W, N, K, P, Q, C, R, S, pad_h, pad_w, stride_h, stride_w, alpha, I, F, beta, O); }, [&](){ stream.synchronize(); }, device));
|
||||
speedup.push_back(times[1]/times[0]);
|
||||
print_results(times, {str(N), str(K), str(P), str(Q), str(C), str(R), str(S)}, [&](double tsec){ return sc::templates::Conv::tflops(P,Q,K,N,C,R,S,tsec);});
|
||||
}
|
||||
std::cout << "======================================================================" << std::endl;
|
||||
std::cout << "Speedup: " << geometric_mean(speedup) << std::endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
//GEMM
|
||||
{
|
||||
typedef std::tuple<sc::IsaacOperation_t, sc::IsaacOperation_t, param_t, param_t, param_t> gemm_tuple;
|
||||
std::vector<gemm_tuple> shapes;
|
||||
|
||||
// LinPack
|
||||
for(param_t N: std::vector<param_t>{512, 1024, 2048})
|
||||
shapes.push_back(std::make_tuple(sc::ISAAC_OP_N, sc::ISAAC_OP_T, N, N, N));
|
||||
|
||||
// DeepBench [Forward]
|
||||
for(param_t M: std::vector<param_t>{1760})
|
||||
for(param_t N: std::vector<param_t>{8, 16, 32, 64, 128})
|
||||
shapes.push_back(std::make_tuple(sc::ISAAC_OP_N, sc::ISAAC_OP_N, M, N, M));
|
||||
|
||||
// DeepBench [Backward]
|
||||
for(param_t M: std::vector<param_t>{1760})
|
||||
for(param_t N: std::vector<param_t>{8, 16, 32, 64, 128})
|
||||
shapes.push_back(std::make_tuple(sc::ISAAC_OP_T, sc::ISAAC_OP_N, M, N, M));
|
||||
|
||||
// PCA/ICA
|
||||
for(param_t N: std::vector<param_t>{16, 64, 256})
|
||||
for(param_t K: std::vector<param_t>{64000})
|
||||
shapes.push_back(std::make_tuple(sc::ISAAC_OP_N, sc::ISAAC_OP_T, N, N, K));
|
||||
|
||||
// LaPACK
|
||||
for(param_t N: std::vector<param_t>{1024, 2048, 4096})
|
||||
for(param_t K: std::vector<param_t>{32})
|
||||
shapes.push_back(std::make_tuple(sc::ISAAC_OP_N, sc::ISAAC_OP_T, N, N, K));
|
||||
|
||||
sc::IsaacOperation_t AT, BT;
|
||||
param_t M, N, K;
|
||||
std::cout << "======================================================================" << std::endl;
|
||||
std::cout << "GEMM:" << std::endl;
|
||||
std::cout << "======================================================================" << std::endl;
|
||||
print_results_header({"AT", "BT", "M", "N", "K"});
|
||||
std::vector<double> speedup;
|
||||
for(auto shape: shapes){
|
||||
std::tie(AT, BT, M, N, K) = shape;
|
||||
sc::scalar alpha(1., dtype);
|
||||
sc::scalar beta(0., dtype);
|
||||
|
||||
size_t ldc = M;
|
||||
size_t lda = (AT==sc::ISAAC_OP_N)?M:K;
|
||||
size_t ldb = (BT==sc::ISAAC_OP_N)?K:N;
|
||||
|
||||
char cuAT = (AT==sc::ISAAC_OP_T)?'T':'N';
|
||||
char cuBT = (BT==sc::ISAAC_OP_T)?'T':'N';
|
||||
|
||||
drv::Buffer C(ctx, M*N*dtsize);
|
||||
drv::Buffer A(ctx, M*K*dtsize);
|
||||
drv::Buffer B(ctx, K*N*dtsize);
|
||||
|
||||
std::vector<double> times;
|
||||
times.push_back(bench([&](){ sc::GEMM(device, stream, dtype, AT, BT, M, N, K, 0, lda, 0, ldb, 0, ldc, alpha, A, B, beta, C); }, [&](){ stream.synchronize(); }, device));
|
||||
times.push_back(bench([&](){ sc::driver::cublasGemm(dtype, ctx, stream, cuAT, cuBT, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc); }, [&](){ stream.synchronize(); }, device));
|
||||
speedup.push_back(times[1]/times[0]);
|
||||
print_results(times, {str(AT), str(BT), str(M), str(N), str(K)}, [&](double tsec){ return sc::templates::GEMM::tflops(M, N, K, tsec);});
|
||||
}
|
||||
std::cout << "======================================================================" << std::endl;
|
||||
std::cout << "Speedup: " << geometric_mean(speedup) << std::endl;
|
||||
}
|
||||
}
|
@@ -1,60 +0,0 @@
|
||||
#include "isaac/array.h"
|
||||
#include "isaac/symbolic/scheduler/dag.h"
|
||||
|
||||
namespace sc = isaac;
|
||||
|
||||
class carma_generator
|
||||
{
|
||||
void apply_impl(sc::array_base const & A, sc::array_base const & B, sc::view C, size_t depth)
|
||||
{
|
||||
if(depth>=split_.size()){
|
||||
dag_.append(sc::assign(C, sc::dot(A, B)), "C = dot(A, B)");
|
||||
}
|
||||
else
|
||||
{
|
||||
sc::int_t M = C.shape()[0], N = C.shape()[1], K = A.shape()[1];
|
||||
size_t new_depth = depth + 1;
|
||||
//Split along M
|
||||
if(M >= N && M >= K){
|
||||
apply_impl(A({0, M/2}, {sc::all}), B, C({0, M/2}, sc::all), new_depth);
|
||||
apply_impl(A({M/2, sc::end}, {sc::all}), B, C({M/2, sc::end}, sc::all), new_depth);
|
||||
}
|
||||
//Split along N
|
||||
else if(N >= M && N >= K){
|
||||
apply_impl(A, B(sc::all, {0, N/2}), C(sc::all, {0, N/2}), new_depth);
|
||||
apply_impl(A, B(sc::all, {N/2, sc::end}), C(sc::all, {N/2, sc::end}), new_depth);
|
||||
}
|
||||
//Split along K
|
||||
else{
|
||||
sc::array_base & C1 = dag_.create_temporary(new sc::array(C.shape(), C.dtype(), C.context()));
|
||||
sc::array_base & C2 = dag_.create_temporary(new sc::array(C.shape(), C.dtype(), C.context()));
|
||||
apply_impl(A(sc::all, {0, K/2}), B({0, K/2}, sc::all), C1, new_depth);
|
||||
apply_impl(A(sc::all, {K/2, sc::end}), B({K/2, sc::end}, sc::all), C2, new_depth);
|
||||
dag_.append(sc::assign(C, C1 + C2), "C = C1 + C2");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
carma_generator(size_t depth): split_(depth)
|
||||
{ }
|
||||
|
||||
void apply(sc::array_base const & A, sc::array_base const & B, sc::array_base & C)
|
||||
{
|
||||
apply_impl(A, B, sc::view(C), 0);
|
||||
dag_.export_graphviz("test.dot");
|
||||
}
|
||||
|
||||
private:
|
||||
sc::symbolic::scheduler::dag dag_;
|
||||
std::vector<sc::int_t> split_;
|
||||
};
|
||||
|
||||
|
||||
int main()
|
||||
{
|
||||
sc::int_t M = 131, N = 1402, K = 5023;
|
||||
sc::array C(M, N), A(M, K), B(K, N);
|
||||
carma_generator generator(3);
|
||||
generator.apply(A, B, C);
|
||||
}
|
@@ -1,43 +0,0 @@
|
||||
#include "isaac/array.h"
|
||||
|
||||
namespace sc = isaac;
|
||||
|
||||
int main()
|
||||
{
|
||||
// static const char * sline = "--------------------";
|
||||
static const char * dline = "====================";
|
||||
|
||||
std::cout << dline << std::endl;
|
||||
std::cout << "Tutorial: Indexing " << std::endl;
|
||||
std::cout << dline << std::endl;
|
||||
|
||||
sc::int_t M = 5, N = 12;
|
||||
|
||||
std::vector<float> data(M*N);
|
||||
for(unsigned int i = 0 ; i < data.size(); ++i)
|
||||
data[i] = i;
|
||||
sc::array A = sc::array(M, N, data);
|
||||
|
||||
sc::array s = sc::array({1,1}, std::vector<float>{5});
|
||||
sc::array x = sc::array({1,3},std::vector<float>{1,2,3});
|
||||
sc::array y = sc::array({3,3},std::vector<float>{1,2,3,4,5,6,7,8,9});
|
||||
|
||||
sc::array B({4,3},std::vector<float>{0,1,2,3,4,5,6,7,8,9,10,11});
|
||||
|
||||
// std::cout << sc::sum(y, 1)*sc::sum(x) << std::endl;
|
||||
// std::cout << sc::dot(B.T, B + B) << std::endl;
|
||||
std::cout << 1*s*x + x << std::endl;
|
||||
// std::cout << sc::sum(B) << std::endl;
|
||||
// std::cout << sc::reshape(x, {3,1}) + sc::sum(x)*sc::sum(sc::dot(B.T,B) + x + y, 1) + sc::sum(B)*sc::sum(B, 0)<< std::endl;
|
||||
// std::cout << sline << std::endl;
|
||||
// std::cout << "A[3, 2:end]:" << A(3, {2,sc::end}) << std::endl;
|
||||
|
||||
// std::cout << sline << std::endl;
|
||||
// std::cout << "A[2:end, 4]:" << A({2,sc::end}, 4) << std::endl;
|
||||
|
||||
// std::cout << sline << std::endl;
|
||||
// std::cout << "diag(A, 1): " << sc::diag(A, 1) << std::endl;
|
||||
|
||||
// std::cout << sline << std::endl;
|
||||
// std::cout << "diag(A, -7): " << sc::diag(A, -7) << std::endl;
|
||||
}
|
186
examples/ptx-conv.cpp
Normal file
186
examples/ptx-conv.cpp
Normal file
@@ -0,0 +1,186 @@
|
||||
#include <sstream>
|
||||
#include <chrono>
|
||||
#include <exception>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include "isaac/driver/backend.h"
|
||||
#include "isaac/driver/module.h"
|
||||
#include "isaac/driver/error.h"
|
||||
#include "isaac/driver/kernel.h"
|
||||
#include "isaac/driver/cublas.h"
|
||||
#include "isaac/driver/stream.h"
|
||||
#include "isaac/driver/buffer.h"
|
||||
#include "isaac/templates/error.hpp"
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "isaac/tools/bench.hpp"
|
||||
#include "isaac/templates/conv.h"
|
||||
|
||||
namespace sc = isaac;
|
||||
namespace drv = isaac::driver;
|
||||
|
||||
|
||||
// Linear offset of the 4-D coordinate (x, y, z, w) in an array of extents
// (s0, s1, s2, s3) where w is the fastest-varying index. The outermost extent
// s0 does not enter the computation.
inline int32_t idx(int32_t x, int32_t y, int32_t z, int32_t w,
                   int32_t /*s0*/, int32_t s1, int32_t s2, int32_t s3)
{
  const int32_t off_z = z*s3;
  const int32_t off_y = y*s3*s2;
  const int32_t off_x = x*s3*s2*s1;
  return w + off_z + off_y + off_x;
}
|
||||
|
||||
// Host reference convolution. Index order (slowest to fastest):
//   I: n, c, h, w    F: k, c, r, s    O: n, k, p, q
// Input positions that fall outside [0,H) x [0,W) contribute nothing.
// Every element of O is overwritten.
void cpp_conv_nchw(int32_t C, int32_t N, int32_t K,
                   int32_t H, int32_t W,
                   int32_t R, int32_t S,
                   int32_t pad_h, int32_t pad_w,
                   int32_t stride_h, int32_t stride_w,
                   int32_t P, int32_t Q,
                   float* O, float* I, float* F)
{
  for(int32_t k = 0; k < K; ++k)
  for(int32_t p = 0 ; p < P; ++p)
  for(int32_t q = 0; q < Q; ++q)
  for(int32_t n = 0; n < N; ++n)
  {
    // Top-left input position covered by the filter for output (p, q)
    const int32_t h0 = p*stride_h - pad_h;
    const int32_t w0 = q*stride_w - pad_w;
    float sum = 0;
    for(int32_t c = 0; c < C; ++c)
    for(int32_t r = 0; r < R; ++r)
    for(int32_t s = 0; s < S; ++s)
    {
      const int32_t h = h0 + r;
      const int32_t w = w0 + s;
      if(h < 0 || h >= H || w < 0 || w >= W)
        continue;
      const int32_t f_off = s + r*S + c*S*R + k*S*R*C;
      const int32_t i_off = w + h*W + c*W*H + n*W*H*C;
      sum += F[f_off]*I[i_off];
    }
    O[q + p*Q + k*Q*P + n*Q*P*K] = sum;
  }
}
|
||||
|
||||
// Host reference convolution. Index order (slowest to fastest):
//   I: c, h, w, n    F: c, r, s, k    O: k, p, q, n
// Input positions that fall outside [0,H) x [0,W) contribute nothing.
// Every element of O is overwritten.
void cpp_conv_chwn(int32_t C, int32_t N, int32_t K,
                   int32_t H, int32_t W,
                   int32_t R, int32_t S,
                   int32_t pad_h, int32_t pad_w,
                   int32_t stride_h, int32_t stride_w,
                   int32_t P, int32_t Q,
                   float* O, float* I, float* F)
{
  for(int32_t k = 0; k < K ; ++k)
  for(int32_t p = 0 ; p < P; ++p)
  for(int32_t q = 0; q < Q; ++q)
  for(int32_t n = 0; n < N; ++n)
  {
    // Top-left input position covered by the filter for output (p, q)
    const int32_t h0 = p*stride_h - pad_h;
    const int32_t w0 = q*stride_w - pad_w;
    float sum = 0;
    for(int32_t c = 0; c < C; ++c)
    for(int32_t r = 0; r < R; ++r)
    for(int32_t s = 0; s < S; ++s)
    {
      const int32_t h = h0 + r;
      const int32_t w = w0 + s;
      if(h < 0 || h >= H || w < 0 || w >= W)
        continue;
      const int32_t f_off = k + s*K + r*K*S + c*K*S*R;
      const int32_t i_off = n + w*N + h*N*W + c*N*W*H;
      sum += F[f_off]*I[i_off];
    }
    O[n + q*N + p*N*Q + k*N*Q*P] = sum;
  }
}
|
||||
|
||||
// Performance figure of a convolution: 2*P*Q*K*N*C*R*S operations divided by time*1e3.
// NOTE(review): the result is in TFLOPS only if `time` is in nanoseconds -- confirm
// against the unit returned by bench().
double get_tflops(uint64_t P, uint64_t Q, uint64_t K, uint64_t N, uint64_t C, uint64_t R, uint64_t S, double time){
  const uint64_t operations = 2*P*Q*K*N*C*R*S;
  const double scaled_time = time*1e3;
  return operations/scaled_time;
}
|
||||
|
||||
bool test = false;
|
||||
|
||||
int main(){
|
||||
auto ctx = drv::backend::contexts::get_default();
|
||||
int32_t dtsize = 4;
|
||||
|
||||
//Arguments
|
||||
|
||||
int32_t C = 1, N = 4, K = 32;
|
||||
int32_t H = 68, W = 260;
|
||||
int32_t R = 5, S = 5;
|
||||
int32_t pad_h = 0, pad_w = 0;
|
||||
int32_t stride_h = 1, stride_w = 1;
|
||||
int32_t P = (H - R + 1 + 2*pad_h)/stride_h, Q = (W - S + 1 + 2*pad_w)/stride_w;
|
||||
std::vector<float> iO(K*P*Q*N);
|
||||
std::vector<float> iI(C*H*W*N);
|
||||
std::vector<float> iF(C*R*S*K);
|
||||
drv::Buffer O(ctx, iO.size()*dtsize);
|
||||
drv::Buffer I(ctx, iI.size()*dtsize);
|
||||
for(size_t i = 0; i < iI.size(); ++i) iI[i] = (float)rand()/RAND_MAX;
|
||||
drv::Buffer F(ctx, iF.size()*dtsize);
|
||||
for(size_t i = 0; i < iF.size(); ++i) iF[i] = (float)rand()/RAND_MAX;
|
||||
drv::Stream queue(ctx);
|
||||
queue.write(O, true, 0, iO.size()*dtsize, iO.data());
|
||||
queue.write(I, true, 0, iI.size()*dtsize, iI.data());
|
||||
queue.write(F, true, 0, iF.size()*dtsize, iF.data());
|
||||
sc::scalar alpha(1., sc::FLOAT_TYPE);
|
||||
sc::scalar beta(1., sc::FLOAT_TYPE);
|
||||
|
||||
if(test)
|
||||
cpp_conv_chwn(C, N, K, H, W, R, S, pad_h, pad_w, stride_h, stride_w, P, Q, iO.data(), iI.data(), iF.data());
|
||||
std::vector<float> rO(iO.size());
|
||||
|
||||
|
||||
std::vector<int> rv = {2,4};
|
||||
std::vector<int> rl = {1,2,4};
|
||||
std::vector<int> rs = {1,2,4,8};
|
||||
float best = 0;
|
||||
for(size_t vec: rv)
|
||||
for(size_t bp: std::vector<int>{})
|
||||
for(size_t bq: std::vector<int>{1,2,4})
|
||||
for(size_t bn: rl)
|
||||
for(size_t bk: rl)
|
||||
for(size_t bf_n: rl)
|
||||
for(size_t ps: std::vector<int>{1,2,4})
|
||||
for(size_t qs: std::vector<int>{1,2,4})
|
||||
for(size_t ns: rs)
|
||||
for(size_t ks: rs)
|
||||
for(size_t crs_l: rl)
|
||||
for(size_t crs_s: std::vector<int>{1})
|
||||
for(size_t cs: std::vector<int>{1})
|
||||
for(size_t bc: std::vector<int>{1})
|
||||
for(size_t gridc: std::vector<int>{1})
|
||||
{
|
||||
// Compile
|
||||
isaac::templates::Conv conv(sc::FLOAT_TYPE, C, H, W, N, K, P, Q, R, S, pad_h, pad_w, stride_h, stride_w, vec, bp, bq, bn, bk, bf_n, ps, qs, ns, ks, crs_l, crs_s, cs, bc, gridc);
|
||||
std::string src;
|
||||
try{
|
||||
src = conv.dump(ctx.device(), "fconv");
|
||||
}catch(isaac::templates::invalid_parameters){
|
||||
continue;
|
||||
}
|
||||
drv::Module program(ctx, src, true);
|
||||
drv::Kernel kernel(program, "fconv");
|
||||
|
||||
//Launch
|
||||
float time;
|
||||
try{
|
||||
time = bench([&](){ conv.enqueue(kernel, queue, alpha, I, F, beta, O); },
|
||||
[&](){ queue.synchronize(); }, ctx.device());
|
||||
}catch(drv::exception::cuda::launch_out_of_resources){
|
||||
continue;
|
||||
}
|
||||
|
||||
//Report
|
||||
float tflops = get_tflops(P,Q,K,N,C,R,S,time);
|
||||
best = std::max(tflops, best);
|
||||
std::cout << "//" << vec << " " << bp << " " << bq << " " << bn << " " << bk << " " << bf_n << " " << ps << " " << qs << " " << ns << " " << ks << " " << crs_l << " " << crs_s << " " << cs << " " << bc << " " << gridc << ": " << std::setprecision(3) << tflops << " [ " << best << " ] " << std::endl;
|
||||
|
||||
//Test
|
||||
if(test){
|
||||
queue.read(O, true, 0, rO.size()*dtsize, rO.data());
|
||||
for(size_t i = 0 ; i < rO.size(); ++i)
|
||||
if(fabs((iO[i] - rO[i])/rO[i]) > 1e-4 || std::isnan(rO[i])) { std::cout << "// Failure at idx " << i << ": " << iO[i] << " != " << rO[i] << std::endl; exit(1); }
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//cuDNN
|
||||
float time = bench([&](){sc::driver::cudnnConv(sc::FLOAT_TYPE, ctx, queue, H, W, N, K, P, Q, C, R, S, pad_h, pad_w, stride_h, stride_w, alpha, I, F, beta, O); },
|
||||
[&](){ queue.synchronize(); }, ctx.device());
|
||||
float tflops = get_tflops(P,Q,K,N,C,R,S,time);
|
||||
std::cout << "TFLOPs: " << tflops << std::endl;
|
||||
}
|
84
examples/ptx-gemm.cpp
Normal file
84
examples/ptx-gemm.cpp
Normal file
@@ -0,0 +1,84 @@
|
||||
#include <sstream>
|
||||
#include <chrono>
|
||||
#include <exception>
|
||||
#include <iomanip>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <cassert>
|
||||
|
||||
#include "isaac/driver/backend.h"
|
||||
#include "isaac/driver/error.h"
|
||||
#include "isaac/driver/module.h"
|
||||
#include "isaac/driver/kernel.h"
|
||||
#include "isaac/driver/stream.h"
|
||||
#include "isaac/driver/buffer.h"
|
||||
|
||||
#include "isaac/driver/cublas.h"
|
||||
#include "isaac/half.hpp"
|
||||
|
||||
#include "isaac/tools/bench.hpp"
|
||||
#include "isaac/tools/collections.hpp"
|
||||
#include "isaac/templates/gemm.h"
|
||||
#include "isaac/templates/error.hpp"
|
||||
|
||||
namespace sc = isaac;
|
||||
namespace drv = isaac::driver;
|
||||
|
||||
void do_bench(int32_t M, int32_t N, int32_t K, sc::IsaacOperation_t AT, sc::IsaacOperation_t BT, sc::DType dtype){
|
||||
auto ctx = drv::backend::contexts::get_default();
|
||||
size_t dtsize = sc::size_of(dtype);
|
||||
|
||||
//Buffers
|
||||
int32_t AS0 = M, AS1 = K;
|
||||
int32_t BS0 = K, BS1 = N;
|
||||
if(AT=='T') std::swap(AS0, AS1);
|
||||
if(BT=='T') std::swap(BS0, BS1);
|
||||
int32_t ldc = M, lda = AS0, ldb = BS0;
|
||||
int32_t offc = 0, offa = 0, offb = 0;
|
||||
drv::Buffer C(ctx, M*N*dtsize);
|
||||
drv::Buffer A(ctx, M*K*dtsize);
|
||||
drv::Buffer B(ctx, K*N*dtsize);
|
||||
drv::Stream queue(ctx);
|
||||
sc::scalar alpha(1., dtype), beta(0., dtype);
|
||||
|
||||
// cuBlas
|
||||
double time = bench([&](){ sc::driver::cublasGemm(dtype, ctx, queue, AT, BT, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);}
|
||||
, [&](){ queue.synchronize(); }, ctx.device());
|
||||
std::cout << 2*1e-3*M*N*K/time << std::endl;
|
||||
|
||||
//Exhaustive search
|
||||
std::vector<int> r1 = {1};
|
||||
std::vector<int> rv = {4};
|
||||
std::vector<int> rr = {1, 2, 4};
|
||||
std::vector<int> rl = {2, 4, 8, 16, 32};
|
||||
std::vector<int> rs = {1, 2, 4, 8, 16};
|
||||
double best = 0;
|
||||
for(auto x: sc::cpp::cartesian({rv, rl, rl, rl, rs, r1, rs, rl, rl, rl, rl, r1, r1, r1}))
|
||||
{
|
||||
isaac::templates::GEMM gemm(dtype, AT, BT, M, N, K, offa, lda, offb, ldb, offc, ldc, x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11], x[12], x[13]);
|
||||
//Compile
|
||||
std::string src;
|
||||
try{
|
||||
src = gemm.dump(ctx.device(), "gemm");
|
||||
}catch(isaac::templates::invalid_parameters){
|
||||
continue;
|
||||
}
|
||||
drv::Module program(ctx, src, true);
|
||||
drv::Kernel kernel(program, "gemm");
|
||||
//Launch
|
||||
double time;
|
||||
try{
|
||||
time = bench([&](){ gemm.enqueue(kernel, queue, alpha, A, B, beta, C); }, [&](){ queue.synchronize(); }, ctx.device());
|
||||
}catch(drv::exception::cuda::launch_out_of_resources){
|
||||
continue;
|
||||
}
|
||||
//Report
|
||||
double tflops = 2*1e-3*M*N*K/time;
|
||||
best = std::max(tflops, best);
|
||||
std::cout << "//" << x[0] << " " << x[1] << " " << x[2] << " " << x[3] << " " << x[4] << " " << x[5] << " " << x[6] << " " << x[7] << " " << x[8] << " " << x[9] << " " << x[10] << " " << x[11] << " " << x[12] << " " << x[13] << " " << std::setprecision(3) << tflops << " [ " << best << " ] " << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// Entry point: benchmarks a 2048 x 2048 x 2048 FLOAT_TYPE GEMM with A not transposed
// and B transposed.
int main(){
  const int32_t extent = 2048;
  do_bench(extent, extent, extent, sc::ISAAC_OP_N, sc::ISAAC_OP_T, sc::FLOAT_TYPE);
  return 0;
}
|
53
include/external/clBLAS-complex.h
vendored
53
include/external/clBLAS-complex.h
vendored
@@ -1,53 +0,0 @@
|
||||
/* ************************************************************************
|
||||
* Copyright 2013 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
* ************************************************************************/
|
||||
|
||||
|
||||
#ifndef CLBLAS_COMPLEX_H_
|
||||
#define CLBLAS_COMPLEX_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef cl_float2 FloatComplex;
|
||||
typedef cl_double2 DoubleComplex;
|
||||
|
||||
static __inline FloatComplex
|
||||
floatComplex(float real, float imag)
|
||||
{
|
||||
FloatComplex z;
|
||||
z.s[0] = real;
|
||||
z.s[1] = imag;
|
||||
return z;
|
||||
}
|
||||
|
||||
static __inline DoubleComplex
|
||||
doubleComplex(double real, double imag)
|
||||
{
|
||||
DoubleComplex z;
|
||||
z.s[0] = real;
|
||||
z.s[1] = imag;
|
||||
return z;
|
||||
}
|
||||
|
||||
#define CREAL(v) ((v).s[0])
|
||||
#define CIMAG(v) ((v).s[1])
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" { */
|
||||
#endif
|
||||
|
||||
#endif /* CLBLAS_COMPLEX_H_ */
|
10096
include/external/clBLAS.h
vendored
10096
include/external/clBLAS.h
vendored
File diff suppressed because it is too large
Load Diff
22
include/external/clBLAS.version.h
vendored
22
include/external/clBLAS.version.h
vendored
@@ -1,22 +0,0 @@
|
||||
/* ************************************************************************
|
||||
* Copyright 2013 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
* ************************************************************************/
|
||||
|
||||
|
||||
/* the configured version and settings for clblas
|
||||
*/
|
||||
#define clblasVersionMajor 2
|
||||
#define clblasVersionMinor 6
|
||||
#define clblasVersionPatch 0
|
64
include/external/cuda/builtin_types.h
vendored
64
include/external/cuda/builtin_types.h
vendored
@@ -1,64 +0,0 @@
|
||||
/*
|
||||
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
#include "device_types.h"
|
||||
#if !defined(__CUDACC_RTC__)
|
||||
#define EXCLUDE_FROM_RTC
|
||||
#include "driver_types.h"
|
||||
#undef EXCLUDE_FROM_RTC
|
||||
#endif /* !__CUDACC_RTC__ */
|
||||
#include "surface_types.h"
|
||||
#include "texture_types.h"
|
||||
#include "vector_types.h"
|
412
include/external/cuda/channel_descriptor.h
vendored
412
include/external/cuda/channel_descriptor.h
vendored
@@ -1,412 +0,0 @@
|
||||
/*
|
||||
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#if !defined(__CHANNEL_DESCRIPTOR_H__)
|
||||
#define __CHANNEL_DESCRIPTOR_H__
|
||||
|
||||
#if defined(__cplusplus)
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
#include "driver_types.h"
|
||||
#include "cuda_runtime_api.h"
|
||||
#include "host_defines.h"
|
||||
#include "vector_types.h"
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
/**
|
||||
* \addtogroup CUDART_HIGHLEVEL
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* \brief \hl Returns a channel descriptor using the specified format
|
||||
*
|
||||
* Returns a channel descriptor with format \p f and number of bits of each
|
||||
* component \p x, \p y, \p z, and \p w. The ::cudaChannelFormatDesc is
|
||||
* defined as:
|
||||
* \code
|
||||
struct cudaChannelFormatDesc {
|
||||
int x, y, z, w;
|
||||
enum cudaChannelFormatKind f;
|
||||
};
|
||||
* \endcode
|
||||
*
|
||||
* where ::cudaChannelFormatKind is one of ::cudaChannelFormatKindSigned,
|
||||
* ::cudaChannelFormatKindUnsigned, or ::cudaChannelFormatKindFloat.
|
||||
*
|
||||
* \return
|
||||
* Channel descriptor with format \p f
|
||||
*
|
||||
* \sa \ref ::cudaCreateChannelDesc(int,int,int,int,cudaChannelFormatKind) "cudaCreateChannelDesc (Low level)",
|
||||
* ::cudaGetChannelDesc, ::cudaGetTextureReference,
|
||||
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t) "cudaBindTexture (High level)",
|
||||
* \ref ::cudaBindTexture(size_t*, const struct texture< T, dim, readMode>&, const void*, size_t) "cudaBindTexture (High level, inherited channel descriptor)",
|
||||
* \ref ::cudaBindTexture2D(size_t*, const struct texture< T, dim, readMode>&, const void*, const struct cudaChannelFormatDesc&, size_t, size_t, size_t) "cudaBindTexture2D (High level)",
|
||||
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, cudaArray_const_t, const struct cudaChannelFormatDesc&) "cudaBindTextureToArray (High level)",
|
||||
* \ref ::cudaBindTextureToArray(const struct texture< T, dim, readMode>&, cudaArray_const_t) "cudaBindTextureToArray (High level, inherited channel descriptor)",
|
||||
* \ref ::cudaUnbindTexture(const struct texture< T, dim, readMode>&) "cudaUnbindTexture (High level)",
|
||||
* \ref ::cudaGetTextureAlignmentOffset(size_t*, const struct texture< T, dim, readMode>&) "cudaGetTextureAlignmentOffset (High level)"
|
||||
*/
|
||||
template<typename T> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc(void)
{
  /* Generic fallback used when no explicit specialization exists for T:
   * every component is 0 bits wide and the kind is cudaChannelFormatKindNone. */
  const cudaChannelFormatDesc unspecified =
      cudaCreateChannelDesc(0, 0, 0, 0, cudaChannelFormatKindNone);

  return unspecified;
}
|
||||
|
||||
/* Float-kind descriptors whose populated components are each as wide as an
 * unsigned short (sizeof(unsigned short) * 8 bits). */

/* One populated component (x). */
static __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDescHalf(void)
{
  return cudaCreateChannelDesc((int)sizeof(unsigned short) * 8, 0, 0, 0, cudaChannelFormatKindFloat);
}

/* One populated component (x); same result as cudaCreateChannelDescHalf. */
static __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDescHalf1(void)
{
  return cudaCreateChannelDesc((int)sizeof(unsigned short) * 8, 0, 0, 0, cudaChannelFormatKindFloat);
}

/* Two populated components (x, y). */
static __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDescHalf2(void)
{
  const int bits = (int)sizeof(unsigned short) * 8;

  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindFloat);
}

/* Four populated components (x, y, z, w). */
static __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDescHalf4(void)
{
  const int bits = (int)sizeof(unsigned short) * 8;

  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindFloat);
}
|
||||
|
||||
/* Plain char: one component of sizeof(char) * 8 bits. The kind is Unsigned when
 * the compiler defines _CHAR_UNSIGNED or __CHAR_UNSIGNED__, Signed otherwise. */
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<char>(void)
{
#if defined(_CHAR_UNSIGNED) || defined(__CHAR_UNSIGNED__)
  const enum cudaChannelFormatKind kind = cudaChannelFormatKindUnsigned;
#else /* _CHAR_UNSIGNED || __CHAR_UNSIGNED__ */
  const enum cudaChannelFormatKind kind = cudaChannelFormatKindSigned;
#endif /* _CHAR_UNSIGNED || __CHAR_UNSIGNED__ */

  return cudaCreateChannelDesc((int)sizeof(char) * 8, 0, 0, 0, kind);
}

/* signed char: one Signed component of sizeof(signed char) * 8 bits. */
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<signed char>(void)
{
  return cudaCreateChannelDesc((int)sizeof(signed char) * 8, 0, 0, 0, cudaChannelFormatKindSigned);
}

/* unsigned char: one Unsigned component of sizeof(unsigned char) * 8 bits. */
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<unsigned char>(void)
{
  return cudaCreateChannelDesc((int)sizeof(unsigned char) * 8, 0, 0, 0, cudaChannelFormatKindUnsigned);
}
|
||||
|
||||
/* char/uchar vector types: the numeric suffix is the number of populated
 * components; each is sizeof(signed char) * 8 or sizeof(unsigned char) * 8 bits. */

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<char1>(void)
{
  return cudaCreateChannelDesc((int)sizeof(signed char) * 8, 0, 0, 0, cudaChannelFormatKindSigned);
}

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<uchar1>(void)
{
  return cudaCreateChannelDesc((int)sizeof(unsigned char) * 8, 0, 0, 0, cudaChannelFormatKindUnsigned);
}

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<char2>(void)
{
  const int bits = (int)sizeof(signed char) * 8;

  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindSigned);
}

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<uchar2>(void)
{
  const int bits = (int)sizeof(unsigned char) * 8;

  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindUnsigned);
}

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<char4>(void)
{
  const int bits = (int)sizeof(signed char) * 8;

  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindSigned);
}

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<uchar4>(void)
{
  const int bits = (int)sizeof(unsigned char) * 8;

  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindUnsigned);
}
|
||||
|
||||
/* short: one Signed component of sizeof(short) * 8 bits. */
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<short>(void)
{
  return cudaCreateChannelDesc((int)sizeof(short) * 8, 0, 0, 0, cudaChannelFormatKindSigned);
}

/* unsigned short: one Unsigned component of sizeof(unsigned short) * 8 bits. */
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<unsigned short>(void)
{
  return cudaCreateChannelDesc((int)sizeof(unsigned short) * 8, 0, 0, 0, cudaChannelFormatKindUnsigned);
}
|
||||
|
||||
/* short/ushort vector types: the numeric suffix is the number of populated
 * components; each is sizeof(short) * 8 or sizeof(unsigned short) * 8 bits. */

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<short1>(void)
{
  return cudaCreateChannelDesc((int)sizeof(short) * 8, 0, 0, 0, cudaChannelFormatKindSigned);
}

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<ushort1>(void)
{
  return cudaCreateChannelDesc((int)sizeof(unsigned short) * 8, 0, 0, 0, cudaChannelFormatKindUnsigned);
}

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<short2>(void)
{
  const int bits = (int)sizeof(short) * 8;

  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindSigned);
}

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<ushort2>(void)
{
  const int bits = (int)sizeof(unsigned short) * 8;

  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindUnsigned);
}

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<short4>(void)
{
  const int bits = (int)sizeof(short) * 8;

  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindSigned);
}

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<ushort4>(void)
{
  const int bits = (int)sizeof(unsigned short) * 8;

  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindUnsigned);
}
|
||||
|
||||
/* int: one Signed component of sizeof(int) * 8 bits. */
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<int>(void)
{
  return cudaCreateChannelDesc((int)sizeof(int) * 8, 0, 0, 0, cudaChannelFormatKindSigned);
}

/* unsigned int: one Unsigned component of sizeof(unsigned int) * 8 bits. */
template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<unsigned int>(void)
{
  return cudaCreateChannelDesc((int)sizeof(unsigned int) * 8, 0, 0, 0, cudaChannelFormatKindUnsigned);
}
|
||||
|
||||
/* int/uint vector types: the numeric suffix is the number of populated
 * components; each is sizeof(int) * 8 or sizeof(unsigned int) * 8 bits. */

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<int1>(void)
{
  return cudaCreateChannelDesc((int)sizeof(int) * 8, 0, 0, 0, cudaChannelFormatKindSigned);
}

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<uint1>(void)
{
  return cudaCreateChannelDesc((int)sizeof(unsigned int) * 8, 0, 0, 0, cudaChannelFormatKindUnsigned);
}

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<int2>(void)
{
  const int bits = (int)sizeof(int) * 8;

  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindSigned);
}

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<uint2>(void)
{
  const int bits = (int)sizeof(unsigned int) * 8;

  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindUnsigned);
}

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<int4>(void)
{
  const int bits = (int)sizeof(int) * 8;

  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindSigned);
}

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<uint4>(void)
{
  const int bits = (int)sizeof(unsigned int) * 8;

  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindUnsigned);
}
|
||||
|
||||
#if !defined(__LP64__)

/* long/ulong specializations, compiled only when __LP64__ is not defined.
 * Component width is sizeof(long) * 8 or sizeof(unsigned long) * 8 bits; for
 * the vector types the numeric suffix is the number of populated components. */

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<long>(void)
{
  return cudaCreateChannelDesc((int)sizeof(long) * 8, 0, 0, 0, cudaChannelFormatKindSigned);
}

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<unsigned long>(void)
{
  return cudaCreateChannelDesc((int)sizeof(unsigned long) * 8, 0, 0, 0, cudaChannelFormatKindUnsigned);
}

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<long1>(void)
{
  return cudaCreateChannelDesc((int)sizeof(long) * 8, 0, 0, 0, cudaChannelFormatKindSigned);
}

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<ulong1>(void)
{
  return cudaCreateChannelDesc((int)sizeof(unsigned long) * 8, 0, 0, 0, cudaChannelFormatKindUnsigned);
}

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<long2>(void)
{
  const int bits = (int)sizeof(long) * 8;

  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindSigned);
}

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<ulong2>(void)
{
  const int bits = (int)sizeof(unsigned long) * 8;

  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindUnsigned);
}

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<long4>(void)
{
  const int bits = (int)sizeof(long) * 8;

  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindSigned);
}

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<ulong4>(void)
{
  const int bits = (int)sizeof(unsigned long) * 8;

  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindUnsigned);
}

#endif /* !__LP64__ */
|
||||
|
||||
/* float specializations: Float kind, each populated component is
 * sizeof(float) * 8 bits; for the vector types the numeric suffix is the
 * number of populated components. */

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<float>(void)
{
  return cudaCreateChannelDesc((int)sizeof(float) * 8, 0, 0, 0, cudaChannelFormatKindFloat);
}

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<float1>(void)
{
  return cudaCreateChannelDesc((int)sizeof(float) * 8, 0, 0, 0, cudaChannelFormatKindFloat);
}

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<float2>(void)
{
  const int bits = (int)sizeof(float) * 8;

  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindFloat);
}

template<> __inline__ __host__ cudaChannelFormatDesc cudaCreateChannelDesc<float4>(void)
{
  const int bits = (int)sizeof(float) * 8;

  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindFloat);
}
|
||||
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/** @} */
|
||||
/** @} */ /* END CUDART_HIGHLEVEL */
|
||||
|
||||
#endif /* !__CHANNEL_DESCRIPTOR_H__ */
|
338
include/external/cuda/cuComplex.h
vendored
338
include/external/cuda/cuComplex.h
vendored
@@ -1,338 +0,0 @@
|
||||
/*
|
||||
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#if !defined(CU_COMPLEX_H_)
|
||||
#define CU_COMPLEX_H_
|
||||
|
||||
/* When trying to include C header file in C++ Code extern "C" is required
|
||||
* But the Standard QNX headers already have ifdef extern in them when compiling C++ Code
|
||||
* extern "C" cannot be nested
|
||||
* Hence keep the header out of extern "C" block
|
||||
*/
|
||||
|
||||
#include <math.h> /* import fabsf, sqrt */
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#include "vector_types.h"
|
||||
|
||||
typedef float2 cuFloatComplex;
|
||||
|
||||
__host__ __device__ static __inline__ float cuCrealf (cuFloatComplex x)
|
||||
{
|
||||
return x.x;
|
||||
}
|
||||
|
||||
__host__ __device__ static __inline__ float cuCimagf (cuFloatComplex x)
|
||||
{
|
||||
return x.y;
|
||||
}
|
||||
|
||||
__host__ __device__ static __inline__ cuFloatComplex make_cuFloatComplex
|
||||
(float r, float i)
|
||||
{
|
||||
cuFloatComplex res;
|
||||
res.x = r;
|
||||
res.y = i;
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Complex conjugate: same real part, negated imaginary part. */
__host__ __device__ static __inline__ cuFloatComplex cuConjf (cuFloatComplex x)
{
    const float re = cuCrealf(x);
    const float im = -cuCimagf(x);
    return make_cuFloatComplex (re, im);
}

/* Component-wise sum x + y. */
__host__ __device__ static __inline__ cuFloatComplex cuCaddf (cuFloatComplex x,
                                                              cuFloatComplex y)
{
    const float re = cuCrealf(x) + cuCrealf(y);
    const float im = cuCimagf(x) + cuCimagf(y);
    return make_cuFloatComplex (re, im);
}

/* Component-wise difference x - y. */
__host__ __device__ static __inline__ cuFloatComplex cuCsubf (cuFloatComplex x,
                                                              cuFloatComplex y)
{
    const float re = cuCrealf(x) - cuCrealf(y);
    const float im = cuCimagf(x) - cuCimagf(y);
    return make_cuFloatComplex (re, im);
}
|
||||
|
||||
/* This implementation could suffer from intermediate overflow even though
|
||||
* the final result would be in range. However, various implementations do
|
||||
* not guard against this (presumably to avoid losing performance), so we
|
||||
* don't do it either to stay competitive.
|
||||
*/
|
||||
/* Complex product x * y, computed directly with no overflow guarding:
 *   real = xr*yr - xi*yi,  imag = xr*yi + xi*yr. */
__host__ __device__ static __inline__ cuFloatComplex cuCmulf (cuFloatComplex x,
                                                              cuFloatComplex y)
{
    const float re = (cuCrealf(x) * cuCrealf(y)) - (cuCimagf(x) * cuCimagf(y));
    const float im = (cuCrealf(x) * cuCimagf(y)) + (cuCimagf(x) * cuCrealf(y));
    return make_cuFloatComplex (re, im);
}
|
||||
|
||||
/* This implementation guards against intermediate underflow and overflow
|
||||
* by scaling. Such guarded implementations are usually the default for
|
||||
* complex library implementations, with some also offering an unguarded,
|
||||
* faster version.
|
||||
*/
|
||||
/* Complex quotient x / y. Both operands are first scaled by
 * 1 / (|Re y| + |Im y|); the quotient is then formed as
 * x_scaled * conj(y_scaled) / |y_scaled|^2. */
__host__ __device__ static __inline__ cuFloatComplex cuCdivf (cuFloatComplex x,
                                                              cuFloatComplex y)
{
    float scale = fabsf(cuCrealf(y)) + fabsf(cuCimagf(y));
    float inv = 1.0f / scale;
    /* Scaled operands. */
    float xr = cuCrealf(x) * inv;
    float xi = cuCimagf(x) * inv;
    float yr = cuCrealf(y) * inv;
    float yi = cuCimagf(y) * inv;
    /* Squared magnitude of the scaled divisor. */
    float denom = (yr * yr) + (yi * yi);

    inv = 1.0f / denom;
    return make_cuFloatComplex (((xr * yr) + (xi * yi)) * inv,
                                ((xi * yr) - (xr * yi)) * inv);
}
|
||||
|
||||
/*
|
||||
* We would like to call hypotf(), but it's not available on all platforms.
|
||||
* This discrete implementation guards against intermediate underflow and
|
||||
* overflow by scaling. Otherwise we would lose half the exponent range.
|
||||
* There are various ways of doing guarded computation. For now we chose the
|
||||
* simplest and fastest solution, however this may suffer from inaccuracies
|
||||
* if sqrt and division are not IEEE compliant.
|
||||
*/
|
||||
/* Magnitude of a single-precision complex value, computed as
 * hi * sqrt(1 + (lo/hi)^2) where hi/lo are the larger/smaller of |Re| and |Im|.
 * When hi is zero, or either magnitude exceeds 3.402823466e38f, the result is
 * hi + lo instead. */
__host__ __device__ static __inline__ float cuCabsf (cuFloatComplex x)
{
    float re = fabsf(cuCrealf(x));
    float im = fabsf(cuCimagf(x));
    /* When the comparison is false (including ties), im is taken as hi. */
    float hi = (re > im) ? re : im;
    float lo = (re > im) ? im : re;
    float mag;

    mag = lo / hi;
    mag = 1.0f + mag * mag;
    mag = hi * sqrtf(mag);
    /* Checked after the scaled computation, exactly as the ratio form cannot
     * handle these inputs: zero divisor or magnitudes above the float maximum. */
    if ((hi == 0.0f) || (hi > 3.402823466e38f) || (lo > 3.402823466e38f)) {
        mag = hi + lo;
    }
    return mag;
}
|
||||
|
||||
/* Double precision */
|
||||
typedef double2 cuDoubleComplex;
|
||||
|
||||
__host__ __device__ static __inline__ double cuCreal (cuDoubleComplex x)
|
||||
{
|
||||
return x.x;
|
||||
}
|
||||
|
||||
__host__ __device__ static __inline__ double cuCimag (cuDoubleComplex x)
|
||||
{
|
||||
return x.y;
|
||||
}
|
||||
|
||||
__host__ __device__ static __inline__ cuDoubleComplex make_cuDoubleComplex
|
||||
(double r, double i)
|
||||
{
|
||||
cuDoubleComplex res;
|
||||
res.x = r;
|
||||
res.y = i;
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Complex conjugate: same real part, negated imaginary part. */
__host__ __device__ static __inline__ cuDoubleComplex cuConj(cuDoubleComplex x)
{
    const double re = cuCreal(x);
    const double im = -cuCimag(x);
    return make_cuDoubleComplex (re, im);
}

/* Component-wise sum x + y. */
__host__ __device__ static __inline__ cuDoubleComplex cuCadd(cuDoubleComplex x,
                                                             cuDoubleComplex y)
{
    const double re = cuCreal(x) + cuCreal(y);
    const double im = cuCimag(x) + cuCimag(y);
    return make_cuDoubleComplex (re, im);
}

/* Component-wise difference x - y. */
__host__ __device__ static __inline__ cuDoubleComplex cuCsub(cuDoubleComplex x,
                                                             cuDoubleComplex y)
{
    const double re = cuCreal(x) - cuCreal(y);
    const double im = cuCimag(x) - cuCimag(y);
    return make_cuDoubleComplex (re, im);
}
|
||||
|
||||
/* This implementation could suffer from intermediate overflow even though
|
||||
* the final result would be in range. However, various implementations do
|
||||
* not guard against this (presumably to avoid losing performance), so we
|
||||
* don't do it either to stay competitive.
|
||||
*/
|
||||
/* Complex product x * y, computed directly with no overflow guarding:
 *   real = xr*yr - xi*yi,  imag = xr*yi + xi*yr. */
__host__ __device__ static __inline__ cuDoubleComplex cuCmul(cuDoubleComplex x,
                                                             cuDoubleComplex y)
{
    const double re = (cuCreal(x) * cuCreal(y)) - (cuCimag(x) * cuCimag(y));
    const double im = (cuCreal(x) * cuCimag(y)) + (cuCimag(x) * cuCreal(y));
    return make_cuDoubleComplex (re, im);
}
|
||||
|
||||
/* This implementation guards against intermediate underflow and overflow
|
||||
* by scaling. Such guarded implementations are usually the default for
|
||||
* complex library implementations, with some also offering an unguarded,
|
||||
* faster version.
|
||||
*/
|
||||
/* Complex quotient x / y. Both operands are first scaled by
 * 1 / (|Re y| + |Im y|); the quotient is then formed as
 * x_scaled * conj(y_scaled) / |y_scaled|^2. */
__host__ __device__ static __inline__ cuDoubleComplex cuCdiv(cuDoubleComplex x,
                                                             cuDoubleComplex y)
{
    double scale = (fabs(cuCreal(y))) + (fabs(cuCimag(y)));
    double inv = 1.0 / scale;
    /* Scaled operands. */
    double xr = cuCreal(x) * inv;
    double xi = cuCimag(x) * inv;
    double yr = cuCreal(y) * inv;
    double yi = cuCimag(y) * inv;
    /* Squared magnitude of the scaled divisor. */
    double denom = (yr * yr) + (yi * yi);

    inv = 1.0 / denom;
    return make_cuDoubleComplex (((xr * yr) + (xi * yi)) * inv,
                                 ((xi * yr) - (xr * yi)) * inv);
}
|
||||
|
||||
/* This implementation guards against intermediate underflow and overflow
|
||||
* by scaling. Otherwise we would lose half the exponent range. There are
|
||||
* various ways of doing guarded computation. For now we chose the simplest
|
||||
* and fastest solution, however this may suffer from inaccuracies if sqrt
|
||||
* and division are not IEEE compliant.
|
||||
*/
|
||||
/* Magnitude of a double-precision complex value, computed as
 * hi * sqrt(1 + (lo/hi)^2) where hi/lo are the larger/smaller of |Re| and |Im|.
 * When hi is zero, or either magnitude exceeds 1.79769313486231570e+308, the
 * result is hi + lo instead. */
__host__ __device__ static __inline__ double cuCabs (cuDoubleComplex x)
{
    double re = fabs(cuCreal(x));
    double im = fabs(cuCimag(x));
    /* When the comparison is false (including ties), im is taken as hi. */
    double hi = (re > im) ? re : im;
    double lo = (re > im) ? im : re;
    double mag;

    mag = lo / hi;
    mag = 1.0 + mag * mag;
    mag = hi * sqrt(mag);
    /* Checked after the scaled computation: zero divisor or magnitudes above
     * the double maximum fall back to the plain sum. */
    if ((hi == 0.0) ||
        (hi > 1.79769313486231570e+308) || (lo > 1.79769313486231570e+308)) {
        mag = hi + lo;
    }
    return mag;
}
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/* aliases */
|
||||
typedef cuFloatComplex cuComplex;
|
||||
/* Alias constructor: cuComplex is cuFloatComplex, so this forwards to
 * make_cuFloatComplex with x as the real part and y as the imaginary part. */
__host__ __device__ static __inline__ cuComplex make_cuComplex (float x,
                                                                float y)
{
    const cuComplex z = make_cuFloatComplex (x, y);
    return z;
}
|
||||
|
||||
/* float-to-double promotion */
|
||||
__host__ __device__ static __inline__ cuDoubleComplex cuComplexFloatToDouble
|
||||
(cuFloatComplex c)
|
||||
{
|
||||
return make_cuDoubleComplex ((double)cuCrealf(c), (double)cuCimagf(c));
|
||||
}
|
||||
|
||||
__host__ __device__ static __inline__ cuFloatComplex cuComplexDoubleToFloat
|
||||
(cuDoubleComplex c)
|
||||
{
|
||||
return make_cuFloatComplex ((float)cuCreal(c), (float)cuCimag(c));
|
||||
}
|
||||
|
||||
|
||||
/* Complex multiply-add x * y + d in single precision. Each part is accumulated
 * in two steps: first the term using Re(x) plus the addend, then the term
 * using Im(x). */
__host__ __device__ static __inline__ cuComplex cuCfmaf( cuComplex x, cuComplex y, cuComplex d)
{
    float re = (cuCrealf(x) * cuCrealf(y)) + cuCrealf(d);
    float im = (cuCrealf(x) * cuCimagf(y)) + cuCimagf(d);

    re = -(cuCimagf(x) * cuCimagf(y)) + re;
    im =  (cuCimagf(x) * cuCrealf(y)) + im;

    return make_cuComplex(re, im);
}
|
||||
|
||||
/* Complex multiply-add x * y + d in double precision. Each part is accumulated
 * in two steps: first the term using Re(x) plus the addend, then the term
 * using Im(x). */
__host__ __device__ static __inline__ cuDoubleComplex cuCfma( cuDoubleComplex x, cuDoubleComplex y, cuDoubleComplex d)
{
    double re = (cuCreal(x) * cuCreal(y)) + cuCreal(d);
    double im = (cuCreal(x) * cuCimag(y)) + cuCimag(d);

    re = -(cuCimag(x) * cuCimag(y)) + re;
    im =  (cuCimag(x) * cuCreal(y)) + im;

    return make_cuDoubleComplex(re, im);
}
|
||||
|
||||
#endif /* !defined(CU_COMPLEX_H_) */
|
565
include/external/cuda/cublas.h
vendored
565
include/external/cuda/cublas.h
vendored
@@ -1,565 +0,0 @@
|
||||
/*
|
||||
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This is the public header file for the CUBLAS library, defining the API
|
||||
*
|
||||
* CUBLAS is an implementation of BLAS (Basic Linear Algebra Subroutines)
|
||||
* on top of the CUDA runtime.
|
||||
*/
|
||||
|
||||
#if !defined(CUBLAS_H_)
|
||||
#define CUBLAS_H_
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#ifndef CUBLASWINAPI
|
||||
#ifdef _WIN32
|
||||
#define CUBLASWINAPI __stdcall
|
||||
#else
|
||||
#define CUBLASWINAPI
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#undef CUBLASAPI
|
||||
#ifdef __CUDACC__
|
||||
#define CUBLASAPI __host__
|
||||
#else
|
||||
#define CUBLASAPI
|
||||
#endif
|
||||
|
||||
#include "cublas_api.h"
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* CUBLAS data types */
|
||||
#define cublasStatus cublasStatus_t
|
||||
|
||||
cublasStatus CUBLASWINAPI cublasInit (void);
|
||||
cublasStatus CUBLASWINAPI cublasShutdown (void);
|
||||
cublasStatus CUBLASWINAPI cublasGetError (void);
|
||||
|
||||
cublasStatus CUBLASWINAPI cublasGetVersion(int *version);
|
||||
cublasStatus CUBLASWINAPI cublasAlloc (int n, int elemSize, void **devicePtr);
|
||||
|
||||
cublasStatus CUBLASWINAPI cublasFree (void *devicePtr);
|
||||
|
||||
|
||||
cublasStatus CUBLASWINAPI cublasSetKernelStream (cudaStream_t stream);
|
||||
|
||||
|
||||
|
||||
/* ---------------- CUBLAS BLAS1 functions ---------------- */
|
||||
/* NRM2 */
|
||||
float CUBLASWINAPI cublasSnrm2 (int n, const float *x, int incx);
|
||||
double CUBLASWINAPI cublasDnrm2 (int n, const double *x, int incx);
|
||||
float CUBLASWINAPI cublasScnrm2 (int n, const cuComplex *x, int incx);
|
||||
double CUBLASWINAPI cublasDznrm2 (int n, const cuDoubleComplex *x, int incx);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* DOT */
|
||||
float CUBLASWINAPI cublasSdot (int n, const float *x, int incx, const float *y,
|
||||
int incy);
|
||||
double CUBLASWINAPI cublasDdot (int n, const double *x, int incx, const double *y,
|
||||
int incy);
|
||||
cuComplex CUBLASWINAPI cublasCdotu (int n, const cuComplex *x, int incx, const cuComplex *y,
|
||||
int incy);
|
||||
cuComplex CUBLASWINAPI cublasCdotc (int n, const cuComplex *x, int incx, const cuComplex *y,
|
||||
int incy);
|
||||
cuDoubleComplex CUBLASWINAPI cublasZdotu (int n, const cuDoubleComplex *x, int incx, const cuDoubleComplex *y,
|
||||
int incy);
|
||||
cuDoubleComplex CUBLASWINAPI cublasZdotc (int n, const cuDoubleComplex *x, int incx, const cuDoubleComplex *y,
|
||||
int incy);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* SCAL */
|
||||
void CUBLASWINAPI cublasSscal (int n, float alpha, float *x, int incx);
|
||||
void CUBLASWINAPI cublasDscal (int n, double alpha, double *x, int incx);
|
||||
void CUBLASWINAPI cublasCscal (int n, cuComplex alpha, cuComplex *x, int incx);
|
||||
void CUBLASWINAPI cublasZscal (int n, cuDoubleComplex alpha, cuDoubleComplex *x, int incx);
|
||||
|
||||
void CUBLASWINAPI cublasCsscal (int n, float alpha, cuComplex *x, int incx);
|
||||
void CUBLASWINAPI cublasZdscal (int n, double alpha, cuDoubleComplex *x, int incx);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* AXPY */
|
||||
void CUBLASWINAPI cublasSaxpy (int n, float alpha, const float *x, int incx,
|
||||
float *y, int incy);
|
||||
void CUBLASWINAPI cublasDaxpy (int n, double alpha, const double *x,
|
||||
int incx, double *y, int incy);
|
||||
void CUBLASWINAPI cublasCaxpy (int n, cuComplex alpha, const cuComplex *x,
|
||||
int incx, cuComplex *y, int incy);
|
||||
void CUBLASWINAPI cublasZaxpy (int n, cuDoubleComplex alpha, const cuDoubleComplex *x,
|
||||
int incx, cuDoubleComplex *y, int incy);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* COPY */
|
||||
void CUBLASWINAPI cublasScopy (int n, const float *x, int incx, float *y,
|
||||
int incy);
|
||||
void CUBLASWINAPI cublasDcopy (int n, const double *x, int incx, double *y,
|
||||
int incy);
|
||||
void CUBLASWINAPI cublasCcopy (int n, const cuComplex *x, int incx, cuComplex *y,
|
||||
int incy);
|
||||
void CUBLASWINAPI cublasZcopy (int n, const cuDoubleComplex *x, int incx, cuDoubleComplex *y,
|
||||
int incy);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* SWAP */
|
||||
void CUBLASWINAPI cublasSswap (int n, float *x, int incx, float *y, int incy);
|
||||
void CUBLASWINAPI cublasDswap (int n, double *x, int incx, double *y, int incy);
|
||||
void CUBLASWINAPI cublasCswap (int n, cuComplex *x, int incx, cuComplex *y, int incy);
|
||||
void CUBLASWINAPI cublasZswap (int n, cuDoubleComplex *x, int incx, cuDoubleComplex *y, int incy);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* AMAX */
|
||||
int CUBLASWINAPI cublasIsamax (int n, const float *x, int incx);
|
||||
int CUBLASWINAPI cublasIdamax (int n, const double *x, int incx);
|
||||
int CUBLASWINAPI cublasIcamax (int n, const cuComplex *x, int incx);
|
||||
int CUBLASWINAPI cublasIzamax (int n, const cuDoubleComplex *x, int incx);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* AMIN */
|
||||
int CUBLASWINAPI cublasIsamin (int n, const float *x, int incx);
|
||||
int CUBLASWINAPI cublasIdamin (int n, const double *x, int incx);
|
||||
|
||||
int CUBLASWINAPI cublasIcamin (int n, const cuComplex *x, int incx);
|
||||
int CUBLASWINAPI cublasIzamin (int n, const cuDoubleComplex *x, int incx);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* ASUM */
|
||||
float CUBLASWINAPI cublasSasum (int n, const float *x, int incx);
|
||||
double CUBLASWINAPI cublasDasum (int n, const double *x, int incx);
|
||||
float CUBLASWINAPI cublasScasum (int n, const cuComplex *x, int incx);
|
||||
double CUBLASWINAPI cublasDzasum (int n, const cuDoubleComplex *x, int incx);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* ROT */
|
||||
void CUBLASWINAPI cublasSrot (int n, float *x, int incx, float *y, int incy,
|
||||
float sc, float ss);
|
||||
void CUBLASWINAPI cublasDrot (int n, double *x, int incx, double *y, int incy,
|
||||
double sc, double ss);
|
||||
void CUBLASWINAPI cublasCrot (int n, cuComplex *x, int incx, cuComplex *y,
|
||||
int incy, float c, cuComplex s);
|
||||
void CUBLASWINAPI cublasZrot (int n, cuDoubleComplex *x, int incx,
|
||||
cuDoubleComplex *y, int incy, double sc,
|
||||
cuDoubleComplex cs);
|
||||
void CUBLASWINAPI cublasCsrot (int n, cuComplex *x, int incx, cuComplex *y,
|
||||
int incy, float c, float s);
|
||||
void CUBLASWINAPI cublasZdrot (int n, cuDoubleComplex *x, int incx,
|
||||
cuDoubleComplex *y, int incy, double c, double s);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* ROTG */
|
||||
void CUBLASWINAPI cublasSrotg (float *sa, float *sb, float *sc, float *ss);
|
||||
void CUBLASWINAPI cublasDrotg (double *sa, double *sb, double *sc, double *ss);
|
||||
void CUBLASWINAPI cublasCrotg (cuComplex *ca, cuComplex cb, float *sc,
|
||||
cuComplex *cs);
|
||||
void CUBLASWINAPI cublasZrotg (cuDoubleComplex *ca, cuDoubleComplex cb, double *sc,
|
||||
cuDoubleComplex *cs);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* ROTM */
|
||||
void CUBLASWINAPI cublasSrotm(int n, float *x, int incx, float *y, int incy,
|
||||
const float* sparam);
|
||||
void CUBLASWINAPI cublasDrotm(int n, double *x, int incx, double *y, int incy,
|
||||
const double* sparam);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* ROTMG */
|
||||
void CUBLASWINAPI cublasSrotmg (float *sd1, float *sd2, float *sx1,
|
||||
const float *sy1, float* sparam);
|
||||
void CUBLASWINAPI cublasDrotmg (double *sd1, double *sd2, double *sx1,
|
||||
const double *sy1, double* sparam);
|
||||
|
||||
/* --------------- CUBLAS BLAS2 functions ---------------- */
|
||||
/* GEMV */
|
||||
void CUBLASWINAPI cublasSgemv (char trans, int m, int n, float alpha,
|
||||
const float *A, int lda, const float *x, int incx,
|
||||
float beta, float *y, int incy);
|
||||
void CUBLASWINAPI cublasDgemv (char trans, int m, int n, double alpha,
|
||||
const double *A, int lda, const double *x, int incx,
|
||||
double beta, double *y, int incy);
|
||||
void CUBLASWINAPI cublasCgemv (char trans, int m, int n, cuComplex alpha,
|
||||
const cuComplex *A, int lda, const cuComplex *x, int incx,
|
||||
cuComplex beta, cuComplex *y, int incy);
|
||||
void CUBLASWINAPI cublasZgemv (char trans, int m, int n, cuDoubleComplex alpha,
|
||||
const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx,
|
||||
cuDoubleComplex beta, cuDoubleComplex *y, int incy);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* GBMV */
|
||||
void CUBLASWINAPI cublasSgbmv (char trans, int m, int n, int kl, int ku,
|
||||
float alpha, const float *A, int lda,
|
||||
const float *x, int incx, float beta, float *y,
|
||||
int incy);
|
||||
void CUBLASWINAPI cublasDgbmv (char trans, int m, int n, int kl, int ku,
|
||||
double alpha, const double *A, int lda,
|
||||
const double *x, int incx, double beta, double *y,
|
||||
int incy);
|
||||
void CUBLASWINAPI cublasCgbmv (char trans, int m, int n, int kl, int ku,
|
||||
cuComplex alpha, const cuComplex *A, int lda,
|
||||
const cuComplex *x, int incx, cuComplex beta, cuComplex *y,
|
||||
int incy);
|
||||
void CUBLASWINAPI cublasZgbmv (char trans, int m, int n, int kl, int ku,
|
||||
cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
|
||||
const cuDoubleComplex *x, int incx, cuDoubleComplex beta, cuDoubleComplex *y,
|
||||
int incy);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* TRMV */
|
||||
void CUBLASWINAPI cublasStrmv (char uplo, char trans, char diag, int n,
|
||||
const float *A, int lda, float *x, int incx);
|
||||
void CUBLASWINAPI cublasDtrmv (char uplo, char trans, char diag, int n,
|
||||
const double *A, int lda, double *x, int incx);
|
||||
void CUBLASWINAPI cublasCtrmv (char uplo, char trans, char diag, int n,
|
||||
const cuComplex *A, int lda, cuComplex *x, int incx);
|
||||
void CUBLASWINAPI cublasZtrmv (char uplo, char trans, char diag, int n,
|
||||
const cuDoubleComplex *A, int lda, cuDoubleComplex *x, int incx);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* TBMV */
|
||||
void CUBLASWINAPI cublasStbmv (char uplo, char trans, char diag, int n, int k,
|
||||
const float *A, int lda, float *x, int incx);
|
||||
void CUBLASWINAPI cublasDtbmv (char uplo, char trans, char diag, int n, int k,
|
||||
const double *A, int lda, double *x, int incx);
|
||||
void CUBLASWINAPI cublasCtbmv (char uplo, char trans, char diag, int n, int k,
|
||||
const cuComplex *A, int lda, cuComplex *x, int incx);
|
||||
void CUBLASWINAPI cublasZtbmv (char uplo, char trans, char diag, int n, int k,
|
||||
const cuDoubleComplex *A, int lda, cuDoubleComplex *x, int incx);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* TPMV */
|
||||
void CUBLASWINAPI cublasStpmv(char uplo, char trans, char diag, int n, const float *AP, float *x, int incx);
|
||||
|
||||
void CUBLASWINAPI cublasDtpmv(char uplo, char trans, char diag, int n, const double *AP, double *x, int incx);
|
||||
|
||||
void CUBLASWINAPI cublasCtpmv(char uplo, char trans, char diag, int n, const cuComplex *AP, cuComplex *x, int incx);
|
||||
|
||||
void CUBLASWINAPI cublasZtpmv(char uplo, char trans, char diag, int n, const cuDoubleComplex *AP, cuDoubleComplex *x, int incx);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* TRSV */
|
||||
void CUBLASWINAPI cublasStrsv(char uplo, char trans, char diag, int n, const float *A, int lda, float *x, int incx);
|
||||
|
||||
void CUBLASWINAPI cublasDtrsv(char uplo, char trans, char diag, int n, const double *A, int lda, double *x, int incx);
|
||||
|
||||
void CUBLASWINAPI cublasCtrsv(char uplo, char trans, char diag, int n, const cuComplex *A, int lda, cuComplex *x, int incx);
|
||||
|
||||
void CUBLASWINAPI cublasZtrsv(char uplo, char trans, char diag, int n, const cuDoubleComplex *A, int lda,
|
||||
cuDoubleComplex *x, int incx);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* TPSV */
|
||||
void CUBLASWINAPI cublasStpsv(char uplo, char trans, char diag, int n, const float *AP,
|
||||
float *x, int incx);
|
||||
|
||||
void CUBLASWINAPI cublasDtpsv(char uplo, char trans, char diag, int n, const double *AP, double *x, int incx);
|
||||
|
||||
void CUBLASWINAPI cublasCtpsv(char uplo, char trans, char diag, int n, const cuComplex *AP, cuComplex *x, int incx);
|
||||
|
||||
void CUBLASWINAPI cublasZtpsv(char uplo, char trans, char diag, int n, const cuDoubleComplex *AP,
|
||||
cuDoubleComplex *x, int incx);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* TBSV */
|
||||
void CUBLASWINAPI cublasStbsv(char uplo, char trans,
|
||||
char diag, int n, int k, const float *A,
|
||||
int lda, float *x, int incx);
|
||||
|
||||
void CUBLASWINAPI cublasDtbsv(char uplo, char trans,
|
||||
char diag, int n, int k, const double *A,
|
||||
int lda, double *x, int incx);
|
||||
void CUBLASWINAPI cublasCtbsv(char uplo, char trans,
|
||||
char diag, int n, int k, const cuComplex *A,
|
||||
int lda, cuComplex *x, int incx);
|
||||
|
||||
void CUBLASWINAPI cublasZtbsv(char uplo, char trans,
|
||||
char diag, int n, int k, const cuDoubleComplex *A,
|
||||
int lda, cuDoubleComplex *x, int incx);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* SYMV/HEMV */
|
||||
void CUBLASWINAPI cublasSsymv (char uplo, int n, float alpha, const float *A,
|
||||
int lda, const float *x, int incx, float beta,
|
||||
float *y, int incy);
|
||||
void CUBLASWINAPI cublasDsymv (char uplo, int n, double alpha, const double *A,
|
||||
int lda, const double *x, int incx, double beta,
|
||||
double *y, int incy);
|
||||
void CUBLASWINAPI cublasChemv (char uplo, int n, cuComplex alpha, const cuComplex *A,
|
||||
int lda, const cuComplex *x, int incx, cuComplex beta,
|
||||
cuComplex *y, int incy);
|
||||
void CUBLASWINAPI cublasZhemv (char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex *A,
|
||||
int lda, const cuDoubleComplex *x, int incx, cuDoubleComplex beta,
|
||||
cuDoubleComplex *y, int incy);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* SBMV/HBMV */
|
||||
void CUBLASWINAPI cublasSsbmv (char uplo, int n, int k, float alpha,
|
||||
const float *A, int lda, const float *x, int incx,
|
||||
float beta, float *y, int incy);
|
||||
void CUBLASWINAPI cublasDsbmv (char uplo, int n, int k, double alpha,
|
||||
const double *A, int lda, const double *x, int incx,
|
||||
double beta, double *y, int incy);
|
||||
void CUBLASWINAPI cublasChbmv (char uplo, int n, int k, cuComplex alpha,
|
||||
const cuComplex *A, int lda, const cuComplex *x, int incx,
|
||||
cuComplex beta, cuComplex *y, int incy);
|
||||
void CUBLASWINAPI cublasZhbmv (char uplo, int n, int k, cuDoubleComplex alpha,
|
||||
const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx,
|
||||
cuDoubleComplex beta, cuDoubleComplex *y, int incy);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* SPMV/HPMV */
|
||||
void CUBLASWINAPI cublasSspmv(char uplo, int n, float alpha,
|
||||
const float *AP, const float *x,
|
||||
int incx, float beta, float *y, int incy);
|
||||
void CUBLASWINAPI cublasDspmv(char uplo, int n, double alpha,
|
||||
const double *AP, const double *x,
|
||||
int incx, double beta, double *y, int incy);
|
||||
void CUBLASWINAPI cublasChpmv(char uplo, int n, cuComplex alpha,
|
||||
const cuComplex *AP, const cuComplex *x,
|
||||
int incx, cuComplex beta, cuComplex *y, int incy);
|
||||
void CUBLASWINAPI cublasZhpmv(char uplo, int n, cuDoubleComplex alpha,
|
||||
const cuDoubleComplex *AP, const cuDoubleComplex *x,
|
||||
int incx, cuDoubleComplex beta, cuDoubleComplex *y, int incy);
|
||||
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* GER */
|
||||
void CUBLASWINAPI cublasSger (int m, int n, float alpha, const float *x, int incx,
|
||||
const float *y, int incy, float *A, int lda);
|
||||
void CUBLASWINAPI cublasDger (int m, int n, double alpha, const double *x, int incx,
|
||||
const double *y, int incy, double *A, int lda);
|
||||
|
||||
void CUBLASWINAPI cublasCgeru (int m, int n, cuComplex alpha, const cuComplex *x,
|
||||
int incx, const cuComplex *y, int incy,
|
||||
cuComplex *A, int lda);
|
||||
void CUBLASWINAPI cublasCgerc (int m, int n, cuComplex alpha, const cuComplex *x,
|
||||
int incx, const cuComplex *y, int incy,
|
||||
cuComplex *A, int lda);
|
||||
void CUBLASWINAPI cublasZgeru (int m, int n, cuDoubleComplex alpha, const cuDoubleComplex *x,
|
||||
int incx, const cuDoubleComplex *y, int incy,
|
||||
cuDoubleComplex *A, int lda);
|
||||
void CUBLASWINAPI cublasZgerc (int m, int n, cuDoubleComplex alpha, const cuDoubleComplex *x,
|
||||
int incx, const cuDoubleComplex *y, int incy,
|
||||
cuDoubleComplex *A, int lda);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* SYR/HER */
|
||||
void CUBLASWINAPI cublasSsyr (char uplo, int n, float alpha, const float *x,
|
||||
int incx, float *A, int lda);
|
||||
void CUBLASWINAPI cublasDsyr (char uplo, int n, double alpha, const double *x,
|
||||
int incx, double *A, int lda);
|
||||
|
||||
void CUBLASWINAPI cublasCher (char uplo, int n, float alpha,
|
||||
const cuComplex *x, int incx, cuComplex *A, int lda);
|
||||
void CUBLASWINAPI cublasZher (char uplo, int n, double alpha,
|
||||
const cuDoubleComplex *x, int incx, cuDoubleComplex *A, int lda);
|
||||
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* SPR/HPR */
|
||||
void CUBLASWINAPI cublasSspr (char uplo, int n, float alpha, const float *x,
|
||||
int incx, float *AP);
|
||||
void CUBLASWINAPI cublasDspr (char uplo, int n, double alpha, const double *x,
|
||||
int incx, double *AP);
|
||||
void CUBLASWINAPI cublasChpr (char uplo, int n, float alpha, const cuComplex *x,
|
||||
int incx, cuComplex *AP);
|
||||
void CUBLASWINAPI cublasZhpr (char uplo, int n, double alpha, const cuDoubleComplex *x,
|
||||
int incx, cuDoubleComplex *AP);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* SYR2/HER2 */
|
||||
void CUBLASWINAPI cublasSsyr2 (char uplo, int n, float alpha, const float *x,
|
||||
int incx, const float *y, int incy, float *A,
|
||||
int lda);
|
||||
void CUBLASWINAPI cublasDsyr2 (char uplo, int n, double alpha, const double *x,
|
||||
int incx, const double *y, int incy, double *A,
|
||||
int lda);
|
||||
void CUBLASWINAPI cublasCher2 (char uplo, int n, cuComplex alpha, const cuComplex *x,
|
||||
int incx, const cuComplex *y, int incy, cuComplex *A,
|
||||
int lda);
|
||||
void CUBLASWINAPI cublasZher2 (char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex *x,
|
||||
int incx, const cuDoubleComplex *y, int incy, cuDoubleComplex *A,
|
||||
int lda);
|
||||
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* SPR2/HPR2 */
|
||||
void CUBLASWINAPI cublasSspr2 (char uplo, int n, float alpha, const float *x,
|
||||
int incx, const float *y, int incy, float *AP);
|
||||
void CUBLASWINAPI cublasDspr2 (char uplo, int n, double alpha,
|
||||
const double *x, int incx, const double *y,
|
||||
int incy, double *AP);
|
||||
void CUBLASWINAPI cublasChpr2 (char uplo, int n, cuComplex alpha,
|
||||
const cuComplex *x, int incx, const cuComplex *y,
|
||||
int incy, cuComplex *AP);
|
||||
void CUBLASWINAPI cublasZhpr2 (char uplo, int n, cuDoubleComplex alpha,
|
||||
const cuDoubleComplex *x, int incx, const cuDoubleComplex *y,
|
||||
int incy, cuDoubleComplex *AP);
|
||||
/* ---------------- CUBLAS BLAS3 functions ---------------- */
|
||||
/* GEMM */
|
||||
void CUBLASWINAPI cublasSgemm (char transa, char transb, int m, int n, int k,
|
||||
float alpha, const float *A, int lda,
|
||||
const float *B, int ldb, float beta, float *C,
|
||||
int ldc);
|
||||
void CUBLASWINAPI cublasDgemm (char transa, char transb, int m, int n, int k,
|
||||
double alpha, const double *A, int lda,
|
||||
const double *B, int ldb, double beta, double *C,
|
||||
int ldc);
|
||||
void CUBLASWINAPI cublasCgemm (char transa, char transb, int m, int n, int k,
|
||||
cuComplex alpha, const cuComplex *A, int lda,
|
||||
const cuComplex *B, int ldb, cuComplex beta,
|
||||
cuComplex *C, int ldc);
|
||||
void CUBLASWINAPI cublasZgemm (char transa, char transb, int m, int n,
|
||||
int k, cuDoubleComplex alpha,
|
||||
const cuDoubleComplex *A, int lda,
|
||||
const cuDoubleComplex *B, int ldb,
|
||||
cuDoubleComplex beta, cuDoubleComplex *C,
|
||||
int ldc);
|
||||
/* ------------------------------------------------------- */
|
||||
/* SYRK */
|
||||
void CUBLASWINAPI cublasSsyrk (char uplo, char trans, int n, int k, float alpha,
|
||||
const float *A, int lda, float beta, float *C,
|
||||
int ldc);
|
||||
void CUBLASWINAPI cublasDsyrk (char uplo, char trans, int n, int k,
|
||||
double alpha, const double *A, int lda,
|
||||
double beta, double *C, int ldc);
|
||||
|
||||
void CUBLASWINAPI cublasCsyrk (char uplo, char trans, int n, int k,
|
||||
cuComplex alpha, const cuComplex *A, int lda,
|
||||
cuComplex beta, cuComplex *C, int ldc);
|
||||
void CUBLASWINAPI cublasZsyrk (char uplo, char trans, int n, int k,
|
||||
cuDoubleComplex alpha,
|
||||
const cuDoubleComplex *A, int lda,
|
||||
cuDoubleComplex beta,
|
||||
cuDoubleComplex *C, int ldc);
|
||||
/* ------------------------------------------------------- */
|
||||
/* HERK */
|
||||
void CUBLASWINAPI cublasCherk (char uplo, char trans, int n, int k,
|
||||
float alpha, const cuComplex *A, int lda,
|
||||
float beta, cuComplex *C, int ldc);
|
||||
void CUBLASWINAPI cublasZherk (char uplo, char trans, int n, int k,
|
||||
double alpha,
|
||||
const cuDoubleComplex *A, int lda,
|
||||
double beta,
|
||||
cuDoubleComplex *C, int ldc);
|
||||
/* ------------------------------------------------------- */
|
||||
/* SYR2K */
|
||||
void CUBLASWINAPI cublasSsyr2k (char uplo, char trans, int n, int k, float alpha,
|
||||
const float *A, int lda, const float *B, int ldb,
|
||||
float beta, float *C, int ldc);
|
||||
|
||||
void CUBLASWINAPI cublasDsyr2k (char uplo, char trans, int n, int k,
|
||||
double alpha, const double *A, int lda,
|
||||
const double *B, int ldb, double beta,
|
||||
double *C, int ldc);
|
||||
void CUBLASWINAPI cublasCsyr2k (char uplo, char trans, int n, int k,
|
||||
cuComplex alpha, const cuComplex *A, int lda,
|
||||
const cuComplex *B, int ldb, cuComplex beta,
|
||||
cuComplex *C, int ldc);
|
||||
|
||||
void CUBLASWINAPI cublasZsyr2k (char uplo, char trans, int n, int k,
|
||||
cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
|
||||
const cuDoubleComplex *B, int ldb, cuDoubleComplex beta,
|
||||
cuDoubleComplex *C, int ldc);
|
||||
/* ------------------------------------------------------- */
|
||||
/* HER2K */
|
||||
void CUBLASWINAPI cublasCher2k (char uplo, char trans, int n, int k,
|
||||
cuComplex alpha, const cuComplex *A, int lda,
|
||||
const cuComplex *B, int ldb, float beta,
|
||||
cuComplex *C, int ldc);
|
||||
|
||||
void CUBLASWINAPI cublasZher2k (char uplo, char trans, int n, int k,
|
||||
cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
|
||||
const cuDoubleComplex *B, int ldb, double beta,
|
||||
cuDoubleComplex *C, int ldc);
|
||||
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* SYMM */
|
||||
void CUBLASWINAPI cublasSsymm (char side, char uplo, int m, int n, float alpha,
|
||||
const float *A, int lda, const float *B, int ldb,
|
||||
float beta, float *C, int ldc);
|
||||
void CUBLASWINAPI cublasDsymm (char side, char uplo, int m, int n, double alpha,
|
||||
const double *A, int lda, const double *B, int ldb,
|
||||
double beta, double *C, int ldc);
|
||||
|
||||
void CUBLASWINAPI cublasCsymm (char side, char uplo, int m, int n, cuComplex alpha,
|
||||
const cuComplex *A, int lda, const cuComplex *B, int ldb,
|
||||
cuComplex beta, cuComplex *C, int ldc);
|
||||
|
||||
void CUBLASWINAPI cublasZsymm (char side, char uplo, int m, int n, cuDoubleComplex alpha,
|
||||
const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
|
||||
cuDoubleComplex beta, cuDoubleComplex *C, int ldc);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* HEMM */
|
||||
void CUBLASWINAPI cublasChemm (char side, char uplo, int m, int n,
|
||||
cuComplex alpha, const cuComplex *A, int lda,
|
||||
const cuComplex *B, int ldb, cuComplex beta,
|
||||
cuComplex *C, int ldc);
|
||||
void CUBLASWINAPI cublasZhemm (char side, char uplo, int m, int n,
|
||||
cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
|
||||
const cuDoubleComplex *B, int ldb, cuDoubleComplex beta,
|
||||
cuDoubleComplex *C, int ldc);
|
||||
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* TRSM */
|
||||
void CUBLASWINAPI cublasStrsm (char side, char uplo, char transa, char diag,
|
||||
int m, int n, float alpha, const float *A, int lda,
|
||||
float *B, int ldb);
|
||||
|
||||
void CUBLASWINAPI cublasDtrsm (char side, char uplo, char transa,
|
||||
char diag, int m, int n, double alpha,
|
||||
const double *A, int lda, double *B,
|
||||
int ldb);
|
||||
|
||||
void CUBLASWINAPI cublasCtrsm (char side, char uplo, char transa, char diag,
|
||||
int m, int n, cuComplex alpha, const cuComplex *A,
|
||||
int lda, cuComplex *B, int ldb);
|
||||
|
||||
void CUBLASWINAPI cublasZtrsm (char side, char uplo, char transa,
|
||||
char diag, int m, int n, cuDoubleComplex alpha,
|
||||
const cuDoubleComplex *A, int lda,
|
||||
cuDoubleComplex *B, int ldb);
|
||||
/*------------------------------------------------------------------------*/
|
||||
/* TRMM */
|
||||
void CUBLASWINAPI cublasStrmm (char side, char uplo, char transa, char diag,
|
||||
int m, int n, float alpha, const float *A, int lda,
|
||||
float *B, int ldb);
|
||||
void CUBLASWINAPI cublasDtrmm (char side, char uplo, char transa,
|
||||
char diag, int m, int n, double alpha,
|
||||
const double *A, int lda, double *B,
|
||||
int ldb);
|
||||
void CUBLASWINAPI cublasCtrmm (char side, char uplo, char transa, char diag,
|
||||
int m, int n, cuComplex alpha, const cuComplex *A,
|
||||
int lda, cuComplex *B, int ldb);
|
||||
void CUBLASWINAPI cublasZtrmm (char side, char uplo, char transa,
|
||||
char diag, int m, int n, cuDoubleComplex alpha,
|
||||
const cuDoubleComplex *A, int lda, cuDoubleComplex *B,
|
||||
int ldb);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif /* !defined(CUBLAS_H_) */
|
2583
include/external/cuda/cublas_api.h
vendored
2583
include/external/cuda/cublas_api.h
vendored
File diff suppressed because it is too large
Load Diff
228
include/external/cuda/cuda_device_runtime_api.h
vendored
228
include/external/cuda/cuda_device_runtime_api.h
vendored
@@ -1,228 +0,0 @@
|
||||
/*
|
||||
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#if !defined(__CUDA_DEVICE_RUNTIME_API_H__)
|
||||
#define __CUDA_DEVICE_RUNTIME_API_H__
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
#if defined(__CUDABE__)
|
||||
|
||||
#if (__CUDA_ARCH__ >= 350) && !defined(__CUDADEVRT_INTERNAL__)
|
||||
struct cudaFuncAttributes;
|
||||
|
||||
/*
 * Placeholder definition of cudaMalloc. It is compiled only when __CUDABE__
 * is defined, __CUDA_ARCH__ >= 350 and __CUDADEVRT_INTERNAL__ is not defined.
 * The function carries the nv_weak attribute, ignores both p and s, and
 * always returns cudaErrorUnknown.
 * NOTE(review): presumably superseded at link time by the real device-runtime
 * implementation -- confirm against the CUDA device runtime documentation.
 */
__device__ __attribute__((nv_weak)) cudaError_t cudaMalloc(void **p, size_t s)
|
||||
{
|
||||
return cudaErrorUnknown;
|
||||
}
|
||||
|
||||
/*
 * Placeholder definition of cudaFuncGetAttributes, marked nv_weak.
 * Neither the output structure p nor the function pointer c is touched;
 * the result is always cudaErrorUnknown. struct cudaFuncAttributes is only
 * forward-declared in this section, so it is used here as an opaque pointer.
 * NOTE(review): presumably a fallback for when no stronger definition is
 * linked in -- confirm.
 */
__device__ __attribute__((nv_weak)) cudaError_t cudaFuncGetAttributes(struct cudaFuncAttributes *p, const void *c)
|
||||
{
|
||||
return cudaErrorUnknown;
|
||||
}
|
||||
|
||||
/*
 * Placeholder definition of cudaDeviceGetAttribute, marked nv_weak.
 * It does not write to *value and does not inspect attr or device; it
 * unconditionally returns cudaErrorUnknown.
 * NOTE(review): presumably a fallback for when no stronger definition is
 * linked in -- confirm.
 */
__device__ __attribute__((nv_weak)) cudaError_t cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device)
|
||||
{
|
||||
return cudaErrorUnknown;
|
||||
}
|
||||
|
||||
/*
 * Placeholder definition of cudaGetDevice, marked nv_weak.
 * *device is left unmodified and cudaErrorUnknown is always returned.
 * NOTE(review): presumably a fallback for when no stronger definition is
 * linked in -- confirm.
 */
__device__ __attribute__((nv_weak)) cudaError_t cudaGetDevice(int *device)
|
||||
{
|
||||
return cudaErrorUnknown;
|
||||
}
|
||||
|
||||
/*
 * Placeholder definition of cudaOccupancyMaxActiveBlocksPerMultiprocessor,
 * marked nv_weak. No occupancy value is computed: *numBlocks is not written
 * and func, blockSize and dynamicSmemSize are ignored. The function always
 * returns cudaErrorUnknown.
 * NOTE(review): presumably a fallback for when no stronger definition is
 * linked in -- confirm.
 */
__device__ __attribute__((nv_weak)) cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, int blockSize, size_t dynamicSmemSize)
|
||||
{
|
||||
return cudaErrorUnknown;
|
||||
}
|
||||
|
||||
/*
 * Placeholder definition of
 * cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, marked nv_weak.
 * Takes the same arguments as the variant without flags plus an unsigned
 * flags word. All of them are ignored, *numBlocks is not written, and
 * cudaErrorUnknown is always returned.
 * NOTE(review): presumably a fallback for when no stronger definition is
 * linked in -- confirm.
 */
__device__ __attribute__((nv_weak)) cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, const void *func, int blockSize, size_t dynamicSmemSize, unsigned int flags)
|
||||
{
|
||||
return cudaErrorUnknown;
|
||||
}
|
||||
|
||||
#endif /* (__CUDA_ARCH__ >= 350) && !defined(__CUDADEVRT_INTERNAL__) */
|
||||
|
||||
#else /* defined(__CUDABE__) */
|
||||
|
||||
#if defined(__cplusplus) && defined(__CUDACC__) // Visible to nvcc front-end only
|
||||
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 350) // Visible to SM>=3.5 and "__host__ __device__" only
|
||||
|
||||
#include "driver_types.h"
|
||||
#include "host_defines.h"
|
||||
|
||||
extern "C"
|
||||
{
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceGetLimit(size_t *pValue, enum cudaLimit limit);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceGetCacheConfig(enum cudaFuncCache *pCacheConfig);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceGetSharedMemConfig(enum cudaSharedMemConfig *pConfig);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaDeviceSynchronize(void);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetLastError(void);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaPeekAtLastError(void);
|
||||
extern __device__ __cudart_builtin__ const char* CUDARTAPI cudaGetErrorString(cudaError_t error);
|
||||
extern __device__ __cudart_builtin__ const char* CUDARTAPI cudaGetErrorName(cudaError_t error);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDeviceCount(int *count);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDevice(int *device);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamCreateWithFlags(cudaStream_t *pStream, unsigned int flags);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamDestroy(cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent(cudaStream_t stream, cudaEvent_t event, unsigned int flags);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent_ptsz(cudaStream_t stream, cudaEvent_t event, unsigned int flags);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaEventCreateWithFlags(cudaEvent_t *event, unsigned int flags);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaEventRecord(cudaEvent_t event, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaEventRecord_ptsz(cudaEvent_t event, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaEventDestroy(cudaEvent_t event);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaFuncGetAttributes(struct cudaFuncAttributes *attr, const void *func);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaFree(void *devPtr);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMalloc(void **devPtr, size_t size);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpyAsync(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpyAsync_ptsz(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy2DAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy2DAsync_ptsz(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync(const struct cudaMemcpy3DParms *p, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync_ptsz(const struct cudaMemcpy3DParms *p, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync(void *devPtr, int value, size_t count, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync_ptsz(void *devPtr, int value, size_t count, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemset2DAsync(void *devPtr, size_t pitch, int value, size_t width, size_t height, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemset2DAsync_ptsz(void *devPtr, size_t pitch, int value, size_t width, size_t height, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemset3DAsync(struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemset3DAsync_ptsz(struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaRuntimeGetVersion(int *runtimeVersion);
|
||||
|
||||
/**
|
||||
* \ingroup CUDART_EXECUTION
|
||||
* \brief Obtains a parameter buffer
|
||||
*
|
||||
* Obtains a parameter buffer which can be filled with parameters for a kernel launch.
|
||||
* Parameters passed to ::cudaLaunchDevice must be allocated via this function.
|
||||
*
|
||||
* This is a low level API and can only be accessed from Parallel Thread Execution (PTX).
|
||||
* CUDA user code should use <<< >>> to launch kernels.
|
||||
*
|
||||
* \param alignment - Specifies alignment requirement of the parameter buffer
|
||||
* \param size - Specifies size requirement in bytes
|
||||
*
|
||||
* \return
|
||||
* Returns pointer to the allocated parameterBuffer
|
||||
* \notefnerr
|
||||
*
|
||||
* \sa cudaLaunchDevice
|
||||
*/
|
||||
extern __device__ __cudart_builtin__ void * CUDARTAPI cudaGetParameterBuffer(size_t alignment, size_t size);
|
||||
|
||||
/**
|
||||
* \ingroup CUDART_EXECUTION
|
||||
* \brief Launches a specified kernel
|
||||
*
|
||||
* Launches a specified kernel with the specified parameter buffer. A parameter buffer can be obtained
|
||||
* by calling ::cudaGetParameterBuffer().
|
||||
*
|
||||
* This is a low level API and can only be accessed from Parallel Thread Execution (PTX).
|
||||
* CUDA user code should use <<< >>> to launch the kernels.
|
||||
*
|
||||
* \param func - Pointer to the kernel to be launched
|
||||
* \param parameterBuffer - Holds the parameters to the launched kernel. parameterBuffer can be NULL. (Optional)
|
||||
* \param gridDimension - Specifies grid dimensions
|
||||
* \param blockDimension - Specifies block dimensions
|
||||
* \param sharedMemSize - Specifies size of shared memory
|
||||
* \param stream - Specifies the stream to be used
|
||||
*
|
||||
* \return
|
||||
* ::cudaSuccess, ::cudaErrorInvalidDevice, ::cudaErrorLaunchMaxDepthExceeded, ::cudaErrorInvalidConfiguration,
|
||||
* ::cudaErrorStartupFailure, ::cudaErrorLaunchPendingCountExceeded, ::cudaErrorLaunchOutOfResources
|
||||
* \notefnerr
|
||||
* \n Please refer to Execution Configuration and Parameter Buffer Layout from the CUDA Programming
|
||||
* Guide for the detailed descriptions of launch configuration and parameter layout respectively.
|
||||
*
|
||||
* \sa cudaGetParameterBuffer
|
||||
*/
|
||||
/*
 * NOTE(review): the Doxygen block directly above lists \param parameterBuffer
 * and \param stream, neither of which cudaGetParameterBufferV2 accepts; that
 * parameter list matches cudaLaunchDevice_ptsz (next declaration) instead.
 * Confirm which symbol the block is meant to document.
 */
extern __device__ __cudart_builtin__ void * CUDARTAPI cudaGetParameterBufferV2(void *func, dim3 gridDimension, dim3 blockDimension, unsigned int sharedMemSize);
|
||||
/* Per-thread-default-stream ("_ptsz") launch entry points. */
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaLaunchDevice_ptsz(void *func, void *parameterBuffer, dim3 gridDimension, dim3 blockDimension, unsigned int sharedMemSize, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaLaunchDeviceV2_ptsz(void *parameterBuffer, cudaStream_t stream);
|
||||
|
||||
#if defined(CUDA_API_PER_THREAD_DEFAULT_STREAM) && defined(__CUDA_ARCH__)
|
||||
// When compiling for the device and per thread default stream is enabled, add
|
||||
// a static inline redirect to the per thread stream entry points.
|
||||
|
||||
static __inline__ __device__ __cudart_builtin__ cudaError_t CUDARTAPI
|
||||
cudaLaunchDevice(void *func, void *parameterBuffer, dim3 gridDimension, dim3 blockDimension, unsigned int sharedMemSize, cudaStream_t stream)
|
||||
{
|
||||
return cudaLaunchDevice_ptsz(func, parameterBuffer, gridDimension, blockDimension, sharedMemSize, stream);
|
||||
}
|
||||
|
||||
static __inline__ __device__ __cudart_builtin__ cudaError_t CUDARTAPI
|
||||
cudaLaunchDeviceV2(void *parameterBuffer, cudaStream_t stream)
|
||||
{
|
||||
return cudaLaunchDeviceV2_ptsz(parameterBuffer, stream);
|
||||
}
|
||||
#else
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaLaunchDevice(void *func, void *parameterBuffer, dim3 gridDimension, dim3 blockDimension, unsigned int sharedMemSize, cudaStream_t stream);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaLaunchDeviceV2(void *parameterBuffer, cudaStream_t stream);
|
||||
#endif
|
||||
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, int blockSize, size_t dynamicSmemSize);
|
||||
extern __device__ __cudart_builtin__ cudaError_t CUDARTAPI cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, const void *func, int blockSize, size_t dynamicSmemSize, unsigned int flags);
|
||||
|
||||
}
|
||||
|
||||
template <typename T> static __inline__ __device__ __cudart_builtin__ cudaError_t cudaMalloc(T **devPtr, size_t size);
|
||||
template <typename T> static __inline__ __device__ __cudart_builtin__ cudaError_t cudaFuncGetAttributes(struct cudaFuncAttributes *attr, T *entry);
|
||||
template <typename T> static __inline__ __device__ __cudart_builtin__ cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, T func, int blockSize, size_t dynamicSmemSize);
|
||||
template <typename T> static __inline__ __device__ __cudart_builtin__ cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, T func, int blockSize, size_t dynamicSmemSize, unsigned int flags);
|
||||
|
||||
#endif // !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 350)
|
||||
#endif // defined(__cplusplus) && defined(__CUDACC__)
|
||||
|
||||
#endif /* defined(__CUDABE__) */
|
||||
|
||||
#endif /* !__CUDA_DEVICE_RUNTIME_API_H__ */
|
1499
include/external/cuda/cuda_fp16.h
vendored
1499
include/external/cuda/cuda_fp16.h
vendored
File diff suppressed because it is too large
Load Diff
1895
include/external/cuda/cuda_runtime.h
vendored
1895
include/external/cuda/cuda_runtime.h
vendored
File diff suppressed because it is too large
Load Diff
6520
include/external/cuda/cuda_runtime_api.h
vendored
6520
include/external/cuda/cuda_runtime_api.h
vendored
File diff suppressed because it is too large
Load Diff
69
include/external/cuda/device_types.h
vendored
69
include/external/cuda/device_types.h
vendored
@@ -1,69 +0,0 @@
|
||||
/*
|
||||
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#if !defined(__DEVICE_TYPES_H__)
|
||||
#define __DEVICE_TYPES_H__
|
||||
|
||||
#include "host_defines.h"
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
enum __device_builtin__ cudaRoundMode
|
||||
{
|
||||
cudaRoundNearest,
|
||||
cudaRoundZero,
|
||||
cudaRoundPosInf,
|
||||
cudaRoundMinInf
|
||||
};
|
||||
|
||||
#endif /* !__DEVICE_TYPES_H__ */
|
145
include/external/cuda/driver_functions.h
vendored
145
include/external/cuda/driver_functions.h
vendored
@@ -1,145 +0,0 @@
|
||||
/*
|
||||
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#if !defined(__DRIVER_FUNCTIONS_H__)
|
||||
#define __DRIVER_FUNCTIONS_H__
|
||||
|
||||
#include "builtin_types.h"
|
||||
#include "host_defines.h"
|
||||
#include "driver_types.h"
|
||||
|
||||
/**
|
||||
* \addtogroup CUDART_MEMORY
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* \brief Returns a cudaPitchedPtr based on input parameters
|
||||
*
|
||||
* Returns a ::cudaPitchedPtr based on the specified input parameters \p d,
|
||||
* \p p, \p xsz, and \p ysz.
|
||||
*
|
||||
* \param d - Pointer to allocated memory
|
||||
* \param p - Pitch of allocated memory in bytes
|
||||
* \param xsz - Logical width of allocation in elements
|
||||
* \param ysz - Logical height of allocation in elements
|
||||
*
|
||||
* \return
|
||||
* ::cudaPitchedPtr specified by \p d, \p p, \p xsz, and \p ysz
|
||||
*
|
||||
* \sa make_cudaExtent, make_cudaPos
|
||||
*/
|
||||
static __inline__ __host__ struct cudaPitchedPtr make_cudaPitchedPtr(void *d, size_t p, size_t xsz, size_t ysz)
|
||||
{
|
||||
struct cudaPitchedPtr s;
|
||||
|
||||
s.ptr = d;
|
||||
s.pitch = p;
|
||||
s.xsize = xsz;
|
||||
s.ysize = ysz;
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Returns a cudaPos based on input parameters
|
||||
*
|
||||
* Returns a ::cudaPos based on the specified input parameters \p x,
|
||||
* \p y, and \p z.
|
||||
*
|
||||
* \param x - X position
|
||||
* \param y - Y position
|
||||
* \param z - Z position
|
||||
*
|
||||
* \return
|
||||
* ::cudaPos specified by \p x, \p y, and \p z
|
||||
*
|
||||
* \sa make_cudaExtent, make_cudaPitchedPtr
|
||||
*/
|
||||
static __inline__ __host__ struct cudaPos make_cudaPos(size_t x, size_t y, size_t z)
|
||||
{
|
||||
struct cudaPos p;
|
||||
|
||||
p.x = x;
|
||||
p.y = y;
|
||||
p.z = z;
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Returns a cudaExtent based on input parameters
|
||||
*
|
||||
* Returns a ::cudaExtent based on the specified input parameters \p w,
|
||||
* \p h, and \p d.
|
||||
*
|
||||
* \param w - Width in bytes
|
||||
* \param h - Height in elements
|
||||
* \param d - Depth in elements
|
||||
*
|
||||
* \return
|
||||
* ::cudaExtent specified by \p w, \p h, and \p d
|
||||
*
|
||||
* \sa make_cudaPitchedPtr, make_cudaPos
|
||||
*/
|
||||
static __inline__ __host__ struct cudaExtent make_cudaExtent(size_t w, size_t h, size_t d)
|
||||
{
|
||||
struct cudaExtent e;
|
||||
|
||||
e.width = w;
|
||||
e.height = h;
|
||||
e.depth = d;
|
||||
|
||||
return e;
|
||||
}
|
||||
|
||||
/** @} */ /* END CUDART_MEMORY */
|
||||
|
||||
#endif /* !__DRIVER_FUNCTIONS_H__ */
|
1450
include/external/cuda/driver_types.h
vendored
1450
include/external/cuda/driver_types.h
vendored
File diff suppressed because it is too large
Load Diff
201
include/external/cuda/host_config.h
vendored
201
include/external/cuda/host_config.h
vendored
@@ -1,201 +0,0 @@
|
||||
/*
|
||||
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#if !defined(__HOST_CONFIG_H__)
|
||||
#define __HOST_CONFIG_H__
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
#if defined(__CUDACC__)
|
||||
|
||||
#if defined(__CUDACC_RTC__)
|
||||
|
||||
#define _CRTIMP
|
||||
#define __THROW
|
||||
|
||||
#else /* __CUDACC_RTC__ */
|
||||
|
||||
/* check for host compilers that are compatible with nvcc */
|
||||
#if !defined(__GNUC__) && !defined(_WIN32)
|
||||
|
||||
#error --- !!! UNSUPPORTED COMPILER !!! ---
|
||||
|
||||
#endif /* !__GNUC__ && !_WIN32 */
|
||||
|
||||
#if defined(__ICC)
|
||||
|
||||
#if __ICC != 1500 || !defined(__GNUC__) || !defined(__LP64__)
|
||||
|
||||
#error -- unsupported ICC configuration! Only ICC 15.0 on Linux x86_64 is supported!
|
||||
|
||||
#endif /* __ICC != 1500 || !__GNUC__ || !__LP64__ */
|
||||
|
||||
#endif /* __ICC */
|
||||
|
||||
#if defined(__PGIC__)
|
||||
|
||||
/* Stop compilation unless the PGI compiler is exactly 15.4 with __GNUC__ and __LP64__ defined. */
#if __PGIC__ != 15 || __PGIC_MINOR__ != 4 || !defined(__GNUC__) || !defined(__LP64__)
|
||||
|
||||
#error -- unsupported pgc++ configuration! Only pgc++ 15.4 on Linux x86_64 is supported!
|
||||
|
||||
#endif /* __PGIC__ != 15 || __PGIC_MINOR__ != 4 || !__GNUC__ || !__LP64__ */
|
||||
|
||||
#endif /* __PGIC__ */
|
||||
|
||||
#if defined(__powerpc__)
|
||||
|
||||
#if !defined(__powerpc64__) || !defined(__LITTLE_ENDIAN__)
|
||||
|
||||
#error -- unsupported PPC platform! Only 64-bit little endian PPC is supported!
|
||||
|
||||
#endif /* !__powerpc64__ || !__LITTLE_ENDIAN__ */
|
||||
|
||||
/*
 * Stop compilation for any xlC release other than 13.1, i.e. whenever
 * __ibmxl_vrm__ falls outside [0x0d010000, 0x0d020000). The two bounds are
 * joined with || because no single value can be both below the lower bound
 * and at or above the upper one; with && the #error could never trigger.
 */
#if defined(__ibmxl_vrm__) && (__ibmxl_vrm__ < 0x0d010000 || __ibmxl_vrm__ >= 0x0d020000)

#error -- unsupported xlC version! only xlC 13.1 is supported

#endif /* __ibmxl_vrm__ && (__ibmxl_vrm__ < 0x0d010000 || __ibmxl_vrm__ >= 0x0d020000) */
|
||||
|
||||
#endif /* __powerpc__ */
|
||||
|
||||
#if defined(__GNUC__)
|
||||
|
||||
#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 9)
|
||||
|
||||
#error -- unsupported GNU version! gcc versions later than 4.9 are not supported!
|
||||
|
||||
#endif /* __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 9) */
|
||||
|
||||
#if defined(__APPLE__) && defined(__MACH__) && !defined(__clang__)
|
||||
#error -- clang and clang++ are the only supported host compilers on Mac OS X!
|
||||
#endif /* __APPLE__ && __MACH__ && !__clang__ */
|
||||
|
||||
#endif /* __GNUC__ */
|
||||
|
||||
#if defined(_WIN32)
|
||||
|
||||
#if _MSC_VER < 1600 || _MSC_VER > 1800
|
||||
|
||||
#error -- unsupported Microsoft Visual Studio version! Only the versions 2010, 2012, and 2013 are supported!
|
||||
|
||||
#endif /* _MSC_VER < 1600 || _MSC_VER > 1800 */
|
||||
|
||||
#endif /* _WIN32 */
|
||||
|
||||
/* configure host compiler */
|
||||
#if defined(__APPLE__)
|
||||
|
||||
#define _CRTIMP
|
||||
#define __THROW
|
||||
|
||||
#if defined(__BLOCKS__) /* nvcc does not support closures */
|
||||
|
||||
#undef __BLOCKS__
|
||||
|
||||
#endif /* __BLOCKS__ */
|
||||
|
||||
#elif defined(__ANDROID__)
|
||||
|
||||
#define _CRTIMP
|
||||
#define __THROW
|
||||
|
||||
#elif defined(__QNX__)
|
||||
|
||||
#define _CRTIMP
|
||||
#define __THROW
|
||||
|
||||
#elif defined(__GNUC__)
|
||||
|
||||
#define _CRTIMP
|
||||
|
||||
#include <features.h> /* for __THROW */
|
||||
|
||||
#elif defined(_WIN32)
|
||||
|
||||
#if _MSC_VER >= 1500
|
||||
|
||||
#undef _USE_DECLSPECS_FOR_SAL
|
||||
#define _USE_DECLSPECS_FOR_SAL \
|
||||
1
|
||||
|
||||
#endif /* _MSC_VER >= 1500 */
|
||||
|
||||
#if !defined(_CRT_NONSTDC_NO_WARNINGS)
|
||||
|
||||
#define _CRT_NONSTDC_NO_WARNINGS /* to suppress warnings */
|
||||
|
||||
#endif /* !_CRT_NONSTDC_NO_WARNINGS */
|
||||
|
||||
#if !defined(_CRT_SECURE_NO_WARNINGS)
|
||||
|
||||
#define _CRT_SECURE_NO_WARNINGS /* to suppress warnings */
|
||||
|
||||
#endif /* !_CRT_SECURE_NO_WARNINGS */
|
||||
|
||||
#if !defined(NOMINMAX)
|
||||
|
||||
#define NOMINMAX /* min and max are part of cuda runtime */
|
||||
|
||||
#endif /* !NOMINMAX */
|
||||
|
||||
#include <crtdefs.h> /* for _CRTIMP */
|
||||
|
||||
#define __THROW
|
||||
|
||||
#endif /* __APPLE__ */
|
||||
|
||||
#endif /* __CUDACC_RTC__ */
|
||||
|
||||
#endif /* __CUDACC__ */
|
||||
|
||||
#endif /* !__HOST_CONFIG_H__ */
|
241
include/external/cuda/host_defines.h
vendored
241
include/external/cuda/host_defines.h
vendored
@@ -1,241 +0,0 @@
|
||||
/*
|
||||
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#if !defined(__HOST_DEFINES_H__)
|
||||
#define __HOST_DEFINES_H__
|
||||
|
||||
/*
 * Compiler-specific spellings of the attribute macros used by the CUDA
 * headers. GNU-style attributes are selected for __GNUC__ compilers,
 * __CUDA_LIBDEVICE__ builds and CUDA JIT mode (__CUDACC_RTC__); the
 * __declspec spellings are selected for _MSC_VER. Any other compiler must
 * supply __align__ and CUDARTAPI itself or compilation stops with #error.
 */
#if defined(__GNUC__) || defined(__CUDA_LIBDEVICE__) || defined(__CUDACC_RTC__)

#if defined(__CUDACC_RTC__)
#define __volatile__ volatile
#endif /* __CUDACC_RTC__ */

#define __no_return__ __attribute__((noreturn))

#if defined(__CUDACC__) || defined(__CUDA_ARCH__)
/*
 * gcc lets users spell attributes with underscores, for example
 * __attribute__((__noinline__)). A non-CUDA source file (e.g. .cpp) that
 * uses that spelling and includes this header would fail to compile if
 * __noinline__ were defined as a macro here, so the macro is only defined
 * when a CUDA compiler component is processing the code.
 */
#define __noinline__ __attribute__((noinline))
#endif /* __CUDACC__ || __CUDA_ARCH__ */

#define __forceinline__ __inline__ __attribute__((always_inline))
#define __align__(n)    __attribute__((aligned(n)))
#define __thread__      __thread
#define __import__
#define __export__
#define __cdecl
#define __annotate__(a) __attribute__((a))
#define __location__(a) __annotate__(a)
#define CUDARTAPI

#elif defined(_MSC_VER)

#if _MSC_VER >= 1400
#define __restrict__ __restrict
#else /* _MSC_VER >= 1400 */
#define __restrict__
#endif /* _MSC_VER >= 1400 */

#define __inline__      __inline
#define __no_return__   __declspec(noreturn)
#define __noinline__    __declspec(noinline)
#define __forceinline__ __forceinline
#define __align__(n)    __declspec(align(n))
#define __thread__      __declspec(thread)
#define __import__      __declspec(dllimport)
#define __export__      __declspec(dllexport)
#define __annotate__(a) __declspec(a)
#define __location__(a) __annotate__(__##a##__)
#define CUDARTAPI       __stdcall

#else /* __GNUC__ || __CUDA_LIBDEVICE__ || __CUDACC_RTC__ */

#define __inline__

#if !defined(__align__)
#error --- !!! UNKNOWN COMPILER: please provide a CUDA compatible definition for '__align__' !!! ---
#endif /* !__align__ */

#if !defined(CUDARTAPI)
#error --- !!! UNKNOWN COMPILER: please provide a CUDA compatible definition for 'CUDARTAPI' !!! ---
#endif /* !CUDARTAPI */

#endif /* __GNUC__ || __CUDA_LIBDEVICE__ || __CUDACC_RTC__ */
|
||||
|
||||
/*
 * __specialization_static expands to `static` for GCC older than 4.3
 * (the minor-version test is skipped when __clang__ is defined), for MSVC
 * older than _MSC_VER 1900, and for compilers that are neither GCC-compatible
 * nor MSVC. Everywhere else it expands to nothing.
 */
#if (defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 3 && !defined(__clang__)))) || \
    (defined(_MSC_VER) && _MSC_VER < 1900) || \
    (!defined(__GNUC__) && !defined(_MSC_VER))

#define __specialization_static static

#else /* newer GCC-compatible compiler, or MSVC with _MSC_VER >= 1900 */

#define __specialization_static

#endif /* __specialization_static selection */
|
||||
|
||||
/*
 * When __CUDACC__ or __CUDABE__ is defined, __launch_bounds__ forwards its
 * arguments to __annotate__. Otherwise __annotate__ is redefined to expand
 * to nothing and __launch_bounds__ is left undefined.
 */
#if defined(__CUDACC__) || defined(__CUDABE__)

#define __launch_bounds__(...) __annotate__(launch_bounds(__VA_ARGS__))

#else /* !__CUDACC__ && !__CUDABE__ */

#undef __annotate__
#define __annotate__(a)

#endif /* __CUDACC__ || __CUDABE__ */
|
||||
|
||||
/*
 * __builtin_align__(a) maps to __align__(a) when any of __CUDACC__,
 * __CUDABE__, __GNUC__ or _WIN64 is defined; otherwise it expands to nothing.
 */
#if defined(__CUDACC__) || defined(__CUDABE__) || defined(__GNUC__) || defined(_WIN64)

#define __builtin_align__(a) __align__(a)

#else /* __CUDACC__ || __CUDABE__ || __GNUC__ || _WIN64 */

#define __builtin_align__(a)

#endif /* __CUDACC__ || __CUDABE__ || __GNUC__ || _WIN64 */
|
||||
|
||||
/* Qualifier macros; each one is __location__ applied to the matching keyword. */
#define __host__     __location__(host)
#define __device__   __location__(device)
#define __global__   __location__(global)
#define __shared__   __location__(shared)
#define __constant__ __location__(constant)
#define __managed__  __location__(managed)
|
||||
|
||||
/*
 * The four builtin-marker macros expand to nothing when __CUDACC__ is not
 * defined, or when __CUDABE__ is defined without __CUDACC_INTEGRATED__.
 * In every other configuration they expand to the matching __location__
 * annotation.
 */
#if (defined(__CUDABE__) && !defined(__CUDACC_INTEGRATED__)) || !defined(__CUDACC__)

#define __device_builtin__
#define __device_builtin_texture_type__
#define __device_builtin_surface_type__
#define __cudart_builtin__

#else /* (defined(__CUDABE__) && !defined(__CUDACC_INTEGRATED__)) || !__CUDACC__ */

#define __device_builtin__              __location__(device_builtin)
#define __device_builtin_texture_type__ __location__(device_builtin_texture_type)
#define __device_builtin_surface_type__ __location__(device_builtin_surface_type)
#define __cudart_builtin__              __location__(cudart_builtin)

#endif /* (defined(__CUDABE__) && !defined(__CUDACC_INTEGRATED__)) || !__CUDACC__ */
|
||||
|
||||
/*
 * Active only when both __CUDACC__ and __clang__ are defined. For C++
 * translation units this declares the __nv_clang_atomic_t class template and
 * redefines _Atomic(X) to name it.
 */
#if defined(__CUDACC__) && defined(__clang__)

#ifndef __has_feature
#error --- !!! The Clang version does not support __has_feature !!! ---
#endif /* !__has_feature */

/* __CUDACC__ is already guaranteed by the enclosing conditional. */
#if defined(__cplusplus)

/* Use noexcept when the compiler reports the feature, throw() otherwise. */
#if __has_feature(cxx_noexcept)
#define NV_CLANG_ATOMIC_NOEXCEPT noexcept
#define NV_CLANG_ATOMIC_NOEXCEPT_(x) noexcept(x)
#else /* !__has_feature(cxx_noexcept) */
#define NV_CLANG_ATOMIC_NOEXCEPT throw()
#define NV_CLANG_ATOMIC_NOEXCEPT_(x)
#endif /* __has_feature(cxx_noexcept) */

/* Declarations only: no member is defined in this header. */
template <typename T>
struct __nv_clang_atomic_t
{
    __nv_clang_atomic_t() NV_CLANG_ATOMIC_NOEXCEPT;
    __nv_clang_atomic_t(const T &value) NV_CLANG_ATOMIC_NOEXCEPT;
    operator T() volatile NV_CLANG_ATOMIC_NOEXCEPT;
    operator T() NV_CLANG_ATOMIC_NOEXCEPT;
};

#define _Atomic(X) __nv_clang_atomic_t<X>

#endif /* defined(__cplusplus) */

#endif /* __CUDACC__ && __clang__ */
|
||||
|
||||
|
||||
#endif /* !__HOST_DEFINES_H__ */
|
119
include/external/cuda/surface_types.h
vendored
119
include/external/cuda/surface_types.h
vendored
@@ -1,119 +0,0 @@
|
||||
/*
|
||||
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#if !defined(__SURFACE_TYPES_H__)
|
||||
#define __SURFACE_TYPES_H__
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
#include "driver_types.h"
|
||||
|
||||
/**
|
||||
* \addtogroup CUDART_TYPES
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
/* Numeric surface type identifiers. */
#define cudaSurfaceType1D             0x01
#define cudaSurfaceType2D             0x02
#define cudaSurfaceType3D             0x03
#define cudaSurfaceTypeCubemap        0x0C
#define cudaSurfaceType1DLayered      0xF1
#define cudaSurfaceType2DLayered      0xF2
#define cudaSurfaceTypeCubemapLayered 0xFC
|
||||
|
||||
/**
|
||||
* CUDA Surface boundary modes
|
||||
*/
|
||||
enum __device_builtin__ cudaSurfaceBoundaryMode
|
||||
{
|
||||
cudaBoundaryModeZero = 0, /**< Zero boundary mode */
|
||||
cudaBoundaryModeClamp = 1, /**< Clamp boundary mode */
|
||||
cudaBoundaryModeTrap = 2 /**< Trap boundary mode */
|
||||
};
|
||||
|
||||
/**
|
||||
* CUDA Surface format modes
|
||||
*/
|
||||
enum __device_builtin__ cudaSurfaceFormatMode
|
||||
{
|
||||
cudaFormatModeForced = 0, /**< Forced format mode */
|
||||
cudaFormatModeAuto = 1 /**< Auto format mode */
|
||||
};
|
||||
|
||||
/**
|
||||
* CUDA Surface reference
|
||||
*/
|
||||
struct __device_builtin__ surfaceReference
|
||||
{
|
||||
/**
|
||||
* Channel descriptor for surface reference
|
||||
*/
|
||||
struct cudaChannelFormatDesc channelDesc;
|
||||
};
|
||||
|
||||
/**
|
||||
* An opaque value that represents a CUDA Surface object
|
||||
*/
|
||||
typedef __device_builtin__ unsigned long long cudaSurfaceObject_t;
|
||||
|
||||
/** @} */ /* END CUDART_TYPES */
|
||||
|
||||
#endif /* !__SURFACE_TYPES_H__ */
|
213
include/external/cuda/texture_types.h
vendored
213
include/external/cuda/texture_types.h
vendored
@@ -1,213 +0,0 @@
|
||||
/*
|
||||
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#if !defined(__TEXTURE_TYPES_H__)
|
||||
#define __TEXTURE_TYPES_H__
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
#include "driver_types.h"
|
||||
|
||||
/**
|
||||
* \addtogroup CUDART_TYPES
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
/* Numeric texture type identifiers. */
#define cudaTextureType1D             0x01
#define cudaTextureType2D             0x02
#define cudaTextureType3D             0x03
#define cudaTextureTypeCubemap        0x0C
#define cudaTextureType1DLayered      0xF1
#define cudaTextureType2DLayered      0xF2
#define cudaTextureTypeCubemapLayered 0xFC
|
||||
|
||||
/**
|
||||
* CUDA texture address modes
|
||||
*/
|
||||
enum __device_builtin__ cudaTextureAddressMode
|
||||
{
|
||||
cudaAddressModeWrap = 0, /**< Wrapping address mode */
|
||||
cudaAddressModeClamp = 1, /**< Clamp to edge address mode */
|
||||
cudaAddressModeMirror = 2, /**< Mirror address mode */
|
||||
cudaAddressModeBorder = 3 /**< Border address mode */
|
||||
};
|
||||
|
||||
/**
|
||||
* CUDA texture filter modes
|
||||
*/
|
||||
enum __device_builtin__ cudaTextureFilterMode
|
||||
{
|
||||
cudaFilterModePoint = 0, /**< Point filter mode */
|
||||
cudaFilterModeLinear = 1 /**< Linear filter mode */
|
||||
};
|
||||
|
||||
/**
|
||||
* CUDA texture read modes
|
||||
*/
|
||||
enum __device_builtin__ cudaTextureReadMode
|
||||
{
|
||||
cudaReadModeElementType = 0, /**< Read texture as specified element type */
|
||||
cudaReadModeNormalizedFloat = 1 /**< Read texture as normalized float */
|
||||
};
|
||||
|
||||
/**
|
||||
* CUDA texture reference
|
||||
*/
|
||||
struct __device_builtin__ textureReference
|
||||
{
|
||||
/**
|
||||
* Indicates whether texture reads are normalized or not
|
||||
*/
|
||||
int normalized;
|
||||
/**
|
||||
* Texture filter mode
|
||||
*/
|
||||
enum cudaTextureFilterMode filterMode;
|
||||
/**
|
||||
* Texture address mode for up to 3 dimensions
|
||||
*/
|
||||
enum cudaTextureAddressMode addressMode[3];
|
||||
/**
|
||||
* Channel descriptor for the texture reference
|
||||
*/
|
||||
struct cudaChannelFormatDesc channelDesc;
|
||||
/**
|
||||
* Perform sRGB->linear conversion during texture read
|
||||
*/
|
||||
int sRGB;
|
||||
/**
|
||||
* Limit to the anisotropy ratio
|
||||
*/
|
||||
unsigned int maxAnisotropy;
|
||||
/**
|
||||
* Mipmap filter mode
|
||||
*/
|
||||
enum cudaTextureFilterMode mipmapFilterMode;
|
||||
/**
|
||||
* Offset applied to the supplied mipmap level
|
||||
*/
|
||||
float mipmapLevelBias;
|
||||
/**
|
||||
* Lower end of the mipmap level range to clamp access to
|
||||
*/
|
||||
float minMipmapLevelClamp;
|
||||
/**
|
||||
* Upper end of the mipmap level range to clamp access to
|
||||
*/
|
||||
float maxMipmapLevelClamp;
|
||||
int __cudaReserved[15];
|
||||
};
|
||||
|
||||
/**
|
||||
* CUDA texture descriptor
|
||||
*/
|
||||
struct __device_builtin__ cudaTextureDesc
|
||||
{
|
||||
/**
|
||||
* Texture address mode for up to 3 dimensions
|
||||
*/
|
||||
enum cudaTextureAddressMode addressMode[3];
|
||||
/**
|
||||
* Texture filter mode
|
||||
*/
|
||||
enum cudaTextureFilterMode filterMode;
|
||||
/**
|
||||
* Texture read mode
|
||||
*/
|
||||
enum cudaTextureReadMode readMode;
|
||||
/**
|
||||
* Perform sRGB->linear conversion during texture read
|
||||
*/
|
||||
int sRGB;
|
||||
/**
|
||||
* Indicates whether texture reads are normalized or not
|
||||
*/
|
||||
int normalizedCoords;
|
||||
/**
|
||||
* Limit to the anisotropy ratio
|
||||
*/
|
||||
unsigned int maxAnisotropy;
|
||||
/**
|
||||
* Mipmap filter mode
|
||||
*/
|
||||
enum cudaTextureFilterMode mipmapFilterMode;
|
||||
/**
|
||||
* Offset applied to the supplied mipmap level
|
||||
*/
|
||||
float mipmapLevelBias;
|
||||
/**
|
||||
* Lower end of the mipmap level range to clamp access to
|
||||
*/
|
||||
float minMipmapLevelClamp;
|
||||
/**
|
||||
* Upper end of the mipmap level range to clamp access to
|
||||
*/
|
||||
float maxMipmapLevelClamp;
|
||||
};
|
||||
|
||||
/**
|
||||
* An opaque value that represents a CUDA texture object
|
||||
*/
|
||||
typedef __device_builtin__ unsigned long long cudaTextureObject_t;
|
||||
|
||||
/** @} */ /* END CUDART_TYPES */
|
||||
|
||||
#endif /* !__TEXTURE_TYPES_H__ */
|
177
include/external/cuda/vector_functions.h
vendored
177
include/external/cuda/vector_functions.h
vendored
@@ -1,177 +0,0 @@
|
||||
/*
|
||||
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#if !defined(__VECTOR_FUNCTIONS_H__)
|
||||
#define __VECTOR_FUNCTIONS_H__
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
#include "builtin_types.h"
|
||||
#include "host_defines.h"
|
||||
#include "vector_types.h"
|
||||
|
||||
/*
 * Declaration prefix for the make_* prototypes below: `static __inline__`
 * is added except in CUDA JIT mode (__CUDACC_RTC__).
 */
#ifndef __CUDACC_RTC__
#define __VECTOR_FUNCTIONS_DECL__ static __inline__ __host__ __device__
#else /* __CUDACC_RTC__ */
#define __VECTOR_FUNCTIONS_DECL__ __host__ __device__
#endif /* !__CUDACC_RTC__ */
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
/*
 * Prototypes of the make_<type><N> helpers, grouped by element type.
 * Each takes one scalar per component (x, y, z, w) and returns the vector type.
 */

/* signed char / unsigned char */
__VECTOR_FUNCTIONS_DECL__ char1  make_char1(signed char x);
__VECTOR_FUNCTIONS_DECL__ uchar1 make_uchar1(unsigned char x);
__VECTOR_FUNCTIONS_DECL__ char2  make_char2(signed char x, signed char y);
__VECTOR_FUNCTIONS_DECL__ uchar2 make_uchar2(unsigned char x, unsigned char y);
__VECTOR_FUNCTIONS_DECL__ char3  make_char3(signed char x, signed char y, signed char z);
__VECTOR_FUNCTIONS_DECL__ uchar3 make_uchar3(unsigned char x, unsigned char y, unsigned char z);
__VECTOR_FUNCTIONS_DECL__ char4  make_char4(signed char x, signed char y, signed char z, signed char w);
__VECTOR_FUNCTIONS_DECL__ uchar4 make_uchar4(unsigned char x, unsigned char y, unsigned char z, unsigned char w);

/* short / unsigned short */
__VECTOR_FUNCTIONS_DECL__ short1  make_short1(short x);
__VECTOR_FUNCTIONS_DECL__ ushort1 make_ushort1(unsigned short x);
__VECTOR_FUNCTIONS_DECL__ short2  make_short2(short x, short y);
__VECTOR_FUNCTIONS_DECL__ ushort2 make_ushort2(unsigned short x, unsigned short y);
__VECTOR_FUNCTIONS_DECL__ short3  make_short3(short x, short y, short z);
__VECTOR_FUNCTIONS_DECL__ ushort3 make_ushort3(unsigned short x, unsigned short y, unsigned short z);
__VECTOR_FUNCTIONS_DECL__ short4  make_short4(short x, short y, short z, short w);
__VECTOR_FUNCTIONS_DECL__ ushort4 make_ushort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w);

/* int / unsigned int */
__VECTOR_FUNCTIONS_DECL__ int1  make_int1(int x);
__VECTOR_FUNCTIONS_DECL__ uint1 make_uint1(unsigned int x);
__VECTOR_FUNCTIONS_DECL__ int2  make_int2(int x, int y);
__VECTOR_FUNCTIONS_DECL__ uint2 make_uint2(unsigned int x, unsigned int y);
__VECTOR_FUNCTIONS_DECL__ int3  make_int3(int x, int y, int z);
__VECTOR_FUNCTIONS_DECL__ uint3 make_uint3(unsigned int x, unsigned int y, unsigned int z);
__VECTOR_FUNCTIONS_DECL__ int4  make_int4(int x, int y, int z, int w);
__VECTOR_FUNCTIONS_DECL__ uint4 make_uint4(unsigned int x, unsigned int y, unsigned int z, unsigned int w);

/* long int / unsigned long int */
__VECTOR_FUNCTIONS_DECL__ long1  make_long1(long int x);
__VECTOR_FUNCTIONS_DECL__ ulong1 make_ulong1(unsigned long int x);
__VECTOR_FUNCTIONS_DECL__ long2  make_long2(long int x, long int y);
__VECTOR_FUNCTIONS_DECL__ ulong2 make_ulong2(unsigned long int x, unsigned long int y);
__VECTOR_FUNCTIONS_DECL__ long3  make_long3(long int x, long int y, long int z);
__VECTOR_FUNCTIONS_DECL__ ulong3 make_ulong3(unsigned long int x, unsigned long int y, unsigned long int z);
__VECTOR_FUNCTIONS_DECL__ long4  make_long4(long int x, long int y, long int z, long int w);
__VECTOR_FUNCTIONS_DECL__ ulong4 make_ulong4(unsigned long int x, unsigned long int y, unsigned long int z, unsigned long int w);

/* float */
__VECTOR_FUNCTIONS_DECL__ float1 make_float1(float x);
__VECTOR_FUNCTIONS_DECL__ float2 make_float2(float x, float y);
__VECTOR_FUNCTIONS_DECL__ float3 make_float3(float x, float y, float z);
__VECTOR_FUNCTIONS_DECL__ float4 make_float4(float x, float y, float z, float w);

/* long long int / unsigned long long int */
__VECTOR_FUNCTIONS_DECL__ longlong1  make_longlong1(long long int x);
__VECTOR_FUNCTIONS_DECL__ ulonglong1 make_ulonglong1(unsigned long long int x);
__VECTOR_FUNCTIONS_DECL__ longlong2  make_longlong2(long long int x, long long int y);
__VECTOR_FUNCTIONS_DECL__ ulonglong2 make_ulonglong2(unsigned long long int x, unsigned long long int y);
__VECTOR_FUNCTIONS_DECL__ longlong3  make_longlong3(long long int x, long long int y, long long int z);
__VECTOR_FUNCTIONS_DECL__ ulonglong3 make_ulonglong3(unsigned long long int x, unsigned long long int y, unsigned long long int z);
__VECTOR_FUNCTIONS_DECL__ longlong4  make_longlong4(long long int x, long long int y, long long int z, long long int w);
__VECTOR_FUNCTIONS_DECL__ ulonglong4 make_ulonglong4(unsigned long long int x, unsigned long long int y, unsigned long long int z, unsigned long long int w);

/* double */
__VECTOR_FUNCTIONS_DECL__ double1 make_double1(double x);
__VECTOR_FUNCTIONS_DECL__ double2 make_double2(double x, double y);
__VECTOR_FUNCTIONS_DECL__ double3 make_double3(double x, double y, double z);
__VECTOR_FUNCTIONS_DECL__ double4 make_double4(double x, double y, double z, double w);
|
||||
|
||||
/* The declaration prefix is only needed for the prototypes in this header. */
#undef __VECTOR_FUNCTIONS_DECL__

/* Pull in vector_functions.hpp except in CUDA JIT mode (__CUDACC_RTC__). */
#ifndef __CUDACC_RTC__
#include "vector_functions.hpp"
#endif /* !__CUDACC_RTC__ */
|
||||
|
||||
#endif /* !__VECTOR_FUNCTIONS_H__ */
|
318
include/external/cuda/vector_functions.hpp
vendored
318
include/external/cuda/vector_functions.hpp
vendored
@@ -1,318 +0,0 @@
|
||||
/*
|
||||
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#if !defined(__VECTOR_FUNCTIONS_HPP__)
|
||||
#define __VECTOR_FUNCTIONS_HPP__
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
#include "builtin_types.h"
|
||||
#include "host_defines.h"
|
||||
#include "vector_types.h"
|
||||
|
||||
/*
 * Definition prefix for the make_* functions below: `static __inline__`
 * is added except in CUDA JIT mode (__CUDACC_RTC__).
 */
#ifndef __CUDACC_RTC__
#define __VECTOR_FUNCTIONS_DECL__ static __inline__ __host__ __device__
#else /* __CUDACC_RTC__ */
#define __VECTOR_FUNCTIONS_DECL__ __host__ __device__
#endif /* !__CUDACC_RTC__ */
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
/* Factory helpers for the char / uchar vector types: every argument is
 * stored in the member of the same name and the value is returned by copy. */
__VECTOR_FUNCTIONS_DECL__ char1 make_char1(signed char x)
{
  char1 v;
  v.x = x;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ uchar1 make_uchar1(unsigned char x)
{
  uchar1 v;
  v.x = x;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ char2 make_char2(signed char x, signed char y)
{
  char2 v;
  v.x = x;
  v.y = y;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ uchar2 make_uchar2(unsigned char x, unsigned char y)
{
  uchar2 v;
  v.x = x;
  v.y = y;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ char3 make_char3(signed char x, signed char y, signed char z)
{
  char3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ uchar3 make_uchar3(unsigned char x, unsigned char y, unsigned char z)
{
  uchar3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ char4 make_char4(signed char x, signed char y, signed char z, signed char w)
{
  char4 v;
  v.x = x;
  v.y = y;
  v.z = z;
  v.w = w;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ uchar4 make_uchar4(unsigned char x, unsigned char y, unsigned char z, unsigned char w)
{
  uchar4 v;
  v.x = x;
  v.y = y;
  v.z = z;
  v.w = w;
  return v;
}
|
||||
|
||||
/* Factory helpers for the short / ushort vector types: every argument is
 * stored in the member of the same name and the value is returned by copy. */
__VECTOR_FUNCTIONS_DECL__ short1 make_short1(short x)
{
  short1 v;
  v.x = x;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ ushort1 make_ushort1(unsigned short x)
{
  ushort1 v;
  v.x = x;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ short2 make_short2(short x, short y)
{
  short2 v;
  v.x = x;
  v.y = y;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ ushort2 make_ushort2(unsigned short x, unsigned short y)
{
  ushort2 v;
  v.x = x;
  v.y = y;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ short3 make_short3(short x, short y, short z)
{
  short3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ ushort3 make_ushort3(unsigned short x, unsigned short y, unsigned short z)
{
  ushort3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ short4 make_short4(short x, short y, short z, short w)
{
  short4 v;
  v.x = x;
  v.y = y;
  v.z = z;
  v.w = w;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ ushort4 make_ushort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
{
  ushort4 v;
  v.x = x;
  v.y = y;
  v.z = z;
  v.w = w;
  return v;
}
|
||||
|
||||
/* Factory helpers for the int / uint vector types: every argument is
 * stored in the member of the same name and the value is returned by copy. */
__VECTOR_FUNCTIONS_DECL__ int1 make_int1(int x)
{
  int1 v;
  v.x = x;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ uint1 make_uint1(unsigned int x)
{
  uint1 v;
  v.x = x;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ int2 make_int2(int x, int y)
{
  int2 v;
  v.x = x;
  v.y = y;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ uint2 make_uint2(unsigned int x, unsigned int y)
{
  uint2 v;
  v.x = x;
  v.y = y;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ int3 make_int3(int x, int y, int z)
{
  int3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ uint3 make_uint3(unsigned int x, unsigned int y, unsigned int z)
{
  uint3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ int4 make_int4(int x, int y, int z, int w)
{
  int4 v;
  v.x = x;
  v.y = y;
  v.z = z;
  v.w = w;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ uint4 make_uint4(unsigned int x, unsigned int y, unsigned int z, unsigned int w)
{
  uint4 v;
  v.x = x;
  v.y = y;
  v.z = z;
  v.w = w;
  return v;
}
|
||||
|
||||
/* Factory helpers for the long / ulong vector types: every argument is
 * stored in the member of the same name and the value is returned by copy. */
__VECTOR_FUNCTIONS_DECL__ long1 make_long1(long int x)
{
  long1 v;
  v.x = x;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ ulong1 make_ulong1(unsigned long int x)
{
  ulong1 v;
  v.x = x;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ long2 make_long2(long int x, long int y)
{
  long2 v;
  v.x = x;
  v.y = y;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ ulong2 make_ulong2(unsigned long int x, unsigned long int y)
{
  ulong2 v;
  v.x = x;
  v.y = y;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ long3 make_long3(long int x, long int y, long int z)
{
  long3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ ulong3 make_ulong3(unsigned long int x, unsigned long int y, unsigned long int z)
{
  ulong3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ long4 make_long4(long int x, long int y, long int z, long int w)
{
  long4 v;
  v.x = x;
  v.y = y;
  v.z = z;
  v.w = w;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ ulong4 make_ulong4(unsigned long int x, unsigned long int y, unsigned long int z, unsigned long int w)
{
  ulong4 v;
  v.x = x;
  v.y = y;
  v.z = z;
  v.w = w;
  return v;
}
|
||||
|
||||
/* Factory helpers for the float vector types: every argument is stored in
 * the member of the same name and the value is returned by copy. */
__VECTOR_FUNCTIONS_DECL__ float1 make_float1(float x)
{
  float1 v;
  v.x = x;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ float2 make_float2(float x, float y)
{
  float2 v;
  v.x = x;
  v.y = y;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ float3 make_float3(float x, float y, float z)
{
  float3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ float4 make_float4(float x, float y, float z, float w)
{
  float4 v;
  v.x = x;
  v.y = y;
  v.z = z;
  v.w = w;
  return v;
}
|
||||
|
||||
/* Factory helpers for the longlong / ulonglong vector types: every argument
 * is stored in the member of the same name and the value is returned by copy. */
__VECTOR_FUNCTIONS_DECL__ longlong1 make_longlong1(long long int x)
{
  longlong1 v;
  v.x = x;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ ulonglong1 make_ulonglong1(unsigned long long int x)
{
  ulonglong1 v;
  v.x = x;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ longlong2 make_longlong2(long long int x, long long int y)
{
  longlong2 v;
  v.x = x;
  v.y = y;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ ulonglong2 make_ulonglong2(unsigned long long int x, unsigned long long int y)
{
  ulonglong2 v;
  v.x = x;
  v.y = y;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ longlong3 make_longlong3(long long int x, long long int y, long long int z)
{
  longlong3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ ulonglong3 make_ulonglong3(unsigned long long int x, unsigned long long int y, unsigned long long int z)
{
  ulonglong3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ longlong4 make_longlong4(long long int x, long long int y, long long int z, long long int w)
{
  longlong4 v;
  v.x = x;
  v.y = y;
  v.z = z;
  v.w = w;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ ulonglong4 make_ulonglong4(unsigned long long int x, unsigned long long int y, unsigned long long int z, unsigned long long int w)
{
  ulonglong4 v;
  v.x = x;
  v.y = y;
  v.z = z;
  v.w = w;
  return v;
}
|
||||
|
||||
/* Factory helpers for the double vector types: every argument is stored in
 * the member of the same name and the value is returned by copy. */
__VECTOR_FUNCTIONS_DECL__ double1 make_double1(double x)
{
  double1 v;
  v.x = x;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ double2 make_double2(double x, double y)
{
  double2 v;
  v.x = x;
  v.y = y;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ double3 make_double3(double x, double y, double z)
{
  double3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}

__VECTOR_FUNCTIONS_DECL__ double4 make_double4(double x, double y, double z, double w)
{
  double4 v;
  v.x = x;
  v.y = y;
  v.z = z;
  v.w = w;
  return v;
}
|
||||
|
||||
#undef __VECTOR_FUNCTIONS_DECL__
|
||||
|
||||
#endif /* !__VECTOR_FUNCTIONS_HPP__ */
|
||||
|
431
include/external/cuda/vector_types.h
vendored
431
include/external/cuda/vector_types.h
vendored
@@ -1,431 +0,0 @@
|
||||
/*
|
||||
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* NOTICE TO LICENSEE:
|
||||
*
|
||||
* This source code and/or documentation ("Licensed Deliverables") are
|
||||
* subject to NVIDIA intellectual property rights under U.S. and
|
||||
* international Copyright laws.
|
||||
*
|
||||
* These Licensed Deliverables contained herein is PROPRIETARY and
|
||||
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
||||
* conditions of a form of NVIDIA software license agreement by and
|
||||
* between NVIDIA and Licensee ("License Agreement") or electronically
|
||||
* accepted by Licensee. Notwithstanding any terms or conditions to
|
||||
* the contrary in the License Agreement, reproduction or disclosure
|
||||
* of the Licensed Deliverables to any third party without the express
|
||||
* written consent of NVIDIA is prohibited.
|
||||
*
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
||||
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
||||
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
||||
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
||||
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
||||
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
||||
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
* OF THESE LICENSED DELIVERABLES.
|
||||
*
|
||||
* U.S. Government End Users. These Licensed Deliverables are a
|
||||
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
||||
* 1995), consisting of "commercial computer software" and "commercial
|
||||
* computer software documentation" as such terms are used in 48
|
||||
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
||||
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
||||
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
||||
* U.S. Government End Users acquire the Licensed Deliverables with
|
||||
* only those rights set forth herein.
|
||||
*
|
||||
* Any use of the Licensed Deliverables in individual and commercial
|
||||
* software must include, in the user documentation and internal
|
||||
* comments to the code, the above Disclaimer and U.S. Government End
|
||||
* Users Notice.
|
||||
*/
|
||||
|
||||
#if !defined(__VECTOR_TYPES_H__)
|
||||
#define __VECTOR_TYPES_H__
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
#if !defined(__CUDA_LIBDEVICE__) && !defined(__CUDACC_RTC__)
|
||||
#define EXCLUDE_FROM_RTC
|
||||
#include "builtin_types.h"
|
||||
#undef EXCLUDE_FROM_RTC
|
||||
#endif /* !__CUDA_LIBDEVICE__ && !__CUDACC_RTC__ */
|
||||
#include "host_defines.h"
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
/*
 * __cuda_builtin_vector_align8(tag, members)
 *
 * Expands to the definition of `struct tag` holding `members`.
 *  - Host compilation on 32-bit Windows: the members sit in an anonymous
 *    struct that shares an anonymous union with a `long long int` bit-field
 *    struct (presumably to obtain 8-byte alignment without `__align__`).
 *    Warnings 4201 and 4408 are disabled for that construct; the matching
 *    `#pragma warning(pop)` appears after the vector struct definitions.
 *  - Everywhere else: the struct is simply declared with `__align__(8)`.
 */
#if !defined(__CUDACC__) && !defined(__CUDACC_RTC__) && !defined(__CUDABE__) && \
    defined(_WIN32) && !defined(_WIN64)

#pragma warning(push)
#pragma warning(disable: 4201 4408)

#define __cuda_builtin_vector_align8(tag, members) \
struct __device_builtin__ tag                      \
{                                                  \
    union                                          \
    {                                              \
        struct { members };                        \
        struct { long long int :1,:0; };           \
    };                                             \
}

#else /* !__CUDACC__ && !__CUDACC_RTC__ && !__CUDABE__ && _WIN32 && !_WIN64 */

#define __cuda_builtin_vector_align8(tag, members) \
struct __device_builtin__ __align__(8) tag         \
{                                                  \
    members                                        \
}

#endif /* !__CUDACC__ && !__CUDACC_RTC__ && !__CUDABE__ && _WIN32 && !_WIN64 */
|
||||
|
||||
struct __device_builtin__ char1
|
||||
{
|
||||
signed char x;
|
||||
};
|
||||
|
||||
struct __device_builtin__ uchar1
|
||||
{
|
||||
unsigned char x;
|
||||
};
|
||||
|
||||
|
||||
struct __device_builtin__ __align__(2) char2
|
||||
{
|
||||
signed char x, y;
|
||||
};
|
||||
|
||||
struct __device_builtin__ __align__(2) uchar2
|
||||
{
|
||||
unsigned char x, y;
|
||||
};
|
||||
|
||||
struct __device_builtin__ char3
|
||||
{
|
||||
signed char x, y, z;
|
||||
};
|
||||
|
||||
struct __device_builtin__ uchar3
|
||||
{
|
||||
unsigned char x, y, z;
|
||||
};
|
||||
|
||||
struct __device_builtin__ __align__(4) char4
|
||||
{
|
||||
signed char x, y, z, w;
|
||||
};
|
||||
|
||||
struct __device_builtin__ __align__(4) uchar4
|
||||
{
|
||||
unsigned char x, y, z, w;
|
||||
};
|
||||
|
||||
struct __device_builtin__ short1
|
||||
{
|
||||
short x;
|
||||
};
|
||||
|
||||
struct __device_builtin__ ushort1
|
||||
{
|
||||
unsigned short x;
|
||||
};
|
||||
|
||||
struct __device_builtin__ __align__(4) short2
|
||||
{
|
||||
short x, y;
|
||||
};
|
||||
|
||||
struct __device_builtin__ __align__(4) ushort2
|
||||
{
|
||||
unsigned short x, y;
|
||||
};
|
||||
|
||||
struct __device_builtin__ short3
|
||||
{
|
||||
short x, y, z;
|
||||
};
|
||||
|
||||
struct __device_builtin__ ushort3
|
||||
{
|
||||
unsigned short x, y, z;
|
||||
};
|
||||
|
||||
__cuda_builtin_vector_align8(short4, short x; short y; short z; short w;);
|
||||
__cuda_builtin_vector_align8(ushort4, unsigned short x; unsigned short y; unsigned short z; unsigned short w;);
|
||||
|
||||
struct __device_builtin__ int1
|
||||
{
|
||||
int x;
|
||||
};
|
||||
|
||||
struct __device_builtin__ uint1
|
||||
{
|
||||
unsigned int x;
|
||||
};
|
||||
|
||||
__cuda_builtin_vector_align8(int2, int x; int y;);
|
||||
__cuda_builtin_vector_align8(uint2, unsigned int x; unsigned int y;);
|
||||
|
||||
struct __device_builtin__ int3
|
||||
{
|
||||
int x, y, z;
|
||||
};
|
||||
|
||||
struct __device_builtin__ uint3
|
||||
{
|
||||
unsigned int x, y, z;
|
||||
};
|
||||
|
||||
struct __device_builtin__ __builtin_align__(16) int4
|
||||
{
|
||||
int x, y, z, w;
|
||||
};
|
||||
|
||||
struct __device_builtin__ __builtin_align__(16) uint4
|
||||
{
|
||||
unsigned int x, y, z, w;
|
||||
};
|
||||
|
||||
struct __device_builtin__ long1
|
||||
{
|
||||
long int x;
|
||||
};
|
||||
|
||||
struct __device_builtin__ ulong1
|
||||
{
|
||||
unsigned long x;
|
||||
};
|
||||
|
||||
#if defined(__CUDACC_RTC__) || defined(_WIN32)
|
||||
__cuda_builtin_vector_align8(long2, long int x; long int y;);
|
||||
__cuda_builtin_vector_align8(ulong2, unsigned long int x; unsigned long int y;);
|
||||
#else /* __CUDACC_RTC__ || _WIN32 */
|
||||
|
||||
struct __device_builtin__ __align__(2*sizeof(long int)) long2
|
||||
{
|
||||
long int x, y;
|
||||
};
|
||||
|
||||
struct __device_builtin__ __align__(2*sizeof(unsigned long int)) ulong2
|
||||
{
|
||||
unsigned long int x, y;
|
||||
};
|
||||
|
||||
#endif /* __CUDACC_RTC__ || _WIN32 */
|
||||
|
||||
struct __device_builtin__ long3
|
||||
{
|
||||
long int x, y, z;
|
||||
};
|
||||
|
||||
struct __device_builtin__ ulong3
|
||||
{
|
||||
unsigned long int x, y, z;
|
||||
};
|
||||
|
||||
struct __device_builtin__ __builtin_align__(16) long4
|
||||
{
|
||||
long int x, y, z, w;
|
||||
};
|
||||
|
||||
struct __device_builtin__ __builtin_align__(16) ulong4
|
||||
{
|
||||
unsigned long int x, y, z, w;
|
||||
};
|
||||
|
||||
struct __device_builtin__ float1
|
||||
{
|
||||
float x;
|
||||
};
|
||||
|
||||
#if !defined(__CUDACC__) && !defined(__CUDABE__) && defined(__arm__) && \
|
||||
defined(__ARM_PCS_VFP) && __GNUC__ == 4 && __GNUC_MINOR__ == 6
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-pedantic"
|
||||
|
||||
struct __device_builtin__ __attribute__((aligned(8))) float2
|
||||
{
|
||||
float x; float y; float __cuda_gnu_arm_ice_workaround[0];
|
||||
};
|
||||
|
||||
#pragma GCC poison __cuda_gnu_arm_ice_workaround
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
#else /* !__CUDACC__ && !__CUDABE__ && __arm__ && __ARM_PCS_VFP &&
|
||||
__GNUC__ == 4&& __GNUC_MINOR__ == 6 */
|
||||
|
||||
__cuda_builtin_vector_align8(float2, float x; float y;);
|
||||
|
||||
#endif /* !__CUDACC__ && !__CUDABE__ && __arm__ && __ARM_PCS_VFP &&
|
||||
__GNUC__ == 4&& __GNUC_MINOR__ == 6 */
|
||||
|
||||
struct __device_builtin__ float3
|
||||
{
|
||||
float x, y, z;
|
||||
};
|
||||
|
||||
struct __device_builtin__ __builtin_align__(16) float4
|
||||
{
|
||||
float x, y, z, w;
|
||||
};
|
||||
|
||||
struct __device_builtin__ longlong1
|
||||
{
|
||||
long long int x;
|
||||
};
|
||||
|
||||
struct __device_builtin__ ulonglong1
|
||||
{
|
||||
unsigned long long int x;
|
||||
};
|
||||
|
||||
struct __device_builtin__ __builtin_align__(16) longlong2
|
||||
{
|
||||
long long int x, y;
|
||||
};
|
||||
|
||||
struct __device_builtin__ __builtin_align__(16) ulonglong2
|
||||
{
|
||||
unsigned long long int x, y;
|
||||
};
|
||||
|
||||
struct __device_builtin__ longlong3
|
||||
{
|
||||
long long int x, y, z;
|
||||
};
|
||||
|
||||
struct __device_builtin__ ulonglong3
|
||||
{
|
||||
unsigned long long int x, y, z;
|
||||
};
|
||||
|
||||
struct __device_builtin__ __builtin_align__(16) longlong4
|
||||
{
|
||||
long long int x, y, z ,w;
|
||||
};
|
||||
|
||||
struct __device_builtin__ __builtin_align__(16) ulonglong4
|
||||
{
|
||||
unsigned long long int x, y, z, w;
|
||||
};
|
||||
|
||||
struct __device_builtin__ double1
|
||||
{
|
||||
double x;
|
||||
};
|
||||
|
||||
struct __device_builtin__ __builtin_align__(16) double2
|
||||
{
|
||||
double x, y;
|
||||
};
|
||||
|
||||
struct __device_builtin__ double3
|
||||
{
|
||||
double x, y, z;
|
||||
};
|
||||
|
||||
struct __device_builtin__ __builtin_align__(16) double4
|
||||
{
|
||||
double x, y, z, w;
|
||||
};
|
||||
|
||||
#if !defined(__CUDACC__) && !defined(__CUDABE__) && \
|
||||
defined(_WIN32) && !defined(_WIN64)
|
||||
|
||||
#pragma warning(pop)
|
||||
|
||||
#endif /* !__CUDACC__ && !__CUDABE__ && _WIN32 && !_WIN64 */
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
/* Aliases that make every vector struct usable without the `struct` keyword
 * (needed when this header is compiled as C). */

/* char */
typedef __device_builtin__ struct char1      char1;
typedef __device_builtin__ struct uchar1     uchar1;
typedef __device_builtin__ struct char2      char2;
typedef __device_builtin__ struct uchar2     uchar2;
typedef __device_builtin__ struct char3      char3;
typedef __device_builtin__ struct uchar3     uchar3;
typedef __device_builtin__ struct char4      char4;
typedef __device_builtin__ struct uchar4     uchar4;

/* short */
typedef __device_builtin__ struct short1     short1;
typedef __device_builtin__ struct ushort1    ushort1;
typedef __device_builtin__ struct short2     short2;
typedef __device_builtin__ struct ushort2    ushort2;
typedef __device_builtin__ struct short3     short3;
typedef __device_builtin__ struct ushort3    ushort3;
typedef __device_builtin__ struct short4     short4;
typedef __device_builtin__ struct ushort4    ushort4;

/* int */
typedef __device_builtin__ struct int1       int1;
typedef __device_builtin__ struct uint1      uint1;
typedef __device_builtin__ struct int2       int2;
typedef __device_builtin__ struct uint2      uint2;
typedef __device_builtin__ struct int3       int3;
typedef __device_builtin__ struct uint3      uint3;
typedef __device_builtin__ struct int4       int4;
typedef __device_builtin__ struct uint4      uint4;

/* long */
typedef __device_builtin__ struct long1      long1;
typedef __device_builtin__ struct ulong1     ulong1;
typedef __device_builtin__ struct long2      long2;
typedef __device_builtin__ struct ulong2     ulong2;
typedef __device_builtin__ struct long3      long3;
typedef __device_builtin__ struct ulong3     ulong3;
typedef __device_builtin__ struct long4      long4;
typedef __device_builtin__ struct ulong4     ulong4;

/* float */
typedef __device_builtin__ struct float1     float1;
typedef __device_builtin__ struct float2     float2;
typedef __device_builtin__ struct float3     float3;
typedef __device_builtin__ struct float4     float4;

/* long long */
typedef __device_builtin__ struct longlong1  longlong1;
typedef __device_builtin__ struct ulonglong1 ulonglong1;
typedef __device_builtin__ struct longlong2  longlong2;
typedef __device_builtin__ struct ulonglong2 ulonglong2;
typedef __device_builtin__ struct longlong3  longlong3;
typedef __device_builtin__ struct ulonglong3 ulonglong3;
typedef __device_builtin__ struct longlong4  longlong4;
typedef __device_builtin__ struct ulonglong4 ulonglong4;

/* double */
typedef __device_builtin__ struct double1    double1;
typedef __device_builtin__ struct double2    double2;
typedef __device_builtin__ struct double3    double3;
typedef __device_builtin__ struct double4    double4;
|
||||
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
struct __device_builtin__ dim3
|
||||
{
|
||||
unsigned int x, y, z;
|
||||
#if defined(__cplusplus)
|
||||
__host__ __device__ dim3(unsigned int vx = 1, unsigned int vy = 1, unsigned int vz = 1) : x(vx), y(vy), z(vz) {}
|
||||
__host__ __device__ dim3(uint3 v) : x(v.x), y(v.y), z(v.z) {}
|
||||
__host__ __device__ operator uint3(void) { uint3 t; t.x = x; t.y = y; t.z = z; return t; }
|
||||
#endif /* __cplusplus */
|
||||
};
|
||||
|
||||
typedef __device_builtin__ struct dim3 dim3;
|
||||
|
||||
#undef __cuda_builtin_vector_align8
|
||||
|
||||
#endif /* !__VECTOR_TYPES_H__ */
|
87
include/isaac/api.h
Normal file
87
include/isaac/api.h
Normal file
@@ -0,0 +1,87 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "isaac/runtime/predict.h"
|
||||
#include "isaac/driver/backend.h"
|
||||
#include "isaac/driver/cublas.h"
|
||||
#include "isaac/driver/context.h"
|
||||
#include "isaac/driver/kernel.h"
|
||||
#include "isaac/driver/buffer.h"
|
||||
#include "isaac/driver/stream.h"
|
||||
#include "isaac/tools/collections.hpp"
|
||||
#include "isaac/templates/conv.h"
|
||||
#include "isaac/templates/gemm.h"
|
||||
|
||||
namespace isaac{
|
||||
|
||||
/*
 * Looks up the GEMM kernel matching the requested problem description --
 * generating and compiling it on a cache miss -- and enqueues it on `stream`.
 *
 * Cache entries are keyed on
 *   (stream, dtype, AT, BT, M, N, K, offa, lda, offb, ldb, offc, ldc).
 * alpha, A, B, beta and C are not part of the key; they are forwarded
 * unchanged to templates::GEMM::enqueue.
 */
void GEMM(driver::Device const & device, driver::Stream & stream,
          DType dtype, IsaacOperation_t AT, IsaacOperation_t BT, param_t M, param_t N, param_t K,
          size_t offa, size_t lda, size_t offb, size_t ldb, size_t offc, size_t ldc,
          scalar const & alpha, driver::Buffer const & A, driver::Buffer const & B, scalar const & beta, driver::Buffer& C)
{
  typedef std::tuple<driver::Stream, DType, IsaacOperation_t, IsaacOperation_t,
                     param_t, param_t, param_t, size_t, size_t, size_t, size_t, size_t, size_t> key_type;
  typedef std::pair<std::shared_ptr<templates::GEMM>, std::shared_ptr<driver::Kernel>> value_type;

  // Arguments of the call that is currently being served.
  //
  // `compile` and `cache` below are function-local statics: they are created
  // during the FIRST call and reused by every later one. Capturing the
  // parameters by reference ([&]) therefore bound the callback to the first
  // call's arguments, which are gone once that call returns -- any later
  // cache miss read dangling references. The callback now captures nothing
  // and reads the live arguments through `pending`, which is re-pointed
  // before every lookup.
  struct request_t
  {
    driver::Device const * device;
    driver::Stream * stream;
    DType dtype;
    IsaacOperation_t AT, BT;
    param_t M, N, K;
    size_t offa, lda, offb, ldb, offc, ldc;
  };
  request_t const request = {&device, &stream, dtype, AT, BT, M, N, K, offa, lda, offb, ldb, offc, ldc};
  // thread_local so that concurrent callers never observe each other's arguments.
  // NOTE(review): assumes CachedMap::get runs the callback synchronously on the
  // calling thread -- confirm against isaac/tools/collections.hpp.
  static thread_local request_t const * pending = nullptr;
  pending = &request;

  static std::function<value_type()> compile = []() -> value_type {
    request_t const & rq = *pending;
    //Fetch profile
    runtime::GEMMProfile* profile = (runtime::GEMMProfile*)runtime::database.at({rq.device->architecture(), runtime::GEMM}).get();
    templates::GEMM generator = profile->predict(*rq.device, rq.dtype, rq.AT, rq.BT, rq.M, rq.N, rq.K,
                                                 rq.offa, rq.lda, rq.offb, rq.ldb, rq.offc, rq.ldc);
    //Generate the source and build the module in the stream's context
    std::string src = generator.dump(*rq.device, "gemm");
    driver::Module module(rq.stream->context(), src);
    return value_type(std::make_shared<templates::GEMM>(generator), std::make_shared<driver::Kernel>(module, "gemm"));
  };
  static cpp::CachedMap<key_type, value_type> cache(compile);

  //Retrieve profile/kernel and execute
  value_type const & value = cache.get(key_type(stream, dtype, AT, BT, M, N, K, offa, lda, offb, ldb, offc, ldc));
  value.first->enqueue(*value.second, stream, alpha, A, B, beta, C);
}
|
||||
|
||||
/*
 * Looks up the convolution kernel matching the requested problem description
 * -- generating and compiling it on a cache miss -- and enqueues it on `stream`.
 *
 * Cache entries are keyed on
 *   (stream, dtype, N, K, P, Q, C, R, S, H, W, pad_h, pad_w, stride_h, stride_w).
 * alpha, I, F, beta and O are not part of the key; they are forwarded
 * unchanged to templates::Conv::enqueue.
 */
void CONV(driver::Device const & device, driver::Stream & stream,
          DType dtype, param_t N, param_t K, param_t P, param_t Q, param_t C, param_t R, param_t S,
          param_t H, param_t W, param_t pad_h, param_t pad_w, param_t stride_h, param_t stride_w,
          scalar const & alpha, driver::Buffer const & I, driver::Buffer const & F, scalar const & beta, driver::Buffer& O)
{
  // H and W are part of the key: the generator is built from them (see the
  // predict() call below) and enqueue() does not receive them again, so two
  // problems that differ only in H/W must not share a cache entry.
  typedef std::tuple<driver::Stream, DType,
                     param_t, param_t, param_t, param_t, param_t, param_t, param_t,
                     param_t, param_t, param_t, param_t, param_t, param_t> key_type;
  typedef std::pair<std::shared_ptr<templates::Conv>, std::shared_ptr<driver::Kernel>> value_type;

  // Arguments of the call that is currently being served.
  //
  // `compile` and `cache` below are function-local statics: they are created
  // during the FIRST call and reused by every later one. Capturing the
  // parameters by reference ([&]) therefore bound the callback to the first
  // call's arguments, which are gone once that call returns -- any later
  // cache miss read dangling references. The callback now captures nothing
  // and reads the live arguments through `pending`, which is re-pointed
  // before every lookup.
  struct request_t
  {
    driver::Device const * device;
    driver::Stream * stream;
    DType dtype;
    param_t N, K, P, Q, C, R, S, H, W, pad_h, pad_w, stride_h, stride_w;
  };
  request_t const request = {&device, &stream, dtype, N, K, P, Q, C, R, S, H, W, pad_h, pad_w, stride_h, stride_w};
  // thread_local so that concurrent callers never observe each other's arguments.
  // NOTE(review): assumes CachedMap::get runs the callback synchronously on the
  // calling thread -- confirm against isaac/tools/collections.hpp.
  static thread_local request_t const * pending = nullptr;
  pending = &request;

  static std::function<value_type()> compile = []() -> value_type {
    request_t const & rq = *pending;
    //Fetch profile
    runtime::ConvProfile* profile = (runtime::ConvProfile*)runtime::database.at({rq.device->architecture(), runtime::CONV}).get();
    templates::Conv generator = profile->predict(*rq.device, rq.dtype, rq.C, rq.H, rq.W, rq.N, rq.K, rq.P, rq.Q, rq.R, rq.S,
                                                 rq.pad_h, rq.pad_w, rq.stride_h, rq.stride_w);
    //Generate the source and build the module in the stream's context
    std::string src = generator.dump(*rq.device, "fconv");
    driver::Module module(rq.stream->context(), src);
    return value_type(std::make_shared<templates::Conv>(generator), std::make_shared<driver::Kernel>(module, "fconv"));
  };
  static cpp::CachedMap<key_type, value_type> cache(compile);

  //Retrieve profile/kernel and execute
  value_type const & value = cache.get(key_type(stream, dtype, N, K, P, Q, C, R, S, H, W, pad_h, pad_w, stride_h, stride_w));
  value.first->enqueue(*value.second, stream, alpha, I, F, beta, O);
}
|
||||
|
||||
|
||||
|
||||
}
|
@@ -1,337 +0,0 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ISAAC_ARRAY_H_
|
||||
#define ISAAC_ARRAY_H_
|
||||
|
||||
#include <iostream>
|
||||
#include "isaac/defines.h"
|
||||
#include "isaac/driver/backend.h"
|
||||
#include "isaac/jit/syntax/expression/expression.h"
|
||||
#include "isaac/runtime/handler.h"
|
||||
#include "isaac/types.h"
|
||||
#include "isaac/tools/cpp/tuple.hpp"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
|
||||
class scalar;
|
||||
class view;
|
||||
|
||||
class ISAACAPI array_base
|
||||
{
|
||||
int_t dsize();
|
||||
public:
|
||||
//1D Constructors
|
||||
explicit array_base(int_t size1, numeric_type dtype = FLOAT_TYPE, driver::Context const & context = driver::backend::contexts::get_default());
|
||||
array_base(int_t size1, numeric_type dtype, driver::Buffer data, int_t start, int_t inc);
|
||||
template<typename DT>
|
||||
array_base(std::vector<DT> const & data, driver::Context const & context = driver::backend::contexts::get_default());
|
||||
array_base(array_base & v, slice const & s1);
|
||||
|
||||
//2D Constructors
|
||||
array_base(int_t size1, int_t size2, numeric_type dtype = FLOAT_TYPE, driver::Context const & context = driver::backend::contexts::get_default());
|
||||
array_base(int_t size1, int_t size2, numeric_type dtype, driver::Buffer data, int_t start, int_t ld);
|
||||
template<typename DT>
|
||||
array_base(int_t size1, int_t size2, std::vector<DT> const & data, driver::Context const & context = driver::backend::contexts::get_default());
|
||||
array_base(array_base & M, slice const & s1, slice const & s2);
|
||||
|
||||
//3D Constructors
|
||||
array_base(int_t size1, int_t size2, int_t size3, numeric_type dtype = FLOAT_TYPE, driver::Context const & context = driver::backend::contexts::get_default());
|
||||
|
||||
//General constructor
|
||||
template<typename DT>
|
||||
array_base(tuple const & shape, std::vector<DT> const & data, driver::Context const & context = driver::backend::contexts::get_default());
|
||||
array_base(tuple const & shape, numeric_type dtype, driver::Context const & context = driver::backend::contexts::get_default());
|
||||
array_base(tuple const & shape, numeric_type dtype, int_t start, tuple const & stride, driver::Context const & context = driver::backend::contexts::get_default());
|
||||
array_base(tuple const & shape, numeric_type dtype, int_t start, tuple const & stride, driver::Buffer const & data);
|
||||
explicit array_base(runtime::execution_handler const &);
|
||||
|
||||
//Make the class virtual
|
||||
virtual ~array_base() = 0;
|
||||
|
||||
//Getters
|
||||
numeric_type dtype() const;
|
||||
tuple const & shape() const;
|
||||
size_t dim() const;
|
||||
int_t start() const;
|
||||
tuple const & stride() const;
|
||||
driver::Context const & context() const;
|
||||
driver::Buffer const & data() const;
|
||||
driver::Buffer & data();
|
||||
|
||||
//Setters
|
||||
array_base& resize(int_t size1, int_t size2=1);
|
||||
|
||||
//Numeric operators
|
||||
array_base& operator=(array_base const &);
|
||||
array_base& operator=(expression_tree const &);
|
||||
array_base& operator=(runtime::execution_handler const &);
|
||||
template<class T>
|
||||
array_base & operator=(std::vector<T> const & rhs);
|
||||
array_base & operator=(value_scalar const & rhs);
|
||||
|
||||
expression_tree operator-();
|
||||
expression_tree operator!();
|
||||
|
||||
array_base& operator+=(value_scalar const &);
|
||||
array_base& operator+=(array_base const &);
|
||||
array_base& operator+=(expression_tree const &);
|
||||
array_base& operator-=(value_scalar const &);
|
||||
array_base& operator-=(array_base const &);
|
||||
array_base& operator-=(expression_tree const &);
|
||||
array_base& operator*=(value_scalar const &);
|
||||
array_base& operator*=(array_base const &);
|
||||
array_base& operator*=(expression_tree const &);
|
||||
array_base& operator/=(value_scalar const &);
|
||||
array_base& operator/=(array_base const &);
|
||||
array_base& operator/=(expression_tree const &);
|
||||
|
||||
//Indexing (1D)
|
||||
const scalar operator[](int_t) const;
|
||||
scalar operator[](int_t);
|
||||
view operator[](slice const &);
|
||||
|
||||
//Indexing (2D)
|
||||
view operator()(int_t, int_t);
|
||||
view operator()(slice const &, int_t);
|
||||
view operator()(int_t, slice const &);
|
||||
view operator()(slice const &, slice const &);
|
||||
const view operator()(int_t, int_t) const;
|
||||
const view operator()(slice const &, int_t) const;
|
||||
const view operator()(int_t, slice const &) const;
|
||||
const view operator()(slice const &, slice const &) const;
|
||||
|
||||
|
||||
protected:
|
||||
numeric_type dtype_;
|
||||
|
||||
tuple shape_;
|
||||
int_t start_;
|
||||
tuple stride_;
|
||||
|
||||
driver::Context context_;
|
||||
driver::Buffer data_;
|
||||
|
||||
public:
|
||||
const expression_tree T;
|
||||
};
|
||||
|
||||
// Concrete array type derived from array_base. It re-exports every
// array_base constructor and assignment operator, and adds constructors
// taking an array_base, another array, or an expression_tree.
class ISAACAPI array : public array_base
|
||||
{
|
||||
public:
|
||||
// Inherit all array_base constructors unchanged.
using array_base::array_base;
|
||||
//Copy constructors (from any array_base, or from another array)
|
||||
array(array_base const &);
|
||||
array(array const &);
|
||||
// Implicit construction from an expression tree.
// NOTE(review): presumably evaluates the expression into new storage -- confirm in the implementation.
array(expression_tree const & proxy);
|
||||
// Re-expose array_base's assignment operators, which the implicitly
// declared array::operator= would otherwise hide.
using array_base::operator=;
|
||||
};
|
||||
|
||||
// array_base subclass constructed from an existing array_base (optionally
// restricted by one or two slices) or from a raw buffer description.
// NOTE(review): presumably a non-owning window onto the underlying storage -- confirm in the implementation.
class ISAACAPI view : public array_base
|
||||
{
|
||||
public:
|
||||
// View over a whole array.
view(array_base & data);
|
||||
// View restricted by one slice.
view(array_base& data, slice const & s1);
|
||||
// View restricted by two slices (one per dimension).
view(array_base& data, slice const & s1, slice const & s2);
|
||||
// View described directly by element count, element type, buffer, start offset and increment.
view(int_t size1, numeric_type dtype, driver::Buffer data, int_t start, int_t inc);
|
||||
// Re-expose array_base's assignment operators (hidden otherwise by the implicit view::operator=).
using array_base::operator=;
|
||||
};
|
||||
|
||||
// array_base subclass representing a single value. It can be built from a
// buffer location, a host-side value_scalar, a bare dtype or an expression
// tree, and it declares conversion operators to the built-in arithmetic types.
class ISAACAPI scalar : public array_base
|
||||
{
|
||||
// These two value_scalar constructors are granted access to the private helpers below.
friend value_scalar::value_scalar(const scalar &);
|
||||
friend value_scalar::value_scalar(const expression_tree &);
|
||||
private:
|
||||
// NOTE(review): presumably copies this scalar's value into the given holder -- confirm in the implementation.
void inject(values_holder&) const;
|
||||
// Returns the value converted to T; used by the conversion operators declared below (TODO confirm).
template<class T> T cast() const;
|
||||
public:
|
||||
// Scalar located at `offset` inside an existing buffer.
explicit scalar(numeric_type dtype, const driver::Buffer &data, int_t offset);
|
||||
// Scalar initialised from a host-side value, in the given (default: backend default) context.
explicit scalar(value_scalar value, driver::Context const & context = driver::backend::contexts::get_default());
|
||||
// Scalar of the given element type, in the given (default: backend default) context.
explicit scalar(numeric_type dtype, driver::Context const & context = driver::backend::contexts::get_default());
|
||||
// Implicit construction from an expression tree.
scalar(expression_tree const & proxy);
|
||||
// Assignment from a host-side value.
scalar& operator=(value_scalar const &);
|
||||
// scalar& operator=(scalar const & s);
|
||||
// Re-expose array_base's assignment operators alongside the one declared above.
using array_base::operator =;
|
||||
|
||||
// Declares one `operator type() const` conversion per built-in arithmetic type listed below.
#define INSTANTIATE(type) operator type() const;
|
||||
INSTANTIATE(char)
|
||||
INSTANTIATE(unsigned char)
|
||||
INSTANTIATE(short)
|
||||
INSTANTIATE(unsigned short)
|
||||
INSTANTIATE(int)
|
||||
INSTANTIATE(unsigned int)
|
||||
INSTANTIATE(long)
|
||||
INSTANTIATE(unsigned long)
|
||||
INSTANTIATE(long long)
|
||||
INSTANTIATE(unsigned long long)
|
||||
INSTANTIATE(float)
|
||||
INSTANTIATE(double)
|
||||
// The helper macro is local to this class body.
#undef INSTANTIATE
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
//copy
|
||||
ISAACAPI void copy(void const * data, array_base & gx, driver::CommandQueue & queue, bool blocking = true);
|
||||
ISAACAPI void copy(array_base const & gx, void* data, driver::CommandQueue & queue, bool blocking = true);
|
||||
ISAACAPI void copy(void const *data, array_base &gx, bool blocking = true);
|
||||
ISAACAPI void copy(array_base const & gx, void* data, bool blocking = true);
|
||||
template<class T> ISAACAPI void copy(std::vector<T> const & cA, array_base& gA, driver::CommandQueue & queue, bool blocking = true);
|
||||
template<class T> ISAACAPI void copy(array_base const & gA, std::vector<T> & cA, driver::CommandQueue & queue, bool blocking = true);
|
||||
template<class T> ISAACAPI void copy(std::vector<T> const & cA, array_base & gA, bool blocking = true);
|
||||
template<class T> ISAACAPI void copy(array_base const & gA, std::vector<T> & cA, bool blocking = true);
|
||||
|
||||
//Operators
|
||||
//Binary operators
|
||||
|
||||
#define ISAAC_DECLARE_ELEMENT_BINARY_OPERATOR(OPNAME) \
|
||||
ISAACAPI expression_tree OPNAME (array_base const & x, expression_tree const & y);\
|
||||
ISAACAPI expression_tree OPNAME (array_base const & x, value_scalar const & y);\
|
||||
ISAACAPI expression_tree OPNAME (array_base const & x, array_base const & y);\
|
||||
\
|
||||
ISAACAPI expression_tree OPNAME (expression_tree const & x, expression_tree const & y);\
|
||||
ISAACAPI expression_tree OPNAME (expression_tree const & x, value_scalar const & y);\
|
||||
ISAACAPI expression_tree OPNAME (expression_tree const & x, array_base const & y);\
|
||||
\
|
||||
ISAACAPI expression_tree OPNAME (value_scalar const & y, expression_tree const & x);\
|
||||
ISAACAPI expression_tree OPNAME (value_scalar const & y, array_base const & x);\
|
||||
|
||||
ISAAC_DECLARE_ELEMENT_BINARY_OPERATOR(operator +)
|
||||
ISAAC_DECLARE_ELEMENT_BINARY_OPERATOR(operator -)
|
||||
ISAAC_DECLARE_ELEMENT_BINARY_OPERATOR(operator *)
|
||||
ISAAC_DECLARE_ELEMENT_BINARY_OPERATOR(operator /)
|
||||
|
||||
ISAAC_DECLARE_ELEMENT_BINARY_OPERATOR(operator >)
|
||||
ISAAC_DECLARE_ELEMENT_BINARY_OPERATOR(operator >=)
|
||||
ISAAC_DECLARE_ELEMENT_BINARY_OPERATOR(operator <)
|
||||
ISAAC_DECLARE_ELEMENT_BINARY_OPERATOR(operator <=)
|
||||
ISAAC_DECLARE_ELEMENT_BINARY_OPERATOR(operator ==)
|
||||
ISAAC_DECLARE_ELEMENT_BINARY_OPERATOR(operator !=)
|
||||
|
||||
ISAAC_DECLARE_ELEMENT_BINARY_OPERATOR(maximum)
|
||||
ISAAC_DECLARE_ELEMENT_BINARY_OPERATOR(minimum)
|
||||
ISAAC_DECLARE_ELEMENT_BINARY_OPERATOR(pow)
|
||||
|
||||
ISAAC_DECLARE_ELEMENT_BINARY_OPERATOR(dot)
|
||||
ISAAC_DECLARE_ELEMENT_BINARY_OPERATOR(outer)
|
||||
|
||||
ISAAC_DECLARE_ELEMENT_BINARY_OPERATOR(assign)
|
||||
|
||||
#undef ISAAC_DECLARE_ELEMENT_BINARY_OPERATOR
|
||||
|
||||
#define ISAAC_DECLARE_ROT(LTYPE, RTYPE, CTYPE, STYPE) \
|
||||
expression_tree rot(LTYPE const & x, RTYPE const & y, CTYPE const & c, STYPE const & s);
|
||||
|
||||
ISAAC_DECLARE_ROT(array_base, array_base, scalar, scalar)
|
||||
ISAAC_DECLARE_ROT(expression_tree, array_base, scalar, scalar)
|
||||
ISAAC_DECLARE_ROT(array_base, expression_tree, scalar, scalar)
|
||||
ISAAC_DECLARE_ROT(expression_tree, expression_tree, scalar, scalar)
|
||||
|
||||
ISAAC_DECLARE_ROT(array_base, array_base, value_scalar, value_scalar)
|
||||
ISAAC_DECLARE_ROT(expression_tree, array_base, value_scalar, value_scalar)
|
||||
ISAAC_DECLARE_ROT(array_base, expression_tree, value_scalar, value_scalar)
|
||||
ISAAC_DECLARE_ROT(expression_tree, expression_tree, value_scalar, value_scalar)
|
||||
|
||||
ISAAC_DECLARE_ROT(array_base, array_base, expression_tree, expression_tree)
|
||||
ISAAC_DECLARE_ROT(expression_tree, array_base, expression_tree, expression_tree)
|
||||
ISAAC_DECLARE_ROT(array_base, expression_tree, expression_tree, expression_tree)
|
||||
ISAAC_DECLARE_ROT(expression_tree, expression_tree, expression_tree, expression_tree)
|
||||
//--------------------------------
|
||||
|
||||
|
||||
//Unary operators
|
||||
#define ISAAC_DECLARE_UNARY_OPERATOR(OPNAME) \
|
||||
ISAACAPI expression_tree OPNAME (array_base const & x);\
|
||||
ISAACAPI expression_tree OPNAME (expression_tree const & x);
|
||||
|
||||
ISAAC_DECLARE_UNARY_OPERATOR(abs)
|
||||
ISAAC_DECLARE_UNARY_OPERATOR(acos)
|
||||
ISAAC_DECLARE_UNARY_OPERATOR(asin)
|
||||
ISAAC_DECLARE_UNARY_OPERATOR(atan)
|
||||
ISAAC_DECLARE_UNARY_OPERATOR(ceil)
|
||||
ISAAC_DECLARE_UNARY_OPERATOR(cos)
|
||||
ISAAC_DECLARE_UNARY_OPERATOR(cosh)
|
||||
ISAAC_DECLARE_UNARY_OPERATOR(exp)
|
||||
ISAAC_DECLARE_UNARY_OPERATOR(floor)
|
||||
ISAAC_DECLARE_UNARY_OPERATOR(log)
|
||||
ISAAC_DECLARE_UNARY_OPERATOR(log10)
|
||||
ISAAC_DECLARE_UNARY_OPERATOR(sin)
|
||||
ISAAC_DECLARE_UNARY_OPERATOR(sinh)
|
||||
ISAAC_DECLARE_UNARY_OPERATOR(sqrt)
|
||||
ISAAC_DECLARE_UNARY_OPERATOR(tan)
|
||||
ISAAC_DECLARE_UNARY_OPERATOR(tanh)
|
||||
ISAAC_DECLARE_UNARY_OPERATOR(trans)
|
||||
#undef ISAAC_DECLARE_UNARY_OPERATOR
|
||||
|
||||
ISAACAPI expression_tree cast(array_base const &, numeric_type dtype);
|
||||
ISAACAPI expression_tree cast(expression_tree const &, numeric_type dtype);
|
||||
|
||||
//Matrix reduction
|
||||
|
||||
#define ISAAC_DECLARE_REDUCTION(OPNAME) \
|
||||
ISAACAPI expression_tree OPNAME(array_base const & M, int_t axis = -1);\
|
||||
ISAACAPI expression_tree OPNAME(expression_tree const & M, int_t axis = -1);
|
||||
|
||||
ISAAC_DECLARE_REDUCTION(sum)
|
||||
ISAAC_DECLARE_REDUCTION(argmax)
|
||||
ISAAC_DECLARE_REDUCTION((max))
|
||||
ISAAC_DECLARE_REDUCTION((min))
|
||||
ISAAC_DECLARE_REDUCTION(argmin)
|
||||
|
||||
//Shortcuts
|
||||
|
||||
ISAACAPI expression_tree norm(array_base const &, unsigned int order = 2, int_t axis = -1);
|
||||
ISAACAPI expression_tree norm(expression_tree const &, unsigned int order = 2, int_t axis = -1);
|
||||
|
||||
ISAACAPI expression_tree mean(array_base const &, int_t axis = -1);
|
||||
ISAACAPI expression_tree mean(expression_tree const &, int_t axis = -1);
|
||||
|
||||
//ISAACAPI expression_tree var(array_base const &, int_t axis = -1);
|
||||
//ISAACAPI expression_tree var(expression_tree const &, int_t axis = -1);
|
||||
|
||||
//Fusion
|
||||
ISAACAPI expression_tree fuse(expression_tree const & x, expression_tree const & y);
|
||||
|
||||
//Initializers
|
||||
ISAACAPI expression_tree eye(int_t, int_t, isaac::numeric_type, driver::Context const & context = driver::backend::contexts::get_default());
|
||||
ISAACAPI expression_tree zeros(tuple const & shape, numeric_type dtype, driver::Context const & context = driver::backend::contexts::get_default());
|
||||
|
||||
//Swap
|
||||
ISAACAPI void swap(view x, view y);
|
||||
|
||||
//Reshape
|
||||
ISAACAPI expression_tree reshape(array_base const &, tuple const &);
|
||||
ISAACAPI expression_tree reshape(expression_tree const &, tuple const &);
|
||||
|
||||
ISAACAPI expression_tree ravel(array_base const &);
|
||||
ISAACAPI expression_tree ravel(expression_tree const & x);
|
||||
|
||||
//Diag
|
||||
array diag(array_base & x, int offset = 0);
|
||||
|
||||
//
|
||||
ISAACAPI std::ostream& operator<<(std::ostream &, array_base const &);
|
||||
ISAACAPI std::ostream& operator<<(std::ostream &, expression_tree const &);
|
||||
}
|
||||
#endif
|
@@ -1,63 +0,0 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ISAAC_COMMON_EXPRESSION_TYPE_H
|
||||
#define ISAAC_COMMON_EXPRESSION_TYPE_H
|
||||
|
||||
#include <string>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
|
||||
// Categories of expression recognised by the library. The lower-case spelling
// of each enumerator (except INVALID_EXPRESSION_TYPE) is what
// expression_type_from_string() accepts.
// NOTE(review): the N/T suffixes of the GEMM entries presumably mean
// non-transposed / transposed operands -- confirm.
enum expression_type
|
||||
{
|
||||
INVALID_EXPRESSION_TYPE,
|
||||
ELEMENTWISE_1D,
|
||||
ELEMENTWISE_2D,
|
||||
REDUCE_1D,
|
||||
REDUCE_2D_ROWS,
|
||||
REDUCE_2D_COLS,
|
||||
GEMM_NN,
|
||||
GEMM_TN,
|
||||
GEMM_NT,
|
||||
GEMM_TT
|
||||
};
|
||||
|
||||
// Parses the lower-case textual name of an expression category.
// Returns the matching expression_type; throws std::invalid_argument when the
// name is not one of the recognised spellings (the comparison is exact).
inline expression_type expression_type_from_string(std::string const & name)
{
  struct entry
  {
    char const * key;
    expression_type value;
  };
  // Every accepted spelling together with the enumerator it denotes.
  static const entry known[] = {
    {"elementwise_1d", ELEMENTWISE_1D},
    {"reduce_1d",      REDUCE_1D},
    {"elementwise_2d", ELEMENTWISE_2D},
    {"reduce_2d_rows", REDUCE_2D_ROWS},
    {"reduce_2d_cols", REDUCE_2D_COLS},
    {"gemm_nn",        GEMM_NN},
    {"gemm_nt",        GEMM_NT},
    {"gemm_tn",        GEMM_TN},
    {"gemm_tt",        GEMM_TT}
  };
  for(entry const & candidate : known)
  {
    if(name == candidate.key)
      return candidate.value;
  }
  throw std::invalid_argument("Unrecognized expression: " + name);
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@@ -1,144 +0,0 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ISAAC_COMMON_NUMERIC_TYPE_H
|
||||
#define ISAAC_COMMON_NUMERIC_TYPE_H
|
||||
|
||||
#include <stdexcept>
|
||||
#include "isaac/exception/api.h"
|
||||
namespace isaac
|
||||
{
|
||||
|
||||
class half{
|
||||
/* Placeholder for a half-precision type: this class is intentionally left
   incomplete (it has no data members) and exists only so that code naming
   `half` compiles. */
|
||||
public:
|
||||
half() {};
|
||||
};
|
||||
// Element types understood by the library. INVALID_NUMERIC_TYPE is pinned to 0;
// the remaining enumerators take consecutive values in declaration order.
// to_string() and size_of() in this header switch over these values.
enum numeric_type
|
||||
{
|
||||
INVALID_NUMERIC_TYPE = 0,
|
||||
// BOOL_TYPE,
|
||||
CHAR_TYPE,
|
||||
UCHAR_TYPE,
|
||||
SHORT_TYPE,
|
||||
USHORT_TYPE,
|
||||
INT_TYPE,
|
||||
UINT_TYPE,
|
||||
LONG_TYPE,
|
||||
ULONG_TYPE,
|
||||
HALF_TYPE,
|
||||
FLOAT_TYPE,
|
||||
DOUBLE_TYPE
|
||||
};
|
||||
|
||||
inline std::string to_string(numeric_type const & type)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
// case BOOL_TYPE: return "bool";
|
||||
case CHAR_TYPE: return "char";
|
||||
case UCHAR_TYPE: return "uchar";
|
||||
case SHORT_TYPE: return "short";
|
||||
case USHORT_TYPE: return "ushort";
|
||||
case INT_TYPE: return "int";
|
||||
case UINT_TYPE: return "uint";
|
||||
case LONG_TYPE: return "long";
|
||||
case ULONG_TYPE: return "ulong";
|
||||
case HALF_TYPE : return "half";
|
||||
case FLOAT_TYPE : return "float";
|
||||
case DOUBLE_TYPE : return "double";
|
||||
default : throw unknown_datatype(type);
|
||||
}
|
||||
}
|
||||
|
||||
// Parses a floating-point type name. Only "float16", "float32" and "float64"
// are accepted; any other string raises std::invalid_argument.
inline numeric_type numeric_type_from_string(std::string const & name)
{
  numeric_type parsed = INVALID_NUMERIC_TYPE;
  if(name == "float16")
    parsed = HALF_TYPE;
  else if(name == "float32")
    parsed = FLOAT_TYPE;
  else if(name == "float64")
    parsed = DOUBLE_TYPE;
  else
    throw std::invalid_argument("Invalid datatype: " + name);
  return parsed;
}
|
||||
|
||||
// Returns the size in bytes of one element of the given type (1, 2, 4 or 8).
// Throws unknown_datatype for any other value, including INVALID_NUMERIC_TYPE.
inline unsigned int size_of(numeric_type type)
{
  // One byte
  if(type == CHAR_TYPE || type == UCHAR_TYPE)
    return 1;
  // Two bytes
  if(type == SHORT_TYPE || type == USHORT_TYPE || type == HALF_TYPE)
    return 2;
  // Four bytes
  if(type == INT_TYPE || type == UINT_TYPE || type == FLOAT_TYPE)
    return 4;
  // Eight bytes
  if(type == LONG_TYPE || type == ULONG_TYPE || type == DOUBLE_TYPE)
    return 8;
  throw unknown_datatype(type);
}
|
||||
|
||||
// Maps an integer's (size in bytes, signedness) pair to the matching
// numeric_type. The primary template is declared but never defined, so using
// a combination that is not specialised below fails at compile time.
template<size_t size, bool is_unsigned>
|
||||
struct to_int_numeric_type_impl;
|
||||
|
||||
// Emits one full specialisation: <SIZE, IS_UNSIGNED> -> TYPE.
#define ISAAC_INSTANTIATE_INT_TYPE_IMPL(SIZE, IS_UNSIGNED, TYPE) \
|
||||
template<> struct to_int_numeric_type_impl<SIZE, IS_UNSIGNED> { static const numeric_type value = TYPE; }
|
||||
ISAAC_INSTANTIATE_INT_TYPE_IMPL(1, false, CHAR_TYPE);
|
||||
ISAAC_INSTANTIATE_INT_TYPE_IMPL(2, false, SHORT_TYPE);
|
||||
ISAAC_INSTANTIATE_INT_TYPE_IMPL(4, false, INT_TYPE);
|
||||
ISAAC_INSTANTIATE_INT_TYPE_IMPL(8, false, LONG_TYPE);
|
||||
ISAAC_INSTANTIATE_INT_TYPE_IMPL(1, true, UCHAR_TYPE);
|
||||
ISAAC_INSTANTIATE_INT_TYPE_IMPL(2, true, USHORT_TYPE);
|
||||
ISAAC_INSTANTIATE_INT_TYPE_IMPL(4, true, UINT_TYPE);
|
||||
ISAAC_INSTANTIATE_INT_TYPE_IMPL(8, true, ULONG_TYPE);
|
||||
#undef ISAAC_INSTANTIATE_INT_TYPE_IMPL
|
||||
|
||||
// Selects the numeric_type of an integer type T from sizeof(T) and
// std::is_unsigned<T>, via to_int_numeric_type_impl.
template<class T>
|
||||
struct to_int_numeric_type
|
||||
{
|
||||
static const numeric_type value = to_int_numeric_type_impl<sizeof(T), std::is_unsigned<T>::value>::value;
|
||||
};
|
||||
|
||||
// Compile-time map from a C++ type to its numeric_type. Types without an
// explicit specialisation below fall back to the size/signedness lookup of
// to_int_numeric_type.
template<class T> struct to_numeric_type { static const numeric_type value = to_int_numeric_type<T>::value; };
|
||||
|
||||
// Explicit mappings for the common built-in types.
// NOTE(review): long / unsigned long always map to LONG_TYPE / ULONG_TYPE here,
// while size_of() reports 8 bytes for those; on platforms where sizeof(long)
// is 4 the two disagree -- confirm this is intended.
template<> struct to_numeric_type<char> { static const numeric_type value = CHAR_TYPE; };
|
||||
template<> struct to_numeric_type<unsigned char> { static const numeric_type value = UCHAR_TYPE ; };
|
||||
template<> struct to_numeric_type<short> { static const numeric_type value = SHORT_TYPE ; };
|
||||
template<> struct to_numeric_type<unsigned short> { static const numeric_type value = USHORT_TYPE ; };
|
||||
template<> struct to_numeric_type<int> { static const numeric_type value = INT_TYPE ; };
|
||||
template<> struct to_numeric_type<unsigned int> { static const numeric_type value = UINT_TYPE ; };
|
||||
template<> struct to_numeric_type<long> { static const numeric_type value = LONG_TYPE ; };
|
||||
template<> struct to_numeric_type<unsigned long> { static const numeric_type value = ULONG_TYPE ; };
|
||||
template<> struct to_numeric_type<half> { static const numeric_type value = HALF_TYPE; };
|
||||
template<> struct to_numeric_type<float> { static const numeric_type value = FLOAT_TYPE; };
|
||||
template<> struct to_numeric_type<double> { static const numeric_type value = DOUBLE_TYPE; };
|
||||
|
||||
// numeric_type_of(x): element type of a value.
// Arithmetic arguments are resolved at compile time through to_numeric_type<T>.
template<class T> typename std::enable_if<std::is_arithmetic<T>::value, numeric_type>::type numeric_type_of(T) { return to_numeric_type<T>::value; }
|
||||
// Any other argument is asked for its own dtype() at run time.
template<class T> typename std::enable_if<!std::is_arithmetic<T>::value, numeric_type>::type numeric_type_of(T const & x) { return x.dtype(); }
|
||||
}
|
||||
|
||||
#endif
|
@@ -1,49 +0,0 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ISAAC_DEFINES_H
|
||||
#define ISAAC_DEFINES_H
|
||||
|
||||
// ISAACAPI marks symbols that belong to the library's public interface.
// On Windows / MSVC it expands to dllexport when ISAAC_DLL is defined and to
// dllimport otherwise; elsewhere it requests default symbol visibility.
#if defined(_WIN32) || defined(_MSC_VER)
|
||||
#ifdef ISAAC_DLL
|
||||
#define ISAACAPI __declspec(dllexport)
|
||||
#else
|
||||
#define ISAACAPI __declspec(dllimport)
|
||||
#endif
|
||||
#else
|
||||
#define ISAACAPI __attribute__((visibility("default")))
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32) || defined(_MSC_VER)
|
||||
#define DISABLE_MSVC_WARNING_C4251 __pragma(warning(disable: 4251))
|
||||
#define RESTORE_MSVC_WARNING_C4251 __pragma(warning(default: 4251))
|
||||
#define DISABLE_MSVC_WARNING_C4275 __pragma(warning(disable: 4275))
|
||||
// Counterpart of DISABLE_MSVC_WARNING_C4275: puts warning C4275 back to its default state.
#define RESTORE_MSVC_WARNING_C4275 __pragma(warning(default: 4275))
|
||||
|
||||
#else
|
||||
#define DISABLE_MSVC_WARNING_C4251
|
||||
#define RESTORE_MSVC_WARNING_C4251
|
||||
#define DISABLE_MSVC_WARNING_C4275
|
||||
#define RESTORE_MSVC_WARNING_C4275
|
||||
#endif
|
||||
|
||||
#endif
|
@@ -27,12 +27,6 @@
|
||||
#include <list>
|
||||
#include <vector>
|
||||
|
||||
#include "isaac/common/expression_type.h"
|
||||
#include "isaac/common/numeric_type.h"
|
||||
|
||||
#include "isaac/driver/dispatch.h"
|
||||
#include "isaac/defines.h"
|
||||
#include "isaac/types.h"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
@@ -40,93 +34,78 @@ namespace driver
|
||||
{
|
||||
|
||||
class Buffer;
|
||||
class CommandQueue;
|
||||
class Stream;
|
||||
class Context;
|
||||
class Platform;
|
||||
class Program;
|
||||
class Module;
|
||||
class Kernel;
|
||||
class ProgramCache;
|
||||
|
||||
class ISAACAPI backend
|
||||
struct backend
|
||||
{
|
||||
public:
|
||||
class ISAACAPI workspaces
|
||||
|
||||
class modules
|
||||
{
|
||||
friend class backend;
|
||||
public:
|
||||
static const size_t SIZE = 8000000; //8MB of temporary workspace per queue
|
||||
static void release();
|
||||
static driver::Buffer & get(CommandQueue const & key);
|
||||
static void release();
|
||||
static Module& get(Stream const & stream, std::string const & name, std::string const &src);
|
||||
private:
|
||||
DISABLE_MSVC_WARNING_C4251
|
||||
static std::map<CommandQueue, Buffer * > cache_;
|
||||
RESTORE_MSVC_WARNING_C4251
|
||||
static std::map<std::tuple<Stream, std::string>, Module * > cache_;
|
||||
};
|
||||
|
||||
class ISAACAPI programs
|
||||
class kernels
|
||||
{
|
||||
friend class backend;
|
||||
friend class backend;
|
||||
public:
|
||||
static void release();
|
||||
static ProgramCache & get(CommandQueue const & queue, expression_type expression, numeric_type dtype);
|
||||
static void release();
|
||||
static Kernel & get(Module const & program, std::string const & name);
|
||||
private:
|
||||
DISABLE_MSVC_WARNING_C4251
|
||||
static std::map<std::tuple<CommandQueue, expression_type, numeric_type>, ProgramCache * > cache_;
|
||||
RESTORE_MSVC_WARNING_C4251
|
||||
static std::map<std::tuple<Module, std::string>, Kernel * > cache_;
|
||||
};
|
||||
|
||||
class ISAACAPI kernels
|
||||
class contexts
|
||||
{
|
||||
friend class backend;
|
||||
public:
|
||||
static void release();
|
||||
static Kernel & get(Program const & program, std::string const & name);
|
||||
friend class backend;
|
||||
private:
|
||||
DISABLE_MSVC_WARNING_C4251
|
||||
static std::map<std::tuple<Program, std::string>, Kernel * > cache_;
|
||||
RESTORE_MSVC_WARNING_C4251
|
||||
static void init(std::vector<Platform> const &);
|
||||
static void release();
|
||||
public:
|
||||
static Context const & get_default();
|
||||
template<class T>
|
||||
static Context const & import(T context)
|
||||
{
|
||||
for(driver::Context const * x: cache_)
|
||||
if((T)*x==context)
|
||||
return *x;
|
||||
cache_.emplace_back(new Context(context, false));
|
||||
return *cache_.back();
|
||||
}
|
||||
static void get(std::list<Context const *> &);
|
||||
private:
|
||||
static std::list<Context const *> cache_;
|
||||
};
|
||||
|
||||
class ISAACAPI contexts
|
||||
class streams
|
||||
{
|
||||
friend class backend;
|
||||
friend class backend;
|
||||
private:
|
||||
static void init(std::vector<Platform> const &);
|
||||
static void release();
|
||||
static void init(std::list<Context const *> const &);
|
||||
static void release();
|
||||
public:
|
||||
static Context const & get_default();
|
||||
static Context const & import(CUcontext context);
|
||||
static Context const & import(cl_context context);
|
||||
static void get(std::list<Context const *> &);
|
||||
static void get(Context const &, std::vector<Stream *> &streams);
|
||||
static Stream & get(Context const &, unsigned int id = 0);
|
||||
static Stream & get_default();
|
||||
private:
|
||||
DISABLE_MSVC_WARNING_C4251
|
||||
static std::list<Context const *> cache_;
|
||||
RESTORE_MSVC_WARNING_C4251
|
||||
};
|
||||
|
||||
class ISAACAPI queues
|
||||
{
|
||||
friend class backend;
|
||||
private:
|
||||
static void init(std::list<Context const *> const &);
|
||||
static void release();
|
||||
public:
|
||||
static void get(Context const &, std::vector<CommandQueue *> &queues);
|
||||
static CommandQueue & get(Context const &, unsigned int id = 0);
|
||||
private:
|
||||
DISABLE_MSVC_WARNING_C4251
|
||||
static std::map< Context, std::vector<CommandQueue*> > cache_;
|
||||
RESTORE_MSVC_WARNING_C4251
|
||||
static std::map< Context, std::vector<Stream*> > cache_;
|
||||
};
|
||||
|
||||
static void init();
|
||||
static void release();
|
||||
|
||||
static void platforms(std::vector<Platform> &);
|
||||
static std::vector<Platform> platforms();
|
||||
static void synchronize(Context const &);
|
||||
|
||||
public:
|
||||
static unsigned int default_device;
|
||||
static cl_command_queue_properties default_queue_properties;
|
||||
};
|
||||
|
||||
}
|
||||
|
@@ -23,61 +23,30 @@
|
||||
#ifndef ISAAC_DRIVER_BUFFER_H
|
||||
#define ISAAC_DRIVER_BUFFER_H
|
||||
|
||||
#include "isaac/types.h"
|
||||
#include "isaac/defines.h"
|
||||
#include "isaac/driver/common.h"
|
||||
#include "isaac/driver/context.h"
|
||||
#include "isaac/driver/handle.h"
|
||||
#include "isaac/driver/dispatch.h"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
|
||||
namespace driver
|
||||
{
|
||||
|
||||
class Stream;
|
||||
|
||||
// Buffer
|
||||
class ISAACAPI Buffer: public has_handle_comparators<Buffer>
|
||||
class Buffer: public Handle<CUdeviceptr>
|
||||
{
|
||||
public:
|
||||
typedef Handle<cl_mem, CUdeviceptr> handle_type;
|
||||
|
||||
private:
|
||||
friend class CommandQueue;
|
||||
friend class Kernel;
|
||||
//Wrapper to get CUDA context from Memory
|
||||
static CUcontext context(CUdeviceptr h)
|
||||
{
|
||||
CUcontext res;
|
||||
check(dispatch::cuPointerGetAttribute((void*)&res, CU_POINTER_ATTRIBUTE_CONTEXT, h));
|
||||
return res;
|
||||
}
|
||||
typedef Handle<CUdeviceptr> base_type;
|
||||
|
||||
public:
|
||||
//Constructors
|
||||
Buffer(CUdeviceptr h = 0, bool take_ownership = true);
|
||||
Buffer(cl_mem Buffer = 0, bool take_ownership = true);
|
||||
using base_type::base_type;
|
||||
Buffer(Context const & context, size_t size);
|
||||
//Accessors
|
||||
handle_type& handle();
|
||||
handle_type const & handle() const;
|
||||
Context const & context() const;
|
||||
void set_zero(Stream const & queue);
|
||||
|
||||
private:
|
||||
backend_type backend_;
|
||||
Context context_;
|
||||
handle_type h_;
|
||||
size_t size_;
|
||||
};
|
||||
|
||||
inline Buffer make_buffer(backend_type backend, cl_mem clh = 0, CUdeviceptr cuh = 0, bool take_ownership = true)
|
||||
{
|
||||
if(backend==OPENCL)
|
||||
return Buffer(clh, take_ownership);
|
||||
else
|
||||
return Buffer(cuh, take_ownership);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -23,10 +23,6 @@
|
||||
#ifndef ISAAC_DRIVER_CONTEXT_H
|
||||
#define ISAAC_DRIVER_CONTEXT_H
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include "isaac/defines.h"
|
||||
#include "isaac/driver/common.h"
|
||||
#include "isaac/driver/device.h"
|
||||
#include "isaac/driver/handle.h"
|
||||
|
||||
@@ -36,42 +32,25 @@ namespace isaac
|
||||
namespace driver
|
||||
{
|
||||
|
||||
class ISAACAPI Context: public has_handle_comparators<Context>
|
||||
class Context: public Handle<CUcontext>
|
||||
{
|
||||
friend class Program;
|
||||
friend class CommandQueue;
|
||||
friend class Buffer;
|
||||
|
||||
public:
|
||||
typedef Handle<cl_context, CUcontext> handle_type;
|
||||
typedef Handle<CUcontext> base_type;
|
||||
|
||||
private:
|
||||
static std::string cache_path();
|
||||
|
||||
static CUdevice device(CUcontext)
|
||||
{
|
||||
CUdevice res;
|
||||
dispatch::cuCtxGetDevice(&res);
|
||||
return res;
|
||||
}
|
||||
static std::string get_cache_path();
|
||||
static CUdevice device(CUcontext);
|
||||
|
||||
public:
|
||||
//Constructors
|
||||
explicit Context(CUcontext const & context, bool take_ownership = true);
|
||||
explicit Context(cl_context const & context, bool take_ownership = true);
|
||||
explicit Context(Device const & device);
|
||||
//Accessors
|
||||
backend_type backend() const;
|
||||
Device const & device() const;
|
||||
handle_type const & handle() const;
|
||||
std::string const & cache_path() const;
|
||||
|
||||
private:
|
||||
DISABLE_MSVC_WARNING_C4251
|
||||
backend_type backend_;
|
||||
Device device_;
|
||||
std::string cache_path_;
|
||||
handle_type h_;
|
||||
RESTORE_MSVC_WARNING_C4251
|
||||
};
|
||||
|
||||
}
|
||||
|
114
include/isaac/driver/cublas.h
Normal file
114
include/isaac/driver/cublas.h
Normal file
@@ -0,0 +1,114 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ISAAC_DRIVER_CUBLAS_H
|
||||
#define ISAAC_DRIVER_CUBLAS_H
|
||||
|
||||
#include <stdexcept>
|
||||
#include "isaac/templates/common.hpp"
|
||||
#include "isaac/driver/dispatch.h"
|
||||
#include "isaac/driver/buffer.h"
|
||||
#include "isaac/driver/stream.h"
|
||||
#include "isaac/driver/backend.h"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
namespace driver
|
||||
{
|
||||
|
||||
// Tag-dispatched GEMM entry points: the unnamed first parameter selects the
// element type, and the remaining arguments are passed through unchanged to
// the matching cuBLAS routine (Hgemm / Sgemm_v2 / Dgemm_v2).
template<typename... Args> void cublasGemm_impl(half, Args... args){ driver::dispatch::cublasHgemm(args...); }
|
||||
template<typename... Args> void cublasGemm_impl(float, Args... args){ driver::dispatch::cublasSgemm_v2(args...); }
|
||||
template<typename... Args> void cublasGemm_impl(double, Args... args){ driver::dispatch::cublasDgemm_v2(args...); }
|
||||
|
||||
|
||||
template<class cuType>
|
||||
inline void cublasGemm_dispatch(Context const & ctx, Stream& queue, char AT, char BT, int32_t M, int32_t N, int32_t K, void* alpha, Buffer const & A, int32_t lda, Buffer const & B, int32_t ldb, void* beta, Buffer& C, int32_t ldc){
|
||||
auto cu_trans = [](char xt) { return (xt=='N')?CUBLAS_OP_N:CUBLAS_OP_T; };
|
||||
cublasHandle_t handle = dispatch::cublasHandle(ctx);
|
||||
dispatch::cublasSetStream_v2(handle, (CUstream)queue);
|
||||
CUdeviceptr cuA = A, cuB = B, cuC = C;
|
||||
cublasGemm_impl(cuType(), handle, cu_trans(AT), cu_trans(BT), M, N, K, (cuType*)alpha, (const cuType*)cuA, lda, (const cuType*)cuB, ldb, (cuType*)beta, (cuType*)cuC, ldc);
|
||||
}
|
||||
|
||||
// Runtime-typed GEMM front end: selects the cuBLAS routine matching `dtype`
// and forwards all operands to cublasGemm_dispatch<T>.
//
// dtype        : element type selector (HALF_TYPE, FLOAT_TYPE or DOUBLE_TYPE).
// alpha / beta : scalars whose storage (data()) is handed to cuBLAS.
// Throws std::invalid_argument when dtype is none of the supported values.
inline void cublasGemm(DType dtype, Context const & ctx, Stream& queue, char AT, char BT, int32_t M, int32_t N, int32_t K, scalar alpha, Buffer const & A, int32_t lda, Buffer const & B, int32_t ldb, scalar beta, Buffer& C, int32_t ldc){
  switch(dtype){
    case HALF_TYPE: return cublasGemm_dispatch<half>(ctx, queue, AT, BT, M, N, K, alpha.data(), A, lda, B, ldb, beta.data(), C, ldc);
    case FLOAT_TYPE: return cublasGemm_dispatch<float>(ctx, queue, AT, BT, M, N, K, alpha.data(), A, lda, B, ldb, beta.data(), C, ldc);
    case DOUBLE_TYPE: return cublasGemm_dispatch<double>(ctx, queue, AT, BT, M, N, K, alpha.data(), A, lda, B, ldb, beta.data(), C, ldc);
    // A bare `throw;` here would rethrow a non-existent exception and end in
    // std::terminate; raise a real, catchable exception instead.
    default: throw std::invalid_argument("cublasGemm: unsupported data type");
  }
}
|
||||
|
||||
// Maps the library's DType selector onto the corresponding cuDNN data type.
//
// Returns CUDNN_DATA_HALF / CUDNN_DATA_FLOAT / CUDNN_DATA_DOUBLE for
// HALF_TYPE / FLOAT_TYPE / DOUBLE_TYPE respectively.
// Throws std::invalid_argument for any other value.
inline cudnnDataType_t cudnnDtype(DType dtype){
  switch(dtype){
    case HALF_TYPE: return CUDNN_DATA_HALF;
    case FLOAT_TYPE: return CUDNN_DATA_FLOAT;
    case DOUBLE_TYPE: return CUDNN_DATA_DOUBLE;
  }
  // Reached only for an unhandled dtype. A bare `throw;` with no exception in
  // flight calls std::terminate, so throw a catchable exception instead.
  throw std::invalid_argument("cudnnDtype: unsupported data type");
}
|
||||
|
||||
// Forward convolution through the cuDNN dispatch wrappers.
//
// Descriptors are set up as NCHW with:
//   input  I : N x C x H x W
//   filter F : K x C x R x S
//   output O : N x K x P x Q
// using cross-correlation mode, the given padding / strides and an upscale
// factor of 1 in both dimensions. alpha and beta are passed to
// cudnnConvolutionForward via their data() pointers.
//
// The forward algorithm is fixed to IMPLICIT_PRECOMP_GEMM rather than queried.
//
// NOTE(review): the tensor, filter and convolution descriptors created here are
// never destroyed in this function; this looks like a per-call leak, but no
// cudnnDestroy* wrapper is visible on `dispatch` - confirm and add one.
inline void cudnnConv(DType dtype, Context const & ctx, Stream& queue, int32_t H, int32_t W, int32_t N, int32_t K, int32_t P, int32_t Q, int32_t C, int32_t R, int32_t S,
                      int32_t pad_h, int32_t pad_w, int32_t stride_h, int32_t stride_w, scalar alpha, Buffer const & I, Buffer const & F, scalar beta, Buffer const & O){
  cudnnHandle_t cudnn = dispatch::cudnnHandle(ctx);
  cudnnDataType_t elem_type = cudnnDtype(dtype);

  // Bind the handle to the caller's stream.
  dispatch::cudnnSetStream(cudnn, (CUstream)queue);

  // Create the output, input and filter descriptors.
  cudnnTensorDescriptor_t out_desc;
  cudnnTensorDescriptor_t in_desc;
  cudnnFilterDescriptor_t filter_desc;
  dispatch::cudnnCreateTensorDescriptor(&out_desc);
  dispatch::cudnnCreateTensorDescriptor(&in_desc);
  dispatch::cudnnCreateFilterDescriptor(&filter_desc);

  // Describe their layouts.
  dispatch::cudnnSetTensor4dDescriptor(out_desc, CUDNN_TENSOR_NCHW, elem_type, N, K, P, Q);
  dispatch::cudnnSetFilter4dDescriptor(filter_desc, elem_type, CUDNN_TENSOR_NCHW, K, C, R, S);
  dispatch::cudnnSetTensor4dDescriptor(in_desc, CUDNN_TENSOR_NCHW, elem_type, N, C, H, W);

  // Two-dimensional convolution descriptor.
  cudnnConvolutionDescriptor_t conv_desc;
  dispatch::cudnnCreateConvolutionDescriptor(&conv_desc);
  int padding[] = {pad_h, pad_w};
  int strides[] = {stride_h, stride_w};
  int upscaling[] = {1, 1};
  dispatch::cudnnSetConvolutionNdDescriptor(conv_desc, 2, padding, strides, upscaling, CUDNN_CROSS_CORRELATION, elem_type);

  // Ask cuDNN how much scratch memory the chosen algorithm needs.
  cudnnConvolutionFwdAlgo_t fwd_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
  size_t workspace_bytes;
  dispatch::cudnnGetConvolutionForwardWorkspaceSize(cudnn, in_desc, filter_desc, conv_desc, out_desc, fwd_algo, &workspace_bytes);

  // Always allocate at least one byte, even when no workspace is required.
  Buffer workspace(ctx, (workspace_bytes != 0) ? workspace_bytes : (size_t)1);

  CUdeviceptr dev_workspace = workspace;
  CUdeviceptr dev_in = I;
  CUdeviceptr dev_filter = F;
  CUdeviceptr dev_out = O;

  dispatch::cudnnConvolutionForward(cudnn, alpha.data(),
                                    in_desc, reinterpret_cast<void*>(dev_in),
                                    filter_desc, reinterpret_cast<void*>(dev_filter),
                                    conv_desc, fwd_algo,
                                    reinterpret_cast<void*>(dev_workspace), workspace_bytes,
                                    beta.data(),
                                    out_desc, reinterpret_cast<void*>(dev_out));
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif
|
@@ -23,8 +23,6 @@
|
||||
#ifndef ISAAC_DRIVER_DEVICE_H
|
||||
#define ISAAC_DRIVER_DEVICE_H
|
||||
|
||||
#include "isaac/defines.h"
|
||||
#include "isaac/driver/common.h"
|
||||
#include "isaac/driver/platform.h"
|
||||
#include "isaac/driver/handle.h"
|
||||
|
||||
@@ -35,60 +33,26 @@ namespace driver
|
||||
{
|
||||
|
||||
// Device
|
||||
class ISAACAPI Device: public has_handle_comparators<Device>
|
||||
class Device: public Handle<CUdevice>
|
||||
{
|
||||
private:
|
||||
friend class Context;
|
||||
friend class CommandQueue;
|
||||
|
||||
public:
|
||||
typedef Handle<cl_device_id, CUdevice> handle_type;
|
||||
typedef Handle<CUdevice> base_type;
|
||||
|
||||
//Supported types
|
||||
enum Type
|
||||
{
|
||||
GPU = CL_DEVICE_TYPE_GPU,
|
||||
CPU = CL_DEVICE_TYPE_CPU,
|
||||
ACCELERATOR = CL_DEVICE_TYPE_ACCELERATOR,
|
||||
UNKNOWN
|
||||
};
|
||||
//Supported vendors
|
||||
enum class Vendor
|
||||
{
|
||||
AMD,
|
||||
INTEL,
|
||||
NVIDIA,
|
||||
UNKNOWN
|
||||
};
|
||||
//Supported architectures
|
||||
enum class Architecture
|
||||
{
|
||||
//Intel
|
||||
HASWELL,
|
||||
BROADWELL,
|
||||
SKYLAKE,
|
||||
KABYLAKE,
|
||||
//NVidia
|
||||
SM_2_0,
|
||||
SM_2_1,
|
||||
SM_3_0,
|
||||
SM_3_5,
|
||||
SM_3_7,
|
||||
SM_5_0,
|
||||
SM_5_2,
|
||||
SM_6_0,
|
||||
SM_6_1,
|
||||
|
||||
//NVidia
|
||||
SM_2_0,
|
||||
SM_2_1,
|
||||
SM_3_0,
|
||||
SM_3_5,
|
||||
SM_3_7,
|
||||
SM_5_0,
|
||||
SM_5_2,
|
||||
SM_6_0,
|
||||
SM_6_1,
|
||||
|
||||
//AMD
|
||||
TERASCALE_2,
|
||||
TERASCALE_3,
|
||||
GCN_1,
|
||||
GCN_2,
|
||||
GCN_3,
|
||||
GCN_4,
|
||||
|
||||
UNKNOWN
|
||||
UNKNOWN
|
||||
};
|
||||
|
||||
private:
|
||||
@@ -96,34 +60,32 @@ private:
|
||||
template<CUdevice_attribute attr>
|
||||
int cuGetInfo() const;
|
||||
|
||||
inline Architecture nv_arch(std::pair<unsigned int, unsigned int> sm) const;
|
||||
inline nvmlDevice_t nvml_device() const;
|
||||
|
||||
public:
|
||||
//Constructors
|
||||
explicit Device(CUdevice const & device, bool take_ownership = true);
|
||||
explicit Device(cl_device_id const & device, bool take_ownership = true);
|
||||
using base_type::base_type;
|
||||
//Accessors
|
||||
handle_type const & handle() const;
|
||||
Vendor vendor() const;
|
||||
Architecture architecture() const;
|
||||
backend_type backend() const;
|
||||
//Informations
|
||||
std::string infos() const;
|
||||
size_t clock_rate() const;
|
||||
unsigned int address_bits() const;
|
||||
size_t address_bits() const;
|
||||
driver::Platform platform() const;
|
||||
std::vector<size_t> max_block_dim() const;
|
||||
size_t max_threads_per_block() const;
|
||||
size_t max_shared_memory() const;
|
||||
size_t warp_size() const;
|
||||
std::pair<size_t, size_t> compute_capability() const;
|
||||
//Identifier
|
||||
std::string name() const;
|
||||
std::string vendor_str() const;
|
||||
std::vector<size_t> max_work_item_sizes() const;
|
||||
Type type() const;
|
||||
std::string extensions() const;
|
||||
size_t max_work_group_size() const;
|
||||
size_t local_mem_size() const;
|
||||
size_t warp_wavefront_size() const;
|
||||
bool fp64_support() const;
|
||||
std::pair<unsigned int, unsigned int> nv_compute_capability() const;
|
||||
std::string pci_bus_id() const;
|
||||
//Clocks
|
||||
size_t current_sm_clock() const;
|
||||
size_t current_mem_clock() const;
|
||||
|
||||
size_t max_sm_clock() const;
|
||||
size_t max_mem_clock() const;
|
||||
|
||||
private:
|
||||
backend_type backend_;
|
||||
handle_type h_;
|
||||
};
|
||||
|
||||
}
|
||||
|
@@ -26,15 +26,14 @@
|
||||
#include <type_traits>
|
||||
#include <dlfcn.h>
|
||||
|
||||
//OpenCL Backend
|
||||
#include "isaac/driver/external/CL/cl.h"
|
||||
#include "isaac/driver/external/CL/cl_ext.h"
|
||||
//CUDA Backend
|
||||
#include "isaac/driver/external/CUDA/cuda.h"
|
||||
#include "isaac/driver/external/CUDA/nvrtc.h"
|
||||
#include "isaac/driver/external/CUDA/cublas.h"
|
||||
#include "isaac/driver/external/CUDA/cudnn.h"
|
||||
#include "isaac/driver/external/CUDA/nvml.h"
|
||||
|
||||
//Exceptions
|
||||
#include "isaac/driver/common.h"
|
||||
#include <iostream>
|
||||
|
||||
namespace isaac
|
||||
@@ -48,211 +47,189 @@ template<class T> void check(T){}
|
||||
void check(nvrtcResult err);
|
||||
void check(CUresult err);
|
||||
void check(cublasStatus_t err);
|
||||
void check(cl_int err);
|
||||
void check(cudnnStatus_t err);
|
||||
void check_destruction(CUresult);
|
||||
|
||||
class dispatch
|
||||
{
|
||||
private:
|
||||
template <class F>
|
||||
struct return_type;
|
||||
template <class F>
|
||||
struct return_type;
|
||||
|
||||
template <class R, class... A>
|
||||
struct return_type<R (*)(A...)>
|
||||
{ typedef R type; };
|
||||
template <class R, class... A>
|
||||
struct return_type<R (*)(A...)>
|
||||
{ typedef R type; };
|
||||
|
||||
typedef bool (*f_init_t)();
|
||||
typedef bool (*f_init_t)();
|
||||
|
||||
template<f_init_t initializer, typename FunPtrT, typename... Args>
|
||||
static typename return_type<FunPtrT>::type f_impl(void*& lib_h, FunPtrT, void*& cache, const char * name, Args... args)
|
||||
{
|
||||
initializer();
|
||||
if(cache == nullptr)
|
||||
cache = dlsym(lib_h, name);
|
||||
FunPtrT fptr;
|
||||
*reinterpret_cast<void **>(&fptr) = cache;
|
||||
typename return_type<FunPtrT>::type res = (*fptr)(args...);
|
||||
check(res);
|
||||
return res;
|
||||
}
|
||||
template<f_init_t initializer, typename FunPtrT, typename... Args>
|
||||
static typename return_type<FunPtrT>::type f_impl(void*& lib_h, FunPtrT, void*& cache, const char * name, Args... args)
|
||||
{
|
||||
initializer();
|
||||
if(cache == nullptr)
|
||||
cache = dlsym(lib_h, name);
|
||||
FunPtrT fptr;
|
||||
*reinterpret_cast<void **>(&fptr) = cache;
|
||||
typename return_type<FunPtrT>::type res = (*fptr)(args...);
|
||||
check(res);
|
||||
return res;
|
||||
}
|
||||
|
||||
public:
|
||||
static bool clinit();
|
||||
static bool cublasinit();
|
||||
static bool nvrtcinit();
|
||||
static bool cuinit();
|
||||
static bool nvrtcinit();
|
||||
static bool nvmlinit();
|
||||
static bool cuinit();
|
||||
static bool cublasinit();
|
||||
static bool cudnninit();
|
||||
|
||||
static void release();
|
||||
static void release();
|
||||
|
||||
//OpenCL
|
||||
static cl_int clBuildProgram(cl_program, cl_uint, const cl_device_id *, const char *, void (*)(cl_program, void *), void *);
|
||||
static cl_int clEnqueueNDRangeKernel(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *);
|
||||
static cl_int clSetKernelArg(cl_kernel, cl_uint, size_t, const void *);
|
||||
static cl_int clReleaseMemObject(cl_mem);
|
||||
static cl_int clFinish(cl_command_queue);
|
||||
static cl_int clGetMemObjectInfo(cl_mem, cl_mem_info, size_t, void *, size_t *);
|
||||
static cl_int clGetCommandQueueInfo(cl_command_queue, cl_command_queue_info, size_t, void *, size_t *);
|
||||
static cl_int clReleaseContext(cl_context);
|
||||
static cl_int clReleaseEvent(cl_event);
|
||||
static cl_int clEnqueueWriteBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *);
|
||||
static cl_int clEnqueueReadBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void *, cl_uint, const cl_event *, cl_event *);
|
||||
static cl_int clGetProgramBuildInfo(cl_program, cl_device_id, cl_program_build_info, size_t, void *, size_t *);
|
||||
static cl_int clReleaseDevice(cl_device_id);
|
||||
static cl_context clCreateContext(const cl_context_properties *, cl_uint, const cl_device_id *, void (*)(const char *, const void *, size_t, void *), void *, cl_int *);
|
||||
static cl_int clGetDeviceIDs(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *);
|
||||
static cl_int clGetContextInfo(cl_context, cl_context_info, size_t, void *, size_t *);
|
||||
static cl_int clGetDeviceInfo(cl_device_id, cl_device_info, size_t, void *, size_t *);
|
||||
static cl_int clReleaseCommandQueue(cl_command_queue);
|
||||
static cl_int clGetPlatformIDs(cl_uint, cl_platform_id *, cl_uint *);
|
||||
static cl_int clGetPlatformInfo(cl_platform_id, cl_platform_info, size_t, void *, size_t *);
|
||||
static cl_int clGetEventProfilingInfo(cl_event, cl_profiling_info, size_t, void *, size_t *);
|
||||
static cl_program clCreateProgramWithBinary(cl_context, cl_uint, const cl_device_id *, const size_t *, const unsigned char **, cl_int *, cl_int *);
|
||||
static cl_command_queue clCreateCommandQueue(cl_context, cl_device_id, cl_command_queue_properties, cl_int *);
|
||||
static cl_int clRetainEvent(cl_event);
|
||||
static cl_int clReleaseProgram(cl_program);
|
||||
static cl_int clFlush(cl_command_queue);
|
||||
static cl_int clGetProgramInfo(cl_program, cl_program_info, size_t, void *, size_t *);
|
||||
static cl_int clGetKernelInfo(cl_kernel, cl_kernel_info, size_t, void *, size_t *);
|
||||
static cl_int clGetKernelWorkGroupInfo(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void *, size_t *);
|
||||
static cl_kernel clCreateKernel(cl_program, const char *, cl_int *);
|
||||
static cl_mem clCreateBuffer(cl_context, cl_mem_flags, size_t, void *, cl_int *);
|
||||
static cl_mem clCreateImage(cl_context, cl_mem_flags, const cl_image_format *, const cl_image_desc *, void *, cl_int *);
|
||||
static cl_program clCreateProgramWithSource(cl_context, cl_uint, const char **, const size_t *, cl_int *);
|
||||
static cl_int clReleaseKernel(cl_kernel);
|
||||
static cl_int clEnqueueCopyBufferToImage(cl_command_queue, cl_mem, cl_mem, size_t, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *);
|
||||
static cl_int clSetEventCallback(cl_event, cl_int, void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), void *);
|
||||
//CUDA
|
||||
static CUresult cuCtxGetCurrent(CUcontext *pctx);
|
||||
static CUresult cuCtxDestroy_v2(CUcontext ctx);
|
||||
static CUresult cuEventCreate(CUevent *phEvent, unsigned int Flags);
|
||||
static CUresult cuDeviceGet(CUdevice *device, int ordinal);
|
||||
static CUresult cuMemcpyDtoH_v2(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount);
|
||||
static CUresult cuStreamCreate(CUstream *phStream, unsigned int Flags);
|
||||
static CUresult cuEventElapsedTime(float *pMilliseconds, CUevent hStart, CUevent hEnd);
|
||||
static CUresult cuMemFree_v2(CUdeviceptr dptr);
|
||||
static CUresult cuMemcpyDtoHAsync_v2(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
|
||||
static CUresult cuDriverGetVersion(int *driverVersion);
|
||||
static CUresult cuDeviceGetName(char *name, int len, CUdevice dev);
|
||||
static CUresult cuDeviceGetPCIBusId(char *id, int len, CUdevice dev);
|
||||
|
||||
//CUDA
|
||||
static CUresult cuCtxDestroy_v2(CUcontext ctx);
|
||||
static CUresult cuEventCreate(CUevent *phEvent, unsigned int Flags);
|
||||
static CUresult cuDeviceGet(CUdevice *device, int ordinal);
|
||||
static CUresult cuMemcpyDtoH_v2(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount);
|
||||
static CUresult cuStreamCreate(CUstream *phStream, unsigned int Flags);
|
||||
static CUresult cuEventElapsedTime(float *pMilliseconds, CUevent hStart, CUevent hEnd);
|
||||
static CUresult cuMemFree_v2(CUdeviceptr dptr);
|
||||
static CUresult cuMemcpyDtoHAsync_v2(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
|
||||
static CUresult cuDriverGetVersion(int *driverVersion);
|
||||
static CUresult cuDeviceGetName(char *name, int len, CUdevice dev);
|
||||
static CUresult cuMemcpyHtoDAsync_v2(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream);
|
||||
static CUresult cuModuleLoad(CUmodule *module, const char *fname);
|
||||
static CUresult cuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra);
|
||||
static CUresult cuModuleUnload(CUmodule hmod);
|
||||
static CUresult cuModuleLoadDataEx(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
|
||||
static CUresult cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
|
||||
static CUresult cuDeviceGetCount(int *count);
|
||||
static CUresult cuMemcpyHtoD_v2(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
|
||||
static CUresult cuInit(unsigned int Flags);
|
||||
static CUresult cuEventRecord(CUevent hEvent, CUstream hStream);
|
||||
static CUresult cuCtxCreate_v2(CUcontext *pctx, unsigned int flags, CUdevice dev);
|
||||
static CUresult cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, const char *name);
|
||||
static CUresult cuStreamSynchronize(CUstream hStream);
|
||||
static CUresult cuStreamDestroy_v2(CUstream hStream);
|
||||
static CUresult cuEventDestroy_v2(CUevent hEvent);
|
||||
static CUresult cuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize);
|
||||
static CUresult cuPointerGetAttribute(void * data, CUpointer_attribute attribute, CUdeviceptr ptr);
|
||||
static CUresult cuCtxGetDevice(CUdevice* result);
|
||||
static CUresult cuMemcpyHtoDAsync_v2(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream);
|
||||
static CUresult cuModuleLoad(CUmodule *module, const char *fname);
|
||||
static CUresult cuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra);
|
||||
static CUresult cuModuleUnload(CUmodule hmod);
|
||||
static CUresult cuModuleLoadDataEx(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
|
||||
static CUresult cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
|
||||
static CUresult cuDeviceGetCount(int *count);
|
||||
static CUresult cuMemcpyHtoD_v2(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
|
||||
static CUresult cuInit(unsigned int Flags);
|
||||
static CUresult cuEventRecord(CUevent hEvent, CUstream hStream);
|
||||
static CUresult cuCtxCreate_v2(CUcontext *pctx, unsigned int flags, CUdevice dev);
|
||||
static CUresult cuCtxPushCurrent_v2(CUcontext ctx);
|
||||
static CUresult cuCtxPopCurrent_v2(CUcontext *pctx);
|
||||
static CUresult cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, const char *name);
|
||||
static CUresult cuStreamSynchronize(CUstream hStream);
|
||||
static CUresult cuStreamDestroy_v2(CUstream hStream);
|
||||
static CUresult cuEventDestroy_v2(CUevent hEvent);
|
||||
static CUresult cuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize);
|
||||
static CUresult cuPointerGetAttribute(void * data, CUpointer_attribute attribute, CUdeviceptr ptr);
|
||||
static CUresult cuCtxGetDevice(CUdevice* result);
|
||||
static CUresult cuMemsetD8Async(CUdeviceptr dst, unsigned char x, size_t N, CUstream stream);
|
||||
|
||||
static nvrtcResult nvrtcCompileProgram(nvrtcProgram prog, int numOptions, const char **options);
|
||||
static nvrtcResult nvrtcGetProgramLogSize(nvrtcProgram prog, size_t *logSizeRet);
|
||||
static nvrtcResult nvrtcGetPTX(nvrtcProgram prog, char *ptx);
|
||||
static nvrtcResult nvrtcGetPTXSize(nvrtcProgram prog, size_t *ptxSizeRet);
|
||||
static nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char **headers, const char **includeNames);
|
||||
static nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char *log);
|
||||
static nvmlReturn_t nvmlDeviceGetHandleByPciBusId_v2( const char* pciBusId, nvmlDevice_t* device);
|
||||
static nvmlReturn_t nvmlDeviceGetClockInfo(nvmlDevice_t device, nvmlClockType_t type, unsigned int *clock);
|
||||
static nvmlReturn_t nvmlDeviceGetMaxClockInfo(nvmlDevice_t device, nvmlClockType_t type, unsigned int *clock);
|
||||
|
||||
static cublasHandle_t cublasHandle(Context const & ctx);
|
||||
static cublasStatus_t cublasCreate_v2(cublasHandle_t* h);
|
||||
static cublasStatus_t cublasGetStream_v2(cublasHandle_t h, cudaStream_t *streamId);
|
||||
static cublasStatus_t cublasSetStream_v2(cublasHandle_t h, cudaStream_t streamId);
|
||||
static cublasStatus_t cublasSgemm_v2 (cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, float* alpha, const float *A, int lda, const float *B, int ldb, float* beta, float *C, int ldc);
|
||||
static cublasStatus_t cublasDgemm_v2 (cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, double* alpha, const double *A, int lda, const double *B, int ldb, double* beta, double *C, int ldc);
|
||||
static nvrtcResult nvrtcCompileProgram(nvrtcProgram prog, int numOptions, const char **options);
|
||||
static nvrtcResult nvrtcGetProgramLogSize(nvrtcProgram prog, size_t *logSizeRet);
|
||||
static nvrtcResult nvrtcGetPTX(nvrtcProgram prog, char *ptx);
|
||||
static nvrtcResult nvrtcGetPTXSize(nvrtcProgram prog, size_t *ptxSizeRet);
|
||||
static nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char **headers, const char **includeNames);
|
||||
static nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char *log);
|
||||
|
||||
static cublasHandle_t cublasHandle(Context const & ctx);
|
||||
static cublasStatus_t cublasCreate_v2(cublasHandle_t* h);
|
||||
static cublasStatus_t cublasGetStream_v2(cublasHandle_t h, cudaStream_t *streamId);
|
||||
static cublasStatus_t cublasSetStream_v2(cublasHandle_t h, cudaStream_t streamId);
|
||||
static cublasStatus_t cublasSgemm_v2 (cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, float* alpha, const float *A, int lda, const float *B, int ldb, float* beta, float *C, int ldc);
|
||||
static cublasStatus_t cublasDgemm_v2 (cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, double* alpha, const double *A, int lda, const double *B, int ldb, double* beta, double *C, int ldc);
|
||||
static cublasStatus_t cublasHgemm (cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, half* alpha, const half *A, int lda, const half *B, int ldb, half* beta, half *C, int ldc);
|
||||
|
||||
|
||||
static cudnnHandle_t cudnnHandle(Context const & ctx);
|
||||
static cudnnStatus_t cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t* convDesc);
|
||||
static cudnnStatus_t cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc);
|
||||
static cudnnStatus_t cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc);
|
||||
static cudnnStatus_t cudnnCreate(cudnnHandle_t *handle);
|
||||
static cudnnStatus_t cudnnSetTensor4dDescriptor(cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, cudnnDataType_t dataType, int n, int c, int h, int w);
|
||||
static cudnnStatus_t cudnnSetFilter4dDescriptor(cudnnFilterDescriptor_t filterDesc, cudnnDataType_t dataType, cudnnTensorFormat_t format, int k, int c, int h, int w);
|
||||
static cudnnStatus_t cudnnSetConvolution2dDescriptor(cudnnConvolutionDescriptor_t convDesc, int pad_h, int pad_w, int u, int v, int upscalex, int upscaley, cudnnConvolutionMode_t mode);
|
||||
static cudnnStatus_t cudnnSetConvolutionNdDescriptor(cudnnConvolutionDescriptor_t convDesc, int arrayLength, const int padA[], const int filterStrideA[], const int upscaleA[], cudnnConvolutionMode_t mode, cudnnDataType_t dataType);
|
||||
|
||||
static cudnnStatus_t cudnnGetConvolutionForwardAlgorithm(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const cudnnFilterDescriptor_t wDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t yDesc, cudnnConvolutionFwdPreference_t preference, size_t memoryLimitInBytes, cudnnConvolutionFwdAlgo_t *algo);
|
||||
static cudnnStatus_t cudnnGetConvolutionForwardWorkspaceSize(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const cudnnFilterDescriptor_t wDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t yDesc, cudnnConvolutionFwdAlgo_t algo, size_t *sizeInBytes);
|
||||
static cudnnStatus_t cudnnConvolutionForward(cudnnHandle_t handle, const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, const cudnnFilterDescriptor_t wDesc, const void *w, const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo, void *workSpace, size_t workSpaceSizeInBytes, const void *beta, const cudnnTensorDescriptor_t yDesc, void *y);
|
||||
static cudnnStatus_t cudnnSetStream(cudnnHandle_t handle, cudaStream_t streamId);
|
||||
|
||||
private:
|
||||
static void* opencl_;
|
||||
static void* cuda_;
|
||||
static void* nvrtc_;
|
||||
static void* cublas_;
|
||||
static void* cuda_;
|
||||
static void* nvrtc_;
|
||||
static void* nvml_;
|
||||
static void* cublas_;
|
||||
static void* cudnn_;
|
||||
|
||||
//CUDA
|
||||
static void* cuCtxGetCurrent_;
|
||||
static void* cuCtxDestroy_v2_;
|
||||
static void* cuEventCreate_;
|
||||
static void* cuDeviceGet_;
|
||||
static void* cuMemcpyDtoH_v2_;
|
||||
static void* cuStreamCreate_;
|
||||
static void* cuEventElapsedTime_;
|
||||
static void* cuMemFree_v2_;
|
||||
static void* cuMemcpyDtoHAsync_v2_;
|
||||
static void* cuDriverGetVersion_;
|
||||
static void* cuDeviceGetName_;
|
||||
static void* cuDeviceGetPCIBusId_;
|
||||
|
||||
//OpenCL
|
||||
static void* clBuildProgram_;
|
||||
static void* clEnqueueNDRangeKernel_;
|
||||
static void* clSetKernelArg_;
|
||||
static void* clReleaseMemObject_;
|
||||
static void* clFinish_;
|
||||
static void* clGetMemObjectInfo_;
|
||||
static void* clGetCommandQueueInfo_;
|
||||
static void* clReleaseContext_;
|
||||
static void* clReleaseEvent_;
|
||||
static void* clEnqueueWriteBuffer_;
|
||||
static void* clEnqueueReadBuffer_;
|
||||
static void* clGetProgramBuildInfo_;
|
||||
static void* clReleaseDevice_;
|
||||
static void* clCreateContext_;
|
||||
static void* clGetDeviceIDs_;
|
||||
static void* clGetContextInfo_;
|
||||
static void* clGetDeviceInfo_;
|
||||
static void* clReleaseCommandQueue_;
|
||||
static void* clGetPlatformIDs_;
|
||||
static void* clGetPlatformInfo_;
|
||||
static void* clGetEventProfilingInfo_;
|
||||
static void* clCreateProgramWithBinary_;
|
||||
static void* clCreateCommandQueue_;
|
||||
static void* clRetainEvent_;
|
||||
static void* clReleaseProgram_;
|
||||
static void* clFlush_;
|
||||
static void* clGetProgramInfo_;
|
||||
static void* clGetKernelInfo_;
|
||||
static void* clGetKernelWorkGroupInfo_;
|
||||
static void* clCreateKernel_;
|
||||
static void* clCreateBuffer_;
|
||||
static void* clCreateImage_;
|
||||
static void* clCreateProgramWithSource_;
|
||||
static void* clReleaseKernel_;
|
||||
static void* clEnqueueCopyBufferToImage_;
|
||||
static void* clSetEventCallback_;
|
||||
static void* cuMemcpyHtoDAsync_v2_;
|
||||
static void* cuModuleLoad_;
|
||||
static void* cuLaunchKernel_;
|
||||
static void* cuModuleUnload_;
|
||||
static void* cuModuleLoadDataEx_;
|
||||
static void* cuDeviceGetAttribute_;
|
||||
static void* cuDeviceGetCount_;
|
||||
static void* cuMemcpyHtoD_v2_;
|
||||
static void* cuInit_;
|
||||
static void* cuEventRecord_;
|
||||
static void* cuCtxCreate_v2_;
|
||||
static void* cuModuleGetFunction_;
|
||||
static void* cuStreamSynchronize_;
|
||||
static void* cuStreamDestroy_v2_;
|
||||
static void* cuEventDestroy_v2_;
|
||||
static void* cuMemAlloc_v2_;
|
||||
static void* cuPointerGetAttribute_;
|
||||
static void* cuCtxGetDevice_;
|
||||
static void* cuMemsetD8Async_;
|
||||
static void* cuCtxPushCurrent_v2_;
|
||||
static void* cuCtxPopCurrent_v2_;
|
||||
|
||||
//CUDA
|
||||
static void* cuCtxDestroy_v2_;
|
||||
static void* cuEventCreate_;
|
||||
static void* cuDeviceGet_;
|
||||
static void* cuMemcpyDtoH_v2_;
|
||||
static void* cuStreamCreate_;
|
||||
static void* cuEventElapsedTime_;
|
||||
static void* cuMemFree_v2_;
|
||||
static void* cuMemcpyDtoHAsync_v2_;
|
||||
static void* cuDriverGetVersion_;
|
||||
static void* cuDeviceGetName_;
|
||||
static void* cuMemcpyHtoDAsync_v2_;
|
||||
static void* cuModuleLoad_;
|
||||
static void* cuLaunchKernel_;
|
||||
static void* cuModuleUnload_;
|
||||
static void* cuModuleLoadDataEx_;
|
||||
static void* cuDeviceGetAttribute_;
|
||||
static void* cuDeviceGetCount_;
|
||||
static void* cuMemcpyHtoD_v2_;
|
||||
static void* cuInit_;
|
||||
static void* cuEventRecord_;
|
||||
static void* cuCtxCreate_v2_;
|
||||
static void* cuModuleGetFunction_;
|
||||
static void* cuStreamSynchronize_;
|
||||
static void* cuStreamDestroy_v2_;
|
||||
static void* cuEventDestroy_v2_;
|
||||
static void* cuMemAlloc_v2_;
|
||||
static void* cuPointerGetAttribute_;
|
||||
static void* cuCtxGetDevice_;
|
||||
static void* nvmlInit_v2_;
|
||||
static void* nvmlDeviceGetHandleByPciBusId_v2_;
|
||||
static void* nvmlDeviceGetClockInfo_;
|
||||
static void* nvmlDeviceGetMaxClockInfo_;
|
||||
|
||||
static void* nvrtcCompileProgram_;
|
||||
static void* nvrtcGetProgramLogSize_;
|
||||
static void* nvrtcGetPTX_;
|
||||
static void* nvrtcGetPTXSize_;
|
||||
static void* nvrtcCreateProgram_;
|
||||
static void* nvrtcGetProgramLog_;
|
||||
static void* nvrtcCompileProgram_;
|
||||
static void* nvrtcGetProgramLogSize_;
|
||||
static void* nvrtcGetPTX_;
|
||||
static void* nvrtcGetPTXSize_;
|
||||
static void* nvrtcCreateProgram_;
|
||||
static void* nvrtcGetProgramLog_;
|
||||
|
||||
static void* cublasCreate_v2_;
|
||||
static void* cublasGetStream_v2_;
|
||||
static void* cublasSetStream_v2_;
|
||||
static void* cublasHgemm_;
|
||||
static void* cublasSgemm_v2_;
|
||||
static void* cublasDgemm_v2_;
|
||||
|
||||
static void* cudnnCreateConvolutionDescriptor_;
|
||||
static void* cudnnCreateTensorDescriptor_;
|
||||
static void* cudnnCreateFilterDescriptor_;
|
||||
static void* cudnnCreate_;
|
||||
static void* cudnnSetTensor4dDescriptor_;
|
||||
static void* cudnnSetFilter4dDescriptor_;
|
||||
static void* cudnnSetConvolution2dDescriptor_;
|
||||
static void* cudnnSetConvolutionNdDescriptor_;
|
||||
static void* cudnnGetConvolutionForwardAlgorithm_;
|
||||
static void* cudnnGetConvolutionForwardWorkspaceSize_;
|
||||
static void* cudnnConvolutionForward_;
|
||||
static void* cudnnSetStream_;
|
||||
|
||||
static void* cublasCreate_v2_;
|
||||
static void* cublasGetStream_v2_;
|
||||
static void* cublasSetStream_v2_;
|
||||
static void* cublasSgemm_v2_;
|
||||
static void* cublasDgemm_v2_;
|
||||
};
|
||||
|
||||
}
|
||||
|
224
include/isaac/driver/error.h
Normal file
224
include/isaac/driver/error.h
Normal file
@@ -0,0 +1,224 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ISAAC_EXCEPTION_DRIVER_H
|
||||
#define ISAAC_EXCEPTION_DRIVER_H
|
||||
|
||||
#include <exception>
|
||||
#include "isaac/driver/dispatch.h"
|
||||
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
|
||||
namespace driver
|
||||
{
|
||||
|
||||
namespace exception
|
||||
{
|
||||
|
||||
namespace nvrtc
|
||||
{
|
||||
|
||||
#define ISAAC_CREATE_NVRTC_EXCEPTION(name, msg) class name: public std::exception { public: const char * what() const throw(){ return "NVRTC: Error- " msg; } }
|
||||
|
||||
ISAAC_CREATE_NVRTC_EXCEPTION(out_of_memory ,"out of memory");
|
||||
ISAAC_CREATE_NVRTC_EXCEPTION(program_creation_failure ,"program creation failure");
|
||||
ISAAC_CREATE_NVRTC_EXCEPTION(invalid_input ,"invalid input");
|
||||
ISAAC_CREATE_NVRTC_EXCEPTION(invalid_program ,"invalid program");
|
||||
ISAAC_CREATE_NVRTC_EXCEPTION(invalid_option ,"invalid option");
|
||||
ISAAC_CREATE_NVRTC_EXCEPTION(compilation ,"compilation");
|
||||
ISAAC_CREATE_NVRTC_EXCEPTION(builtin_operation_failure ,"builtin operation failure");
|
||||
ISAAC_CREATE_NVRTC_EXCEPTION(unknown_error ,"unknown error");
|
||||
|
||||
#undef ISAAC_CREATE_NVRTC_EXCEPTION
|
||||
}
|
||||
|
||||
|
||||
namespace cuda
|
||||
{
|
||||
class base: public std::exception{};
|
||||
|
||||
#define ISAAC_CREATE_CUDA_EXCEPTION(name, msg) class name: public base { public:const char * what() const throw(){ return "CUDA: Error- " msg; } }
|
||||
|
||||
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(invalid_value ,"invalid value");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(out_of_memory ,"out of memory");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(not_initialized ,"not initialized");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(deinitialized ,"deinitialized");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(profiler_disabled ,"profiler disabled");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(profiler_not_initialized ,"profiler not initialized");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(profiler_already_started ,"profiler already started");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(profiler_already_stopped ,"profiler already stopped");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(no_device ,"no device");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(invalid_device ,"invalid device");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(invalid_image ,"invalid image");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(invalid_context ,"invalid context");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(context_already_current ,"context already current");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(map_failed ,"map failed");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(unmap_failed ,"unmap failed");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(array_is_mapped ,"array is mapped");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(already_mapped ,"already mapped");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(no_binary_for_gpu ,"no binary for gpu");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(already_acquired ,"already acquired");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(not_mapped ,"not mapped");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(not_mapped_as_array ,"not mapped as array");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(not_mapped_as_pointer ,"not mapped as pointer");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(ecc_uncorrectable ,"ecc uncorrectable");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(unsupported_limit ,"unsupported limit");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(context_already_in_use ,"context already in use");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(peer_access_unsupported ,"peer access unsupported");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(invalid_ptx ,"invalid ptx");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(invalid_graphics_context ,"invalid graphics context");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(invalid_source ,"invalid source");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(file_not_found ,"file not found");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(shared_object_symbol_not_found ,"shared object symbol not found");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(shared_object_init_failed ,"shared object init failed");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(operating_system ,"operating system");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(invalid_handle ,"invalid handle");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(not_found ,"not found");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(not_ready ,"not ready");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(illegal_address ,"illegal address");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(launch_out_of_resources ,"launch out of resources");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(launch_timeout ,"launch timeout");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(launch_incompatible_texturing ,"launch incompatible texturing");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(peer_access_already_enabled ,"peer access already enabled");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(peer_access_not_enabled ,"peer access not enabled");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(primary_context_active ,"primary context active");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(context_is_destroyed ,"context is destroyed");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(assert_error ,"assert");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(too_many_peers ,"too many peers");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(host_memory_already_registered ,"host memory already registered");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(host_memory_not_registered ,"host memory not registered");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(hardware_stack_error ,"hardware stack error");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(illegal_instruction ,"illegal instruction");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(misaligned_address ,"misaligned address");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(invalid_address_space ,"invalid address space");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(invalid_pc ,"invalid pc");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(launch_failed ,"launch failed");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(not_permitted ,"not permitted");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(not_supported ,"not supported");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(unknown ,"unknown");
|
||||
|
||||
#undef ISAAC_CREATE_CUDA_EXCEPTION
|
||||
}
|
||||
|
||||
namespace cublas
|
||||
{
|
||||
|
||||
#define ISAAC_CREATE_CUBLAS_EXCEPTION(name, msg) class name: public std::exception { public: const char * what() const throw(){ return "CUBLAS: Error- " msg; } }
|
||||
|
||||
ISAAC_CREATE_CUBLAS_EXCEPTION(not_initialized ,"not initialized");
|
||||
ISAAC_CREATE_CUBLAS_EXCEPTION(alloc_failed ,"alloc failed");
|
||||
ISAAC_CREATE_CUBLAS_EXCEPTION(invalid_value ,"invalid value");
|
||||
ISAAC_CREATE_CUBLAS_EXCEPTION(arch_mismatch ,"arch mismatch");
|
||||
ISAAC_CREATE_CUBLAS_EXCEPTION(mapping_error ,"mapping error");
|
||||
ISAAC_CREATE_CUBLAS_EXCEPTION(execution_failed ,"execution failed");
|
||||
ISAAC_CREATE_CUBLAS_EXCEPTION(internal_error ,"internal error");
|
||||
ISAAC_CREATE_CUBLAS_EXCEPTION(not_supported ,"not supported");
|
||||
ISAAC_CREATE_CUBLAS_EXCEPTION(license_error ,"license error");
|
||||
ISAAC_CREATE_CUBLAS_EXCEPTION(unknown ,"unknown");
|
||||
|
||||
#undef ISAAC_CREATE_CUBLAS_EXCEPTION
|
||||
}
|
||||
|
||||
namespace cudnn
{

// Declares an exception class `name`, derived from std::exception, whose
// what() returns the string literal "CUDNN: Error- " immediately followed by msg.
#define ISAAC_CREATE_CUDNN_EXCEPTION(name, msg) class name: public std::exception { public: const char * what() const throw(){ return "CUDNN: Error- " msg; } }

ISAAC_CREATE_CUDNN_EXCEPTION(not_initialized ,"not initialized");
ISAAC_CREATE_CUDNN_EXCEPTION(alloc_failed ,"allocation failed");
ISAAC_CREATE_CUDNN_EXCEPTION(bad_param ,"bad param");
ISAAC_CREATE_CUDNN_EXCEPTION(internal_error ,"internal error");
ISAAC_CREATE_CUDNN_EXCEPTION(invalid_value ,"invalid value");
ISAAC_CREATE_CUDNN_EXCEPTION(arch_mismatch ,"arch mismatch");
ISAAC_CREATE_CUDNN_EXCEPTION(mapping_error ,"mapping error");
ISAAC_CREATE_CUDNN_EXCEPTION(execution_failed ,"execution failed");
ISAAC_CREATE_CUDNN_EXCEPTION(not_supported ,"not supported");
ISAAC_CREATE_CUDNN_EXCEPTION(license_error ,"license error");

// Keep the helper macro local to this header; the nvrtc, cuda and cublas
// sections of this file undefine their helper macros the same way.
#undef ISAAC_CREATE_CUDNN_EXCEPTION

}
|
||||
|
||||
namespace ocl
|
||||
{
|
||||
|
||||
class base: public std::exception{};
|
||||
|
||||
#define ISAAC_CREATE_CL_EXCEPTION(name, msg) class name: public base { public: const char * what() const throw(){ return "OpenCL: Error- " msg; } }
|
||||
|
||||
|
||||
ISAAC_CREATE_CL_EXCEPTION(device_not_found, "device not found");
|
||||
ISAAC_CREATE_CL_EXCEPTION(device_not_available, "device not available");
|
||||
ISAAC_CREATE_CL_EXCEPTION(compiler_not_available, "compiler not available");
|
||||
ISAAC_CREATE_CL_EXCEPTION(mem_object_allocation_failure, "object allocation failure");
|
||||
ISAAC_CREATE_CL_EXCEPTION(out_of_resources, "launch out of resources");
|
||||
ISAAC_CREATE_CL_EXCEPTION(out_of_host_memory, "out of host memory");
|
||||
ISAAC_CREATE_CL_EXCEPTION(profiling_info_not_available, "profiling info not available");
|
||||
ISAAC_CREATE_CL_EXCEPTION(mem_copy_overlap, "mem copy overlap");
|
||||
ISAAC_CREATE_CL_EXCEPTION(image_format_mismatch, "image format mismatch");
|
||||
ISAAC_CREATE_CL_EXCEPTION(image_format_not_supported, "image format not supported");
|
||||
ISAAC_CREATE_CL_EXCEPTION(build_program_failure, "build program failure");
|
||||
ISAAC_CREATE_CL_EXCEPTION(map_failure, "map failure");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_value, "invalid value");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_device_type, "invalid device type");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_platform, "invalid platform");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_device, "invalid device");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_context, "invalid context");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_queue_properties, "invalid queue properties");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_command_queue, "invalid command queue");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_host_ptr, "invalid host pointer");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_mem_object, "invalid mem object");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_image_format_descriptor, "invalid image format descriptor");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_image_size, "invalid image size");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_sampler, "invalid sampler");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_binary, "invalid binary");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_build_options, "invalid build options");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_program, "invalid program");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_program_executable, "invalid program executable");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_kernel_name, "invalid kernel name");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_kernel_definition, "invalid kernel definition");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_kernel, "invalid kernel");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_arg_index, "invalid arg index");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_arg_value, "invalid arg value");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_arg_size, "invalid arg size");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_kernel_args, "invalid kernel args");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_work_dimension, "invalid work dimension");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_work_group_size, "invalid work group size");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_work_item_size, "invalid work item size");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_global_offset, "invalid global offset");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_event_wait_list, "invalid event wait list");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_event, "invalid event");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_operation, "invalid operation");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_gl_object, "invalid GL object");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_buffer_size, "invalid buffer size");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_mip_level, "invalid MIP level");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_global_work_size, "invalid global work size");
|
||||
#ifdef CL_INVALID_PROPERTY
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_property, "invalid property");
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
@@ -23,8 +23,6 @@
|
||||
#ifndef ISAAC_DRIVER_EVENT_H
|
||||
#define ISAAC_DRIVER_EVENT_H
|
||||
|
||||
#include "isaac/defines.h"
|
||||
#include "isaac/driver/common.h"
|
||||
#include "isaac/driver/handle.h"
|
||||
|
||||
namespace isaac
|
||||
@@ -34,26 +32,14 @@ namespace driver
|
||||
{
|
||||
|
||||
// Event
|
||||
class ISAACAPI Event: public has_handle_comparators<Event>
|
||||
class Event: public Handle<cu_event_t>
|
||||
{
|
||||
private:
|
||||
friend class CommandQueue;
|
||||
typedef Handle<cu_event_t> base_type;
|
||||
|
||||
public:
|
||||
typedef Handle<cl_event, cu_event_t> handle_type;
|
||||
|
||||
public:
|
||||
//Constructors
|
||||
Event(cl_event const & event, bool take_ownership = true);
|
||||
Event(backend_type backend);
|
||||
//Accessors
|
||||
handle_type const & handle() const;
|
||||
//Profiling
|
||||
long elapsed_time() const;
|
||||
|
||||
private:
|
||||
backend_type backend_;
|
||||
handle_type h_;
|
||||
using base_type::base_type;
|
||||
float elapsed_time() const;
|
||||
};
|
||||
|
||||
}
|
||||
|
1211
include/isaac/driver/external/CL/cl.h
vendored
1211
include/isaac/driver/external/CL/cl.h
vendored
File diff suppressed because it is too large
Load Diff
346
include/isaac/driver/external/CL/cl_ext.h
vendored
346
include/isaac/driver/external/CL/cl_ext.h
vendored
@@ -1,346 +0,0 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2013 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
******************************************************************************/
|
||||
|
||||
/* $Revision: 11928 $ on $Date: 2010-07-13 09:04:56 -0700 (Tue, 13 Jul 2010) $ */
|
||||
|
||||
/* cl_ext.h contains OpenCL extensions which don't have external */
|
||||
/* (OpenGL, D3D) dependencies. */
|
||||
|
||||
#ifndef __CL_EXT_H
|
||||
#define __CL_EXT_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "isaac/driver/external/CL/cl_ext.h"
|
||||
|
||||
/* cl_khr_fp64 extension - no extension #define since it has no functions */
|
||||
#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
|
||||
|
||||
/* cl_khr_fp16 extension - no extension #define since it has no functions */
|
||||
#define CL_DEVICE_HALF_FP_CONFIG 0x1033
|
||||
|
||||
/* Memory object destruction
|
||||
*
|
||||
* Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR
|
||||
*
|
||||
* Registers a user callback function that will be called when the memory object is deleted and its resources
|
||||
* freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback
|
||||
* stack associated with memobj. The registered user callback functions are called in the reverse order in
|
||||
* which they were registered. The user callback functions are called and then the memory object is deleted
|
||||
* and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be
|
||||
* notified when the memory referenced by host_ptr, specified when the memory object is created and used as
|
||||
* the storage bits for the memory object, can be reused or freed.
|
||||
*
|
||||
* The application may not call CL api's with the cl_mem object passed to the pfn_notify.
|
||||
*
|
||||
* Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
|
||||
* before using.
|
||||
*/
|
||||
#define cl_APPLE_SetMemObjectDestructor 1
|
||||
cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem /* memobj */,
|
||||
void (* /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
|
||||
void * /*user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
|
||||
/* Context Logging Functions
|
||||
*
|
||||
* The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
|
||||
* Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
|
||||
* before using.
|
||||
*
|
||||
* clLogMessagesToSystemLog forwards all log messages to the Apple System Logger
|
||||
*/
|
||||
#define cl_APPLE_ContextLoggingFunctions 1
|
||||
extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * /* errstr */,
|
||||
const void * /* private_info */,
|
||||
size_t /* cb */,
|
||||
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */
|
||||
extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * /* errstr */,
|
||||
const void * /* private_info */,
|
||||
size_t /* cb */,
|
||||
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */
|
||||
extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * /* errstr */,
|
||||
const void * /* private_info */,
|
||||
size_t /* cb */,
|
||||
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
|
||||
/************************
|
||||
* cl_khr_icd extension *
|
||||
************************/
|
||||
#define cl_khr_icd 1
|
||||
|
||||
/* cl_platform_info */
|
||||
#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920
|
||||
|
||||
/* Additional Error Codes */
|
||||
#define CL_PLATFORM_NOT_FOUND_KHR -1001
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clIcdGetPlatformIDsKHR(cl_uint /* num_entries */,
|
||||
cl_platform_id * /* platforms */,
|
||||
cl_uint * /* num_platforms */);
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(
|
||||
cl_uint /* num_entries */,
|
||||
cl_platform_id * /* platforms */,
|
||||
cl_uint * /* num_platforms */);
|
||||
|
||||
|
||||
/* Extension: cl_khr_image2D_buffer
|
||||
*
|
||||
* This extension allows a 2D image to be created from a cl_mem buffer without a copy.
|
||||
* The type associated with a 2D image created from a buffer in an OpenCL program is image2d_t.
|
||||
* Both the sampler and sampler-less read_image built-in functions are supported for 2D images
|
||||
* and 2D images created from a buffer. Similarly, the write_image built-ins are also supported
|
||||
* for 2D images created from a buffer.
|
||||
*
|
||||
* When the 2D image from buffer is created, the client must specify the width,
|
||||
* height, image format (i.e. channel order and channel data type) and optionally the row pitch
|
||||
*
|
||||
* The pitch specified must be a multiple of CL_DEVICE_IMAGE_PITCH_ALIGNMENT pixels.
|
||||
* The base address of the buffer must be aligned to CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT pixels.
|
||||
*/
|
||||
|
||||
/*************************************
|
||||
* cl_khr_initalize_memory extension *
|
||||
*************************************/
|
||||
|
||||
#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x200E
|
||||
|
||||
|
||||
/**************************************
|
||||
* cl_khr_terminate_context extension *
|
||||
**************************************/
|
||||
|
||||
#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x200F
|
||||
#define CL_CONTEXT_TERMINATE_KHR 0x2010
|
||||
|
||||
#define cl_khr_terminate_context 1
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clTerminateContextKHR(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clTerminateContextKHR_fn)(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
|
||||
/*
|
||||
* Extension: cl_khr_spir
|
||||
*
|
||||
* This extension adds support to create an OpenCL program object from a
|
||||
* Standard Portable Intermediate Representation (SPIR) instance
|
||||
*/
|
||||
|
||||
/******************************************
|
||||
* cl_nv_device_attribute_query extension *
|
||||
******************************************/
|
||||
/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
|
||||
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
|
||||
#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001
|
||||
#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002
|
||||
#define CL_DEVICE_WARP_SIZE_NV 0x4003
|
||||
#define CL_DEVICE_GPU_OVERLAP_NV 0x4004
|
||||
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005
|
||||
#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006
|
||||
|
||||
/*********************************
|
||||
* cl_amd_device_memory_flags *
|
||||
*********************************/
|
||||
#define cl_amd_device_memory_flags 1
|
||||
|
||||
#define CL_MEM_USE_PERSISTENT_MEM_AMD (1 << 6) // Alloc from GPU's CPU visible heap
|
||||
|
||||
/* cl_device_info */
|
||||
#define CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT 0x4032
|
||||
|
||||
/*********************************
|
||||
* cl_amd_device_attribute_query *
|
||||
*********************************/
|
||||
#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036
|
||||
#define CL_DEVICE_TOPOLOGY_AMD 0x4037
|
||||
#define CL_DEVICE_BOARD_NAME_AMD 0x4038
|
||||
#define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD 0x4039
|
||||
#define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD 0x4040
|
||||
#define CL_DEVICE_SIMD_WIDTH_AMD 0x4041
|
||||
#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042
|
||||
#define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043
|
||||
#define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD 0x4044
|
||||
#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD 0x4045
|
||||
#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD 0x4046
|
||||
#define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD 0x4047
|
||||
#define CL_DEVICE_LOCAL_MEM_BANKS_AMD 0x4048
|
||||
|
||||
typedef union
|
||||
{
|
||||
struct { cl_uint type; cl_uint data[5]; } raw;
|
||||
struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie;
|
||||
} cl_device_topology_amd;
|
||||
|
||||
#define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD 1
|
||||
|
||||
|
||||
/**************************
|
||||
* cl_amd_offline_devices *
|
||||
**************************/
|
||||
#define CL_CONTEXT_OFFLINE_DEVICES_AMD 0x403F
|
||||
|
||||
#ifdef CL_VERSION_1_1
|
||||
/***********************************
|
||||
* cl_ext_device_fission extension *
|
||||
***********************************/
|
||||
#define cl_ext_device_fission 1
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clReleaseDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL *clReleaseDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clRetainDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL *clRetainDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef cl_ulong cl_device_partition_property_ext;
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clCreateSubDevicesEXT( cl_device_id /*in_device*/,
|
||||
const cl_device_partition_property_ext * /* properties */,
|
||||
cl_uint /*num_entries*/,
|
||||
cl_device_id * /*out_devices*/,
|
||||
cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
( CL_API_CALL * clCreateSubDevicesEXT_fn)( cl_device_id /*in_device*/,
|
||||
const cl_device_partition_property_ext * /* properties */,
|
||||
cl_uint /*num_entries*/,
|
||||
cl_device_id * /*out_devices*/,
|
||||
cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
/* cl_device_partition_property_ext */
|
||||
#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050
|
||||
#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051
|
||||
#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052
|
||||
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053
|
||||
|
||||
/* clDeviceGetInfo selectors */
|
||||
#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054
|
||||
#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055
|
||||
#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056
|
||||
#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057
|
||||
#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058
|
||||
|
||||
/* error codes */
|
||||
#define CL_DEVICE_PARTITION_FAILED_EXT -1057
|
||||
#define CL_INVALID_PARTITION_COUNT_EXT -1058
|
||||
#define CL_INVALID_PARTITION_NAME_EXT -1059
|
||||
|
||||
/* CL_AFFINITY_DOMAINs */
|
||||
#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1
|
||||
#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2
|
||||
#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3
|
||||
#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4
|
||||
#define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10
|
||||
#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100
|
||||
|
||||
/* cl_device_partition_property_ext list terminators */
|
||||
#define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0)
|
||||
#define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0)
|
||||
#define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1)
|
||||
|
||||
/* cl_ext_atomic_counters_32 and cl_ext_atomic_counters_64 extensions
|
||||
* no extension #define since they have no functions
|
||||
*/
|
||||
#define CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT 0x4032
|
||||
|
||||
/*********************************
|
||||
* cl_qcom_ext_host_ptr extension
|
||||
*********************************/
|
||||
|
||||
#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29)
|
||||
|
||||
#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0
|
||||
#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1
|
||||
#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2
|
||||
#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3
|
||||
#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4
|
||||
#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5
|
||||
#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6
|
||||
#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7
|
||||
|
||||
typedef cl_uint cl_image_pitch_info_qcom;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetDeviceImageInfoQCOM(cl_device_id device,
|
||||
size_t image_width,
|
||||
size_t image_height,
|
||||
const cl_image_format *image_format,
|
||||
cl_image_pitch_info_qcom param_name,
|
||||
size_t param_value_size,
|
||||
void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
typedef struct _cl_mem_ext_host_ptr
|
||||
{
|
||||
// Type of external memory allocation.
|
||||
// Legal values will be defined in layered extensions.
|
||||
cl_uint allocation_type;
|
||||
|
||||
// Host cache policy for this external memory allocation.
|
||||
cl_uint host_cache_policy;
|
||||
|
||||
} cl_mem_ext_host_ptr;
|
||||
|
||||
/*********************************
|
||||
* cl_qcom_ion_host_ptr extension
|
||||
*********************************/
|
||||
|
||||
#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8
|
||||
|
||||
typedef struct _cl_mem_ion_host_ptr
|
||||
{
|
||||
// Type of external memory allocation.
|
||||
// Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations.
|
||||
cl_mem_ext_host_ptr ext_host_ptr;
|
||||
|
||||
// ION file descriptor
|
||||
int ion_filedesc;
|
||||
|
||||
// Host pointer to the ION allocated memory
|
||||
void* ion_hostptr;
|
||||
|
||||
} cl_mem_ion_host_ptr;
|
||||
|
||||
#endif /* CL_VERSION_1_1 */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* __CL_EXT_H */
|
1255
include/isaac/driver/external/CL/cl_platform.h
vendored
1255
include/isaac/driver/external/CL/cl_platform.h
vendored
File diff suppressed because it is too large
Load Diff
2
include/isaac/driver/external/CUDA/cublas.h
vendored
2
include/isaac/driver/external/CUDA/cublas.h
vendored
@@ -57,7 +57,7 @@
|
||||
#if !defined(CUBLAS_H_)
|
||||
#define CUBLAS_H_
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "cuda_runtime.h"
|
||||
|
||||
#ifndef CUBLASWINAPI
|
||||
#ifdef _WIN32
|
||||
|
875
include/isaac/driver/external/CUDA/cuda.h
vendored
875
include/isaac/driver/external/CUDA/cuda.h
vendored
File diff suppressed because it is too large
Load Diff
1651
include/isaac/driver/external/CUDA/cudnn.h
vendored
Normal file
1651
include/isaac/driver/external/CUDA/cudnn.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
4406
include/isaac/driver/external/CUDA/nvml.h
vendored
Normal file
4406
include/isaac/driver/external/CUDA/nvml.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
@@ -24,10 +24,11 @@
|
||||
#define ISAAC_DRIVER_HANDLE_H
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "isaac/defines.h"
|
||||
#include "isaac/driver/common.h"
|
||||
#include <iostream>
|
||||
#include <functional>
|
||||
#include <type_traits>
|
||||
#include "isaac/driver/dispatch.h"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
|
||||
@@ -35,68 +36,59 @@ namespace driver
|
||||
{
|
||||
|
||||
struct cu_event_t{
|
||||
operator bool() const { return first && second; }
|
||||
CUevent first;
|
||||
CUevent second;
|
||||
operator bool() const { return first && second; }
|
||||
CUevent first;
|
||||
CUevent second;
|
||||
};
|
||||
|
||||
template<class CLType, class CUType>
|
||||
class ISAACAPI Handle
|
||||
{
|
||||
struct cu_platform{
|
||||
cu_platform() : status_(dispatch::cuInit(0)) {}
|
||||
operator bool() const { return status_; }
|
||||
private:
|
||||
static void _delete(CUcontext x);
|
||||
static void _delete(CUdeviceptr x);
|
||||
static void _delete(CUstream x);
|
||||
static void _delete(CUdevice);
|
||||
static void _delete(CUevent x);
|
||||
static void _delete(CUfunction);
|
||||
static void _delete(CUmodule x);
|
||||
static void _delete(cu_event_t x);
|
||||
CUresult status_;
|
||||
};
|
||||
|
||||
static void release(cl_context x);
|
||||
static void release(cl_mem x);
|
||||
static void release(cl_command_queue x);
|
||||
static void release(cl_device_id x);
|
||||
static void release(cl_event x);
|
||||
static void release(cl_kernel x);
|
||||
static void release(cl_program x);
|
||||
template<typename T> struct remove_class { };
|
||||
template<typename C, typename R, typename... A>
|
||||
struct remove_class<R(C::*)(A...)> { using type = R(A...); };
|
||||
template<typename C, typename R, typename... A>
|
||||
struct remove_class<R(C::*)(A...) const> { using type = R(A...); };
|
||||
template<typename C, typename R, typename... A>
|
||||
struct remove_class<R(C::*)(A...) volatile> { using type = R(A...); };
|
||||
template<typename C, typename R, typename... A>
|
||||
struct remove_class<R(C::*)(A...) const volatile> { using type = R(A...); };
|
||||
|
||||
template<typename T>
|
||||
struct get_signature_impl { using type = typename remove_class<
|
||||
decltype(&std::remove_reference<T>::type::operator())>::type; };
|
||||
template<typename R, typename... A>
|
||||
struct get_signature_impl<R(A...)> { using type = R(A...); };
|
||||
template<typename R, typename... A>
|
||||
struct get_signature_impl<R(&)(A...)> { using type = R(A...); };
|
||||
template<typename R, typename... A>
|
||||
struct get_signature_impl<R(*)(A...)> { using type = R(A...); };
|
||||
template<typename T> using get_signature = typename get_signature_impl<T>::type;
|
||||
|
||||
template<class CUType>
|
||||
class Handle
|
||||
{
|
||||
public:
|
||||
//Constructors
|
||||
Handle(backend_type backend, bool take_ownership = true);
|
||||
Handle(CUType cu, bool take_ownership = true);
|
||||
Handle(bool take_ownership = true);
|
||||
~Handle();
|
||||
//Comparison
|
||||
bool operator==(Handle const & other) const;
|
||||
bool operator!=(Handle const & other) const;
|
||||
bool operator<(Handle const & other) const;
|
||||
//Accessors
|
||||
backend_type backend() const;
|
||||
CLType & cl();
|
||||
CLType const & cl() const;
|
||||
CUType & cu();
|
||||
CUType const & cu() const;
|
||||
~Handle();
|
||||
operator CUType() const;
|
||||
|
||||
private:
|
||||
DISABLE_MSVC_WARNING_C4251
|
||||
std::shared_ptr<CLType> cl_;
|
||||
protected:
|
||||
std::shared_ptr<CUType> cu_;
|
||||
RESTORE_MSVC_WARNING_C4251
|
||||
|
||||
private:
|
||||
backend_type backend_;
|
||||
bool has_ownership_;
|
||||
};
|
||||
|
||||
//Helper for automatic implementation of comparison operators
|
||||
// Mixin that injects ==, != and < for T as friend functions.
// Each operator forwards to the same comparison on the operands' handle(),
// so T must provide a const handle() whose result supports ==, != and <.
// T is expected to be the deriving class itself (as in
// `class Kernel: public has_handle_comparators<Kernel>`).
template<class T>
|
||||
class has_handle_comparators
|
||||
{
|
||||
public:
|
||||
friend bool operator==(T const & x, T const & y) { return x.handle() == y.handle(); }
|
||||
friend bool operator!=(T const & x, T const & y) { return x.handle() != y.handle(); }
|
||||
friend bool operator<(T const & x, T const & y) { return x.handle() < y.handle(); }
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -23,11 +23,8 @@
|
||||
#ifndef ISAAC_DRIVER_KERNEL_H
|
||||
#define ISAAC_DRIVER_KERNEL_H
|
||||
|
||||
#include "isaac/defines.h"
|
||||
#include "isaac/driver/common.h"
|
||||
#include "isaac/driver/program.h"
|
||||
#include "isaac/driver/module.h"
|
||||
#include "isaac/driver/handle.h"
|
||||
#include "isaac/value_scalar.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
@@ -40,30 +37,25 @@ namespace driver
|
||||
class Buffer;
|
||||
|
||||
// Kernel
|
||||
class ISAACAPI Kernel: public has_handle_comparators<Kernel>
|
||||
class Kernel: public Handle<CUfunction>
|
||||
{
|
||||
friend class CommandQueue;
|
||||
public:
|
||||
typedef Handle<cl_kernel, CUfunction> handle_type;
|
||||
typedef Handle<CUfunction> base_type;
|
||||
|
||||
public:
|
||||
//Constructors
|
||||
Kernel(Program const & program, const char * name);
|
||||
//Accessors
|
||||
handle_type const & handle() const;
|
||||
Kernel(Module const & program, const char * name);
|
||||
//Arguments setters
|
||||
void setArg(unsigned int index, value_scalar const & scal);
|
||||
void setArg(unsigned int index, std::size_t size, void* ptr);
|
||||
void setArg(unsigned int index, Buffer const &);
|
||||
void setSizeArg(unsigned int index, std::size_t N);
|
||||
template<class T> void setArg(unsigned int index, T value) { setArg(index, sizeof(T), (void*)&value); }
|
||||
|
||||
//Arguments getters
|
||||
void* const* cu_params() const;
|
||||
private:
|
||||
backend_type backend_;
|
||||
Module program_;
|
||||
unsigned int address_bits_;
|
||||
std::vector<std::shared_ptr<void> > cu_params_store_;
|
||||
std::vector<void*> cu_params_;
|
||||
handle_type h_;
|
||||
};
|
||||
|
||||
}
|
||||
|
@@ -20,28 +20,38 @@
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
#ifndef ISAAC_DRIVER_MODULE_H
|
||||
#define ISAAC_DRIVER_MODULE_H
|
||||
|
||||
#include <map>
|
||||
#include "isaac/driver/handle.h"
|
||||
#include "isaac/driver/context.h"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
namespace tools
|
||||
|
||||
namespace driver
|
||||
{
|
||||
|
||||
// Executes the x86 CPUID instruction with EAX loaded from `code` and stores
// the resulting EAX, EBX, ECX and EDX values in *a, *b, *c and *d.
// NOTE(review): only EAX is given as an input operand; ECX is not set before
// the instruction, so leaves that select a sub-leaf through ECX presumably
// return unspecified data here -- confirm before using such leaves.
inline void cpuid(int code, int *a, int *b, int *c, int *d) {
|
||||
__asm__ __volatile__("cpuid":"=a"(*a),"=b"(*b),
|
||||
"=c"(*c),"=d"(*d):"a"(code));
|
||||
}
|
||||
class Context;
|
||||
class Device;
|
||||
|
||||
// Returns the raw processor brand string obtained from CPUID leaves
// 0x80000002, 0x80000003 and 0x80000004 (four registers per leaf, stored in
// EAX, EBX, ECX, EDX order).
// The returned string always contains every byte of the twelve registers
// (48 bytes with 4-byte int); it is not trimmed.
// NOTE(review): the tail presumably holds NUL padding when the brand text is
// shorter than the buffer -- callers comparing or printing it should confirm.
inline std::string cpu_brand(){
  // Receive the registers in a genuine int array. Writing them through an
  // int* that points into a char buffer is not guaranteed to be suitably
  // aligned and violates the aliasing rules.
  int regs[12];
  cpuid(0x80000002, regs,     regs + 1, regs + 2,  regs + 3);
  cpuid(0x80000003, regs + 4, regs + 5, regs + 6,  regs + 7);
  cpuid(0x80000004, regs + 8, regs + 9, regs + 10, regs + 11);
  // Inspecting any object as a sequence of chars is well-defined.
  const char* bytes = reinterpret_cast<const char*>(regs);
  return std::string(bytes, bytes + sizeof(regs));
}
|
||||
class Module: public Handle<CUmodule>
|
||||
{
|
||||
typedef Handle<CUmodule> base_type;
|
||||
|
||||
public:
|
||||
Module(Context const & context, std::string const & source, bool is_ir = true);
|
||||
Context const & context() const;
|
||||
|
||||
private:
|
||||
Context context_;
|
||||
std::string source_;
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@@ -26,8 +26,7 @@
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include "isaac/defines.h"
|
||||
#include "isaac/driver/common.h"
|
||||
#include "isaac/driver/handle.h"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
@@ -37,20 +36,15 @@ namespace driver
|
||||
|
||||
class Device;
|
||||
|
||||
class ISAACAPI Platform
|
||||
class Platform: public Handle<cu_platform>
|
||||
{
|
||||
typedef Handle<cu_platform> base_type;
|
||||
public:
|
||||
//Constructors
|
||||
Platform(backend_type);
|
||||
Platform(cl_platform_id const &);
|
||||
using base_type::base_type;
|
||||
//Accessors
|
||||
std::string name() const;
|
||||
std::string version() const;
|
||||
void devices(std::vector<Device> &) const;
|
||||
cl_platform_id cl_id() const;
|
||||
private:
|
||||
backend_type backend_;
|
||||
cl_platform_id cl_platform_;
|
||||
std::vector<Device> devices() const;
|
||||
};
|
||||
|
||||
}
|
||||
|
@@ -1,70 +0,0 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ISAAC_DRIVER_PROGRAM_H
|
||||
#define ISAAC_DRIVER_PROGRAM_H
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "isaac/defines.h"
|
||||
#include "isaac/driver/common.h"
|
||||
#include "isaac/driver/handle.h"
|
||||
#include "isaac/driver/context.h"
|
||||
namespace isaac
|
||||
{
|
||||
|
||||
namespace driver
|
||||
{
|
||||
|
||||
class Context;
|
||||
class Device;
|
||||
|
||||
class ISAACAPI Program: public has_handle_comparators<Program>
|
||||
{
|
||||
public:
|
||||
typedef Handle<cl_program, CUmodule> handle_type;
|
||||
|
||||
private:
|
||||
friend class Kernel;
|
||||
|
||||
public:
|
||||
//Constructors
|
||||
Program(Context const & context, std::string const & source);
|
||||
//Accessors
|
||||
handle_type const & handle() const;
|
||||
Context const & context() const;
|
||||
|
||||
private:
|
||||
DISABLE_MSVC_WARNING_C4251
|
||||
backend_type backend_;
|
||||
Context context_;
|
||||
std::string source_;
|
||||
handle_type h_;
|
||||
RESTORE_MSVC_WARNING_C4251
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@@ -1,59 +0,0 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ISAAC_DRIVER_PROGRAM_CACHE_H
|
||||
#define ISAAC_DRIVER_PROGRAM_CACHE_H
|
||||
|
||||
#include <map>
|
||||
#include "isaac/defines.h"
|
||||
#include "isaac/driver/program.h"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
|
||||
namespace driver
|
||||
{
|
||||
|
||||
class ISAACAPI ProgramCache
|
||||
{
|
||||
friend class backend;
|
||||
|
||||
public:
|
||||
//Clearing the cache
|
||||
void clear();
|
||||
//Adding a program to the cache
|
||||
Program & add(Context const & context, std::string const & name, std::string const & src);
|
||||
//Finding a program in the cache
|
||||
Program const *find(std::string const & name);
|
||||
|
||||
private:
|
||||
DISABLE_MSVC_WARNING_C4251
|
||||
std::map<std::string, Program> cache_;
|
||||
RESTORE_MSVC_WARNING_C4251
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@@ -20,12 +20,10 @@
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ISAAC_DRIVER_COMMAND_QUEUE_H
|
||||
#define ISAAC_DRIVER_COMMAND_QUEUE_H
|
||||
#ifndef ISAAC_DRIVER_STREAM_H
|
||||
#define ISAAC_DRIVER_STREAM_H
|
||||
|
||||
#include <map>
|
||||
#include "isaac/defines.h"
|
||||
#include "isaac/driver/common.h"
|
||||
#include "isaac/driver/context.h"
|
||||
#include "isaac/driver/device.h"
|
||||
#include "isaac/driver/handle.h"
|
||||
@@ -38,40 +36,29 @@ namespace driver
|
||||
|
||||
class Kernel;
|
||||
class Event;
|
||||
class NDRange;
|
||||
class Range;
|
||||
class Buffer;
|
||||
|
||||
// Command Queue
|
||||
class ISAACAPI CommandQueue: public has_handle_comparators<CommandQueue>
|
||||
class Stream: public Handle<CUstream>
|
||||
{
|
||||
public:
|
||||
typedef Handle<cl_command_queue, CUstream> handle_type;
|
||||
typedef Handle<CUstream> base_type;
|
||||
|
||||
public:
|
||||
//Constructors
|
||||
CommandQueue(cl_command_queue const & queue, bool take_ownership = true);
|
||||
CommandQueue(Context const & context, Device const & device, cl_command_queue_properties properties = 0);
|
||||
using base_type::base_type;
|
||||
Stream(Context const & context);
|
||||
//Accessors
|
||||
handle_type & handle();
|
||||
handle_type const & handle() const;
|
||||
backend_type backend() const;
|
||||
Context const & context() const;
|
||||
Device const & device() const;
|
||||
//Synchronize
|
||||
void synchronize();
|
||||
//Profiling
|
||||
void enable_profiling();
|
||||
void disable_profiling();
|
||||
//Enqueue calls
|
||||
void enqueue(Kernel const & kernel, NDRange global, driver::NDRange local, std::vector<Event> const *, Event *event);
|
||||
void enqueue(Kernel const & kernel, std::array<size_t, 3> grid, std::array<size_t, 3> block, std::vector<Event> const * = NULL, Event *event = NULL);
|
||||
void write(Buffer const & buffer, bool blocking, std::size_t offset, std::size_t size, void const* ptr);
|
||||
void read(Buffer const & buffer, bool blocking, std::size_t offset, std::size_t size, void* ptr);
|
||||
|
||||
private:
|
||||
backend_type backend_;
|
||||
Context context_;
|
||||
Device device_;
|
||||
handle_type h_;
|
||||
};
|
||||
|
||||
|
@@ -1,82 +0,0 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ISAAC_EXCEPTION_API_H
|
||||
#define ISAAC_EXCEPTION_API_H
|
||||
|
||||
#include <string>
|
||||
#include <exception>
|
||||
|
||||
#include "isaac/defines.h"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
|
||||
/** @brief Exception for the case the generator is unable to deal with the operation */
|
||||
DISABLE_MSVC_WARNING_C4275
|
||||
class operation_not_supported_exception : public std::exception
|
||||
{
|
||||
public:
|
||||
operation_not_supported_exception();
|
||||
operation_not_supported_exception(std::string message);
|
||||
virtual const char* what() const throw();
|
||||
private:
|
||||
DISABLE_MSVC_WARNING_C4251
|
||||
std::string message_;
|
||||
RESTORE_MSVC_WARNING_C4251
|
||||
};
|
||||
RESTORE_MSVC_WARNING_C4275
|
||||
|
||||
|
||||
/** @brief Exception for the case an unknown datatype identifier is encountered */
|
||||
DISABLE_MSVC_WARNING_C4275
|
||||
class ISAACAPI unknown_datatype : public std::exception
|
||||
{
|
||||
public:
|
||||
unknown_datatype(int);
|
||||
virtual const char* what() const throw();
|
||||
private:
|
||||
DISABLE_MSVC_WARNING_C4251
|
||||
std::string message_;
|
||||
RESTORE_MSVC_WARNING_C4251
|
||||
};
|
||||
RESTORE_MSVC_WARNING_C4275
|
||||
|
||||
|
||||
/** @brief Exception reporting a semantic error, described by the supplied message */
|
||||
DISABLE_MSVC_WARNING_C4275
|
||||
class ISAACAPI semantic_error : public std::exception
|
||||
{
|
||||
public:
|
||||
semantic_error(std::string const & message);
|
||||
virtual const char* what() const throw();
|
||||
private:
|
||||
DISABLE_MSVC_WARNING_C4251
|
||||
std::string message_;
|
||||
RESTORE_MSVC_WARNING_C4251
|
||||
};
|
||||
RESTORE_MSVC_WARNING_C4275
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@@ -1,216 +0,0 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ISAAC_EXCEPTION_DRIVER_H
|
||||
#define ISAAC_EXCEPTION_DRIVER_H
|
||||
#include <exception>
|
||||
|
||||
#include "isaac/driver/dispatch.h"
|
||||
#include "isaac/defines.h"
|
||||
|
||||
DISABLE_MSVC_WARNING_C4275
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
|
||||
namespace exception
|
||||
{
|
||||
|
||||
// Exception reporting an architecture name that was not recognized.
// what() returns "Unrecognized architecture: " followed by the text passed
// to the constructor.
class ISAACAPI unknown_architecture: public std::exception{
|
||||
public:
|
||||
// Builds the full message once, at construction time.
unknown_architecture(std::string const & msg): msg_("Unrecognized architecture: " + msg){}
|
||||
// The returned pointer refers to msg_ and is valid only while this object lives.
const char * what() const throw(){ return msg_.c_str(); }
|
||||
private:
|
||||
std::string msg_;
|
||||
};
|
||||
|
||||
namespace nvrtc
|
||||
{
|
||||
|
||||
#define ISAAC_CREATE_NVRTC_EXCEPTION(name, msg) class ISAACAPI name: public std::exception { public: const char * what() const throw(){ return "NVRTC: Error- " msg; } }
|
||||
|
||||
ISAAC_CREATE_NVRTC_EXCEPTION(out_of_memory ,"out of memory");
|
||||
ISAAC_CREATE_NVRTC_EXCEPTION(program_creation_failure ,"program creation failure");
|
||||
ISAAC_CREATE_NVRTC_EXCEPTION(invalid_input ,"invalid input");
|
||||
ISAAC_CREATE_NVRTC_EXCEPTION(invalid_program ,"invalid program");
|
||||
ISAAC_CREATE_NVRTC_EXCEPTION(invalid_option ,"invalid option");
|
||||
ISAAC_CREATE_NVRTC_EXCEPTION(compilation ,"compilation");
|
||||
ISAAC_CREATE_NVRTC_EXCEPTION(builtin_operation_failure ,"builtin operation failure");
|
||||
ISAAC_CREATE_NVRTC_EXCEPTION(unknown_error ,"unknown error");
|
||||
|
||||
#undef ISAAC_CREATE_NVRTC_EXCEPTION
|
||||
}
|
||||
|
||||
|
||||
namespace cuda
|
||||
{
|
||||
class base: public std::exception{};
|
||||
|
||||
#define ISAAC_CREATE_CUDA_EXCEPTION(name, msg) class ISAACAPI name: public base { public:const char * what() const throw(){ return "CUDA: Error- " msg; } }
|
||||
|
||||
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(invalid_value ,"invalid value");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(out_of_memory ,"out of memory");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(not_initialized ,"not initialized");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(deinitialized ,"deinitialized");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(profiler_disabled ,"profiler disabled");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(profiler_not_initialized ,"profiler not initialized");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(profiler_already_started ,"profiler already started");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(profiler_already_stopped ,"profiler already stopped");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(no_device ,"no device");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(invalid_device ,"invalid device");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(invalid_image ,"invalid image");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(invalid_context ,"invalid context");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(context_already_current ,"context already current");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(map_failed ,"map failed");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(unmap_failed ,"unmap failed");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(array_is_mapped ,"array is mapped");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(already_mapped ,"already mapped");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(no_binary_for_gpu ,"no binary for gpu");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(already_acquired ,"already acquired");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(not_mapped ,"not mapped");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(not_mapped_as_array ,"not mapped as array");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(not_mapped_as_pointer ,"not mapped as pointer");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(ecc_uncorrectable ,"ecc uncorrectable");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(unsupported_limit ,"unsupported limit");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(context_already_in_use ,"context already in use");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(peer_access_unsupported ,"peer access unsupported");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(invalid_ptx ,"invalid ptx");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(invalid_graphics_context ,"invalid graphics context");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(invalid_source ,"invalid source");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(file_not_found ,"file not found");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(shared_object_symbol_not_found ,"shared object symbol not found");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(shared_object_init_failed ,"shared object init failed");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(operating_system ,"operating system");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(invalid_handle ,"invalid handle");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(not_found ,"not found");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(not_ready ,"not ready");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(illegal_address ,"illegal address");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(launch_out_of_resources ,"launch out of resources");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(launch_timeout ,"launch timeout");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(launch_incompatible_texturing ,"launch incompatible texturing");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(peer_access_already_enabled ,"peer access already enabled");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(peer_access_not_enabled ,"peer access not enabled");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(primary_context_active ,"primary context active");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(context_is_destroyed ,"context is destroyed");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(assert_error ,"assert");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(too_many_peers ,"too many peers");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(host_memory_already_registered ,"host memory already registered");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(host_memory_not_registered ,"host memory not registered");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(hardware_stack_error ,"hardware stack error");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(illegal_instruction ,"illegal instruction");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(misaligned_address ,"misaligned address");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(invalid_address_space ,"invalid address space");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(invalid_pc ,"invalid pc");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(launch_failed ,"launch failed");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(not_permitted ,"not permitted");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(not_supported ,"not supported");
|
||||
ISAAC_CREATE_CUDA_EXCEPTION(unknown ,"unknown");
|
||||
|
||||
#undef ISAAC_CREATE_CUDA_EXCEPTION
|
||||
}
|
||||
|
||||
namespace cublas
|
||||
{
|
||||
|
||||
#define ISAAC_CREATE_CUBLAS_EXCEPTION(name, msg) class ISAACAPI name: public std::exception { public: const char * what() const throw(){ return "CUBLAS: Error- " msg; } }
|
||||
|
||||
ISAAC_CREATE_CUBLAS_EXCEPTION(not_initialized ,"not initialized");
|
||||
ISAAC_CREATE_CUBLAS_EXCEPTION(alloc_failed ,"alloc failed");
|
||||
ISAAC_CREATE_CUBLAS_EXCEPTION(invalid_value ,"invalid value");
|
||||
ISAAC_CREATE_CUBLAS_EXCEPTION(arch_mismatch ,"arch mismatch");
|
||||
ISAAC_CREATE_CUBLAS_EXCEPTION(mapping_error ,"mapping error");
|
||||
ISAAC_CREATE_CUBLAS_EXCEPTION(execution_failed ,"execution failed");
|
||||
ISAAC_CREATE_CUBLAS_EXCEPTION(internal_error ,"internal error");
|
||||
ISAAC_CREATE_CUBLAS_EXCEPTION(not_supported ,"not supported");
|
||||
ISAAC_CREATE_CUBLAS_EXCEPTION(license_error ,"license error");
|
||||
ISAAC_CREATE_CUBLAS_EXCEPTION(unknown ,"unknown");
|
||||
|
||||
#undef ISAAC_CREATE_CUBLAS_EXCEPTION
|
||||
}
|
||||
|
||||
namespace ocl
|
||||
{
|
||||
|
||||
class ISAACAPI base: public std::exception{};
|
||||
|
||||
#define ISAAC_CREATE_CL_EXCEPTION(name, msg) class ISAACAPI name: public base { public: const char * what() const throw(){ return "OpenCL: Error- " msg; } }
|
||||
|
||||
|
||||
ISAAC_CREATE_CL_EXCEPTION(device_not_found, "device not found");
|
||||
ISAAC_CREATE_CL_EXCEPTION(device_not_available, "device not available");
|
||||
ISAAC_CREATE_CL_EXCEPTION(compiler_not_available, "compiler not available");
|
||||
ISAAC_CREATE_CL_EXCEPTION(mem_object_allocation_failure, "object allocation failure");
|
||||
ISAAC_CREATE_CL_EXCEPTION(out_of_resources, "launch out of resources");
|
||||
ISAAC_CREATE_CL_EXCEPTION(out_of_host_memory, "out of host memory");
|
||||
ISAAC_CREATE_CL_EXCEPTION(profiling_info_not_available, "profiling info not available");
|
||||
ISAAC_CREATE_CL_EXCEPTION(mem_copy_overlap, "mem copy overlap");
|
||||
ISAAC_CREATE_CL_EXCEPTION(image_format_mismatch, "image format mismatch");
|
||||
ISAAC_CREATE_CL_EXCEPTION(image_format_not_supported, "image format not supported");
|
||||
ISAAC_CREATE_CL_EXCEPTION(build_program_failure, "build program failure");
|
||||
ISAAC_CREATE_CL_EXCEPTION(map_failure, "map failure");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_value, "invalid value");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_device_type, "invalid device type");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_platform, "invalid platform");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_device, "invalid device");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_context, "invalid context");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_queue_properties, "invalid queue properties");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_command_queue, "invalid command queue");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_host_ptr, "invalid host pointer");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_mem_object, "invalid mem object");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_image_format_descriptor, "invalid image format descriptor");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_image_size, "invalid image size");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_sampler, "invalid sampler");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_binary, "invalid binary");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_build_options, "invalid build options");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_program, "invalid program");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_program_executable, "invalid program executable");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_kernel_name, "invalid kernel name");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_kernel_definition, "invalid kernel definition");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_kernel, "invalid kernel");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_arg_index, "invalid arg index");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_arg_value, "invalid arg value");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_arg_size, "invalid arg size");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_kernel_args, "invalid kernel args");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_work_dimension, "invalid work dimension");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_work_group_size, "invalid work group size");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_work_item_size, "invalid work item size");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_global_offset, "invalid global offset");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_event_wait_list, "invalid event wait list");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_event, "invalid event");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_operation, "invalid operation");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_gl_object, "invalid GL object");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_buffer_size, "invalid buffer size");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_mip_level, "invalid MIP level");
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_global_work_size, "invalid global work size");
|
||||
#ifdef CL_INVALID_PROPERTY
|
||||
ISAAC_CREATE_CL_EXCEPTION(invalid_property, "invalid property");
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
RESTORE_MSVC_WARNING_C4275
|
||||
|
||||
#endif
|
2909
include/isaac/external/half.hpp
vendored
Normal file
2909
include/isaac/external/half.hpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,118 +0,0 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ISAAC_TEMPLATES_base_
|
||||
#define ISAAC_TEMPLATES_base_
|
||||
|
||||
|
||||
#include <list>
|
||||
#include <set>
|
||||
#include <cmath>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "isaac/types.h"
|
||||
#include "isaac/jit/generation/engine/stream.h"
|
||||
#include "isaac/runtime/handler.h"
|
||||
#include "isaac/jit/syntax/engine/binder.h"
|
||||
#include "isaac/jit/syntax/engine/object.h"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
|
||||
namespace templates
|
||||
{
|
||||
|
||||
//Error codes
|
||||
static const int TEMPLATE_VALID = 0;
|
||||
static const int TEMPLATE_LOCAL_MEMORY_OVERFLOW = -1;
|
||||
static const int TEMPLATE_WORK_GROUP_SIZE_OVERFLOW = -2;
|
||||
static const int TEMPLATE_LOCAL_SIZE_0_OVERFLOW = -3;
|
||||
static const int TEMPLATE_LOCAL_SIZE_1_OVERFLOW = -4;
|
||||
static const int TEMPLATE_LOCAL_SIZE_2_OVERFLOW = -5;
|
||||
static const int TEMPLATE_LOCAL_SIZE_NOT_WARP_MULTIPLE = -6;
|
||||
static const int TEMPLATE_INVALID_SIMD_WIDTH = -7;
|
||||
static const int TEMPLATE_ALIGNMENT_MUST_BE_BLOCK_SIZE_MULTIPLE = -8;
|
||||
static const int TEMPLATE_INVALID_FETCHING_POLICY_TYPE= -9;
|
||||
|
||||
static const int TEMPLATE_GLOBAL_MEMORY_REQUIRES_ZERO_LOCAL_FETCH = -10;
|
||||
static const int TEMPLATE_MS_NS_MUST_BE_SIMD_WIDTH_MULTIPLE = -11;
|
||||
static const int TEMPLATE_KS_MUST_BE_SMALLER_THAN_KL = -12;
|
||||
static const int TEMPLATE_SIMD_WIDTH_MUST_BE_ONE = -13;
|
||||
static const int TEMPLATE_LOCAL_FETCH_PRODUCT_MUST_MATCH_LOCAL_SIZE_PRODUCT = -14;
|
||||
static const int TEMPLATE_LOCAL_FETCH_0_MUST_BE_KL_MULTIPLE = -15;
|
||||
static const int TEMPLATE_LOCAL_FETCH_0_MUST_BE_NL_MULTIPLE = -16;
|
||||
static const int TEMPLATE_LOCAL_FETCH_1_MUST_BE_KL_MULTIPLE = -17;
|
||||
static const int TEMPLATE_LOCAL_FETCH_1_MUST_BE_ML_MULTIPLE = -18;
|
||||
static const int TEMPLATE_TEMPORARY_TOO_LARGE = -19;
|
||||
static const int TEMPLATE_BLOCK_SIZE_TOO_LARGE = -20;
|
||||
|
||||
class base: public std::enable_shared_from_this<base>
|
||||
{
|
||||
private:
|
||||
virtual std::string generate_impl(std::string const & suffix, expression_tree const & expressions, driver::Device const & device, symbolic::symbols_table const & mapping) const = 0;
|
||||
public:
|
||||
base();
|
||||
virtual ~base();
|
||||
virtual unsigned int temporary_workspace(expression_tree const &) const;
|
||||
virtual unsigned int lmem_usage(expression_tree const &) const;
|
||||
virtual unsigned int registers_usage(expression_tree const &) const;
|
||||
virtual std::vector<int_t> input_sizes(expression_tree const & expressions) const = 0;
|
||||
virtual int is_invalid(expression_tree const & expressions, driver::Device const & device) const = 0;
|
||||
virtual void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const & expressions) = 0;
|
||||
virtual expression_type type() const = 0;
|
||||
std::string generate(std::string const & suffix, expression_tree const & expressions, driver::Device const & device);
|
||||
std::shared_ptr<base> getptr();
|
||||
};
|
||||
|
||||
class external_base: public base
|
||||
{
|
||||
private:
|
||||
virtual std::string generate_impl(std::string const & suffix, expression_tree const & expressions, driver::Device const & device, symbolic::symbols_table const & mapping) const;
|
||||
public:
|
||||
external_base();
|
||||
virtual unsigned int temporary_workspace(expression_tree const &) const;
|
||||
virtual unsigned int lmem_usage(expression_tree const &) const;
|
||||
virtual unsigned int registers_usage(expression_tree const &) const;
|
||||
virtual std::vector<int_t> input_sizes(expression_tree const & expressions) const = 0;
|
||||
};
|
||||
|
||||
class parameterized_base : public base
|
||||
{
|
||||
private:
|
||||
virtual int is_invalid_impl(driver::Device const &, expression_tree const &) const;
|
||||
|
||||
public:
|
||||
parameterized_base(unsigned int _vwidth, int_t _ls0, int_t _ls1);
|
||||
unsigned int ls0() const;
|
||||
unsigned int ls1() const;
|
||||
/** @brief returns whether or not the profile has undefined behavior on particular device */
|
||||
int is_invalid(expression_tree const & expressions, driver::Device const & device) const;
|
||||
protected:
|
||||
unsigned int vwidth_;
|
||||
unsigned int ls0_;
|
||||
unsigned int ls1_;
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
@@ -1,49 +0,0 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ISAAC_BACKEND_TEMPLATES_VAXPY_H
|
||||
#define ISAAC_BACKEND_TEMPLATES_VAXPY_H
|
||||
|
||||
#include "isaac/jit/generation/base.h"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
namespace templates
|
||||
{
|
||||
|
||||
class elementwise_1d : public parameterized_base
|
||||
{
|
||||
private:
|
||||
std::string generate_impl(std::string const & suffix, expression_tree const & expressions, driver::Device const & device, symbolic::symbols_table const & symbols) const;
|
||||
public:
|
||||
elementwise_1d(unsigned int vwidth, unsigned int ls, unsigned int ng);
|
||||
std::vector<int_t> input_sizes(expression_tree const & expressions) const;
|
||||
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const &);
|
||||
expression_type type() const;
|
||||
private:
|
||||
unsigned int ng_;
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
@@ -1,52 +0,0 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ISAAC_BACKEND_TEMPLATES_MAXPY_H
|
||||
#define ISAAC_BACKEND_TEMPLATES_MAXPY_H
|
||||
|
||||
#include <vector>
|
||||
#include "isaac/jit/generation/base.h"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
namespace templates
|
||||
{
|
||||
|
||||
class elementwise_2d : public parameterized_base
|
||||
{
|
||||
private:
|
||||
int is_invalid_impl(driver::Device const &, expression_tree const &) const;
|
||||
std::string generate_impl(std::string const & suffix, expression_tree const & expressions, driver::Device const & device, symbolic::symbols_table const & mapping) const;
|
||||
public:
|
||||
elementwise_2d(unsigned int vwidth, unsigned int ls0, unsigned int ls1, unsigned int ng0, unsigned int ng1);
|
||||
std::vector<int_t> input_sizes(expression_tree const & expressions) const;
|
||||
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const &);
|
||||
expression_type type() const;
|
||||
private:
|
||||
unsigned int ng0_;
|
||||
unsigned int ng1_;
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
@@ -1,98 +0,0 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ISAAC_BACKEND_KEYWORDS_H
|
||||
#define ISAAC_BACKEND_KEYWORDS_H
|
||||
|
||||
#include <stdexcept>
#include <string>

#include "isaac/driver/common.h"
#include "isaac/driver/device.h"
|
||||
namespace isaac
|
||||
{
|
||||
|
||||
class keyword
|
||||
{
|
||||
public:
|
||||
keyword(driver::backend_type backend, std::string const & opencl, std::string const & cuda);
|
||||
std::string const & get() const;
|
||||
private:
|
||||
driver::backend_type backend_;
|
||||
std::string opencl_;
|
||||
std::string cuda_;
|
||||
};
|
||||
|
||||
static inline std::string size_type(driver::Device const & device)
|
||||
{
|
||||
switch(device.backend())
|
||||
{
|
||||
case driver::CUDA:
|
||||
return "int";
|
||||
case driver::OPENCL:
|
||||
return "int";
|
||||
default:
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
// Stream insertion for keyword objects (declaration only; defined elsewhere).
// NOTE(review): presumably writes kw.get() to ss -- confirm in the implementation.
std::ostream & operator<<(std::ostream & ss, keyword const & kw);
|
||||
|
||||
#define ADD_KEYWORD(NAME, OCLKW, CUDAKW) class NAME : public keyword { public: NAME(driver::backend_type backend) : keyword(backend, OCLKW, CUDAKW){} };
|
||||
|
||||
ADD_KEYWORD(KernelPrefix, "__kernel", "extern \"C\" __global__")
|
||||
ADD_KEYWORD(Local, "__local", "__shared__")
|
||||
ADD_KEYWORD(Global, "__global", "")
|
||||
ADD_KEYWORD(LocalPtr, "__local", "")
|
||||
|
||||
ADD_KEYWORD(GlobalIdx0, "get_global_id(0)", "(blockIdx.x*blockDim.x + threadIdx.x)")
|
||||
ADD_KEYWORD(GlobalIdx1, "get_global_id(1)", "(blockIdx.y*blockDim.y + threadIdx.y)")
|
||||
ADD_KEYWORD(GlobalIdx2, "get_global_id(2)", "(blockIdx.z*blockDim.z + threadIdx.z)")
|
||||
|
||||
ADD_KEYWORD(GlobalSize0, "get_global_size(0)", "(blockDim.x*gridDim.x)")
|
||||
ADD_KEYWORD(GlobalSize1, "get_global_size(1)", "(blockDim.y*gridDim.y)")
|
||||
ADD_KEYWORD(GlobalSize2, "get_global_size(2)", "(blockDim.z*gridDim.z)")
|
||||
|
||||
ADD_KEYWORD(LocalIdx0, "get_local_id(0)", "threadIdx.x")
|
||||
ADD_KEYWORD(LocalIdx1, "get_local_id(1)", "threadIdx.y")
|
||||
ADD_KEYWORD(LocalIdx2, "get_local_id(2)", "threadIdx.z")
|
||||
|
||||
ADD_KEYWORD(LocalSize0, "get_local_size(0)", "blockDim.x")
|
||||
ADD_KEYWORD(LocalSize1, "get_local_size(1)", "blockDim.y")
|
||||
ADD_KEYWORD(LocalSize2, "get_local_size(2)", "blockDim.z")
|
||||
|
||||
ADD_KEYWORD(GroupIdx0, "get_group_id(0)", "blockIdx.x")
|
||||
ADD_KEYWORD(GroupIdx1, "get_group_id(1)", "blockIdx.y")
|
||||
ADD_KEYWORD(GroupIdx2, "get_group_id(2)", "blockIdx.z")
|
||||
|
||||
ADD_KEYWORD(GroupSize0, "get_ng(0)", "GridDim.x")
|
||||
ADD_KEYWORD(GroupSize1, "get_ng(1)", "GridDim.y")
|
||||
ADD_KEYWORD(GroupSize2, "get_ng(2)", "GridDim.z")
|
||||
|
||||
ADD_KEYWORD(LocalBarrier, "barrier(CLK_LOCAL_MEM_FENCE)", "__syncthreads()")
|
||||
/** Keyword whose OpenCL spelling is "convert_<datatype>" and whose CUDA spelling is "make_<datatype>". */
struct CastPrefix: public keyword
{
  CastPrefix(driver::backend_type backend, std::string const & datatype)
    : keyword(backend, "convert_" + datatype, "make_" + datatype)
  {}
};
|
||||
/** Keyword that is empty for OpenCL and "make_<datatype>" for CUDA. */
struct InitPrefix: public keyword
{
  InitPrefix(driver::backend_type backend, std::string const & datatype)
    : keyword(backend, "", "make_" + datatype)
  {}
};
|
||||
|
||||
/** Keyword spelled "INFINITY" for OpenCL and "infinity<datatype>()" for CUDA. */
struct Infinity: public keyword
{
  Infinity(driver::backend_type backend, std::string const & datatype)
    : keyword(backend, "INFINITY", "infinity<" + datatype + ">()")
  {}
};
|
||||
/**
 * Conditional-selection keyword.
 * OpenCL spelling: select(else_value,if_value,cond)
 * CUDA spelling:   (cond)?if_value:else_value
 */
struct Select: public keyword
{
  Select(driver::backend_type backend, std::string cond, std::string if_value, std::string else_value)
    : keyword(backend,
              "select(" + else_value + "," + if_value + "," + cond + ")",
              "(" + cond + ")?" + if_value + ":" + else_value)
  {}
};
|
||||
#undef ADD_KEYWORD
|
||||
|
||||
|
||||
}
|
||||
#endif
|
@@ -1,62 +0,0 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ISAAC_BACKEND_STREAM_H
|
||||
#define ISAAC_BACKEND_STREAM_H
|
||||
|
||||
#include <sstream>
|
||||
#include "isaac/driver/common.h"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
|
||||
class kernel_generation_stream : public std::ostream
|
||||
{
|
||||
class kgenstream : public std::stringbuf
|
||||
{
|
||||
public:
|
||||
kgenstream(std::ostringstream& oss,unsigned int const & tab_count) ;
|
||||
int sync();
|
||||
~kgenstream();
|
||||
private:
|
||||
std::ostream& oss_;
|
||||
unsigned int const & tab_count_;
|
||||
};
|
||||
|
||||
void process(std::string& str);
|
||||
|
||||
public:
|
||||
kernel_generation_stream(driver::backend_type backend);
|
||||
~kernel_generation_stream();
|
||||
|
||||
std::string str();
|
||||
void inc_tab();
|
||||
void dec_tab();
|
||||
private:
|
||||
unsigned int tab_count_;
|
||||
driver::backend_type backend_;
|
||||
std::ostringstream oss;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@@ -1,155 +0,0 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ISAAC_BACKEND_TEMPLATES_MPRODUCT_H
|
||||
#define ISAAC_BACKEND_TEMPLATES_MPRODUCT_H
|
||||
|
||||
#include "isaac/jit/generation/base.h"
|
||||
#include "isaac/jit/syntax/expression/expression.h"
|
||||
#include "isaac/jit/syntax/expression/preset.h"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
namespace templates
|
||||
{
|
||||
|
||||
class cublas_gemm : public external_base
|
||||
{
|
||||
bool init();
|
||||
public:
|
||||
cublas_gemm(char A_trans, char B_trans);
|
||||
int is_invalid(expression_tree const &, driver::Device const &) const;
|
||||
std::vector<int_t> input_sizes(expression_tree const & expressions) const;
|
||||
void enqueue(driver::CommandQueue & queue, driver::Program const &, std::string const &, runtime::execution_handler const & h);
|
||||
expression_type type() const;
|
||||
private:
|
||||
const char A_trans_;
|
||||
const char B_trans_;
|
||||
bool init_;
|
||||
};
|
||||
|
||||
class intelblas_gemm : public external_base
|
||||
{
|
||||
bool init();
|
||||
public:
|
||||
intelblas_gemm(char A_trans, char B_trans);
|
||||
int is_invalid(expression_tree const &, driver::Device const &) const;
|
||||
std::vector<int_t> input_sizes(expression_tree const & expressions) const;
|
||||
void enqueue(driver::CommandQueue & queue, driver::Program const &, std::string const &, runtime::execution_handler const & h);
|
||||
expression_type type() const;
|
||||
private:
|
||||
std::string generate_impl(std::string const & suffix, expression_tree const &, driver::Device const & device, symbolic::symbols_table const &) const;
|
||||
const char A_trans_;
|
||||
const char B_trans_;
|
||||
bool init_;
|
||||
};
|
||||
|
||||
class intelblas_gemm_image : public external_base
|
||||
{
|
||||
bool init();
|
||||
public:
|
||||
intelblas_gemm_image(char A_trans, char B_trans);
|
||||
int is_invalid(expression_tree const &, driver::Device const &) const;
|
||||
std::vector<int_t> input_sizes(expression_tree const & expressions) const;
|
||||
void enqueue(driver::CommandQueue & queue, driver::Program const &, std::string const &, runtime::execution_handler const & h);
|
||||
expression_type type() const;
|
||||
private:
|
||||
std::string generate_impl(std::string const & suffix, expression_tree const &, driver::Device const & device, symbolic::symbols_table const &) const;
|
||||
const char A_trans_;
|
||||
const char B_trans_;
|
||||
bool init_;
|
||||
};
|
||||
|
||||
class gemm : public parameterized_base
|
||||
{
|
||||
private:
|
||||
unsigned int temporary_workspace(expression_tree const & expressions) const;
|
||||
unsigned int lmem_usage(expression_tree const & expressions) const;
|
||||
unsigned int registers_usage(expression_tree const & expressions) const;
|
||||
int is_invalid_impl(driver::Device const &, expression_tree const &) const;
|
||||
std::string generate_impl(std::string const & suffix, expression_tree const & expressions, driver::Device const & device, symbolic::symbols_table const &) const;
|
||||
void enqueue_block(driver::CommandQueue & queue, int_t M, int_t N, int_t K, const expression_tree::node &A, const expression_tree::node &B, const expression_tree::node &C,
|
||||
value_scalar const &alpha, value_scalar const &beta, driver::Program const & program, std::string const & suffix, runtime::execution_options_type const & options);
|
||||
|
||||
public:
|
||||
gemm(unsigned int simd, int_t ls0, int_t KL, int_t ls1, int_t D
|
||||
, int_t ms, int_t ks, int_t ns, int_t lf0, int_t lf1
|
||||
, char A_trans, char B_trans);
|
||||
std::vector<int_t> input_sizes(expression_tree const & expressions) const;
|
||||
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const & h);
|
||||
expression_type type() const;
|
||||
|
||||
private:
|
||||
//Parameters
|
||||
unsigned int mL_;
|
||||
unsigned int kL_;
|
||||
unsigned int nL_;
|
||||
unsigned int depth_;
|
||||
unsigned int mS_;
|
||||
unsigned int kS_;
|
||||
unsigned int nS_;
|
||||
|
||||
unsigned int lf0_;
|
||||
unsigned int lf1_;
|
||||
|
||||
bool prefetch_;
|
||||
bool unroll_outer_;
|
||||
//
|
||||
const char A_trans_;
|
||||
const char B_trans_;
|
||||
expression_type type_;
|
||||
};
|
||||
|
||||
class gemm_nn : public gemm
|
||||
{
|
||||
public:
|
||||
gemm_nn(unsigned int vwidth, int_t ls0, int_t KL, int_t ls1, int_t D
|
||||
, int_t ms, int_t ks, int_t ns, int_t lf0, int_t lf1);
|
||||
};
|
||||
|
||||
class gemm_tn : public gemm
|
||||
{
|
||||
public:
|
||||
gemm_tn(unsigned int vwidth, int_t ls0, int_t KL, int_t ls1, int_t D
|
||||
, int_t ms, int_t ks, int_t ns, int_t lf0, int_t lf1);
|
||||
};
|
||||
|
||||
|
||||
class gemm_nt : public gemm
|
||||
{
|
||||
public:
|
||||
gemm_nt(unsigned int vwidth, int_t ls0, int_t KL, int_t ls1, int_t D
|
||||
, int_t ms, int_t ks, int_t ns, int_t lf0, int_t lf1);
|
||||
};
|
||||
|
||||
|
||||
class gemm_tt : public gemm
|
||||
{
|
||||
public:
|
||||
gemm_tt(unsigned int vwidth, int_t ls0, int_t KL, int_t ls1, int_t D
|
||||
, int_t ms, int_t ks, int_t ns, int_t lf0, int_t lf1);
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
@@ -1,57 +0,0 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ISAAC_BACKEND_TEMPLATES_DOT_H
|
||||
#define ISAAC_BACKEND_TEMPLATES_DOT_H
|
||||
|
||||
#include "isaac/jit/generation/base.h"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
namespace templates
|
||||
{
|
||||
|
||||
class reduce_1d : public parameterized_base
|
||||
{
|
||||
private:
|
||||
unsigned int lmem_usage(expression_tree const & expressions) const;
|
||||
unsigned int temporary_workspace(expression_tree const & expressions) const;
|
||||
inline void reduce_1d_local_memory(kernel_generation_stream & stream, unsigned int size, std::vector<symbolic::reduce_1d*> exprs,
|
||||
std::string const & buf_str, std::string const & buf_value_str, driver::backend_type backend) const;
|
||||
std::string generate_impl(std::string const & suffix, expression_tree const & expressions, driver::Device const & device, symbolic::symbols_table const & mapping) const;
|
||||
|
||||
public:
|
||||
reduce_1d(unsigned int vwidth, unsigned int ls, unsigned int ng);
|
||||
std::vector<int_t> input_sizes(expression_tree const & expressions) const;
|
||||
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const &);
|
||||
expression_type type() const;
|
||||
|
||||
private:
|
||||
unsigned int ng_;
|
||||
std::vector< driver::Buffer > tmp_;
|
||||
std::vector< driver::Buffer > tmpidx_;
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
@@ -1,69 +0,0 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ISAAC_BACKEND_TEMPLATES_MDOT_H
|
||||
#define ISAAC_BACKEND_TEMPLATES_MDOT_H
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "isaac/jit/syntax/expression/expression.h"
|
||||
#include "isaac/jit/generation/base.h"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
namespace templates
|
||||
{
|
||||
|
||||
class reduce_2d : public parameterized_base
|
||||
{
|
||||
protected:
|
||||
reduce_2d(unsigned int vwidth, unsigned int ls0, unsigned int ls1, unsigned int ng0, unsigned int ng1, operation_type_family);
|
||||
private:
|
||||
unsigned int lmem_usage(expression_tree const &) const;
|
||||
unsigned int temporary_workspace(expression_tree const & expressions) const;
|
||||
std::string generate_impl(std::string const & suffix, expression_tree const &, driver::Device const & device, symbolic::symbols_table const &) const;
|
||||
public:
|
||||
virtual std::vector<int_t> input_sizes(expression_tree const & expressions) const;
|
||||
void enqueue(driver::CommandQueue & queue, driver::Program const & program, std::string const & suffix, runtime::execution_handler const &);
|
||||
expression_type type() const;
|
||||
private:
|
||||
unsigned int ng0_;
|
||||
unsigned int ng1_;
|
||||
operation_type_family reduction_type_;
|
||||
};
|
||||
|
||||
class reduce_2d_rows : public reduce_2d
|
||||
{
|
||||
public:
|
||||
reduce_2d_rows(unsigned int vwidth, unsigned int ls0, unsigned int ls1, unsigned int ng0, unsigned int ng1);
|
||||
};
|
||||
|
||||
class reduce_2d_cols : public reduce_2d
|
||||
{
|
||||
public:
|
||||
reduce_2d_cols(unsigned int vwidth, unsigned int ls0, unsigned int ls1, unsigned int ng0, unsigned int ng1);
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
@@ -1,85 +0,0 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ISAAC_BACKEND_BINDER_H
|
||||
#define ISAAC_BACKEND_BINDER_H
|
||||
|
||||
#include <map>
|
||||
#include "isaac/driver/buffer.h"
|
||||
#include "isaac/jit/syntax/expression/expression.h"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
|
||||
class array_base;
|
||||
|
||||
|
||||
class symbolic_binder
|
||||
{
|
||||
class cmp
|
||||
{
|
||||
public:
|
||||
cmp(driver::backend_type backend) : backend_(backend) {}
|
||||
|
||||
bool operator()(handle_t const & x, handle_t const & y) const
|
||||
{
|
||||
if(backend_==driver::OPENCL)
|
||||
return x.cl < y.cl;
|
||||
else
|
||||
return x.cu < y.cu;
|
||||
}
|
||||
|
||||
private:
|
||||
driver::backend_type backend_;
|
||||
};
|
||||
|
||||
public:
|
||||
symbolic_binder(driver::backend_type backend);
|
||||
virtual ~symbolic_binder();
|
||||
virtual bool bind(handle_t const &, bool) = 0;
|
||||
virtual unsigned int get(handle_t const &, bool) = 0;
|
||||
unsigned int get();
|
||||
protected:
|
||||
unsigned int current_arg_;
|
||||
std::map<handle_t,unsigned int, cmp> memory;
|
||||
};
|
||||
|
||||
|
||||
class bind_sequential : public symbolic_binder
|
||||
{
|
||||
public:
|
||||
bind_sequential(driver::backend_type backend);
|
||||
bool bind(handle_t const & a, bool);
|
||||
unsigned int get(handle_t const & a, bool);
|
||||
};
|
||||
|
||||
class bind_independent : public symbolic_binder
|
||||
{
|
||||
public:
|
||||
bind_independent(driver::backend_type backend);
|
||||
bool bind(handle_t const & a, bool);
|
||||
unsigned int get(const handle_t &a, bool);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@@ -1,54 +0,0 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ISAAC_SYMBOLIC_ENGINE_MACRO_H
|
||||
#define ISAAC_SYMBOLIC_ENGINE_MACRO_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
|
||||
namespace symbolic
|
||||
{
|
||||
|
||||
|
||||
//Macro
|
||||
/**
 * @brief Text macro built from a code string.
 *
 * Stores the original code together with a name, an argument list and a token list.
 * NOTE(review): these are presumably parsed out of `code` by the constructors -- confirm.
 * operator< makes the class usable as a key in ordered containers.
 */
class macro
{
public:
  // Both constructors are implicit, so strings convert to macro.
  macro(std::string const & code);
  macro(const char * code);

  /** Modifies `str` in place; NOTE(review): the int presumably counts expansions -- confirm. */
  int expand(std::string & str) const;
  bool operator<(macro const & o) const;

private:
  std::string code_;
  std::string name_;
  std::vector<std::string> args_;
  std::vector<std::string> tokens_;
};
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
@@ -1,207 +0,0 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ISAAC_MAPPED_OBJECT_H
|
||||
#define ISAAC_MAPPED_OBJECT_H
|
||||
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include "isaac/jit/syntax/engine/macro.h"
|
||||
#include "isaac/jit/syntax/expression/expression.h"
|
||||
#include "isaac/jit/generation/engine/stream.h"
|
||||
#include "isaac/types.h"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
|
||||
namespace symbolic
|
||||
{
|
||||
|
||||
class object;
|
||||
|
||||
typedef std::map<size_t, std::shared_ptr<object> > symbols_table;
|
||||
|
||||
//Node
|
||||
class node
|
||||
{
|
||||
public:
|
||||
node(size_t root, op_element op, expression_tree const & tree, symbols_table const & table);
|
||||
op_element op() const;
|
||||
object const * lhs() const;
|
||||
object const * rhs() const;
|
||||
size_t root() const;
|
||||
protected:
|
||||
op_element op_;
|
||||
object* lhs_;
|
||||
object* rhs_;
|
||||
size_t root_;
|
||||
};
|
||||
|
||||
//Object
|
||||
class object
|
||||
{
|
||||
protected:
|
||||
void add_base(std::string const & name);
|
||||
void add_load(bool contiguous);
|
||||
public:
|
||||
object(driver::Context const & context, std::string const & scalartype, unsigned int id);
|
||||
object(driver::Context const & context, std::string const & scalartype, std::string const & name);
|
||||
virtual ~object();
|
||||
bool hasattr(std::string const & name) const;
|
||||
std::string process(std::string const & in) const;
|
||||
virtual std::string evaluate(std::map<std::string, std::string> const & table) const;
|
||||
protected:
|
||||
driver::Context const & context_;
|
||||
std::map<std::string, std::string> attributes_;
|
||||
std::set<macro> macros_;
|
||||
std::list<std::string> hierarchy_;
|
||||
};
|
||||
|
||||
//Leaf
|
||||
class leaf: public object
|
||||
{
|
||||
public:
|
||||
leaf(driver::Context const & context, std::string const & scalartype, unsigned int id);
|
||||
leaf(driver::Context const & context, std::string const & scalartype, std::string const & name);
|
||||
};
|
||||
|
||||
|
||||
//Arithmetic node
|
||||
class arithmetic_node : public object, public node
|
||||
{
|
||||
public:
|
||||
arithmetic_node(unsigned int id, size_t root, op_element op, expression_tree const & tree, symbols_table const & table);
|
||||
protected:
|
||||
std::string op_str_;
|
||||
};
|
||||
|
||||
//Binary arithmetic
|
||||
class binary_arithmetic_node: public arithmetic_node
|
||||
{
|
||||
public:
|
||||
binary_arithmetic_node(unsigned int id, size_t root, op_element op, expression_tree const & tree, symbols_table const & table);
|
||||
std::string evaluate(std::map<std::string, std::string> const & table) const;
|
||||
};
|
||||
|
||||
//Unary arithmetic
|
||||
class unary_arithmetic_node: public arithmetic_node
|
||||
{
|
||||
public:
|
||||
unary_arithmetic_node(unsigned int id, size_t root, op_element op, expression_tree const & tree, symbols_table const & table);
|
||||
std::string evaluate(std::map<std::string, std::string> const & table) const;
|
||||
};
|
||||
|
||||
//Sfor
|
||||
class sfor: public object, public node
|
||||
{
|
||||
public:
|
||||
sfor(unsigned int id, size_t root, op_element op, expression_tree const & tree, symbols_table const & table);
|
||||
};
|
||||
|
||||
//Reductions
|
||||
class reduction : public object, public node
|
||||
{
|
||||
public:
|
||||
reduction(unsigned int id, size_t root, op_element op, expression_tree const & tree, symbols_table const & table);
|
||||
};
|
||||
|
||||
class reduce_1d : public reduction
|
||||
{
|
||||
public:
|
||||
reduce_1d(unsigned int id, size_t root, op_element op, expression_tree const & tree, symbols_table const & table);
|
||||
};
|
||||
|
||||
class reduce_2d : public reduction
|
||||
{
|
||||
public:
|
||||
reduce_2d(unsigned int id, size_t root, op_element op, expression_tree const & tree, symbols_table const & table);
|
||||
};
|
||||
|
||||
//Host scalar
|
||||
class host_scalar : public leaf
|
||||
{
|
||||
public:
|
||||
host_scalar(driver::Context const & context, std::string const & scalartype, unsigned int id);
|
||||
};
|
||||
|
||||
//Placeholder
|
||||
class placeholder : public leaf
|
||||
{
|
||||
public:
|
||||
placeholder(driver::Context const & context, unsigned int level);
|
||||
};
|
||||
|
||||
//Arrays
|
||||
class array : public leaf
|
||||
{
|
||||
protected:
|
||||
std::string make_broadcast(tuple const & shape);
|
||||
public:
|
||||
array(driver::Context const & context, std::string const & scalartype, unsigned int id);
|
||||
};
|
||||
|
||||
//Buffer
|
||||
class buffer : public array
|
||||
{
|
||||
public:
|
||||
buffer(driver::Context const & context, std::string const & scalartype, unsigned int id, tuple const & shape, tuple const &strides);
|
||||
unsigned int dim() const { return dim_; }
|
||||
private:
|
||||
std::string ld_;
|
||||
std::string start_;
|
||||
std::string stride_;
|
||||
unsigned int dim_;
|
||||
};
|
||||
|
||||
//Index modifier
|
||||
class index_modifier: public array, public node
|
||||
{
|
||||
public:
|
||||
index_modifier(std::string const & scalartype, unsigned int id, size_t root, op_element op, expression_tree const & tree, symbols_table const & table);
|
||||
};
|
||||
|
||||
class reshape : public index_modifier
|
||||
{
|
||||
public:
|
||||
reshape(std::string const & scalartype, unsigned int id, size_t root, op_element op, expression_tree const & tree, symbols_table const & table);
|
||||
};
|
||||
|
||||
class trans : public index_modifier
|
||||
{
|
||||
public:
|
||||
trans(std::string const & scalartype, unsigned int id, size_t root, op_element op, expression_tree const & tree, symbols_table const & table);
|
||||
};
|
||||
|
||||
class diag_vector : public index_modifier
|
||||
{
|
||||
public:
|
||||
diag_vector(std::string const & scalartype, unsigned int id, size_t root, op_element op, expression_tree const & tree, symbols_table const & table);
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
@@ -1,123 +0,0 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#ifndef ISAAC_SYMBOLIC_ENGINE_PROCESS
|
||||
#define ISAAC_SYMBOLIC_ENGINE_PROCESS
|
||||
|
||||
#include <functional>
|
||||
#include <typeinfo>
|
||||
#include "isaac/tools/cpp/string.hpp"
|
||||
#include "isaac/jit/syntax/expression/expression.h"
|
||||
#include "isaac/jit/syntax/engine/binder.h"
|
||||
#include "isaac/jit/syntax/engine/object.h"
|
||||
#include "isaac/array.h"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
namespace symbolic
|
||||
{
|
||||
|
||||
//Traverse
|
||||
template<class FUN>
|
||||
inline void traverse(expression_tree const & tree, size_t root, FUN const & fun,
|
||||
std::function<bool(size_t)> const & recurse)
|
||||
{
|
||||
expression_tree::node const & node = tree[root];
|
||||
if (node.type==COMPOSITE_OPERATOR_TYPE && recurse(root)){
|
||||
traverse(tree, node.binary_operator.lhs, fun, recurse);
|
||||
traverse(tree, node.binary_operator.rhs, fun, recurse);
|
||||
}
|
||||
if (node.type != INVALID_SUBTYPE)
|
||||
fun(root);
|
||||
}
|
||||
|
||||
template<class FUN>
|
||||
inline void traverse(expression_tree const & tree, size_t root, FUN const & fun)
|
||||
{ return traverse(tree, root, fun, [](size_t){return true;}); }
|
||||
|
||||
template<class FUN>
|
||||
inline void traverse(expression_tree const & tree, FUN const & fun)
|
||||
{ return traverse(tree, tree.root(), fun); }
|
||||
|
||||
|
||||
//Extract symbolic types
|
||||
template<class T>
|
||||
inline void extract(expression_tree const & tree, symbols_table const & table,
|
||||
size_t root, std::set<std::string>& processed, std::vector<T*>& result, bool array_recurse = true)
|
||||
{
|
||||
auto extract_impl = [&](size_t index)
|
||||
{
|
||||
symbols_table::const_iterator it = table.find(index);
|
||||
if(it!=table.end())
|
||||
{
|
||||
T* obj = dynamic_cast<T*>(&*it->second);
|
||||
if(obj && processed.insert(obj->process("#name")).second)
|
||||
result.push_back(obj);
|
||||
}
|
||||
};
|
||||
auto recurse = [&](size_t index){ return array_recurse?true:dynamic_cast<index_modifier*>(&*table.at(index))==0;};
|
||||
traverse(tree, root, extract_impl, recurse);
|
||||
}
|
||||
|
||||
template<class T>
|
||||
inline std::vector<T*> extract(expression_tree const & tree, symbols_table const & table, std::vector<size_t> roots, bool array_recurse = true)
|
||||
{
|
||||
std::vector<T*> result;
|
||||
std::set<std::string> processed;
|
||||
for(size_t root: roots)
|
||||
extract(tree, table, root, processed, result, array_recurse);
|
||||
return result;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
inline std::vector<T*> extract(expression_tree const & tree, symbols_table const & table, size_t root, bool array_recurse = true)
|
||||
{
|
||||
return extract<T>(tree, table, std::vector<size_t>{root}, array_recurse);
|
||||
}
|
||||
|
||||
template<class T>
|
||||
inline std::vector<T*> extract(expression_tree const & tree, symbols_table const & table)
|
||||
{
|
||||
return extract<T>(tree, table, tree.root());
|
||||
}
|
||||
|
||||
// Filter nodes
|
||||
std::vector<size_t> find(expression_tree const & tree, size_t root, std::function<bool (expression_tree::node const &)> const & pred);
|
||||
std::vector<size_t> find(expression_tree const & tree, std::function<bool (expression_tree::node const &)> const & pred);
|
||||
|
||||
std::vector<size_t> assignments(expression_tree const & tree);
|
||||
std::vector<size_t> lhs_of(expression_tree const & tree, std::vector<size_t> const & in);
|
||||
std::vector<size_t> rhs_of(expression_tree const & tree, std::vector<size_t> const & in);
|
||||
|
||||
// Hash
|
||||
std::string hash(expression_tree const & tree);
|
||||
|
||||
//Set arguments
|
||||
void set_arguments(expression_tree const & tree, driver::Kernel & kernel, unsigned int& current_arg);
|
||||
|
||||
//Symbolize
|
||||
symbols_table symbolize(isaac::expression_tree const & expression);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#endif
|
@@ -1,154 +0,0 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _ISAAC_SYMBOLIC_EXPRESSION_H
|
||||
#define _ISAAC_SYMBOLIC_EXPRESSION_H
|
||||
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include "isaac/driver/backend.h"
|
||||
#include "isaac/driver/context.h"
|
||||
#include "isaac/driver/command_queue.h"
|
||||
#include "isaac/driver/event.h"
|
||||
#include "isaac/driver/kernel.h"
|
||||
#include "isaac/driver/ndrange.h"
|
||||
#include "isaac/driver/buffer.h"
|
||||
|
||||
#include "isaac/jit/syntax/expression/operations.h"
|
||||
#include "isaac/tools/cpp/tuple.hpp"
|
||||
|
||||
#include "isaac/types.h"
|
||||
#include "isaac/value_scalar.h"
|
||||
#include <memory>
|
||||
#include <iostream>
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
|
||||
class array_base;
|
||||
|
||||
// Empty tag type; expression_tree::node has a constructor overload taking it.
struct invalid_node
{
};
|
||||
|
||||
// Kind of an expression_tree::node (stored in its `type` field).
enum node_type
{
  INVALID_SUBTYPE         = 0,
  COMPOSITE_OPERATOR_TYPE = 1,
  VALUE_SCALAR_TYPE       = 2,
  DENSE_ARRAY_TYPE        = 3
};
|
||||
|
||||
union handle_t
|
||||
{
|
||||
cl_mem cl;
|
||||
CUdeviceptr cu;
|
||||
};
|
||||
|
||||
|
||||
struct array_holder
|
||||
{
|
||||
int_t start;
|
||||
handle_t handle;
|
||||
array_base* base;
|
||||
};
|
||||
|
||||
class expression_tree
|
||||
{
|
||||
public:
|
||||
struct node
|
||||
{
|
||||
//Constructors
|
||||
node();
|
||||
node(invalid_node);
|
||||
node(value_scalar const & x);
|
||||
node(array_base const & x);
|
||||
node(int_t lhs, op_element op, int_t rhs, numeric_type dtype, tuple const & shape);
|
||||
|
||||
//Common
|
||||
node_type type;
|
||||
numeric_type dtype;
|
||||
tuple shape;
|
||||
tuple ld;
|
||||
|
||||
//Type-specific
|
||||
union
|
||||
{
|
||||
//Operator
|
||||
struct{
|
||||
int_t lhs;
|
||||
op_element op;
|
||||
int_t rhs;
|
||||
}binary_operator;
|
||||
//Scalar
|
||||
values_holder scalar;
|
||||
//Array
|
||||
array_holder array;
|
||||
};
|
||||
};
|
||||
|
||||
typedef std::vector<node> data_type;
|
||||
|
||||
public:
|
||||
expression_tree(node const & lhs, node const & rhs, op_element const & op, driver::Context const * context, numeric_type const & dtype, tuple const & shape);
|
||||
expression_tree(expression_tree const & lhs, node const & rhs, op_element const & op, driver::Context const * context, numeric_type const & dtype, tuple const & shape);
|
||||
expression_tree(node const & lhs, expression_tree const & rhs, op_element const & op, driver::Context const * context, numeric_type const & dtype, tuple const & shape);
|
||||
expression_tree(expression_tree const & lhs, expression_tree const & rhs, op_element const & op, driver::Context const * context, numeric_type const & dtype, tuple const & shape);
|
||||
|
||||
tuple shape() const;
|
||||
int_t dim() const;
|
||||
data_type const & data() const;
|
||||
std::size_t root() const;
|
||||
driver::Context const & context() const;
|
||||
numeric_type const & dtype() const;
|
||||
|
||||
node const & operator[](size_t) const;
|
||||
node & operator[](size_t);
|
||||
|
||||
expression_tree operator-();
|
||||
expression_tree operator!();
|
||||
|
||||
private:
|
||||
data_type tree_;
|
||||
std::size_t root_;
|
||||
driver::Context const * context_;
|
||||
};
|
||||
|
||||
// Non-arithmetic overload: hands the argument back unchanged, by const reference.
template<class T>
typename std::enable_if<!std::is_arithmetic<T>::value, T const &>::type
wrap_generic(T const & x)
{
  return x;
}
|
||||
// Arithmetic overload: wraps the value into a value_scalar.
template<class T>
typename std::enable_if<std::is_arithmetic<T>::value, value_scalar>::type
wrap_generic(T x)
{
  return value_scalar(x);
}
|
||||
|
||||
template<typename T>
|
||||
ISAACAPI typename std::conditional<std::is_arithmetic<T>::value, value_scalar, T const &>::type make_tuple(driver::Context const &, T const & x)
|
||||
{ return wrap_generic(x); }
|
||||
|
||||
template<typename T, typename... Args>
|
||||
ISAACAPI expression_tree make_tuple(driver::Context const & context, T const & x, Args... args)
|
||||
{ return expression_tree(wrap_generic(x), make_tuple(context, args...), op_element(BINARY_ARITHMETIC, PAIR_TYPE), &context, numeric_type_of(x), {1}); }
|
||||
|
||||
//io
|
||||
std::string to_string(node_type const & f);
|
||||
std::string to_string(expression_tree::node const & e);
|
||||
std::ostream & operator<<(std::ostream & os, expression_tree::node const & s_node);
|
||||
std::string to_string(isaac::expression_tree const & s);
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@@ -1,157 +0,0 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _ISAAC_SYMBOLIC_OPERATIONS_H
|
||||
#define _ISAAC_SYMBOLIC_OPERATIONS_H
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
|
||||
|
||||
/** @brief Coarse family an operation belongs to; used to group operations and speed up lookups. */
enum operation_type_family
{
  INVALID_ = 0,

  // BLAS1-type
  UNARY_ARITHMETIC, BINARY_ARITHMETIC, REDUCE,

  // BLAS2-type
  REDUCE_ROWS, REDUCE_COLUMNS,

  // BLAS3-type
  GEMM
};
|
||||
|
||||
/**
 * @brief Identifies each individual operation.
 *
 * Enumerators are numbered implicitly from INVALID_TYPE = 0, so their order
 * determines their values; append rather than insert if values must stay stable.
 */
enum operation_type
{
  INVALID_TYPE = 0,

  // unary operator
  MINUS_TYPE, NEGATE_TYPE,

  // unary expression: casts
  CAST_BOOL_TYPE, CAST_CHAR_TYPE, CAST_UCHAR_TYPE,
  CAST_SHORT_TYPE, CAST_USHORT_TYPE,
  CAST_INT_TYPE, CAST_UINT_TYPE,
  CAST_LONG_TYPE, CAST_ULONG_TYPE,
  CAST_HALF_TYPE, CAST_FLOAT_TYPE, CAST_DOUBLE_TYPE,

  // unary expression: functions
  ABS_TYPE, ACOS_TYPE, ASIN_TYPE, ATAN_TYPE, CEIL_TYPE, COS_TYPE,
  COSH_TYPE, EXP_TYPE, FABS_TYPE, FLOOR_TYPE, LOG_TYPE, LOG10_TYPE,
  SIN_TYPE, SINH_TYPE, SQRT_TYPE, TAN_TYPE, TANH_TYPE, TRANS_TYPE,

  // binary expression
  ASSIGN_TYPE, INPLACE_ADD_TYPE, INPLACE_SUB_TYPE,
  ADD_TYPE, SUB_TYPE, MULT_TYPE, DIV_TYPE,
  ELEMENT_ARGFMAX_TYPE, ELEMENT_ARGFMIN_TYPE, ELEMENT_ARGMAX_TYPE, ELEMENT_ARGMIN_TYPE,
  ELEMENT_PROD_TYPE, ELEMENT_DIV_TYPE,
  ELEMENT_EQ_TYPE, ELEMENT_NEQ_TYPE,
  ELEMENT_GREATER_TYPE, ELEMENT_GEQ_TYPE, ELEMENT_LESS_TYPE, ELEMENT_LEQ_TYPE,
  ELEMENT_POW_TYPE,
  ELEMENT_FMAX_TYPE, ELEMENT_FMIN_TYPE, ELEMENT_MAX_TYPE, ELEMENT_MIN_TYPE,

  // Products
  OUTER_PROD_TYPE, GEMM_NN_TYPE, GEMM_TN_TYPE, GEMM_NT_TYPE, GEMM_TT_TYPE,

  // Access modifiers
  RESHAPE_TYPE, SHIFT_TYPE, DIAG_MATRIX_TYPE, DIAG_VECTOR_TYPE, ACCESS_INDEX_TYPE,

  PAIR_TYPE,

  OPERATOR_FUSE,
  SFOR_TYPE,
};
|
||||
|
||||
struct op_element
|
||||
{
|
||||
op_element();
|
||||
op_element(operation_type_family const & _type_family, operation_type const & _type);
|
||||
operation_type_family type_family;
|
||||
operation_type type;
|
||||
};
|
||||
|
||||
// Textual form of an operation (defined out of line).
std::string to_string(operation_type type);

// Classification predicates over operation_type (defined out of line).
bool is_assignment(operation_type op);
bool is_operator(operation_type op);
bool is_function(operation_type op);
bool is_cast(operation_type op);
bool is_indexing(operation_type op);
|
||||
|
||||
}
|
||||
|
||||
#endif
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user