Code quality: fixed compilation errors with CUDA

This commit is contained in:
Philippe Tillet
2015-08-20 21:24:41 -04:00
parent efdbf5f4a6
commit 33dac6b05a
15 changed files with 120 additions and 102 deletions

View File

@@ -35,6 +35,8 @@ else()
endif()
string(REPLACE ";" " " BLAS_DEF_STR "${BLAS_DEF}")
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
foreach(PROG blas)
if(CUDA_FOUND)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} " ${BLAS_DEF_STR} -std=c++11 ${BACKEND_DEFINES}")

View File

@@ -21,73 +21,11 @@
namespace sc = isaac;
typedef sc::int_t int_t;
// Compile-time index tag driving the recursion over tuple positions.
template<std::size_t> struct int_{};

// Recursive case: stream the element at position (size - N), emit a
// separating comma, then recurse towards the last element.
template <class Tup, size_t N>
std::ostream& print_tuple(std::ostream& out, const Tup& t, int_<N> )
{
  out << std::get<std::tuple_size<Tup>::value - N>(t) << ',';
  return print_tuple(out, t, int_<N - 1>());
}

// Base case: the final element is streamed without a trailing comma.
template <class Tup>
std::ostream& print_tuple(std::ostream& out, const Tup& t, int_<1> )
{
  return out << std::get<std::tuple_size<Tup>::value - 1>(t);
}

// Comma-separated streaming of an arbitrary std::tuple, e.g. "1,2.5,x".
template <class... Args>
std::ostream& operator<<(std::ostream& out, const std::tuple<Args...>& t)
{
  print_tuple(out, t, int_<sizeof...(Args)>());
  return out;
}
// Round N up to the next multiple of pad; N is returned unchanged when it
// already is an exact multiple.
int ceil(int N, int pad)
{
  if(N % pad == 0)
    return N;
  return (N + pad - 1) / pad * pad;
}

// Build N problem sizes spaced geometrically between min and max, each one
// rounded up to a multiple of pad.
std::vector<int> create_log_range(int min, int max, int N, int pad)
{
  std::vector<int> sizes(N);
  for(int i = 0 ; i < N ; ++i)
  {
    sizes[i] = static_cast<int>(std::exp(std::log(min) + (float)(std::log(max) - std::log(min))*i/N));
    sizes[i] = ceil(sizes[i], pad);
  }
  return sizes;
}

// Enumerate every multiple of pad in [ceil(min,pad), ceil(max,pad)).
std::vector<int> create_full_range(int min, int max, int pad)
{
  std::vector<int> sizes;
  for(int value = ceil(min, pad) ; value < ceil(max, pad) ; value += pad)
    sizes.push_back(value);
  return sizes;
}
// Median of the elements of x. The argument is taken by value on purpose:
// the copy is sorted in place. For an even count the two central elements
// are averaged (integer division when T is integral).
template<class T>
T median(std::vector<T> x)
{
  std::sort(x.begin(), x.end());
  const size_t n = x.size();
  return (n % 2 == 0) ? (x[n / 2 - 1] + x[n / 2]) / 2 : x[n / 2];
}
// Arithmetic mean of the elements of x.
// Fixes: the original divided by zero for an empty vector and used a signed
// int loop index against the unsigned container size. An empty input now
// returns a value-initialized T instead of invoking undefined behavior.
// Division semantics are unchanged (integer division when T is integral).
template<class T>
T mean(std::vector<T> x)
{
  if(x.empty())
    return T();
  T res = 0;
  const std::size_t N = x.size();
  for(std::size_t i = 0 ; i < N ; ++i)
    res += x[i];
  return res/static_cast<T>(N);
}
// Fold helper (e.g. for std::accumulate over a list of events): adds the
// elapsed time reported by e to the running sum.
// Fixes: the function had two bodies back to back — the old one-line body
// `{ return sum + e.elapsed_time();}` followed by a second braced body —
// which does not compile; only a single body is kept.
static long time_event(long sum, sc::driver::Event const & e)
{
  return sum + e.elapsed_time();
}
template<class T>
void bench(sc::numeric_type dtype, std::string operation)

View File

@@ -3,6 +3,74 @@
#include <chrono>
// Index tag used to unroll tuple printing at compile time.
template<std::size_t> struct int_{};

// General step: print element (size - Pos), a comma, then the rest.
template <class Tuple, size_t Pos>
std::ostream& print_tuple(std::ostream& out, const Tuple& t, int_<Pos> )
{
  out << std::get<std::tuple_size<Tuple>::value - Pos>(t) << ',';
  return print_tuple(out, t, int_<Pos - 1>());
}

// Termination: the last element carries no trailing comma.
template <class Tuple>
std::ostream& print_tuple(std::ostream& out, const Tuple& t, int_<1> )
{
  return out << std::get<std::tuple_size<Tuple>::value - 1>(t);
}

// Streams any std::tuple as a comma-separated list of its elements.
template <class... Args>
std::ostream& operator<<(std::ostream& out, const std::tuple<Args...>& t)
{
  print_tuple(out, t, int_<sizeof...(Args)>());
  return out;
}
// Smallest multiple of pad that is >= N (N itself when already aligned).
int ceil(int N, int pad)
{
  const int remainder = N % pad;
  if(remainder == 0)
    return N;
  return (N + pad - 1) / pad * pad;
}

// N geometrically spaced sizes in [min, max), each rounded up to a
// multiple of pad.
std::vector<int> create_log_range(int min, int max, int N, int pad)
{
  std::vector<int> result(N);
  for(int i = 0 ; i < N ; ++i)
  {
    result[i] = static_cast<int>(std::exp(std::log(min) + (float)(std::log(max) - std::log(min))*i/N));
    result[i] = ceil(result[i], pad);
  }
  return result;
}

// All multiples of pad from ceil(min,pad) up to (but excluding)
// ceil(max,pad).
std::vector<int> create_full_range(int min, int max, int pad)
{
  std::vector<int> result;
  const int stop = ceil(max, pad);
  for(int v = ceil(min, pad) ; v < stop ; v += pad)
    result.push_back(v);
  return result;
}
// Median of x; sorts the by-value copy, then returns the middle element,
// or the average of the two middle elements for an even-sized input
// (integer division when T is integral).
template<class T>
T median(std::vector<T> x)
{
  std::sort(x.begin(), x.end());
  const size_t n = x.size();
  if (n % 2 == 0)
    return (x[n / 2 - 1] + x[n / 2]) / 2;
  return x[n / 2];
}
// Arithmetic mean of the elements of x.
// Fixes: the original performed a division by zero on an empty vector and
// compared a signed int index with the unsigned container size. An empty
// input now yields a value-initialized T. Division semantics otherwise
// match the original (integer division for integral T).
template<class T>
T mean(std::vector<T> x)
{
  if(x.empty())
    return T();
  T res = 0;
  const std::size_t N = x.size();
  for(std::size_t i = 0 ; i < N ; ++i)
    res += x[i];
  return res/static_cast<T>(N);
}
class Timer
{
typedef std::chrono::high_resolution_clock high_resolution_clock;
@@ -30,4 +98,5 @@ private:
};
#endif

View File

@@ -15,6 +15,10 @@ namespace driver
class ISAACAPI Event
{
friend class CommandQueue;
private:
#ifdef ISAAC_WITH_CUDA
typedef std::pair<CUevent, CUevent> cu_event_t;
#endif
public:
Event(cl_event const & event, bool take_ownership = true);
Event(backend_type backend);
@@ -22,9 +26,6 @@ public:
HANDLE_TYPE(cl_event, cu_event_t)& handle();
private:
backend_type backend_;
#ifdef ISAAC_WITH_CUDA
typedef std::pair<CUevent, CUevent> cu_event_t;
#endif
HANDLE_TYPE(cl_event, cu_event_t) h_;
};

View File

@@ -48,7 +48,8 @@ public:
CLType & cl();
CLType const & cl() const;
#ifdef ISAAC_WITH_CUDA
CUTYPE & cu();
CUType & cu();
CUType const & cu() const;
#endif
~Handle();

View File

@@ -20,7 +20,7 @@ Buffer::Buffer(Context const & context, size_t size) : backend_(context.backend_
{
#ifdef ISAAC_WITH_CUDA
case CUDA:
cuda::check(cuMemAlloc(h_.cu.get(), size));
cuda::check(cuMemAlloc(&h_.cu(), size));
break;
#endif
case OPENCL:

View File

@@ -29,7 +29,7 @@ CommandQueue::CommandQueue(Context const & context, Device const & device, cl_co
{
#ifdef ISAAC_WITH_CUDA
case CUDA:
cuda::check(cuStreamCreate(h_.cu.get(), 0));
cuda::check(cuStreamCreate(&h_.cu(), 0));
break;
#endif
case OPENCL:
@@ -58,7 +58,7 @@ void CommandQueue::synchronize()
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA: cuda::check(cuStreamSynchronize(*h_.cu)); break;
case CUDA: cuda::check(cuStreamSynchronize(h_.cu())); break;
#endif
case OPENCL: ocl::check(clFinish(h_.cl())); break;
default: throw;
@@ -72,10 +72,10 @@ Event CommandQueue::enqueue(Kernel const & kernel, NDRange global, driver::NDRan
{
#ifdef ISAAC_WITH_CUDA
case CUDA:
cuda::check(cuEventRecord(event.h_.cu->first, *h_.cu));
cuda::check(cuLaunchKernel(*kernel.h_.cu, global[0]/local[0], global[1]/local[1], global[2]/local[2],
local[0], local[1], local[2], 0, *h_.cu,(void**)&kernel.cu_params_[0], NULL));
cuda::check(cuEventRecord(event.h_.cu->second, *h_.cu));
cuda::check(cuEventRecord(event.h_.cu().first, h_.cu()));
cuda::check(cuLaunchKernel(kernel.h_.cu(), global[0]/local[0], global[1]/local[1], global[2]/local[2],
local[0], local[1], local[2], 0, h_.cu(),(void**)&kernel.cu_params_[0], NULL));
cuda::check(cuEventRecord(event.h_.cu().second, h_.cu()));
break;
#endif
case OPENCL:
@@ -93,9 +93,9 @@ void CommandQueue::write(Buffer const & buffer, bool blocking, std::size_t offse
#ifdef ISAAC_WITH_CUDA
case CUDA:
if(blocking)
cuda::check(cuMemcpyHtoD(*buffer.h_.cu + offset, ptr, size));
cuda::check(cuMemcpyHtoD(buffer.h_.cu() + offset, ptr, size));
else
cuda::check(cuMemcpyHtoDAsync(*buffer.h_.cu + offset, ptr, size, *h_.cu));
cuda::check(cuMemcpyHtoDAsync(buffer.h_.cu() + offset, ptr, size, h_.cu()));
break;
#endif
case OPENCL:
@@ -112,9 +112,9 @@ void CommandQueue::read(Buffer const & buffer, bool blocking, std::size_t offset
#ifdef ISAAC_WITH_CUDA
case CUDA:
if(blocking)
cuda::check(cuMemcpyDtoH(ptr, *buffer.h_.cu + offset, size));
cuda::check(cuMemcpyDtoH(ptr, buffer.h_.cu() + offset, size));
else
cuda::check(cuMemcpyDtoHAsync(ptr, *buffer.h_.cu + offset, size, *h_.cu));
cuda::check(cuMemcpyDtoHAsync(ptr, buffer.h_.cu() + offset, size, h_.cu()));
break;
#endif
case OPENCL:

View File

@@ -23,7 +23,7 @@ Context::Context(Device const & device) : backend_(device.backend_), device_(dev
{
#ifdef ISAAC_WITH_CUDA
case CUDA:
cuda::check(cuCtxCreate(h_.cu.get(), CU_CTX_SCHED_AUTO, *device.h_.cu));
cuda::check(cuCtxCreate(&h_.cu(), CU_CTX_SCHED_AUTO, device.h_.cu()));
break;
#endif
case OPENCL:

View File

@@ -18,13 +18,13 @@ template<CUdevice_attribute attr>
int Device::cuGetInfo() const
{
int res;
cuda::check(cuDeviceGetAttribute(&res, attr, *h_.cu));
cuda::check(cuDeviceGetAttribute(&res, attr, h_.cu()));
return res;
}
Device::Device(int ordinal): backend_(CUDA), h_(backend_, true)
{
cuda::check(cuDeviceGet(h_.cu.get(), ordinal));
cuda::check(cuDeviceGet(&h_.cu(), ordinal));
}
#endif
@@ -114,7 +114,7 @@ std::string Device::name() const
#ifdef ISAAC_WITH_CUDA
case CUDA:
char tmp[128];
cuda::check(cuDeviceGetName(tmp, 128, *h_.cu));
cuda::check(cuDeviceGetName(tmp, 128, h_.cu()));
return std::string(tmp);
#endif
case OPENCL: return ocl::info<CL_DEVICE_NAME>(h_.cl());
@@ -161,7 +161,7 @@ Device::Type Device::type() const
switch(backend_)
{
#ifdef ISAAC_WITH_CUDA
case CUDA: return DEVICE_TYPE_GPU;
case CUDA: return Type::GPU;
#endif
case OPENCL: return static_cast<Type>(ocl::info<CL_DEVICE_TYPE>(h_.cl()));
default: throw;

View File

@@ -13,8 +13,8 @@ Event::Event(backend_type backend) : backend_(backend), h_(backend_, true)
{
#ifdef ISAAC_WITH_CUDA
case CUDA:
cuda::check(cuEventCreate(&h_.cu->first, CU_EVENT_DEFAULT));
cuda::check(cuEventCreate(&h_.cu->second, CU_EVENT_DEFAULT));
cuda::check(cuEventCreate(&h_.cu().first, CU_EVENT_DEFAULT));
cuda::check(cuEventCreate(&h_.cu().second, CU_EVENT_DEFAULT));
break;
#endif
case OPENCL: break;
@@ -34,7 +34,7 @@ long Event::elapsed_time() const
#ifdef ISAAC_WITH_CUDA
case CUDA:
float time;
cuda::check(cuEventElapsedTime(&time, h_.cu->first, h_.cu->second));
cuda::check(cuEventElapsedTime(&time, h_.cu().first, h_.cu().second));
return 1e6*time;
#endif
case OPENCL:
@@ -44,7 +44,7 @@ long Event::elapsed_time() const
}
}
HANDLE_TYPE(cl_event, cu_event_t) & Event::handle()
HANDLE_TYPE(cl_event, Event::cu_event_t) & Event::handle()
{ return h_; }
}

View File

@@ -100,7 +100,7 @@ template<class CLType, class CUType>
Handle<CLType, CUType>::~Handle()
{
#ifdef ISAAC_WITH_CUDA
if(has_ownership_ && cu_ && cu_.unique() && *cu_)
if(has_ownership_ && cu_.unique())
_delete(*cu_);
#endif
if(has_ownership_ && cl_ && cl_.unique() && *cl_)
@@ -122,6 +122,12 @@ CUType & Handle<CLType, CUType>::cu()
return *cu_;
}
template<class CLType, class CUType>
CUType const & Handle<CLType, CUType>::cu() const
{
return *cu_;
}
template class Handle<cl_mem, CUdeviceptr>;
template class Handle<cl_command_queue, CUstream>;
template class Handle<cl_context, CUcontext>;

View File

@@ -1,6 +1,7 @@
#include "isaac/driver/kernel.h"
#include "isaac/driver/buffer.h"
#include <iostream>
#include <cstring>
namespace isaac
{
@@ -16,7 +17,7 @@ Kernel::Kernel(Program const & program, const char * name) : backend_(program.ba
case CUDA:
cu_params_store_.reserve(32);
cu_params_.reserve(32);
cuda::check(cuModuleGetFunction(h_.cu.get(), *program.h_.cu, name));\
cuda::check(cuModuleGetFunction(&h_.cu(), program.h_.cu(), name));\
break;
#endif
case OPENCL:
@@ -60,7 +61,7 @@ void Kernel::setArg(unsigned int index, Buffer const & data)
#ifdef ISAAC_WITH_CUDA
case CUDA:
{
setArg(index, sizeof(CUdeviceptr), data.h_.cu.get()); break;
setArg(index, sizeof(CUdeviceptr), (void*)&data.h_.cu()); break;
}
#endif
case OPENCL:

View File

@@ -1,9 +1,9 @@
#include "isaac/driver/platform.h"
#include "isaac/driver/device.h"
#include "helpers/ocl/infos.hpp"
#include <string>
#include "to_string.hpp"
namespace isaac
{
@@ -12,7 +12,7 @@ namespace driver
{
#ifdef ISAAC_WITH_CUDA
Platform::Platform(backend_type backend): backend_(backend, take_ownership){}
Platform::Platform(backend_type backend): backend_(backend){}
#endif
Platform::Platform(cl_platform_id const & platform) : backend_(OPENCL)

View File

@@ -34,7 +34,7 @@ Program::Program(Context const & context, std::string const & source) : backend_
//Load cached program
if(cache_path.size() && std::ifstream(fname, std::ios::binary))
{
cuda::check(cuModuleLoad(h_.cu.get(), fname.c_str()));
cuda::check(cuModuleLoad(&h_.cu(), fname.c_str()));
break;
}
@@ -61,7 +61,7 @@ Program::Program(Context const & context, std::string const & source) : backend_
nvrtc::check(nvrtcGetPTXSize(prog, &ptx_size));
std::vector<char> ptx(ptx_size);
nvrtc::check(nvrtcGetPTX(prog, ptx.data()));
cuda::check(cuModuleLoadDataEx(h_.cu.get(), ptx.data(), 0, NULL, NULL));
cuda::check(cuModuleLoadDataEx(&h_.cu(), ptx.data(), 0, NULL, NULL));
//Save cached program
if (cache_path.size())
@@ -90,7 +90,7 @@ Program::Program(Context const & context, std::string const & source) : backend_
// str.assign((std::istreambuf_iterator<char>(ifs)),
// std::istreambuf_iterator<char>());
// cuda::check(cuModuleLoadDataEx(h_.cu.get(), str.c_str(), 0, NULL, NULL));
// cuda::check(cuModuleLoadDataEx(&h_.cu(), str.c_str(), 0, NULL, NULL));
break;
}

View File

@@ -114,7 +114,7 @@ def main():
#Include directories
numpy_include = os.path.join(find_module("numpy")[1], "core", "include")
include =' src/include src/lib/external'.split() + ['external/boost/', 'external/boost/boost/', numpy_include]
include =' src/include src/lib/external /usr/local/cuda/include'.split() + ['external/boost/', 'external/boost/boost/', numpy_include]
#Android
if for_android:
@@ -124,7 +124,7 @@ def main():
libraries += ['gnustl_shared']
#Source files
src = 'src/lib/value_scalar.cpp src/lib/profiles/profiles.cpp src/lib/profiles/presets.cpp src/lib/profiles/predictors/random_forest.cpp src/lib/kernels/stream.cpp src/lib/kernels/parse.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/keywords.cpp src/lib/kernels/binder.cpp src/lib/kernels/templates/gemm.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/templates/ger.cpp src/lib/kernels/templates/gemv.cpp src/lib/kernels/templates/dot.cpp src/lib/kernels/templates/axpy.cpp src/lib/wrap/clBLAS.cpp src/lib/array.cpp src/lib/symbolic/preset.cpp src/lib/symbolic/io.cpp src/lib/symbolic/expression.cpp src/lib/symbolic/execute.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/driver/program_cache.cpp src/lib/driver/buffer.cpp src/lib/driver/program.cpp src/lib/driver/ndrange.cpp src/lib/driver/handle.cpp src/lib/driver/backend.cpp src/lib/driver/device.cpp src/lib/driver/platform.cpp src/lib/driver/kernel.cpp src/lib/driver/event.cpp src/lib/driver/context.cpp src/lib/driver/command_queue.cpp src/lib/driver/check.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
src = 'src/lib/exception/operation_not_supported.cpp src/lib/exception/unknown_datatype.cpp src/lib/value_scalar.cpp src/lib/driver/check.cpp src/lib/driver/ndrange.cpp src/lib/driver/platform.cpp src/lib/driver/backend.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/event.cpp src/lib/driver/kernel.cpp src/lib/driver/handle.cpp src/lib/driver/device.cpp src/lib/driver/program_cache.cpp src/lib/driver/buffer.cpp src/lib/driver/context.cpp src/lib/kernels/templates/axpy.cpp src/lib/kernels/templates/gemv.cpp src/lib/kernels/templates/dot.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/templates/ger.cpp src/lib/kernels/templates/gemm.cpp src/lib/kernels/stream.cpp src/lib/kernels/keywords.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/binder.cpp src/lib/kernels/parse.cpp src/lib/wrap/clBLAS.cpp src/lib/profiles/predictors/random_forest.cpp src/lib/profiles/presets.cpp src/lib/profiles/profiles.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/expression.cpp src/lib/symbolic/io.cpp src/lib/symbolic/preset.cpp src/lib/array.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
boostsrc = 'external/boost/libs/'
for s in ['numpy','python','smart_ptr','system','thread']:
src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]
@@ -143,7 +143,7 @@ def main():
libraries=libraries)]
#External
extensions += [Extension('external.sklearn._tree',
extensions += [Extension('autotuning.external.sklearn._tree',
['external/sklearn/_tree.c'],
include_dirs = [numpy_include])]
@@ -155,7 +155,7 @@ def main():
author='Philippe Tillet',
author_email='ptillet@g.harvard.edu',
license='MPL 2.0',
packages=['isaac', 'isaac.external', 'isaac.external.sklearn'],
packages=['isaac','isaac.autotuning', 'isaac.autotuning.external', 'isaac.autotuning.external.deap', 'isaac.autotuning.external.deap.tools', 'isaac.autotuning.external.sklearn'],
ext_package="isaac",
ext_modules=extensions,
cmdclass={'build_py': build_py, 'build_ext': build_ext_subclass},