Package: Major fall update:

-> Major performance improvement of CONV for small batches
-> Adopted NCHW image layout
-> 3-D convolutions supported
-> HFMA2 deprecated
-> Multi-threaded auto-tuning
-> TensorFlow Wrapper
This commit is contained in:
Philippe Tillet
2017-11-09 13:11:41 -05:00
parent 4209180450
commit 612d6a9bdf
93 changed files with 22429 additions and 21989 deletions

View File

@@ -33,25 +33,28 @@ namespace isaac
namespace driver
{
Buffer::Buffer(Context const & context, size_t size) : context_(context), size_(size)
// Creates a buffer by requesting `size` bytes of device memory from the CUDA
// driver; the resulting device pointer is written into cu_.
Buffer::Buffer(Context const & context, size_t size)
  : context_(context)
{
  // Scoped guard built from context_; presumably keeps that context current
  // while the allocation runs -- confirm against ContextSwitcher.
  ContextSwitcher scoped_context(context_);
  auto device_ptr = &*cu_;
  dispatch::cuMemAlloc(device_ptr, size);
}
// Wraps an already existing device pointer instead of allocating a new one.
// `take_ownership` is forwarded unchanged to the handle; presumably it decides
// whether the handle releases `cu` on destruction -- confirm against Handle.
Buffer::Buffer(Context const & context, CUdeviceptr cu, bool take_ownership)
  : context_(context)
  , cu_(cu, take_ownership)
{
}
// Enqueues an asynchronous byte-wise memset to 0 of the first `size` bytes of
// the buffer on `queue`. The call only submits the operation; nothing in this
// function waits for it to finish.
void Buffer::set_zero(Stream const & queue, size_t size)
{
  // Scoped guard built from context_; presumably keeps that context current
  // for the driver call below -- confirm against ContextSwitcher.
  ContextSwitcher scoped_context(context_);
  dispatch::cuMemsetD8Async(
    *cu_,
    0,
    size,
    queue);
}
// Convenience overload: zeroes size_ bytes by delegating to
// set_zero(queue, size).
// NOTE(review): this relies on size_ having been set at construction; verify
// that every Buffer constructor still initialises it.
void Buffer::set_zero(Stream const & queue)
{
  this->set_zero(queue, size_);
}
// Read-only access to the handle that holds the device pointer.
Handle<CUdeviceptr> const & Buffer::cu() const
{
  return this->cu_;
}
// Mutable access to the handle that holds the device pointer.
Handle<CUdeviceptr> & Buffer::cu()
{
  return this->cu_;
}
}
}