Package: Major fall update:

-> Major performance improvement of CONV for small batches
-> Adopted NCHW image layout
-> 3-D convolutions supported
-> HFMA2 deprecated
-> Multi-threaded auto-tuning
-> TensorFlow wrapper
This commit is contained in:
Philippe Tillet
2017-11-09 13:11:41 -05:00
parent 4209180450
commit 612d6a9bdf
93 changed files with 22429 additions and 21989 deletions

View File

@@ -38,14 +38,14 @@ class Buffer: public HandleInterface<Buffer, CUdeviceptr>
{
// Pairs a Handle<CUdeviceptr> with a Context and a size, and exposes the
// handle through the cu() accessors. Only declarations are visible in this
// header; the notes below marked NOTE(review) are inferred from names and
// must be confirmed against the implementation.
public:
// Builds a buffer from a context and a size.
// NOTE(review): presumably allocates `size` units of device memory within
// `context` -- confirm, including whether `size` is in bytes.
Buffer(Context const & context, size_t size);
// Builds a buffer around an existing CUdeviceptr `cu`.
// NOTE(review): `take_ownership` presumably decides whether this object is
// responsible for releasing `cu`; note that no size is passed here, so
// verify what size_ is set to on this path.
Buffer(Context const & context, CUdeviceptr cu, bool take_ownership);
// NOTE(review): presumably zero-fills `size` units of the buffer using the
// stream `queue` -- confirm units and whether the call is asynchronous.
void set_zero(Stream const & queue, size_t size);
// Overload without an explicit size.
// NOTE(review): presumably zero-fills the whole buffer (size_) -- confirm.
void set_zero(Stream const & queue);
// Read-only access to a Handle<CUdeviceptr> (presumably the cu_ member).
Handle<CUdeviceptr> const & cu() const;
// Mutable access to a Handle<CUdeviceptr> (presumably the cu_ member).
Handle<CUdeviceptr> & cu();
private:
// Context held by value as a member of the buffer.
Context context_;
// Handle wrapping the CUdeviceptr.
Handle<CUdeviceptr> cu_;
// NOTE(review): presumably the size given at construction -- confirm units.
size_t size_;
};
}