more work on heuristics
This commit is contained in:
@@ -49,7 +49,7 @@ class DotOp : public OpKernel {
|
||||
triton::driver::cu_buffer db(ctx, (CUdeviceptr)b.flat<Eigen::half>().data(), false);
|
||||
triton::driver::cu_buffer dc(ctx, (CUdeviceptr)c->flat<float>().data(), false);
|
||||
// template
|
||||
triton::dnn::gemm dot(M, N, K, false, false, "fp16", "fp16", 4, 4);
|
||||
triton::dnn::dot dot(M, N, K, false, false, "fp16", "fp16", 8, 8);
|
||||
dot.enqueue(stream, {&da, &db, &dc});
|
||||
}
|
||||
|
||||
|
@@ -19,7 +19,7 @@
|
||||
using namespace tensorflow;
|
||||
using GPUDevice = Eigen::GpuDevice;
|
||||
|
||||
template<triton::dnn::shift::type OP>
|
||||
template<triton::dnn::shift::op_t OP>
|
||||
class ShiftConvOp : public OpKernel {
|
||||
public:
|
||||
explicit ShiftConvOp(OpKernelConstruction* context) : OpKernel(context), layout_(triton::dnn::shift::NCHW) {
|
||||
|
Reference in New Issue
Block a user