[PYTHON] Removed dead code for alloc_empty and register_scalar

2019-10-30 10:37:30 -04:00
parent f4fcaf84df
commit bf3dc63858
7 changed files with 3 additions and 172 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -34,18 +34,6 @@ if(BUILD_PYTHON_MODULE)
    # PyBind11 wrapper source file
    file(GLOB_RECURSE PYTHON_SRC python/src/bindings.cc)
    include_directories(python/src/ ${PYTHON_INCLUDE_DIRS})
    if(TF_LIBS)
        # Build the extra TensorFlow ops (e.g., alloc_empty) as a separate
        # shared library; this whole section is skipped when TF_LIBS is unset.
        # Make TensorFlow's library and header directories visible. These are
        # directory-scoped commands, so they are issued before the target exists.
        link_directories(${TF_LIB_DIRS})
        include_directories(${TF_INCLUDE_DIRS})
        # Collect every .cc file under python/src/tensorflow/ and create the
        # extra_tf_ops shared-library target from them.
        file(GLOB_RECURSE EXTRA_TF_OPS_SRC python/src/tensorflow/*.cc)
        add_library(extra_tf_ops SHARED ${EXTRA_TF_OPS_SRC})
        # Link against triton and the TensorFlow libraries, and define
        # _GLIBCXX_USE_CXX11_ABI as ${TF_ABI} for this target only
        # (presumably so the ops use the same libstdc++ ABI as TensorFlow - confirm).
        target_link_libraries(extra_tf_ops triton ${TF_LIBS})
        target_compile_definitions(extra_tf_ops PRIVATE "-D_GLIBCXX_USE_CXX11_ABI=${TF_ABI}")
    endif()
 endif()
--- a/python/examples/einsum.py
+++ b/python/examples/einsum.py
@@ -24,7 +24,7 @@ cases = []
 # Matmul
 cases += [[[4, 1024, 1024], [1024, 1024], [4, 1024, 1024], "btc,ck->btk"]]
 # Attention
-cases += [[[4, 256, 8, 2, 64], [8, 2, 512, 64], [4, 256, 8, 2, 512], "bchak,hank->bchan"]]
+# cases += [[[4, 256, 8, 2, 64], [8, 2, 512, 64], [4, 256, 8, 2, 512], "bchak,hank->bchan"]]
 if mode == MODE.TF:
    sess = tf.InteractiveSession()
--- a/python/src/tensorflow/alloc_empty.cc
+++ b/python/src/tensorflow/alloc_empty.cc
@@ -1,39 +0,0 @@
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/shape_inference.h"
 using namespace tensorflow;
 class AllocEmptyOp : public OpKernel {
 public:
  explicit AllocEmptyOp(OpKernelConstruction* context) : OpKernel(context) {}
  void Compute(OpKernelContext* context) override {
     std::cout << "executing allocempty" << std::endl;
    // fetch input
    const Tensor& x = context->input(0);
    const int32* x_data = (const int32*)x.tensor_data().data();
    // allocate output
    Tensor* y = NULL;
    int32 x_rank = x.dims();
    OP_REQUIRES(context, x_rank == 1, errors::InvalidArgument("Input tensor must be 1D"));
    int32 y_rank = x.dim_size(0);
    TensorShape y_shapes;
    for(size_t i = 0; i < y_rank; i++)
      y_shapes.AddDim(x_data[i]);
    OP_REQUIRES_OK(context, context->allocate_output(0, y_shapes, &y));
  }
 };
 // Kernel registration for AllocEmptyOp; input "x" is kept in host memory.
 // NOTE(review): Device() is chained twice on a single builder - confirm that
 // both the CPU and the GPU kernel actually end up registered.
 REGISTER_KERNEL_BUILDER(Name("AllocEmpty").HostMemory("x").Device(DEVICE_CPU).Device(DEVICE_GPU), AllocEmptyOp);
 // Op signature: int32 shape tensor "x" in, tensor "y" of element type T out.
 REGISTER_OP("AllocEmpty")
  .Input("x: int32")
  .Attr("T : {bool, int8, int16, int32, int64, float16, float32, float64}")
  .Output("y: T")
  .SetShapeFn([](shape_inference::InferenceContext* c) {
    // The output shape is whatever the shape tensor (input 0) describes.
    // Propagate a failure from the conversion instead of discarding it and
    // publishing an unset handle.
    shape_inference::ShapeHandle handle;
    TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &handle));
    c->set_output(0, handle);
    return Status::OK();
  });
--- a/python/src/tensorflow/register_scalar.cc
+++ b/python/src/tensorflow/register_scalar.cc
@@ -1,37 +0,0 @@
 #include <map>
 #include "tensorflow/core/framework/op_kernel.h"
 using namespace tensorflow;
 extern std::map<size_t, int64_t> i64scalar_map;
 class RegisterScalarOp : public OpKernel {
 public:
  explicit RegisterScalarOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("id", &id_));
  }
  void Compute(OpKernelContext* context) override {
    // fetch input
    const Tensor& x = context->input(0);
    const int32* x_data = (const int32*)x.tensor_data().data();
    const int32 x_rank = x.dims();
    OP_REQUIRES(context, x_rank == 0, errors::InvalidArgument("Input must be a scalar"));
    i64scalar_map[id_] = *x_data;
    context->set_output(0, x);
  }
 private:
  int id_;
 };
 // Kernel registration: CPU device, input "x" kept in host memory.
 REGISTER_KERNEL_BUILDER(
     Name("RegisterScalar").HostMemory("x").Device(DEVICE_CPU),
     RegisterScalarOp);
 // Op signature: int32 input "x", int32 output "y", integer attribute "id".
 REGISTER_OP("RegisterScalar")
     .Input("x: int32")
     .Output("y: int32")
     .Attr("id: int");
--- a/python/triton/frameworks.py
+++ b/python/triton/frameworks.py
@@ -4,8 +4,6 @@ import triton._C.libtriton as libtriton
 torch = None
 tensorflow = None
 tf_extra_ops = None
 gen_resource_variable_ops = None
 def _import_torch():
  global torch
@@ -14,24 +12,13 @@ def _import_torch():
 def _import_tensorflow():
   """Bind the module-level `tensorflow` and `gen_resource_variable_ops` names on first call."""
   global tensorflow, gen_resource_variable_ops
   # already imported by an earlier call: nothing to do
   if tensorflow is not None:
     return
   import tensorflow
   from tensorflow.python.ops import gen_resource_variable_ops
 def _import_tf_extra_ops():
   """Load `libextra_tf_ops.so` from libtriton's directory into `tf_extra_ops`, once."""
   global tf_extra_ops
   # the op library is loaded at most once
   if tf_extra_ops is not None:
     return
   # the shared object is looked up next to the libtriton extension module
   lib_dir = os.path.dirname(libtriton.__file__)
   so_path = os.path.join(lib_dir, 'libextra_tf_ops.so')
   _import_tensorflow()
   tf_extra_ops = tensorflow.load_op_library(so_path)
 def has_tensorflow():
   """Return whether 'tensorflow' is already in `sys.modules`.

   When it is, the lazy TensorFlow globals and the extra op library are
   bound before returning.
   """
   if 'tensorflow' not in sys.modules:
     return False
   _import_tensorflow()
   _import_tf_extra_ops()
   return True
 def has_torch():
--- a/python/triton/kernel.py
+++ b/python/triton/kernel.py
@@ -219,7 +219,7 @@ class kernel:
    # retrieve framework op
    op_id = self.fw_id[key]
    # register grid
-    libtriton.register_grid(op_id, _make_grid(args))
+    libtriton.register_grid(op_id, args[-1])
    # id for the benchmark result
    bench_id = libtriton.make_scalar_id() if bench > 0 else -1
    # call framework function
--- a/python/triton/utils.py
+++ b/python/triton/utils.py
@@ -13,90 +13,22 @@ class tf_empty_proxy:
 def empty(shape, dtype):
  if fw.has_tensorflow():
-    shape = [x.handle if isinstance(x, scalar) else fw.tensorflow.constant(x) for x in shape]
+    shape = [fw.tensorflow.constant(x) for x in shape]
    shape = fw.tensorflow.stack(shape)
    return tf_empty_proxy(shape, dtype)
    #return fw.tf_extra_ops.alloc_empty(args, T = dtype)
  elif fw.has_torch():
    return fw.torch.empty(*shape).cuda()
 class lazy_shape:
   """Wraps a shape so that indexing it yields `scalar` objects instead of raw entries."""
   def __init__(self, shape):
     # the shape is stored as given; wrapping happens on access
     self.shape = shape
   def __getitem__(self, key):
     entry = self.shape[key]
     return scalar(entry)
 def shape(A):
   """Return the shape of `A`: `A.shape.as_list()` under TensorFlow, `A.shape` under PyTorch."""
   if fw.has_tensorflow():
     return A.shape.as_list()
   if fw.has_torch():
     return A.shape
   # neither framework is available
   assert False
 class scalar:
   """Scalar registered through the `register_scalar` extra TensorFlow op.

   Each instance gets an id from `libtriton.make_scalar_id()` and a handle
   returned by `fw.tf_extra_ops.register_scalar`. Arithmetic operators act on
   `get_value()`, which is either that handle or, once the instance is marked
   as initialized, the value fetched with `libtriton.retrieve_scalar`.
   """
   def __init__(self, x):
     self.id = libtriton.make_scalar_id()
     self.handle = fw.tf_extra_ops.register_scalar(x, id=self.id)
     self.assume_initialized = False
   def set_assume_initialized(self):
     """Make `get_value` read the value back from libtriton."""
     self.assume_initialized = True
   def unset_assume_initialized(self):
     """Make `get_value` return the framework handle again."""
     self.assume_initialized = False
   def get_value(self):
     """Return the retrieved value if marked initialized, else the handle."""
     if self.assume_initialized:
       return libtriton.retrieve_scalar(self.id)
     else:
       return self.handle
   def __add__(self, other):
     return self.get_value() + other
   def __radd__(self, other):
     return other + self.get_value()
   def __sub__(self, other):
     return self.get_value() - other
   # was spelled `__rsub`, which Python never calls for `other - self`
   def __rsub__(self, other):
     return other - self.get_value()
   def __mul__(self, other):
     return self.get_value() * other
   # was spelled `__rmul`, which Python never calls for `other * self`
   def __rmul__(self, other):
     return other * self.get_value()
   def __floordiv__(self, other):
     return self.get_value() // other
   def __rfloordiv__(self, other):
     return other // self.get_value()
   # `__div__`/`__rdiv__` are only consulted by Python 2's `/`
   def __div__(self, other):
     return self.get_value() / other
   def __rdiv__(self, other):
     return other / self.get_value()
   # the true-division hooks previously computed the result but returned None;
   # using `/` also lets Python fall back to the operand's reflected method
   def __truediv__(self, other):
     return self.get_value() / other
   def __rtruediv__(self, other):
     return other / self.get_value()
   def __neg__(self):
     return -self.get_value()
 class id_dict:
  # Lazy entry for e.g., tensorflow, when value of benchmark is