[PYTHON] Removed dead code for alloc_empty and register_scalar

Author: Philippe Tillet
Date:   2019-10-30 10:37:30 -04:00
Parent: f4fcaf84df
Commit: bf3dc63858

7 changed files with 3 additions and 172 deletions

@@ -34,18 +34,6 @@ if(BUILD_PYTHON_MODULE)
     # PyBind11 wrapper source file
     file(GLOB_RECURSE PYTHON_SRC python/src/bindings.cc)
     include_directories(python/src/ ${PYTHON_INCLUDE_DIRS})
-    if(TF_LIBS)
-        # extra tensorflow ops (e.g., alloc_empty)
-        # update directories
-        link_directories(${TF_LIB_DIRS})
-        include_directories(${TF_INCLUDE_DIRS})
-        # get sources
-        file(GLOB_RECURSE EXTRA_TF_OPS_SRC python/src/tensorflow/*.cc)
-        add_library(extra_tf_ops SHARED ${EXTRA_TF_OPS_SRC})
-        # create target
-        target_link_libraries(extra_tf_ops triton ${TF_LIBS})
-        target_compile_definitions(extra_tf_ops PRIVATE "-D_GLIBCXX_USE_CXX11_ABI=${TF_ABI}")
-    endif()
 endif()
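The deleted CMake block compiled the sources under python/src/tensorflow/ into a separate extra_tf_ops shared library linked against TensorFlow, pinning _GLIBCXX_USE_CXX11_ABI to TensorFlow's own C++ ABI. A minimal sketch of how such an ABI value can be recovered from an installed TensorFlow follows; tf.sysconfig.get_compile_flags() is a real TensorFlow API, but how this project actually computed TF_ABI is not shown in the commit:

# Sketch, not this project's build code: query TensorFlow for the
# _GLIBCXX_USE_CXX11_ABI value a custom-op library must be compiled with.
import tensorflow as tf

def tf_cxx11_abi():
    for flag in tf.sysconfig.get_compile_flags():
        if flag.startswith('-D_GLIBCXX_USE_CXX11_ABI='):
            return int(flag.split('=')[1])
    return None  # this TensorFlow build does not report the flag

print(tf_cxx11_abi())  # e.g. 0 for many pre-built TF 1.x wheels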

@@ -24,7 +24,7 @@ cases = []
 # Matmul
 cases += [[[4, 1024, 1024], [1024, 1024], [4, 1024, 1024], "btc,ck->btk"]]
 # Attention
-cases += [[[4, 256, 8, 2, 64], [8, 2, 512, 64], [4, 256, 8, 2, 512], "bchak,hank->bchan"]]
+# cases += [[[4, 256, 8, 2, 64], [8, 2, 512, 64], [4, 256, 8, 2, 512], "bchak,hank->bchan"]]
 if mode == MODE.TF:
     sess = tf.InteractiveSession()

@@ -1,39 +0,0 @@
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/shape_inference.h"
-
-using namespace tensorflow;
-
-class AllocEmptyOp : public OpKernel {
- public:
-  explicit AllocEmptyOp(OpKernelConstruction* context) : OpKernel(context) {}
-
-  void Compute(OpKernelContext* context) override {
-    std::cout << "executing allocempty" << std::endl;
-    // fetch input
-    const Tensor& x = context->input(0);
-    const int32* x_data = (const int32*)x.tensor_data().data();
-    // allocate output
-    Tensor* y = NULL;
-    int32 x_rank = x.dims();
-    OP_REQUIRES(context, x_rank == 1, errors::InvalidArgument("Input tensor must be 1D"));
-    int32 y_rank = x.dim_size(0);
-    TensorShape y_shapes;
-    for(size_t i = 0; i < y_rank; i++)
-      y_shapes.AddDim(x_data[i]);
-    OP_REQUIRES_OK(context, context->allocate_output(0, y_shapes, &y));
-  }
-};
-
-REGISTER_KERNEL_BUILDER(Name("AllocEmpty").HostMemory("x").Device(DEVICE_CPU).Device(DEVICE_GPU), AllocEmptyOp);
-
-REGISTER_OP("AllocEmpty")
-    .Input("x: int32")
-    .Attr("T : {bool, int8, int16, int32, int64, float16, float32, float64}")
-    .Output("y: T")
-    .SetShapeFn([](shape_inference::InferenceContext* c) {
-      shape_inference::ShapeHandle handle;
-      c->MakeShapeFromShapeTensor(0, &handle);
-      c->set_output(0, handle);
-      return Status::OK();
-    });
-;
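The deleted AllocEmpty kernel allocated an uninitialized tensor whose shape is given by a 1-D int32 input. For reference, it would have been driven from Python roughly as below; this is a hypothetical sketch pieced together from the commented-out alloc_empty call and the libextra_tf_ops.so path in the files further down, not code from the commit:

# Hypothetical usage sketch for the deleted op (assumes the shared library
# built by the deleted CMake block above exists at this path).
import tensorflow as tf

tf_extra_ops = tf.load_op_library('libextra_tf_ops.so')
shape = tf.constant([4, 1024, 1024])               # "x": 1-D int32 shape tensor
y = tf_extra_ops.alloc_empty(shape, T=tf.float32)  # "y": uninitialized, T-typed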

@@ -1,37 +0,0 @@
-#include <map>
-#include "tensorflow/core/framework/op_kernel.h"
-
-using namespace tensorflow;
-
-extern std::map<size_t, int64_t> i64scalar_map;
-
-class RegisterScalarOp : public OpKernel {
- public:
-  explicit RegisterScalarOp(OpKernelConstruction* context)
-      : OpKernel(context) {
-    OP_REQUIRES_OK(context, context->GetAttr("id", &id_));
-  }
-
-  void Compute(OpKernelContext* context) override {
-    // fetch input
-    const Tensor& x = context->input(0);
-    const int32* x_data = (const int32*)x.tensor_data().data();
-    const int32 x_rank = x.dims();
-    OP_REQUIRES(context, x_rank == 0, errors::InvalidArgument("Input must be a scalar"));
-    i64scalar_map[id_] = *x_data;
-    context->set_output(0, x);
-  }
-
- private:
-  int id_;
-};
-
-REGISTER_KERNEL_BUILDER(Name("RegisterScalar")
-                            .HostMemory("x")
-                            .Device(DEVICE_CPU), RegisterScalarOp);
-
-REGISTER_OP("RegisterScalar")
-    .Input("x: int32")
-    .Output("y: int32")
-    .Attr("id: int")
-;
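RegisterScalar was the graph-side half of a host/device handshake: running it forwards x unchanged to y while also recording the value in the process-wide i64scalar_map, keyed by the id attribute, so libtriton could read the scalar back without evaluating the graph. A hypothetical usage sketch, mirroring the deleted scalar class in the last file below:

# Hypothetical usage sketch for the deleted op.
import tensorflow as tf

tf_extra_ops = tf.load_op_library('libextra_tf_ops.so')
x = tf.constant(1024)                           # 0-D int32 scalar
handle = tf_extra_ops.register_scalar(x, id=0)  # y == x; side effect on the host:
                                                # i64scalar_map[0] = 1024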

@@ -4,8 +4,6 @@ import triton._C.libtriton as libtriton
 torch = None
 tensorflow = None
-tf_extra_ops = None
-gen_resource_variable_ops = None
 
 def _import_torch():
     global torch
@@ -14,24 +12,13 @@ def _import_torch():
 
 def _import_tensorflow():
     global tensorflow
-    global gen_resource_variable_ops
     if tensorflow is None:
         import tensorflow
-        from tensorflow.python.ops import gen_resource_variable_ops
 
-def _import_tf_extra_ops():
-    global tf_extra_ops
-    if tf_extra_ops is None:
-        path = os.path.dirname(libtriton.__file__)
-        path = os.path.join(path, 'libextra_tf_ops.so')
-        _import_tensorflow()
-        tf_extra_ops = tensorflow.load_op_library(path)
-
 def has_tensorflow():
     result = 'tensorflow' in sys.modules
     if result:
         _import_tensorflow()
-        _import_tf_extra_ops()
     return result
 
 def has_torch():
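What survives here is the lazy-import idiom: framework modules are bound to module-level globals only on first use, and has_tensorflow() triggers the binding only when the user has already imported tensorflow themselves. A condensed sketch of the remaining pattern:

# Sketch of the lazy-import pattern this module keeps after the commit.
import sys

tensorflow = None

def _import_tensorflow():
    global tensorflow
    if tensorflow is None:
        import tensorflow  # the global declaration makes this bind module-wide

def has_tensorflow():
    result = 'tensorflow' in sys.modules  # never forces the import itself
    if result:
        _import_tensorflow()
    return result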

@@ -219,7 +219,7 @@ class kernel:
         # retrieve framework op
        op_id = self.fw_id[key]
         # register grid
-        libtriton.register_grid(op_id, _make_grid(args))
+        libtriton.register_grid(op_id, args[-1])
         # id for the benchmark result
         bench_id = libtriton.make_scalar_id() if bench > 0 else -1
         # call framework function
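The replacement works because of the calling convention visible here: the grid is the last positional argument of a kernel call, so it can be handed to libtriton directly instead of going through the now-dead _make_grid wrapper (which, judging by this commit, existed to service the removed scalar objects). A hedged illustration of that convention, with invented names:

# Illustration only (invented names): why args[-1] is the grid. Operands
# come first; the launch grid, often a callable from autotuning options
# to block counts, comes last.
def launch(*args):
    *operands, grid = args            # grid is args[-1]
    return grid, operands

grid_fn = lambda opt: [64, 64]        # toy grid callback
print(launch('tensor_a', 'tensor_b', grid_fn))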

@@ -13,90 +13,22 @@ class tf_empty_proxy:
 def empty(shape, dtype):
     if fw.has_tensorflow():
-        shape = [x.handle if isinstance(x, scalar) else fw.tensorflow.constant(x) for x in shape]
+        shape = [fw.tensorflow.constant(x) for x in shape]
         shape = fw.tensorflow.stack(shape)
         return tf_empty_proxy(shape, dtype)
-        #return fw.tf_extra_ops.alloc_empty(args, T = dtype)
     elif fw.has_torch():
         return fw.torch.empty(*shape).cuda()
 
-class lazy_shape:
-    def __init__(self, shape):
-        self.shape = shape
-    def __getitem__(self, key):
-        return scalar(self.shape[key])
-
 def shape(A) :
     if fw.has_tensorflow():
         return A.shape.as_list()
-        #return lazy_shape(fw.tensorflow.shape(A))
     elif fw.has_torch():
         return A.shape
     else:
         assert False
 
-class scalar:
-    def __init__(self, x):
-        self.id = libtriton.make_scalar_id()
-        self.handle = fw.tf_extra_ops.register_scalar(x, id=self.id)
-        self.assume_initialized = False
-    def set_assume_initialized(self):
-        self.assume_initialized = True
-    def unset_assume_initialized(self):
-        self.assume_initialized = False
-    def get_value(self):
-        if self.assume_initialized:
-            return libtriton.retrieve_scalar(self.id)
-        else:
-            return self.handle
-    def __add__(self, other):
-        return self.get_value() + other
-    def __radd__(self, other):
-        return other + self.get_value()
-    def __sub__(self, other):
-        return self.get_value() - other
-    def __rsub(self, other):
-        return other - self.get_value()
-    def __mul__(self, other):
-        return self.get_value() * other
-    def __rmul(self, other):
-        return other * self.get_value()
-    def __floordiv__(self, other):
-        return self.get_value() // other
-    def __rfloordiv__(self, other):
-        return other // self.get_value()
-    def __div__(self, other):
-        return self.get_value() / other
-    def __rdiv__(self, other):
-        return other / self.get_value()
-    def __truediv__(self, other):
-        self.get_value().__truediv__(other)
-    def __rtruediv__(self, other):
-        other.__truediv__(self.get_value())
-    def __neg__(self):
-        return -self.get_value()
 
 class id_dict:
     # Lazy entry for e.g., tensorflow, when value of benchmark is
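After the change, empty() accepts only concrete Python ints for TensorFlow shapes: each entry becomes a 0-D constant and tf.stack assembles the 1-D shape tensor carried by tf_empty_proxy. A minimal sketch of that surviving path:

# Sketch of the shape handling that remains in empty() for TensorFlow.
import tensorflow as tf

shape = [4, 1024, 1024]
shape_t = tf.stack([tf.constant(x) for x in shape])  # 1-D int32 tensor [4, 1024, 1024]
print(shape_t)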