[PYTHON] Removed dead code for alloc_empty and register_scalar

Author: Philippe Tillet
Date:   2019-10-30 10:37:30 -04:00
Parent: f4fcaf84df
Commit: bf3dc63858

7 changed files with 3 additions and 172 deletions

@@ -34,18 +34,6 @@ if(BUILD_PYTHON_MODULE)
     # PyBind11 wrapper source file
     file(GLOB_RECURSE PYTHON_SRC python/src/bindings.cc)
     include_directories(python/src/ ${PYTHON_INCLUDE_DIRS})
-    if(TF_LIBS)
-        # extra tensorflow ops (e.g., alloc_empty)
-        # update directories
-        link_directories(${TF_LIB_DIRS})
-        include_directories(${TF_INCLUDE_DIRS})
-        # get sources
-        file(GLOB_RECURSE EXTRA_TF_OPS_SRC python/src/tensorflow/*.cc)
-        add_library(extra_tf_ops SHARED ${EXTRA_TF_OPS_SRC})
-        # create target
-        target_link_libraries(extra_tf_ops triton ${TF_LIBS})
-        target_compile_definitions(extra_tf_ops PRIVATE "-D_GLIBCXX_USE_CXX11_ABI=${TF_ABI}")
-    endif()
 endif()
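The deleted CMake block compiled the sources under python/src/tensorflow/ into a separate extra_tf_ops shared library linked against TensorFlow, pinning _GLIBCXX_USE_CXX11_ABI to TensorFlow's own C++ ABI. A minimal sketch of how such an ABI value can be recovered from an installed TensorFlow follows; tf.sysconfig.get_compile_flags() is a real TensorFlow API, but how this project actually computed TF_ABI is not shown in the commit:

# Sketch, not this project's build code: query TensorFlow for the
# _GLIBCXX_USE_CXX11_ABI value a custom-op library must be compiled with.
import tensorflow as tf

def tf_cxx11_abi():
    for flag in tf.sysconfig.get_compile_flags():
        if flag.startswith('-D_GLIBCXX_USE_CXX11_ABI='):
            return int(flag.split('=')[1])
    return None  # this TensorFlow build does not report the flag

print(tf_cxx11_abi())  # e.g. 0 for many pre-built TF 1.x wheels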

@@ -24,7 +24,7 @@ cases = []
 # Matmul
 cases += [[[4, 1024, 1024], [1024, 1024], [4, 1024, 1024], "btc,ck->btk"]]
 # Attention
-cases += [[[4, 256, 8, 2, 64], [8, 2, 512, 64], [4, 256, 8, 2, 512], "bchak,hank->bchan"]]
+# cases += [[[4, 256, 8, 2, 64], [8, 2, 512, 64], [4, 256, 8, 2, 512], "bchak,hank->bchan"]]
 if mode == MODE.TF:
     sess = tf.InteractiveSession()

@@ -1,39 +0,0 @@
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/shape_inference.h"
-
-using namespace tensorflow;
-
-class AllocEmptyOp : public OpKernel {
- public:
-  explicit AllocEmptyOp(OpKernelConstruction* context) : OpKernel(context) {}
-
-  void Compute(OpKernelContext* context) override {
-    std::cout << "executing allocempty" << std::endl;
-    // fetch input
-    const Tensor& x = context->input(0);
-    const int32* x_data = (const int32*)x.tensor_data().data();
-    // allocate output
-    Tensor* y = NULL;
-    int32 x_rank = x.dims();
-    OP_REQUIRES(context, x_rank == 1, errors::InvalidArgument("Input tensor must be 1D"));
-    int32 y_rank = x.dim_size(0);
-    TensorShape y_shapes;
-    for(size_t i = 0; i < y_rank; i++)
-      y_shapes.AddDim(x_data[i]);
-    OP_REQUIRES_OK(context, context->allocate_output(0, y_shapes, &y));
-  }
-};
-
-REGISTER_KERNEL_BUILDER(Name("AllocEmpty").HostMemory("x").Device(DEVICE_CPU).Device(DEVICE_GPU), AllocEmptyOp);
-
-REGISTER_OP("AllocEmpty")
-    .Input("x: int32")
-    .Attr("T : {bool, int8, int16, int32, int64, float16, float32, float64}")
-    .Output("y: T")
-    .SetShapeFn([](shape_inference::InferenceContext* c) {
-      shape_inference::ShapeHandle handle;
-      c->MakeShapeFromShapeTensor(0, &handle);
-      c->set_output(0, handle);
-      return Status::OK();
-    });
-;
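The deleted AllocEmpty kernel allocated an uninitialized tensor whose shape is given by a 1-D int32 input. For reference, it would have been driven from Python roughly as below; this is a hypothetical sketch pieced together from the commented-out alloc_empty call and the libextra_tf_ops.so path in the files further down, not code from the commit:

# Hypothetical usage sketch for the deleted op (assumes the shared library
# built by the deleted CMake block above exists at this path).
import tensorflow as tf

tf_extra_ops = tf.load_op_library('libextra_tf_ops.so')
shape = tf.constant([4, 1024, 1024])               # "x": 1-D int32 shape tensor
y = tf_extra_ops.alloc_empty(shape, T=tf.float32)  # "y": uninitialized, T-typed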

@@ -1,37 +0,0 @@
-#include <map>
-#include "tensorflow/core/framework/op_kernel.h"
-
-using namespace tensorflow;
-
-extern std::map<size_t, int64_t> i64scalar_map;
-
-class RegisterScalarOp : public OpKernel {
- public:
-  explicit RegisterScalarOp(OpKernelConstruction* context)
-      : OpKernel(context) {
-    OP_REQUIRES_OK(context, context->GetAttr("id", &id_));
-  }
-
-  void Compute(OpKernelContext* context) override {
-    // fetch input
-    const Tensor& x = context->input(0);
-    const int32* x_data = (const int32*)x.tensor_data().data();
-    const int32 x_rank = x.dims();
-    OP_REQUIRES(context, x_rank == 0, errors::InvalidArgument("Input must be a scalar"));
-    i64scalar_map[id_] = *x_data;
-    context->set_output(0, x);
-  }
-
- private:
-  int id_;
-};
-
-REGISTER_KERNEL_BUILDER(Name("RegisterScalar")
-                            .HostMemory("x")
-                            .Device(DEVICE_CPU), RegisterScalarOp);
-
-REGISTER_OP("RegisterScalar")
-    .Input("x: int32")
-    .Output("y: int32")
-    .Attr("id: int")
-;
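RegisterScalar was the graph-side half of a host/device handshake: running it forwards x unchanged to y while also recording the value in the process-wide i64scalar_map, keyed by the id attribute, so libtriton could read the scalar back without evaluating the graph. A hypothetical usage sketch, mirroring the deleted scalar class in the last file below:

# Hypothetical usage sketch for the deleted op.
import tensorflow as tf

tf_extra_ops = tf.load_op_library('libextra_tf_ops.so')
x = tf.constant(1024)                           # 0-D int32 scalar
handle = tf_extra_ops.register_scalar(x, id=0)  # y == x; side effect on the host:
                                                # i64scalar_map[0] = 1024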

@@ -4,8 +4,6 @@ import triton._C.libtriton as libtriton
 torch = None
 tensorflow = None
-tf_extra_ops = None
-gen_resource_variable_ops = None
 
 def _import_torch():
     global torch
@@ -14,24 +12,13 @@ def _import_torch():
 
 def _import_tensorflow():
     global tensorflow
-    global gen_resource_variable_ops
     if tensorflow is None:
         import tensorflow
-        from tensorflow.python.ops import gen_resource_variable_ops
 
-def _import_tf_extra_ops():
-    global tf_extra_ops
-    if tf_extra_ops is None:
-        path = os.path.dirname(libtriton.__file__)
-        path = os.path.join(path, 'libextra_tf_ops.so')
-        _import_tensorflow()
-        tf_extra_ops = tensorflow.load_op_library(path)
-
 def has_tensorflow():
     result = 'tensorflow' in sys.modules
     if result:
         _import_tensorflow()
-        _import_tf_extra_ops()
     return result
 
 def has_torch():
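What survives here is the lazy-import idiom: framework modules are bound to module-level globals only on first use, and has_tensorflow() triggers the binding only when the user has already imported tensorflow themselves. A condensed sketch of the remaining pattern:

# Sketch of the lazy-import pattern this module keeps after the commit.
import sys

tensorflow = None

def _import_tensorflow():
    global tensorflow
    if tensorflow is None:
        import tensorflow  # the global declaration makes this bind module-wide

def has_tensorflow():
    result = 'tensorflow' in sys.modules  # never forces the import itself
    if result:
        _import_tensorflow()
    return result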

@@ -219,7 +219,7 @@ class kernel:
         # retrieve framework op
        op_id = self.fw_id[key]
         # register grid
-        libtriton.register_grid(op_id, _make_grid(args))
+        libtriton.register_grid(op_id, args[-1])
         # id for the benchmark result
         bench_id = libtriton.make_scalar_id() if bench > 0 else -1
         # call framework function
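The replacement works because of the calling convention visible here: the grid is the last positional argument of a kernel call, so it can be handed to libtriton directly instead of going through the now-dead _make_grid wrapper (which, judging by this commit, existed to service the removed scalar objects). A hedged illustration of that convention, with invented names:

# Illustration only (invented names): why args[-1] is the grid. Operands
# come first; the launch grid, often a callable from autotuning options
# to block counts, comes last.
def launch(*args):
    *operands, grid = args            # grid is args[-1]
    return grid, operands

grid_fn = lambda opt: [64, 64]        # toy grid callback
print(launch('tensor_a', 'tensor_b', grid_fn))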

@@ -13,90 +13,22 @@ class tf_empty_proxy:
 def empty(shape, dtype):
     if fw.has_tensorflow():
-        shape = [x.handle if isinstance(x, scalar) else fw.tensorflow.constant(x) for x in shape]
+        shape = [fw.tensorflow.constant(x) for x in shape]
         shape = fw.tensorflow.stack(shape)
         return tf_empty_proxy(shape, dtype)
-        #return fw.tf_extra_ops.alloc_empty(args, T = dtype)
     elif fw.has_torch():
         return fw.torch.empty(*shape).cuda()
 
-class lazy_shape:
-    def __init__(self, shape):
-        self.shape = shape
-    def __getitem__(self, key):
-        return scalar(self.shape[key])
-
 def shape(A) :
     if fw.has_tensorflow():
         return A.shape.as_list()
-        #return lazy_shape(fw.tensorflow.shape(A))
     elif fw.has_torch():
         return A.shape
     else:
         assert False
 
-class scalar:
-    def __init__(self, x):
-        self.id = libtriton.make_scalar_id()
-        self.handle = fw.tf_extra_ops.register_scalar(x, id=self.id)
-        self.assume_initialized = False
-    def set_assume_initialized(self):
-        self.assume_initialized = True
-    def unset_assume_initialized(self):
-        self.assume_initialized = False
-    def get_value(self):
-        if self.assume_initialized:
-            return libtriton.retrieve_scalar(self.id)
-        else:
-            return self.handle
-    def __add__(self, other):
-        return self.get_value() + other
-    def __radd__(self, other):
-        return other + self.get_value()
-    def __sub__(self, other):
-        return self.get_value() - other
-    def __rsub(self, other):
-        return other - self.get_value()
-    def __mul__(self, other):
-        return self.get_value() * other
-    def __rmul(self, other):
-        return other * self.get_value()
-    def __floordiv__(self, other):
-        return self.get_value() // other
-    def __rfloordiv__(self, other):
-        return other // self.get_value()
-    def __div__(self, other):
-        return self.get_value() / other
-    def __rdiv__(self, other):
-        return other / self.get_value()
-    def __truediv__(self, other):
-        self.get_value().__truediv__(other)
-    def __rtruediv__(self, other):
-        other.__truediv__(self.get_value())
-    def __neg__(self):
-        return -self.get_value()
 
 class id_dict:
     # Lazy entry for e.g., tensorflow, when value of benchmark is
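After the change, empty() accepts only concrete Python ints for TensorFlow shapes: each entry becomes a 0-D constant and tf.stack assembles the 1-D shape tensor carried by tf_empty_proxy. A minimal sketch of that surviving path:

# Sketch of the shape handling that remains in empty() for TensorFlow.
import tensorflow as tf

shape = [4, 1024, 1024]
shape_t = tf.stack([tf.constant(x) for x in shape])  # 1-D int32 tensor [4, 1024, 1024]
print(shape_t)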