[PYTHON] Removed dead code for alloc_empty and register_scalar
@@ -34,18 +34,6 @@ if(BUILD_PYTHON_MODULE)
     # PyBind11 wrapper source file
     file(GLOB_RECURSE PYTHON_SRC python/src/bindings.cc)
     include_directories(python/src/ ${PYTHON_INCLUDE_DIRS})
-    if(TF_LIBS)
-        # extra tensorflow ops (e.g., alloc_empty)
-        # update directories
-        link_directories(${TF_LIB_DIRS})
-        include_directories(${TF_INCLUDE_DIRS})
-        # get sources
-        file(GLOB_RECURSE EXTRA_TF_OPS_SRC python/src/tensorflow/*.cc)
-        add_library(extra_tf_ops SHARED ${EXTRA_TF_OPS_SRC})
-        # create target
-        target_link_libraries(extra_tf_ops triton ${TF_LIBS})
-        target_compile_definitions(extra_tf_ops PRIVATE "-D_GLIBCXX_USE_CXX11_ABI=${TF_ABI}")
-    endif()
 endif()


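Aside on the block removed above: a custom TensorFlow op library must be compiled with the same _GLIBCXX_USE_CXX11_ABI value as the installed TensorFlow wheel, which is what the TF_ABI definition enforced. A minimal sketch of how that value is typically discovered from Python (assuming a TF 1.x-era install; tf.sysconfig is the real API, the loop is illustrative):

    import tensorflow as tf
    # The compile flags of the installed wheel include the ABI define,
    # e.g. '-D_GLIBCXX_USE_CXX11_ABI=0'; a build script can parse TF_ABI out of it.
    for flag in tf.sysconfig.get_compile_flags():
        if 'GLIBCXX_USE_CXX11_ABI' in flag:
            print(flag.split('=')[-1])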
@@ -24,7 +24,7 @@ cases = []
 # Matmul
 cases += [[[4, 1024, 1024], [1024, 1024], [4, 1024, 1024], "btc,ck->btk"]]
 # Attention
-cases += [[[4, 256, 8, 2, 64], [8, 2, 512, 64], [4, 256, 8, 2, 512], "bchak,hank->bchan"]]
+# cases += [[[4, 256, 8, 2, 64], [8, 2, 512, 64], [4, 256, 8, 2, 512], "bchak,hank->bchan"]]

 if mode == MODE.TF:
     sess = tf.InteractiveSession()
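For reference, the "btc,ck->btk" case kept above is a batched matmul over the shared c axis; a quick shape check in NumPy (illustrative only, but NumPy shares einsum notation with the frameworks under test):

    import numpy as np
    a = np.zeros((4, 1024, 1024))        # 'btc'
    b = np.zeros((1024, 1024))           # 'ck'
    c = np.einsum('btc,ck->btk', a, b)   # contract over 'c'
    assert c.shape == (4, 1024, 1024)    # 'btk'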
@@ -1,39 +0,0 @@
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/shape_inference.h"
-
-using namespace tensorflow;
-
-class AllocEmptyOp : public OpKernel {
- public:
-  explicit AllocEmptyOp(OpKernelConstruction* context) : OpKernel(context) {}
-
-  void Compute(OpKernelContext* context) override {
-    std::cout << "executing allocempty" << std::endl;
-    // fetch input
-    const Tensor& x = context->input(0);
-    const int32* x_data = (const int32*)x.tensor_data().data();
-    // allocate output
-    Tensor* y = NULL;
-    int32 x_rank = x.dims();
-    OP_REQUIRES(context, x_rank == 1, errors::InvalidArgument("Input tensor must be 1D"));
-    int32 y_rank = x.dim_size(0);
-    TensorShape y_shapes;
-    for(size_t i = 0; i < y_rank; i++)
-      y_shapes.AddDim(x_data[i]);
-    OP_REQUIRES_OK(context, context->allocate_output(0, y_shapes, &y));
-  }
-};
-
-
-REGISTER_KERNEL_BUILDER(Name("AllocEmpty").HostMemory("x").Device(DEVICE_CPU).Device(DEVICE_GPU), AllocEmptyOp);
-REGISTER_OP("AllocEmpty")
-  .Input("x: int32")
-  .Attr("T : {bool, int8, int16, int32, int64, float16, float32, float64}")
-  .Output("y: T")
-  .SetShapeFn([](shape_inference::InferenceContext* c) {
-    shape_inference::ShapeHandle handle;
-    c->MakeShapeFromShapeTensor(0, &handle);
-    c->set_output(0, handle);
-    return Status::OK();
-  });
-;
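The deleted AllocEmpty kernel took a 1-D int32 shape tensor and allocated an uninitialized output of that shape (its shape function mirrors this via MakeShapeFromShapeTensor). A rough Python-side equivalent, modulo the zero-initialization that alloc_empty deliberately skipped (sketch assuming a TF 1.x graph-mode API):

    import tensorflow as tf
    x = tf.constant([4, 1024, 1024], dtype=tf.int32)  # the op's 'x' input
    # tf.zeros also builds its output shape from a 1-D int32 tensor,
    # but writes zeros where AllocEmpty left the memory uninitialized.
    y = tf.zeros(x, dtype=tf.float32)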
@@ -1,37 +0,0 @@
-#include <map>
-#include "tensorflow/core/framework/op_kernel.h"
-
-using namespace tensorflow;
-
-extern std::map<size_t, int64_t> i64scalar_map;
-
-class RegisterScalarOp : public OpKernel {
- public:
-  explicit RegisterScalarOp(OpKernelConstruction* context)
-    : OpKernel(context) {
-    OP_REQUIRES_OK(context, context->GetAttr("id", &id_));
-  }
-
-  void Compute(OpKernelContext* context) override {
-    // fetch input
-    const Tensor& x = context->input(0);
-    const int32* x_data = (const int32*)x.tensor_data().data();
-    const int32 x_rank = x.dims();
-    OP_REQUIRES(context, x_rank == 0, errors::InvalidArgument("Input must be a scalar"));
-    i64scalar_map[id_] = *x_data;
-    context->set_output(0, x);
-  }
-
- private:
-  int id_;
-};
-
-
-REGISTER_KERNEL_BUILDER(Name("RegisterScalar")
-                        .HostMemory("x")
-                        .Device(DEVICE_CPU), RegisterScalarOp);
-REGISTER_OP("RegisterScalar")
-  .Input("x: int32")
-  .Output("y: int32")
-  .Attr("id: int")
-;
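The deleted RegisterScalarOp was a pass-through node with a host-visible side effect: it copied an int32 scalar into the process-wide i64scalar_map under the op's id attribute, then forwarded its input unchanged. The same pattern in plain Python (hypothetical names; only the structure is taken from the op):

    scalar_registry = {}                # stands in for i64scalar_map

    def register_scalar(x, id):
        scalar_registry[id] = int(x)    # side effect: record the value on the host
        return x                        # pass through, like set_output(0, x)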
@@ -4,8 +4,6 @@ import triton._C.libtriton as libtriton

 torch = None
 tensorflow = None
-tf_extra_ops = None
-gen_resource_variable_ops = None

 def _import_torch():
     global torch
@@ -14,24 +12,13 @@ def _import_torch():

 def _import_tensorflow():
     global tensorflow
-    global gen_resource_variable_ops
     if tensorflow is None:
         import tensorflow
-        from tensorflow.python.ops import gen_resource_variable_ops
-
-def _import_tf_extra_ops():
-    global tf_extra_ops
-    if tf_extra_ops is None:
-        path = os.path.dirname(libtriton.__file__)
-        path = os.path.join(path, 'libextra_tf_ops.so')
-        _import_tensorflow()
-        tf_extra_ops = tensorflow.load_op_library(path)

 def has_tensorflow():
     result = 'tensorflow' in sys.modules
     if result:
         _import_tensorflow()
-        _import_tf_extra_ops()
     return result

 def has_torch():
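A note on the design kept here: has_tensorflow() only reports frameworks the caller's process has already imported (via sys.modules) instead of importing them itself, so Triton never forces a heavy framework load; before this commit it additionally loaded libextra_tf_ops.so on first use. Reduced to its core (sketch):

    import sys

    def has_tensorflow():
        # True only if the caller imported tensorflow; never import it here
        return 'tensorflow' in sys.modules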
@@ -219,7 +219,7 @@ class kernel:
     # retrieve framework op
     op_id = self.fw_id[key]
     # register grid
-    libtriton.register_grid(op_id, _make_grid(args))
+    libtriton.register_grid(op_id, args[-1])
     # id for the benchmark result
     bench_id = libtriton.make_scalar_id() if bench > 0 else -1
     # call framework function
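As the change above shows, the launch grid is now taken directly from the kernel call's last positional argument rather than going through the removed _make_grid helper (which existed to resolve scalar handles). Illustrative call shape (kernel and argument names are hypothetical; the trailing-argument convention is what the diff relies on):

    # the caller appends the launch grid as the final argument
    grid = lambda opt: [64, 64]      # e.g. number of program blocks per axis
    some_kernel(x, y, z, grid)       # args[-1] is registered as the grid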
@@ -13,90 +13,22 @@ class tf_empty_proxy:

 def empty(shape, dtype):
     if fw.has_tensorflow():
-        shape = [x.handle if isinstance(x, scalar) else fw.tensorflow.constant(x) for x in shape]
+        shape = [fw.tensorflow.constant(x) for x in shape]
         shape = fw.tensorflow.stack(shape)
         return tf_empty_proxy(shape, dtype)
-        #return fw.tf_extra_ops.alloc_empty(args, T = dtype)
     elif fw.has_torch():
         return fw.torch.empty(*shape).cuda()

-class lazy_shape:
-
-    def __init__(self, shape):
-        self.shape = shape
-
-    def __getitem__(self, key):
-        return scalar(self.shape[key])
-
 def shape(A) :
     if fw.has_tensorflow():
         return A.shape.as_list()
-        #return lazy_shape(fw.tensorflow.shape(A))
     elif fw.has_torch():
         return A.shape
     else:
         assert False

-
-class scalar:
-
-    def __init__(self, x):
-        self.id = libtriton.make_scalar_id()
-        self.handle = fw.tf_extra_ops.register_scalar(x, id=self.id)
-        self.assume_initialized = False
-
-    def set_assume_initialized(self):
-        self.assume_initialized = True
-
-    def unset_assume_initialized(self):
-        self.assume_initialized = False
-
-    def get_value(self):
-        if self.assume_initialized:
-            return libtriton.retrieve_scalar(self.id)
-        else:
-            return self.handle
-
-    def __add__(self, other):
-        return self.get_value() + other
-
-    def __radd__(self, other):
-        return other + self.get_value()
-
-    def __sub__(self, other):
-        return self.get_value() - other
-
-    def __rsub(self, other):
-        return other - self.get_value()
-
-    def __mul__(self, other):
-        return self.get_value() * other
-
-    def __rmul(self, other):
-        return other * self.get_value()
-
-    def __floordiv__(self, other):
-        return self.get_value() // other
-
-    def __rfloordiv__(self, other):
-        return other // self.get_value()
-
-    def __div__(self, other):
-        return self.get_value() / other
-
-    def __rdiv__(self, other):
-        return other / self.get_value()
-
-    def __truediv__(self, other):
-        self.get_value().__truediv__(other)
-
-    def __rtruediv__(self, other):
-        other.__truediv__(self.get_value())
-
-    def __neg__(self):
-        return -self.get_value()


 class id_dict:

     # Lazy entry for e.g., tensorflow, when value of benchmark is
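With the scalar class gone, every element of shape passed to empty() is a plain Python int, so the TensorFlow branch can wrap each one in a constant and stack them into a rank-1 shape tensor that tf_empty_proxy carries until allocation actually happens. What that builds, concretely (sketch, TF 1.x-style):

    import tensorflow as tf
    shape = [tf.constant(x) for x in [4, 1024, 1024]]  # three int32 scalars
    shape = tf.stack(shape)  # rank-1 tensor [4, 1024, 1024], dtype int32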