[PYTHON] bugfix in bench_cross_entropy
This commit is contained in:
@@ -30,7 +30,7 @@ def bench_op(M, N, dtype, mode, provider):
|
|||||||
if mode == 'backward':
|
if mode == 'backward':
|
||||||
y = op(x, idx)
|
y = op(x, idx)
|
||||||
dy = torch.randn_like(y)
|
dy = torch.randn_like(y)
|
||||||
ms = triton.testing.do_bench(lambda: y.backward(dy, retain_graph=True))
|
ms = triton.testing.do_bench(lambda: y.backward(dy, retain_graph=True), grad_to_none=x)
|
||||||
return num_gb / ms * 1e3
|
return num_gb / ms * 1e3
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@@ -1,24 +1,17 @@
|
|||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
|
||||||
def sparsify_tensor(x, mask, block):
    """Gather the blocks of ``x`` selected by ``mask`` into one compact tensor.

    Args:
        x: Tensor indexed here as ``x[:, h, rows, cols]``.
        mask: Tensor whose non-zero entries ``(h, i, j)`` select the
            ``block`` x ``block`` tile at block-row ``i`` and block-column
            ``j`` of slice ``h``. Unpacking three indices per entry means a
            3-D mask is expected.
        block: Edge length of each square tile.

    Returns:
        Tensor of shape ``(x.size(0), nnz, block, block)`` with the same dtype
        and device as ``x``, where ``nnz`` is the number of non-zero mask
        entries, ordered as ``mask.nonzero()`` enumerates them.
    """
    # Size the output by the number of non-zero entries rather than by
    # mask.sum(): the two agree only for a strict 0/1 mask, and the loop
    # below fills exactly one slot per non-zero entry.
    # .tolist() also fetches every coordinate as a plain Python int at once.
    coords = mask.nonzero(as_tuple=False).tolist()
    ret = torch.empty((x.size(0), len(coords), block, block), dtype=x.dtype, device=x.device)
    for idx, (h, i, j) in enumerate(coords):
        ret[:, idx, :, :] = x[:, h, i * block:(i + 1) * block, j * block:(j + 1) * block]
    return ret
|
||||||
|
|
||||||
|
|
||||||
def mask_tensor(x, mask, block, value=0):
    """Return a copy of ``x`` with every masked-out tile overwritten by ``value``.

    A tile is the ``block`` x ``block`` region at block-row ``i`` and
    block-column ``j`` of slice ``h``; it is overwritten when
    ``mask[h, i, j] == 0``. The input ``x`` itself is left untouched.
    """
    out = x.clone()
    zero_coords = mask.eq(0).nonzero(as_tuple=True)
    for h, i, j in zip(*zero_coords):
        rows = slice(i * block, (i + 1) * block)
        cols = slice(j * block, (j + 1) * block)
        out[:, h, rows, cols] = value
    return out
|
||||||
|
|
||||||
|
|
||||||
def allclose(x, y):
|
def allclose(x, y):
|
||||||
assert x.dtype == y.dtype
|
assert x.dtype == y.dtype
|
||||||
diff = abs(x - y)
|
diff = abs(x - y)
|
||||||
@@ -28,8 +21,7 @@ def allclose(x, y):
|
|||||||
err = torch.max(diff) / torch.max(x_max, y_max)
|
err = torch.max(diff) / torch.max(x_max, y_max)
|
||||||
return err < tol
|
return err < tol
|
||||||
|
|
||||||
|
def do_bench(fn, flops=0, warmup=10, rep=50, grad_to_none=None):
|
||||||
def do_bench(fn, flops=0, warmup=10, rep=50):
|
|
||||||
start_event = torch.cuda.Event(enable_timing=True)
|
start_event = torch.cuda.Event(enable_timing=True)
|
||||||
end_event = torch.cuda.Event(enable_timing=True)
|
end_event = torch.cuda.Event(enable_timing=True)
|
||||||
ret = fn()
|
ret = fn()
|
||||||
@@ -38,17 +30,16 @@ def do_bench(fn, flops=0, warmup=10, rep=50):
|
|||||||
torch.cuda.synchronize()
|
torch.cuda.synchronize()
|
||||||
start_event.record()
|
start_event.record()
|
||||||
for i in range(rep):
|
for i in range(rep):
|
||||||
|
if grad_to_none is not None:
|
||||||
|
grad_to_none.grad = None
|
||||||
fn()
|
fn()
|
||||||
end_event.record()
|
end_event.record()
|
||||||
torch.cuda.synchronize()
|
torch.cuda.synchronize()
|
||||||
time_ms = start_event.elapsed_time(end_event) / rep
|
time_ms = start_event.elapsed_time(end_event) / rep
|
||||||
return time_ms
|
return time_ms
|
||||||
|
|
||||||
|
|
||||||
class Benchmark:
|
class Benchmark:
|
||||||
def __init__(
|
def __init__(self, x_names, x_vals, y_name, y_vals, y_lines, ylabel, loglog, plot_name, args):
|
||||||
self, x_names, x_vals, y_name, y_vals, y_lines, ylabel, loglog, plot_name, args
|
|
||||||
):
|
|
||||||
self.x_names = x_names
|
self.x_names = x_names
|
||||||
self.x_vals = x_vals
|
self.x_vals = x_vals
|
||||||
self.y_name = y_name
|
self.y_name = y_name
|
||||||
@@ -59,7 +50,6 @@ class Benchmark:
|
|||||||
self.plot_name = plot_name
|
self.plot_name = plot_name
|
||||||
self.args = args
|
self.args = args
|
||||||
|
|
||||||
|
|
||||||
class Mark:
|
class Mark:
|
||||||
def __init__(self, fn, benchmarks):
|
def __init__(self, fn, benchmarks):
|
||||||
self.fn = fn
|
self.fn = fn
|
||||||
@@ -73,10 +63,7 @@ class Mark:
|
|||||||
df = pd.DataFrame(columns=[bench.x_names[0]] + bench.y_lines)
|
df = pd.DataFrame(columns=[bench.x_names[0]] + bench.y_lines)
|
||||||
for x in bench.x_vals:
|
for x in bench.x_vals:
|
||||||
x_args = {x_name: x for x_name in bench.x_names}
|
x_args = {x_name: x for x_name in bench.x_names}
|
||||||
row = [
|
row = [self.fn(**x_args, **{bench.y_name: y}, **bench.args) for y in bench.y_vals]
|
||||||
self.fn(**x_args, **{bench.y_name: y}, **bench.args)
|
|
||||||
for y in bench.y_vals
|
|
||||||
]
|
|
||||||
df.loc[len(df)] = [x] + row
|
df.loc[len(df)] = [x] + row
|
||||||
if with_plot and bench.plot_name:
|
if with_plot and bench.plot_name:
|
||||||
xlabel = " = ".join(bench.x_names)
|
xlabel = " = ".join(bench.x_names)
|
||||||
@@ -93,7 +80,6 @@ class Mark:
|
|||||||
for bench in self.benchmarks:
|
for bench in self.benchmarks:
|
||||||
self._run(bench, result_path, with_plot)
|
self._run(bench, result_path, with_plot)
|
||||||
|
|
||||||
|
|
||||||
def perf_report(benchmarks):
    """Return a decorator that wraps a function in a ``Mark`` bound to ``benchmarks``."""

    def decorate(fn):
        return Mark(fn, benchmarks)

    return decorate
|
||||||
|
Reference in New Issue
Block a user