[OPTIMIZER] Fixed memory coalescing (#847)

This commit is contained in:
Philippe Tillet
2022-11-07 06:22:18 -08:00
committed by GitHub
parent b6f15e214b
commit 976cf12af1
7 changed files with 25 additions and 29 deletions

View File

@@ -940,7 +940,9 @@ reduce_configs1 = [
# shapes (128, 256) and (32, 1024) are not enabled on sm86 because the required shared memory
# exceeds the limit of 99KB
# NOTE(review): the assignment on the next line is overwritten immediately by the
# shorter one after it. In this diff rendering it appears to be the pre-change
# (removed) line, with the lines below being its replacement — confirm against the
# raw diff.
reduce2d_shapes = [(2, 32), (4, 32), (4, 128), (32, 64), (64, 128)]
# Reduced shape list; the two larger shapes are kept only as a comment below until
# the TODO is resolved.
reduce2d_shapes = [(2, 32), (4, 32), (4, 128)]
# TODO: fix and uncomment
#, (32, 64), (64, 128)]
# Add the two larger shapes only when device 0 reports itself as a V100.
# The shapes must be separate list elements: `(128, 256) and (32, 1024)` is a
# boolean expression that evaluates to just `(32, 1024)`, which would silently
# drop `(128, 256)` from the list.
if 'V100' in torch.cuda.get_device_name(0):
    reduce2d_shapes += [(128, 256), (32, 1024)]