[RUNTIME] Lower-level interface for executing functions

This commit is contained in:
Philippe Tillet
2020-08-11 20:10:39 -04:00
committed by Philippe Tillet
parent f4f216b88a
commit acff1b5e05
25 changed files with 219 additions and 916 deletions

View File

@@ -7,7 +7,9 @@ class _dot(torch.autograd.Function):
TYPE *B __noalias __readonly __aligned(16),
TYPE *C __noalias __aligned(16),
float alpha,
-int M, int N, int K,
+int M __retune,
+int N __retune,
+int K __retune,
int lda __multipleof(8),
int ldb __multipleof(8),
int ldc __multipleof(8)) {
@@ -128,4 +130,4 @@ b = torch.rand((K, N)).cuda()
#zc = torch.matmul(a,b)
zc_ = dot(a,b)
-#print(torch.allclose(zc, zc_))
+#print(torch.allclose(zc, zc_))