[RUNTIME] Lower-level interface for executing functions

commit acff1b5e05
parent f4f216b88a
Author: Philippe Tillet
Date: 2020-08-11 20:10:39 -04:00
Committed by: Philippe Tillet
25 changed files with 219 additions and 916 deletions
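Every hunk shown below makes the same edit across the tutorial kernels: the scalar shape arguments (M, N, K) are split onto separate lines and annotated with the new __retune qualifier, which appears to mark values whose change should trigger a fresh autotuning pass rather than reuse of a configuration tuned for a different shape. A minimal sketch of that idea in plain Python; the cache, pick_config, and launch names are hypothetical, not Triton's API:

# Illustrative sketch only, not Triton's runtime. Parameters marked
# __retune act like a key into a tuning cache: a new (M, N, K) tunes
# fresh tile sizes, while a repeated shape reuses the cached config.
tuning_cache = {}

def pick_config(M, N, K):
    # Stand-in for a real tuning pass: derive tile sizes from the shape.
    return {"TM": min(128, M), "TN": min(128, N)}

def launch(M, N, K):
    key = (M, N, K)                       # values of the __retune parameters
    if key not in tuning_cache:
        tuning_cache[key] = pick_config(M, N, K)
    return tuning_cache[key]

print(launch(1024, 1024, 64))             # first call: tunes for this shape
print(launch(1024, 1024, 64))             # second call: cache hit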

@@ -8,7 +8,9 @@ class _conv(torch.autograd.Function):
            TYPE *C __noalias __aligned(16),
            float alpha,
            // equivalent matmul
-           int M, int N, int K,
+           int M __retune,
+           int N __retune,
+           int K __retune,
            // convolution properties
            int pad_h, int pad_w, int stride_h, int stride_w,
            // pointer increment
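The other qualifiers in this signature are compiler hints rather than semantics: __noalias promises the tensors do not overlap, __aligned(16) promises a 16-byte-aligned base pointer, and __multipleof(8), used on the stride arguments in the files below, promises divisibility by 8 so the compiler can emit vectorized memory accesses. These preconditions are easy to sanity-check from the PyTorch side; a small check, assuming a CUDA build of PyTorch:

import torch

a = torch.rand((512, 512), device="cuda")
# CUDA allocations are at least 256-byte aligned, so __aligned(16) holds:
assert a.data_ptr() % 16 == 0
# A leading dimension of 512 satisfies __multipleof(8):
assert a.stride(0) % 8 == 0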
@@ -197,4 +199,4 @@ c = conv(a, b, pad, stride, time)
 print((cc - c).abs().max() / max(cc.max(), c.max()))
 print(time[0], 2*Z*H*W*CI*CO*R*S/(time[0]*1e-9)*1e-12)
 #zc = torch.matmul(a,b)
-#zc_ = dot(a,b)
\ No newline at end of file
+#zc_ = dot(a,b)
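The benchmark print above converts a measured kernel time into a rate: a convolution with batch Z, output H x W, channels CI to CO, and filter R x S performs 2*Z*H*W*CI*CO*R*S floating-point operations (one multiply and one add per accumulation), time[0] is in nanoseconds, and the final 1e-12 scales the result to TFLOPS. The same arithmetic spelled out, with made-up sizes and timing:

# Worked example of the TFLOPS formula above; all numbers are hypothetical.
Z, H, W, CI, CO, R, S = 16, 56, 56, 64, 64, 3, 3
time_ns = 250_000                          # pretend measured kernel time

flops = 2 * Z * H * W * CI * CO * R * S    # one multiply + one add per MAC
seconds = time_ns * 1e-9
print(flops / seconds * 1e-12, "TFLOPS")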

@@ -4,7 +4,9 @@ import triton
 class _copy(torch.autograd.Function):
     src = """
 __global__ void copy(TYPE * X, TYPE * Y,
-                     int M, int N, int ldx __multipleof(8)) {
+                     int M __retune,
+                     int N __retune,
+                     int ldx __multipleof(8)) {
   // extract program ID
   int pidm = get_program_id(0); //(1)
   int pidn = get_program_id(1); //(2)
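get_program_id(0) and get_program_id(1) give each kernel instance its coordinates in the 2D launch grid, and each instance handles one TM x TN tile of the M x N matrix. A plain-Python rendering of that mapping; TM, TN, and the ceil-division grid are the usual tiling convention, not code from this commit:

# How a 2D grid of program IDs tiles an M x N matrix.
M, N, TM, TN = 100, 70, 32, 32
grid = ((M + TM - 1) // TM, (N + TN - 1) // TN)     # ceil-div in each dim

for pidm in range(grid[0]):
    for pidn in range(grid[1]):
        rm = range(pidm * TM, min((pidm + 1) * TM, M))  # rows of this tile
        rn = range(pidn * TN, min((pidn + 1) * TN, N))  # cols of this tile
        # a real kernel would copy X[rm, rn] -> Y[rm, rn] here, with
        # out-of-range offsets masked rather than clipped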

@@ -7,7 +7,9 @@ class _dot(torch.autograd.Function):
              TYPE *B __noalias __readonly __aligned(16),
              TYPE *C __noalias __aligned(16),
              float alpha,
-             int M, int N, int K,
+             int M __retune,
+             int N __retune,
+             int K __retune,
              int lda __multipleof(8),
              int ldb __multipleof(8),
              int ldc __multipleof(8)) {
@@ -128,4 +130,4 @@ b = torch.rand((K, N)).cuda()
 #zc = torch.matmul(a,b)
 zc_ = dot(a,b)
-#print(torch.allclose(zc, zc_))
\ No newline at end of file
+#print(torch.allclose(zc, zc_))
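The commented-out check compares the Triton result against torch.matmul with torch.allclose, while the conv example earlier prints a max relative error instead. Both metrics in one runnable snippet; the double-precision matmul stands in for a kernel with a different accumulation order, since dot itself is not available here:

import torch

torch.manual_seed(0)
M, N, K = 256, 256, 1024
a = torch.rand((M, K))
b = torch.rand((K, N))

zc = torch.matmul(a, b)                     # fp32 reference
zc_ = (a.double() @ b.double()).float()     # stand-in for dot(a, b)
print(torch.allclose(zc, zc_))              # strict elementwise tolerance
rel = (zc - zc_).abs().max() / zc.abs().max()
print(rel.item())                           # max relative error, as in conv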

@@ -4,7 +4,9 @@ import triton
 class _transpose(torch.autograd.Function):
     src = """
 __global__ void transpose(TYPE * X, TYPE * Y,
-               int M, int N, int ldx __multipleof(8), int ldy __multipleof(8)) {
+               int M __retune,
+               int N __retune,
+               int ldx __multipleof(8), int ldy __multipleof(8)) {
   // extract program ID
   int pidm = get_program_id(0); //(1)
   int pidn = get_program_id(1); //(2)
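With a row-major layout, element (m, n) of X sits at offset m*ldx + n, and its transpose lands at offset n*ldy + m in Y; the kernel applies exactly this index swap, one TM x TN tile per program ID. The same math on flat Python lists, with illustrative sizes:

# Strided transpose on flat buffers: Y[n*ldy + m] = X[m*ldx + n],
# assuming row-major storage for both matrices.
M, N = 3, 5
ldx, ldy = N, M          # tight leading dimensions; padded ones also work
X = list(range(M * N))   # M x N input
Y = [0] * (N * M)        # N x M output

for m in range(M):
    for n in range(N):
        Y[n * ldy + m] = X[m * ldx + n]

print(X)
print(Y)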