[RUNTIME] Lower-level interface for executing functions
This commit is contained in:
committed by
Philippe Tillet
parent
f4f216b88a
commit
acff1b5e05
@@ -8,7 +8,9 @@ class _conv(torch.autograd.Function):
|
||||
TYPE *C __noalias __aligned(16),
|
||||
float alpha,
|
||||
// equivalent matmul
|
||||
-int M, int N, int K,
|
||||
+int M __retune,
|
||||
+int N __retune,
|
||||
+int K __retune,
|
||||
// convolution properties
|
||||
int pad_h, int pad_w, int stride_h, int stride_w,
|
||||
// pointer increment
|
||||
@@ -197,4 +199,4 @@ c = conv(a, b, pad, stride, time)
|
||||
print((cc - c).abs().max() / max(cc.max(), c.max()))
|
||||
print(time[0], 2*Z*H*W*CI*CO*R*S/(time[0]*1e-9)*1e-12)
|
||||
#zc = torch.matmul(a,b)
|
||||
-#zc_ = dot(a,b)
|
||||
+#zc_ = dot(a,b)
|
||||
|
@@ -4,7 +4,9 @@ import triton
|
||||
class _copy(torch.autograd.Function):
|
||||
src = """
|
||||
__global__ void copy(TYPE * X, TYPE * Y,
|
||||
-int M, int N, int ldx __multipleof(8)) {
|
||||
+int M __retune,
|
||||
+int N __retune,
|
||||
+int ldx __multipleof(8)) {
|
||||
// extract program ID
|
||||
int pidm = get_program_id(0); //(1)
|
||||
int pidn = get_program_id(1); //(2)
|
||||
|
@@ -7,7 +7,9 @@ class _dot(torch.autograd.Function):
|
||||
TYPE *B __noalias __readonly __aligned(16),
|
||||
TYPE *C __noalias __aligned(16),
|
||||
float alpha,
|
||||
-int M, int N, int K,
|
||||
+int M __retune,
|
||||
+int N __retune,
|
||||
+int K __retune,
|
||||
int lda __multipleof(8),
|
||||
int ldb __multipleof(8),
|
||||
int ldc __multipleof(8)) {
|
||||
@@ -128,4 +130,4 @@ b = torch.rand((K, N)).cuda()
|
||||
#zc = torch.matmul(a,b)
|
||||
zc_ = dot(a,b)
|
||||
|
||||
-#print(torch.allclose(zc, zc_))
|
||||
+#print(torch.allclose(zc, zc_))
|
||||
|
@@ -4,7 +4,9 @@ import triton
|
||||
class _transpose(torch.autograd.Function):
|
||||
src = """
|
||||
__global__ void transpose(TYPE * X, TYPE * Y,
|
||||
-int M, int N, int ldx __multipleof(8), int ldy __multipleof(8)) {
|
||||
+int M __retune,
|
||||
+int N __retune,
|
||||
+int ldx __multipleof(8), int ldy __multipleof(8)) {
|
||||
// extract program ID
|
||||
int pidm = get_program_id(0); //(1)
|
||||
int pidn = get_program_id(1); //(2)
|
||||
|
Reference in New Issue
Block a user