[RUNTIME] Lower-level interface for executing functions

This commit is contained in:
Philippe Tillet
2020-08-11 20:10:39 -04:00
committed by Philippe Tillet
parent f4f216b88a
commit acff1b5e05
25 changed files with 219 additions and 916 deletions

View File

@@ -4,7 +4,9 @@ import triton
class _transpose(torch.autograd.Function):
src = """
__global__ void transpose(TYPE * X, TYPE * Y,
-int M, int N, int ldx __multipleof(8), int ldy __multipleof(8)) {
+int M __retune,
+int N __retune,
+int ldx __multipleof(8), int ldy __multipleof(8)) {
// extract program ID
int pidm = get_program_id(0); //(1)
int pidn = get_program_id(1); //(2)