[BUILD] Added automatic nightly build releases to pip in CI; removed build-time dependence on LLVM and PyTorch (#77)

Recently there have been more and more reports about installation issues:

    - Installing Triton before upgrading pytorch can cause issues, because Triton currently uses some torch headers at build time

    - llvm-10-dev is not available on some platforms, and llvm-11-dev is not available on others (e.g., Ubuntu)

    - There are no nightly builds

This PR should fix all of these issues. Some CMake tricks are used to download and install LLVM at build time. The Triton Python bindings were modified to remove their dependence on PyTorch ops. A nightly CI job was added to generate binary wheels for all Triton versions and upload them to PyPI under the new triton-nightly project.
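As a rough illustration of the LLVM step (this is a sketch, not the code added by this PR; the LLVM version, release URL, and cache directory are assumptions for the example), downloading a pre-built LLVM release at build time could look like this:

    # Hypothetical sketch of fetching a pre-built LLVM release at build time,
    # so that users do not need llvm-10-dev / llvm-11-dev system packages.
    # The version, URL pattern, and cache directory are assumptions for this example.
    import os
    import tarfile
    import urllib.request

    LLVM_VERSION = "11.0.1"  # assumed; the real build pins its own version
    LLVM_URL = (
        "https://github.com/llvm/llvm-project/releases/download/"
        f"llvmorg-{LLVM_VERSION}/clang+llvm-{LLVM_VERSION}-x86_64-linux-gnu-ubuntu-16.04.tar.xz"
    )

    def fetch_llvm(cache_dir=os.path.expanduser("~/.triton/llvm")):
        """Download and unpack an LLVM release once; reuse the cached copy afterwards."""
        os.makedirs(cache_dir, exist_ok=True)
        archive = os.path.join(cache_dir, os.path.basename(LLVM_URL))
        install_dir = archive[:-len(".tar.xz")]
        if not os.path.isdir(install_dir):
            if not os.path.isfile(archive):
                urllib.request.urlretrieve(LLVM_URL, archive)
            with tarfile.open(archive) as tar:
                tar.extractall(cache_dir)
        # The resulting directory can then be handed to CMake,
        # e.g. -DLLVM_DIR=<install_dir>/lib/cmake/llvm
        return install_dir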

This PR will also make it very easy to use LLVM forks in the future for whatever needs we have.
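For the nightly wheels, one common pattern, sketched below as an assumption rather than the actual CI configuration, is to stamp each build with a date-based PEP 440 dev version so that every upload to the triton-nightly project is unique:

    # Hypothetical sketch: derive a unique, date-stamped version for nightly wheels.
    # The base version "1.0.0" is an assumption for illustration.
    import datetime

    def nightly_version(base="1.0.0"):
        # e.g. "1.0.0.dev20210322", a PEP 440 developmental release
        return f"{base}.dev{datetime.date.today():%Y%m%d}"

    print(nightly_version())

Since pip treats .dev versions as pre-releases, users would then typically install the nightlies with something like: pip install --pre triton-nightly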
Authored by Philippe Tillet on 2021-03-22 20:03:37 -04:00
Committed by Philippe Tillet
parent 3ad0a4d7be
commit 2f80a98776
22 changed files with 491 additions and 314 deletions


@@ -2,12 +2,15 @@ import torch
 import triton
 import pytest
 
+
 @pytest.mark.parametrize(
     "MODE, TRANS_A, TRANS_B, BLOCK, DTYPE",
-    [(mode, at, bt, block, dtype) for dtype in ["float16", "float32"] for mode in ["sdd", "dsd", "dds"]
-     for at in [False, True] for bt in [False, True] for block in [16, 32, 64]],
+    [
+        (mode, at, bt, block, dtype) for dtype in ["float16", "float32"] for mode in ["sdd", "dsd", "dds"]
+        for at in [False, True] for bt in [False, True] for block in [16, 32, 64]
+    ],
 )
-def test_matmul(MODE, TRANS_A, TRANS_B, BLOCK, DTYPE, Z=3, H=2, M=128, N=256, K=384):
+def test_matmul(MODE, TRANS_A, TRANS_B, BLOCK, DTYPE, Z=3, H=2, M=512, N=384, K=256):
     DTYPE = {"float16": torch.float16, "float32": torch.float32}[DTYPE]
     # set seed
     torch.random.manual_seed(0)
@@ -36,6 +39,7 @@ def test_matmul(MODE, TRANS_A, TRANS_B, BLOCK, DTYPE, Z=3, H=2, M=128, N=256, K=384):
     # compare
     assert triton.testing.allclose(rc, tc)
 
+
 @pytest.mark.parametrize(
     "BLOCK, WIDTH",
     [(block, width) for block in [32] for width in [256, 576, 1024, 1792]],
@@ -76,6 +80,7 @@ def test_softmax(BLOCK, WIDTH, DTYPE=torch.float16):
     # compare
     assert triton.testing.allclose(ry, ty)
 
+
 def test_attention_fwd_bwd(
     input_scale=1.0,
     tol=2e-2,
@@ -88,9 +93,7 @@ def test_attention_fwd_bwd(
 ):
     # inputs
     qkv_shape = (batch_size, n_heads, n_ctx, 64)
-    qkvs = [
-        torch.nn.Parameter(input_scale * torch.randn(qkv_shape), requires_grad=True).to(dtype).cuda() for _ in range(3)
-    ]
+    qkvs = [torch.nn.Parameter(input_scale * torch.randn(qkv_shape), requires_grad=True).to(dtype).cuda() for _ in range(3)]
     attn_mask = torch.tril(
         torch.ones(
             [n_ctx, n_ctx],
@@ -134,6 +137,7 @@ def test_attention_fwd_bwd(
     for g1, g2 in zip(grads, torch_grads):
         torch.testing.assert_allclose(g1, g2, rtol=tol, atol=tol)
 
+
 def triton_attention(
     layout,
     block: int,