Add bf16/fp16/fp64 support for ty_to_cpp (#800)
In `torch._inductor`, we [convert 0d CPU tensors to scalars during Triton codegen](https://github.com/pytorch/pytorch/pull/87329), so we need to add the missing Triton-side support for bf16/fp16/fp64 in `ty_to_cpp`.
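For context, a hedged sketch of the scenario the linked PyTorch PR describes (the variable names and the bf16 example are illustrative, not taken from either patch):

```python
import torch

# A 0d CPU tensor such as this one is passed to a Triton kernel by Inductor
# as a plain Python scalar rather than as a tensor pointer.
x = torch.tensor(1.5, dtype=torch.bfloat16)
assert x.dim() == 0 and x.device.type == "cpu"
py_scalar = x.item()  # plain Python float

# The kernel signature can therefore carry scalar type strings such as
# "bf16", "fp16", or "fp64", and the generated C launcher needs ty_to_cpp
# to map each of them to a C type.
```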
```diff
@@ -913,7 +913,10 @@ def ty_to_cpp(ty):
         "i64": "int64_t",
         "u32": "uint32_t",
         "u64": "uint64_t",
+        "fp16": "float",
+        "bf16": "float",
         "fp32": "float",
+        "fp64": "double",
     }[ty]
@@ -943,6 +946,8 @@ def generate_launcher(identifier, constants, signature):
         'i64': 'int64_t',
         'u32': 'uint32_t',
         'u64': 'uint64_t',
+        'fp16': 'float',
+        'bf16': 'float',
         'fp32': 'float',
         'fp64': 'double',
     }[ty]
```
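Taken together, the patch maps fp16/bf16 scalars to C `float` on the host side (plain C has no portable half-precision scalar type) and fp64 to `double`. Below is a small self-contained sketch of the resulting behaviour; the `signature` dict and the `arg_decls` expression are illustrative assumptions, not Triton's actual launcher code:

```python
# Sketch of the patched scalar-type table (entries for the smaller integer
# types are elided; pointer types like "*fp32" are handled separately).
def ty_to_cpp(ty):
    return {
        "i64": "int64_t",
        "u32": "uint32_t",
        "u64": "uint64_t",
        "fp16": "float",   # widened to float on the host side
        "bf16": "float",   # likewise for bfloat16
        "fp32": "float",
        "fp64": "double",
    }[ty]

# Hypothetical use: declare the C arguments of a generated launcher from a
# kernel signature that may now contain bf16/fp16/fp64 scalars.
signature = {0: "fp16", 1: "bf16", 2: "fp64", 3: "i64"}
arg_decls = ", ".join(f"{ty_to_cpp(ty)} arg{i}" for i, ty in signature.items())
print(arg_decls)  # float arg0, float arg1, double arg2, int64_t arg3
```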