Files
triton/python/triton/language/libdevice.py
Shintaro Iwasaki 3c635449e5 [Triton] Support math and libdevice ops (#91)
This PR adds basic math ops by using `MathDialect` and `libdevice` ops by using `extern_elementwise`. This is needed to compile some tutorial code (e.g., `softmax`). This PR implements only the path from the frontend down to TritonGPU-MLIR; lowering to PTX is not included.
- Currently this works only as far as TritonGPU; the result cannot be lowered to PTX yet.
- No special optimizations (e.g., constant folding) are applied.
  - 14.x does not define folders for many operators for math ops, but 15.x seems to increase its coverage: https://github.com/llvm/llvm-project/blob/llvmorg-15.0.0-rc3/mlir/include/mlir/Dialect/Math/IR/MathOps.td
  - No constant folding etc for `libdevice` ops.

```py
import triton
import triton.language as tl
import sys

@triton.jit
def add_kernel(
    x_ptr,
    y_ptr,
    BLOCK_SIZE: tl.constexpr,
):
    offsets = tl.arange(0, BLOCK_SIZE)
    x = tl.load(x_ptr + offsets)
    x = tl.sin(x)
    output = tl.libdevice.sin(x)
    output = tl.libdevice.fdiv_rn(output, output)
    output = tl.libdevice.fmaf_rd(output, output, output)
    tl.store(y_ptr + offsets, output)


if __name__ == "__main__" and len(sys.argv) >= 2:
    signature = "*fp32,*fp32"
    constants = {'BLOCK_SIZE': 1024}
    output = triton.compile(add_kernel, signature, device=0, constants=constants, output="ttgir")
    print(output)
```
->
```llvm
#blocked = #triton_gpu.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}>
module attributes {"triton_gpu.num-warps" = 4 : i32} {
  func @add_kernel__Pfp32_Pfp32__2c1024(%arg0: !tt.ptr<f32>, %arg1: !tt.ptr<f32>) {
    %0 = tt.make_range {end = 1024 : i32, start = 0 : i32} : tensor<1024xi32, #blocked>
    %1 = tt.splat %arg0 : (!tt.ptr<f32>) -> tensor<1024x!tt.ptr<f32>, #blocked>
    %2 = tt.getelementptr %1, %0 : tensor<1024x!tt.ptr<f32>, #blocked>
    %3 = tt.load %2 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<1024xf32, #blocked>
    %4 = math.sin %3 : tensor<1024xf32, #blocked>
    %5 = tt.ext_elemwise %4 {libname = "libdevice", libpath = "/home/siwasaki/triton/python/triton/language/libdevice.10.bc", symbol = "__nv_sinf"} : tensor<1024xf32, #blocked> -> tensor<1024xf32, #blocked>
    %6 = tt.ext_elemwise %5, %5 {libname = "libdevice", libpath = "/home/siwasaki/triton/python/triton/language/libdevice.10.bc", symbol = "__nv_fdiv_rn"} : tensor<1024xf32, #blocked>, tensor<1024xf32, #blocked> -> tensor<1024xf32, #blocked>
    %7 = tt.ext_elemwise %6, %6, %6 {libname = "libdevice", libpath = "/home/siwasaki/triton/python/triton/language/libdevice.10.bc", symbol = "__nv_fmaf_rd"} : tensor<1024xf32, #blocked>, tensor<1024xf32, #blocked>, tensor<1024xf32, #blocked> -> tensor<1024xf32, #blocked>
    %8 = tt.splat %arg1 : (!tt.ptr<f32>) -> tensor<1024x!tt.ptr<f32>, #blocked>
    %9 = tt.getelementptr %8, %0 : tensor<1024x!tt.ptr<f32>, #blocked>
    tt.store %9, %7 : tensor<1024xf32, #blocked>
    return
  }
}
```
2022-09-01 16:34:27 -07:00

1662 lines
70 KiB
Python

import os
from . import core, extern
# Absolute path of the libdevice bitcode file, expected in the same directory
# as this module. The "/" separator is used literally on every platform.
LIBDEVICE_PATH = "/".join(
    (os.path.dirname(os.path.abspath(__file__)), "libdevice.10.bc")
)
@extern.extern
def clz(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice ``__nv_clz``/``__nv_clzll`` table."""
    int32 = core.dtype("int32")
    # Both the int32 and the int64 entry declare an int32 result.
    table = {
        (int32,): ("__nv_clz", int32),
        (core.dtype("int64"),): ("__nv_clzll", int32),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def popc(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice ``__nv_popc``/``__nv_popcll`` table."""
    int32 = core.dtype("int32")
    # Both the int32 and the int64 entry declare an int32 result.
    table = {
        (int32,): ("__nv_popc", int32),
        (core.dtype("int64"),): ("__nv_popcll", int32),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def byte_perm(arg0, arg1, arg2, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_byte_perm`` (int32 x3 -> int32)."""
    int32 = core.dtype("int32")
    table = {(int32, int32, int32): ("__nv_byte_perm", int32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1, arg2], table, _builder)
@extern.extern
def min(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice ``min`` symbol table.

    Every entry maps a same-typed operand pair to a symbol whose declared
    result dtype equals the operand dtype.
    """
    by_dtype = (
        ("int32", "__nv_min"),
        ("uint32", "__nv_umin"),
        ("int64", "__nv_llmin"),
        ("uint64", "__nv_ullmin"),
        ("fp32", "__nv_fminf"),
        ("fp64", "__nv_fmin"),
    )
    table = {
        (core.dtype(name), core.dtype(name)): (symbol, core.dtype(name))
        for name, symbol in by_dtype
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def max(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice ``max`` symbol table.

    Every entry maps a same-typed operand pair to a symbol whose declared
    result dtype equals the operand dtype.
    """
    by_dtype = (
        ("int32", "__nv_max"),
        ("uint32", "__nv_umax"),
        ("int64", "__nv_llmax"),
        ("uint64", "__nv_ullmax"),
        ("fp32", "__nv_fmaxf"),
        ("fp64", "__nv_fmax"),
    )
    table = {
        (core.dtype(name), core.dtype(name)): (symbol, core.dtype(name))
        for name, symbol in by_dtype
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def mulhi(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice ``__nv_mulhi``/``__nv_umulhi`` table."""
    int32 = core.dtype("int32")
    uint32 = core.dtype("uint32")
    table = {
        (int32, int32): ("__nv_mulhi", int32),
        (uint32, uint32): ("__nv_umulhi", uint32),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def mul64hi(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice ``__nv_mul64hi``/``__nv_umul64hi`` table."""
    int64 = core.dtype("int64")
    uint64 = core.dtype("uint64")
    table = {
        (int64, int64): ("__nv_mul64hi", int64),
        (uint64, uint64): ("__nv_umul64hi", uint64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def mul24(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice ``__nv_mul24``/``__nv_umul24`` table."""
    int32 = core.dtype("int32")
    uint32 = core.dtype("uint32")
    table = {
        (int32, int32): ("__nv_mul24", int32),
        (uint32, uint32): ("__nv_umul24", uint32),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def brev(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice ``__nv_brev``/``__nv_brevll`` table."""
    int32 = core.dtype("int32")
    int64 = core.dtype("int64")
    table = {
        (int32,): ("__nv_brev", int32),
        (int64,): ("__nv_brevll", int64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def sad(arg0, arg1, arg2, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice ``__nv_sad``/``__nv_usad`` table."""
    int32 = core.dtype("int32")
    uint32 = core.dtype("uint32")
    # The signed entry takes a uint32 third operand; the unsigned entry is uint32 throughout.
    table = {
        (int32, int32, uint32): ("__nv_sad", int32),
        (uint32, uint32, uint32): ("__nv_usad", uint32),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1, arg2], table, _builder)
@extern.extern
def abs(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice ``abs`` symbol table.

    Every entry declares a result dtype equal to its operand dtype.
    """
    by_dtype = (
        ("int32", "__nv_abs"),
        ("int64", "__nv_llabs"),
        ("fp32", "__nv_fabsf"),
        ("fp64", "__nv_fabs"),
    )
    table = {
        (core.dtype(name),): (symbol, core.dtype(name))
        for name, symbol in by_dtype
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def floor(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice ``__nv_floorf``/``__nv_floor`` table."""
    fp32 = core.dtype("fp32")
    fp64 = core.dtype("fp64")
    table = {
        (fp32,): ("__nv_floorf", fp32),
        (fp64,): ("__nv_floor", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def rcp64h(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_rcp64h`` (fp64 -> fp64)."""
    fp64 = core.dtype("fp64")
    table = {(fp64,): ("__nv_rcp64h", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def rsqrt(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice ``__nv_rsqrtf``/``__nv_rsqrt`` table."""
    fp32 = core.dtype("fp32")
    fp64 = core.dtype("fp64")
    table = {
        (fp32,): ("__nv_rsqrtf", fp32),
        (fp64,): ("__nv_rsqrt", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def ceil(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice ``__nv_ceil``/``__nv_ceilf`` table."""
    fp64 = core.dtype("fp64")
    fp32 = core.dtype("fp32")
    table = {
        (fp64,): ("__nv_ceil", fp64),
        (fp32,): ("__nv_ceilf", fp32),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def trunc(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice ``__nv_trunc``/``__nv_truncf`` table."""
    fp64 = core.dtype("fp64")
    fp32 = core.dtype("fp32")
    table = {
        (fp64,): ("__nv_trunc", fp64),
        (fp32,): ("__nv_truncf", fp32),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def exp2(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice ``__nv_exp2f``/``__nv_exp2`` table."""
    fp32 = core.dtype("fp32")
    fp64 = core.dtype("fp64")
    table = {
        (fp32,): ("__nv_exp2f", fp32),
        (fp64,): ("__nv_exp2", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def saturatef(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_saturatef`` (fp32 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32,): ("__nv_saturatef", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def fmaf_rn(arg0, arg1, arg2, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fmaf_rn`` (fp32 x3 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32, fp32, fp32): ("__nv_fmaf_rn", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1, arg2], table, _builder)
@extern.extern
def fmaf_rz(arg0, arg1, arg2, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fmaf_rz`` (fp32 x3 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32, fp32, fp32): ("__nv_fmaf_rz", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1, arg2], table, _builder)
@extern.extern
def fmaf_rd(arg0, arg1, arg2, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fmaf_rd`` (fp32 x3 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32, fp32, fp32): ("__nv_fmaf_rd", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1, arg2], table, _builder)
@extern.extern
def fmaf_ru(arg0, arg1, arg2, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fmaf_ru`` (fp32 x3 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32, fp32, fp32): ("__nv_fmaf_ru", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1, arg2], table, _builder)
@extern.extern
def fmaf_ieee_rn(arg0, arg1, arg2, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fmaf_ieee_rn`` (fp32 x3 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32, fp32, fp32): ("__nv_fmaf_ieee_rn", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1, arg2], table, _builder)
@extern.extern
def fmaf_ieee_rz(arg0, arg1, arg2, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fmaf_ieee_rz`` (fp32 x3 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32, fp32, fp32): ("__nv_fmaf_ieee_rz", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1, arg2], table, _builder)
@extern.extern
def fmaf_ieee_rd(arg0, arg1, arg2, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fmaf_ieee_rd`` (fp32 x3 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32, fp32, fp32): ("__nv_fmaf_ieee_rd", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1, arg2], table, _builder)
@extern.extern
def fmaf_ieee_ru(arg0, arg1, arg2, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fmaf_ieee_ru`` (fp32 x3 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32, fp32, fp32): ("__nv_fmaf_ieee_ru", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1, arg2], table, _builder)
@extern.extern
def fma_rn(arg0, arg1, arg2, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fma_rn`` (fp64 x3 -> fp64)."""
    fp64 = core.dtype("fp64")
    table = {(fp64, fp64, fp64): ("__nv_fma_rn", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1, arg2], table, _builder)
@extern.extern
def fma_rz(arg0, arg1, arg2, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fma_rz`` (fp64 x3 -> fp64)."""
    fp64 = core.dtype("fp64")
    table = {(fp64, fp64, fp64): ("__nv_fma_rz", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1, arg2], table, _builder)
@extern.extern
def fma_rd(arg0, arg1, arg2, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fma_rd`` (fp64 x3 -> fp64)."""
    fp64 = core.dtype("fp64")
    table = {(fp64, fp64, fp64): ("__nv_fma_rd", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1, arg2], table, _builder)
@extern.extern
def fma_ru(arg0, arg1, arg2, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fma_ru`` (fp64 x3 -> fp64)."""
    fp64 = core.dtype("fp64")
    table = {(fp64, fp64, fp64): ("__nv_fma_ru", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1, arg2], table, _builder)
@extern.extern
def fast_fdividef(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fast_fdividef`` (fp32 x2 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32, fp32): ("__nv_fast_fdividef", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def fdiv_rn(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fdiv_rn`` (fp32 x2 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32, fp32): ("__nv_fdiv_rn", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def fdiv_rz(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fdiv_rz`` (fp32 x2 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32, fp32): ("__nv_fdiv_rz", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def fdiv_rd(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fdiv_rd`` (fp32 x2 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32, fp32): ("__nv_fdiv_rd", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def fdiv_ru(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fdiv_ru`` (fp32 x2 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32, fp32): ("__nv_fdiv_ru", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def frcp_rn(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_frcp_rn`` (fp32 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32,): ("__nv_frcp_rn", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def frcp_rz(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_frcp_rz`` (fp32 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32,): ("__nv_frcp_rz", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def frcp_rd(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_frcp_rd`` (fp32 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32,): ("__nv_frcp_rd", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def frcp_ru(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_frcp_ru`` (fp32 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32,): ("__nv_frcp_ru", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def fsqrt_rn(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fsqrt_rn`` (fp32 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32,): ("__nv_fsqrt_rn", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def fsqrt_rz(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fsqrt_rz`` (fp32 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32,): ("__nv_fsqrt_rz", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def fsqrt_rd(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fsqrt_rd`` (fp32 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32,): ("__nv_fsqrt_rd", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def fsqrt_ru(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fsqrt_ru`` (fp32 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32,): ("__nv_fsqrt_ru", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def ddiv_rn(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_ddiv_rn`` (fp64 x2 -> fp64)."""
    fp64 = core.dtype("fp64")
    table = {(fp64, fp64): ("__nv_ddiv_rn", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def ddiv_rz(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_ddiv_rz`` (fp64 x2 -> fp64)."""
    fp64 = core.dtype("fp64")
    table = {(fp64, fp64): ("__nv_ddiv_rz", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def ddiv_rd(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_ddiv_rd`` (fp64 x2 -> fp64)."""
    fp64 = core.dtype("fp64")
    table = {(fp64, fp64): ("__nv_ddiv_rd", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def ddiv_ru(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_ddiv_ru`` (fp64 x2 -> fp64)."""
    fp64 = core.dtype("fp64")
    table = {(fp64, fp64): ("__nv_ddiv_ru", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def drcp_rn(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_drcp_rn`` (fp64 -> fp64)."""
    fp64 = core.dtype("fp64")
    table = {(fp64,): ("__nv_drcp_rn", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def drcp_rz(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_drcp_rz`` (fp64 -> fp64)."""
    fp64 = core.dtype("fp64")
    table = {(fp64,): ("__nv_drcp_rz", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def drcp_rd(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_drcp_rd`` (fp64 -> fp64)."""
    fp64 = core.dtype("fp64")
    table = {(fp64,): ("__nv_drcp_rd", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def drcp_ru(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_drcp_ru`` (fp64 -> fp64)."""
    fp64 = core.dtype("fp64")
    table = {(fp64,): ("__nv_drcp_ru", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def dsqrt_rn(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_dsqrt_rn`` (fp64 -> fp64)."""
    fp64 = core.dtype("fp64")
    table = {(fp64,): ("__nv_dsqrt_rn", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def dsqrt_rz(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_dsqrt_rz`` (fp64 -> fp64)."""
    fp64 = core.dtype("fp64")
    table = {(fp64,): ("__nv_dsqrt_rz", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def dsqrt_rd(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_dsqrt_rd`` (fp64 -> fp64)."""
    fp64 = core.dtype("fp64")
    table = {(fp64,): ("__nv_dsqrt_rd", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def dsqrt_ru(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_dsqrt_ru`` (fp64 -> fp64)."""
    fp64 = core.dtype("fp64")
    table = {(fp64,): ("__nv_dsqrt_ru", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def sqrt(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice ``__nv_sqrtf``/``__nv_sqrt`` table."""
    fp32 = core.dtype("fp32")
    fp64 = core.dtype("fp64")
    table = {
        (fp32,): ("__nv_sqrtf", fp32),
        (fp64,): ("__nv_sqrt", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def dadd_rn(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_dadd_rn`` (fp64 x2 -> fp64)."""
    fp64 = core.dtype("fp64")
    table = {(fp64, fp64): ("__nv_dadd_rn", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def dadd_rz(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_dadd_rz`` (fp64 x2 -> fp64)."""
    fp64 = core.dtype("fp64")
    table = {(fp64, fp64): ("__nv_dadd_rz", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def dadd_rd(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_dadd_rd`` (fp64 x2 -> fp64)."""
    fp64 = core.dtype("fp64")
    table = {(fp64, fp64): ("__nv_dadd_rd", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def dadd_ru(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_dadd_ru`` (fp64 x2 -> fp64)."""
    fp64 = core.dtype("fp64")
    table = {(fp64, fp64): ("__nv_dadd_ru", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def dmul_rn(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_dmul_rn`` (fp64 x2 -> fp64)."""
    fp64 = core.dtype("fp64")
    table = {(fp64, fp64): ("__nv_dmul_rn", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def dmul_rz(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_dmul_rz`` (fp64 x2 -> fp64)."""
    fp64 = core.dtype("fp64")
    table = {(fp64, fp64): ("__nv_dmul_rz", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def dmul_rd(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_dmul_rd`` (fp64 x2 -> fp64)."""
    fp64 = core.dtype("fp64")
    table = {(fp64, fp64): ("__nv_dmul_rd", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def dmul_ru(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_dmul_ru`` (fp64 x2 -> fp64)."""
    fp64 = core.dtype("fp64")
    table = {(fp64, fp64): ("__nv_dmul_ru", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def fadd_rd(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fadd_rd`` (fp32 x2 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32, fp32): ("__nv_fadd_rd", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def fadd_ru(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fadd_ru`` (fp32 x2 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32, fp32): ("__nv_fadd_ru", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def fmul_rd(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fmul_rd`` (fp32 x2 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32, fp32): ("__nv_fmul_rd", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def fmul_ru(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fmul_ru`` (fp32 x2 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32, fp32): ("__nv_fmul_ru", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def fadd_rn(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fadd_rn`` (fp32 x2 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32, fp32): ("__nv_fadd_rn", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def fadd_rz(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fadd_rz`` (fp32 x2 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32, fp32): ("__nv_fadd_rz", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def fmul_rn(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fmul_rn`` (fp32 x2 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32, fp32): ("__nv_fmul_rn", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def fmul_rz(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_fmul_rz`` (fp32 x2 -> fp32)."""
    fp32 = core.dtype("fp32")
    table = {(fp32, fp32): ("__nv_fmul_rz", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def double2float_rn(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_double2float_rn`` (fp64 -> fp32)."""
    table = {(core.dtype("fp64"),): ("__nv_double2float_rn", core.dtype("fp32"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def double2float_rz(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_double2float_rz`` (fp64 -> fp32)."""
    table = {(core.dtype("fp64"),): ("__nv_double2float_rz", core.dtype("fp32"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def double2float_rd(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_double2float_rd`` (fp64 -> fp32)."""
    table = {(core.dtype("fp64"),): ("__nv_double2float_rd", core.dtype("fp32"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def double2float_ru(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_double2float_ru`` (fp64 -> fp32)."""
    table = {(core.dtype("fp64"),): ("__nv_double2float_ru", core.dtype("fp32"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def double2int_rn(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_double2int_rn`` (fp64 -> int32)."""
    table = {(core.dtype("fp64"),): ("__nv_double2int_rn", core.dtype("int32"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def double2int_rz(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_double2int_rz`` (fp64 -> int32)."""
    table = {(core.dtype("fp64"),): ("__nv_double2int_rz", core.dtype("int32"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def double2int_rd(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_double2int_rd`` (fp64 -> int32)."""
    table = {(core.dtype("fp64"),): ("__nv_double2int_rd", core.dtype("int32"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def double2int_ru(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_double2int_ru`` (fp64 -> int32)."""
    table = {(core.dtype("fp64"),): ("__nv_double2int_ru", core.dtype("int32"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def double2uint_rn(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_double2uint_rn`` (fp64 -> int32)."""
    # NOTE(review): result dtype is signed int32 although the symbol says "uint" -- confirm.
    table = {(core.dtype("fp64"),): ("__nv_double2uint_rn", core.dtype("int32"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def double2uint_rz(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_double2uint_rz`` (fp64 -> int32)."""
    # NOTE(review): result dtype is signed int32 although the symbol says "uint" -- confirm.
    table = {(core.dtype("fp64"),): ("__nv_double2uint_rz", core.dtype("int32"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def double2uint_rd(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_double2uint_rd`` (fp64 -> int32)."""
    # NOTE(review): result dtype is signed int32 although the symbol says "uint" -- confirm.
    table = {(core.dtype("fp64"),): ("__nv_double2uint_rd", core.dtype("int32"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def double2uint_ru(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_double2uint_ru`` (fp64 -> int32)."""
    # NOTE(review): result dtype is signed int32 although the symbol says "uint" -- confirm.
    table = {(core.dtype("fp64"),): ("__nv_double2uint_ru", core.dtype("int32"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def int2double_rn(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice int32/uint32 -> fp64 ``_rn`` table."""
    fp64 = core.dtype("fp64")
    table = {
        (core.dtype("int32"),): ("__nv_int2double_rn", fp64),
        (core.dtype("uint32"),): ("__nv_uint2double_rn", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def float2int_rn(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_float2int_rn`` (fp32 -> int32)."""
    table = {(core.dtype("fp32"),): ("__nv_float2int_rn", core.dtype("int32"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def float2int_rz(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_float2int_rz`` (fp32 -> int32)."""
    table = {(core.dtype("fp32"),): ("__nv_float2int_rz", core.dtype("int32"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def float2int_rd(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_float2int_rd`` (fp32 -> int32)."""
    table = {(core.dtype("fp32"),): ("__nv_float2int_rd", core.dtype("int32"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def float2int_ru(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_float2int_ru`` (fp32 -> int32)."""
    table = {(core.dtype("fp32"),): ("__nv_float2int_ru", core.dtype("int32"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def float2uint_rn(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_float2uint_rn`` (fp32 -> int32)."""
    # NOTE(review): result dtype is signed int32 although the symbol says "uint" -- confirm.
    table = {(core.dtype("fp32"),): ("__nv_float2uint_rn", core.dtype("int32"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def float2uint_rz(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_float2uint_rz`` (fp32 -> int32)."""
    # NOTE(review): result dtype is signed int32 although the symbol says "uint" -- confirm.
    table = {(core.dtype("fp32"),): ("__nv_float2uint_rz", core.dtype("int32"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def float2uint_rd(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_float2uint_rd`` (fp32 -> int32)."""
    # NOTE(review): result dtype is signed int32 although the symbol says "uint" -- confirm.
    table = {(core.dtype("fp32"),): ("__nv_float2uint_rd", core.dtype("int32"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def float2uint_ru(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_float2uint_ru`` (fp32 -> int32)."""
    # NOTE(review): result dtype is signed int32 although the symbol says "uint" -- confirm.
    table = {(core.dtype("fp32"),): ("__nv_float2uint_ru", core.dtype("int32"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def int2float_rn(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice int32/uint32 -> fp32 ``_rn`` table."""
    fp32 = core.dtype("fp32")
    table = {
        (core.dtype("int32"),): ("__nv_int2float_rn", fp32),
        (core.dtype("uint32"),): ("__nv_uint2float_rn", fp32),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def int2float_rz(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice int32/uint32 -> fp32 ``_rz`` table."""
    fp32 = core.dtype("fp32")
    table = {
        (core.dtype("int32"),): ("__nv_int2float_rz", fp32),
        (core.dtype("uint32"),): ("__nv_uint2float_rz", fp32),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def int2float_rd(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice int32/uint32 -> fp32 ``_rd`` table."""
    fp32 = core.dtype("fp32")
    table = {
        (core.dtype("int32"),): ("__nv_int2float_rd", fp32),
        (core.dtype("uint32"),): ("__nv_uint2float_rd", fp32),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def int2float_ru(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice int32/uint32 -> fp32 ``_ru`` table."""
    fp32 = core.dtype("fp32")
    table = {
        (core.dtype("int32"),): ("__nv_int2float_ru", fp32),
        (core.dtype("uint32"),): ("__nv_uint2float_ru", fp32),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def hiloint2double(arg0, arg1, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_hiloint2double`` (int32 x2 -> fp64)."""
    int32 = core.dtype("int32")
    table = {(int32, int32): ("__nv_hiloint2double", core.dtype("fp64"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], table, _builder)
@extern.extern
def double2loint(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_double2loint`` (fp64 -> int32)."""
    table = {(core.dtype("fp64"),): ("__nv_double2loint", core.dtype("int32"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def double2hiint(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_double2hiint`` (fp64 -> int32)."""
    table = {(core.dtype("fp64"),): ("__nv_double2hiint", core.dtype("int32"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def float2ll_rn(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_float2ll_rn`` (fp32 -> int64)."""
    table = {(core.dtype("fp32"),): ("__nv_float2ll_rn", core.dtype("int64"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def float2ll_rz(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_float2ll_rz`` (fp32 -> int64)."""
    table = {(core.dtype("fp32"),): ("__nv_float2ll_rz", core.dtype("int64"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def float2ll_rd(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_float2ll_rd`` (fp32 -> int64)."""
    table = {(core.dtype("fp32"),): ("__nv_float2ll_rd", core.dtype("int64"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def float2ll_ru(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_float2ll_ru`` (fp32 -> int64)."""
    table = {(core.dtype("fp32"),): ("__nv_float2ll_ru", core.dtype("int64"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def float2ull_rn(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_float2ull_rn`` (fp32 -> int64)."""
    # NOTE(review): result dtype is signed int64 although the symbol says "ull" -- confirm.
    table = {(core.dtype("fp32"),): ("__nv_float2ull_rn", core.dtype("int64"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def float2ull_rz(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_float2ull_rz`` (fp32 -> int64)."""
    # NOTE(review): result dtype is signed int64 although the symbol says "ull" -- confirm.
    table = {(core.dtype("fp32"),): ("__nv_float2ull_rz", core.dtype("int64"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def float2ull_rd(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_float2ull_rd`` (fp32 -> int64)."""
    # NOTE(review): result dtype is signed int64 although the symbol says "ull" -- confirm.
    table = {(core.dtype("fp32"),): ("__nv_float2ull_rd", core.dtype("int64"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def float2ull_ru(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_float2ull_ru`` (fp32 -> int64)."""
    # NOTE(review): result dtype is signed int64 although the symbol says "ull" -- confirm.
    table = {(core.dtype("fp32"),): ("__nv_float2ull_ru", core.dtype("int64"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def double2ll_rn(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_double2ll_rn`` (fp64 -> int64)."""
    table = {(core.dtype("fp64"),): ("__nv_double2ll_rn", core.dtype("int64"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def double2ll_rz(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_double2ll_rz`` (fp64 -> int64)."""
    table = {(core.dtype("fp64"),): ("__nv_double2ll_rz", core.dtype("int64"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def double2ll_rd(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_double2ll_rd`` (fp64 -> int64)."""
    table = {(core.dtype("fp64"),): ("__nv_double2ll_rd", core.dtype("int64"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def double2ll_ru(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_double2ll_ru`` (fp64 -> int64)."""
    table = {(core.dtype("fp64"),): ("__nv_double2ll_ru", core.dtype("int64"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def double2ull_rn(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_double2ull_rn`` (fp64 -> int64)."""
    # NOTE(review): result dtype is signed int64 although the symbol says "ull" -- confirm.
    table = {(core.dtype("fp64"),): ("__nv_double2ull_rn", core.dtype("int64"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def double2ull_rz(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_double2ull_rz`` (fp64 -> int64)."""
    # NOTE(review): result dtype is signed int64 although the symbol says "ull" -- confirm.
    table = {(core.dtype("fp64"),): ("__nv_double2ull_rz", core.dtype("int64"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def double2ull_rd(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_double2ull_rd`` (fp64 -> int64)."""
    # NOTE(review): result dtype is signed int64 although the symbol says "ull" -- confirm.
    table = {(core.dtype("fp64"),): ("__nv_double2ull_rd", core.dtype("int64"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def double2ull_ru(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with libdevice ``__nv_double2ull_ru`` (fp64 -> int64)."""
    # NOTE(review): result dtype is signed int64 although the symbol says "ull" -- confirm.
    table = {(core.dtype("fp64"),): ("__nv_double2ull_ru", core.dtype("int64"))}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def ll2float_rn(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice int64/uint64 -> fp32 ``_rn`` table."""
    fp32 = core.dtype("fp32")
    table = {
        (core.dtype("int64"),): ("__nv_ll2float_rn", fp32),
        (core.dtype("uint64"),): ("__nv_ull2float_rn", fp32),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def ll2float_rz(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice int64/uint64 -> fp32 ``_rz`` table."""
    fp32 = core.dtype("fp32")
    table = {
        (core.dtype("int64"),): ("__nv_ll2float_rz", fp32),
        (core.dtype("uint64"),): ("__nv_ull2float_rz", fp32),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def ll2float_rd(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice int64/uint64 -> fp32 ``_rd`` table."""
    fp32 = core.dtype("fp32")
    table = {
        (core.dtype("int64"),): ("__nv_ll2float_rd", fp32),
        (core.dtype("uint64"),): ("__nv_ull2float_rd", fp32),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def ll2float_ru(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice int64/uint64 -> fp32 ``_ru`` table."""
    fp32 = core.dtype("fp32")
    table = {
        (core.dtype("int64"),): ("__nv_ll2float_ru", fp32),
        (core.dtype("uint64"),): ("__nv_ull2float_ru", fp32),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def ll2double_rn(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice int64/uint64 -> fp64 ``_rn`` table."""
    fp64 = core.dtype("fp64")
    table = {
        (core.dtype("int64"),): ("__nv_ll2double_rn", fp64),
        (core.dtype("uint64"),): ("__nv_ull2double_rn", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def ll2double_rz(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice int64/uint64 -> fp64 ``_rz`` table."""
    fp64 = core.dtype("fp64")
    table = {
        (core.dtype("int64"),): ("__nv_ll2double_rz", fp64),
        (core.dtype("uint64"),): ("__nv_ull2double_rz", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def ll2double_rd(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice int64/uint64 -> fp64 ``_rd`` table."""
    fp64 = core.dtype("fp64")
    table = {
        (core.dtype("int64"),): ("__nv_ll2double_rd", fp64),
        (core.dtype("uint64"),): ("__nv_ull2double_rd", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def ll2double_ru(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice int64/uint64 -> fp64 ``_ru`` table."""
    fp64 = core.dtype("fp64")
    table = {
        (core.dtype("int64"),): ("__nv_ll2double_ru", fp64),
        (core.dtype("uint64"),): ("__nv_ull2double_ru", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def int_as_float(arg0, _builder=None):
    """Forward to ``extern.elementwise`` with the libdevice int32/uint32 ``*_as_float`` table."""
    fp32 = core.dtype("fp32")
    table = {
        (core.dtype("int32"),): ("__nv_int_as_float", fp32),
        (core.dtype("uint32"),): ("__nv_uint_as_float", fp32),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], table, _builder)
@extern.extern
def float_as_int(arg0, _builder=None):
    """Call libdevice's ``__nv_float_as_int`` through ``extern.elementwise``.

    Only an fp32 argument signature is registered; the declared result
    dtype is int32. ``_builder`` is forwarded unchanged.
    """
    symbol_table = {
        (core.dtype("fp32"),): ("__nv_float_as_int", core.dtype("int32")),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def float_as_uint(arg0, _builder=None):
    """Call libdevice's ``__nv_float_as_uint`` through ``extern.elementwise``.

    Only an fp32 argument signature is registered. The result is declared
    as uint32 so that later arithmetic, shifts and comparisons on the
    returned tensor use unsigned semantics; declaring it int32 would make
    this wrapper indistinguishable from ``float_as_int`` at the type level.
    ``_builder`` is forwarded unchanged.
    """
    symbol_table = {
        # Unsigned result, matching how the other unsigned libdevice
        # symbols in this module (e.g. ``__nv_uhadd``) are typed.
        (core.dtype("fp32"),): ("__nv_float_as_uint", core.dtype("uint32")),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def longlong_as_double(arg0, _builder=None):
    """Call libdevice's ``__nv_longlong_as_double`` through ``extern.elementwise``.

    Only an int64 argument signature is registered; the declared result
    dtype is fp64. ``_builder`` is forwarded unchanged.
    """
    symbol_table = {
        (core.dtype("int64"),): ("__nv_longlong_as_double", core.dtype("fp64")),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def double_as_longlong(arg0, _builder=None):
    """Call libdevice's ``__nv_double_as_longlong`` through ``extern.elementwise``.

    Only an fp64 argument signature is registered; the declared result
    dtype is int64. ``_builder`` is forwarded unchanged.
    """
    symbol_table = {
        (core.dtype("fp64"),): ("__nv_double_as_longlong", core.dtype("int64")),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def fast_sinf(arg0, _builder=None):
    """Call libdevice's ``__nv_fast_sinf`` through ``extern.elementwise``.

    Only an fp32 argument signature is registered; the declared result
    dtype is fp32. ``_builder`` is forwarded unchanged.
    """
    fp32 = core.dtype("fp32")
    symbol_table = {(fp32,): ("__nv_fast_sinf", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def fast_cosf(arg0, _builder=None):
    """Call libdevice's ``__nv_fast_cosf`` through ``extern.elementwise``.

    Only an fp32 argument signature is registered; the declared result
    dtype is fp32. ``_builder`` is forwarded unchanged.
    """
    fp32 = core.dtype("fp32")
    symbol_table = {(fp32,): ("__nv_fast_cosf", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def fast_log2f(arg0, _builder=None):
    """Call libdevice's ``__nv_fast_log2f`` through ``extern.elementwise``.

    Only an fp32 argument signature is registered; the declared result
    dtype is fp32. ``_builder`` is forwarded unchanged.
    """
    fp32 = core.dtype("fp32")
    symbol_table = {(fp32,): ("__nv_fast_log2f", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def fast_logf(arg0, _builder=None):
    """Call libdevice's ``__nv_fast_logf`` through ``extern.elementwise``.

    Only an fp32 argument signature is registered; the declared result
    dtype is fp32. ``_builder`` is forwarded unchanged.
    """
    fp32 = core.dtype("fp32")
    symbol_table = {(fp32,): ("__nv_fast_logf", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def fast_expf(arg0, _builder=None):
    """Call libdevice's ``__nv_fast_expf`` through ``extern.elementwise``.

    Only an fp32 argument signature is registered; the declared result
    dtype is fp32. ``_builder`` is forwarded unchanged.
    """
    fp32 = core.dtype("fp32")
    symbol_table = {(fp32,): ("__nv_fast_expf", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def fast_tanf(arg0, _builder=None):
    """Call libdevice's ``__nv_fast_tanf`` through ``extern.elementwise``.

    Only an fp32 argument signature is registered; the declared result
    dtype is fp32. ``_builder`` is forwarded unchanged.
    """
    fp32 = core.dtype("fp32")
    symbol_table = {(fp32,): ("__nv_fast_tanf", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def fast_exp10f(arg0, _builder=None):
    """Call libdevice's ``__nv_fast_exp10f`` through ``extern.elementwise``.

    Only an fp32 argument signature is registered; the declared result
    dtype is fp32. ``_builder`` is forwarded unchanged.
    """
    fp32 = core.dtype("fp32")
    symbol_table = {(fp32,): ("__nv_fast_exp10f", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def fast_log10f(arg0, _builder=None):
    """Call libdevice's ``__nv_fast_log10f`` through ``extern.elementwise``.

    Only an fp32 argument signature is registered; the declared result
    dtype is fp32. ``_builder`` is forwarded unchanged.
    """
    fp32 = core.dtype("fp32")
    symbol_table = {(fp32,): ("__nv_fast_log10f", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def pow(arg0, arg1, _builder=None):
    """Call libdevice's ``pow`` family through ``extern.elementwise``.

    Registered signatures: (fp32, fp32) -> ``__nv_powf`` with an fp32
    result, and (fp64, fp64) -> ``__nv_pow`` with an fp64 result.
    ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        # Exactly one symbol per signature: a dict literal keeps only the
        # last value for a repeated key, so listing ``__nv_fast_powf`` under
        # (fp32, fp32) as well would be dead code that never gets selected.
        (fp32, fp32): ("__nv_powf", fp32),
        (fp64, fp64): ("__nv_pow", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], symbol_table, _builder)
@extern.extern
def hadd(arg0, arg1, _builder=None):
    """Call libdevice's ``hadd`` family through ``extern.elementwise``.

    (int32, int32) is mapped to ``__nv_hadd`` with an int32 result and
    (uint32, uint32) to ``__nv_uhadd`` with a uint32 result.
    ``_builder`` is forwarded unchanged.
    """
    i32, u32 = core.dtype("int32"), core.dtype("uint32")
    symbol_table = {
        (i32, i32): ("__nv_hadd", i32),
        (u32, u32): ("__nv_uhadd", u32),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], symbol_table, _builder)
@extern.extern
def rhadd(arg0, arg1, _builder=None):
    """Call libdevice's ``rhadd`` family through ``extern.elementwise``.

    (int32, int32) is mapped to ``__nv_rhadd`` with an int32 result and
    (uint32, uint32) to ``__nv_urhadd`` with a uint32 result.
    ``_builder`` is forwarded unchanged.
    """
    i32, u32 = core.dtype("int32"), core.dtype("uint32")
    symbol_table = {
        (i32, i32): ("__nv_rhadd", i32),
        (u32, u32): ("__nv_urhadd", u32),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], symbol_table, _builder)
@extern.extern
def fsub_rn(arg0, arg1, _builder=None):
    """Call libdevice's ``__nv_fsub_rn`` through ``extern.elementwise``.

    Only the (fp32, fp32) signature is registered; the declared result
    dtype is fp32. ``_builder`` is forwarded unchanged.
    """
    fp32 = core.dtype("fp32")
    symbol_table = {(fp32, fp32): ("__nv_fsub_rn", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], symbol_table, _builder)
@extern.extern
def fsub_rz(arg0, arg1, _builder=None):
    """Call libdevice's ``__nv_fsub_rz`` through ``extern.elementwise``.

    Only the (fp32, fp32) signature is registered; the declared result
    dtype is fp32. ``_builder`` is forwarded unchanged.
    """
    fp32 = core.dtype("fp32")
    symbol_table = {(fp32, fp32): ("__nv_fsub_rz", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], symbol_table, _builder)
@extern.extern
def fsub_rd(arg0, arg1, _builder=None):
    """Call libdevice's ``__nv_fsub_rd`` through ``extern.elementwise``.

    Only the (fp32, fp32) signature is registered; the declared result
    dtype is fp32. ``_builder`` is forwarded unchanged.
    """
    fp32 = core.dtype("fp32")
    symbol_table = {(fp32, fp32): ("__nv_fsub_rd", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], symbol_table, _builder)
@extern.extern
def fsub_ru(arg0, arg1, _builder=None):
    """Call libdevice's ``__nv_fsub_ru`` through ``extern.elementwise``.

    Only the (fp32, fp32) signature is registered; the declared result
    dtype is fp32. ``_builder`` is forwarded unchanged.
    """
    fp32 = core.dtype("fp32")
    symbol_table = {(fp32, fp32): ("__nv_fsub_ru", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], symbol_table, _builder)
@extern.extern
def frsqrt_rn(arg0, _builder=None):
    """Call libdevice's ``__nv_frsqrt_rn`` through ``extern.elementwise``.

    Only an fp32 argument signature is registered; the declared result
    dtype is fp32. ``_builder`` is forwarded unchanged.
    """
    fp32 = core.dtype("fp32")
    symbol_table = {(fp32,): ("__nv_frsqrt_rn", fp32)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def ffs(arg0, _builder=None):
    """Call libdevice's ``ffs`` family through ``extern.elementwise``.

    int32 input is mapped to ``__nv_ffs`` and int64 input to
    ``__nv_ffsll``; both entries declare an int32 result.
    ``_builder`` is forwarded unchanged.
    """
    i32 = core.dtype("int32")
    symbol_table = {
        (i32,): ("__nv_ffs", i32),
        (core.dtype("int64"),): ("__nv_ffsll", i32),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def rint(arg0, _builder=None):
    """Call libdevice's ``rint`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_rintf`` (fp32 result) and fp64 input
    to ``__nv_rint`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_rintf", fp32),
        (fp64,): ("__nv_rint", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def llrint(arg0, _builder=None):
    """Call libdevice's ``llrint`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_llrintf`` and fp64 input to
    ``__nv_llrint``; both entries declare an int64 result.
    ``_builder`` is forwarded unchanged.
    """
    i64 = core.dtype("int64")
    symbol_table = {
        (core.dtype("fp32"),): ("__nv_llrintf", i64),
        (core.dtype("fp64"),): ("__nv_llrint", i64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def nearbyint(arg0, _builder=None):
    """Call libdevice's ``nearbyint`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_nearbyintf`` (fp32 result) and fp64
    input to ``__nv_nearbyint`` (fp64 result). ``_builder`` is forwarded
    unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_nearbyintf", fp32),
        (fp64,): ("__nv_nearbyint", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def isnanf(arg0, _builder=None):
    """Call libdevice's ``__nv_isnanf`` through ``extern.elementwise``.

    Only an fp32 argument signature is registered; the declared result
    dtype is int32. ``_builder`` is forwarded unchanged.
    """
    symbol_table = {
        (core.dtype("fp32"),): ("__nv_isnanf", core.dtype("int32")),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def signbitf(arg0, _builder=None):
    """Call libdevice's ``__nv_signbitf`` through ``extern.elementwise``.

    Only an fp32 argument signature is registered; the declared result
    dtype is int32. ``_builder`` is forwarded unchanged.
    """
    symbol_table = {
        (core.dtype("fp32"),): ("__nv_signbitf", core.dtype("int32")),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def copysign(arg0, arg1, _builder=None):
    """Call libdevice's ``copysign`` family through ``extern.elementwise``.

    (fp32, fp32) is mapped to ``__nv_copysignf`` (fp32 result) and
    (fp64, fp64) to ``__nv_copysign`` (fp64 result). ``_builder`` is
    forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32, fp32): ("__nv_copysignf", fp32),
        (fp64, fp64): ("__nv_copysign", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], symbol_table, _builder)
@extern.extern
def finitef(arg0, _builder=None):
    """Call libdevice's ``__nv_finitef`` through ``extern.elementwise``.

    Only an fp32 argument signature is registered; the declared result
    dtype is int32. ``_builder`` is forwarded unchanged.
    """
    symbol_table = {
        (core.dtype("fp32"),): ("__nv_finitef", core.dtype("int32")),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def isinff(arg0, _builder=None):
    """Call libdevice's ``__nv_isinff`` through ``extern.elementwise``.

    Only an fp32 argument signature is registered; the declared result
    dtype is int32. ``_builder`` is forwarded unchanged.
    """
    symbol_table = {
        (core.dtype("fp32"),): ("__nv_isinff", core.dtype("int32")),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def nextafter(arg0, arg1, _builder=None):
    """Call libdevice's ``nextafter`` family through ``extern.elementwise``.

    (fp32, fp32) is mapped to ``__nv_nextafterf`` (fp32 result) and
    (fp64, fp64) to ``__nv_nextafter`` (fp64 result). ``_builder`` is
    forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32, fp32): ("__nv_nextafterf", fp32),
        (fp64, fp64): ("__nv_nextafter", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], symbol_table, _builder)
@extern.extern
def sin(arg0, _builder=None):
    """Call libdevice's ``sin`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_sinf`` (fp32 result) and fp64 input
    to ``__nv_sin`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_sinf", fp32),
        (fp64,): ("__nv_sin", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def cos(arg0, _builder=None):
    """Call libdevice's ``cos`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_cosf`` (fp32 result) and fp64 input
    to ``__nv_cos`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_cosf", fp32),
        (fp64,): ("__nv_cos", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def sinpi(arg0, _builder=None):
    """Call libdevice's ``sinpi`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_sinpif`` (fp32 result) and fp64 input
    to ``__nv_sinpi`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_sinpif", fp32),
        (fp64,): ("__nv_sinpi", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def cospi(arg0, _builder=None):
    """Call libdevice's ``cospi`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_cospif`` (fp32 result) and fp64 input
    to ``__nv_cospi`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_cospif", fp32),
        (fp64,): ("__nv_cospi", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def tan(arg0, _builder=None):
    """Call libdevice's ``tan`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_tanf`` (fp32 result) and fp64 input
    to ``__nv_tan`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_tanf", fp32),
        (fp64,): ("__nv_tan", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def log2(arg0, _builder=None):
    """Call libdevice's ``log2`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_log2f`` (fp32 result) and fp64 input
    to ``__nv_log2`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_log2f", fp32),
        (fp64,): ("__nv_log2", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def exp(arg0, _builder=None):
    """Call libdevice's ``exp`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_expf`` (fp32 result) and fp64 input
    to ``__nv_exp`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_expf", fp32),
        (fp64,): ("__nv_exp", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def exp10(arg0, _builder=None):
    """Call libdevice's ``exp10`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_exp10f`` (fp32 result) and fp64 input
    to ``__nv_exp10`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_exp10f", fp32),
        (fp64,): ("__nv_exp10", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def cosh(arg0, _builder=None):
    """Call libdevice's ``cosh`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_coshf`` (fp32 result) and fp64 input
    to ``__nv_cosh`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_coshf", fp32),
        (fp64,): ("__nv_cosh", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def sinh(arg0, _builder=None):
    """Call libdevice's ``sinh`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_sinhf`` (fp32 result) and fp64 input
    to ``__nv_sinh`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_sinhf", fp32),
        (fp64,): ("__nv_sinh", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def tanh(arg0, _builder=None):
    """Call libdevice's ``tanh`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_tanhf`` (fp32 result) and fp64 input
    to ``__nv_tanh`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_tanhf", fp32),
        (fp64,): ("__nv_tanh", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def atan2(arg0, arg1, _builder=None):
    """Call libdevice's ``atan2`` family through ``extern.elementwise``.

    (fp32, fp32) is mapped to ``__nv_atan2f`` (fp32 result) and
    (fp64, fp64) to ``__nv_atan2`` (fp64 result). ``_builder`` is
    forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32, fp32): ("__nv_atan2f", fp32),
        (fp64, fp64): ("__nv_atan2", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], symbol_table, _builder)
@extern.extern
def atan(arg0, _builder=None):
    """Call libdevice's ``atan`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_atanf`` (fp32 result) and fp64 input
    to ``__nv_atan`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_atanf", fp32),
        (fp64,): ("__nv_atan", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def asin(arg0, _builder=None):
    """Call libdevice's ``asin`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_asinf`` (fp32 result) and fp64 input
    to ``__nv_asin`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_asinf", fp32),
        (fp64,): ("__nv_asin", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def acos(arg0, _builder=None):
    """Call libdevice's ``acos`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_acosf`` (fp32 result) and fp64 input
    to ``__nv_acos`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_acosf", fp32),
        (fp64,): ("__nv_acos", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def log(arg0, _builder=None):
    """Call libdevice's ``log`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_logf`` (fp32 result) and fp64 input
    to ``__nv_log`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_logf", fp32),
        (fp64,): ("__nv_log", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def log10(arg0, _builder=None):
    """Call libdevice's ``log10`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_log10f`` (fp32 result) and fp64 input
    to ``__nv_log10`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_log10f", fp32),
        (fp64,): ("__nv_log10", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def log1p(arg0, _builder=None):
    """Call libdevice's ``log1p`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_log1pf`` (fp32 result) and fp64 input
    to ``__nv_log1p`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_log1pf", fp32),
        (fp64,): ("__nv_log1p", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def acosh(arg0, _builder=None):
    """Call libdevice's ``acosh`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_acoshf`` (fp32 result) and fp64 input
    to ``__nv_acosh`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_acoshf", fp32),
        (fp64,): ("__nv_acosh", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def asinh(arg0, _builder=None):
    """Call libdevice's ``asinh`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_asinhf`` (fp32 result) and fp64 input
    to ``__nv_asinh`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_asinhf", fp32),
        (fp64,): ("__nv_asinh", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def atanh(arg0, _builder=None):
    """Call libdevice's ``atanh`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_atanhf`` (fp32 result) and fp64 input
    to ``__nv_atanh`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_atanhf", fp32),
        (fp64,): ("__nv_atanh", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def expm1(arg0, _builder=None):
    """Call libdevice's ``expm1`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_expm1f`` (fp32 result) and fp64 input
    to ``__nv_expm1`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_expm1f", fp32),
        (fp64,): ("__nv_expm1", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def hypot(arg0, arg1, _builder=None):
    """Call libdevice's ``hypot`` family through ``extern.elementwise``.

    (fp32, fp32) is mapped to ``__nv_hypotf`` (fp32 result) and
    (fp64, fp64) to ``__nv_hypot`` (fp64 result). ``_builder`` is
    forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32, fp32): ("__nv_hypotf", fp32),
        (fp64, fp64): ("__nv_hypot", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], symbol_table, _builder)
@extern.extern
def rhypot(arg0, arg1, _builder=None):
    """Call libdevice's ``rhypot`` family through ``extern.elementwise``.

    (fp32, fp32) is mapped to ``__nv_rhypotf`` (fp32 result) and
    (fp64, fp64) to ``__nv_rhypot`` (fp64 result). ``_builder`` is
    forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32, fp32): ("__nv_rhypotf", fp32),
        (fp64, fp64): ("__nv_rhypot", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], symbol_table, _builder)
@extern.extern
def norm3d(arg0, arg1, arg2, _builder=None):
    """Call libdevice's ``norm3d`` family through ``extern.elementwise``.

    Three fp32 arguments are mapped to ``__nv_norm3df`` (fp32 result) and
    three fp64 arguments to ``__nv_norm3d`` (fp64 result). ``_builder`` is
    forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,) * 3: ("__nv_norm3df", fp32),
        (fp64,) * 3: ("__nv_norm3d", fp64),
    }
    operands = [arg0, arg1, arg2]
    return extern.elementwise("libdevice", LIBDEVICE_PATH, operands, symbol_table, _builder)
@extern.extern
def rnorm3d(arg0, arg1, arg2, _builder=None):
    """Call libdevice's ``rnorm3d`` family through ``extern.elementwise``.

    Three fp32 arguments are mapped to ``__nv_rnorm3df`` (fp32 result) and
    three fp64 arguments to ``__nv_rnorm3d`` (fp64 result). ``_builder`` is
    forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,) * 3: ("__nv_rnorm3df", fp32),
        (fp64,) * 3: ("__nv_rnorm3d", fp64),
    }
    operands = [arg0, arg1, arg2]
    return extern.elementwise("libdevice", LIBDEVICE_PATH, operands, symbol_table, _builder)
@extern.extern
def norm4d(arg0, arg1, arg2, arg3, _builder=None):
    """Call libdevice's ``norm4d`` family through ``extern.elementwise``.

    Four fp32 arguments are mapped to ``__nv_norm4df`` (fp32 result) and
    four fp64 arguments to ``__nv_norm4d`` (fp64 result). ``_builder`` is
    forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,) * 4: ("__nv_norm4df", fp32),
        (fp64,) * 4: ("__nv_norm4d", fp64),
    }
    operands = [arg0, arg1, arg2, arg3]
    return extern.elementwise("libdevice", LIBDEVICE_PATH, operands, symbol_table, _builder)
@extern.extern
def rnorm4d(arg0, arg1, arg2, arg3, _builder=None):
    """Call libdevice's ``rnorm4d`` family through ``extern.elementwise``.

    Four fp32 arguments are mapped to ``__nv_rnorm4df`` (fp32 result) and
    four fp64 arguments to ``__nv_rnorm4d`` (fp64 result). ``_builder`` is
    forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,) * 4: ("__nv_rnorm4df", fp32),
        (fp64,) * 4: ("__nv_rnorm4d", fp64),
    }
    operands = [arg0, arg1, arg2, arg3]
    return extern.elementwise("libdevice", LIBDEVICE_PATH, operands, symbol_table, _builder)
@extern.extern
def cbrt(arg0, _builder=None):
    """Call libdevice's ``cbrt`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_cbrtf`` (fp32 result) and fp64 input
    to ``__nv_cbrt`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_cbrtf", fp32),
        (fp64,): ("__nv_cbrt", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def rcbrt(arg0, _builder=None):
    """Call libdevice's ``rcbrt`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_rcbrtf`` (fp32 result) and fp64 input
    to ``__nv_rcbrt`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_rcbrtf", fp32),
        (fp64,): ("__nv_rcbrt", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def j0(arg0, _builder=None):
    """Call libdevice's ``j0`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_j0f`` (fp32 result) and fp64 input
    to ``__nv_j0`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_j0f", fp32),
        (fp64,): ("__nv_j0", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def j1(arg0, _builder=None):
    """Call libdevice's ``j1`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_j1f`` (fp32 result) and fp64 input
    to ``__nv_j1`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_j1f", fp32),
        (fp64,): ("__nv_j1", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def y0(arg0, _builder=None):
    """Call libdevice's ``y0`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_y0f`` (fp32 result) and fp64 input
    to ``__nv_y0`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_y0f", fp32),
        (fp64,): ("__nv_y0", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def y1(arg0, _builder=None):
    """Call libdevice's ``y1`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_y1f`` (fp32 result) and fp64 input
    to ``__nv_y1`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_y1f", fp32),
        (fp64,): ("__nv_y1", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def yn(arg0, arg1, _builder=None):
    """Call libdevice's ``yn`` family through ``extern.elementwise``.

    (int32, fp32) is mapped to ``__nv_ynf`` (fp32 result) and
    (int32, fp64) to ``__nv_yn`` (fp64 result). ``_builder`` is
    forwarded unchanged.
    """
    i32 = core.dtype("int32")
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (i32, fp32): ("__nv_ynf", fp32),
        (i32, fp64): ("__nv_yn", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], symbol_table, _builder)
@extern.extern
def jn(arg0, arg1, _builder=None):
    """Call libdevice's ``jn`` family through ``extern.elementwise``.

    (int32, fp32) is mapped to ``__nv_jnf`` (fp32 result) and
    (int32, fp64) to ``__nv_jn`` (fp64 result). ``_builder`` is
    forwarded unchanged.
    """
    i32 = core.dtype("int32")
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (i32, fp32): ("__nv_jnf", fp32),
        (i32, fp64): ("__nv_jn", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], symbol_table, _builder)
@extern.extern
def cyl_bessel_i0(arg0, _builder=None):
    """Call libdevice's ``cyl_bessel_i0`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_cyl_bessel_i0f`` (fp32 result) and fp64
    input to ``__nv_cyl_bessel_i0`` (fp64 result). ``_builder`` is
    forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_cyl_bessel_i0f", fp32),
        (fp64,): ("__nv_cyl_bessel_i0", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def cyl_bessel_i1(arg0, _builder=None):
    """Call libdevice's ``cyl_bessel_i1`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_cyl_bessel_i1f`` (fp32 result) and fp64
    input to ``__nv_cyl_bessel_i1`` (fp64 result). ``_builder`` is
    forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_cyl_bessel_i1f", fp32),
        (fp64,): ("__nv_cyl_bessel_i1", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def erf(arg0, _builder=None):
    """Call libdevice's ``erf`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_erff`` (fp32 result) and fp64 input
    to ``__nv_erf`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_erff", fp32),
        (fp64,): ("__nv_erf", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def erfinv(arg0, _builder=None):
    """Call libdevice's ``erfinv`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_erfinvf`` (fp32 result) and fp64 input
    to ``__nv_erfinv`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_erfinvf", fp32),
        (fp64,): ("__nv_erfinv", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def erfc(arg0, _builder=None):
    """Call libdevice's ``erfc`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_erfcf`` (fp32 result) and fp64 input
    to ``__nv_erfc`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_erfcf", fp32),
        (fp64,): ("__nv_erfc", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def erfcx(arg0, _builder=None):
    """Call libdevice's ``erfcx`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_erfcxf`` (fp32 result) and fp64 input
    to ``__nv_erfcx`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_erfcxf", fp32),
        (fp64,): ("__nv_erfcx", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def erfcinv(arg0, _builder=None):
    """Call libdevice's ``erfcinv`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_erfcinvf`` (fp32 result) and fp64 input
    to ``__nv_erfcinv`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_erfcinvf", fp32),
        (fp64,): ("__nv_erfcinv", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def normcdfinv(arg0, _builder=None):
    """Call libdevice's ``normcdfinv`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_normcdfinvf`` (fp32 result) and fp64
    input to ``__nv_normcdfinv`` (fp64 result). ``_builder`` is forwarded
    unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_normcdfinvf", fp32),
        (fp64,): ("__nv_normcdfinv", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def normcdf(arg0, _builder=None):
    """Call libdevice's ``normcdf`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_normcdff`` (fp32 result) and fp64 input
    to ``__nv_normcdf`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_normcdff", fp32),
        (fp64,): ("__nv_normcdf", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def lgamma(arg0, _builder=None):
    """Call libdevice's ``lgamma`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_lgammaf`` (fp32 result) and fp64 input
    to ``__nv_lgamma`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_lgammaf", fp32),
        (fp64,): ("__nv_lgamma", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def ldexp(arg0, arg1, _builder=None):
    """Call libdevice's ``ldexp`` family through ``extern.elementwise``.

    (fp32, int32) is mapped to ``__nv_ldexpf`` (fp32 result) and
    (fp64, int32) to ``__nv_ldexp`` (fp64 result). ``_builder`` is
    forwarded unchanged.
    """
    i32 = core.dtype("int32")
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32, i32): ("__nv_ldexpf", fp32),
        (fp64, i32): ("__nv_ldexp", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], symbol_table, _builder)
@extern.extern
def scalbn(arg0, arg1, _builder=None):
    """Call libdevice's ``scalbn`` family through ``extern.elementwise``.

    (fp32, int32) is mapped to ``__nv_scalbnf`` (fp32 result) and
    (fp64, int32) to ``__nv_scalbn`` (fp64 result). ``_builder`` is
    forwarded unchanged.
    """
    i32 = core.dtype("int32")
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32, i32): ("__nv_scalbnf", fp32),
        (fp64, i32): ("__nv_scalbn", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], symbol_table, _builder)
@extern.extern
def fmod(arg0, arg1, _builder=None):
    """Call libdevice's ``fmod`` family through ``extern.elementwise``.

    (fp32, fp32) is mapped to ``__nv_fmodf`` (fp32 result) and
    (fp64, fp64) to ``__nv_fmod`` (fp64 result). ``_builder`` is
    forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32, fp32): ("__nv_fmodf", fp32),
        (fp64, fp64): ("__nv_fmod", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], symbol_table, _builder)
@extern.extern
def remainder(arg0, arg1, _builder=None):
    """Call libdevice's ``remainder`` family through ``extern.elementwise``.

    (fp32, fp32) is mapped to ``__nv_remainderf`` (fp32 result) and
    (fp64, fp64) to ``__nv_remainder`` (fp64 result). ``_builder`` is
    forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32, fp32): ("__nv_remainderf", fp32),
        (fp64, fp64): ("__nv_remainder", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], symbol_table, _builder)
@extern.extern
def fma(arg0, arg1, arg2, _builder=None):
    """Call libdevice's ``fma`` family through ``extern.elementwise``.

    Three fp32 arguments are mapped to ``__nv_fmaf`` (fp32 result) and
    three fp64 arguments to ``__nv_fma`` (fp64 result). ``_builder`` is
    forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,) * 3: ("__nv_fmaf", fp32),
        (fp64,) * 3: ("__nv_fma", fp64),
    }
    operands = [arg0, arg1, arg2]
    return extern.elementwise("libdevice", LIBDEVICE_PATH, operands, symbol_table, _builder)
@extern.extern
def powi(arg0, arg1, _builder=None):
    """Call libdevice's ``powi`` family through ``extern.elementwise``.

    (fp32, int32) is mapped to ``__nv_powif`` (fp32 result) and
    (fp64, int32) to ``__nv_powi`` (fp64 result). ``_builder`` is
    forwarded unchanged.
    """
    i32 = core.dtype("int32")
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32, i32): ("__nv_powif", fp32),
        (fp64, i32): ("__nv_powi", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], symbol_table, _builder)
@extern.extern
def tgamma(arg0, _builder=None):
    """Call libdevice's ``tgamma`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_tgammaf`` (fp32 result) and fp64 input
    to ``__nv_tgamma`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_tgammaf", fp32),
        (fp64,): ("__nv_tgamma", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def round(arg0, _builder=None):
    """Call libdevice's ``round`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_roundf`` (fp32 result) and fp64 input
    to ``__nv_round`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_roundf", fp32),
        (fp64,): ("__nv_round", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def llround(arg0, _builder=None):
    """Call libdevice's ``llround`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_llroundf`` and fp64 input to
    ``__nv_llround``; both entries declare an int64 result.
    ``_builder`` is forwarded unchanged.
    """
    i64 = core.dtype("int64")
    symbol_table = {
        (core.dtype("fp32"),): ("__nv_llroundf", i64),
        (core.dtype("fp64"),): ("__nv_llround", i64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def fdim(arg0, arg1, _builder=None):
    """Call libdevice's ``fdim`` family through ``extern.elementwise``.

    (fp32, fp32) is mapped to ``__nv_fdimf`` (fp32 result) and
    (fp64, fp64) to ``__nv_fdim`` (fp64 result). ``_builder`` is
    forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32, fp32): ("__nv_fdimf", fp32),
        (fp64, fp64): ("__nv_fdim", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], symbol_table, _builder)
@extern.extern
def ilogb(arg0, _builder=None):
    """Call libdevice's ``ilogb`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_ilogbf`` and fp64 input to
    ``__nv_ilogb``; both entries declare an int32 result.
    ``_builder`` is forwarded unchanged.
    """
    i32 = core.dtype("int32")
    symbol_table = {
        (core.dtype("fp32"),): ("__nv_ilogbf", i32),
        (core.dtype("fp64"),): ("__nv_ilogb", i32),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def logb(arg0, _builder=None):
    """Call libdevice's ``logb`` family through ``extern.elementwise``.

    fp32 input is mapped to ``__nv_logbf`` (fp32 result) and fp64 input
    to ``__nv_logb`` (fp64 result). ``_builder`` is forwarded unchanged.
    """
    fp32, fp64 = core.dtype("fp32"), core.dtype("fp64")
    symbol_table = {
        (fp32,): ("__nv_logbf", fp32),
        (fp64,): ("__nv_logb", fp64),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def signbitd(arg0, _builder=None):
    """Call libdevice's ``__nv_signbitd`` through ``extern.elementwise``.

    Only an fp64 argument signature is registered; the declared result
    dtype is int32. ``_builder`` is forwarded unchanged.
    """
    symbol_table = {
        (core.dtype("fp64"),): ("__nv_signbitd", core.dtype("int32")),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def isfinited(arg0, _builder=None):
    """Call libdevice's ``__nv_isfinited`` through ``extern.elementwise``.

    Only an fp64 argument signature is registered; the declared result
    dtype is int32. ``_builder`` is forwarded unchanged.
    """
    symbol_table = {
        (core.dtype("fp64"),): ("__nv_isfinited", core.dtype("int32")),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def isinfd(arg0, _builder=None):
    """Call libdevice's ``__nv_isinfd`` through ``extern.elementwise``.

    Only an fp64 argument signature is registered; the declared result
    dtype is int32. ``_builder`` is forwarded unchanged.
    """
    symbol_table = {
        (core.dtype("fp64"),): ("__nv_isinfd", core.dtype("int32")),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def isnand(arg0, _builder=None):
    """Call libdevice's ``__nv_isnand`` through ``extern.elementwise``.

    Only an fp64 argument signature is registered; the declared result
    dtype is int32. ``_builder`` is forwarded unchanged.
    """
    symbol_table = {
        (core.dtype("fp64"),): ("__nv_isnand", core.dtype("int32")),
    }
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0], symbol_table, _builder)
@extern.extern
def dsub_rn(arg0, arg1, _builder=None):
    """Call libdevice's ``__nv_dsub_rn`` through ``extern.elementwise``.

    Only the (fp64, fp64) signature is registered; the declared result
    dtype is fp64. ``_builder`` is forwarded unchanged.
    """
    fp64 = core.dtype("fp64")
    symbol_table = {(fp64, fp64): ("__nv_dsub_rn", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], symbol_table, _builder)
@extern.extern
def dsub_rz(arg0, arg1, _builder=None):
    """Call libdevice's ``__nv_dsub_rz`` through ``extern.elementwise``.

    Only the (fp64, fp64) signature is registered; the declared result
    dtype is fp64. ``_builder`` is forwarded unchanged.
    """
    fp64 = core.dtype("fp64")
    symbol_table = {(fp64, fp64): ("__nv_dsub_rz", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], symbol_table, _builder)
@extern.extern
def dsub_ru(arg0, arg1, _builder=None):
    """Call libdevice's ``__nv_dsub_ru`` through ``extern.elementwise``.

    Only the (fp64, fp64) signature is registered; the declared result
    dtype is fp64. ``_builder`` is forwarded unchanged.
    """
    fp64 = core.dtype("fp64")
    symbol_table = {(fp64, fp64): ("__nv_dsub_ru", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], symbol_table, _builder)
@extern.extern
def dsub_rd(arg0, arg1, _builder=None):
    """Call libdevice's ``__nv_dsub_rd`` through ``extern.elementwise``.

    Only the (fp64, fp64) signature is registered; the declared result
    dtype is fp64. ``_builder`` is forwarded unchanged.
    """
    fp64 = core.dtype("fp64")
    symbol_table = {(fp64, fp64): ("__nv_dsub_rd", fp64)}
    return extern.elementwise("libdevice", LIBDEVICE_PATH, [arg0, arg1], symbol_table, _builder)