Merge branch 'master' into v2.0
This commit is contained in:
@@ -38,8 +38,8 @@ def add_kernel(
|
||||
offsets = block_start + tl.arange(0, BLOCK_SIZE)
|
||||
# Create a mask to guard memory operations against out-of-bounds accesses
|
||||
mask = offsets < n_elements
|
||||
# Load x and y from DRAM, masking out any extra elements in case
|
||||
# the input is not a multiple of the block size
|
||||
# Load x and y from DRAM, masking out any extra elements in case the input is not a
|
||||
# multiple of the block size
|
||||
x = tl.load(x_ptr + offsets, mask=mask)
|
||||
y = tl.load(y_ptr + offsets, mask=mask)
|
||||
output = x + y
|
||||
|
Reference in New Issue
Block a user