From 13669b46a6d67daa762dba4d6bf2bccbdaf99892 Mon Sep 17 00:00:00 2001
From: Shintaro Iwasaki
Date: Fri, 16 Sep 2022 15:07:34 -0700
Subject: [PATCH] [DOCS] Correct spelling (#665)

This PR corrects spelling like #664 for Triton-MLIR. It should not break
anything.
---
 cmake/FindLLVM.cmake                                     | 4 ++--
 docs/programming-guide/chapter-2/related-work.rst        | 2 +-
 include/triton/Conversion/TritonGPUToLLVM/PtxAsmFormat.h | 8 ++++----
 lib/Conversion/TritonGPUToLLVM/PtxAsmFormat.cpp          | 4 ++--
 lib/Dialect/TritonGPU/Transforms/Pipeline.cpp            | 2 +-
 lib/driver/llvm.cc                                       | 2 +-
 python/src/triton.cc                                     | 4 ++--
 python/triton/compiler.py                                | 4 ++--
 python/triton/language/core.py                           | 4 ++--
 python/triton/ops/blocksparse/matmul.py                  | 2 +-
 python/triton/tools/disasm.py                            | 2 +-
 python/tutorials/02-fused-softmax.py                     | 2 +-
 python/tutorials/03-matrix-multiplication.py             | 2 +-
 13 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/cmake/FindLLVM.cmake b/cmake/FindLLVM.cmake
index b615936e6..f9216a24e 100644
--- a/cmake/FindLLVM.cmake
+++ b/cmake/FindLLVM.cmake
@@ -25,7 +25,7 @@
 # LLVM_VERSION_STRING - Full LLVM version string (e.g. 6.0.0svn).
 # LLVM_VERSION_BASE_STRING - Base LLVM version string without git/svn suffix (e.g. 6.0.0).
 #
-# Note: The variable names were chosen in conformance with the offical CMake
+# Note: The variable names were chosen in conformance with the official CMake
 # guidelines, see ${CMAKE_ROOT}/Modules/readme.txt.
 
 # Try suffixed versions to pick up the newest LLVM install available on Debian
@@ -196,4 +196,4 @@
 include(FindPackageHandleStandardArgs)
 find_package_handle_standard_args(LLVM
                                   REQUIRED_VARS LLVM_ROOT_DIR
-                                  VERSION_VAR LLVM_VERSION_STRING)
\ No newline at end of file
+                                  VERSION_VAR LLVM_VERSION_STRING)
diff --git a/docs/programming-guide/chapter-2/related-work.rst b/docs/programming-guide/chapter-2/related-work.rst
index bb83d4851..e21ec4de7 100644
--- a/docs/programming-guide/chapter-2/related-work.rst
+++ b/docs/programming-guide/chapter-2/related-work.rst
@@ -14,7 +14,7 @@ Traditional compilers typically rely on intermediate representations, such as LL
 Program Representation
 +++++++++++++++++++++++
 
-Polyhedral compilation is a vast area of research. In this section we only outline the most basic aspects of this topic, but readers interested in the solid mathematical foundations underneath may refer to the ample litterature on linear and integer programming.
+Polyhedral compilation is a vast area of research. In this section we only outline the most basic aspects of this topic, but readers interested in the solid mathematical foundations underneath may refer to the ample literature on linear and integer programming.
 
 .. table::
    :widths: 50 50
diff --git a/include/triton/Conversion/TritonGPUToLLVM/PtxAsmFormat.h b/include/triton/Conversion/TritonGPUToLLVM/PtxAsmFormat.h
index 7cecd59da..9a082a1ec 100644
--- a/include/triton/Conversion/TritonGPUToLLVM/PtxAsmFormat.h
+++ b/include/triton/Conversion/TritonGPUToLLVM/PtxAsmFormat.h
@@ -44,7 +44,7 @@ class PTXInstrExecution;
 //
 //   builder.getAllMlirArgs() // get {pVal, iVal, jVal, kVal}
 //
-// To get the string containing all the contraints with "," seperated,
+// To get the string containing all the constraints with "," separated,
 //   builder.getConstraints() // get "=r,r,k"
 //
 // PTXBuilder can build a PTX asm with multiple instructions, sample code:
@@ -107,10 +107,10 @@ struct PTXBuilder {
   // Create a new operand. It will not add to operand list.
   // @value: the MLIR value bind to this operand.
   // @constraint: ASM operand constraint, .e.g. "=r"
-  // @formater: extra format to represent this operand in ASM code, default is
-  //            "%{0}".format(operand.idx).
+  // @formatter: extra format to represent this operand in ASM code, default is
+  //             "%{0}".format(operand.idx).
   Operand *newOperand(mlir::Value value, StringRef constraint,
-                      std::function<std::string(int)> formater = nullptr);
+                      std::function<std::string(int)> formatter = nullptr);
 
   // Create a new operand which is written to, that is, the constraint starts
   // with "=", e.g. "=r".
diff --git a/lib/Conversion/TritonGPUToLLVM/PtxAsmFormat.cpp b/lib/Conversion/TritonGPUToLLVM/PtxAsmFormat.cpp
index 1299a083f..9eeb49995 100644
--- a/lib/Conversion/TritonGPUToLLVM/PtxAsmFormat.cpp
+++ b/lib/Conversion/TritonGPUToLLVM/PtxAsmFormat.cpp
@@ -20,10 +20,10 @@ std::string strJoin(llvm::ArrayRef<std::string> strs,
 
 PTXInstr::Operand *
 PTXBuilder::newOperand(mlir::Value value, StringRef constraint,
-                       std::function<std::string(int)> formater) {
+                       std::function<std::string(int)> formatter) {
   argArchive.emplace_back(std::make_unique<Operand>(value, constraint));
   auto *opr = argArchive.back().get();
-  opr->repr = formater;
+  opr->repr = formatter;
   opr->idx = oprCounter++;
   return opr;
 }
diff --git a/lib/Dialect/TritonGPU/Transforms/Pipeline.cpp b/lib/Dialect/TritonGPU/Transforms/Pipeline.cpp
index 0b40c9df4..0203ddd2b 100644
--- a/lib/Dialect/TritonGPU/Transforms/Pipeline.cpp
+++ b/lib/Dialect/TritonGPU/Transforms/Pipeline.cpp
@@ -25,7 +25,7 @@ class LoopPipeliner {
   /// cache forOp we are working on
   scf::ForOp forOp;
 
-  /// cahce YieldOp for this forOp
+  /// cache YieldOp for this forOp
   scf::YieldOp yieldOp;
 
   /// loads to be pipelined
diff --git a/lib/driver/llvm.cc b/lib/driver/llvm.cc
index 8f5618d43..140eff6cd 100644
--- a/lib/driver/llvm.cc
+++ b/lib/driver/llvm.cc
@@ -103,7 +103,7 @@ static bool find_and_replace(std::string &str, const std::string &begin,
 std::string path_to_ptxas(int &version) {
   std::vector<std::string> rets;
   std::string ret;
-  // search pathes for ptxas
+  // search paths for ptxas
   std::vector<std::string> ptxas_prefixes = {"", "/usr/local/cuda/bin/"};
   std::string triton_ptxas = tools::getenv("TRITON_PTXAS_PATH");
   if (!triton_ptxas.empty())
diff --git a/python/src/triton.cc b/python/src/triton.cc
index 739dcdc69..20d12ce56 100644
--- a/python/src/triton.cc
+++ b/python/src/triton.cc
@@ -229,7 +229,7 @@ void parse_args(py::list &args, py::list do_not_specialize,
       // copy param
       std::memcpy(params_ptr, &value, 8);
       params_ptr += 8;
-      // udpate cache key
+      // update cache key
       cache_key += dtype_cache_key_part(arg.attr("dtype"));
       cache_key += "*";
       cache_key += "[multipleof(";
@@ -330,7 +330,7 @@ void parse_args(py::list &args, py::list &arg_names, std::string &params,
       // copy param
       std::memcpy(params_ptr, &value, 8);
       params_ptr += 8;
-      // udpate cache key
+      // update cache key
       continue;
     }
     // argument is `constexpr`
diff --git a/python/triton/compiler.py b/python/triton/compiler.py
index 2d871f523..e18f61ec2 100644
--- a/python/triton/compiler.py
+++ b/python/triton/compiler.py
@@ -53,7 +53,7 @@ def mangle_ty(ty):
         elt = mangle_ty(ty.scalar)
         shape = '_'.join(map(str, ty.shape))
         return f'{elt}S{shape}S'
-    assert False, "Unsupport type"
+    assert False, "Unsupported type"
 
 
 def mangle_fn(name, arg_tys, constants):
@@ -464,7 +464,7 @@ class CodeGenerator(ast.NodeVisitor):
         with enter_sub_region(self) as sr:
             liveins, insert_block = sr
 
-            # condtion (the before region)
+            # condition (the before region)
             cond_block = self.builder.create_block()
             self.builder.set_insertion_point_to_start(cond_block)
             cond = self.visit(node.test)
diff --git a/python/triton/language/core.py b/python/triton/language/core.py
index 39245676b..33f3e7d41 100644
--- a/python/triton/language/core.py
+++ b/python/triton/language/core.py
@@ -185,7 +185,7 @@ class dtype:
             return builder.get_float_ty()
         elif self.name == 'fp64':
             return builder.get_double_ty()
-        raise ValueError(f'fail to covert {self} to ir type')
+        raise ValueError(f'fail to convert {self} to ir type')
 
     def __str__(self):
         return self.name
@@ -895,7 +895,7 @@ def where(condition, x, y, _builder=None):
     Note that :code:`x` and :code:`y` are always evaluated
     regardless of the value of :code:`condition`.
 
-    If you want to avoid unintented memory operations, use the :code:`mask` arguments in `triton.load` and `triton.store` instead.
+    If you want to avoid unintended memory operations, use the :code:`mask` arguments in `triton.load` and `triton.store` instead.
 
     The shape of :code:`x` and :code:`y` are both broadcast to the shape of :code:`condition`.
     :code:`x` and :code:`y` must have the data type.
diff --git a/python/triton/ops/blocksparse/matmul.py b/python/triton/ops/blocksparse/matmul.py
index 0fa1a5878..4b6d98aac 100644
--- a/python/triton/ops/blocksparse/matmul.py
+++ b/python/triton/ops/blocksparse/matmul.py
@@ -328,7 +328,7 @@ def dsd_lut(layout, block, step, trans, device):
     # create increments
     incs = torch.stack((B_incs, A_incs), dim=1).view(-1).contiguous()
     # pad by a factor 2*MAX_NUM_STAGES
-    # to accomodate pre-fetching inside the kernel
+    # to accommodate pre-fetching inside the kernel
     pad = torch.zeros(20, device=incs.device, dtype=incs.dtype)
     incs = torch.cat((incs, pad))
     # create lut
diff --git a/python/triton/tools/disasm.py b/python/triton/tools/disasm.py
index 3672d4b05..24a0787c5 100644
--- a/python/triton/tools/disasm.py
+++ b/python/triton/tools/disasm.py
@@ -104,7 +104,7 @@ def extract(file_path, fun):
         # peek the next line
         line = sass_lines[line_idx].decode()
     # Print sass
-    # label naming convension: LBB#i
+    # label naming convention: LBB#i
     for idx, (ctrl, asm) in enumerate(asm_buffer):
         # Print label if this is BRA target
         offset = idx * 16
diff --git a/python/tutorials/02-fused-softmax.py b/python/tutorials/02-fused-softmax.py
index 7af24e18d..7447b60af 100644
--- a/python/tutorials/02-fused-softmax.py
+++ b/python/tutorials/02-fused-softmax.py
@@ -78,7 +78,7 @@ def softmax_kernel(
     input_ptrs = row_start_ptr + col_offsets
     # Load the row into SRAM, using a mask since BLOCK_SIZE may be > than n_cols
     row = tl.load(input_ptrs, mask=col_offsets < n_cols, other=-float('inf'))
-    # Substract maximum for numerical stability
+    # Subtract maximum for numerical stability
    row_minus_max = row - tl.max(row, axis=0)
    # Note that exponentials in Triton are fast but approximate (i.e., think __expf in CUDA)
    numerator = tl.exp(row_minus_max)
diff --git a/python/tutorials/03-matrix-multiplication.py b/python/tutorials/03-matrix-multiplication.py
index f773a3787..e323c1d21 100644
--- a/python/tutorials/03-matrix-multiplication.py
+++ b/python/tutorials/03-matrix-multiplication.py
@@ -18,7 +18,7 @@ You will specifically learn about:
 # They are notoriously hard to optimize, hence their implementation is generally done by
 # hardware vendors themselves as part of so-called "kernel libraries" (e.g., cuBLAS).
 # Unfortunately, these libraries are often proprietary and cannot be easily customized
-# to accomodate the needs of modern deep learning workloads (e.g., fused activation functions).
+# to accommodate the needs of modern deep learning workloads (e.g., fused activation functions).
 # In this tutorial, you will learn how to implement efficient matrix multiplications by
 # yourself with Triton, in a way that is easy to customize and extend.
 #