chore: remove repetitive words (#16957)
diff --git a/gallery/how_to/deploy_models/deploy_prequantized.py b/gallery/how_to/deploy_models/deploy_prequantized.py
index b93ed5e..c55e608 100644
--- a/gallery/how_to/deploy_models/deploy_prequantized.py
+++ b/gallery/how_to/deploy_models/deploy_prequantized.py
@@ -162,7 +162,7 @@
#
# You would see operators specific to quantization such as
# qnn.quantize, qnn.dequantize, qnn.requantize, and qnn.conv2d etc.
-input_name = "input" # the input name can be be arbitrary for PyTorch frontend.
+input_name = "input" # the input name can be arbitrary for the PyTorch frontend.
input_shapes = [(input_name, (1, 3, 224, 224))]
mod, params = relay.frontend.from_pytorch(script_module, input_shapes)
# print(mod) # comment in to see the QNN IR dump
diff --git a/include/tvm/relax/dataflow_pattern.h b/include/tvm/relax/dataflow_pattern.h
index 0d8e767..f7094b2 100644
--- a/include/tvm/relax/dataflow_pattern.h
+++ b/include/tvm/relax/dataflow_pattern.h
@@ -914,7 +914,7 @@
public:
String global_symbol_; /*!< The global symbol name of the external function */
- /*! \brief The the external function name */
+ /*! \brief The external function name */
const String& global_symbol() const { return global_symbol_; }
void VisitAttrs(tvm::AttrVisitor* v) { v->Visit("global_symbol", &global_symbol_); }
diff --git a/src/runtime/contrib/vllm/attention_kernels.cu b/src/runtime/contrib/vllm/attention_kernels.cu
index fe6e974..2b59044 100644
--- a/src/runtime/contrib/vllm/attention_kernels.cu
+++ b/src/runtime/contrib/vllm/attention_kernels.cu
@@ -145,7 +145,7 @@
// Load the query to registers.
// Each thread in a thread group has a different part of the query.
- // For example, if the the thread group size is 4, then the first thread in the group
+ // For example, if the thread group size is 4, then the first thread in the group
// has 0, 4, 8, ... th vectors of the query, and the second thread has 1, 5, 9, ...
// th vectors of the query, and so on.
// NOTE(woosuk): Because q is split from a qkv tensor, it may not be contiguous.
@@ -185,7 +185,7 @@
// Load a key to registers.
// Each thread in a thread group has a different part of the key.
- // For example, if the the thread group size is 4, then the first thread in the group
+ // For example, if the thread group size is 4, then the first thread in the group
// has 0, 4, 8, ... th vectors of the key, and the second thread has 1, 5, 9, ... th
// vectors of the key, and so on.
for (int i = 0; i < NUM_TOKENS_PER_THREAD_GROUP; i++) {
diff --git a/src/runtime/relax_vm/kv_state.h b/src/runtime/relax_vm/kv_state.h
index e3c6e96..7b90ffc 100644
--- a/src/runtime/relax_vm/kv_state.h
+++ b/src/runtime/relax_vm/kv_state.h
@@ -83,7 +83,7 @@
* with prefill length "10", "15", "20", then we pass `[5, 1, 8]`
* as the seq_ids and `[10, 15, 20]` as the append_lengths.
* This method is invoked right before entering the model forward
- * function, and contains operations to prepare the the incoming
+ * function, and contains operations to prepare the incoming
* forward. For instance, this method may send auxiliary KV cache
* data structures to GPUs so that they can be operated
* in the model forward function.
diff --git a/src/runtime/relax_vm/paged_kv_cache.cc b/src/runtime/relax_vm/paged_kv_cache.cc
index 9a17354..b07ae3d 100644
--- a/src/runtime/relax_vm/paged_kv_cache.cc
+++ b/src/runtime/relax_vm/paged_kv_cache.cc
@@ -85,7 +85,7 @@
int32_t start_pos = 0;
/*!
* \brief The current attention sink length of the block.
- * It means the the **first** sink size elements will be pinned
+ * It means the **first** `sink_length` elements will be pinned
* in the KV cache even when sliding window is enabled.
*/
int32_t sink_length = 0;
@@ -247,7 +247,7 @@
/*!
* \brief Copy the append length indptr array on device.
* \note Since the Q/K/V data may have raggedness in terms of lengths,
- * we represent the the append lengths in CSR format.
+ * we represent the append lengths in CSR format.
*/
virtual NDArray CopyCurAppendLengthIndptrAsync(std::vector<int32_t>* data) = 0;
/*! \brief Copy the k position offset of applying RoPE for each sequence. */