Reduce string copies with Cow (#2075)
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 1ca5031..745c735 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -23,12 +23,15 @@
#[cfg(not(feature = "std"))]
use alloc::{
- borrow::ToOwned,
+ borrow::{Cow, ToOwned},
format,
string::{String, ToString},
vec,
vec::Vec,
};
+#[cfg(feature = "std")]
+use std::borrow::Cow;
+
use core::iter::Peekable;
use core::num::NonZeroU8;
use core::str::Chars;
@@ -934,7 +937,7 @@
chars: &mut State<'a>,
) -> Result<Option<Token>, TokenizerError> {
chars.next(); // consume the first char
- let word = self.tokenize_word(consumed_byte_len, chars);
+ let word = self.tokenize_word(consumed_byte_len, chars)?;
// TODO: implement parsing of exponent here
if word.chars().all(|x| x.is_ascii_digit() || x == '.') {
@@ -1008,7 +1011,7 @@
}
_ => {
// regular identifier starting with an "b" or "B"
- let s = self.tokenize_word(b.len_utf8(), chars);
+ let s = self.tokenize_word(b.len_utf8(), chars)?;
Ok(Some(Token::make_word(&s, None)))
}
}
@@ -1035,7 +1038,7 @@
),
_ => {
// regular identifier starting with an "r" or "R"
- let s = self.tokenize_word(b.len_utf8(), chars);
+ let s = self.tokenize_word(b.len_utf8(), chars)?;
Ok(Some(Token::make_word(&s, None)))
}
}
@@ -1054,7 +1057,7 @@
}
_ => {
// regular identifier starting with an "N"
- let s = self.tokenize_word(n.len_utf8(), chars);
+ let s = self.tokenize_word(n.len_utf8(), chars)?;
Ok(Some(Token::make_word(&s, None)))
}
}
@@ -1071,7 +1074,7 @@
}
_ => {
// regular identifier starting with an "E" or "e"
- let s = self.tokenize_word(x.len_utf8(), chars);
+ let s = self.tokenize_word(x.len_utf8(), chars)?;
Ok(Some(Token::make_word(&s, None)))
}
}
@@ -1090,7 +1093,7 @@
}
}
// regular identifier starting with an "U" or "u"
- let s = self.tokenize_word(x.len_utf8(), chars);
+ let s = self.tokenize_word(x.len_utf8(), chars)?;
Ok(Some(Token::make_word(&s, None)))
}
// The spec only allows an uppercase 'X' to introduce a hex
@@ -1105,7 +1108,7 @@
}
_ => {
// regular identifier starting with an "X"
- let s = self.tokenize_word(x.len_utf8(), chars);
+ let s = self.tokenize_word(x.len_utf8(), chars)?;
Ok(Some(Token::make_word(&s, None)))
}
}
@@ -1351,7 +1354,7 @@
if is_comment {
chars.next(); // consume second '-'
- let comment = self.tokenize_single_line_comment(chars);
+ let comment = self.tokenize_single_line_comment(chars)?;
return Ok(Some(Token::Whitespace(
Whitespace::SingleLineComment {
prefix: "--".to_owned(),
@@ -1382,7 +1385,7 @@
}
Some('/') if dialect_of!(self is SnowflakeDialect) => {
chars.next(); // consume the second '/', starting a snowflake single-line comment
- let comment = self.tokenize_single_line_comment(chars);
+ let comment = self.tokenize_single_line_comment(chars)?;
Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
prefix: "//".to_owned(),
comment,
@@ -1588,7 +1591,7 @@
'#' if dialect_of!(self is SnowflakeDialect | BigQueryDialect | MySqlDialect | HiveDialect) =>
{
chars.next(); // consume the '#', starting a snowflake single-line comment
- let comment = self.tokenize_single_line_comment(chars);
+ let comment = self.tokenize_single_line_comment(chars)?;
Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
prefix: "#".to_owned(),
comment,
@@ -1783,80 +1786,133 @@
}
/// Tokenize a dollar-preceded value (i.e., a string or placeholder)
- fn tokenize_dollar_preceded_value(&self, chars: &mut State) -> Result<Token, TokenizerError> {
- let mut s = String::new();
- let mut value = String::new();
+ fn tokenize_dollar_preceded_value(
+ &self,
+ chars: &mut State<'a>,
+ ) -> Result<Token, TokenizerError> {
+ let starting_loc = chars.location();
- chars.next();
+ // Validate we're at a $ before consuming
+ if chars.peek() != Some(&'$') {
+ return self.tokenizer_error(starting_loc, "Expected $ character");
+ }
+ chars.next(); // consume first $
- // If the dialect does not support dollar-quoted strings, then `$$` is rather a placeholder.
+ // Case 1: $$text$$ (untagged dollar-quoted string)
if matches!(chars.peek(), Some('$')) && !self.dialect.supports_dollar_placeholder() {
- chars.next();
+ let (value, tag) = self.tokenize_dollar_quoted_string_borrowed(chars, None)?;
+ return Ok(Token::DollarQuotedString(DollarQuotedString {
+ value: value.into_owned(),
+ tag: tag.map(|t| t.into_owned()),
+ }));
+ }
- let mut is_terminated = false;
- let mut prev: Option<char> = None;
+ // If it's not $$, there are two options:
+ // Case 2: $tag$text$tag$ (tagged dollar-quoted string) if dialect supports it
+ // Case 3: $placeholder (e.g., $1, $name)
+ let tag_start = chars.byte_pos;
+ // Consume the tag characters; the tag text is recovered below by byte range.
+ peeking_take_while_ref(chars, |ch| {
+ ch.is_alphanumeric()
+ || ch == '_'
+ || matches!(ch, '$' if self.dialect.supports_dollar_placeholder())
+ });
+ let tag_end = chars.byte_pos;
- while let Some(&ch) = chars.peek() {
- if prev == Some('$') {
- if ch == '$' {
- chars.next();
- is_terminated = true;
- break;
- } else {
- s.push('$');
- s.push(ch);
+ // Case 2: $tag$text$tag$ (tagged dollar-quoted string)
+ if matches!(chars.peek(), Some('$')) && !self.dialect.supports_dollar_placeholder() {
+ let tag_value = self.safe_slice(chars.source, tag_start, tag_end, starting_loc)?;
+ let (value, tag) =
+ self.tokenize_dollar_quoted_string_borrowed(chars, Some(tag_value))?;
+ return Ok(Token::DollarQuotedString(DollarQuotedString {
+ value: value.into_owned(),
+ tag: tag.map(|t| t.into_owned()),
+ }));
+ }
+
+ // Case 3: $placeholder (e.g., $1, $name)
+ let tag_value = self.safe_slice(chars.source, tag_start, tag_end, starting_loc)?;
+ Ok(Token::Placeholder(format!("${}", tag_value)))
+ }
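
// Illustrative sketch (not part of this diff): the three cases above exercised
// through the crate's public API; token shapes are taken from this function.
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::tokenizer::{Token, Tokenizer};

fn dollar_cases() {
    let dialect = PostgreSqlDialect {};
    // Case 1: $$text$$ -> untagged dollar-quoted string
    let t = Tokenizer::new(&dialect, "$$hello$$").tokenize().unwrap();
    assert!(matches!(&t[0], Token::DollarQuotedString(s) if s.value == "hello" && s.tag.is_none()));
    // Case 2: $tag$text$tag$ -> tagged dollar-quoted string
    let t = Tokenizer::new(&dialect, "$tag$text$tag$").tokenize().unwrap();
    assert!(matches!(&t[0], Token::DollarQuotedString(s) if s.value == "text" && s.tag.as_deref() == Some("tag")));
    // Case 3: $1 -> placeholder
    let t = Tokenizer::new(&dialect, "$1").tokenize().unwrap();
    assert_eq!(t[0], Token::Placeholder("$1".into()));
}
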
+
+ /// Tokenize a dollar-quoted string (`$$text$$` or `$tag$text$tag$`), returning borrowed slices.
+ /// `tag_prefix` is `None` for `$$` and `Some("tag")` for `$tag$`.
+ /// Returns `(value, tag)` as `(Cow<'a, str>, Option<Cow<'a, str>>)`.
+ fn tokenize_dollar_quoted_string_borrowed(
+ &self,
+ chars: &mut State<'a>,
+ tag_prefix: Option<&'a str>,
+ ) -> Result<(Cow<'a, str>, Option<Cow<'a, str>>), TokenizerError> {
+ let starting_loc = chars.location();
+
+ // Validate we're at a $ before consuming
+ if chars.peek() != Some(&'$') {
+ return self.tokenizer_error(starting_loc, "Expected $ for dollar-quoted string");
+ }
+ chars.next(); // consume $ after tag (or second $ for $$)
+ let content_start = chars.byte_pos;
+
+ match tag_prefix {
+ None => {
+ // Case: $$text$$
+ let mut prev: Option<char> = None;
+
+ while let Some(&ch) = chars.peek() {
+ if prev == Some('$') && ch == '$' {
+ chars.next(); // consume final $
+ // content_end is before the first $ of $$
+ let content_end = chars.byte_pos - 2;
+ let value = self.safe_slice(
+ chars.source,
+ content_start,
+ content_end,
+ starting_loc,
+ )?;
+ return Ok((Cow::Borrowed(value), None));
}
- } else if ch != '$' {
- s.push(ch);
+
+ prev = Some(ch);
+ chars.next();
}
- prev = Some(ch);
- chars.next();
- }
-
- return if chars.peek().is_none() && !is_terminated {
self.tokenizer_error(chars.location(), "Unterminated dollar-quoted string")
- } else {
- Ok(Token::DollarQuotedString(DollarQuotedString {
- value: s,
- tag: None,
- }))
- };
- } else {
- value.push_str(&peeking_take_while(chars, |ch| {
- ch.is_alphanumeric()
- || ch == '_'
- // Allow $ as a placeholder character if the dialect supports it
- || matches!(ch, '$' if self.dialect.supports_dollar_placeholder())
- }));
+ }
+ Some(tag) => {
+ // Case: $tag$text$tag$
+ let end_delimiter = format!("${}$", tag);
- // If the dialect does not support dollar-quoted strings, don't look for the end delimiter.
- if matches!(chars.peek(), Some('$')) && !self.dialect.supports_dollar_placeholder() {
- chars.next();
-
- let mut temp = String::new();
- let end_delimiter = format!("${value}$");
-
+ // Scan for the end delimiter
+ let buffer_start = content_start;
loop {
match chars.next() {
- Some(ch) => {
- temp.push(ch);
+ Some(_) => {
+ let current_pos = chars.byte_pos;
+ let buffer = self.safe_slice(
+ chars.source,
+ buffer_start,
+ current_pos,
+ starting_loc,
+ )?;
- if temp.ends_with(&end_delimiter) {
- if let Some(temp) = temp.strip_suffix(&end_delimiter) {
- s.push_str(temp);
- }
- break;
+ if buffer.ends_with(&end_delimiter) {
+ // Found the end delimiter
+ let content_end = current_pos - end_delimiter.len();
+ let value = self.safe_slice(
+ chars.source,
+ content_start,
+ content_end,
+ starting_loc,
+ )?;
+ return Ok((
+ Cow::Borrowed(value),
+ if tag.is_empty() {
+ None
+ } else {
+ Some(Cow::Borrowed(tag))
+ },
+ ));
}
}
None => {
- if temp.ends_with(&end_delimiter) {
- if let Some(temp) = temp.strip_suffix(&end_delimiter) {
- s.push_str(temp);
- }
- break;
- }
-
return self.tokenizer_error(
chars.location(),
"Unterminated dollar-quoted, expected $",
@@ -1864,15 +1920,23 @@
}
}
}
- } else {
- return Ok(Token::Placeholder(String::from("$") + &value));
}
}
+ }
- Ok(Token::DollarQuotedString(DollarQuotedString {
- value: s,
- tag: if value.is_empty() { None } else { Some(value) },
- }))
+ /// Helper function to safely slice a string with bounds validation
+ fn safe_slice<'b>(
+ &self,
+ source: &'b str,
+ start: usize,
+ end: usize,
+ error_loc: Location,
+ ) -> Result<&'b str, TokenizerError> {
+ // Validate slice bounds
+ if end < start || end > source.len() {
+ return self.tokenizer_error(error_loc, "Invalid string slice bounds");
+ }
+ Ok(&source[start..end])
}
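
// The checked-slice pattern in isolation (hypothetical standalone version; the
// real helper above reports a TokenizerError with a Location instead). Note the
// indices must also fall on char boundaries, which the tokenizer guarantees by
// only ever advancing byte_pos by len_utf8().
fn checked_slice(source: &str, start: usize, end: usize) -> Result<&str, String> {
    if end < start || end > source.len() {
        return Err(format!("invalid slice bounds {start}..{end}"));
    }
    Ok(&source[start..end])
}

fn checked_slice_demo() {
    assert_eq!(checked_slice("SELECT x", 0, 6), Ok("SELECT"));
    assert!(checked_slice("SELECT x", 6, 4).is_err()); // end < start
    assert!(checked_slice("SELECT x", 0, 99).is_err()); // end > len
}
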
fn tokenizer_error<R>(
@@ -1887,63 +1951,90 @@
}
// Consume characters until newline
- fn tokenize_single_line_comment(&self, chars: &mut State) -> String {
- let mut comment = peeking_take_while(chars, |ch| match ch {
+ fn tokenize_single_line_comment(
+ &self,
+ chars: &mut State<'a>,
+ ) -> Result<String, TokenizerError> {
+ Ok(self
+ .tokenize_single_line_comment_borrowed(chars)?
+ .to_string())
+ }
+
+ /// Tokenize a single-line comment, returning a borrowed slice.
+ /// Returns a slice that includes the terminating newline character, if one was present.
+ fn tokenize_single_line_comment_borrowed(
+ &self,
+ chars: &mut State<'a>,
+ ) -> Result<&'a str, TokenizerError> {
+ let start_pos = chars.byte_pos;
+ let error_loc = chars.location();
+
+ // Consume until newline
+ peeking_take_while_ref(chars, |ch| match ch {
'\n' => false, // Always stop at \n
'\r' if dialect_of!(self is PostgreSqlDialect) => false, // Stop at \r for Postgres
_ => true, // Keep consuming for other characters
});
+ // Consume the newline character
if let Some(ch) = chars.next() {
assert!(ch == '\n' || ch == '\r');
- comment.push(ch);
}
- comment
+ // Return the slice, including the newline when present
+ self.safe_slice(chars.source, start_pos, chars.byte_pos, error_loc)
}
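
// Sketch (assuming the crate's public API): the borrowed comment slice keeps the
// trailing newline, matching the previous String-building behavior.
use sqlparser::dialect::GenericDialect;
use sqlparser::tokenizer::{Token, Tokenizer, Whitespace};

fn comment_keeps_newline() {
    let dialect = GenericDialect {};
    let tokens = Tokenizer::new(&dialect, "1 -- hi\n2").tokenize().unwrap();
    assert!(tokens.iter().any(|tok| matches!(
        tok,
        Token::Whitespace(Whitespace::SingleLineComment { comment, .. }) if comment == " hi\n"
    )));
}
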
/// Tokenize an identifier or keyword, after the first char(s) have already been consumed.
/// `consumed_byte_len` is the byte length of the consumed character(s).
- fn tokenize_word(&self, consumed_byte_len: usize, chars: &mut State<'a>) -> String {
+ fn tokenize_word(
+ &self,
+ consumed_byte_len: usize,
+ chars: &mut State<'a>,
+ ) -> Result<String, TokenizerError> {
+ let error_loc = chars.location();
+
// Overflow check: ensure we can safely subtract
if consumed_byte_len > chars.byte_pos {
- return String::new();
+ return self.tokenizer_error(error_loc, "Invalid byte position in tokenize_word");
}
// Calculate where the first character started
let first_char_byte_pos = chars.byte_pos - consumed_byte_len;
// Use the zero-copy version and convert to String
- self.tokenize_word_borrowed(first_char_byte_pos, chars)
- .to_string()
+ Ok(self
+ .tokenize_word_borrowed(first_char_byte_pos, chars)?
+ .to_string())
}
/// Tokenize an identifier or keyword, returning a borrowed slice when possible.
/// The first character position must be provided (before it was consumed).
/// Returns a slice with the same lifetime as the State's source.
- fn tokenize_word_borrowed(&self, first_char_byte_pos: usize, chars: &mut State<'a>) -> &'a str {
+ fn tokenize_word_borrowed(
+ &self,
+ first_char_byte_pos: usize,
+ chars: &mut State<'a>,
+ ) -> Result<&'a str, TokenizerError> {
+ let error_loc = chars.location();
+
// Consume the rest of the word
peeking_take_while_ref(chars, |ch| self.dialect.is_identifier_part(ch));
- // Boundary check: ensure first_char_byte_pos is valid
- if first_char_byte_pos > chars.byte_pos || first_char_byte_pos > chars.source.len() {
- return "";
- }
-
- // Return a slice from the first char to the current position
- &chars.source[first_char_byte_pos..chars.byte_pos]
+ // Return a slice from the first char to the current position using safe_slice
+ self.safe_slice(chars.source, first_char_byte_pos, chars.byte_pos, error_loc)
}
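
// The zero-copy scan pattern used throughout this PR, in isolation: record a
// byte position, advance by char widths, slice once at the end. (Hypothetical
// standalone analogue of peeking_take_while_ref.)
fn take_while_at(source: &str, start: usize, pred: impl Fn(char) -> bool) -> &str {
    let mut end = start;
    for ch in source[start..].chars() {
        if !pred(ch) {
            break;
        }
        end += ch.len_utf8(); // keeps `end` on a char boundary
    }
    &source[start..end]
}

fn take_while_demo() {
    assert_eq!(take_while_at("foo_bar + 1", 0, |c| c.is_alphanumeric() || c == '_'), "foo_bar");
}
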
/// Read a quoted identifier
fn tokenize_quoted_identifier(
&self,
quote_start: char,
- chars: &mut State,
+ chars: &mut State<'a>,
) -> Result<String, TokenizerError> {
let error_loc = chars.location();
chars.next(); // consume the opening quote
let quote_end = Word::matching_end_quote(quote_start);
- let (s, last_char) = self.parse_quoted_ident(chars, quote_end);
+ let (s, last_char) = self.parse_quoted_ident(chars, quote_end)?;
if last_char == Some(quote_end) {
Ok(s)
@@ -2152,9 +2243,21 @@
fn tokenize_multiline_comment(
&self,
- chars: &mut State,
+ chars: &mut State<'a>,
) -> Result<Option<Token>, TokenizerError> {
- let mut s = String::new();
+ let s = self.tokenize_multiline_comment_borrowed(chars)?;
+ Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(
+ s.to_string(),
+ ))))
+ }
+
+ /// Tokenize a multi-line comment, returning a borrowed slice.
+ /// Returns a slice that excludes the opening `/*` (already consumed) and the final closing `*/`.
+ fn tokenize_multiline_comment_borrowed(
+ &self,
+ chars: &mut State<'a>,
+ ) -> Result<&'a str, TokenizerError> {
+ let start_pos = chars.byte_pos;
let mut nested = 1;
let supports_nested_comments = self.dialect.supports_nested_comments();
@@ -2162,24 +2265,22 @@
match chars.next() {
Some('/') if matches!(chars.peek(), Some('*')) && supports_nested_comments => {
chars.next(); // consume the '*'
- s.push('/');
- s.push('*');
nested += 1;
}
Some('*') if matches!(chars.peek(), Some('/')) => {
chars.next(); // consume the '/'
nested -= 1;
if nested == 0 {
- break Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(s))));
+ // We've consumed the final */, so exclude it from the slice
+ let end_pos = chars.byte_pos - 2; // Subtract 2 bytes for '*' and '/'
+ return self.safe_slice(chars.source, start_pos, end_pos, chars.location());
}
- s.push('*');
- s.push('/');
}
- Some(ch) => {
- s.push(ch);
+ Some(_) => {
+ // Just consume the character; the final borrowed slice covers it
}
None => {
- break self.tokenizer_error(
+ return self.tokenizer_error(
chars.location(),
"Unexpected EOF while in a multi-line comment",
);
@@ -2188,27 +2289,71 @@
}
}
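
// Sketch (assuming the crate's public API; PostgreSqlDialect supports nested
// comments): the borrowed slice spans everything between the outermost /* and
// */, nested pairs included.
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::tokenizer::{Token, Tokenizer, Whitespace};

fn nested_comment() {
    let dialect = PostgreSqlDialect {};
    let tokens = Tokenizer::new(&dialect, "/* a /* b */ c */").tokenize().unwrap();
    assert!(matches!(
        &tokens[0],
        Token::Whitespace(Whitespace::MultiLineComment(c)) if c == " a /* b */ c "
    ));
}
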
- fn parse_quoted_ident(&self, chars: &mut State, quote_end: char) -> (String, Option<char>) {
+ fn parse_quoted_ident(
+ &self,
+ chars: &mut State<'a>,
+ quote_end: char,
+ ) -> Result<(String, Option<char>), TokenizerError> {
+ let (cow, last_char) = self.parse_quoted_ident_borrowed(chars, quote_end)?;
+ Ok((cow.into_owned(), last_char))
+ }
+
+ /// Parse a quoted identifier, returning a borrowed slice when possible.
+ /// Returns `(Cow<'a, str>, Option<char>)` where the `Option<char>` is the closing quote.
+ fn parse_quoted_ident_borrowed(
+ &self,
+ chars: &mut State<'a>,
+ quote_end: char,
+ ) -> Result<(Cow<'a, str>, Option<char>), TokenizerError> {
+ let content_start = chars.byte_pos;
+ let mut has_doubled_quotes = false;
let mut last_char = None;
- let mut s = String::new();
+
+ // Scan to find the end and detect doubled quotes
while let Some(ch) = chars.next() {
if ch == quote_end {
if chars.peek() == Some("e_end) {
- chars.next();
- s.push(ch);
- if !self.unescape {
- // In no-escape mode, the given query has to be saved completely
- s.push(ch);
- }
+ has_doubled_quotes = true;
+ chars.next(); // consume the second quote
} else {
last_char = Some(quote_end);
break;
}
- } else {
- s.push(ch);
}
}
- (s, last_char)
+
+ let content_end = if last_char.is_some() {
+ chars.byte_pos - 1 // exclude the closing quote
+ } else {
+ chars.byte_pos
+ };
+
+ let content =
+ self.safe_slice(chars.source, content_start, content_end, chars.location())?;
+
+ // Borrow whenever the content needs no rewriting: either there are no
+ // doubled quotes, or we're in no-escape mode, where the query text must
+ // be preserved verbatim.
+ if !has_doubled_quotes || !self.unescape {
+ return Ok((Cow::Borrowed(content), last_char));
+ }
+
+ // Need to unescape: process doubled quotes
+ let mut result = String::new();
+ let mut chars_iter = content.chars();
+
+ while let Some(ch) = chars_iter.next() {
+ result.push(ch);
+ if ch == quote_end {
+ // This is the first of a doubled quote, skip the second one
+ chars_iter.next();
+ }
+ }
+
+ Ok((Cow::Owned(result), last_char))
}
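
// Sketch (assuming the crate's public API): only a doubled quote inside a quoted
// identifier forces the owned, unescaped path; plain contents stay borrowed.
use sqlparser::dialect::GenericDialect;
use sqlparser::tokenizer::{Token, Tokenizer};

fn quoted_ident() {
    let dialect = GenericDialect {};
    let tokens = Tokenizer::new(&dialect, r#"SELECT "a""b""#).tokenize().unwrap();
    assert!(tokens.iter().any(|tok| matches!(
        tok,
        Token::Word(w) if w.value == r#"a"b"# && w.quote_style == Some('"')
    )));
}
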
#[allow(clippy::unnecessary_wraps)]
@@ -2304,7 +2449,78 @@
}
fn unescape_single_quoted_string(chars: &mut State<'_>) -> Option<String> {
- Unescape::new(chars).unescape()
+ borrow_or_unescape_single_quoted_string(chars, true).map(|cow| cow.into_owned())
+}
+
+/// Scans a single-quoted string and returns either a borrowed slice or an unescaped owned string.
+///
+/// Strategy: scan once to find the end and detect escape sequences.
+/// - If no escapes exist (or `unescape` is false), return [`Cow::Borrowed`]
+/// - If escapes exist and `unescape` is true, reprocess using the existing [`Unescape`] logic
+fn borrow_or_unescape_single_quoted_string<'a>(
+ chars: &mut State<'a>,
+ unescape: bool,
+) -> Option<Cow<'a, str>> {
+ let content_start = chars.byte_pos;
+
+ // Validate we're at an opening quote before consuming
+ if chars.peek() != Some(&'\'') {
+ return None;
+ }
+ chars.next(); // consume opening '
+
+ // Scan to find end and check for escape sequences
+ let mut has_escapes = false;
+
+ loop {
+ match chars.next() {
+ Some('\'') => {
+ // Check for doubled single quote (escape)
+ if chars.peek() == Some(&'\'') {
+ has_escapes = true;
+ chars.next(); // consume the second '
+ } else {
+ // End of string found (including closing ')
+ let content_end = chars.byte_pos;
+ let full_content = &chars.source[content_start..content_end];
+
+ // If no unescaping needed, return borrowed (without quotes)
+ if !unescape || !has_escapes {
+ // Strip the opening and closing quotes; full_content should contain
+ // both (at least 2 bytes), but check defensively before slicing.
+ if full_content.len() < 2 {
+ return None;
+ }
+ return Some(Cow::Borrowed(&full_content[1..full_content.len() - 1]));
+ }
+
+ // Need to unescape - reprocess using existing logic
+ // Create a temporary State from the content
+ let mut temp_state = State {
+ peekable: full_content.chars().peekable(),
+ source: full_content,
+ line: 0,
+ col: 0,
+ byte_pos: 0,
+ };
+
+ return Unescape::new(&mut temp_state).unescape().map(Cow::Owned);
+ }
+ }
+ Some('\\') => {
+ has_escapes = true;
+ // Skip next character (it's escaped)
+ chars.next();
+ }
+ Some(_) => {
+ // Regular character, continue scanning
+ }
+ None => {
+ // Unexpected EOF
+ return None;
+ }
+ }
+ }
}
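
// The borrow-unless-escaped idea in isolation (hypothetical, simplified to
// doubled-quote escapes only; the real function above also handles backslashes).
use std::borrow::Cow;

fn unquote(src: &str) -> Option<Cow<'_, str>> {
    let inner = src.strip_prefix('\'')?.strip_suffix('\'')?;
    if inner.contains("''") {
        Some(Cow::Owned(inner.replace("''", "'"))) // escapes present: allocate
    } else {
        Some(Cow::Borrowed(inner)) // fast path: zero-copy
    }
}

fn unquote_demo() {
    assert!(matches!(unquote("'abc'"), Some(Cow::Borrowed("abc"))));
    assert_eq!(unquote("'it''s'").unwrap(), "it's");
}
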
struct Unescape<'a: 'b, 'b> {
@@ -2452,8 +2668,98 @@
}
fn unescape_unicode_single_quoted_string(chars: &mut State<'_>) -> Result<String, TokenizerError> {
+ borrow_or_unescape_unicode_single_quoted_string(chars, true).map(|cow| cow.into_owned())
+}
+
+/// Scans a unicode-escaped single-quoted string and returns either a borrowed slice or an unescaped owned string.
+///
+/// Strategy: scan once to find the end and detect escape sequences.
+/// - If no escapes exist (or `unescape` is false), return [`Cow::Borrowed`]
+/// - If escapes exist and `unescape` is true, reprocess with the unicode escaping logic
+fn borrow_or_unescape_unicode_single_quoted_string<'a>(
+ chars: &mut State<'a>,
+ unescape: bool,
+) -> Result<Cow<'a, str>, TokenizerError> {
+ let content_start = chars.byte_pos;
+ let error_loc = chars.location();
+
+ // Validate we're at an opening quote before consuming
+ if chars.peek() != Some(&'\'') {
+ return Err(TokenizerError {
+ message: "Expected opening quote for unicode string literal".to_string(),
+ location: error_loc,
+ });
+ }
+ chars.next(); // consume the opening quote
+
+ // Scan to find end and check for escape sequences
+ let mut has_escapes = false;
+
+ loop {
+ match chars.next() {
+ Some('\'') => {
+ // Check for doubled single quote (escape)
+ if chars.peek() == Some(&'\'') {
+ has_escapes = true;
+ chars.next(); // consume the second '
+ } else {
+ // End of string found (including closing ')
+ let content_end = chars.byte_pos;
+ let full_content = &chars.source[content_start..content_end];
+
+ // If no unescaping needed, return borrowed (without quotes)
+ if !unescape || !has_escapes {
+ // Strip the opening and closing quotes; full_content should contain
+ // both (at least 2 bytes), but check defensively before slicing.
+ if full_content.len() < 2 {
+ return Err(TokenizerError {
+ message: "Invalid unicode string literal".to_string(),
+ location: error_loc,
+ });
+ }
+ return Ok(Cow::Borrowed(&full_content[1..full_content.len() - 1]));
+ }
+
+ // Need to unescape - reprocess with unicode logic
+ // Create a temporary State from the content
+ let mut temp_state = State {
+ peekable: full_content.chars().peekable(),
+ source: full_content,
+ line: 0,
+ col: 0,
+ byte_pos: 0,
+ };
+
+ return process_unicode_string_with_escapes(&mut temp_state, error_loc)
+ .map(Cow::Owned);
+ }
+ }
+ Some('\\') => {
+ has_escapes = true;
+ // Skip next character (it's escaped or part of unicode sequence)
+ chars.next();
+ }
+ Some(_) => {
+ // Regular character, continue scanning
+ }
+ None => {
+ return Err(TokenizerError {
+ message: "Unterminated unicode encoded string literal".to_string(),
+ location: error_loc,
+ });
+ }
+ }
+ }
+}
+
+/// Process a unicode-escaped string using the original unescape logic
+fn process_unicode_string_with_escapes(
+ chars: &mut State<'_>,
+ error_loc: Location,
+) -> Result<String, TokenizerError> {
let mut unescaped = String::new();
chars.next(); // consume the opening quote
+
while let Some(c) = chars.next() {
match c {
'\'' => {
@@ -2480,9 +2786,10 @@
}
}
}
+
Err(TokenizerError {
message: "Unterminated unicode encoded string literal".to_string(),
- location: chars.location(),
+ location: error_loc,
})
}
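
// End-to-end sketch (assuming the crate's public API and the
// Token::UnicodeStringLiteral variant): a U&'...' literal without backslash
// escapes can now be borrowed; escaped ones still allocate.
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::tokenizer::{Token, Tokenizer};

fn unicode_literal() {
    let dialect = PostgreSqlDialect {};
    let tokens = Tokenizer::new(&dialect, r"U&'\0441\0442'").tokenize().unwrap();
    assert!(matches!(&tokens[0], Token::UnicodeStringLiteral(s) if s == "ст"));
}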