| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| //! SQL Tokenizer |
| //! |
| //! The tokenizer (a.k.a. lexer) converts a string into a sequence of tokens. |
| //! |
| //! The tokens then form the input for the parser, which outputs an Abstract Syntax Tree (AST). |
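| //! |
| //! A minimal usage sketch (mirroring the [`Tokenizer::new`] example below): |
| //! |
| //! ``` |
| //! # use sqlparser::dialect::GenericDialect; |
| //! # use sqlparser::tokenizer::Tokenizer; |
| //! let dialect = GenericDialect {}; |
| //! let tokens = Tokenizer::new(&dialect, "SELECT 1").tokenize().unwrap(); |
| //! assert_eq!(tokens.len(), 3); // `SELECT`, whitespace, `1` |
| //! ``` |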
| |
| #[cfg(not(feature = "std"))] |
| use alloc::{ |
| borrow::ToOwned, |
| format, |
| string::{String, ToString}, |
| vec, |
| vec::Vec, |
| }; |
| use core::iter::Peekable; |
| use core::num::NonZeroU8; |
| use core::str::Chars; |
| use core::{cmp, fmt}; |
| |
| #[cfg(feature = "serde")] |
| use serde::{Deserialize, Serialize}; |
| |
| #[cfg(feature = "visitor")] |
| use sqlparser_derive::{Visit, VisitMut}; |
| |
| use crate::dialect::Dialect; |
| use crate::dialect::{ |
| BigQueryDialect, DuckDbDialect, GenericDialect, MySqlDialect, PostgreSqlDialect, |
| SnowflakeDialect, |
| }; |
| use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX}; |
| use crate::{ast::DollarQuotedString, dialect::HiveDialect}; |
| |
| /// SQL Token enumeration |
| #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] |
| #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] |
| #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] |
| pub enum Token { |
| /// An end-of-file marker, not a real token |
| EOF, |
| /// A keyword (like SELECT) or an optionally quoted SQL identifier |
| Word(Word), |
| /// An unsigned numeric literal; the `bool` flag is true when the literal |
| /// has a trailing `L` (long) suffix |
| Number(String, bool), |
| /// A character that could not be tokenized |
| Char(char), |
| /// Single quoted string: e.g. 'string' |
| SingleQuotedString(String), |
| /// Double quoted string: e.g. "string" |
| DoubleQuotedString(String), |
| /// Triple single quoted strings: Example '''abc''' |
| /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals) |
| TripleSingleQuotedString(String), |
| /// Triple double quoted strings: Example """abc""" |
| /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals) |
| TripleDoubleQuotedString(String), |
| /// Dollar quoted string: e.g. $$string$$ or $tag_name$string$tag_name$ |
| DollarQuotedString(DollarQuotedString), |
| /// Byte string literal: e.g. b'string' or B'string' (note that some backends, such as |
| /// PostgreSQL, may treat this syntax as a bit string literal instead, e.g. b'10010101') |
| SingleQuotedByteStringLiteral(String), |
| /// Byte string literal: e.g. b"string" or B"string" |
| DoubleQuotedByteStringLiteral(String), |
| /// Triple single quoted literal with byte string prefix. Example `B'''abc'''` |
| /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals) |
| TripleSingleQuotedByteStringLiteral(String), |
| /// Triple double quoted literal with byte string prefix. Example `B"""abc"""` |
| /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals) |
| TripleDoubleQuotedByteStringLiteral(String), |
| /// Single quoted literal with raw string prefix. Example `R'abc'` |
| /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals) |
| SingleQuotedRawStringLiteral(String), |
| /// Double quoted literal with raw string prefix. Example `R"abc"` |
| /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals) |
| DoubleQuotedRawStringLiteral(String), |
| /// Triple single quoted literal with raw string prefix. Example `R'''abc'''` |
| /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals) |
| TripleSingleQuotedRawStringLiteral(String), |
| /// Triple double quoted literal with raw string prefix. Example `R"""abc"""` |
| /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals) |
| TripleDoubleQuotedRawStringLiteral(String), |
| /// "National" string literal: i.e: N'string' |
| NationalStringLiteral(String), |
| /// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second' |
| EscapedStringLiteral(String), |
| /// Unicode string literal: i.e: U&'first \000A second' |
| UnicodeStringLiteral(String), |
| /// Hexadecimal string literal: i.e.: X'deadbeef' |
| HexStringLiteral(String), |
| /// Comma |
| Comma, |
| /// Whitespace (space, tab, etc) |
| Whitespace(Whitespace), |
| /// Double equals sign `==` |
| DoubleEq, |
| /// Equality operator `=` |
| Eq, |
| /// Not Equals operator `<>` (or `!=` in some dialects) |
| Neq, |
| /// Less Than operator `<` |
| Lt, |
| /// Greater Than operator `>` |
| Gt, |
| /// Less Than Or Equals operator `<=` |
| LtEq, |
| /// Greater Than Or Equals operator `>=` |
| GtEq, |
| /// Spaceship operator <=> |
| Spaceship, |
| /// Plus operator `+` |
| Plus, |
| /// Minus operator `-` |
| Minus, |
| /// Multiplication operator `*` |
| Mul, |
| /// Division operator `/` |
| Div, |
| /// Integer division operator `//` in DuckDB |
| DuckIntDiv, |
| /// Modulo Operator `%` |
| Mod, |
| /// String concatenation `||` |
| StringConcat, |
| /// Left parenthesis `(` |
| LParen, |
| /// Right parenthesis `)` |
| RParen, |
| /// Period (used for compound identifiers or projections into nested types) |
| Period, |
| /// Colon `:` |
| Colon, |
| /// DoubleColon `::` (used for casting in PostgreSQL) |
| DoubleColon, |
| /// Assignment `:=` (used for keyword arguments in DuckDB macros and some functions, and for variable declarations in DuckDB and Snowflake) |
| Assignment, |
| /// SemiColon `;` used as a separator, e.g. between a COPY statement and its payload |
| SemiColon, |
| /// Backslash `\` used to terminate the COPY payload with `\.` |
| Backslash, |
| /// Left bracket `[` |
| LBracket, |
| /// Right bracket `]` |
| RBracket, |
| /// Ampersand `&` |
| Ampersand, |
| /// Pipe `|` |
| Pipe, |
| /// Caret `^` |
| Caret, |
| /// Left brace `{` |
| LBrace, |
| /// Right brace `}` |
| RBrace, |
| /// Right Arrow `=>` |
| RArrow, |
| /// Sharp `#` used for PostgreSQL Bitwise XOR operator, also PostgreSQL/Redshift geometrical unary/binary operator (Number of points in path or polygon/Intersection) |
| Sharp, |
| /// `##` PostgreSQL/Redshift geometrical binary operator (Point of closest proximity) |
| DoubleSharp, |
| /// Tilde `~` used for PostgreSQL Bitwise NOT operator or case sensitive match regular expression operator |
| Tilde, |
| /// `~*` , a case insensitive match regular expression operator in PostgreSQL |
| TildeAsterisk, |
| /// `!~` , a case sensitive not match regular expression operator in PostgreSQL |
| ExclamationMarkTilde, |
| /// `!~*` , a case insensitive not match regular expression operator in PostgreSQL |
| ExclamationMarkTildeAsterisk, |
| /// `~~`, a case sensitive match pattern operator in PostgreSQL |
| DoubleTilde, |
| /// `~~*`, a case insensitive match pattern operator in PostgreSQL |
| DoubleTildeAsterisk, |
| /// `!~~`, a case sensitive not match pattern operator in PostgreSQL |
| ExclamationMarkDoubleTilde, |
| /// `!~~*`, a case insensitive not match pattern operator in PostgreSQL |
| ExclamationMarkDoubleTildeAsterisk, |
| /// `<<`, a bitwise shift left operator in PostgreSQL |
| ShiftLeft, |
| /// `>>`, a bitwise shift right operator in PostgreSQL |
| ShiftRight, |
| /// `&&`, an overlap operator in PostgreSQL |
| Overlap, |
| /// Exclamation Mark `!` used for PostgreSQL factorial operator |
| ExclamationMark, |
| /// Double Exclamation Mark `!!` used for PostgreSQL prefix factorial operator |
| DoubleExclamationMark, |
| /// AtSign `@` used for PostgreSQL abs operator, also PostgreSQL/Redshift geometrical unary/binary operator (Center, Contained or on) |
| AtSign, |
| /// `^@`, a "starts with" string operator in PostgreSQL |
| CaretAt, |
| /// `|/`, a square root math operator in PostgreSQL |
| PGSquareRoot, |
| /// `||/`, a cube root math operator in PostgreSQL |
| PGCubeRoot, |
| /// `?` or `$`, a prepared statement argument placeholder |
| Placeholder(String), |
| /// `->`, used as an operator to extract a JSON field in PostgreSQL |
| Arrow, |
| /// `->>`, used as an operator to extract a JSON field as text in PostgreSQL |
| LongArrow, |
| /// `#>`, extracts JSON sub-object at the specified path |
| HashArrow, |
| /// `@-@` PostgreSQL/Redshift geometrical unary operator (Length or circumference) |
| AtDashAt, |
| /// `?-` PostgreSQL/Redshift geometrical unary/binary operator (Is horizontal?/Are horizontally aligned?) |
| QuestionMarkDash, |
| /// `&<` PostgreSQL/Redshift geometrical binary operator (Overlaps to left?) |
| AmpersandLeftAngleBracket, |
| /// `&>` PostgreSQL/Redshift geometrical binary operator (Overlaps to right?) |
| AmpersandRightAngleBracket, |
| /// `&<|` PostgreSQL/Redshift geometrical binary operator (Does not extend above?) |
| AmpersandLeftAngleBracketVerticalBar, |
| /// `|&>` PostgreSQL/Redshift geometrical binary operator (Does not extend below?) |
| VerticalBarAmpersandRightAngleBracket, |
| /// `<->` PostgreSQL/Redshift geometrical binary operator (Distance between) |
| TwoWayArrow, |
| /// `<^` PostgreSQL/Redshift geometrical binary operator (Is below?) |
| LeftAngleBracketCaret, |
| /// `>^` PostgreSQL/Redshift geometrical binary operator (Is above?) |
| RightAngleBracketCaret, |
| /// `?#` PostgreSQL/Redshift geometrical binary operator (Intersects or overlaps) |
| QuestionMarkSharp, |
| /// `?-|` PostgreSQL/Redshift geometrical binary operator (Is perpendicular?) |
| QuestionMarkDashVerticalBar, |
| /// `?||` PostgreSQL/Redshift geometrical binary operator (Are parallel?) |
| QuestionMarkDoubleVerticalBar, |
| /// `~=` PostgreSQL/Redshift geometrical binary operator (Same as) |
| TildeEqual, |
| /// `<<|` PostgreSQL/Redshift geometrical binary operator (Is strictly below?) |
| ShiftLeftVerticalBar, |
| /// `|>>` PostgreSQL/Redshift geometrical binary operator (Is strictly above?) |
| VerticalBarShiftRight, |
| /// `|>` BigQuery pipe operator |
| VerticalBarRightAngleBracket, |
| /// `#>>`, extracts JSON sub-object at the specified path as text |
| HashLongArrow, |
| /// jsonb @> jsonb -> boolean: Test whether left json contains the right json |
| AtArrow, |
| /// jsonb <@ jsonb -> boolean: Test whether right json contains the left json |
| ArrowAt, |
| /// jsonb #- text[] -> jsonb: Deletes the field or array element at the specified |
| /// path, where path elements can be either field keys or array indexes. |
| HashMinus, |
| /// jsonb @? jsonpath -> boolean: Does JSON path return any item for the specified |
| /// JSON value? |
| AtQuestion, |
| /// jsonb @@ jsonpath -> boolean: Returns the result of a JSON path predicate check |
| /// for the specified JSON value. Only the first item of the result is taken into |
| /// account. If the result is not Boolean, then NULL is returned. |
| AtAt, |
| /// jsonb ? text -> boolean: Checks whether the string exists as a top-level key within the |
| /// jsonb object |
| Question, |
| /// jsonb ?& text[] -> boolean: Check whether all members of the text array exist as top-level |
| /// keys within the jsonb object |
| QuestionAnd, |
| /// jsonb ?| text[] -> boolean: Check whether any member of the text array exists as top-level |
| /// keys within the jsonb object |
| QuestionPipe, |
| /// Custom binary operator |
| /// This is used to represent any custom binary operator that is not part of the SQL standard. |
| /// PostgreSQL allows defining custom binary operators using CREATE OPERATOR. |
| CustomBinaryOperator(String), |
| } |
| |
| impl fmt::Display for Token { |
| fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| match self { |
| Token::EOF => f.write_str("EOF"), |
| Token::Word(ref w) => write!(f, "{w}"), |
| Token::Number(ref n, l) => write!(f, "{}{long}", n, long = if *l { "L" } else { "" }), |
| Token::Char(ref c) => write!(f, "{c}"), |
| Token::SingleQuotedString(ref s) => write!(f, "'{s}'"), |
| Token::TripleSingleQuotedString(ref s) => write!(f, "'''{s}'''"), |
| Token::DoubleQuotedString(ref s) => write!(f, "\"{s}\""), |
| Token::TripleDoubleQuotedString(ref s) => write!(f, "\"\"\"{s}\"\"\""), |
| Token::DollarQuotedString(ref s) => write!(f, "{s}"), |
| Token::NationalStringLiteral(ref s) => write!(f, "N'{s}'"), |
| Token::EscapedStringLiteral(ref s) => write!(f, "E'{s}'"), |
| Token::UnicodeStringLiteral(ref s) => write!(f, "U&'{s}'"), |
| Token::HexStringLiteral(ref s) => write!(f, "X'{s}'"), |
| Token::SingleQuotedByteStringLiteral(ref s) => write!(f, "B'{s}'"), |
| Token::TripleSingleQuotedByteStringLiteral(ref s) => write!(f, "B'''{s}'''"), |
| Token::DoubleQuotedByteStringLiteral(ref s) => write!(f, "B\"{s}\""), |
| Token::TripleDoubleQuotedByteStringLiteral(ref s) => write!(f, "B\"\"\"{s}\"\"\""), |
| Token::SingleQuotedRawStringLiteral(ref s) => write!(f, "R'{s}'"), |
| Token::DoubleQuotedRawStringLiteral(ref s) => write!(f, "R\"{s}\""), |
| Token::TripleSingleQuotedRawStringLiteral(ref s) => write!(f, "R'''{s}'''"), |
| Token::TripleDoubleQuotedRawStringLiteral(ref s) => write!(f, "R\"\"\"{s}\"\"\""), |
| Token::Comma => f.write_str(","), |
| Token::Whitespace(ws) => write!(f, "{ws}"), |
| Token::DoubleEq => f.write_str("=="), |
| Token::Spaceship => f.write_str("<=>"), |
| Token::Eq => f.write_str("="), |
| Token::Neq => f.write_str("<>"), |
| Token::Lt => f.write_str("<"), |
| Token::Gt => f.write_str(">"), |
| Token::LtEq => f.write_str("<="), |
| Token::GtEq => f.write_str(">="), |
| Token::Plus => f.write_str("+"), |
| Token::Minus => f.write_str("-"), |
| Token::Mul => f.write_str("*"), |
| Token::Div => f.write_str("/"), |
| Token::DuckIntDiv => f.write_str("//"), |
| Token::StringConcat => f.write_str("||"), |
| Token::Mod => f.write_str("%"), |
| Token::LParen => f.write_str("("), |
| Token::RParen => f.write_str(")"), |
| Token::Period => f.write_str("."), |
| Token::Colon => f.write_str(":"), |
| Token::DoubleColon => f.write_str("::"), |
| Token::Assignment => f.write_str(":="), |
| Token::SemiColon => f.write_str(";"), |
| Token::Backslash => f.write_str("\\"), |
| Token::LBracket => f.write_str("["), |
| Token::RBracket => f.write_str("]"), |
| Token::Ampersand => f.write_str("&"), |
| Token::Caret => f.write_str("^"), |
| Token::Pipe => f.write_str("|"), |
| Token::LBrace => f.write_str("{"), |
| Token::RBrace => f.write_str("}"), |
| Token::RArrow => f.write_str("=>"), |
| Token::Sharp => f.write_str("#"), |
| Token::DoubleSharp => f.write_str("##"), |
| Token::ExclamationMark => f.write_str("!"), |
| Token::DoubleExclamationMark => f.write_str("!!"), |
| Token::Tilde => f.write_str("~"), |
| Token::TildeAsterisk => f.write_str("~*"), |
| Token::ExclamationMarkTilde => f.write_str("!~"), |
| Token::ExclamationMarkTildeAsterisk => f.write_str("!~*"), |
| Token::DoubleTilde => f.write_str("~~"), |
| Token::DoubleTildeAsterisk => f.write_str("~~*"), |
| Token::ExclamationMarkDoubleTilde => f.write_str("!~~"), |
| Token::ExclamationMarkDoubleTildeAsterisk => f.write_str("!~~*"), |
| Token::AtSign => f.write_str("@"), |
| Token::CaretAt => f.write_str("^@"), |
| Token::ShiftLeft => f.write_str("<<"), |
| Token::ShiftRight => f.write_str(">>"), |
| Token::Overlap => f.write_str("&&"), |
| Token::PGSquareRoot => f.write_str("|/"), |
| Token::PGCubeRoot => f.write_str("||/"), |
| Token::AtDashAt => f.write_str("@-@"), |
| Token::QuestionMarkDash => f.write_str("?-"), |
| Token::AmpersandLeftAngleBracket => f.write_str("&<"), |
| Token::AmpersandRightAngleBracket => f.write_str("&>"), |
| Token::AmpersandLeftAngleBracketVerticalBar => f.write_str("&<|"), |
| Token::VerticalBarAmpersandRightAngleBracket => f.write_str("|&>"), |
| Token::VerticalBarRightAngleBracket => f.write_str("|>"), |
| Token::TwoWayArrow => f.write_str("<->"), |
| Token::LeftAngleBracketCaret => f.write_str("<^"), |
| Token::RightAngleBracketCaret => f.write_str(">^"), |
| Token::QuestionMarkSharp => f.write_str("?#"), |
| Token::QuestionMarkDashVerticalBar => f.write_str("?-|"), |
| Token::QuestionMarkDoubleVerticalBar => f.write_str("?||"), |
| Token::TildeEqual => f.write_str("~="), |
| Token::ShiftLeftVerticalBar => f.write_str("<<|"), |
| Token::VerticalBarShiftRight => f.write_str("|>>"), |
| Token::Placeholder(ref s) => write!(f, "{s}"), |
| Token::Arrow => write!(f, "->"), |
| Token::LongArrow => write!(f, "->>"), |
| Token::HashArrow => write!(f, "#>"), |
| Token::HashLongArrow => write!(f, "#>>"), |
| Token::AtArrow => write!(f, "@>"), |
| Token::ArrowAt => write!(f, "<@"), |
| Token::HashMinus => write!(f, "#-"), |
| Token::AtQuestion => write!(f, "@?"), |
| Token::AtAt => write!(f, "@@"), |
| Token::Question => write!(f, "?"), |
| Token::QuestionAnd => write!(f, "?&"), |
| Token::QuestionPipe => write!(f, "?|"), |
| Token::CustomBinaryOperator(s) => f.write_str(s), |
| } |
| } |
| } |
| |
| impl Token { |
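| /// Create a keyword token (e.g. for `SELECT`) from an unquoted word, |
| /// delegating to [`Token::make_word`] with no quote style. |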
| pub fn make_keyword(keyword: &str) -> Self { |
| Token::make_word(keyword, None) |
| } |
| |
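| /// Create a `Token::Word` from a word and an optional quote style. |
| /// Keyword detection only applies to unquoted words; a minimal sketch: |
| /// |
| /// ``` |
| /// # use sqlparser::keywords::Keyword; |
| /// # use sqlparser::tokenizer::Token; |
| /// if let Token::Word(w) = Token::make_word("select", None) { |
| ///     assert_eq!(w.keyword, Keyword::SELECT); // unquoted => keyword |
| /// } |
| /// if let Token::Word(w) = Token::make_word("select", Some('"')) { |
| ///     assert_eq!(w.keyword, Keyword::NoKeyword); // quoted => plain identifier |
| /// } |
| /// ``` |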
| pub fn make_word(word: &str, quote_style: Option<char>) -> Self { |
| let word_uppercase = word.to_uppercase(); |
| Token::Word(Word { |
| value: word.to_string(), |
| quote_style, |
| keyword: if quote_style.is_none() { |
| let keyword = ALL_KEYWORDS.binary_search(&word_uppercase.as_str()); |
| keyword.map_or(Keyword::NoKeyword, |x| ALL_KEYWORDS_INDEX[x]) |
| } else { |
| Keyword::NoKeyword |
| }, |
| }) |
| } |
| } |
| |
| /// A keyword (like SELECT) or an optionally quoted SQL identifier |
| #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] |
| #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] |
| #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] |
| pub struct Word { |
| /// The value of the token, without the enclosing quotes, and with the |
| /// escape sequences (if any) processed (TODO: escapes are not handled) |
| pub value: String, |
| /// An identifier can be "quoted" (`<delimited identifier>` in ANSI parlance). |
| /// The standard and most implementations allow using double quotes for this, |
| /// but some implementations support other quoting styles as well (e.g. \[MS SQL]) |
| pub quote_style: Option<char>, |
| /// If the word was not quoted and it matched one of the known keywords, |
| /// this will have one of the values from `crate::keywords`, otherwise |
| /// `Keyword::NoKeyword` |
| pub keyword: Keyword, |
| } |
| |
| impl fmt::Display for Word { |
| fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| match self.quote_style { |
| Some(s) if s == '"' || s == '[' || s == '`' => { |
| write!(f, "{}{}{}", s, self.value, Word::matching_end_quote(s)) |
| } |
| None => f.write_str(&self.value), |
| _ => panic!("Unexpected quote_style!"), |
| } |
| } |
| } |
| |
| impl Word { |
| fn matching_end_quote(ch: char) -> char { |
| match ch { |
| '"' => '"', // ANSI and most dialects |
| '[' => ']', // MS SQL |
| '`' => '`', // MySQL |
| _ => panic!("unexpected quoting style!"), |
| } |
| } |
| } |
| |
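| /// A unit of whitespace, or a comment, emitted as [`Token::Whitespace`] |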
| #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] |
| #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] |
| #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] |
| pub enum Whitespace { |
| Space, |
| Newline, |
| Tab, |
| SingleLineComment { comment: String, prefix: String }, |
| MultiLineComment(String), |
| } |
| |
| impl fmt::Display for Whitespace { |
| fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| match self { |
| Whitespace::Space => f.write_str(" "), |
| Whitespace::Newline => f.write_str("\n"), |
| Whitespace::Tab => f.write_str("\t"), |
| Whitespace::SingleLineComment { prefix, comment } => write!(f, "{prefix}{comment}"), |
| Whitespace::MultiLineComment(s) => write!(f, "/*{s}*/"), |
| } |
| } |
| } |
| |
| /// Location in input string |
| /// |
| /// # Create an "empty" (unknown) `Location` |
| /// ``` |
| /// # use sqlparser::tokenizer::Location; |
| /// let location = Location::empty(); |
| /// ``` |
| /// |
| /// # Create a `Location` from a line and column |
| /// ``` |
| /// # use sqlparser::tokenizer::Location; |
| /// let location = Location::new(1, 1); |
| /// ``` |
| /// |
| /// # Create a `Location` from a pair |
| /// ``` |
| /// # use sqlparser::tokenizer::Location; |
| /// let location = Location::from((1, 1)); |
| /// ``` |
| #[derive(Eq, PartialEq, Hash, Clone, Copy, Ord, PartialOrd)] |
| #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] |
| #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] |
| pub struct Location { |
| /// Line number, starting from 1. |
| /// |
| /// Note: Line 0 is used for empty spans |
| pub line: u64, |
| /// Line column, starting from 1. |
| /// |
| /// Note: Column 0 is used for empty spans |
| pub column: u64, |
| } |
| |
| impl fmt::Display for Location { |
| fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| if self.line == 0 { |
| return Ok(()); |
| } |
| write!(f, " at Line: {}, Column: {}", self.line, self.column) |
| } |
| } |
| |
| impl fmt::Debug for Location { |
| fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| write!(f, "Location({},{})", self.line, self.column) |
| } |
| } |
| |
| impl Location { |
| /// Return an "empty" / unknown location |
| pub fn empty() -> Self { |
| Self { line: 0, column: 0 } |
| } |
| |
| /// Create a new `Location` for a given line and column |
| pub fn new(line: u64, column: u64) -> Self { |
| Self { line, column } |
| } |
| |
| /// Create a new location for a given line and column |
| /// |
| /// Alias for [`Self::new`] |
| // TODO: remove / deprecate in favor of `new` for consistency? |
| pub fn of(line: u64, column: u64) -> Self { |
| Self::new(line, column) |
| } |
| |
| /// Combine self and `end` into a new `Span` |
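| /// |
| /// ``` |
| /// # use sqlparser::tokenizer::{Location, Span}; |
| /// let span = Location::new(1, 1).span_to(Location::new(1, 5)); |
| /// assert_eq!(span, Span::new(Location::new(1, 1), Location::new(1, 5))); |
| /// ``` |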
| pub fn span_to(self, end: Self) -> Span { |
| Span { start: self, end } |
| } |
| } |
| |
| impl From<(u64, u64)> for Location { |
| fn from((line, column): (u64, u64)) -> Self { |
| Self { line, column } |
| } |
| } |
| |
| /// A span represents a linear portion of the input string (start, end) |
| /// |
| /// See [Spanned](crate::ast::Spanned) for more information. |
| #[derive(Eq, PartialEq, Hash, Clone, PartialOrd, Ord, Copy)] |
| #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] |
| #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] |
| pub struct Span { |
| pub start: Location, |
| pub end: Location, |
| } |
| |
| impl fmt::Debug for Span { |
| fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| write!(f, "Span({:?}..{:?})", self.start, self.end) |
| } |
| } |
| |
| impl Span { |
| // An empty span (0, 0) -> (0, 0) |
| // We need a const instance for pattern matching |
| const EMPTY: Span = Self::empty(); |
| |
| /// Create a new span from a start and end [`Location`] |
| pub fn new(start: Location, end: Location) -> Span { |
| Span { start, end } |
| } |
| |
| /// Returns an empty span `(0, 0) -> (0, 0)` |
| /// |
| /// Empty spans represent no knowledge of source location |
| /// See [Spanned](crate::ast::Spanned) for more information. |
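| /// |
| /// A small illustration of how empty spans behave under [Span::union]: |
| /// ``` |
| /// # use sqlparser::tokenizer::{Span, Location}; |
| /// let span = Span::new(Location::new(1, 1), Location::new(1, 5)); |
| /// // unioning with an empty span returns the other span unchanged |
| /// assert_eq!(Span::empty().union(&span), span); |
| /// ``` |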
| pub const fn empty() -> Span { |
| Span { |
| start: Location { line: 0, column: 0 }, |
| end: Location { line: 0, column: 0 }, |
| } |
| } |
| |
| /// Returns the smallest Span that contains both `self` and `other` |
| /// If either span is [Span::empty], the other span is returned |
| /// |
| /// # Examples |
| /// ``` |
| /// # use sqlparser::tokenizer::{Span, Location}; |
| /// // line 1, column 1 -> line 2, column 5 |
| /// let span1 = Span::new(Location::new(1, 1), Location::new(2, 5)); |
| /// // line 2, column 3 -> line 3, column 7 |
| /// let span2 = Span::new(Location::new(2, 3), Location::new(3, 7)); |
| /// // Union of the two is the min/max of the two spans |
| /// // line 1, column 1 -> line 3, column 7 |
| /// let union = span1.union(&span2); |
| /// assert_eq!(union, Span::new(Location::new(1, 1), Location::new(3, 7))); |
| /// ``` |
| pub fn union(&self, other: &Span) -> Span { |
| // If either span is empty, return the other |
| // this prevents propagating (0, 0) through the tree |
| match (self, other) { |
| (&Span::EMPTY, _) => *other, |
| (_, &Span::EMPTY) => *self, |
| _ => Span { |
| start: cmp::min(self.start, other.start), |
| end: cmp::max(self.end, other.end), |
| }, |
| } |
| } |
| |
| /// Same as [Span::union] for `Option<Span>` |
| /// |
| /// If `other` is `None`, `self` is returned |
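| /// |
| /// # Example |
| /// ``` |
| /// # use sqlparser::tokenizer::{Span, Location}; |
| /// let span = Span::new(Location::new(1, 1), Location::new(1, 5)); |
| /// let other = Span::new(Location::new(1, 3), Location::new(2, 7)); |
| /// assert_eq!(span.union_opt(&None), span); |
| /// assert_eq!(span.union_opt(&Some(other)), span.union(&other)); |
| /// ``` |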
| pub fn union_opt(&self, other: &Option<Span>) -> Span { |
| match other { |
| Some(other) => self.union(other), |
| None => *self, |
| } |
| } |
| |
| /// Return the [Span::union] of all spans in the iterator |
| /// |
| /// If the iterator is empty, an empty span is returned |
| /// |
| /// # Example |
| /// ``` |
| /// # use sqlparser::tokenizer::{Span, Location}; |
| /// let spans = vec![ |
| /// Span::new(Location::new(1, 1), Location::new(2, 5)), |
| /// Span::new(Location::new(2, 3), Location::new(3, 7)), |
| /// Span::new(Location::new(3, 1), Location::new(4, 2)), |
| /// ]; |
| /// // line 1, column 1 -> line 4, column 2 |
| /// assert_eq!( |
| /// Span::union_iter(spans), |
| /// Span::new(Location::new(1, 1), Location::new(4, 2)) |
| /// ); |
| /// ``` |
| pub fn union_iter<I: IntoIterator<Item = Span>>(iter: I) -> Span { |
| iter.into_iter() |
| .reduce(|acc, item| acc.union(&item)) |
| .unwrap_or(Span::empty()) |
| } |
| } |
| |
| /// Backwards compatibility alias for [`TokenWithSpan`] |
| #[deprecated(since = "0.53.0", note = "please use `TokenWithSpan` instead")] |
| pub type TokenWithLocation = TokenWithSpan; |
| |
| /// A [Token] with [Span] attached to it |
| /// |
| /// This is used to track the location of a token in the input string |
| /// |
| /// # Examples |
| /// ``` |
| /// # use sqlparser::tokenizer::{Location, Span, Token, TokenWithSpan}; |
| /// // comma @ line 1, column 10 |
| /// let tok1 = TokenWithSpan::new( |
| /// Token::Comma, |
| /// Span::new(Location::new(1, 10), Location::new(1, 11)), |
| /// ); |
| /// assert_eq!(tok1, Token::Comma); // can compare the token |
| /// |
| /// // comma @ line 2, column 20 |
| /// let tok2 = TokenWithSpan::new( |
| /// Token::Comma, |
| /// Span::new(Location::new(2, 20), Location::new(2, 21)), |
| /// ); |
| /// // same token but different locations are not equal |
| /// assert_ne!(tok1, tok2); |
| /// ``` |
| #[derive(Debug, Clone, Hash, Ord, PartialOrd, Eq, PartialEq)] |
| #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] |
| #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] |
| pub struct TokenWithSpan { |
| pub token: Token, |
| pub span: Span, |
| } |
| |
| impl TokenWithSpan { |
| /// Create a new [`TokenWithSpan`] from a [`Token`] and a [`Span`] |
| pub fn new(token: Token, span: Span) -> Self { |
| Self { token, span } |
| } |
| |
| /// Wrap a token with an empty span |
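| /// |
| /// ``` |
| /// # use sqlparser::tokenizer::{Span, Token, TokenWithSpan}; |
| /// let tok = TokenWithSpan::wrap(Token::Comma); |
| /// assert_eq!(tok.span, Span::empty()); |
| /// ``` |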
| pub fn wrap(token: Token) -> Self { |
| Self::new(token, Span::empty()) |
| } |
| |
| /// Wrap a token with a location from `start` to `end` |
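| /// |
| /// ``` |
| /// # use sqlparser::tokenizer::{Location, Token, TokenWithSpan}; |
| /// let tok = TokenWithSpan::at(Token::Comma, Location::new(1, 1), Location::new(1, 2)); |
| /// assert_eq!(tok.span.end, Location::new(1, 2)); |
| /// ``` |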
| pub fn at(token: Token, start: Location, end: Location) -> Self { |
| Self::new(token, Span::new(start, end)) |
| } |
| |
| /// Return an EOF token with no location |
| pub fn new_eof() -> Self { |
| Self::wrap(Token::EOF) |
| } |
| } |
| |
| impl PartialEq<Token> for TokenWithSpan { |
| fn eq(&self, other: &Token) -> bool { |
| &self.token == other |
| } |
| } |
| |
| impl PartialEq<TokenWithSpan> for Token { |
| fn eq(&self, other: &TokenWithSpan) -> bool { |
| self == &other.token |
| } |
| } |
| |
| impl fmt::Display for TokenWithSpan { |
| fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| self.token.fmt(f) |
| } |
| } |
| |
| /// Tokenizer error |
| #[derive(Debug, PartialEq, Eq)] |
| pub struct TokenizerError { |
| pub message: String, |
| pub location: Location, |
| } |
| |
| impl fmt::Display for TokenizerError { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| write!(f, "{}{}", self.message, self.location) |
| } |
| } |
| |
| #[cfg(feature = "std")] |
| impl std::error::Error for TokenizerError {} |
| |
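| /// Mutable tokenizer state: a peekable stream over the input characters |
| /// that tracks the current line and column so tokens and errors can be |
| /// tagged with a [`Location`]. |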
| struct State<'a> { |
| peekable: Peekable<Chars<'a>>, |
| pub line: u64, |
| pub col: u64, |
| } |
| |
| impl State<'_> { |
| /// return the next character and advance the stream |
| pub fn next(&mut self) -> Option<char> { |
| match self.peekable.next() { |
| None => None, |
| Some(s) => { |
| if s == '\n' { |
| self.line += 1; |
| self.col = 1; |
| } else { |
| self.col += 1; |
| } |
| Some(s) |
| } |
| } |
| } |
| |
| /// return the next character but do not advance the stream |
| pub fn peek(&mut self) -> Option<&char> { |
| self.peekable.peek() |
| } |
| |
| pub fn location(&self) -> Location { |
| Location { |
| line: self.line, |
| column: self.col, |
| } |
| } |
| } |
| |
| /// Represents how many quote characters enclose a string literal. |
| #[derive(Copy, Clone)] |
| enum NumStringQuoteChars { |
| /// e.g. `"abc"`, `'abc'`, `r'abc'` |
| One, |
| /// e.g. `"""abc"""`, `'''abc'''`, `r'''abc'''` |
| Many(NonZeroU8), |
| } |
| |
| /// Settings for tokenizing a quoted string literal. |
| struct TokenizeQuotedStringSettings { |
| /// The character used to quote the string. |
| quote_style: char, |
| /// Represents how many quote characters enclose the string literal. |
| num_quote_chars: NumStringQuoteChars, |
| /// The number of opening quotes left to consume, before parsing |
| /// the remaining string literal. |
| /// For example: given the initial string `"""abc"""`, if the caller has |
| /// already consumed the first quote for some reason, then this value is |
| /// set to 2, flagging that only the 2 remaining leading quotes need to |
| /// be consumed. |
| num_opening_quotes_to_consume: u8, |
| /// True if the string uses backslash escaping of special characters, |
| /// e.g. `'abc\ndef\'ghi'` |
| backslash_escape: bool, |
| } |
| |
| /// SQL Tokenizer |
| pub struct Tokenizer<'a> { |
| dialect: &'a dyn Dialect, |
| query: &'a str, |
| /// If true (the default), the tokenizer will un-escape literal |
| /// SQL strings. See [`Tokenizer::with_unescape`] for more details. |
| unescape: bool, |
| } |
| |
| impl<'a> Tokenizer<'a> { |
| /// Create a new SQL tokenizer for the specified SQL statement |
| /// |
| /// ``` |
| /// # use sqlparser::tokenizer::{Token, Whitespace, Tokenizer}; |
| /// # use sqlparser::dialect::GenericDialect; |
| /// # let dialect = GenericDialect{}; |
| /// let query = r#"SELECT 'foo'"#; |
| /// |
| /// // Parsing the query |
| /// let tokens = Tokenizer::new(&dialect, &query).tokenize().unwrap(); |
| /// |
| /// assert_eq!(tokens, vec![ |
| /// Token::make_word("SELECT", None), |
| /// Token::Whitespace(Whitespace::Space), |
| /// Token::SingleQuotedString("foo".to_string()), |
| /// ]); |
| /// ``` |
| pub fn new(dialect: &'a dyn Dialect, query: &'a str) -> Self { |
| Self { |
| dialect, |
| query, |
| unescape: true, |
| } |
| } |
| |
| /// Set unescape mode |
| /// |
| /// When true (default) the tokenizer unescapes literal values |
| /// (for example, `""` in SQL is unescaped to the literal `"`). |
| /// |
| /// When false, the tokenizer returns the strings exactly as written |
| /// in the query. This can be helpful for programs that wish to |
| /// recover the *exact* original query text without normalizing |
| /// the escaping. |
| /// |
| /// # Example |
| /// |
| /// ``` |
| /// # use sqlparser::tokenizer::{Token, Tokenizer}; |
| /// # use sqlparser::dialect::GenericDialect; |
| /// # let dialect = GenericDialect{}; |
| /// let query = r#""Foo "" Bar""#; |
| /// let unescaped = Token::make_word(r#"Foo " Bar"#, Some('"')); |
| /// let original = Token::make_word(r#"Foo "" Bar"#, Some('"')); |
| /// |
| /// // Parsing with unescaping (default) |
| /// let tokens = Tokenizer::new(&dialect, &query).tokenize().unwrap(); |
| /// assert_eq!(tokens, vec![unescaped]); |
| /// |
| /// // Parsing with unescape = false |
| /// let tokens = Tokenizer::new(&dialect, &query) |
| /// .with_unescape(false) |
| /// .tokenize().unwrap(); |
| /// assert_eq!(tokens, vec![original]); |
| /// ``` |
| pub fn with_unescape(mut self, unescape: bool) -> Self { |
| self.unescape = unescape; |
| self |
| } |
| |
| /// Tokenize the statement and produce a vector of tokens |
| pub fn tokenize(&mut self) -> Result<Vec<Token>, TokenizerError> { |
| let twl = self.tokenize_with_location()?; |
| Ok(twl.into_iter().map(|t| t.token).collect()) |
| } |
| |
| /// Tokenize the statement and produce a vector of tokens with location information |
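| /// |
| /// A minimal sketch: |
| /// ``` |
| /// # use sqlparser::dialect::GenericDialect; |
| /// # use sqlparser::tokenizer::Tokenizer; |
| /// # let dialect = GenericDialect{}; |
| /// let tokens = Tokenizer::new(&dialect, "SELECT 1").tokenize_with_location().unwrap(); |
| /// // the first token (`SELECT`) starts at line 1, column 1 |
| /// assert_eq!(tokens[0].span.start.line, 1); |
| /// assert_eq!(tokens[0].span.start.column, 1); |
| /// ``` |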
| pub fn tokenize_with_location(&mut self) -> Result<Vec<TokenWithSpan>, TokenizerError> { |
| let mut tokens: Vec<TokenWithSpan> = vec![]; |
| self.tokenize_with_location_into_buf(&mut tokens) |
| .map(|_| tokens) |
| } |
| |
| /// Tokenize the statement and append tokens with location information into the provided buffer. |
| /// If an error occurs, the buffer will contain all tokens that were successfully parsed before the error. |
| pub fn tokenize_with_location_into_buf( |
| &mut self, |
| buf: &mut Vec<TokenWithSpan>, |
| ) -> Result<(), TokenizerError> { |
| let mut state = State { |
| peekable: self.query.chars().peekable(), |
| line: 1, |
| col: 1, |
| }; |
| |
| let mut location = state.location(); |
| while let Some(token) = self.next_token(&mut state, buf.last().map(|t| &t.token))? { |
| let span = location.span_to(state.location()); |
| |
| buf.push(TokenWithSpan { token, span }); |
| |
| location = state.location(); |
| } |
| Ok(()) |
| } |
| |
| // Tokenize the identifier or keywords in `ch` |
| fn tokenize_identifier_or_keyword( |
| &self, |
| ch: impl IntoIterator<Item = char>, |
| chars: &mut State, |
| ) -> Result<Option<Token>, TokenizerError> { |
| chars.next(); // consume the first char |
| let ch: String = ch.into_iter().collect(); |
| let word = self.tokenize_word(ch, chars); |
| |
| // TODO: implement parsing of exponent here |
| if word.chars().all(|x| x.is_ascii_digit() || x == '.') { |
| let mut inner_state = State { |
| peekable: word.chars().peekable(), |
| line: 0, |
| col: 0, |
| }; |
| let mut s = peeking_take_while(&mut inner_state, |ch| matches!(ch, '0'..='9' | '.')); |
| let s2 = peeking_take_while(chars, |ch| matches!(ch, '0'..='9' | '.')); |
| s += s2.as_str(); |
| return Ok(Some(Token::Number(s, false))); |
| } |
| |
| Ok(Some(Token::make_word(&word, None))) |
| } |
| |
| /// Get the next token or return None |
| fn next_token( |
| &self, |
| chars: &mut State, |
| prev_token: Option<&Token>, |
| ) -> Result<Option<Token>, TokenizerError> { |
| match chars.peek() { |
| Some(&ch) => match ch { |
| ' ' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Space)), |
| '\t' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Tab)), |
| '\n' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Newline)), |
| '\r' => { |
| // Emit a single Whitespace::Newline token for \r and \r\n |
| chars.next(); |
| if let Some('\n') = chars.peek() { |
| chars.next(); |
| } |
| Ok(Some(Token::Whitespace(Whitespace::Newline))) |
| } |
| // BigQuery and MySQL use b or B for byte string literal, Postgres for bit strings |
| b @ 'B' | b @ 'b' if dialect_of!(self is BigQueryDialect | PostgreSqlDialect | MySqlDialect | GenericDialect) => |
| { |
| chars.next(); // consume |
| match chars.peek() { |
| Some('\'') => { |
| if self.dialect.supports_triple_quoted_string() { |
| return self |
| .tokenize_single_or_triple_quoted_string::<fn(String) -> Token>( |
| chars, |
| '\'', |
| false, |
| Token::SingleQuotedByteStringLiteral, |
| Token::TripleSingleQuotedByteStringLiteral, |
| ); |
| } |
| let s = self.tokenize_single_quoted_string(chars, '\'', false)?; |
| Ok(Some(Token::SingleQuotedByteStringLiteral(s))) |
| } |
| Some('\"') => { |
| if self.dialect.supports_triple_quoted_string() { |
| return self |
| .tokenize_single_or_triple_quoted_string::<fn(String) -> Token>( |
| chars, |
| '"', |
| false, |
| Token::DoubleQuotedByteStringLiteral, |
| Token::TripleDoubleQuotedByteStringLiteral, |
| ); |
| } |
| let s = self.tokenize_single_quoted_string(chars, '\"', false)?; |
| Ok(Some(Token::DoubleQuotedByteStringLiteral(s))) |
| } |
| _ => { |
| // regular identifier starting with an "b" or "B" |
| let s = self.tokenize_word(b, chars); |
| Ok(Some(Token::make_word(&s, None))) |
| } |
| } |
| } |
| // BigQuery uses r or R for raw string literal |
| b @ 'R' | b @ 'r' if dialect_of!(self is BigQueryDialect | GenericDialect) => { |
| chars.next(); // consume |
| match chars.peek() { |
| Some('\'') => self |
| .tokenize_single_or_triple_quoted_string::<fn(String) -> Token>( |
| chars, |
| '\'', |
| false, |
| Token::SingleQuotedRawStringLiteral, |
| Token::TripleSingleQuotedRawStringLiteral, |
| ), |
| Some('\"') => self |
| .tokenize_single_or_triple_quoted_string::<fn(String) -> Token>( |
| chars, |
| '"', |
| false, |
| Token::DoubleQuotedRawStringLiteral, |
| Token::TripleDoubleQuotedRawStringLiteral, |
| ), |
| _ => { |
| // regular identifier starting with an "r" or "R" |
| let s = self.tokenize_word(b, chars); |
| Ok(Some(Token::make_word(&s, None))) |
| } |
| } |
| } |
| // Redshift uses lower case n for national string literal |
| n @ 'N' | n @ 'n' => { |
| chars.next(); // consume, to check the next char |
| match chars.peek() { |
| Some('\'') => { |
| // N'...' - a <national character string literal> |
| let backslash_escape = |
| self.dialect.supports_string_literal_backslash_escape(); |
| let s = |
| self.tokenize_single_quoted_string(chars, '\'', backslash_escape)?; |
| Ok(Some(Token::NationalStringLiteral(s))) |
| } |
| _ => { |
| // regular identifier starting with an "N" |
| let s = self.tokenize_word(n, chars); |
| Ok(Some(Token::make_word(&s, None))) |
| } |
| } |
| } |
| // PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard. |
| x @ 'e' | x @ 'E' if self.dialect.supports_string_escape_constant() => { |
| let starting_loc = chars.location(); |
| chars.next(); // consume, to check the next char |
| match chars.peek() { |
| Some('\'') => { |
| let s = |
| self.tokenize_escaped_single_quoted_string(starting_loc, chars)?; |
| Ok(Some(Token::EscapedStringLiteral(s))) |
| } |
| _ => { |
| // regular identifier starting with an "E" or "e" |
| let s = self.tokenize_word(x, chars); |
| Ok(Some(Token::make_word(&s, None))) |
| } |
| } |
| } |
| // Unicode string literals like U&'first \000A second' are supported in some dialects, including PostgreSQL |
| x @ 'u' | x @ 'U' if self.dialect.supports_unicode_string_literal() => { |
| chars.next(); // consume, to check the next char |
| if chars.peek() == Some(&'&') { |
| // we cannot advance the iterator here, as we need to consume the '&' later if the 'u' was an identifier |
| let mut chars_clone = chars.peekable.clone(); |
| chars_clone.next(); // consume the '&' in the clone |
| if chars_clone.peek() == Some(&'\'') { |
| chars.next(); // consume the '&' in the original iterator |
| let s = unescape_unicode_single_quoted_string(chars)?; |
| return Ok(Some(Token::UnicodeStringLiteral(s))); |
| } |
| } |
| // regular identifier starting with an "U" or "u" |
| let s = self.tokenize_word(x, chars); |
| Ok(Some(Token::make_word(&s, None))) |
| } |
| // The spec only allows an uppercase 'X' to introduce a hex |
| // string, but PostgreSQL, at least, allows a lowercase 'x' too. |
| x @ 'x' | x @ 'X' => { |
| chars.next(); // consume, to check the next char |
| match chars.peek() { |
| Some('\'') => { |
| // X'...' - a <binary string literal> |
| let s = self.tokenize_single_quoted_string(chars, '\'', true)?; |
| Ok(Some(Token::HexStringLiteral(s))) |
| } |
| _ => { |
| // regular identifier starting with an "X" |
| let s = self.tokenize_word(x, chars); |
| Ok(Some(Token::make_word(&s, None))) |
| } |
| } |
| } |
| // single quoted string |
| '\'' => { |
| if self.dialect.supports_triple_quoted_string() { |
| return self |
| .tokenize_single_or_triple_quoted_string::<fn(String) -> Token>( |
| chars, |
| '\'', |
| self.dialect.supports_string_literal_backslash_escape(), |
| Token::SingleQuotedString, |
| Token::TripleSingleQuotedString, |
| ); |
| } |
| let s = self.tokenize_single_quoted_string( |
| chars, |
| '\'', |
| self.dialect.supports_string_literal_backslash_escape(), |
| )?; |
| |
| Ok(Some(Token::SingleQuotedString(s))) |
| } |
| // double quoted string |
| '\"' if !self.dialect.is_delimited_identifier_start(ch) |
| && !self.dialect.is_identifier_start(ch) => |
| { |
| if self.dialect.supports_triple_quoted_string() { |
| return self |
| .tokenize_single_or_triple_quoted_string::<fn(String) -> Token>( |
| chars, |
| '"', |
| self.dialect.supports_string_literal_backslash_escape(), |
| Token::DoubleQuotedString, |
| Token::TripleDoubleQuotedString, |
| ); |
| } |
| let s = self.tokenize_single_quoted_string( |
| chars, |
| '"', |
| self.dialect.supports_string_literal_backslash_escape(), |
| )?; |
| |
| Ok(Some(Token::DoubleQuotedString(s))) |
| } |
| // delimited (quoted) identifier |
| quote_start if self.dialect.is_delimited_identifier_start(ch) => { |
| let word = self.tokenize_quoted_identifier(quote_start, chars)?; |
| Ok(Some(Token::make_word(&word, Some(quote_start)))) |
| } |
| // Potentially nested delimited (quoted) identifier |
| quote_start |
| if self |
| .dialect |
| .is_nested_delimited_identifier_start(quote_start) |
| && self |
| .dialect |
| .peek_nested_delimited_identifier_quotes(chars.peekable.clone()) |
| .is_some() => |
| { |
| let Some((quote_start, nested_quote_start)) = self |
| .dialect |
| .peek_nested_delimited_identifier_quotes(chars.peekable.clone()) |
| else { |
| return self.tokenizer_error( |
| chars.location(), |
| format!("Expected nested delimiter '{quote_start}' before EOF."), |
| ); |
| }; |
| |
| let Some(nested_quote_start) = nested_quote_start else { |
| let word = self.tokenize_quoted_identifier(quote_start, chars)?; |
| return Ok(Some(Token::make_word(&word, Some(quote_start)))); |
| }; |
| |
| let mut word = vec![]; |
| let quote_end = Word::matching_end_quote(quote_start); |
| let nested_quote_end = Word::matching_end_quote(nested_quote_start); |
| let error_loc = chars.location(); |
| |
| chars.next(); // skip the first delimiter |
| peeking_take_while(chars, |ch| ch.is_whitespace()); |
| if chars.peek() != Some(&nested_quote_start) { |
| return self.tokenizer_error( |
| error_loc, |
| format!("Expected nested delimiter '{nested_quote_start}' before EOF."), |
| ); |
| } |
| word.push(nested_quote_start.into()); |
| word.push(self.tokenize_quoted_identifier(nested_quote_end, chars)?); |
| word.push(nested_quote_end.into()); |
| peeking_take_while(chars, |ch| ch.is_whitespace()); |
| if chars.peek() != Some("e_end) { |
| return self.tokenizer_error( |
| error_loc, |
| format!("Expected close delimiter '{quote_end}' before EOF."), |
| ); |
| } |
| chars.next(); // skip close delimiter |
| |
| Ok(Some(Token::make_word(&word.concat(), Some(quote_start)))) |
| } |
| // numbers and period |
| '0'..='9' | '.' => { |
| // Special case: if `._` is encountered after a word, that word is a |
| // table name and the `_` starts the column name. If the previous |
| // token is not a word, then this is neither a valid word nor a |
| // valid number. |
| if ch == '.' && chars.peekable.clone().nth(1) == Some('_') { |
| if let Some(Token::Word(_)) = prev_token { |
| chars.next(); |
| return Ok(Some(Token::Period)); |
| } |
| |
| return self.tokenizer_error( |
| chars.location(), |
| "Unexpected character '_'".to_string(), |
| ); |
| } |
| |
| // Some dialects support underscore as number separator |
| // There can only be one at a time and it must be followed by another digit |
| let is_number_separator = |ch: char, next_char: Option<char>| { |
| self.dialect.supports_numeric_literal_underscores() |
| && ch == '_' |
| && next_char.is_some_and(|next_ch| next_ch.is_ascii_hexdigit()) |
| }; |
| |
| let mut s = peeking_next_take_while(chars, |ch, next_ch| { |
| ch.is_ascii_digit() || is_number_separator(ch, next_ch) |
| }); |
| |
| // match a hexadecimal literal that starts with 0x |
| if s == "0" && chars.peek() == Some(&'x') { |
| chars.next(); |
| let s2 = peeking_next_take_while(chars, |ch, next_ch| { |
| ch.is_ascii_hexdigit() || is_number_separator(ch, next_ch) |
| }); |
| return Ok(Some(Token::HexStringLiteral(s2))); |
| } |
| |
| // match one period |
| if let Some('.') = chars.peek() { |
| s.push('.'); |
| chars.next(); |
| } |
| |
| // If the dialect supports identifiers that start with a numeric prefix |
| // and we have now consumed a dot, check if the previous token was a Word. |
| // If so, what follows is definitely not part of a decimal number and |
| // we should yield the dot as a dedicated token so compound identifiers |
| // starting with digits can be parsed correctly. |
| if s == "." && self.dialect.supports_numeric_prefix() { |
| if let Some(Token::Word(_)) = prev_token { |
| return Ok(Some(Token::Period)); |
| } |
| } |
| |
| // Consume fractional digits. |
| s += &peeking_next_take_while(chars, |ch, next_ch| { |
| ch.is_ascii_digit() || is_number_separator(ch, next_ch) |
| }); |
| |
| // No fraction -> Token::Period |
| if s == "." { |
| return Ok(Some(Token::Period)); |
| } |
| |
| // Parse exponent as number |
| let mut exponent_part = String::new(); |
| if chars.peek() == Some(&'e') || chars.peek() == Some(&'E') { |
| let mut char_clone = chars.peekable.clone(); |
| exponent_part.push(char_clone.next().unwrap()); |
| |
| // Optional sign |
| match char_clone.peek() { |
| Some(&c) if matches!(c, '+' | '-') => { |
| exponent_part.push(c); |
| char_clone.next(); |
| } |
| _ => (), |
| } |
| |
| match char_clone.peek() { |
| // Definitely an exponent, get original iterator up to speed and use it |
| Some(&c) if c.is_ascii_digit() => { |
| for _ in 0..exponent_part.len() { |
| chars.next(); |
| } |
| exponent_part += |
| &peeking_take_while(chars, |ch| ch.is_ascii_digit()); |
| s += exponent_part.as_str(); |
| } |
| // Not an exponent, discard the work done |
| _ => (), |
| } |
| } |
| |
| // If the dialect supports identifiers that start with a numeric prefix, |
| // we need to check if the value is in fact an identifier and must thus |
| // be tokenized as a word. |
| if self.dialect.supports_numeric_prefix() { |
| if exponent_part.is_empty() { |
| // If it is not a number with an exponent, it may be |
| // an identifier starting with digits. |
| let word = |
| peeking_take_while(chars, |ch| self.dialect.is_identifier_part(ch)); |
| |
| if !word.is_empty() { |
| s += word.as_str(); |
| return Ok(Some(Token::make_word(s.as_str(), None))); |
| } |
| } else if prev_token == Some(&Token::Period) { |
| // If the previous token was a period, thus not belonging to a number, |
| // the value we have is part of an identifier. |
| return Ok(Some(Token::make_word(s.as_str(), None))); |
| } |
| } |
| |
| let long = if chars.peek() == Some(&'L') { |
| chars.next(); |
| true |
| } else { |
| false |
| }; |
| Ok(Some(Token::Number(s, long))) |
| } |
| // punctuation |
| '(' => self.consume_and_return(chars, Token::LParen), |
| ')' => self.consume_and_return(chars, Token::RParen), |
| ',' => self.consume_and_return(chars, Token::Comma), |
| // operators |
| '-' => { |
| chars.next(); // consume the '-' |
| |
| match chars.peek() { |
| Some('-') => { |
| let mut is_comment = true; |
| if self.dialect.requires_single_line_comment_whitespace() { |
| is_comment = Some(' ') == chars.peekable.clone().nth(1); |
| } |
| |
| if is_comment { |
| chars.next(); // consume second '-' |
| let comment = self.tokenize_single_line_comment(chars); |
| return Ok(Some(Token::Whitespace( |
| Whitespace::SingleLineComment { |
| prefix: "--".to_owned(), |
| comment, |
| }, |
| ))); |
| } |
| |
| self.start_binop(chars, "-", Token::Minus) |
| } |
| Some('>') => { |
| chars.next(); |
| match chars.peek() { |
| Some('>') => self.consume_for_binop(chars, "->>", Token::LongArrow), |
| _ => self.start_binop(chars, "->", Token::Arrow), |
| } |
| } |
| // a regular '-' operator |
| _ => self.start_binop(chars, "-", Token::Minus), |
| } |
| } |
| '/' => { |
| chars.next(); // consume the '/' |
| match chars.peek() { |
| Some('*') => { |
| chars.next(); // consume the '*', starting a multi-line comment |
| self.tokenize_multiline_comment(chars) |
| } |
| Some('/') if dialect_of!(self is SnowflakeDialect) => { |
| chars.next(); // consume the second '/', starting a snowflake single-line comment |
| let comment = self.tokenize_single_line_comment(chars); |
| Ok(Some(Token::Whitespace(Whitespace::SingleLineComment { |
| prefix: "//".to_owned(), |
| comment, |
| }))) |
| } |
| Some('/') if dialect_of!(self is DuckDbDialect | GenericDialect) => { |
| self.consume_and_return(chars, Token::DuckIntDiv) |
| } |
| // a regular '/' operator |
| _ => Ok(Some(Token::Div)), |
| } |
| } |
| '+' => self.consume_and_return(chars, Token::Plus), |
| '*' => self.consume_and_return(chars, Token::Mul), |
| '%' => { |
| chars.next(); // advance past '%' |
| match chars.peek() { |
| Some(s) if s.is_whitespace() => Ok(Some(Token::Mod)), |
| Some(sch) if self.dialect.is_identifier_start('%') => { |
| self.tokenize_identifier_or_keyword([ch, *sch], chars) |
| } |
| _ => self.start_binop(chars, "%", Token::Mod), |
| } |
| } |
| '|' => { |
| chars.next(); // consume the '|' |
| match chars.peek() { |
| Some('/') => self.consume_for_binop(chars, "|/", Token::PGSquareRoot), |
| Some('|') => { |
| chars.next(); // consume the second '|' |
| match chars.peek() { |
| Some('/') => { |
| self.consume_for_binop(chars, "||/", Token::PGCubeRoot) |
| } |
| _ => self.start_binop(chars, "||", Token::StringConcat), |
| } |
| } |
| Some('&') if self.dialect.supports_geometric_types() => { |
| chars.next(); // consume |
| match chars.peek() { |
| Some('>') => self.consume_for_binop( |
| chars, |
| "|&>", |
| Token::VerticalBarAmpersandRightAngleBracket, |
| ), |
| _ => self.start_binop_opt(chars, "|&", None), |
| } |
| } |
| Some('>') if self.dialect.supports_geometric_types() => { |
| chars.next(); // consume |
| match chars.peek() { |
| Some('>') => self.consume_for_binop( |
| chars, |
| "|>>", |
| Token::VerticalBarShiftRight, |
| ), |
| _ => self.start_binop_opt(chars, "|>", None), |
| } |
| } |
| Some('>') if self.dialect.supports_pipe_operator() => { |
| self.consume_for_binop(chars, "|>", Token::VerticalBarRightAngleBracket) |
| } |
| // Bitwise '|' operator |
| _ => self.start_binop(chars, "|", Token::Pipe), |
| } |
| } |
| '=' => { |
| chars.next(); // consume |
| match chars.peek() { |
| Some('>') => self.consume_and_return(chars, Token::RArrow), |
| Some('=') => self.consume_and_return(chars, Token::DoubleEq), |
| _ => Ok(Some(Token::Eq)), |
| } |
| } |
| '!' => { |
| chars.next(); // consume |
| match chars.peek() { |
| Some('=') => self.consume_and_return(chars, Token::Neq), |
| Some('!') => self.consume_and_return(chars, Token::DoubleExclamationMark), |
| Some('~') => { |
| chars.next(); |
| match chars.peek() { |
| Some('*') => self |
| .consume_and_return(chars, Token::ExclamationMarkTildeAsterisk), |
| Some('~') => { |
| chars.next(); |
| match chars.peek() { |
| Some('*') => self.consume_and_return( |
| chars, |
| Token::ExclamationMarkDoubleTildeAsterisk, |
| ), |
| _ => Ok(Some(Token::ExclamationMarkDoubleTilde)), |
| } |
| } |
| _ => Ok(Some(Token::ExclamationMarkTilde)), |
| } |
| } |
| _ => Ok(Some(Token::ExclamationMark)), |
| } |
| } |
| '<' => { |
| chars.next(); // consume |
| match chars.peek() { |
| Some('=') => { |
| chars.next(); |
| match chars.peek() { |
| Some('>') => self.consume_for_binop(chars, "<=>", Token::Spaceship), |
| _ => self.start_binop(chars, "<=", Token::LtEq), |
| } |
| } |
| Some('|') if self.dialect.supports_geometric_types() => { |
| self.consume_for_binop(chars, "<<|", Token::ShiftLeftVerticalBar) |
| } |
| Some('>') => self.consume_for_binop(chars, "<>", Token::Neq), |
| Some('<') if self.dialect.supports_geometric_types() => { |
| chars.next(); // consume |
| match chars.peek() { |
| Some('|') => self.consume_for_binop( |
| chars, |
| "<<|", |
| Token::ShiftLeftVerticalBar, |
| ), |
| _ => self.start_binop(chars, "<<", Token::ShiftLeft), |
| } |
| } |
| Some('<') => self.consume_for_binop(chars, "<<", Token::ShiftLeft), |
| Some('-') if self.dialect.supports_geometric_types() => { |
| chars.next(); // consume |
| match chars.peek() { |
| Some('>') => { |
| self.consume_for_binop(chars, "<->", Token::TwoWayArrow) |
| } |
| _ => self.start_binop_opt(chars, "<-", None), |
| } |
| } |
| Some('^') if self.dialect.supports_geometric_types() => { |
| self.consume_for_binop(chars, "<^", Token::LeftAngleBracketCaret) |
| } |
| Some('@') => self.consume_for_binop(chars, "<@", Token::ArrowAt), |
| _ => self.start_binop(chars, "<", Token::Lt), |
| } |
| } |
| '>' => { |
| chars.next(); // consume |
| match chars.peek() { |
| Some('=') => self.consume_for_binop(chars, ">=", Token::GtEq), |
| Some('>') => self.consume_for_binop(chars, ">>", Token::ShiftRight), |
| Some('^') if self.dialect.supports_geometric_types() => { |
| self.consume_for_binop(chars, ">^", Token::RightAngleBracketCaret) |
| } |
| _ => self.start_binop(chars, ">", Token::Gt), |
| } |
| } |
| ':' => { |
| chars.next(); |
| match chars.peek() { |
| Some(':') => self.consume_and_return(chars, Token::DoubleColon), |
| Some('=') => self.consume_and_return(chars, Token::Assignment), |
| _ => Ok(Some(Token::Colon)), |
| } |
| } |
| ';' => self.consume_and_return(chars, Token::SemiColon), |
| '\\' => self.consume_and_return(chars, Token::Backslash), |
| '[' => self.consume_and_return(chars, Token::LBracket), |
| ']' => self.consume_and_return(chars, Token::RBracket), |
| '&' => { |
| chars.next(); // consume the '&' |
| match chars.peek() { |
| Some('>') if self.dialect.supports_geometric_types() => { |
| chars.next(); |
| self.consume_and_return(chars, Token::AmpersandRightAngleBracket) |
| } |
| Some('<') if self.dialect.supports_geometric_types() => { |
| chars.next(); // consume |
| match chars.peek() { |
| Some('|') => self.consume_and_return( |
| chars, |
| Token::AmpersandLeftAngleBracketVerticalBar, |
| ), |
| _ => { |
| self.start_binop(chars, "&<", Token::AmpersandLeftAngleBracket) |
| } |
| } |
| } |
| Some('&') => { |
| chars.next(); // consume the second '&' |
| self.start_binop(chars, "&&", Token::Overlap) |
| } |
| // Bitwise '&' operator |
| _ => self.start_binop(chars, "&", Token::Ampersand), |
| } |
| } |
| '^' => { |
| chars.next(); // consume the '^' |
| match chars.peek() { |
| Some('@') => self.consume_and_return(chars, Token::CaretAt), |
| _ => Ok(Some(Token::Caret)), |
| } |
| } |
| '{' => self.consume_and_return(chars, Token::LBrace), |
| '}' => self.consume_and_return(chars, Token::RBrace), |
| '#' if dialect_of!(self is SnowflakeDialect | BigQueryDialect | MySqlDialect | HiveDialect) => |
| { |
| chars.next(); // consume the '#', starting a single-line comment |
| let comment = self.tokenize_single_line_comment(chars); |
| Ok(Some(Token::Whitespace(Whitespace::SingleLineComment { |
| prefix: "#".to_owned(), |
| comment, |
| }))) |
| } |
| '~' => { |
| chars.next(); // consume |
| match chars.peek() { |
| Some('*') => self.consume_for_binop(chars, "~*", Token::TildeAsterisk), |
| Some('=') if self.dialect.supports_geometric_types() => { |
| self.consume_for_binop(chars, "~=", Token::TildeEqual) |
| } |
| Some('~') => { |
| chars.next(); |
| match chars.peek() { |
| Some('*') => { |
| self.consume_for_binop(chars, "~~*", Token::DoubleTildeAsterisk) |
| } |
| _ => self.start_binop(chars, "~~", Token::DoubleTilde), |
| } |
| } |
| _ => self.start_binop(chars, "~", Token::Tilde), |
| } |
| } |
| '#' => { |
| chars.next(); |
| match chars.peek() { |
| Some('-') => self.consume_for_binop(chars, "#-", Token::HashMinus), |
| Some('>') => { |
| chars.next(); |
| match chars.peek() { |
| Some('>') => { |
| self.consume_for_binop(chars, "#>>", Token::HashLongArrow) |
| } |
| _ => self.start_binop(chars, "#>", Token::HashArrow), |
| } |
| } |
| Some(' ') => Ok(Some(Token::Sharp)), |
| Some('#') if self.dialect.supports_geometric_types() => { |
| self.consume_for_binop(chars, "##", Token::DoubleSharp) |
| } |
| Some(sch) if self.dialect.is_identifier_start('#') => { |
| self.tokenize_identifier_or_keyword([ch, *sch], chars) |
| } |
| _ => self.start_binop(chars, "#", Token::Sharp), |
| } |
| } |
| '@' => { |
| chars.next(); |
| match chars.peek() { |
| Some('@') if self.dialect.supports_geometric_types() => { |
| self.consume_and_return(chars, Token::AtAt) |
| } |
| Some('-') if self.dialect.supports_geometric_types() => { |
| chars.next(); |
| match chars.peek() { |
| Some('@') => self.consume_and_return(chars, Token::AtDashAt), |
| _ => self.start_binop_opt(chars, "@-", None), |
| } |
| } |
| Some('>') => self.consume_and_return(chars, Token::AtArrow), |
| Some('?') => self.consume_and_return(chars, Token::AtQuestion), |
| Some('@') => { |
| chars.next(); |
| match chars.peek() { |
| Some(' ') => Ok(Some(Token::AtAt)), |
| Some(tch) if self.dialect.is_identifier_start('@') => { |
| self.tokenize_identifier_or_keyword([ch, '@', *tch], chars) |
| } |
| _ => Ok(Some(Token::AtAt)), |
| } |
| } |
| Some(' ') => Ok(Some(Token::AtSign)), |
| // We break on quotes here, because no dialect allows identifiers starting |
| // with @ and containing quotation marks (e.g. `@'foo'`) unless they are |
| // quoted, which is tokenized as a quoted string, not here (e.g. |
| // `"@'foo'"`). Further, at least two dialects parse `@` followed by a |
| // quoted string as two separate tokens, which this allows. For example, |
| // Postgres parses `@'1'` as the absolute value of '1' which is implicitly |
| // cast to a numeric type. And when parsing MySQL-style grantees (e.g. |
| // `GRANT ALL ON *.* to 'root'@'localhost'`), we also want separate tokens |
| // for the user, the `@`, and the host. |
| Some('\'') => Ok(Some(Token::AtSign)), |
| Some('\"') => Ok(Some(Token::AtSign)), |
| Some('`') => Ok(Some(Token::AtSign)), |
| Some(sch) if self.dialect.is_identifier_start('@') => { |
| self.tokenize_identifier_or_keyword([ch, *sch], chars) |
| } |
| _ => Ok(Some(Token::AtSign)), |
| } |
| } |
| // Postgres uses ? for jsonb operators, not prepared statements |
| '?' if self.dialect.supports_geometric_types() => { |
chars.next(); // consume the '?'
| match chars.peek() { |
| Some('|') => { |
| chars.next(); |
| match chars.peek() { |
| Some('|') => self.consume_and_return( |
| chars, |
| Token::QuestionMarkDoubleVerticalBar, |
| ), |
| _ => Ok(Some(Token::QuestionPipe)), |
| } |
| } |
| Some('&') => self.consume_and_return(chars, Token::QuestionAnd), |
| Some('-') => { |
chars.next(); // consume the '-'
| match chars.peek() { |
| Some('|') => self |
| .consume_and_return(chars, Token::QuestionMarkDashVerticalBar), |
| _ => Ok(Some(Token::QuestionMarkDash)), |
| } |
| } |
| Some('#') => self.consume_and_return(chars, Token::QuestionMarkSharp), |
| _ => self.consume_and_return(chars, Token::Question), |
| } |
| } |
| '?' => { |
| chars.next(); |
| let s = peeking_take_while(chars, |ch| ch.is_numeric()); |
| Ok(Some(Token::Placeholder(String::from("?") + &s))) |
| } |
| |
| // identifier or keyword |
| ch if self.dialect.is_identifier_start(ch) => { |
| self.tokenize_identifier_or_keyword([ch], chars) |
| } |
| '$' => Ok(Some(self.tokenize_dollar_preceded_value(chars)?)), |
| |
| // whitespace check (including unicode chars) should be last as it covers some of the chars above |
| ch if ch.is_whitespace() => { |
| self.consume_and_return(chars, Token::Whitespace(Whitespace::Space)) |
| } |
| other => self.consume_and_return(chars, Token::Char(other)), |
| }, |
| None => Ok(None), |
| } |
| } |
| |
/// Consume the next character, then parse a custom binary operator. The consumed character must already be included in `prefix`.
| fn consume_for_binop( |
| &self, |
| chars: &mut State, |
| prefix: &str, |
| default: Token, |
| ) -> Result<Option<Token>, TokenizerError> { |
| chars.next(); // consume the first char |
| self.start_binop_opt(chars, prefix, Some(default)) |
| } |
| |
/// Parse a custom binary operator, falling back to `default` if no custom operator characters follow.
| fn start_binop( |
| &self, |
| chars: &mut State, |
| prefix: &str, |
| default: Token, |
| ) -> Result<Option<Token>, TokenizerError> { |
| self.start_binop_opt(chars, prefix, Some(default)) |
| } |
| |
/// Parse a custom binary operator. Falls back to `default` if no custom operator characters follow, and errors if there is no custom operator and no default.
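///
/// A sketch of the behavior, assuming a dialect whose
/// `is_custom_operator_part` accepts `'@'`:
///
/// ```text
/// prefix = ">>", default = Some(Token::ShiftRight)
///   remaining input "@ b" => Token::CustomBinaryOperator(">>@")
///   remaining input " b"  => Token::ShiftRight
/// ```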
| fn start_binop_opt( |
| &self, |
| chars: &mut State, |
| prefix: &str, |
| default: Option<Token>, |
| ) -> Result<Option<Token>, TokenizerError> { |
| let mut custom = None; |
| while let Some(&ch) = chars.peek() { |
| if !self.dialect.is_custom_operator_part(ch) { |
| break; |
| } |
| |
| custom.get_or_insert_with(|| prefix.to_string()).push(ch); |
| chars.next(); |
| } |
| match (custom, default) { |
| (Some(custom), _) => Ok(Token::CustomBinaryOperator(custom).into()), |
| (None, Some(tok)) => Ok(Some(tok)), |
| (None, None) => self.tokenizer_error( |
| chars.location(), |
| format!("Expected a valid binary operator after '{prefix}'"), |
| ), |
| } |
| } |
| |
/// Tokenize a dollar-prefixed value (i.e. a dollar-quoted string or a placeholder).
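///
/// For example (matching the test cases below):
///
/// ```text
/// $$abc$$        => DollarQuotedString { value: "abc", tag: None }
/// $tag$abc$tag$  => DollarQuotedString { value: "abc", tag: Some("tag") }
/// $1             => Placeholder("$1")
/// ```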
| fn tokenize_dollar_preceded_value(&self, chars: &mut State) -> Result<Token, TokenizerError> { |
| let mut s = String::new(); |
| let mut value = String::new(); |
| |
| chars.next(); |
| |
// `$$` starts an (untagged) dollar-quoted string, unless the dialect treats `$$` as a placeholder.
| if matches!(chars.peek(), Some('$')) && !self.dialect.supports_dollar_placeholder() { |
| chars.next(); |
| |
| let mut is_terminated = false; |
| let mut prev: Option<char> = None; |
| |
| while let Some(&ch) = chars.peek() { |
| if prev == Some('$') { |
| if ch == '$' { |
| chars.next(); |
| is_terminated = true; |
| break; |
| } else { |
| s.push('$'); |
| s.push(ch); |
| } |
| } else if ch != '$' { |
| s.push(ch); |
| } |
| |
| prev = Some(ch); |
| chars.next(); |
| } |
| |
| return if chars.peek().is_none() && !is_terminated { |
| self.tokenizer_error(chars.location(), "Unterminated dollar-quoted string") |
| } else { |
| Ok(Token::DollarQuotedString(DollarQuotedString { |
| value: s, |
| tag: None, |
| })) |
| }; |
| } else { |
| value.push_str(&peeking_take_while(chars, |ch| { |
| ch.is_alphanumeric() |
| || ch == '_' |
| // Allow $ as a placeholder character if the dialect supports it |
| || matches!(ch, '$' if self.dialect.supports_dollar_placeholder()) |
| })); |
| |
// A `$` after the tag starts a tagged dollar-quoted string, unless the dialect treats dollar-prefixed values as placeholders.
| if matches!(chars.peek(), Some('$')) && !self.dialect.supports_dollar_placeholder() { |
| chars.next(); |
| |
| let mut temp = String::new(); |
| let end_delimiter = format!("${value}$"); |
| |
| loop { |
| match chars.next() { |
| Some(ch) => { |
| temp.push(ch); |
| |
| if temp.ends_with(&end_delimiter) { |
| if let Some(temp) = temp.strip_suffix(&end_delimiter) { |
| s.push_str(temp); |
| } |
| break; |
| } |
| } |
| None => { |
| if temp.ends_with(&end_delimiter) { |
| if let Some(temp) = temp.strip_suffix(&end_delimiter) { |
| s.push_str(temp); |
| } |
| break; |
| } |
| |
| return self.tokenizer_error( |
| chars.location(), |
| "Unterminated dollar-quoted, expected $", |
| ); |
| } |
| } |
| } |
| } else { |
| return Ok(Token::Placeholder(String::from("$") + &value)); |
| } |
| } |
| |
| Ok(Token::DollarQuotedString(DollarQuotedString { |
| value: s, |
| tag: if value.is_empty() { None } else { Some(value) }, |
| })) |
| } |
| |
| fn tokenizer_error<R>( |
| &self, |
| loc: Location, |
| message: impl Into<String>, |
| ) -> Result<R, TokenizerError> { |
| Err(TokenizerError { |
| message: message.into(), |
| location: loc, |
| }) |
| } |
| |
// Consume characters up to and including the terminating newline, if any
| fn tokenize_single_line_comment(&self, chars: &mut State) -> String { |
| let mut comment = peeking_take_while(chars, |ch| match ch { |
| '\n' => false, // Always stop at \n |
| '\r' if dialect_of!(self is PostgreSqlDialect) => false, // Stop at \r for Postgres |
| _ => true, // Keep consuming for other characters |
| }); |
| |
| if let Some(ch) = chars.next() { |
| assert!(ch == '\n' || ch == '\r'); |
| comment.push(ch); |
| } |
| |
| comment |
| } |
| |
/// Tokenize an identifier or keyword, after the first char has already been consumed.
| fn tokenize_word(&self, first_chars: impl Into<String>, chars: &mut State) -> String { |
| let mut s = first_chars.into(); |
| s.push_str(&peeking_take_while(chars, |ch| { |
| self.dialect.is_identifier_part(ch) |
| })); |
| s |
| } |
| |
| /// Read a quoted identifier |
| fn tokenize_quoted_identifier( |
| &self, |
| quote_start: char, |
| chars: &mut State, |
| ) -> Result<String, TokenizerError> { |
| let error_loc = chars.location(); |
| chars.next(); // consume the opening quote |
| let quote_end = Word::matching_end_quote(quote_start); |
| let (s, last_char) = self.parse_quoted_ident(chars, quote_end); |
| |
| if last_char == Some(quote_end) { |
| Ok(s) |
| } else { |
| self.tokenizer_error( |
| error_loc, |
| format!("Expected close delimiter '{quote_end}' before EOF."), |
| ) |
| } |
| } |
| |
/// Read a single-quoted string, starting at the opening quote, processing escape sequences.
| fn tokenize_escaped_single_quoted_string( |
| &self, |
| starting_loc: Location, |
| chars: &mut State, |
| ) -> Result<String, TokenizerError> { |
| if let Some(s) = unescape_single_quoted_string(chars) { |
| return Ok(s); |
| } |
| |
| self.tokenizer_error(starting_loc, "Unterminated encoded string literal") |
| } |
| |
/// Reads a string literal quoted by single or triple quote characters.
| /// Examples: `'abc'`, `'''abc'''`, `"""abc"""`. |
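///
/// ```text
/// 'abc'     => single_quote_token("abc")
/// ''        => single_quote_token("")    // two opening quotes: empty string
/// '''abc''' => triple_quote_token("abc")
/// ```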
| fn tokenize_single_or_triple_quoted_string<F>( |
| &self, |
| chars: &mut State, |
| quote_style: char, |
| backslash_escape: bool, |
| single_quote_token: F, |
| triple_quote_token: F, |
| ) -> Result<Option<Token>, TokenizerError> |
| where |
| F: Fn(String) -> Token, |
| { |
| let error_loc = chars.location(); |
| |
| let mut num_opening_quotes = 0u8; |
| for _ in 0..3 { |
| if Some("e_style) == chars.peek() { |
| chars.next(); // Consume quote. |
| num_opening_quotes += 1; |
| } else { |
| break; |
| } |
| } |
| |
| let (token_fn, num_quote_chars) = match num_opening_quotes { |
| 1 => (single_quote_token, NumStringQuoteChars::One), |
| 2 => { |
// Exactly two opening quotes delimit an empty string.
| return Ok(Some(single_quote_token("".into()))); |
| } |
| 3 => { |
| let Some(num_quote_chars) = NonZeroU8::new(3) else { |
| return self.tokenizer_error(error_loc, "invalid number of opening quotes"); |
| }; |
| ( |
| triple_quote_token, |
| NumStringQuoteChars::Many(num_quote_chars), |
| ) |
| } |
| _ => { |
| return self.tokenizer_error(error_loc, "invalid string literal opening"); |
| } |
| }; |
| |
| let settings = TokenizeQuotedStringSettings { |
| quote_style, |
| num_quote_chars, |
| num_opening_quotes_to_consume: 0, |
| backslash_escape, |
| }; |
| |
| self.tokenize_quoted_string(chars, settings) |
| .map(token_fn) |
| .map(Some) |
| } |
| |
| /// Reads a string literal quoted by a single quote character. |
| fn tokenize_single_quoted_string( |
| &self, |
| chars: &mut State, |
| quote_style: char, |
| backslash_escape: bool, |
| ) -> Result<String, TokenizerError> { |
| self.tokenize_quoted_string( |
| chars, |
| TokenizeQuotedStringSettings { |
| quote_style, |
| num_quote_chars: NumStringQuoteChars::One, |
| num_opening_quotes_to_consume: 1, |
| backslash_escape, |
| }, |
| ) |
| } |
| |
| /// Read a quoted string. |
| fn tokenize_quoted_string( |
| &self, |
| chars: &mut State, |
| settings: TokenizeQuotedStringSettings, |
| ) -> Result<String, TokenizerError> { |
| let mut s = String::new(); |
| let error_loc = chars.location(); |
| |
| // Consume any opening quotes. |
| for _ in 0..settings.num_opening_quotes_to_consume { |
| if Some(settings.quote_style) != chars.next() { |
| return self.tokenizer_error(error_loc, "invalid string literal opening"); |
| } |
| } |
| |
| let mut num_consecutive_quotes = 0; |
| while let Some(&ch) = chars.peek() { |
| let pending_final_quote = match settings.num_quote_chars { |
| NumStringQuoteChars::One => Some(NumStringQuoteChars::One), |
| n @ NumStringQuoteChars::Many(count) |
| if num_consecutive_quotes + 1 == count.get() => |
| { |
| Some(n) |
| } |
| NumStringQuoteChars::Many(_) => None, |
| }; |
| |
| match ch { |
| char if char == settings.quote_style && pending_final_quote.is_some() => { |
| chars.next(); // consume |
| |
| if let Some(NumStringQuoteChars::Many(count)) = pending_final_quote { |
| // For an initial string like `"""abc"""`, at this point we have |
| // `abc""` in the buffer and have now matched the final `"`. |
| // However, the string to return is simply `abc`, so we strip off |
| // the trailing quotes before returning. |
| let mut buf = s.chars(); |
| for _ in 1..count.get() { |
| buf.next_back(); |
| } |
| return Ok(buf.as_str().to_string()); |
| } else if chars |
| .peek() |
| .map(|c| *c == settings.quote_style) |
| .unwrap_or(false) |
| { |
| s.push(ch); |
| if !self.unescape { |
// In no-escape mode, the original query text must be preserved verbatim
| s.push(ch); |
| } |
| chars.next(); |
| } else { |
| return Ok(s); |
| } |
| } |
| '\\' if settings.backslash_escape => { |
| // consume backslash |
| chars.next(); |
| |
| num_consecutive_quotes = 0; |
| |
| if let Some(next) = chars.peek() { |
| if !self.unescape |
| || (self.dialect.ignores_wildcard_escapes() |
| && (*next == '%' || *next == '_')) |
| { |
// In no-escape mode, the original query text must be preserved verbatim,
// including backslashes. Similarly, when the dialect ignores LIKE
// wildcard escapes (`\%`, `\_`), the backslash is not stripped.
| s.push(ch); |
| s.push(*next); |
| chars.next(); // consume next |
| } else { |
| let n = match next { |
| '0' => '\0', |
| 'a' => '\u{7}', |
| 'b' => '\u{8}', |
| 'f' => '\u{c}', |
| 'n' => '\n', |
| 'r' => '\r', |
| 't' => '\t', |
| 'Z' => '\u{1a}', |
| _ => *next, |
| }; |
| s.push(n); |
| chars.next(); // consume next |
| } |
| } |
| } |
| ch => { |
| chars.next(); // consume ch |
| |
| if ch == settings.quote_style { |
| num_consecutive_quotes += 1; |
| } else { |
| num_consecutive_quotes = 0; |
| } |
| |
| s.push(ch); |
| } |
| } |
| } |
| self.tokenizer_error(error_loc, "Unterminated string literal") |
| } |
| |
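/// Consume a multi-line comment after the opening `/*` has already been
/// consumed, returning it as `Whitespace::MultiLineComment`; tracks nested
/// `/* ... */` pairs when the dialect supports nested comments.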
| fn tokenize_multiline_comment( |
| &self, |
| chars: &mut State, |
| ) -> Result<Option<Token>, TokenizerError> { |
| let mut s = String::new(); |
| let mut nested = 1; |
| let supports_nested_comments = self.dialect.supports_nested_comments(); |
| |
| loop { |
| match chars.next() { |
| Some('/') if matches!(chars.peek(), Some('*')) && supports_nested_comments => { |
| chars.next(); // consume the '*' |
| s.push('/'); |
| s.push('*'); |
| nested += 1; |
| } |
| Some('*') if matches!(chars.peek(), Some('/')) => { |
| chars.next(); // consume the '/' |
| nested -= 1; |
| if nested == 0 { |
| break Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(s)))); |
| } |
| s.push('*'); |
| s.push('/'); |
| } |
| Some(ch) => { |
| s.push(ch); |
| } |
| None => { |
| break self.tokenizer_error( |
| chars.location(), |
| "Unexpected EOF while in a multi-line comment", |
| ); |
| } |
| } |
| } |
| } |
| |
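/// Read the body of a quoted identifier up to the closing `quote_end`,
/// treating a doubled `quote_end` as an escaped quote character. Returns
/// the body and the last character read (the closing quote, if found).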
| fn parse_quoted_ident(&self, chars: &mut State, quote_end: char) -> (String, Option<char>) { |
| let mut last_char = None; |
| let mut s = String::new(); |
| while let Some(ch) = chars.next() { |
| if ch == quote_end { |
| if chars.peek() == Some("e_end) { |
| chars.next(); |
| s.push(ch); |
| if !self.unescape { |
// In no-escape mode, the original query text must be preserved verbatim
| s.push(ch); |
| } |
| } else { |
| last_char = Some(quote_end); |
| break; |
| } |
| } else { |
| s.push(ch); |
| } |
| } |
| (s, last_char) |
| } |
| |
| #[allow(clippy::unnecessary_wraps)] |
| fn consume_and_return( |
| &self, |
| chars: &mut State, |
| t: Token, |
| ) -> Result<Option<Token>, TokenizerError> { |
| chars.next(); |
| Ok(Some(t)) |
| } |
| } |
| |
| /// Read from `chars` until `predicate` returns `false` or EOF is hit. |
/// Return the characters read as a String, and keep the first non-matching
| /// char available as `chars.next()`. |
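///
/// For example, with an ASCII-digit predicate:
///
/// ```text
/// input "123abc" => returns "123", leaving 'a' as the next char
/// ```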
| fn peeking_take_while(chars: &mut State, mut predicate: impl FnMut(char) -> bool) -> String { |
| let mut s = String::new(); |
| while let Some(&ch) = chars.peek() { |
| if predicate(ch) { |
| chars.next(); // consume |
| s.push(ch); |
| } else { |
| break; |
| } |
| } |
| s |
| } |
| |
/// Same as `peeking_take_while`, but also passes the character following the current one (if any) to the predicate.
| fn peeking_next_take_while( |
| chars: &mut State, |
| mut predicate: impl FnMut(char, Option<char>) -> bool, |
| ) -> String { |
| let mut s = String::new(); |
| while let Some(&ch) = chars.peek() { |
| let next_char = chars.peekable.clone().nth(1); |
| if predicate(ch, next_char) { |
| chars.next(); // consume |
| s.push(ch); |
| } else { |
| break; |
| } |
| } |
| s |
| } |
| |
| fn unescape_single_quoted_string(chars: &mut State<'_>) -> Option<String> { |
| Unescape::new(chars).unescape() |
| } |
| |
| struct Unescape<'a: 'b, 'b> { |
| chars: &'b mut State<'a>, |
| } |
| |
| impl<'a: 'b, 'b> Unescape<'a, 'b> { |
| fn new(chars: &'b mut State<'a>) -> Self { |
| Self { chars } |
| } |
| fn unescape(mut self) -> Option<String> { |
| let mut unescaped = String::new(); |
| |
| self.chars.next(); |
| |
| while let Some(c) = self.chars.next() { |
| if c == '\'' { |
| // case: '''' |
| if self.chars.peek().map(|c| *c == '\'').unwrap_or(false) { |
| self.chars.next(); |
| unescaped.push('\''); |
| continue; |
| } |
| return Some(unescaped); |
| } |
| |
| if c != '\\' { |
| unescaped.push(c); |
| continue; |
| } |
| |
| let c = match self.chars.next()? { |
| 'b' => '\u{0008}', |
| 'f' => '\u{000C}', |
| 'n' => '\n', |
| 'r' => '\r', |
| 't' => '\t', |
| 'u' => self.unescape_unicode_16()?, |
| 'U' => self.unescape_unicode_32()?, |
| 'x' => self.unescape_hex()?, |
| c if c.is_digit(8) => self.unescape_octal(c)?, |
| c => c, |
| }; |
| |
| unescaped.push(Self::check_null(c)?); |
| } |
| |
| None |
| } |
| |
| #[inline] |
| fn check_null(c: char) -> Option<char> { |
| if c == '\0' { |
| None |
| } else { |
| Some(c) |
| } |
| } |
| |
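// Converts a string of RADIX digits to a single ASCII character, e.g.
// `byte_to_char::<16>("4B")` yields Some('K'); byte values above 127
// yield None.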
| #[inline] |
| fn byte_to_char<const RADIX: u32>(s: &str) -> Option<char> { |
// u32 is used here because Pg wraps the value on overflow rather than raising an error.
| match u32::from_str_radix(s, RADIX) { |
| Err(_) => None, |
| Ok(n) => { |
| let n = n & 0xFF; |
| if n <= 127 { |
| char::from_u32(n) |
| } else { |
| None |
| } |
| } |
| } |
| } |
| |
| // Hexadecimal byte value. \xh, \xhh (h = 0–9, A–F) |
| fn unescape_hex(&mut self) -> Option<char> { |
| let mut s = String::new(); |
| |
| for _ in 0..2 { |
| match self.next_hex_digit() { |
| Some(c) => s.push(c), |
| None => break, |
| } |
| } |
| |
| if s.is_empty() { |
| return Some('x'); |
| } |
| |
| Self::byte_to_char::<16>(&s) |
| } |
| |
| #[inline] |
| fn next_hex_digit(&mut self) -> Option<char> { |
| match self.chars.peek() { |
| Some(c) if c.is_ascii_hexdigit() => self.chars.next(), |
| _ => None, |
| } |
| } |
| |
| // Octal byte value. \o, \oo, \ooo (o = 0–7) |
| fn unescape_octal(&mut self, c: char) -> Option<char> { |
| let mut s = String::new(); |
| |
| s.push(c); |
| for _ in 0..2 { |
match self.next_octal_digit() {
| Some(c) => s.push(c), |
| None => break, |
| } |
| } |
| |
| Self::byte_to_char::<8>(&s) |
| } |
| |
| #[inline] |
fn next_octal_digit(&mut self) -> Option<char> {
| match self.chars.peek() { |
| Some(c) if c.is_digit(8) => self.chars.next(), |
| _ => None, |
| } |
| } |
| |
| // 16-bit hexadecimal Unicode character value. \uxxxx (x = 0–9, A–F) |
| fn unescape_unicode_16(&mut self) -> Option<char> { |
| self.unescape_unicode::<4>() |
| } |
| |
| // 32-bit hexadecimal Unicode character value. \Uxxxxxxxx (x = 0–9, A–F) |
| fn unescape_unicode_32(&mut self) -> Option<char> { |
| self.unescape_unicode::<8>() |
| } |
| |
| fn unescape_unicode<const NUM: usize>(&mut self) -> Option<char> { |
| let mut s = String::new(); |
| for _ in 0..NUM { |
| s.push(self.chars.next()?); |
| } |
| match u32::from_str_radix(&s, 16) { |
| Err(_) => None, |
| Ok(n) => char::from_u32(n), |
| } |
| } |
| } |
| |
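/// Unescape the body of a Unicode-escaped string literal (e.g. Postgres
/// `U&'...'`): `\XXXX` is a 4-hex-digit code point, `\+XXXXXX` a
/// 6-hex-digit one, `\\` a literal backslash, and `''` an escaped quote.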
| fn unescape_unicode_single_quoted_string(chars: &mut State<'_>) -> Result<String, TokenizerError> { |
| let mut unescaped = String::new(); |
| chars.next(); // consume the opening quote |
| while let Some(c) = chars.next() { |
| match c { |
| '\'' => { |
| if chars.peek() == Some(&'\'') { |
| chars.next(); |
| unescaped.push('\''); |
| } else { |
| return Ok(unescaped); |
| } |
| } |
| '\\' => match chars.peek() { |
| Some('\\') => { |
| chars.next(); |
| unescaped.push('\\'); |
| } |
| Some('+') => { |
| chars.next(); |
| unescaped.push(take_char_from_hex_digits(chars, 6)?); |
| } |
| _ => unescaped.push(take_char_from_hex_digits(chars, 4)?), |
| }, |
| _ => { |
| unescaped.push(c); |
| } |
| } |
| } |
| Err(TokenizerError { |
| message: "Unterminated unicode encoded string literal".to_string(), |
| location: chars.location(), |
| }) |
| } |
| |
| fn take_char_from_hex_digits( |
| chars: &mut State<'_>, |
| max_digits: usize, |
| ) -> Result<char, TokenizerError> { |
| let mut result = 0u32; |
| for _ in 0..max_digits { |
| let next_char = chars.next().ok_or_else(|| TokenizerError { |
| message: "Unexpected EOF while parsing hex digit in escaped unicode string." |
| .to_string(), |
| location: chars.location(), |
| })?; |
| let digit = next_char.to_digit(16).ok_or_else(|| TokenizerError { |
| message: format!("Invalid hex digit in escaped unicode string: {next_char}"), |
| location: chars.location(), |
| })?; |
| result = result * 16 + digit; |
| } |
| char::from_u32(result).ok_or_else(|| TokenizerError { |
| message: format!("Invalid unicode character: {result:x}"), |
| location: chars.location(), |
| }) |
| } |
| |
| #[cfg(test)] |
| mod tests { |
| use super::*; |
| use crate::dialect::{ |
| BigQueryDialect, ClickHouseDialect, HiveDialect, MsSqlDialect, MySqlDialect, SQLiteDialect, |
| }; |
| use crate::test_utils::{all_dialects_except, all_dialects_where}; |
| use core::fmt::Debug; |
| |
| #[test] |
| fn tokenizer_error_impl() { |
| let err = TokenizerError { |
| message: "test".into(), |
| location: Location { line: 1, column: 1 }, |
| }; |
| #[cfg(feature = "std")] |
| { |
| use std::error::Error; |
| assert!(err.source().is_none()); |
| } |
| assert_eq!(err.to_string(), "test at Line: 1, Column: 1"); |
| } |
| |
| #[test] |
| fn tokenize_select_1() { |
| let sql = String::from("SELECT 1"); |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| |
| let expected = vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::Number(String::from("1"), false), |
| ]; |
| |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_select_float() { |
| let sql = String::from("SELECT .1"); |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| |
| let expected = vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::Number(String::from(".1"), false), |
| ]; |
| |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_clickhouse_double_equal() { |
| let sql = String::from("SELECT foo=='1'"); |
| let dialect = ClickHouseDialect {}; |
| let mut tokenizer = Tokenizer::new(&dialect, &sql); |
| let tokens = tokenizer.tokenize().unwrap(); |
| |
| let expected = vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::Word(Word { |
| value: "foo".to_string(), |
| quote_style: None, |
| keyword: Keyword::NoKeyword, |
| }), |
| Token::DoubleEq, |
| Token::SingleQuotedString("1".to_string()), |
| ]; |
| |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_numeric_literal_underscore() { |
| let dialect = GenericDialect {}; |
| let sql = String::from("SELECT 10_000"); |
| let mut tokenizer = Tokenizer::new(&dialect, &sql); |
| let tokens = tokenizer.tokenize().unwrap(); |
| let expected = vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::Number("10".to_string(), false), |
| Token::make_word("_000", None), |
| ]; |
| compare(expected, tokens); |
| |
| all_dialects_where(|dialect| dialect.supports_numeric_literal_underscores()).tokenizes_to( |
| "SELECT 10_000, _10_000, 10_00_, 10___0", |
| vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::Number("10_000".to_string(), false), |
| Token::Comma, |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word("_10_000", None), // leading underscore tokenizes as a word (parsed as column identifier) |
| Token::Comma, |
| Token::Whitespace(Whitespace::Space), |
| Token::Number("10_00".to_string(), false), |
| Token::make_word("_", None), // trailing underscores tokenizes as a word (syntax error in some dialects) |
| Token::Comma, |
| Token::Whitespace(Whitespace::Space), |
| Token::Number("10".to_string(), false), |
| Token::make_word("___0", None), // multiple underscores tokenizes as a word (syntax error in some dialects) |
| ], |
| ); |
| } |
| |
| #[test] |
| fn tokenize_select_exponent() { |
| let sql = String::from("SELECT 1e10, 1e-10, 1e+10, 1ea, 1e-10a, 1e-10-10"); |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| |
| let expected = vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::Number(String::from("1e10"), false), |
| Token::Comma, |
| Token::Whitespace(Whitespace::Space), |
| Token::Number(String::from("1e-10"), false), |
| Token::Comma, |
| Token::Whitespace(Whitespace::Space), |
| Token::Number(String::from("1e+10"), false), |
| Token::Comma, |
| Token::Whitespace(Whitespace::Space), |
| Token::Number(String::from("1"), false), |
| Token::make_word("ea", None), |
| Token::Comma, |
| Token::Whitespace(Whitespace::Space), |
| Token::Number(String::from("1e-10"), false), |
| Token::make_word("a", None), |
| Token::Comma, |
| Token::Whitespace(Whitespace::Space), |
| Token::Number(String::from("1e-10"), false), |
| Token::Minus, |
| Token::Number(String::from("10"), false), |
| ]; |
| |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_scalar_function() { |
| let sql = String::from("SELECT sqrt(1)"); |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| |
| let expected = vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word("sqrt", None), |
| Token::LParen, |
| Token::Number(String::from("1"), false), |
| Token::RParen, |
| ]; |
| |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_string_string_concat() { |
| let sql = String::from("SELECT 'a' || 'b'"); |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| |
| let expected = vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::SingleQuotedString(String::from("a")), |
| Token::Whitespace(Whitespace::Space), |
| Token::StringConcat, |
| Token::Whitespace(Whitespace::Space), |
| Token::SingleQuotedString(String::from("b")), |
| ]; |
| |
| compare(expected, tokens); |
| } |

#[test]
| fn tokenize_bitwise_op() { |
| let sql = String::from("SELECT one | two ^ three"); |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| |
| let expected = vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word("one", None), |
| Token::Whitespace(Whitespace::Space), |
| Token::Pipe, |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word("two", None), |
| Token::Whitespace(Whitespace::Space), |
| Token::Caret, |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word("three", None), |
| ]; |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_logical_xor() { |
| let sql = |
| String::from("SELECT true XOR true, false XOR false, true XOR false, false XOR true"); |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| |
| let expected = vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("true"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("XOR"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("true"), |
| Token::Comma, |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("false"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("XOR"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("false"), |
| Token::Comma, |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("true"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("XOR"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("false"), |
| Token::Comma, |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("false"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("XOR"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("true"), |
| ]; |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_simple_select() { |
| let sql = String::from("SELECT * FROM customer WHERE id = 1 LIMIT 5"); |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| |
| let expected = vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::Mul, |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("FROM"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word("customer", None), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("WHERE"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word("id", None), |
| Token::Whitespace(Whitespace::Space), |
| Token::Eq, |
| Token::Whitespace(Whitespace::Space), |
| Token::Number(String::from("1"), false), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("LIMIT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::Number(String::from("5"), false), |
| ]; |
| |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_explain_select() { |
| let sql = String::from("EXPLAIN SELECT * FROM customer WHERE id = 1"); |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| |
| let expected = vec![ |
| Token::make_keyword("EXPLAIN"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::Mul, |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("FROM"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word("customer", None), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("WHERE"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word("id", None), |
| Token::Whitespace(Whitespace::Space), |
| Token::Eq, |
| Token::Whitespace(Whitespace::Space), |
| Token::Number(String::from("1"), false), |
| ]; |
| |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_explain_analyze_select() { |
| let sql = String::from("EXPLAIN ANALYZE SELECT * FROM customer WHERE id = 1"); |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| |
| let expected = vec![ |
| Token::make_keyword("EXPLAIN"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("ANALYZE"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::Mul, |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("FROM"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word("customer", None), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("WHERE"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word("id", None), |
| Token::Whitespace(Whitespace::Space), |
| Token::Eq, |
| Token::Whitespace(Whitespace::Space), |
| Token::Number(String::from("1"), false), |
| ]; |
| |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_string_predicate() { |
| let sql = String::from("SELECT * FROM customer WHERE salary != 'Not Provided'"); |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| |
| let expected = vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::Mul, |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("FROM"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word("customer", None), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("WHERE"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word("salary", None), |
| Token::Whitespace(Whitespace::Space), |
| Token::Neq, |
| Token::Whitespace(Whitespace::Space), |
| Token::SingleQuotedString(String::from("Not Provided")), |
| ]; |
| |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_invalid_string() { |
| let sql = String::from("\n💝مصطفىh"); |
| |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| // println!("tokens: {:#?}", tokens); |
| let expected = vec![ |
| Token::Whitespace(Whitespace::Newline), |
| Token::Char('💝'), |
| Token::make_word("مصطفىh", None), |
| ]; |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_newline_in_string_literal() { |
| let sql = String::from("'foo\r\nbar\nbaz'"); |
| |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| let expected = vec![Token::SingleQuotedString("foo\r\nbar\nbaz".to_string())]; |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_unterminated_string_literal() { |
| let sql = String::from("select 'foo"); |
| |
| let dialect = GenericDialect {}; |
| let mut tokenizer = Tokenizer::new(&dialect, &sql); |
| assert_eq!( |
| tokenizer.tokenize(), |
| Err(TokenizerError { |
| message: "Unterminated string literal".to_string(), |
| location: Location { line: 1, column: 8 }, |
| }) |
| ); |
| } |
| |
| #[test] |
| fn tokenize_unterminated_string_literal_utf8() { |
| let sql = String::from("SELECT \"なにか\" FROM Y WHERE \"なにか\" = 'test;"); |
| |
| let dialect = GenericDialect {}; |
| let mut tokenizer = Tokenizer::new(&dialect, &sql); |
| assert_eq!( |
| tokenizer.tokenize(), |
| Err(TokenizerError { |
| message: "Unterminated string literal".to_string(), |
| location: Location { |
| line: 1, |
| column: 35 |
| } |
| }) |
| ); |
| } |
| |
| #[test] |
| fn tokenize_invalid_string_cols() { |
| let sql = String::from("\n\nSELECT * FROM table\t💝مصطفىh"); |
| |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| // println!("tokens: {:#?}", tokens); |
| let expected = vec![ |
| Token::Whitespace(Whitespace::Newline), |
| Token::Whitespace(Whitespace::Newline), |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::Mul, |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("FROM"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("table"), |
| Token::Whitespace(Whitespace::Tab), |
| Token::Char('💝'), |
| Token::make_word("مصطفىh", None), |
| ]; |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_dollar_quoted_string_tagged() { |
| let test_cases = vec![ |
| ( |
| String::from("SELECT $tag$dollar '$' quoted strings have $tags like this$ or like this $$$tag$"), |
| vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::DollarQuotedString(DollarQuotedString { |
| value: "dollar '$' quoted strings have $tags like this$ or like this $$".into(), |
| tag: Some("tag".into()), |
| }) |
| ] |
| ), |
| ( |
| String::from("SELECT $abc$x$ab$abc$"), |
| vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::DollarQuotedString(DollarQuotedString { |
| value: "x$ab".into(), |
| tag: Some("abc".into()), |
| }) |
| ] |
| ), |
| ( |
| String::from("SELECT $abc$$abc$"), |
| vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::DollarQuotedString(DollarQuotedString { |
| value: "".into(), |
| tag: Some("abc".into()), |
| }) |
| ] |
| ), |
| ( |
| String::from("0$abc$$abc$1"), |
| vec![ |
| Token::Number("0".into(), false), |
| Token::DollarQuotedString(DollarQuotedString { |
| value: "".into(), |
| tag: Some("abc".into()), |
| }), |
| Token::Number("1".into(), false), |
| ] |
| ), |
| ( |
| String::from("$function$abc$q$data$q$$function$"), |
| vec![ |
| Token::DollarQuotedString(DollarQuotedString { |
| value: "abc$q$data$q$".into(), |
| tag: Some("function".into()), |
| }), |
| ] |
| ), |
| ]; |
| |
| let dialect = GenericDialect {}; |
| for (sql, expected) in test_cases { |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| compare(expected, tokens); |
| } |
| } |
| |
| #[test] |
| fn tokenize_dollar_quoted_string_tagged_unterminated() { |
| let sql = String::from("SELECT $tag$dollar '$' quoted strings have $tags like this$ or like this $$$different tag$"); |
| let dialect = GenericDialect {}; |
| assert_eq!( |
| Tokenizer::new(&dialect, &sql).tokenize(), |
| Err(TokenizerError { |
| message: "Unterminated dollar-quoted, expected $".into(), |
| location: Location { |
| line: 1, |
| column: 91 |
| } |
| }) |
| ); |
| } |
| |
| #[test] |
| fn tokenize_dollar_quoted_string_tagged_unterminated_mirror() { |
| let sql = String::from("SELECT $abc$abc$"); |
| let dialect = GenericDialect {}; |
| assert_eq!( |
| Tokenizer::new(&dialect, &sql).tokenize(), |
| Err(TokenizerError { |
| message: "Unterminated dollar-quoted, expected $".into(), |
| location: Location { |
| line: 1, |
| column: 17 |
| } |
| }) |
| ); |
| } |
| |
| #[test] |
| fn tokenize_dollar_placeholder() { |
| let sql = String::from("SELECT $$, $$ABC$$, $ABC$, $ABC"); |
| let dialect = SQLiteDialect {}; |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| assert_eq!( |
| tokens, |
| vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::Placeholder("$$".into()), |
| Token::Comma, |
| Token::Whitespace(Whitespace::Space), |
| Token::Placeholder("$$ABC$$".into()), |
| Token::Comma, |
| Token::Whitespace(Whitespace::Space), |
| Token::Placeholder("$ABC$".into()), |
| Token::Comma, |
| Token::Whitespace(Whitespace::Space), |
| Token::Placeholder("$ABC".into()), |
| ] |
| ); |
| } |
| |
| #[test] |
| fn tokenize_nested_dollar_quoted_strings() { |
| let sql = String::from("SELECT $tag$dollar $nested$ string$tag$"); |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| let expected = vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::DollarQuotedString(DollarQuotedString { |
| value: "dollar $nested$ string".into(), |
| tag: Some("tag".into()), |
| }), |
| ]; |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_dollar_quoted_string_untagged_empty() { |
| let sql = String::from("SELECT $$$$"); |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| let expected = vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::DollarQuotedString(DollarQuotedString { |
| value: "".into(), |
| tag: None, |
| }), |
| ]; |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_dollar_quoted_string_untagged() { |
| let sql = |
| String::from("SELECT $$within dollar '$' quoted strings have $tags like this$ $$"); |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| let expected = vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::DollarQuotedString(DollarQuotedString { |
| value: "within dollar '$' quoted strings have $tags like this$ ".into(), |
| tag: None, |
| }), |
| ]; |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_dollar_quoted_string_untagged_unterminated() { |
| let sql = String::from( |
| "SELECT $$dollar '$' quoted strings have $tags like this$ or like this $different tag$", |
| ); |
| let dialect = GenericDialect {}; |
| assert_eq!( |
| Tokenizer::new(&dialect, &sql).tokenize(), |
| Err(TokenizerError { |
| message: "Unterminated dollar-quoted string".into(), |
| location: Location { |
| line: 1, |
| column: 86 |
| } |
| }) |
| ); |
| } |
| |
| #[test] |
| fn tokenize_right_arrow() { |
| let sql = String::from("FUNCTION(key=>value)"); |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| let expected = vec![ |
| Token::make_word("FUNCTION", None), |
| Token::LParen, |
| Token::make_word("key", None), |
| Token::RArrow, |
| Token::make_word("value", None), |
| Token::RParen, |
| ]; |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_is_null() { |
| let sql = String::from("a IS NULL"); |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| |
| let expected = vec![ |
| Token::make_word("a", None), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("IS"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("NULL"), |
| ]; |
| |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_comment() { |
| let test_cases = vec![ |
| ( |
| String::from("0--this is a comment\n1"), |
| vec![ |
| Token::Number("0".to_string(), false), |
| Token::Whitespace(Whitespace::SingleLineComment { |
| prefix: "--".to_string(), |
| comment: "this is a comment\n".to_string(), |
| }), |
| Token::Number("1".to_string(), false), |
| ], |
| ), |
| ( |
| String::from("0--this is a comment\r1"), |
| vec![ |
| Token::Number("0".to_string(), false), |
| Token::Whitespace(Whitespace::SingleLineComment { |
| prefix: "--".to_string(), |
| comment: "this is a comment\r1".to_string(), |
| }), |
| ], |
| ), |
| ( |
| String::from("0--this is a comment\r\n1"), |
| vec![ |
| Token::Number("0".to_string(), false), |
| Token::Whitespace(Whitespace::SingleLineComment { |
| prefix: "--".to_string(), |
| comment: "this is a comment\r\n".to_string(), |
| }), |
| Token::Number("1".to_string(), false), |
| ], |
| ), |
| ]; |
| |
| let dialect = GenericDialect {}; |
| |
| for (sql, expected) in test_cases { |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| compare(expected, tokens); |
| } |
| } |
| |
| #[test] |
| fn tokenize_comment_postgres() { |
| let sql = String::from("1--\r0"); |
| |
| let dialect = PostgreSqlDialect {}; |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| let expected = vec![ |
| Token::Number("1".to_string(), false), |
| Token::Whitespace(Whitespace::SingleLineComment { |
| prefix: "--".to_string(), |
| comment: "\r".to_string(), |
| }), |
| Token::Number("0".to_string(), false), |
| ]; |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_comment_at_eof() { |
| let sql = String::from("--this is a comment"); |
| |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| let expected = vec![Token::Whitespace(Whitespace::SingleLineComment { |
| prefix: "--".to_string(), |
| comment: "this is a comment".to_string(), |
| })]; |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_multiline_comment() { |
| let sql = String::from("0/*multi-line\n* /comment*/1"); |
| |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| let expected = vec![ |
| Token::Number("0".to_string(), false), |
| Token::Whitespace(Whitespace::MultiLineComment( |
| "multi-line\n* /comment".to_string(), |
| )), |
| Token::Number("1".to_string(), false), |
| ]; |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_nested_multiline_comment() { |
| all_dialects_where(|d| d.supports_nested_comments()).tokenizes_to( |
| "0/*multi-line\n* \n/* comment \n /*comment*/*/ */ /comment*/1", |
| vec![ |
| Token::Number("0".to_string(), false), |
| Token::Whitespace(Whitespace::MultiLineComment( |
| "multi-line\n* \n/* comment \n /*comment*/*/ ".into(), |
| )), |
| Token::Whitespace(Whitespace::Space), |
| Token::Div, |
| Token::Word(Word { |
| value: "comment".to_string(), |
| quote_style: None, |
| keyword: Keyword::COMMENT, |
| }), |
| Token::Mul, |
| Token::Div, |
| Token::Number("1".to_string(), false), |
| ], |
| ); |
| |
| all_dialects_where(|d| d.supports_nested_comments()).tokenizes_to( |
| "0/*multi-line\n* \n/* comment \n /*comment/**/ */ /comment*/*/1", |
| vec![ |
| Token::Number("0".to_string(), false), |
| Token::Whitespace(Whitespace::MultiLineComment( |
| "multi-line\n* \n/* comment \n /*comment/**/ */ /comment*/".into(), |
| )), |
| Token::Number("1".to_string(), false), |
| ], |
| ); |
| |
| all_dialects_where(|d| d.supports_nested_comments()).tokenizes_to( |
| "SELECT 1/* a /* b */ c */0", |
| vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::Number("1".to_string(), false), |
| Token::Whitespace(Whitespace::MultiLineComment(" a /* b */ c ".to_string())), |
| Token::Number("0".to_string(), false), |
| ], |
| ); |
| } |
| |
| #[test] |
| fn tokenize_nested_multiline_comment_empty() { |
| all_dialects_where(|d| d.supports_nested_comments()).tokenizes_to( |
| "select 1/*/**/*/0", |
| vec![ |
| Token::make_keyword("select"), |
| Token::Whitespace(Whitespace::Space), |
| Token::Number("1".to_string(), false), |
| Token::Whitespace(Whitespace::MultiLineComment("/**/".to_string())), |
| Token::Number("0".to_string(), false), |
| ], |
| ); |
| } |
| |
| #[test] |
| fn tokenize_nested_comments_if_not_supported() { |
| all_dialects_except(|d| d.supports_nested_comments()).tokenizes_to( |
| "SELECT 1/*/* nested comment */*/0", |
| vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::Number("1".to_string(), false), |
| Token::Whitespace(Whitespace::MultiLineComment( |
| "/* nested comment ".to_string(), |
| )), |
| Token::Mul, |
| Token::Div, |
| Token::Number("0".to_string(), false), |
| ], |
| ); |
| } |
| |
| #[test] |
| fn tokenize_multiline_comment_with_even_asterisks() { |
| let sql = String::from("\n/** Comment **/\n"); |
| |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| let expected = vec![ |
| Token::Whitespace(Whitespace::Newline), |
| Token::Whitespace(Whitespace::MultiLineComment("* Comment *".to_string())), |
| Token::Whitespace(Whitespace::Newline), |
| ]; |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_unicode_whitespace() { |
| let sql = String::from(" \u{2003}\n"); |
| |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| let expected = vec![ |
| Token::Whitespace(Whitespace::Space), |
| Token::Whitespace(Whitespace::Space), |
| Token::Whitespace(Whitespace::Newline), |
| ]; |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_mismatched_quotes() { |
| let sql = String::from("\"foo"); |
| |
| let dialect = GenericDialect {}; |
| let mut tokenizer = Tokenizer::new(&dialect, &sql); |
| assert_eq!( |
| tokenizer.tokenize(), |
| Err(TokenizerError { |
| message: "Expected close delimiter '\"' before EOF.".to_string(), |
| location: Location { line: 1, column: 1 }, |
| }) |
| ); |
| } |
| |
| #[test] |
| fn tokenize_newlines() { |
| let sql = String::from("line1\nline2\rline3\r\nline4\r"); |
| |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| let expected = vec![ |
| Token::make_word("line1", None), |
| Token::Whitespace(Whitespace::Newline), |
| Token::make_word("line2", None), |
| Token::Whitespace(Whitespace::Newline), |
| Token::make_word("line3", None), |
| Token::Whitespace(Whitespace::Newline), |
| Token::make_word("line4", None), |
| Token::Whitespace(Whitespace::Newline), |
| ]; |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_mssql_top() { |
| let sql = "SELECT TOP 5 [bar] FROM foo"; |
| let dialect = MsSqlDialect {}; |
| let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); |
| let expected = vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("TOP"), |
| Token::Whitespace(Whitespace::Space), |
| Token::Number(String::from("5"), false), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word("bar", Some('[')), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("FROM"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word("foo", None), |
| ]; |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_pg_regex_match() { |
| let sql = "SELECT col ~ '^a', col ~* '^a', col !~ '^a', col !~* '^a'"; |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); |
| let expected = vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word("col", None), |
| Token::Whitespace(Whitespace::Space), |
| Token::Tilde, |
| Token::Whitespace(Whitespace::Space), |
| Token::SingleQuotedString("^a".into()), |
| Token::Comma, |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word("col", None), |
| Token::Whitespace(Whitespace::Space), |
| Token::TildeAsterisk, |
| Token::Whitespace(Whitespace::Space), |
| Token::SingleQuotedString("^a".into()), |
| Token::Comma, |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word("col", None), |
| Token::Whitespace(Whitespace::Space), |
| Token::ExclamationMarkTilde, |
| Token::Whitespace(Whitespace::Space), |
| Token::SingleQuotedString("^a".into()), |
| Token::Comma, |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word("col", None), |
| Token::Whitespace(Whitespace::Space), |
| Token::ExclamationMarkTildeAsterisk, |
| Token::Whitespace(Whitespace::Space), |
| Token::SingleQuotedString("^a".into()), |
| ]; |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_pg_like_match() { |
| let sql = "SELECT col ~~ '_a%', col ~~* '_a%', col !~~ '_a%', col !~~* '_a%'"; |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); |
| let expected = vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word("col", None), |
| Token::Whitespace(Whitespace::Space), |
| Token::DoubleTilde, |
| Token::Whitespace(Whitespace::Space), |
| Token::SingleQuotedString("_a%".into()), |
| Token::Comma, |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word("col", None), |
| Token::Whitespace(Whitespace::Space), |
| Token::DoubleTildeAsterisk, |
| Token::Whitespace(Whitespace::Space), |
| Token::SingleQuotedString("_a%".into()), |
| Token::Comma, |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word("col", None), |
| Token::Whitespace(Whitespace::Space), |
| Token::ExclamationMarkDoubleTilde, |
| Token::Whitespace(Whitespace::Space), |
| Token::SingleQuotedString("_a%".into()), |
| Token::Comma, |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word("col", None), |
| Token::Whitespace(Whitespace::Space), |
| Token::ExclamationMarkDoubleTildeAsterisk, |
| Token::Whitespace(Whitespace::Space), |
| Token::SingleQuotedString("_a%".into()), |
| ]; |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_quoted_identifier() { |
| let sql = r#" "a "" b" "a """ "c """"" "#; |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); |
| let expected = vec![ |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word(r#"a " b"#, Some('"')), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word(r#"a ""#, Some('"')), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word(r#"c """#, Some('"')), |
| Token::Whitespace(Whitespace::Space), |
| ]; |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_snowflake_div() { |
| let sql = r#"field/1000"#; |
| let dialect = SnowflakeDialect {}; |
| let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); |
| let expected = vec![ |
| Token::make_word(r#"field"#, None), |
| Token::Div, |
| Token::Number("1000".to_string(), false), |
| ]; |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_quoted_identifier_with_no_escape() { |
| let sql = r#" "a "" b" "a """ "c """"" "#; |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, sql) |
| .with_unescape(false) |
| .tokenize() |
| .unwrap(); |
| let expected = vec![ |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word(r#"a "" b"#, Some('"')), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word(r#"a """#, Some('"')), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word(r#"c """""#, Some('"')), |
| Token::Whitespace(Whitespace::Space), |
| ]; |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_with_location() { |
| let sql = "SELECT a,\n b"; |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, sql) |
| .tokenize_with_location() |
| .unwrap(); |
| let expected = vec![ |
| TokenWithSpan::at(Token::make_keyword("SELECT"), (1, 1).into(), (1, 7).into()), |
| TokenWithSpan::at( |
| Token::Whitespace(Whitespace::Space), |
| (1, 7).into(), |
| (1, 8).into(), |
| ), |
| TokenWithSpan::at(Token::make_word("a", None), (1, 8).into(), (1, 9).into()), |
| TokenWithSpan::at(Token::Comma, (1, 9).into(), (1, 10).into()), |
| TokenWithSpan::at( |
| Token::Whitespace(Whitespace::Newline), |
| (1, 10).into(), |
| (2, 1).into(), |
| ), |
| TokenWithSpan::at( |
| Token::Whitespace(Whitespace::Space), |
| (2, 1).into(), |
| (2, 2).into(), |
| ), |
| TokenWithSpan::at(Token::make_word("b", None), (2, 2).into(), (2, 3).into()), |
| ]; |
| compare(expected, tokens); |
| } |
| |
| fn compare<T: PartialEq + fmt::Debug>(expected: Vec<T>, actual: Vec<T>) { |
| //println!("------------------------------"); |
| //println!("tokens = {:?}", actual); |
| //println!("expected = {:?}", expected); |
| //println!("------------------------------"); |
| assert_eq!(expected, actual); |
| } |
| |
| fn check_unescape(s: &str, expected: Option<&str>) { |
| let s = format!("'{s}'"); |
| let mut state = State { |
| peekable: s.chars().peekable(), |
| line: 0, |
| col: 0, |
| }; |
| |
| assert_eq!( |
| unescape_single_quoted_string(&mut state), |
| expected.map(|s| s.to_string()) |
| ); |
| } |
| |
| #[test] |
| fn test_unescape() { |
| check_unescape(r"\b", Some("\u{0008}")); |
| check_unescape(r"\f", Some("\u{000C}")); |
| check_unescape(r"\t", Some("\t")); |
| check_unescape(r"\r\n", Some("\r\n")); |
| check_unescape(r"\/", Some("/")); |
| check_unescape(r"/", Some("/")); |
| check_unescape(r"\\", Some("\\")); |
| |
// 16- and 32-bit hexadecimal Unicode character values
| check_unescape(r"\u0001", Some("\u{0001}")); |
| check_unescape(r"\u4c91", Some("\u{4c91}")); |
| check_unescape(r"\u4c916", Some("\u{4c91}6")); |
| check_unescape(r"\u4c", None); |
| check_unescape(r"\u0000", None); |
| check_unescape(r"\U0010FFFF", Some("\u{10FFFF}")); |
| check_unescape(r"\U00110000", None); |
| check_unescape(r"\U00000000", None); |
| check_unescape(r"\u", None); |
| check_unescape(r"\U", None); |
| check_unescape(r"\U1010FFFF", None); |
| |
| // hexadecimal byte value |
| check_unescape(r"\x4B", Some("\u{004b}")); |
| check_unescape(r"\x4", Some("\u{0004}")); |
| check_unescape(r"\x4L", Some("\u{0004}L")); |
| check_unescape(r"\x", Some("x")); |
| check_unescape(r"\xP", Some("xP")); |
| check_unescape(r"\x0", None); |
| check_unescape(r"\xCAD", None); |
| check_unescape(r"\xA9", None); |
| |
| // octal byte value |
| check_unescape(r"\1", Some("\u{0001}")); |
| check_unescape(r"\12", Some("\u{000a}")); |
| check_unescape(r"\123", Some("\u{0053}")); |
| check_unescape(r"\1232", Some("\u{0053}2")); |
| check_unescape(r"\4", Some("\u{0004}")); |
| check_unescape(r"\45", Some("\u{0025}")); |
| check_unescape(r"\450", Some("\u{0028}")); |
| check_unescape(r"\603", None); |
| check_unescape(r"\0", None); |
| check_unescape(r"\080", None); |
| |
| // others |
| check_unescape(r"\9", Some("9")); |
| check_unescape(r"''", Some("'")); |
| check_unescape( |
| r"Hello\r\nRust/\u4c91 SQL Parser\U0010ABCD\1232", |
| Some("Hello\r\nRust/\u{4c91} SQL Parser\u{10abcd}\u{0053}2"), |
| ); |
| check_unescape(r"Hello\0", None); |
| check_unescape(r"Hello\xCADRust", None); |
| } |
| |
| #[test] |
| fn tokenize_numeric_prefix_trait() { |
| #[derive(Debug)] |
| struct NumericPrefixDialect; |
| |
| impl Dialect for NumericPrefixDialect { |
| fn is_identifier_start(&self, ch: char) -> bool { |
| ch.is_ascii_lowercase() |
| || ch.is_ascii_uppercase() |
| || ch.is_ascii_digit() |
| || ch == '$' |
| } |
| |
| fn is_identifier_part(&self, ch: char) -> bool { |
| ch.is_ascii_lowercase() |
| || ch.is_ascii_uppercase() |
| || ch.is_ascii_digit() |
| || ch == '_' |
| || ch == '$' |
| || ch == '{' |
| || ch == '}' |
| } |
| |
| fn supports_numeric_prefix(&self) -> bool { |
| true |
| } |
| } |
| |
| tokenize_numeric_prefix_inner(&NumericPrefixDialect {}); |
| tokenize_numeric_prefix_inner(&HiveDialect {}); |
| tokenize_numeric_prefix_inner(&MySqlDialect {}); |
| } |
| |
| fn tokenize_numeric_prefix_inner(dialect: &dyn Dialect) { |
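| // Even when identifiers may start with digits, a bare `1` after FROM must |
| // still tokenize as a number |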
| let sql = r#"SELECT * FROM 1"#; |
| let tokens = Tokenizer::new(dialect, sql).tokenize().unwrap(); |
| let expected = vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::Mul, |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("FROM"), |
| Token::Whitespace(Whitespace::Space), |
| Token::Number(String::from("1"), false), |
| ]; |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn tokenize_quoted_string_escape() { |
| let dialect = SnowflakeDialect {}; |
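| // Each case lists the raw SQL, the token contents with unescaping disabled |
| // (escape sequences kept verbatim), and the contents with unescaping enabled |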
| for (sql, expected, expected_unescaped) in [ |
| (r#"'%a\'%b'"#, r#"%a\'%b"#, r#"%a'%b"#), |
| (r#"'a\'\'b\'c\'d'"#, r#"a\'\'b\'c\'d"#, r#"a''b'c'd"#), |
| (r#"'\\'"#, r#"\\"#, r#"\"#), |
| ( |
| r#"'\0\a\b\f\n\r\t\Z'"#, |
| r#"\0\a\b\f\n\r\t\Z"#, |
| "\0\u{7}\u{8}\u{c}\n\r\t\u{1a}", |
| ), |
| (r#"'\"'"#, r#"\""#, "\""), |
| (r#"'\\a\\b\'c'"#, r#"\\a\\b\'c"#, r#"\a\b'c"#), |
| (r#"'\'abcd'"#, r#"\'abcd"#, r#"'abcd"#), |
| (r#"'''a''b'"#, r#"''a''b"#, r#"'a'b"#), |
| (r#"'\q'"#, r#"\q"#, r#"q"#), |
| (r#"'\%\_'"#, r#"\%\_"#, r#"%_"#), |
| (r#"'\\%\\_'"#, r#"\\%\\_"#, r#"\%\_"#), |
| ] { |
| let tokens = Tokenizer::new(&dialect, sql) |
| .with_unescape(false) |
| .tokenize() |
| .unwrap(); |
| let expected = vec![Token::SingleQuotedString(expected.to_string())]; |
| compare(expected, tokens); |
| |
| let tokens = Tokenizer::new(&dialect, sql) |
| .with_unescape(true) |
| .tokenize() |
| .unwrap(); |
| let expected = vec![Token::SingleQuotedString(expected_unescaped.to_string())]; |
| compare(expected, tokens); |
| } |
| |
| for sql in [r#"'\'"#, r#"'ab\'"#] { |
| let mut tokenizer = Tokenizer::new(&dialect, sql); |
| assert_eq!( |
| "Unterminated string literal", |
| tokenizer.tokenize().unwrap_err().message.as_str(), |
| ); |
| } |
| |
| // Dialects without backslash escapes keep the backslash literally, so the |
| // final quote still terminates the string |
| for (sql, expected) in [(r#"'\'"#, r#"\"#), (r#"'ab\'"#, r#"ab\"#)] { |
| let dialect = GenericDialect {}; |
| let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); |
| |
| let expected = vec![Token::SingleQuotedString(expected.to_string())]; |
| |
| compare(expected, tokens); |
| } |
| |
| // MySQL keeps \% and \_ verbatim: they are LIKE pattern escapes resolved at |
| // match time, not string escapes |
| for (sql, expected) in [(r#"'\%'"#, r#"\%"#), (r#"'\_'"#, r#"\_"#)] { |
| let dialect = MySqlDialect {}; |
| let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); |
| |
| let expected = vec![Token::SingleQuotedString(expected.to_string())]; |
| |
| compare(expected, tokens); |
| } |
| } |
| |
| #[test] |
| fn tokenize_triple_quoted_string() { |
| fn check<F>( |
| q: char, // The quote character under test. |
| r: char, // An alternate quote character. |
| quote_token: F, |
| ) where |
| F: Fn(String) -> Token, |
| { |
| let dialect = BigQueryDialect {}; |
| |
| for (sql, expected, expected_unescaped) in [ |
| // Empty string |
| (format!(r#"{q}{q}{q}{q}{q}{q}"#), "".into(), "".into()), |
| // Should not count escaped quote as end of string. |
| ( |
| format!(r#"{q}{q}{q}ab{q}{q}\{q}{q}cd{q}{q}{q}"#), |
| format!(r#"ab{q}{q}\{q}{q}cd"#), |
| format!(r#"ab{q}{q}{q}{q}cd"#), |
| ), |
| // Simple string |
| ( |
| format!(r#"{q}{q}{q}abc{q}{q}{q}"#), |
| "abc".into(), |
| "abc".into(), |
| ), |
| // The alternate quote character, even tripled, passes through unchanged. |
| ( |
| format!(r#"{q}{q}{q}ab{r}{r}{r}c{r}def{r}{r}{r}{q}{q}{q}"#), |
| format!("ab{r}{r}{r}c{r}def{r}{r}{r}"), |
| format!("ab{r}{r}{r}c{r}def{r}{r}{r}"), |
| ), |
| // Escaped quote. |
| ( |
| format!(r#"{q}{q}{q}ab{q}{q}c{q}{q}\{q}de{q}{q}f{q}{q}{q}"#), |
| format!(r#"ab{q}{q}c{q}{q}\{q}de{q}{q}f"#), |
| format!(r#"ab{q}{q}c{q}{q}{q}de{q}{q}f"#), |
| ), |
| // Backslash-escaped quote characters. |
| ( |
| format!(r#"{q}{q}{q}a\'\'b\'c\'d{q}{q}{q}"#), |
| r#"a\'\'b\'c\'d"#.into(), |
| r#"a''b'c'd"#.into(), |
| ), |
| // Backslash-escaped control characters. |
| ( |
| format!(r#"{q}{q}{q}abc\0\n\rdef{q}{q}{q}"#), |
| r#"abc\0\n\rdef"#.into(), |
| "abc\0\n\rdef".into(), |
| ), |
| ] { |
| let tokens = Tokenizer::new(&dialect, sql.as_str()) |
| .with_unescape(false) |
| .tokenize() |
| .unwrap(); |
| let expected = vec![quote_token(expected.to_string())]; |
| compare(expected, tokens); |
| |
| let tokens = Tokenizer::new(&dialect, sql.as_str()) |
| .with_unescape(true) |
| .tokenize() |
| .unwrap(); |
| let expected = vec![quote_token(expected_unescaped.to_string())]; |
| compare(expected, tokens); |
| } |
| |
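| // Each of these inputs is an unterminated triple-quoted string and must |
| // produce a tokenizer error rather than a token |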
| for sql in [ |
| format!(r#"{q}{q}{q}{q}{q}\{q}"#), |
| format!(r#"{q}{q}{q}abc{q}{q}\{q}"#), |
| format!(r#"{q}{q}{q}{q}"#), |
| format!(r#"{q}{q}{q}{r}{r}"#), |
| format!(r#"{q}{q}{q}abc{q}"#), |
| format!(r#"{q}{q}{q}abc{q}{q}"#), |
| format!(r#"{q}{q}{q}abc"#), |
| ] { |
| let dialect = BigQueryDialect {}; |
| let mut tokenizer = Tokenizer::new(&dialect, sql.as_str()); |
| assert_eq!( |
| "Unterminated string literal", |
| tokenizer.tokenize().unwrap_err().message.as_str(), |
| ); |
| } |
| } |
| |
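| // Exercise both triple-quote styles, using the other quote character as the |
| // embedded alternate |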
| check('"', '\'', Token::TripleDoubleQuotedString); |
| |
| check('\'', '"', Token::TripleSingleQuotedString); |
| |
| let dialect = BigQueryDialect {}; |
| |
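| // A pair of quotes of each kind is an empty string, not the start of a |
| // triple-quoted string |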
| let sql = r#"""''"#; |
| let tokens = Tokenizer::new(&dialect, sql) |
| .with_unescape(true) |
| .tokenize() |
| .unwrap(); |
| let expected = vec![ |
| Token::DoubleQuotedString("".to_string()), |
| Token::SingleQuotedString("".to_string()), |
| ]; |
| compare(expected, tokens); |
| |
| let sql = r#"''"""#; |
| let tokens = Tokenizer::new(&dialect, sql) |
| .with_unescape(true) |
| .tokenize() |
| .unwrap(); |
| let expected = vec![ |
| Token::SingleQuotedString("".to_string()), |
| Token::DoubleQuotedString("".to_string()), |
| ]; |
| compare(expected, tokens); |
| |
| // A dialect without triple-quoted strings reads '''''' as a single string |
| // consisting of two escaped quotes |
| let dialect = SnowflakeDialect {}; |
| let sql = r#"''''''"#; |
| let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); |
| let expected = vec![Token::SingleQuotedString("''".to_string())]; |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn test_mysql_users_grantees() { |
| let dialect = MySqlDialect {}; |
| |
| let sql = "CREATE USER `root`@`%`"; |
| let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); |
| let expected = vec![ |
| Token::make_keyword("CREATE"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("USER"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word("root", Some('`')), |
| Token::AtSign, |
| Token::make_word("%", Some('`')), |
| ]; |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn test_postgres_abs_without_space_and_string_literal() { |
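| // An `@` immediately followed by a string literal must lex as a bare AtSign |
| // plus the literal; the name refers to PostgreSQL's abs operator `@` |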
| let dialect = MySqlDialect {}; |
| |
| let sql = "SELECT @'1'"; |
| let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); |
| let expected = vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::AtSign, |
| Token::SingleQuotedString("1".to_string()), |
| ]; |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn test_postgres_abs_without_space_and_quoted_column() { |
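| // As above, but the `@` directly precedes a double-quoted name |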
| let dialect = MySqlDialect {}; |
| |
| let sql = r#"SELECT @"bar" FROM foo"#; |
| let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); |
| let expected = vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::AtSign, |
| Token::DoubleQuotedString("bar".to_string()), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_keyword("FROM"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word("foo", None), |
| ]; |
| compare(expected, tokens); |
| } |
| |
| #[test] |
| fn test_national_strings_backslash_escape_not_supported() { |
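| // The SQL here is `select n'''''\'`: the two doubled quotes collapse to '', |
| // the bare backslash stays literal, and the final quote closes the string |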
| all_dialects_where(|dialect| !dialect.supports_string_literal_backslash_escape()) |
| .tokenizes_to( |
| "select n'''''\\'", |
| vec![ |
| Token::make_keyword("select"), |
| Token::Whitespace(Whitespace::Space), |
| Token::NationalStringLiteral("''\\".to_string()), |
| ], |
| ); |
| } |
| |
| #[test] |
| fn test_national_strings_backslash_escape_supported() { |
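| // The SQL here is `select n'''''\''`: two doubled quotes plus a |
| // backslash-escaped quote unescape to three quotes in a row |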
| all_dialects_where(|dialect| dialect.supports_string_literal_backslash_escape()) |
| .tokenizes_to( |
| "select n'''''\\''", |
| vec![ |
| Token::make_keyword("select"), |
| Token::Whitespace(Whitespace::Space), |
| Token::NationalStringLiteral("'''".to_string()), |
| ], |
| ); |
| } |
| |
| #[test] |
| fn test_string_escape_constant_not_supported() { |
| all_dialects_where(|dialect| !dialect.supports_string_escape_constant()).tokenizes_to( |
| "select e'...'", |
| vec![ |
| Token::make_keyword("select"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word("e", None), |
| Token::SingleQuotedString("...".to_string()), |
| ], |
| ); |
| |
| all_dialects_where(|dialect| !dialect.supports_string_escape_constant()).tokenizes_to( |
| "select E'...'", |
| vec![ |
| Token::make_keyword("select"), |
| Token::Whitespace(Whitespace::Space), |
| Token::make_word("E", None), |
| Token::SingleQuotedString("...".to_string()), |
| ], |
| ); |
| } |
| |
| #[test] |
| fn test_string_escape_constant_supported() { |
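| // The SQL here is `select e'\''`: inside an escape-string constant the |
| // backslash-escaped quote unescapes to a single quote |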
| all_dialects_where(|dialect| dialect.supports_string_escape_constant()).tokenizes_to( |
| "select e'\\''", |
| vec![ |
| Token::make_keyword("select"), |
| Token::Whitespace(Whitespace::Space), |
| Token::EscapedStringLiteral("'".to_string()), |
| ], |
| ); |
| |
| all_dialects_where(|dialect| dialect.supports_string_escape_constant()).tokenizes_to( |
| "select E'\\''", |
| vec![ |
| Token::make_keyword("select"), |
| Token::Whitespace(Whitespace::Space), |
| Token::EscapedStringLiteral("'".to_string()), |
| ], |
| ); |
| } |
| |
| #[test] |
| fn test_whitespace_required_after_single_line_comment() { |
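| // When a dialect requires whitespace after `--`, the input `--'abc'` is two |
| // minus operators followed by a string, not a comment |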
| all_dialects_where(|dialect| dialect.requires_single_line_comment_whitespace()) |
| .tokenizes_to( |
| "SELECT --'abc'", |
| vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::Minus, |
| Token::Minus, |
| Token::SingleQuotedString("abc".to_string()), |
| ], |
| ); |
| |
| all_dialects_where(|dialect| dialect.requires_single_line_comment_whitespace()) |
| .tokenizes_to( |
| "SELECT -- 'abc'", |
| vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::Whitespace(Whitespace::SingleLineComment { |
| prefix: "--".to_string(), |
| comment: " 'abc'".to_string(), |
| }), |
| ], |
| ); |
| |
| all_dialects_where(|dialect| dialect.requires_single_line_comment_whitespace()) |
| .tokenizes_to( |
| "SELECT --", |
| vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::Minus, |
| Token::Minus, |
| ], |
| ); |
| } |
| |
| #[test] |
| fn test_whitespace_not_required_after_single_line_comment() { |
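| // Without that requirement, everything after `--` up to end of line is the |
| // comment body, even when it is empty |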
| all_dialects_where(|dialect| !dialect.requires_single_line_comment_whitespace()) |
| .tokenizes_to( |
| "SELECT --'abc'", |
| vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::Whitespace(Whitespace::SingleLineComment { |
| prefix: "--".to_string(), |
| comment: "'abc'".to_string(), |
| }), |
| ], |
| ); |
| |
| all_dialects_where(|dialect| !dialect.requires_single_line_comment_whitespace()) |
| .tokenizes_to( |
| "SELECT -- 'abc'", |
| vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::Whitespace(Whitespace::SingleLineComment { |
| prefix: "--".to_string(), |
| comment: " 'abc'".to_string(), |
| }), |
| ], |
| ); |
| |
| all_dialects_where(|dialect| !dialect.requires_single_line_comment_whitespace()) |
| .tokenizes_to( |
| "SELECT --", |
| vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::Whitespace(Whitespace::SingleLineComment { |
| prefix: "--".to_string(), |
| comment: "".to_string(), |
| }), |
| ], |
| ); |
| } |
| |
| #[test] |
| fn test_tokenize_identifiers_numeric_prefix() { |
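| // On its own, `12e34` lexes as a number in scientific notation, but after a |
| // period it is an identifier with a numeric prefix |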
| all_dialects_where(|dialect| dialect.supports_numeric_prefix()) |
| .tokenizes_to("123abc", vec![Token::make_word("123abc", None)]); |
| |
| all_dialects_where(|dialect| dialect.supports_numeric_prefix()) |
| .tokenizes_to("12e34", vec![Token::Number("12e34".to_string(), false)]); |
| |
| all_dialects_where(|dialect| dialect.supports_numeric_prefix()).tokenizes_to( |
| "t.12e34", |
| vec![ |
| Token::make_word("t", None), |
| Token::Period, |
| Token::make_word("12e34", None), |
| ], |
| ); |
| |
| all_dialects_where(|dialect| dialect.supports_numeric_prefix()).tokenizes_to( |
| "t.1two3", |
| vec![ |
| Token::make_word("t", None), |
| Token::Period, |
| Token::make_word("1two3", None), |
| ], |
| ); |
| } |
| |
| #[test] |
| fn tokenize_period_underscore() { |
| let sql = String::from("SELECT table._col"); |
| // A dialect that supports underscores in numeric literals. |
| let dialect = PostgreSqlDialect {}; |
| let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); |
| |
| let expected = vec![ |
| Token::make_keyword("SELECT"), |
| Token::Whitespace(Whitespace::Space), |
| Token::Word(Word { |
| value: "table".to_string(), |
| quote_style: None, |
| keyword: Keyword::TABLE, |
| }), |
| Token::Period, |
| Token::Word(Word { |
| value: "_col".to_string(), |
| quote_style: None, |
| keyword: Keyword::NoKeyword, |
| }), |
| ]; |
| |
| compare(expected, tokens); |
| |
| let sql = String::from("SELECT ._123"); |
| if let Ok(tokens) = Tokenizer::new(&dialect, &sql).tokenize() { |
| panic!("Tokenizer should have failed on {sql}, but it succeeded with {tokens:?}"); |
| } |
| |
| let sql = String::from("SELECT ._abc"); |
| if let Ok(tokens) = Tokenizer::new(&dialect, &sql).tokenize() { |
| panic!("Tokenizer should have failed on {sql}, but it succeeded with {tokens:?}"); |
| } |
| } |
| } |