// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//! SQL Tokenizer
//!
//! The tokenizer (a.k.a. lexer) converts a string into a sequence of tokens.
//!
//! The tokens then form the input for the parser, which outputs an Abstract Syntax Tree (AST).
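//!
//! A minimal usage sketch (mirroring the example on [`Tokenizer::new`]):
//!
//! ```
//! use sqlparser::dialect::GenericDialect;
//! use sqlparser::tokenizer::Tokenizer;
//!
//! let dialect = GenericDialect {};
//! let tokens = Tokenizer::new(&dialect, "SELECT 1").tokenize().unwrap();
//! assert_eq!(tokens.len(), 3); // `SELECT`, a space, and `1`
//! ```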
#[cfg(not(feature = "std"))]
use alloc::{
borrow::ToOwned,
format,
string::{String, ToString},
vec,
vec::Vec,
};
use core::fmt;
use core::iter::Peekable;
use core::num::NonZeroU8;
use core::str::Chars;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
#[cfg(feature = "visitor")]
use sqlparser_derive::{Visit, VisitMut};
use crate::ast::DollarQuotedString;
use crate::dialect::Dialect;
use crate::dialect::{
BigQueryDialect, DuckDbDialect, GenericDialect, PostgreSqlDialect, SnowflakeDialect,
};
use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
/// SQL Token enumeration
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum Token {
/// An end-of-file marker, not a real token
EOF,
/// A keyword (like SELECT) or an optionally quoted SQL identifier
Word(Word),
/// An unsigned numeric literal, with a flag that is true when the literal
/// has a trailing `L` ("long") suffix
Number(String, bool),
/// A character that could not be tokenized
Char(char),
/// Single quoted string: i.e: 'string'
SingleQuotedString(String),
/// Double quoted string: i.e: "string"
DoubleQuotedString(String),
/// Triple single quoted strings: Example '''abc'''
/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
TripleSingleQuotedString(String),
/// Triple double quoted strings: Example """abc"""
/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
TripleDoubleQuotedString(String),
/// Dollar quoted string: i.e: $$string$$ or $tag_name$string$tag_name$
DollarQuotedString(DollarQuotedString),
/// Byte string literal: i.e: b'string' or B'string' (note that some backends, such as
/// PostgreSQL, may treat this syntax as a bit string literal instead, i.e: b'10010101')
SingleQuotedByteStringLiteral(String),
/// Byte string literal: i.e: b"string" or B"string"
DoubleQuotedByteStringLiteral(String),
/// Triple single quoted literal with byte string prefix. Example `B'''abc'''`
/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
TripleSingleQuotedByteStringLiteral(String),
/// Triple double quoted literal with byte string prefix. Example `B"""abc"""`
/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
TripleDoubleQuotedByteStringLiteral(String),
/// Single quoted literal with raw string prefix. Example `R'abc'`
/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
SingleQuotedRawStringLiteral(String),
/// Double quoted literal with raw string prefix. Example `R"abc"`
/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
DoubleQuotedRawStringLiteral(String),
/// Triple single quoted literal with raw string prefix. Example `R'''abc'''`
/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
TripleSingleQuotedRawStringLiteral(String),
/// Triple double quoted literal with raw string prefix. Example `R"""abc"""`
/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
TripleDoubleQuotedRawStringLiteral(String),
/// "National" string literal: i.e: N'string'
NationalStringLiteral(String),
/// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second'
EscapedStringLiteral(String),
/// Unicode string literal: i.e: U&'first \000A second'
UnicodeStringLiteral(String),
/// Hexadecimal string literal: i.e.: X'deadbeef'
HexStringLiteral(String),
/// Comma
Comma,
/// Whitespace (space, tab, etc)
Whitespace(Whitespace),
/// Double equals sign `==`
DoubleEq,
/// Equality operator `=`
Eq,
/// Not Equals operator `<>` (or `!=` in some dialects)
Neq,
/// Less Than operator `<`
Lt,
/// Greater Than operator `>`
Gt,
/// Less Than Or Equals operator `<=`
LtEq,
/// Greater Than Or Equals operator `>=`
GtEq,
/// Spaceship operator `<=>`
Spaceship,
/// Plus operator `+`
Plus,
/// Minus operator `-`
Minus,
/// Multiplication operator `*`
Mul,
/// Division operator `/`
Div,
/// Integer division operator `//` in DuckDB
DuckIntDiv,
/// Modulo Operator `%`
Mod,
/// String concatenation `||`
StringConcat,
/// Left parenthesis `(`
LParen,
/// Right parenthesis `)`
RParen,
/// Period (used for compound identifiers or projections into nested types)
Period,
/// Colon `:`
Colon,
/// DoubleColon `::` (used for casting in PostgreSQL)
DoubleColon,
/// Assignment `:=` (used for keyword argument in DuckDB macros and some functions, and for variable declarations in DuckDB and Snowflake)
Assignment,
/// SemiColon `;` used as a separator for COPY and payload
SemiColon,
/// Backslash `\` used in terminating the COPY payload with `\.`
Backslash,
/// Left bracket `[`
LBracket,
/// Right bracket `]`
RBracket,
/// Ampersand `&`
Ampersand,
/// Pipe `|`
Pipe,
/// Caret `^`
Caret,
/// Left brace `{`
LBrace,
/// Right brace `}`
RBrace,
/// Right Arrow `=>`
RArrow,
/// Sharp `#` used for PostgreSQL Bitwise XOR operator
Sharp,
/// Tilde `~` used for PostgreSQL Bitwise NOT operator or case sensitive match regular expression operator
Tilde,
/// `~*` , a case insensitive match regular expression operator in PostgreSQL
TildeAsterisk,
/// `!~` , a case sensitive not match regular expression operator in PostgreSQL
ExclamationMarkTilde,
/// `!~*` , a case insensitive not match regular expression operator in PostgreSQL
ExclamationMarkTildeAsterisk,
/// `~~`, a case sensitive match pattern operator in PostgreSQL
DoubleTilde,
/// `~~*`, a case insensitive match pattern operator in PostgreSQL
DoubleTildeAsterisk,
/// `!~~`, a case sensitive not match pattern operator in PostgreSQL
ExclamationMarkDoubleTilde,
/// `!~~*`, a case insensitive not match pattern operator in PostgreSQL
ExclamationMarkDoubleTildeAsterisk,
/// `<<`, a bitwise shift left operator in PostgreSQL
ShiftLeft,
/// `>>`, a bitwise shift right operator in PostgreSQL
ShiftRight,
/// `&&`, an overlap operator in PostgreSQL
Overlap,
/// Exclamation Mark `!` used for PostgreSQL factorial operator
ExclamationMark,
/// Double Exclamation Mark `!!` used for PostgreSQL prefix factorial operator
DoubleExclamationMark,
/// AtSign `@` used for PostgreSQL abs operator
AtSign,
/// `^@`, a "starts with" string operator in PostgreSQL
CaretAt,
/// `|/`, a square root math operator in PostgreSQL
PGSquareRoot,
/// `||/`, a cube root math operator in PostgreSQL
PGCubeRoot,
/// `?` or `$`, a prepared statement arg placeholder
Placeholder(String),
/// `->`, used as an operator to extract json field in PostgreSQL
Arrow,
/// `->>`, used as an operator to extract json field as text in PostgreSQL
LongArrow,
/// `#>`, extracts JSON sub-object at the specified path
HashArrow,
/// `#>>`, extracts JSON sub-object at the specified path as text
HashLongArrow,
/// jsonb @> jsonb -> boolean: Test whether left json contains the right json
AtArrow,
/// jsonb <@ jsonb -> boolean: Test whether right json contains the left json
ArrowAt,
/// jsonb #- text[] -> jsonb: Deletes the field or array element at the specified
/// path, where path elements can be either field keys or array indexes.
HashMinus,
/// jsonb @? jsonpath -> boolean: Does JSON path return any item for the specified
/// JSON value?
AtQuestion,
/// jsonb @@ jsonpath → boolean: Returns the result of a JSON path predicate check
/// for the specified JSON value. Only the first item of the result is taken into
/// account. If the result is not Boolean, then NULL is returned.
AtAt,
/// jsonb ? text -> boolean: Checks whether the string exists as a top-level key within the
/// jsonb object
Question,
/// jsonb ?& text[] -> boolean: Check whether all members of the text array exist as top-level
/// keys within the jsonb object
QuestionAnd,
/// jsonb ?| text[] -> boolean: Check whether any member of the text array exists as top-level
/// keys within the jsonb object
QuestionPipe,
/// Custom binary operator
/// This is used to represent any custom binary operator that is not part of the SQL standard.
/// PostgreSQL allows defining custom binary operators using CREATE OPERATOR.
CustomBinaryOperator(String),
}
impl fmt::Display for Token {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Token::EOF => f.write_str("EOF"),
Token::Word(ref w) => write!(f, "{w}"),
Token::Number(ref n, l) => write!(f, "{}{long}", n, long = if *l { "L" } else { "" }),
Token::Char(ref c) => write!(f, "{c}"),
Token::SingleQuotedString(ref s) => write!(f, "'{s}'"),
Token::TripleSingleQuotedString(ref s) => write!(f, "'''{s}'''"),
Token::DoubleQuotedString(ref s) => write!(f, "\"{s}\""),
Token::TripleDoubleQuotedString(ref s) => write!(f, "\"\"\"{s}\"\"\""),
Token::DollarQuotedString(ref s) => write!(f, "{s}"),
Token::NationalStringLiteral(ref s) => write!(f, "N'{s}'"),
Token::EscapedStringLiteral(ref s) => write!(f, "E'{s}'"),
Token::UnicodeStringLiteral(ref s) => write!(f, "U&'{s}'"),
Token::HexStringLiteral(ref s) => write!(f, "X'{s}'"),
Token::SingleQuotedByteStringLiteral(ref s) => write!(f, "B'{s}'"),
Token::TripleSingleQuotedByteStringLiteral(ref s) => write!(f, "B'''{s}'''"),
Token::DoubleQuotedByteStringLiteral(ref s) => write!(f, "B\"{s}\""),
Token::TripleDoubleQuotedByteStringLiteral(ref s) => write!(f, "B\"\"\"{s}\"\"\""),
Token::SingleQuotedRawStringLiteral(ref s) => write!(f, "R'{s}'"),
Token::DoubleQuotedRawStringLiteral(ref s) => write!(f, "R\"{s}\""),
Token::TripleSingleQuotedRawStringLiteral(ref s) => write!(f, "R'''{s}'''"),
Token::TripleDoubleQuotedRawStringLiteral(ref s) => write!(f, "R\"\"\"{s}\"\"\""),
Token::Comma => f.write_str(","),
Token::Whitespace(ws) => write!(f, "{ws}"),
Token::DoubleEq => f.write_str("=="),
Token::Spaceship => f.write_str("<=>"),
Token::Eq => f.write_str("="),
Token::Neq => f.write_str("<>"),
Token::Lt => f.write_str("<"),
Token::Gt => f.write_str(">"),
Token::LtEq => f.write_str("<="),
Token::GtEq => f.write_str(">="),
Token::Plus => f.write_str("+"),
Token::Minus => f.write_str("-"),
Token::Mul => f.write_str("*"),
Token::Div => f.write_str("/"),
Token::DuckIntDiv => f.write_str("//"),
Token::StringConcat => f.write_str("||"),
Token::Mod => f.write_str("%"),
Token::LParen => f.write_str("("),
Token::RParen => f.write_str(")"),
Token::Period => f.write_str("."),
Token::Colon => f.write_str(":"),
Token::DoubleColon => f.write_str("::"),
Token::Assignment => f.write_str(":="),
Token::SemiColon => f.write_str(";"),
Token::Backslash => f.write_str("\\"),
Token::LBracket => f.write_str("["),
Token::RBracket => f.write_str("]"),
Token::Ampersand => f.write_str("&"),
Token::Caret => f.write_str("^"),
Token::Pipe => f.write_str("|"),
Token::LBrace => f.write_str("{"),
Token::RBrace => f.write_str("}"),
Token::RArrow => f.write_str("=>"),
Token::Sharp => f.write_str("#"),
Token::ExclamationMark => f.write_str("!"),
Token::DoubleExclamationMark => f.write_str("!!"),
Token::Tilde => f.write_str("~"),
Token::TildeAsterisk => f.write_str("~*"),
Token::ExclamationMarkTilde => f.write_str("!~"),
Token::ExclamationMarkTildeAsterisk => f.write_str("!~*"),
Token::DoubleTilde => f.write_str("~~"),
Token::DoubleTildeAsterisk => f.write_str("~~*"),
Token::ExclamationMarkDoubleTilde => f.write_str("!~~"),
Token::ExclamationMarkDoubleTildeAsterisk => f.write_str("!~~*"),
Token::AtSign => f.write_str("@"),
Token::CaretAt => f.write_str("^@"),
Token::ShiftLeft => f.write_str("<<"),
Token::ShiftRight => f.write_str(">>"),
Token::Overlap => f.write_str("&&"),
Token::PGSquareRoot => f.write_str("|/"),
Token::PGCubeRoot => f.write_str("||/"),
Token::Placeholder(ref s) => write!(f, "{s}"),
Token::Arrow => write!(f, "->"),
Token::LongArrow => write!(f, "->>"),
Token::HashArrow => write!(f, "#>"),
Token::HashLongArrow => write!(f, "#>>"),
Token::AtArrow => write!(f, "@>"),
Token::ArrowAt => write!(f, "<@"),
Token::HashMinus => write!(f, "#-"),
Token::AtQuestion => write!(f, "@?"),
Token::AtAt => write!(f, "@@"),
Token::Question => write!(f, "?"),
Token::QuestionAnd => write!(f, "?&"),
Token::QuestionPipe => write!(f, "?|"),
Token::CustomBinaryOperator(s) => f.write_str(s),
}
}
}
impl Token {
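/// Create a keyword token (an unquoted [`Word`]) from the given string.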
pub fn make_keyword(keyword: &str) -> Self {
Token::make_word(keyword, None)
}
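/// Create a [`Word`] token from `word`, with an optional quote style.
/// Keyword lookup is only performed for unquoted words; a quoted word is
/// always a plain identifier with `Keyword::NoKeyword`. A brief sketch:
///
/// ```
/// # use sqlparser::tokenizer::Token;
/// # use sqlparser::keywords::Keyword;
/// match Token::make_word("SELECT", None) {
///     Token::Word(w) => assert_eq!(w.keyword, Keyword::SELECT),
///     _ => unreachable!(),
/// }
/// ```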
pub fn make_word(word: &str, quote_style: Option<char>) -> Self {
let word_uppercase = word.to_uppercase();
Token::Word(Word {
value: word.to_string(),
quote_style,
keyword: if quote_style.is_none() {
let keyword = ALL_KEYWORDS.binary_search(&word_uppercase.as_str());
keyword.map_or(Keyword::NoKeyword, |x| ALL_KEYWORDS_INDEX[x])
} else {
Keyword::NoKeyword
},
})
}
}
/// A keyword (like SELECT) or an optionally quoted SQL identifier
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct Word {
/// The value of the token, without the enclosing quotes, and with the
/// escape sequences (if any) processed (TODO: escapes are not handled)
pub value: String,
/// An identifier can be "quoted" (&lt;delimited identifier> in ANSI parlance).
/// The standard and most implementations allow using double quotes for this,
/// but some implementations support other quoting styles as well (e.g. \[MS SQL])
pub quote_style: Option<char>,
/// If the word was not quoted and it matched one of the known keywords,
/// this will have one of the values from [`Keyword`], otherwise `Keyword::NoKeyword`
pub keyword: Keyword,
}
impl fmt::Display for Word {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.quote_style {
Some(s) if s == '"' || s == '[' || s == '`' => {
write!(f, "{}{}{}", s, self.value, Word::matching_end_quote(s))
}
None => f.write_str(&self.value),
_ => panic!("Unexpected quote_style!"),
}
}
}
impl Word {
fn matching_end_quote(ch: char) -> char {
match ch {
'"' => '"', // ANSI and most dialects
'[' => ']', // MS SQL
'`' => '`', // MySQL
_ => panic!("unexpected quoting style!"),
}
}
}
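/// Whitespace and comment tokens (comments are treated as whitespace by the tokenizer)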
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum Whitespace {
Space,
Newline,
Tab,
SingleLineComment { comment: String, prefix: String },
MultiLineComment(String),
}
impl fmt::Display for Whitespace {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Whitespace::Space => f.write_str(" "),
Whitespace::Newline => f.write_str("\n"),
Whitespace::Tab => f.write_str("\t"),
Whitespace::SingleLineComment { prefix, comment } => write!(f, "{prefix}{comment}"),
Whitespace::MultiLineComment(s) => write!(f, "/*{s}*/"),
}
}
}
/// Location in input string
#[derive(Debug, Eq, PartialEq, Clone, Copy)]
pub struct Location {
/// Line number, starting from 1
pub line: u64,
/// Line column, starting from 1
pub column: u64,
}
impl fmt::Display for Location {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// A line number of 0 indicates no location was recorded (see `TokenWithLocation::wrap`)
if self.line == 0 {
return Ok(());
}
write!(
f,
// TODO: use standard compiler location syntax (<path>:<line>:<col>)
" at Line: {}, Column: {}",
self.line, self.column,
)
}
}
/// A [Token] with [Location] attached to it
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct TokenWithLocation {
pub token: Token,
pub location: Location,
}
impl TokenWithLocation {
pub fn new(token: Token, line: u64, column: u64) -> TokenWithLocation {
TokenWithLocation {
token,
location: Location { line, column },
}
}
pub fn wrap(token: Token) -> TokenWithLocation {
TokenWithLocation::new(token, 0, 0)
}
}
impl PartialEq<Token> for TokenWithLocation {
fn eq(&self, other: &Token) -> bool {
&self.token == other
}
}
impl PartialEq<TokenWithLocation> for Token {
fn eq(&self, other: &TokenWithLocation) -> bool {
self == &other.token
}
}
impl fmt::Display for TokenWithLocation {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.token.fmt(f)
}
}
/// Tokenizer error
#[derive(Debug, PartialEq, Eq)]
pub struct TokenizerError {
pub message: String,
pub location: Location,
}
impl fmt::Display for TokenizerError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}{}", self.message, self.location,)
}
}
#[cfg(feature = "std")]
impl std::error::Error for TokenizerError {}
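/// A peekable stream of characters that tracks the current (1-based) line and column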
struct State<'a> {
peekable: Peekable<Chars<'a>>,
pub line: u64,
pub col: u64,
}
impl<'a> State<'a> {
/// return the next character and advance the stream
pub fn next(&mut self) -> Option<char> {
match self.peekable.next() {
None => None,
Some(s) => {
if s == '\n' {
self.line += 1;
self.col = 1;
} else {
self.col += 1;
}
Some(s)
}
}
}
/// return the next character but do not advance the stream
pub fn peek(&mut self) -> Option<&char> {
self.peekable.peek()
}
pub fn location(&self) -> Location {
Location {
line: self.line,
column: self.col,
}
}
}
/// Represents how many quote characters enclose a string literal.
#[derive(Copy, Clone)]
enum NumStringQuoteChars {
/// e.g. `"abc"`, `'abc'`, `r'abc'`
One,
/// e.g. `"""abc"""`, `'''abc'''`, `r'''abc'''`
Many(NonZeroU8),
}
/// Settings for tokenizing a quoted string literal.
struct TokenizeQuotedStringSettings {
/// The character used to quote the string.
quote_style: char,
/// Represents how many quotes characters enclose the string literal.
num_quote_chars: NumStringQuoteChars,
/// The number of opening quotes left to consume, before parsing
/// the remaining string literal.
/// For example: given initial string `"""abc"""`. If the caller has
/// already parsed the first quote for some reason, then this value
/// is set to 1, flagging to look to consume only 2 leading quotes.
num_opening_quotes_to_consume: u8,
/// True if the string uses backslash escaping of special characters
/// e.g `'abc\ndef\'ghi'
backslash_escape: bool,
}
/// SQL Tokenizer
pub struct Tokenizer<'a> {
dialect: &'a dyn Dialect,
query: &'a str,
/// If true (the default), the tokenizer will un-escape literal
/// SQL strings. See [`Tokenizer::with_unescape`] for more details.
unescape: bool,
}
impl<'a> Tokenizer<'a> {
/// Create a new SQL tokenizer for the specified SQL statement
///
/// ```
/// # use sqlparser::tokenizer::{Token, Whitespace, Tokenizer};
/// # use sqlparser::dialect::GenericDialect;
/// # let dialect = GenericDialect{};
/// let query = r#"SELECT 'foo'"#;
///
/// // Parsing the query
/// let tokens = Tokenizer::new(&dialect, &query).tokenize().unwrap();
///
/// assert_eq!(tokens, vec![
/// Token::make_word("SELECT", None),
/// Token::Whitespace(Whitespace::Space),
/// Token::SingleQuotedString("foo".to_string()),
/// ]);
/// ```
pub fn new(dialect: &'a dyn Dialect, query: &'a str) -> Self {
Self {
dialect,
query,
unescape: true,
}
}
/// Set unescape mode
///
/// When true (default) the tokenizer unescapes literal values
/// (for example, `""` in SQL is unescaped to the literal `"`).
///
/// When false, the tokenizer provides the raw strings as provided
/// in the query. This can be helpful for programs that wish to
/// recover the *exact* original query text without normalizing
/// the escaping
///
/// # Example
///
/// ```
/// # use sqlparser::tokenizer::{Token, Tokenizer};
/// # use sqlparser::dialect::GenericDialect;
/// # let dialect = GenericDialect{};
/// let query = r#""Foo "" Bar""#;
/// let unescaped = Token::make_word(r#"Foo " Bar"#, Some('"'));
/// let original = Token::make_word(r#"Foo "" Bar"#, Some('"'));
///
/// // Parsing with unescaping (default)
/// let tokens = Tokenizer::new(&dialect, &query).tokenize().unwrap();
/// assert_eq!(tokens, vec![unescaped]);
///
/// // Parsing with unescape = false
/// let tokens = Tokenizer::new(&dialect, &query)
/// .with_unescape(false)
/// .tokenize().unwrap();
/// assert_eq!(tokens, vec![original]);
/// ```
pub fn with_unescape(mut self, unescape: bool) -> Self {
self.unescape = unescape;
self
}
/// Tokenize the statement and produce a vector of tokens
pub fn tokenize(&mut self) -> Result<Vec<Token>, TokenizerError> {
let twl = self.tokenize_with_location()?;
Ok(twl.into_iter().map(|t| t.token).collect())
}
/// Tokenize the statement and produce a vector of tokens with location information
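///
/// A small sketch showing the 1-based line/column attached to each token:
///
/// ```
/// # use sqlparser::tokenizer::Tokenizer;
/// # use sqlparser::dialect::GenericDialect;
/// # let dialect = GenericDialect {};
/// let tokens = Tokenizer::new(&dialect, "SELECT 1")
///     .tokenize_with_location()
///     .unwrap();
/// assert_eq!(tokens[0].location.line, 1);
/// assert_eq!(tokens[0].location.column, 1);
/// ```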
pub fn tokenize_with_location(&mut self) -> Result<Vec<TokenWithLocation>, TokenizerError> {
let mut tokens: Vec<TokenWithLocation> = vec![];
self.tokenize_with_location_into_buf(&mut tokens)
.map(|_| tokens)
}
/// Tokenize the statement and append tokens with location information into the provided buffer.
/// If an error is thrown, the buffer will contain all tokens that were successfully parsed before the error.
pub fn tokenize_with_location_into_buf(
&mut self,
buf: &mut Vec<TokenWithLocation>,
) -> Result<(), TokenizerError> {
let mut state = State {
peekable: self.query.chars().peekable(),
line: 1,
col: 1,
};
let mut location = state.location();
while let Some(token) = self.next_token(&mut state)? {
buf.push(TokenWithLocation { token, location });
location = state.location();
}
Ok(())
}
// Tokenize the identifier or keyword starting with the chars in `ch`
fn tokenize_identifier_or_keyword(
&self,
ch: impl IntoIterator<Item = char>,
chars: &mut State,
) -> Result<Option<Token>, TokenizerError> {
chars.next(); // consume the first char
let ch: String = ch.into_iter().collect();
let word = self.tokenize_word(ch, chars);
// TODO: implement parsing of exponent here
if word.chars().all(|x| x.is_ascii_digit() || x == '.') {
let mut inner_state = State {
peekable: word.chars().peekable(),
line: 0,
col: 0,
};
let mut s = peeking_take_while(&mut inner_state, |ch| matches!(ch, '0'..='9' | '.'));
let s2 = peeking_take_while(chars, |ch| matches!(ch, '0'..='9' | '.'));
s += s2.as_str();
return Ok(Some(Token::Number(s, false)));
}
Ok(Some(Token::make_word(&word, None)))
}
/// Get the next token or return None
fn next_token(&self, chars: &mut State) -> Result<Option<Token>, TokenizerError> {
match chars.peek() {
Some(&ch) => match ch {
' ' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Space)),
'\t' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Tab)),
'\n' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Newline)),
'\r' => {
// Emit a single Whitespace::Newline token for \r and \r\n
chars.next();
if let Some('\n') = chars.peek() {
chars.next();
}
Ok(Some(Token::Whitespace(Whitespace::Newline)))
}
// BigQuery uses b or B for byte string literal
b @ 'B' | b @ 'b' if dialect_of!(self is BigQueryDialect | GenericDialect) => {
chars.next(); // consume
match chars.peek() {
Some('\'') => {
if self.dialect.supports_triple_quoted_string() {
return self
.tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
chars,
'\'',
false,
Token::SingleQuotedByteStringLiteral,
Token::TripleSingleQuotedByteStringLiteral,
);
}
let s = self.tokenize_single_quoted_string(chars, '\'', false)?;
Ok(Some(Token::SingleQuotedByteStringLiteral(s)))
}
Some('\"') => {
if self.dialect.supports_triple_quoted_string() {
return self
.tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
chars,
'"',
false,
Token::DoubleQuotedByteStringLiteral,
Token::TripleDoubleQuotedByteStringLiteral,
);
}
let s = self.tokenize_single_quoted_string(chars, '\"', false)?;
Ok(Some(Token::DoubleQuotedByteStringLiteral(s)))
}
_ => {
// regular identifier starting with an "b" or "B"
let s = self.tokenize_word(b, chars);
Ok(Some(Token::make_word(&s, None)))
}
}
}
// BigQuery uses r or R for raw string literal
b @ 'R' | b @ 'r' if dialect_of!(self is BigQueryDialect | GenericDialect) => {
chars.next(); // consume
match chars.peek() {
Some('\'') => self
.tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
chars,
'\'',
false,
Token::SingleQuotedRawStringLiteral,
Token::TripleSingleQuotedRawStringLiteral,
),
Some('\"') => self
.tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
chars,
'"',
false,
Token::DoubleQuotedRawStringLiteral,
Token::TripleDoubleQuotedRawStringLiteral,
),
_ => {
// regular identifier starting with an "r" or "R"
let s = self.tokenize_word(b, chars);
Ok(Some(Token::make_word(&s, None)))
}
}
}
// Redshift uses lower case n for national string literal
n @ 'N' | n @ 'n' => {
chars.next(); // consume, to check the next char
match chars.peek() {
Some('\'') => {
// N'...' - a <national character string literal>
let s = self.tokenize_single_quoted_string(chars, '\'', true)?;
Ok(Some(Token::NationalStringLiteral(s)))
}
_ => {
// regular identifier starting with an "N"
let s = self.tokenize_word(n, chars);
Ok(Some(Token::make_word(&s, None)))
}
}
}
// PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard.
x @ 'e' | x @ 'E' => {
let starting_loc = chars.location();
chars.next(); // consume, to check the next char
match chars.peek() {
Some('\'') => {
let s =
self.tokenize_escaped_single_quoted_string(starting_loc, chars)?;
Ok(Some(Token::EscapedStringLiteral(s)))
}
_ => {
// regular identifier starting with an "E" or "e"
let s = self.tokenize_word(x, chars);
Ok(Some(Token::make_word(&s, None)))
}
}
}
// Unicode string literals like U&'first \000A second' are supported in some dialects, including PostgreSQL
x @ 'u' | x @ 'U' if self.dialect.supports_unicode_string_literal() => {
chars.next(); // consume, to check the next char
if chars.peek() == Some(&'&') {
// we cannot advance the iterator here, as we need to consume the '&' later if the 'u' was an identifier
let mut chars_clone = chars.peekable.clone();
chars_clone.next(); // consume the '&' in the clone
if chars_clone.peek() == Some(&'\'') {
chars.next(); // consume the '&' in the original iterator
let s = unescape_unicode_single_quoted_string(chars)?;
return Ok(Some(Token::UnicodeStringLiteral(s)));
}
}
// regular identifier starting with an "U" or "u"
let s = self.tokenize_word(x, chars);
Ok(Some(Token::make_word(&s, None)))
}
// The spec only allows an uppercase 'X' to introduce a hex
// string, but PostgreSQL, at least, allows a lowercase 'x' too.
x @ 'x' | x @ 'X' => {
chars.next(); // consume, to check the next char
match chars.peek() {
Some('\'') => {
// X'...' - a <binary string literal>
let s = self.tokenize_single_quoted_string(chars, '\'', true)?;
Ok(Some(Token::HexStringLiteral(s)))
}
_ => {
// regular identifier starting with an "X"
let s = self.tokenize_word(x, chars);
Ok(Some(Token::make_word(&s, None)))
}
}
}
// single quoted string
'\'' => {
if self.dialect.supports_triple_quoted_string() {
return self
.tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
chars,
'\'',
self.dialect.supports_string_literal_backslash_escape(),
Token::SingleQuotedString,
Token::TripleSingleQuotedString,
);
}
let s = self.tokenize_single_quoted_string(
chars,
'\'',
self.dialect.supports_string_literal_backslash_escape(),
)?;
Ok(Some(Token::SingleQuotedString(s)))
}
// double quoted string
'\"' if !self.dialect.is_delimited_identifier_start(ch)
&& !self.dialect.is_identifier_start(ch) =>
{
if self.dialect.supports_triple_quoted_string() {
return self
.tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
chars,
'"',
self.dialect.supports_string_literal_backslash_escape(),
Token::DoubleQuotedString,
Token::TripleDoubleQuotedString,
);
}
let s = self.tokenize_single_quoted_string(
chars,
'"',
self.dialect.supports_string_literal_backslash_escape(),
)?;
Ok(Some(Token::DoubleQuotedString(s)))
}
// delimited (quoted) identifier
quote_start
if self.dialect.is_delimited_identifier_start(ch)
&& self
.dialect
.is_proper_identifier_inside_quotes(chars.peekable.clone()) =>
{
let error_loc = chars.location();
chars.next(); // consume the opening quote
let quote_end = Word::matching_end_quote(quote_start);
let (s, last_char) = self.parse_quoted_ident(chars, quote_end);
if last_char == Some(quote_end) {
Ok(Some(Token::make_word(&s, Some(quote_start))))
} else {
self.tokenizer_error(
error_loc,
format!("Expected close delimiter '{quote_end}' before EOF."),
)
}
}
// numbers and period
'0'..='9' | '.' => {
let mut s = peeking_take_while(chars, |ch| ch.is_ascii_digit());
// match binary literal that starts with 0x
if s == "0" && chars.peek() == Some(&'x') {
chars.next();
let s2 = peeking_take_while(chars, |ch| ch.is_ascii_hexdigit());
return Ok(Some(Token::HexStringLiteral(s2)));
}
// match one period
if let Some('.') = chars.peek() {
s.push('.');
chars.next();
}
s += &peeking_take_while(chars, |ch| ch.is_ascii_digit());
// No number -> Token::Period
if s == "." {
return Ok(Some(Token::Period));
}
let mut exponent_part = String::new();
// Parse exponent as number
if chars.peek() == Some(&'e') || chars.peek() == Some(&'E') {
let mut char_clone = chars.peekable.clone();
exponent_part.push(char_clone.next().unwrap());
// Optional sign
match char_clone.peek() {
Some(&c) if matches!(c, '+' | '-') => {
exponent_part.push(c);
char_clone.next();
}
_ => (),
}
match char_clone.peek() {
// Definitely an exponent, get original iterator up to speed and use it
Some(&c) if c.is_ascii_digit() => {
for _ in 0..exponent_part.len() {
chars.next();
}
exponent_part +=
&peeking_take_while(chars, |ch| ch.is_ascii_digit());
s += exponent_part.as_str();
}
// Not an exponent, discard the work done
_ => (),
}
}
// The MySQL dialect supports identifiers that start with a numeric prefix,
// as long as they aren't an exponent number.
if self.dialect.supports_numeric_prefix() && exponent_part.is_empty() {
let word =
peeking_take_while(chars, |ch| self.dialect.is_identifier_part(ch));
if !word.is_empty() {
s += word.as_str();
return Ok(Some(Token::make_word(s.as_str(), None)));
}
}
let long = if chars.peek() == Some(&'L') {
chars.next();
true
} else {
false
};
Ok(Some(Token::Number(s, long)))
}
// punctuation
'(' => self.consume_and_return(chars, Token::LParen),
')' => self.consume_and_return(chars, Token::RParen),
',' => self.consume_and_return(chars, Token::Comma),
// operators
'-' => {
chars.next(); // consume the '-'
match chars.peek() {
Some('-') => {
chars.next(); // consume the second '-', starting a single-line comment
let comment = self.tokenize_single_line_comment(chars);
Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
prefix: "--".to_owned(),
comment,
})))
}
Some('>') => {
chars.next();
match chars.peek() {
Some('>') => self.consume_for_binop(chars, "->>", Token::LongArrow),
_ => self.start_binop(chars, "->", Token::Arrow),
}
}
// a regular '-' operator
_ => self.start_binop(chars, "-", Token::Minus),
}
}
'/' => {
chars.next(); // consume the '/'
match chars.peek() {
Some('*') => {
chars.next(); // consume the '*', starting a multi-line comment
self.tokenize_multiline_comment(chars)
}
Some('/') if dialect_of!(self is SnowflakeDialect) => {
chars.next(); // consume the second '/', starting a snowflake single-line comment
let comment = self.tokenize_single_line_comment(chars);
Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
prefix: "//".to_owned(),
comment,
})))
}
Some('/') if dialect_of!(self is DuckDbDialect | GenericDialect) => {
self.consume_and_return(chars, Token::DuckIntDiv)
}
// a regular '/' operator
_ => Ok(Some(Token::Div)),
}
}
'+' => self.consume_and_return(chars, Token::Plus),
'*' => self.consume_and_return(chars, Token::Mul),
'%' => {
chars.next(); // advance past '%'
match chars.peek() {
Some(s) if s.is_whitespace() => Ok(Some(Token::Mod)),
Some(sch) if self.dialect.is_identifier_start('%') => {
self.tokenize_identifier_or_keyword([ch, *sch], chars)
}
_ => self.start_binop(chars, "%", Token::Mod),
}
}
'|' => {
chars.next(); // consume the '|'
match chars.peek() {
Some('/') => self.consume_for_binop(chars, "|/", Token::PGSquareRoot),
Some('|') => {
chars.next(); // consume the second '|'
match chars.peek() {
Some('/') => {
self.consume_for_binop(chars, "||/", Token::PGCubeRoot)
}
_ => self.start_binop(chars, "||", Token::StringConcat),
}
}
// Bitwise '|' operator
_ => self.start_binop(chars, "|", Token::Pipe),
}
}
'=' => {
chars.next(); // consume
match chars.peek() {
Some('>') => self.consume_and_return(chars, Token::RArrow),
Some('=') => self.consume_and_return(chars, Token::DoubleEq),
_ => Ok(Some(Token::Eq)),
}
}
'!' => {
chars.next(); // consume
match chars.peek() {
Some('=') => self.consume_and_return(chars, Token::Neq),
Some('!') => self.consume_and_return(chars, Token::DoubleExclamationMark),
Some('~') => {
chars.next();
match chars.peek() {
Some('*') => self
.consume_and_return(chars, Token::ExclamationMarkTildeAsterisk),
Some('~') => {
chars.next();
match chars.peek() {
Some('*') => self.consume_and_return(
chars,
Token::ExclamationMarkDoubleTildeAsterisk,
),
_ => Ok(Some(Token::ExclamationMarkDoubleTilde)),
}
}
_ => Ok(Some(Token::ExclamationMarkTilde)),
}
}
_ => Ok(Some(Token::ExclamationMark)),
}
}
'<' => {
chars.next(); // consume
match chars.peek() {
Some('=') => {
chars.next();
match chars.peek() {
Some('>') => self.consume_for_binop(chars, "<=>", Token::Spaceship),
_ => self.start_binop(chars, "<=", Token::LtEq),
}
}
Some('>') => self.consume_for_binop(chars, "<>", Token::Neq),
Some('<') => self.consume_for_binop(chars, "<<", Token::ShiftLeft),
Some('@') => self.consume_for_binop(chars, "<@", Token::ArrowAt),
_ => self.start_binop(chars, "<", Token::Lt),
}
}
'>' => {
chars.next(); // consume
match chars.peek() {
Some('=') => self.consume_for_binop(chars, ">=", Token::GtEq),
Some('>') => self.consume_for_binop(chars, ">>", Token::ShiftRight),
_ => self.start_binop(chars, ">", Token::Gt),
}
}
':' => {
chars.next();
match chars.peek() {
Some(':') => self.consume_and_return(chars, Token::DoubleColon),
Some('=') => self.consume_and_return(chars, Token::Assignment),
_ => Ok(Some(Token::Colon)),
}
}
';' => self.consume_and_return(chars, Token::SemiColon),
'\\' => self.consume_and_return(chars, Token::Backslash),
'[' => self.consume_and_return(chars, Token::LBracket),
']' => self.consume_and_return(chars, Token::RBracket),
'&' => {
chars.next(); // consume the '&'
match chars.peek() {
Some('&') => {
chars.next(); // consume the second '&'
self.start_binop(chars, "&&", Token::Overlap)
}
// Bitwise '&' operator
_ => self.start_binop(chars, "&", Token::Ampersand),
}
}
'^' => {
chars.next(); // consume the '^'
match chars.peek() {
Some('@') => self.consume_and_return(chars, Token::CaretAt),
_ => Ok(Some(Token::Caret)),
}
}
'{' => self.consume_and_return(chars, Token::LBrace),
'}' => self.consume_and_return(chars, Token::RBrace),
'#' if dialect_of!(self is SnowflakeDialect | BigQueryDialect) => {
chars.next(); // consume the '#', starting a snowflake single-line comment
let comment = self.tokenize_single_line_comment(chars);
Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
prefix: "#".to_owned(),
comment,
})))
}
'~' => {
chars.next(); // consume
match chars.peek() {
Some('*') => self.consume_for_binop(chars, "~*", Token::TildeAsterisk),
Some('~') => {
chars.next();
match chars.peek() {
Some('*') => {
self.consume_for_binop(chars, "~~*", Token::DoubleTildeAsterisk)
}
_ => self.start_binop(chars, "~~", Token::DoubleTilde),
}
}
_ => self.start_binop(chars, "~", Token::Tilde),
}
}
'#' => {
chars.next();
match chars.peek() {
Some('-') => self.consume_for_binop(chars, "#-", Token::HashMinus),
Some('>') => {
chars.next();
match chars.peek() {
Some('>') => {
self.consume_for_binop(chars, "#>>", Token::HashLongArrow)
}
_ => self.start_binop(chars, "#>", Token::HashArrow),
}
}
Some(' ') => Ok(Some(Token::Sharp)),
Some(sch) if self.dialect.is_identifier_start('#') => {
self.tokenize_identifier_or_keyword([ch, *sch], chars)
}
_ => self.start_binop(chars, "#", Token::Sharp),
}
}
'@' => {
chars.next();
match chars.peek() {
Some('>') => self.consume_and_return(chars, Token::AtArrow),
Some('?') => self.consume_and_return(chars, Token::AtQuestion),
Some('@') => {
chars.next();
match chars.peek() {
Some(' ') => Ok(Some(Token::AtAt)),
Some(tch) if self.dialect.is_identifier_start('@') => {
self.tokenize_identifier_or_keyword([ch, '@', *tch], chars)
}
_ => Ok(Some(Token::AtAt)),
}
}
Some(' ') => Ok(Some(Token::AtSign)),
Some(sch) if self.dialect.is_identifier_start('@') => {
self.tokenize_identifier_or_keyword([ch, *sch], chars)
}
_ => Ok(Some(Token::AtSign)),
}
}
// Postgres uses ? for jsonb operators, not prepared statements
'?' if dialect_of!(self is PostgreSqlDialect) => {
chars.next();
match chars.peek() {
Some('|') => self.consume_and_return(chars, Token::QuestionPipe),
Some('&') => self.consume_and_return(chars, Token::QuestionAnd),
_ => self.consume_and_return(chars, Token::Question),
}
}
'?' => {
chars.next();
let s = peeking_take_while(chars, |ch| ch.is_numeric());
Ok(Some(Token::Placeholder(String::from("?") + &s)))
}
// identifier or keyword
ch if self.dialect.is_identifier_start(ch) => {
self.tokenize_identifier_or_keyword([ch], chars)
}
'$' => Ok(Some(self.tokenize_dollar_preceded_value(chars)?)),
// Whitespace check (including unicode chars) should be last as it covers some of the chars above
ch if ch.is_whitespace() => {
self.consume_and_return(chars, Token::Whitespace(Whitespace::Space))
}
other => self.consume_and_return(chars, Token::Char(other)),
},
None => Ok(None),
}
}
/// Consume the next character, then parse a custom binary operator. The next character should be included in the prefix
fn consume_for_binop(
&self,
chars: &mut State,
prefix: &str,
default: Token,
) -> Result<Option<Token>, TokenizerError> {
chars.next(); // consume the first char
self.start_binop(chars, prefix, default)
}
/// parse a custom binary operator
fn start_binop(
&self,
chars: &mut State,
prefix: &str,
default: Token,
) -> Result<Option<Token>, TokenizerError> {
let mut custom = None;
while let Some(&ch) = chars.peek() {
if !self.dialect.is_custom_operator_part(ch) {
break;
}
custom.get_or_insert_with(|| prefix.to_string()).push(ch);
chars.next();
}
Ok(Some(
custom.map(Token::CustomBinaryOperator).unwrap_or(default),
))
}
/// Tokenize a dollar-preceded value (i.e: a string/placeholder)
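/// Examples: `$$abc$$` and `$tag$abc$tag$` produce a [`Token::DollarQuotedString`],
/// while `$1` produces a [`Token::Placeholder`].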
fn tokenize_dollar_preceded_value(&self, chars: &mut State) -> Result<Token, TokenizerError> {
let mut s = String::new();
let mut value = String::new();
chars.next();
if let Some('$') = chars.peek() {
chars.next();
let mut is_terminated = false;
let mut prev: Option<char> = None;
while let Some(&ch) = chars.peek() {
if prev == Some('$') {
if ch == '$' {
chars.next();
is_terminated = true;
break;
} else {
s.push('$');
s.push(ch);
}
} else if ch != '$' {
s.push(ch);
}
prev = Some(ch);
chars.next();
}
return if chars.peek().is_none() && !is_terminated {
self.tokenizer_error(chars.location(), "Unterminated dollar-quoted string")
} else {
Ok(Token::DollarQuotedString(DollarQuotedString {
value: s,
tag: None,
}))
};
} else {
value.push_str(&peeking_take_while(chars, |ch| {
ch.is_alphanumeric() || ch == '_'
}));
if let Some('$') = chars.peek() {
chars.next();
'searching_for_end: loop {
s.push_str(&peeking_take_while(chars, |ch| ch != '$'));
match chars.peek() {
Some('$') => {
chars.next();
let mut maybe_s = String::from("$");
for c in value.chars() {
if let Some(next_char) = chars.next() {
maybe_s.push(next_char);
if next_char != c {
// This doesn't match the dollar quote delimiter so this
// is not the end of the string.
s.push_str(&maybe_s);
continue 'searching_for_end;
}
} else {
return self.tokenizer_error(
chars.location(),
"Unterminated dollar-quoted, expected $",
);
}
}
if chars.peek() == Some(&'$') {
chars.next();
maybe_s.push('$');
// maybe_s matches the end delimiter
break 'searching_for_end;
} else {
// This also doesn't match the dollar quote delimiter as there are
// more characters before the second dollar so this is not the end
// of the string.
s.push_str(&maybe_s);
continue 'searching_for_end;
}
}
_ => {
return self.tokenizer_error(
chars.location(),
"Unterminated dollar-quoted, expected $",
)
}
}
}
} else {
return Ok(Token::Placeholder(String::from("$") + &value));
}
}
Ok(Token::DollarQuotedString(DollarQuotedString {
value: s,
tag: if value.is_empty() { None } else { Some(value) },
}))
}
fn tokenizer_error<R>(
&self,
loc: Location,
message: impl Into<String>,
) -> Result<R, TokenizerError> {
Err(TokenizerError {
message: message.into(),
location: loc,
})
}
// Consume characters until newline
fn tokenize_single_line_comment(&self, chars: &mut State) -> String {
let mut comment = peeking_take_while(chars, |ch| ch != '\n');
if let Some(ch) = chars.next() {
assert_eq!(ch, '\n');
comment.push(ch);
}
comment
}
/// Tokenize an identifier or keyword, after the first char is already consumed.
fn tokenize_word(&self, first_chars: impl Into<String>, chars: &mut State) -> String {
let mut s = first_chars.into();
s.push_str(&peeking_take_while(chars, |ch| {
self.dialect.is_identifier_part(ch)
}));
s
}
/// Read an escaped single quoted string, starting with the opening quote.
fn tokenize_escaped_single_quoted_string(
&self,
starting_loc: Location,
chars: &mut State,
) -> Result<String, TokenizerError> {
if let Some(s) = unescape_single_quoted_string(chars) {
return Ok(s);
}
self.tokenizer_error(starting_loc, "Unterminated encoded string literal")
}
/// Reads a string literal quoted by a single or triple quote characters.
/// Examples: `'abc'`, `'''abc'''`, `"""abc"""`.
fn tokenize_single_or_triple_quoted_string<F>(
&self,
chars: &mut State,
quote_style: char,
backslash_escape: bool,
single_quote_token: F,
triple_quote_token: F,
) -> Result<Option<Token>, TokenizerError>
where
F: Fn(String) -> Token,
{
let error_loc = chars.location();
let mut num_opening_quotes = 0u8;
for _ in 0..3 {
if Some(&quote_style) == chars.peek() {
chars.next(); // Consume quote.
num_opening_quotes += 1;
} else {
break;
}
}
let (token_fn, num_quote_chars) = match num_opening_quotes {
1 => (single_quote_token, NumStringQuoteChars::One),
2 => {
// If we matched double quotes, then this is an empty string.
return Ok(Some(single_quote_token("".into())));
}
3 => {
let Some(num_quote_chars) = NonZeroU8::new(3) else {
return self.tokenizer_error(error_loc, "invalid number of opening quotes");
};
(
triple_quote_token,
NumStringQuoteChars::Many(num_quote_chars),
)
}
_ => {
return self.tokenizer_error(error_loc, "invalid string literal opening");
}
};
let settings = TokenizeQuotedStringSettings {
quote_style,
num_quote_chars,
num_opening_quotes_to_consume: 0,
backslash_escape,
};
self.tokenize_quoted_string(chars, settings)
.map(token_fn)
.map(Some)
}
/// Reads a string literal quoted by a single quote character.
fn tokenize_single_quoted_string(
&self,
chars: &mut State,
quote_style: char,
backslash_escape: bool,
) -> Result<String, TokenizerError> {
self.tokenize_quoted_string(
chars,
TokenizeQuotedStringSettings {
quote_style,
num_quote_chars: NumStringQuoteChars::One,
num_opening_quotes_to_consume: 1,
backslash_escape,
},
)
}
/// Read a quoted string.
fn tokenize_quoted_string(
&self,
chars: &mut State,
settings: TokenizeQuotedStringSettings,
) -> Result<String, TokenizerError> {
let mut s = String::new();
let error_loc = chars.location();
// Consume any opening quotes.
for _ in 0..settings.num_opening_quotes_to_consume {
if Some(settings.quote_style) != chars.next() {
return self.tokenizer_error(error_loc, "invalid string literal opening");
}
}
let mut num_consecutive_quotes = 0;
while let Some(&ch) = chars.peek() {
let pending_final_quote = match settings.num_quote_chars {
NumStringQuoteChars::One => Some(NumStringQuoteChars::One),
n @ NumStringQuoteChars::Many(count)
if num_consecutive_quotes + 1 == count.get() =>
{
Some(n)
}
NumStringQuoteChars::Many(_) => None,
};
match ch {
char if char == settings.quote_style && pending_final_quote.is_some() => {
chars.next(); // consume
if let Some(NumStringQuoteChars::Many(count)) = pending_final_quote {
// For an initial string like `"""abc"""`, at this point we have
// `abc""` in the buffer and have now matched the final `"`.
// However, the string to return is simply `abc`, so we strip off
// the trailing quotes before returning.
let mut buf = s.chars();
for _ in 1..count.get() {
buf.next_back();
}
return Ok(buf.as_str().to_string());
} else if chars
.peek()
.map(|c| *c == settings.quote_style)
.unwrap_or(false)
{
s.push(ch);
if !self.unescape {
// In no-escape mode, the given query has to be saved completely
s.push(ch);
}
chars.next();
} else {
return Ok(s);
}
}
'\\' if settings.backslash_escape => {
// consume backslash
chars.next();
num_consecutive_quotes = 0;
if let Some(next) = chars.peek() {
if !self.unescape {
// In no-escape mode, the given query has to be saved completely including backslashes.
s.push(ch);
s.push(*next);
chars.next(); // consume next
} else {
let n = match next {
'0' => '\0',
'a' => '\u{7}',
'b' => '\u{8}',
'f' => '\u{c}',
'n' => '\n',
'r' => '\r',
't' => '\t',
'Z' => '\u{1a}',
_ => *next,
};
s.push(n);
chars.next(); // consume next
}
}
}
ch => {
chars.next(); // consume ch
if ch == settings.quote_style {
num_consecutive_quotes += 1;
} else {
num_consecutive_quotes = 0;
}
s.push(ch);
}
}
}
self.tokenizer_error(error_loc, "Unterminated string literal")
}
fn tokenize_multiline_comment(
&self,
chars: &mut State,
) -> Result<Option<Token>, TokenizerError> {
let mut s = String::new();
let mut nested = 1;
let mut last_ch = ' ';
loop {
match chars.next() {
Some(ch) => {
if last_ch == '/' && ch == '*' {
nested += 1;
} else if last_ch == '*' && ch == '/' {
nested -= 1;
if nested == 0 {
s.pop();
break Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(s))));
}
}
s.push(ch);
last_ch = ch;
}
None => {
break self.tokenizer_error(
chars.location(),
"Unexpected EOF while in a multi-line comment",
)
}
}
}
}
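/// Read a delimited identifier up to `quote_end`, treating a doubled closing
/// quote as an escaped quote character. The second element of the returned
/// tuple is `Some(quote_end)` if the closing delimiter was found, `None` on EOF.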
fn parse_quoted_ident(&self, chars: &mut State, quote_end: char) -> (String, Option<char>) {
let mut last_char = None;
let mut s = String::new();
while let Some(ch) = chars.next() {
if ch == quote_end {
if chars.peek() == Some(&quote_end) {
chars.next();
s.push(ch);
if !self.unescape {
// In no-escape mode, the given query has to be saved completely
s.push(ch);
}
} else {
last_char = Some(quote_end);
break;
}
} else {
s.push(ch);
}
}
(s, last_char)
}
#[allow(clippy::unnecessary_wraps)]
fn consume_and_return(
&self,
chars: &mut State,
t: Token,
) -> Result<Option<Token>, TokenizerError> {
chars.next();
Ok(Some(t))
}
}
/// Read from `chars` until `predicate` returns `false` or EOF is hit.
/// Return the characters read as String, and keep the first non-matching
/// char available as `chars.next()`.
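/// For example, reading ASCII digits from the input `123abc` returns `"123"`
/// and leaves `'a'` as the next char in the stream.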
fn peeking_take_while(chars: &mut State, mut predicate: impl FnMut(char) -> bool) -> String {
let mut s = String::new();
while let Some(&ch) = chars.peek() {
if predicate(ch) {
chars.next(); // consume
s.push(ch);
} else {
break;
}
}
s
}
fn unescape_single_quoted_string(chars: &mut State<'_>) -> Option<String> {
Unescape::new(chars).unescape()
}
struct Unescape<'a: 'b, 'b> {
chars: &'b mut State<'a>,
}
impl<'a: 'b, 'b> Unescape<'a, 'b> {
fn new(chars: &'b mut State<'a>) -> Self {
Self { chars }
}
fn unescape(mut self) -> Option<String> {
let mut unescaped = String::new();
self.chars.next();
while let Some(c) = self.chars.next() {
if c == '\'' {
// case: ''''
if self.chars.peek().map(|c| *c == '\'').unwrap_or(false) {
self.chars.next();
unescaped.push('\'');
continue;
}
return Some(unescaped);
}
if c != '\\' {
unescaped.push(c);
continue;
}
let c = match self.chars.next()? {
'b' => '\u{0008}',
'f' => '\u{000C}',
'n' => '\n',
'r' => '\r',
't' => '\t',
'u' => self.unescape_unicode_16()?,
'U' => self.unescape_unicode_32()?,
'x' => self.unescape_hex()?,
c if c.is_digit(8) => self.unescape_octal(c)?,
c => c,
};
unescaped.push(Self::check_null(c)?);
}
None
}
#[inline]
fn check_null(c: char) -> Option<char> {
if c == '\0' {
None
} else {
Some(c)
}
}
#[inline]
fn byte_to_char<const RADIX: u32>(s: &str) -> Option<char> {
// u32 is used here because Pg has an overflow operation rather than throwing an exception directly.
match u32::from_str_radix(s, RADIX) {
Err(_) => None,
Ok(n) => {
let n = n & 0xFF;
if n <= 127 {
char::from_u32(n)
} else {
None
}
}
}
}
// Hexadecimal byte value. \xh, \xhh (h = 0–9, A–F)
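// e.g. `\x41` unescapes to `A` (0x41 = 65, within the accepted 0..=127 range)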
fn unescape_hex(&mut self) -> Option<char> {
let mut s = String::new();
for _ in 0..2 {
match self.next_hex_digit() {
Some(c) => s.push(c),
None => break,
}
}
if s.is_empty() {
return Some('x');
}
Self::byte_to_char::<16>(&s)
}
#[inline]
fn next_hex_digit(&mut self) -> Option<char> {
match self.chars.peek() {
Some(c) if c.is_ascii_hexdigit() => self.chars.next(),
_ => None,
}
}
// Octal byte value. \o, \oo, \ooo (o = 0–7)
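// e.g. `\101` unescapes to `A` (0o101 = 65)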
fn unescape_octal(&mut self, c: char) -> Option<char> {
let mut s = String::new();
s.push(c);
for _ in 0..2 {
match self.next_octal_digit() {
Some(c) => s.push(c),
None => break,
}
}
Self::byte_to_char::<8>(&s)
}
#[inline]
fn next_octal_digit(&mut self) -> Option<char> {
match self.chars.peek() {
Some(c) if c.is_digit(8) => self.chars.next(),
_ => None,
}
}
// 16-bit hexadecimal Unicode character value. \uxxxx (x = 0–9, A–F)
fn unescape_unicode_16(&mut self) -> Option<char> {
self.unescape_unicode::<4>()
}
// 32-bit hexadecimal Unicode character value. \Uxxxxxxxx (x = 0–9, A–F)
fn unescape_unicode_32(&mut self) -> Option<char> {
self.unescape_unicode::<8>()
}
fn unescape_unicode<const NUM: usize>(&mut self) -> Option<char> {
let mut s = String::new();
for _ in 0..NUM {
s.push(self.chars.next()?);
}
match u32::from_str_radix(&s, 16) {
Err(_) => None,
Ok(n) => char::from_u32(n),
}
}
}
fn unescape_unicode_single_quoted_string(chars: &mut State<'_>) -> Result<String, TokenizerError> {
let mut unescaped = String::new();
chars.next(); // consume the opening quote
while let Some(c) = chars.next() {
match c {
'\'' => {
if chars.peek() == Some(&'\'') {
chars.next();
unescaped.push('\'');
} else {
return Ok(unescaped);
}
}
'\\' => match chars.peek() {
Some('\\') => {
chars.next();
unescaped.push('\\');
}
Some('+') => {
chars.next();
unescaped.push(take_char_from_hex_digits(chars, 6)?);
}
_ => unescaped.push(take_char_from_hex_digits(chars, 4)?),
},
_ => {
unescaped.push(c);
}
}
}
Err(TokenizerError {
message: "Unterminated unicode encoded string literal".to_string(),
location: chars.location(),
})
}
fn take_char_from_hex_digits(
chars: &mut State<'_>,
max_digits: usize,
) -> Result<char, TokenizerError> {
let mut result = 0u32;
for _ in 0..max_digits {
let next_char = chars.next().ok_or_else(|| TokenizerError {
message: "Unexpected EOF while parsing hex digit in escaped unicode string."
.to_string(),
location: chars.location(),
})?;
let digit = next_char.to_digit(16).ok_or_else(|| TokenizerError {
message: format!("Invalid hex digit in escaped unicode string: {}", next_char),
location: chars.location(),
})?;
result = result * 16 + digit;
}
char::from_u32(result).ok_or_else(|| TokenizerError {
message: format!("Invalid unicode character: {:x}", result),
location: chars.location(),
})
}
#[cfg(test)]
mod tests {
use super::*;
use crate::dialect::{
BigQueryDialect, ClickHouseDialect, HiveDialect, MsSqlDialect, MySqlDialect,
};
use core::fmt::Debug;
#[test]
fn tokenizer_error_impl() {
let err = TokenizerError {
message: "test".into(),
location: Location { line: 1, column: 1 },
};
#[cfg(feature = "std")]
{
use std::error::Error;
assert!(err.source().is_none());
}
assert_eq!(err.to_string(), "test at Line: 1, Column: 1");
}
#[test]
fn tokenize_select_1() {
let sql = String::from("SELECT 1");
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::Number(String::from("1"), false),
];
compare(expected, tokens);
}
#[test]
fn tokenize_select_float() {
let sql = String::from("SELECT .1");
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::Number(String::from(".1"), false),
];
compare(expected, tokens);
}
#[test]
fn tokenize_clickhouse_double_equal() {
let sql = String::from("SELECT foo=='1'");
let dialect = ClickHouseDialect {};
let mut tokenizer = Tokenizer::new(&dialect, &sql);
let tokens = tokenizer.tokenize().unwrap();
let expected = vec![
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::Word(Word {
value: "foo".to_string(),
quote_style: None,
keyword: Keyword::NoKeyword,
}),
Token::DoubleEq,
Token::SingleQuotedString("1".to_string()),
];
compare(expected, tokens);
}
#[test]
fn tokenize_select_exponent() {
let sql = String::from("SELECT 1e10, 1e-10, 1e+10, 1ea, 1e-10a, 1e-10-10");
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::Number(String::from("1e10"), false),
Token::Comma,
Token::Whitespace(Whitespace::Space),
Token::Number(String::from("1e-10"), false),
Token::Comma,
Token::Whitespace(Whitespace::Space),
Token::Number(String::from("1e+10"), false),
Token::Comma,
Token::Whitespace(Whitespace::Space),
Token::Number(String::from("1"), false),
Token::make_word("ea", None),
Token::Comma,
Token::Whitespace(Whitespace::Space),
Token::Number(String::from("1e-10"), false),
Token::make_word("a", None),
Token::Comma,
Token::Whitespace(Whitespace::Space),
Token::Number(String::from("1e-10"), false),
Token::Minus,
Token::Number(String::from("10"), false),
];
compare(expected, tokens);
}
#[test]
fn tokenize_scalar_function() {
let sql = String::from("SELECT sqrt(1)");
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::make_word("sqrt", None),
Token::LParen,
Token::Number(String::from("1"), false),
Token::RParen,
];
compare(expected, tokens);
}
#[test]
fn tokenize_string_string_concat() {
let sql = String::from("SELECT 'a' || 'b'");
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::SingleQuotedString(String::from("a")),
Token::Whitespace(Whitespace::Space),
Token::StringConcat,
Token::Whitespace(Whitespace::Space),
Token::SingleQuotedString(String::from("b")),
];
compare(expected, tokens);
}
#[test]
fn tokenize_bitwise_op() {
let sql = String::from("SELECT one | two ^ three");
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::make_word("one", None),
Token::Whitespace(Whitespace::Space),
Token::Pipe,
Token::Whitespace(Whitespace::Space),
Token::make_word("two", None),
Token::Whitespace(Whitespace::Space),
Token::Caret,
Token::Whitespace(Whitespace::Space),
Token::make_word("three", None),
];
compare(expected, tokens);
}
#[test]
fn tokenize_logical_xor() {
let sql =
String::from("SELECT true XOR true, false XOR false, true XOR false, false XOR true");
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("true"),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("XOR"),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("true"),
Token::Comma,
Token::Whitespace(Whitespace::Space),
Token::make_keyword("false"),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("XOR"),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("false"),
Token::Comma,
Token::Whitespace(Whitespace::Space),
Token::make_keyword("true"),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("XOR"),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("false"),
Token::Comma,
Token::Whitespace(Whitespace::Space),
Token::make_keyword("false"),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("XOR"),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("true"),
];
compare(expected, tokens);
}
#[test]
fn tokenize_simple_select() {
let sql = String::from("SELECT * FROM customer WHERE id = 1 LIMIT 5");
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::Mul,
Token::Whitespace(Whitespace::Space),
Token::make_keyword("FROM"),
Token::Whitespace(Whitespace::Space),
Token::make_word("customer", None),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("WHERE"),
Token::Whitespace(Whitespace::Space),
Token::make_word("id", None),
Token::Whitespace(Whitespace::Space),
Token::Eq,
Token::Whitespace(Whitespace::Space),
Token::Number(String::from("1"), false),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("LIMIT"),
Token::Whitespace(Whitespace::Space),
Token::Number(String::from("5"), false),
];
compare(expected, tokens);
}
#[test]
fn tokenize_explain_select() {
let sql = String::from("EXPLAIN SELECT * FROM customer WHERE id = 1");
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![
Token::make_keyword("EXPLAIN"),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::Mul,
Token::Whitespace(Whitespace::Space),
Token::make_keyword("FROM"),
Token::Whitespace(Whitespace::Space),
Token::make_word("customer", None),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("WHERE"),
Token::Whitespace(Whitespace::Space),
Token::make_word("id", None),
Token::Whitespace(Whitespace::Space),
Token::Eq,
Token::Whitespace(Whitespace::Space),
Token::Number(String::from("1"), false),
];
compare(expected, tokens);
}
#[test]
fn tokenize_explain_analyze_select() {
let sql = String::from("EXPLAIN ANALYZE SELECT * FROM customer WHERE id = 1");
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![
Token::make_keyword("EXPLAIN"),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("ANALYZE"),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::Mul,
Token::Whitespace(Whitespace::Space),
Token::make_keyword("FROM"),
Token::Whitespace(Whitespace::Space),
Token::make_word("customer", None),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("WHERE"),
Token::Whitespace(Whitespace::Space),
Token::make_word("id", None),
Token::Whitespace(Whitespace::Space),
Token::Eq,
Token::Whitespace(Whitespace::Space),
Token::Number(String::from("1"), false),
];
compare(expected, tokens);
}
#[test]
fn tokenize_string_predicate() {
let sql = String::from("SELECT * FROM customer WHERE salary != 'Not Provided'");
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::Mul,
Token::Whitespace(Whitespace::Space),
Token::make_keyword("FROM"),
Token::Whitespace(Whitespace::Space),
Token::make_word("customer", None),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("WHERE"),
Token::Whitespace(Whitespace::Space),
Token::make_word("salary", None),
Token::Whitespace(Whitespace::Space),
Token::Neq,
Token::Whitespace(Whitespace::Space),
Token::SingleQuotedString(String::from("Not Provided")),
];
compare(expected, tokens);
}
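// Unrecognized characters are emitted as Token::Char rather than failing tokenization.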
#[test]
fn tokenize_invalid_string() {
let sql = String::from("\n💝مصطفىh");
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
// println!("tokens: {:#?}", tokens);
let expected = vec![
Token::Whitespace(Whitespace::Newline),
Token::Char('💝'),
Token::make_word("مصطفىh", None),
];
compare(expected, tokens);
}
#[test]
fn tokenize_newline_in_string_literal() {
let sql = String::from("'foo\r\nbar\nbaz'");
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![Token::SingleQuotedString("foo\r\nbar\nbaz".to_string())];
compare(expected, tokens);
}
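// For unterminated literals, the error location points at the opening quote;
// columns are counted in characters, not bytes (see the UTF-8 case below).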
#[test]
fn tokenize_unterminated_string_literal() {
let sql = String::from("select 'foo");
let dialect = GenericDialect {};
let mut tokenizer = Tokenizer::new(&dialect, &sql);
assert_eq!(
tokenizer.tokenize(),
Err(TokenizerError {
message: "Unterminated string literal".to_string(),
location: Location { line: 1, column: 8 },
})
);
}
#[test]
fn tokenize_unterminated_string_literal_utf8() {
let sql = String::from("SELECT \"なにか\" FROM Y WHERE \"なにか\" = 'test;");
let dialect = GenericDialect {};
let mut tokenizer = Tokenizer::new(&dialect, &sql);
assert_eq!(
tokenizer.tokenize(),
Err(TokenizerError {
message: "Unterminated string literal".to_string(),
location: Location {
line: 1,
column: 35
}
})
);
}
#[test]
fn tokenize_invalid_string_cols() {
let sql = String::from("\n\nSELECT * FROM table\t💝مصطفىh");
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
// println!("tokens: {:#?}", tokens);
let expected = vec![
Token::Whitespace(Whitespace::Newline),
Token::Whitespace(Whitespace::Newline),
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::Mul,
Token::Whitespace(Whitespace::Space),
Token::make_keyword("FROM"),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("table"),
Token::Whitespace(Whitespace::Tab),
Token::Char('💝'),
Token::make_word("مصطفىh", None),
];
compare(expected, tokens);
}
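// PostgreSQL-style dollar quoting: $tag$ ... $tag$ with an optional tag; the body is taken verbatim.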
#[test]
fn tokenize_dollar_quoted_string_tagged() {
let sql = String::from(
"SELECT $tag$dollar '$' quoted strings have $tags like this$ or like this $$$tag$",
);
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::DollarQuotedString(DollarQuotedString {
value: "dollar '$' quoted strings have $tags like this$ or like this $$".into(),
tag: Some("tag".into()),
}),
];
compare(expected, tokens);
}
#[test]
fn tokenize_dollar_quoted_string_tagged_unterminated() {
let sql = String::from("SELECT $tag$dollar '$' quoted strings have $tags like this$ or like this $$$different tag$");
let dialect = GenericDialect {};
assert_eq!(
Tokenizer::new(&dialect, &sql).tokenize(),
Err(TokenizerError {
message: "Unterminated dollar-quoted, expected $".into(),
location: Location {
line: 1,
column: 91
}
})
);
}
#[test]
fn tokenize_dollar_quoted_string_untagged() {
let sql =
String::from("SELECT $$within dollar '$' quoted strings have $tags like this$ $$");
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::DollarQuotedString(DollarQuotedString {
value: "within dollar '$' quoted strings have $tags like this$ ".into(),
tag: None,
}),
];
compare(expected, tokens);
}
#[test]
fn tokenize_dollar_quoted_string_untagged_unterminated() {
let sql = String::from(
"SELECT $$dollar '$' quoted strings have $tags like this$ or like this $different tag$",
);
let dialect = GenericDialect {};
assert_eq!(
Tokenizer::new(&dialect, &sql).tokenize(),
Err(TokenizerError {
message: "Unterminated dollar-quoted string".into(),
location: Location {
line: 1,
column: 86
}
})
);
}
#[test]
fn tokenize_right_arrow() {
let sql = String::from("FUNCTION(key=>value)");
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![
Token::make_word("FUNCTION", None),
Token::LParen,
Token::make_word("key", None),
Token::RArrow,
Token::make_word("value", None),
Token::RParen,
];
compare(expected, tokens);
}
#[test]
fn tokenize_is_null() {
let sql = String::from("a IS NULL");
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![
Token::make_word("a", None),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("IS"),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("NULL"),
];
compare(expected, tokens);
}
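// Single-line '--' comments run to the end of the line; the terminating newline
// stays in the comment body (compare the EOF case below).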
#[test]
fn tokenize_comment() {
let sql = String::from("0--this is a comment\n1");
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![
Token::Number("0".to_string(), false),
Token::Whitespace(Whitespace::SingleLineComment {
prefix: "--".to_string(),
comment: "this is a comment\n".to_string(),
}),
Token::Number("1".to_string(), false),
];
compare(expected, tokens);
}
#[test]
fn tokenize_comment_at_eof() {
let sql = String::from("--this is a comment");
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![Token::Whitespace(Whitespace::SingleLineComment {
prefix: "--".to_string(),
comment: "this is a comment".to_string(),
})];
compare(expected, tokens);
}
#[test]
fn tokenize_multiline_comment() {
let sql = String::from("0/*multi-line\n* /comment*/1");
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![
Token::Number("0".to_string(), false),
Token::Whitespace(Whitespace::MultiLineComment(
"multi-line\n* /comment".to_string(),
)),
Token::Number("1".to_string(), false),
];
compare(expected, tokens);
}
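// Multi-line comments nest: every inner /* must be closed before the outer comment ends.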
#[test]
fn tokenize_nested_multiline_comment() {
let sql = String::from("0/*multi-line\n* \n/* comment \n /*comment*/*/ */ /comment*/1");
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![
Token::Number("0".to_string(), false),
Token::Whitespace(Whitespace::MultiLineComment(
"multi-line\n* \n/* comment \n /*comment*/*/ */ /comment".to_string(),
)),
Token::Number("1".to_string(), false),
];
compare(expected, tokens);
}
#[test]
fn tokenize_multiline_comment_with_even_asterisks() {
let sql = String::from("\n/** Comment **/\n");
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![
Token::Whitespace(Whitespace::Newline),
Token::Whitespace(Whitespace::MultiLineComment("* Comment *".to_string())),
Token::Whitespace(Whitespace::Newline),
];
compare(expected, tokens);
}
#[test]
fn tokenize_unicode_whitespace() {
let sql = String::from(" \u{2003}\n");
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![
Token::Whitespace(Whitespace::Space),
Token::Whitespace(Whitespace::Space),
Token::Whitespace(Whitespace::Newline),
];
compare(expected, tokens);
}
#[test]
fn tokenize_mismatched_quotes() {
let sql = String::from("\"foo");
let dialect = GenericDialect {};
let mut tokenizer = Tokenizer::new(&dialect, &sql);
assert_eq!(
tokenizer.tokenize(),
Err(TokenizerError {
message: "Expected close delimiter '\"' before EOF.".to_string(),
location: Location { line: 1, column: 1 },
})
);
}
#[test]
fn tokenize_newlines() {
let sql = String::from("line1\nline2\rline3\r\nline4\r");
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![
Token::make_word("line1", None),
Token::Whitespace(Whitespace::Newline),
Token::make_word("line2", None),
Token::Whitespace(Whitespace::Newline),
Token::make_word("line3", None),
Token::Whitespace(Whitespace::Newline),
Token::make_word("line4", None),
Token::Whitespace(Whitespace::Newline),
];
compare(expected, tokens);
}
#[test]
fn tokenize_mssql_top() {
let sql = "SELECT TOP 5 [bar] FROM foo";
let dialect = MsSqlDialect {};
let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
let expected = vec![
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("TOP"),
Token::Whitespace(Whitespace::Space),
Token::Number(String::from("5"), false),
Token::Whitespace(Whitespace::Space),
Token::make_word("bar", Some('[')),
Token::Whitespace(Whitespace::Space),
Token::make_keyword("FROM"),
Token::Whitespace(Whitespace::Space),
Token::make_word("foo", None),
];
compare(expected, tokens);
}
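// PostgreSQL regex-match operators: ~ (match), ~* (case-insensitive match), !~ and !~* (their negations).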
#[test]
fn tokenize_pg_regex_match() {
let sql = "SELECT col ~ '^a', col ~* '^a', col !~ '^a', col !~* '^a'";
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
let expected = vec![
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::make_word("col", None),
Token::Whitespace(Whitespace::Space),
Token::Tilde,
Token::Whitespace(Whitespace::Space),
Token::SingleQuotedString("^a".into()),
Token::Comma,
Token::Whitespace(Whitespace::Space),
Token::make_word("col", None),
Token::Whitespace(Whitespace::Space),
Token::TildeAsterisk,
Token::Whitespace(Whitespace::Space),
Token::SingleQuotedString("^a".into()),
Token::Comma,
Token::Whitespace(Whitespace::Space),
Token::make_word("col", None),
Token::Whitespace(Whitespace::Space),
Token::ExclamationMarkTilde,
Token::Whitespace(Whitespace::Space),
Token::SingleQuotedString("^a".into()),
Token::Comma,
Token::Whitespace(Whitespace::Space),
Token::make_word("col", None),
Token::Whitespace(Whitespace::Space),
Token::ExclamationMarkTildeAsterisk,
Token::Whitespace(Whitespace::Space),
Token::SingleQuotedString("^a".into()),
];
compare(expected, tokens);
}
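// PostgreSQL operator spellings of LIKE: ~~ is LIKE, ~~* is ILIKE, !~~ and !~~* are NOT LIKE / NOT ILIKE.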
#[test]
fn tokenize_pg_like_match() {
let sql = "SELECT col ~~ '_a%', col ~~* '_a%', col !~~ '_a%', col !~~* '_a%'";
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
let expected = vec![
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::make_word("col", None),
Token::Whitespace(Whitespace::Space),
Token::DoubleTilde,
Token::Whitespace(Whitespace::Space),
Token::SingleQuotedString("_a%".into()),
Token::Comma,
Token::Whitespace(Whitespace::Space),
Token::make_word("col", None),
Token::Whitespace(Whitespace::Space),
Token::DoubleTildeAsterisk,
Token::Whitespace(Whitespace::Space),
Token::SingleQuotedString("_a%".into()),
Token::Comma,
Token::Whitespace(Whitespace::Space),
Token::make_word("col", None),
Token::Whitespace(Whitespace::Space),
Token::ExclamationMarkDoubleTilde,
Token::Whitespace(Whitespace::Space),
Token::SingleQuotedString("_a%".into()),
Token::Comma,
Token::Whitespace(Whitespace::Space),
Token::make_word("col", None),
Token::Whitespace(Whitespace::Space),
Token::ExclamationMarkDoubleTildeAsterisk,
Token::Whitespace(Whitespace::Space),
Token::SingleQuotedString("_a%".into()),
];
compare(expected, tokens);
}
#[test]
fn tokenize_quoted_identifier() {
let sql = r#" "a "" b" "a """ "c """"" "#;
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
let expected = vec![
Token::Whitespace(Whitespace::Space),
Token::make_word(r#"a " b"#, Some('"')),
Token::Whitespace(Whitespace::Space),
Token::make_word(r#"a ""#, Some('"')),
Token::Whitespace(Whitespace::Space),
Token::make_word(r#"c """#, Some('"')),
Token::Whitespace(Whitespace::Space),
];
compare(expected, tokens);
}
#[test]
fn tokenize_snowflake_div() {
let sql = r#"field/1000"#;
let dialect = SnowflakeDialect {};
let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
let expected = vec![
Token::make_word(r#"field"#, None),
Token::Div,
Token::Number("1000".to_string(), false),
];
compare(expected, tokens);
}
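// Same input as tokenize_quoted_identifier, but with_unescape(false) keeps the doubled quotes verbatim.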
#[test]
fn tokenize_quoted_identifier_with_no_escape() {
let sql = r#" "a "" b" "a """ "c """"" "#;
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, sql)
.with_unescape(false)
.tokenize()
.unwrap();
let expected = vec![
Token::Whitespace(Whitespace::Space),
Token::make_word(r#"a "" b"#, Some('"')),
Token::Whitespace(Whitespace::Space),
Token::make_word(r#"a """#, Some('"')),
Token::Whitespace(Whitespace::Space),
Token::make_word(r#"c """""#, Some('"')),
Token::Whitespace(Whitespace::Space),
];
compare(expected, tokens);
}
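// tokenize_with_location attaches the 1-based line and column at which each token starts.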
#[test]
fn tokenize_with_location() {
let sql = "SELECT a,\n b";
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, sql)
.tokenize_with_location()
.unwrap();
let expected = vec![
TokenWithLocation::new(Token::make_keyword("SELECT"), 1, 1),
TokenWithLocation::new(Token::Whitespace(Whitespace::Space), 1, 7),
TokenWithLocation::new(Token::make_word("a", None), 1, 8),
TokenWithLocation::new(Token::Comma, 1, 9),
TokenWithLocation::new(Token::Whitespace(Whitespace::Newline), 1, 10),
TokenWithLocation::new(Token::Whitespace(Whitespace::Space), 2, 1),
TokenWithLocation::new(Token::make_word("b", None), 2, 2),
];
compare(expected, tokens);
}
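// Asserts that the tokenizer output matches the expected token stream.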
fn compare<T: PartialEq + std::fmt::Debug>(expected: Vec<T>, actual: Vec<T>) {
//println!("------------------------------");
//println!("tokens = {:?}", actual);
//println!("expected = {:?}", expected);
//println!("------------------------------");
assert_eq!(expected, actual);
}
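// Wraps `s` in single quotes and unescapes it; `expected` of None means the escape sequence is rejected.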
fn check_unescape(s: &str, expected: Option<&str>) {
let s = format!("'{}'", s);
let mut state = State {
peekable: s.chars().peekable(),
line: 0,
col: 0,
};
assert_eq!(
unescape_single_quoted_string(&mut state),
expected.map(|s| s.to_string())
);
}
#[test]
fn test_unescape() {
check_unescape(r"\b", Some("\u{0008}"));
check_unescape(r"\f", Some("\u{000C}"));
check_unescape(r"\t", Some("\t"));
check_unescape(r"\r\n", Some("\r\n"));
check_unescape(r"\/", Some("/"));
check_unescape(r"/", Some("/"));
check_unescape(r"\\", Some("\\"));
// Unicode escapes: \u + 4 hex digits (16-bit) and \U + 8 hex digits (32-bit);
// NUL and out-of-range code points are rejected
check_unescape(r"\u0001", Some("\u{0001}"));
check_unescape(r"\u4c91", Some("\u{4c91}"));
check_unescape(r"\u4c916", Some("\u{4c91}6"));
check_unescape(r"\u4c", None);
check_unescape(r"\u0000", None);
check_unescape(r"\U0010FFFF", Some("\u{10FFFF}"));
check_unescape(r"\U00110000", None);
check_unescape(r"\U00000000", None);
check_unescape(r"\u", None);
check_unescape(r"\U", None);
check_unescape(r"\U1010FFFF", None);
// hexadecimal byte value: \x + 1-2 hex digits (with no digits, the backslash is dropped);
// NUL and non-ASCII results are rejected
check_unescape(r"\x4B", Some("\u{004b}"));
check_unescape(r"\x4", Some("\u{0004}"));
check_unescape(r"\x4L", Some("\u{0004}L"));
check_unescape(r"\x", Some("x"));
check_unescape(r"\xP", Some("xP"));
check_unescape(r"\x0", None);
check_unescape(r"\xCAD", None);
check_unescape(r"\xA9", None);
// octal byte value: \ + 1-3 octal digits; the value wraps to a byte,
// and NUL or non-ASCII results are rejected
check_unescape(r"\1", Some("\u{0001}"));
check_unescape(r"\12", Some("\u{000a}"));
check_unescape(r"\123", Some("\u{0053}"));
check_unescape(r"\1232", Some("\u{0053}2"));
check_unescape(r"\4", Some("\u{0004}"));
check_unescape(r"\45", Some("\u{0025}"));
check_unescape(r"\450", Some("\u{0028}"));
check_unescape(r"\603", None);
check_unescape(r"\0", None);
check_unescape(r"\080", None);
// others
check_unescape(r"\9", Some("9"));
check_unescape(r"''", Some("'"));
check_unescape(
r"Hello\r\nRust/\u4c91 SQL Parser\U0010ABCD\1232",
Some("Hello\r\nRust/\u{4c91} SQL Parser\u{10abcd}\u{0053}2"),
);
check_unescape(r"Hello\0", None);
check_unescape(r"Hello\xCADRust", None);
}
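// Dialects that support numeric prefixes (e.g. Hive, MySQL) allow identifiers to
// start with a digit; a bare integer must still tokenize as a number.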
#[test]
fn tokenize_numeric_prefix_trait() {
#[derive(Debug)]
struct NumericPrefixDialect;
impl Dialect for NumericPrefixDialect {
fn is_identifier_start(&self, ch: char) -> bool {
ch.is_ascii_lowercase()
|| ch.is_ascii_uppercase()
|| ch.is_ascii_digit()
|| ch == '$'
}
fn is_identifier_part(&self, ch: char) -> bool {
ch.is_ascii_lowercase()
|| ch.is_ascii_uppercase()
|| ch.is_ascii_digit()
|| ch == '_'
|| ch == '$'
|| ch == '{'
|| ch == '}'
}
fn supports_numeric_prefix(&self) -> bool {
true
}
}
tokenize_numeric_prefix_inner(&NumericPrefixDialect {});
tokenize_numeric_prefix_inner(&HiveDialect {});
tokenize_numeric_prefix_inner(&MySqlDialect {});
}
fn tokenize_numeric_prefix_inner(dialect: &dyn Dialect) {
let sql = r#"SELECT * FROM 1"#;
let tokens = Tokenizer::new(dialect, sql).tokenize().unwrap();
let expected = vec![
Token::make_keyword("SELECT"),
Token::Whitespace(Whitespace::Space),
Token::Mul,
Token::Whitespace(Whitespace::Space),
Token::make_keyword("FROM"),
Token::Whitespace(Whitespace::Space),
Token::Number(String::from("1"), false),
];
compare(expected, tokens);
}
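// Snowflake supports backslash escapes in single-quoted strings; with_unescape
// controls whether they are resolved or kept verbatim.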
#[test]
fn tokenize_quoted_string_escape() {
let dialect = SnowflakeDialect {};
for (sql, expected, expected_unescaped) in [
(r#"'%a\'%b'"#, r#"%a\'%b"#, r#"%a'%b"#),
(r#"'a\'\'b\'c\'d'"#, r#"a\'\'b\'c\'d"#, r#"a''b'c'd"#),
(r#"'\\'"#, r#"\\"#, r#"\"#),
(
r#"'\0\a\b\f\n\r\t\Z'"#,
r#"\0\a\b\f\n\r\t\Z"#,
"\0\u{7}\u{8}\u{c}\n\r\t\u{1a}",
),
(r#"'\"'"#, r#"\""#, "\""),
(r#"'\\a\\b\'c'"#, r#"\\a\\b\'c"#, r#"\a\b'c"#),
(r#"'\'abcd'"#, r#"\'abcd"#, r#"'abcd"#),
(r#"'''a''b'"#, r#"''a''b"#, r#"'a'b"#),
] {
let tokens = Tokenizer::new(&dialect, sql)
.with_unescape(false)
.tokenize()
.unwrap();
let expected = vec![Token::SingleQuotedString(expected.to_string())];
compare(expected, tokens);
let tokens = Tokenizer::new(&dialect, sql)
.with_unescape(true)
.tokenize()
.unwrap();
let expected = vec![Token::SingleQuotedString(expected_unescaped.to_string())];
compare(expected, tokens);
}
for sql in [r#"'\'"#, r#"'ab\'"#] {
let mut tokenizer = Tokenizer::new(&dialect, sql);
assert_eq!(
"Unterminated string literal",
tokenizer.tokenize().unwrap_err().message.as_str(),
);
}
// Non-escaping dialect: the backslash is kept verbatim
for (sql, expected) in [(r#"'\'"#, r#"\"#), (r#"'ab\'"#, r#"ab\"#)] {
let dialect = GenericDialect {};
let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
let expected = vec![Token::SingleQuotedString(expected.to_string())];
compare(expected, tokens);
}
}
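// BigQuery triple-quoted strings: check() exercises both quote characters, with and without unescaping.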
#[test]
fn tokenize_triple_quoted_string() {
fn check<F>(
q: char, // The quote character to test
r: char, // An alternate quote character
quote_token: F,
) where
F: Fn(String) -> Token,
{
let dialect = BigQueryDialect {};
for (sql, expected, expected_unescaped) in [
// Empty string
(format!(r#"{q}{q}{q}{q}{q}{q}"#), "".into(), "".into()),
// Should not count escaped quote as end of string.
(
format!(r#"{q}{q}{q}ab{q}{q}\{q}{q}cd{q}{q}{q}"#),
format!(r#"ab{q}{q}\{q}{q}cd"#),
format!(r#"ab{q}{q}{q}{q}cd"#),
),
// Simple string
(
format!(r#"{q}{q}{q}abc{q}{q}{q}"#),
"abc".into(),
"abc".into(),
),
// Mixed single and double quotes, unescaped.
(
format!(r#"{q}{q}{q}ab{r}{r}{r}c{r}def{r}{r}{r}{q}{q}{q}"#),
format!("ab{r}{r}{r}c{r}def{r}{r}{r}"),
format!("ab{r}{r}{r}c{r}def{r}{r}{r}"),
),
// Escaped quote.
(
format!(r#"{q}{q}{q}ab{q}{q}c{q}{q}\{q}de{q}{q}f{q}{q}{q}"#),
format!(r#"ab{q}{q}c{q}{q}\{q}de{q}{q}f"#),
format!(r#"ab{q}{q}c{q}{q}{q}de{q}{q}f"#),
),
// Backslash-escaped quote characters.
(
format!(r#"{q}{q}{q}a\'\'b\'c\'d{q}{q}{q}"#),
r#"a\'\'b\'c\'d"#.into(),
r#"a''b'c'd"#.into(),
),
// Backslash-escaped characters.
(
format!(r#"{q}{q}{q}abc\0\n\rdef{q}{q}{q}"#),
r#"abc\0\n\rdef"#.into(),
"abc\0\n\rdef".into(),
),
] {
let tokens = Tokenizer::new(&dialect, sql.as_str())
.with_unescape(false)
.tokenize()
.unwrap();
let expected = vec![quote_token(expected.to_string())];
compare(expected, tokens);
let tokens = Tokenizer::new(&dialect, sql.as_str())
.with_unescape(true)
.tokenize()
.unwrap();
let expected = vec![quote_token(expected_unescaped.to_string())];
compare(expected, tokens);
}
for sql in [
format!(r#"{q}{q}{q}{q}{q}\{q}"#),
format!(r#"{q}{q}{q}abc{q}{q}\{q}"#),
format!(r#"{q}{q}{q}{q}"#),
format!(r#"{q}{q}{q}{r}{r}"#),
format!(r#"{q}{q}{q}abc{q}"#),
format!(r#"{q}{q}{q}abc{q}{q}"#),
format!(r#"{q}{q}{q}abc"#),
] {
let dialect = BigQueryDialect {};
let mut tokenizer = Tokenizer::new(&dialect, sql.as_str());
assert_eq!(
"Unterminated string literal",
tokenizer.tokenize().unwrap_err().message.as_str(),
);
}
}
check('"', '\'', Token::TripleDoubleQuotedString);
check('\'', '"', Token::TripleSingleQuotedString);
let dialect = BigQueryDialect {};
let sql = r#"""''"#;
let tokens = Tokenizer::new(&dialect, sql)
.with_unescape(true)
.tokenize()
.unwrap();
let expected = vec![
Token::DoubleQuotedString("".to_string()),
Token::SingleQuotedString("".to_string()),
];
compare(expected, tokens);
let sql = r#"''"""#;
let tokens = Tokenizer::new(&dialect, sql)
.with_unescape(true)
.tokenize()
.unwrap();
let expected = vec![
Token::SingleQuotedString("".to_string()),
Token::DoubleQuotedString("".to_string()),
];
compare(expected, tokens);
// A dialect without triple-quoted strings reads '''''' as one string containing two escaped quotes
let dialect = SnowflakeDialect {};
let sql = r#"''''''"#;
let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
let expected = vec![Token::SingleQuotedString("''".to_string())];
compare(expected, tokens);
}
}