// include/tscore/ParseRules.h - trafficserver - Git at Google

 /** @file

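   Table-driven character classification predicates (ParseRules), end-of-word
   aware string helpers, and clamping integer-parsing wrappers.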

   @section license License

   Licensed to the Apache Software Foundation (ASF) under one
   or more contributor license agreements.  See the NOTICE file
   distributed with this work for additional information
   regarding copyright ownership.  The ASF licenses this file
   to you under the Apache License, Version 2.0 (the
   "License"); you may not use this file except in compliance
   with the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
  */

 #pragma once

 #include <cstring>
 // I had to move this here, because I really needed to avoid including ink_platform.h in here,
 // because of a conflict on linux/tcp.h vs netinet/tcp.h.
 #include <limits.h> // NOLINT(modernize-deprecated-headers)

 #include "tscore/ink_defs.h"
 #include "tscore/ink_apidefs.h"

 // Result type of the ParseRules predicates and element type of the
 // parseRulesCType[] classification table declared below.
 typedef unsigned int CTypeResult;

 // Set this to 0 to disable SI
 // decimal multipliers
 // (The macro is not referenced in the visible part of this header; it is
 // presumably consumed by the integer-parsing implementation -- confirm.)
 #define USE_SI_MULTIPLIERS 1

 // One single-bit mask per character class. The ParseRules::is_*() predicates
 // AND these masks against parseRulesCType[(unsigned char)c].
 #define is_char_BIT (1 << 0)
 #define is_upalpha_BIT (1 << 1)
 #define is_loalpha_BIT (1 << 2)
 #define is_alpha_BIT (1 << 3)
 #define is_digit_BIT (1 << 4)
 #define is_ctl_BIT (1 << 5)
 #define is_ws_BIT (1 << 6)
 #define is_hex_BIT (1 << 7)
 #define is_pchar_BIT (1 << 8)
 #define is_extra_BIT (1 << 9)
 #define is_safe_BIT (1 << 10)
 #define is_unsafe_BIT (1 << 11)
 #define is_national_BIT (1 << 12)
 #define is_reserved_BIT (1 << 13)
 #define is_unreserved_BIT (1 << 14)
 #define is_punct_BIT (1 << 15)
 #define is_end_of_url_BIT (1 << 16)
 #define is_tspecials_BIT (1 << 17)
 #define is_spcr_BIT (1 << 18)
 #define is_splf_BIT (1 << 19)
 #define is_wslfcr_BIT (1 << 20)
 #define is_eow_BIT (1 << 21)
 #define is_token_BIT (1 << 22)
 #define is_uri_BIT (1 << 23)
 #define is_sep_BIT (1 << 24)
 #define is_empty_BIT (1 << 25)
 #define is_alnum_BIT (1 << 26)
 #define is_space_BIT (1 << 27)
 #define is_control_BIT (1 << 28)
 #define is_mime_sep_BIT (1 << 29)
 #define is_http_field_name_BIT (1 << 30)
 /* shut up the DEC compiler */
 // Bit 31 is shifted as an unsigned CTypeResult: shifting a plain (signed)
 // int 1 left by 31 would not fit in a 32-bit int.
 #define is_http_field_value_BIT (((CTypeResult)1) << 31)

 // Classification table, indexed by the character value cast to unsigned char.
 extern ink_undoc_liapi const CTypeResult parseRulesCType[];
 // Case-mapping tables used by ParseRules::ink_toupper()/ink_tolower(),
 // indexed the same way.
 inkcoreapi extern const char parseRulesCTypeToUpper[];
 inkcoreapi extern const char parseRulesCTypeToLower[];

 // Collection of static character/string classification helpers.
 //
 // Unless COMPILE_PARSE_RULES is defined, the single-character predicates are
 // lookups in parseRulesCType[]; the character sets listed in the comments
 // below describe the direct (COMPILE_PARSE_RULES) implementations found in
 // the inline definitions. The class is declared noncopyable.
 class ParseRules
 {
 public:
   ParseRules();

   ////////////////////////////
   // whitespace definitions //
   ////////////////////////////

   enum {
     CHAR_SP = 32, /* space           */
     CHAR_HT = 9,  /* horizontal tab  */
     CHAR_LF = 10, /* line feed       */
     CHAR_VT = 11, /* vertical tab    */
     CHAR_NP = 12, /* new page        */
     CHAR_CR = 13  /* carriage return */
   };

   /////////////////////
   // character tests //
   /////////////////////

   // Returns the bits of `bit` that are set for `c` in parseRulesCType[].
   static CTypeResult is_type(char c, uint32_t bit);

   static CTypeResult is_char(char c);             // ASCII 0-127
   static CTypeResult is_upalpha(char c);          // A-Z
   static CTypeResult is_loalpha(char c);          // a-z
   static CTypeResult is_alpha(char c);            // A-Z,a-z
   static CTypeResult is_digit(char c);            // 0-9
   static CTypeResult is_ctl(char c);              // ASCII 0-31,127 (includes ws)
   static CTypeResult is_hex(char c);              // 0-9,A-F,a-f
   static CTypeResult is_ws(char c);               // SP,HT
   static CTypeResult is_cr(char c);               // CR
   static CTypeResult is_lf(char c);               // LF
   static CTypeResult is_spcr(char c);             // SP,CR
   static CTypeResult is_splf(char c);             // SP,LF
   static CTypeResult is_wslfcr(char c);           // SP,HT,LF,CR
   static CTypeResult is_tspecials(char c);        // HTTP chars that need quoting
   static CTypeResult is_token(char c);            // token (not CTL or specials)
   static CTypeResult is_extra(char c);            // !,*,QUOT,(,),COMMA
   static CTypeResult is_safe(char c);             // [$-_.+]
   static CTypeResult is_unsafe(char c);           // CTL,SP,DBLQUOT,#,%,<,>
   static CTypeResult is_national(char c);         // {,},|,BACKSLASH,^,~,[,],`
   static CTypeResult is_reserved(char c);         // ;,/,?,:,@,&,=
   static CTypeResult is_unreserved(char c);       // alpha,digit,safe,extra,nat.
   static CTypeResult is_punct(char c);            // !"#%&'()*+,-./:;<=>?@[\]^_`{|}~ (no '$')
   static CTypeResult is_end_of_url(char c);       // SP or any CTL (incl. NUL,CR,LF)
   static CTypeResult is_eow(char c);              // NUL,CR,LF
   static CTypeResult is_uri(char c);              // A-Z,a-z,0-9 :/?#[]@!$&'()*+,;=-._~%
   static CTypeResult is_sep(char c);              // NUL,COMMA,':','!',wslfcr
   static CTypeResult is_empty(char c);            // wslfcr,#
   static CTypeResult is_alnum(char c);            // 0-9,A-Z,a-z
   static CTypeResult is_space(char c);            // ' ' HT,VT,NP,CR,LF
   static CTypeResult is_control(char c);          // 0-31 127
   static CTypeResult is_mime_sep(char c);         // ()<>@,;\"/[]?{} SP HT
   static CTypeResult is_http_field_name(char c);  // not : or mime_sep except for @
   static CTypeResult is_http_field_value(char c); // not CR, LF, comma, or "

   //////////////////
   // string tests //
   //////////////////

   // These read up to three characters starting at `seq`.
   static CTypeResult is_escape(const char *seq); // %<hex><hex>
   static CTypeResult is_uchar(const char *seq);  // starts unreserved or is escape
   static CTypeResult is_pchar(const char *seq);  // uchar,:,@,&,=,+ (see code)

   ///////////////////
   // unimplemented //
   ///////////////////

   // static CTypeResult   is_comment(const char * str);
   // static CTypeResult   is_ctext(const char * str);

   ////////////////
   // operations //
   ////////////////

   // "eow" = end of word: NUL, CR or LF (see is_eow).
   static CTypeResult strncasecmp_eow(const char *s1, const char *s2, int n);
   static const char *strcasestr(const char *s1, const char *s2);
   static int strlen_eow(const char *s);
   static const char *strstr_eow(const char *s1, const char *s2);

   static char ink_toupper(char c);
   static char ink_tolower(char c);
   // Hand-written scans; note they shadow the C library names inside this class.
   static const char *memchr(const char *s, char c, int max_length);
   static const char *strchr(const char *s, char c);

   // noncopyable
   ParseRules(const ParseRules &) = delete;
   ParseRules &operator=(const ParseRules &) = delete;
 };

 /* * * * * * * * * * * * * * * * * * * * * * * * * * * *
  * inline functions definitions
  * * * * * * * * * * * * * * * * * * * * * * * * * * * */

 // Tests `c` against an arbitrary combination of is_*_BIT masks.
 // Returns the subset of `bitmask` that is set for `c` in parseRulesCType[]
 // (zero when none of the requested bits is set).
 inline CTypeResult
 ParseRules::is_type(char c, uint32_t bitmask)
 {
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & bitmask;
 }

 // 7-bit ASCII test (0-127). Nonzero means "yes"; the table path returns the
 // raw mask bit, the computed path returns 0 or 1.
 inline CTypeResult
 ParseRules::is_char(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   // Direct computation: the high bit must be clear.
   const bool high_bit_clear = !(c & 0x80);
   return high_bit_clear;
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_char_BIT;
 #endif
 }

 // Upper-case letter test (A-Z).
 inline CTypeResult
 ParseRules::is_upalpha(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   return ('A' <= c && c <= 'Z');
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_upalpha_BIT;
 #endif
 }

 // Lower-case letter test (a-z).
 inline CTypeResult
 ParseRules::is_loalpha(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   return ('a' <= c && c <= 'z');
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_loalpha_BIT;
 #endif
 }

 // Letter test (A-Z or a-z).
 inline CTypeResult
 ParseRules::is_alpha(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   if (is_upalpha(c)) {
     return true;
   }
   return is_loalpha(c) != 0;
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_alpha_BIT;
 #endif
 }

 // Decimal digit test (0-9).
 inline CTypeResult
 ParseRules::is_digit(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   return ('0' <= c && c <= '9');
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_digit_BIT;
 #endif
 }

 // Letter-or-digit test (0-9, A-Z, a-z).
 inline CTypeResult
 ParseRules::is_alnum(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   if (is_alpha(c)) {
     return true;
   }
   return is_digit(c) != 0;
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_alnum_BIT;
 #endif
 }

 // Control character test: ASCII 0-31 and 127 (DEL).
 inline CTypeResult
 ParseRules::is_ctl(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   if (c == 127) {
     return true;
   }
   // The high-bit check keeps bytes >= 0x80 out of the "<= 31" range when
   // plain char is signed.
   return ((c & 0x80) == 0 && c <= 31);
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_ctl_BIT;
 #endif
 }

 // Linear whitespace test: space or horizontal tab.
 inline CTypeResult
 ParseRules::is_ws(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   switch (c) {
   case CHAR_SP:
   case CHAR_HT:
     return true;
   default:
     return false;
   }
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_ws_BIT;
 #endif
 }

 // Hexadecimal digit test (0-9, A-F, a-f).
 inline CTypeResult
 ParseRules::is_hex(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   const bool upper_hex = ('A' <= c && c <= 'F');
   const bool lower_hex = ('a' <= c && c <= 'f');
   const bool decimal   = ('0' <= c && c <= '9');
   return upper_hex || lower_hex || decimal;
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_hex_BIT;
 #endif
 }

 // Carriage-return test; always computed directly (no table lookup).
 inline CTypeResult
 ParseRules::is_cr(char c)
 {
   const bool matches_cr = (c == CHAR_CR);
   return matches_cr;
 }

 // Line-feed test; always computed directly (no table lookup).
 inline CTypeResult
 ParseRules::is_lf(char c)
 {
   const bool matches_lf = (c == CHAR_LF);
   return matches_lf;
 }

 // Space-or-line-feed test.
 inline CTypeResult
 ParseRules::is_splf(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   switch (c) {
   case CHAR_SP:
   case CHAR_LF:
     return true;
   default:
     return false;
   }
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_splf_BIT;
 #endif
 }

 // Space-or-carriage-return test.
 inline CTypeResult
 ParseRules::is_spcr(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   switch (c) {
   case CHAR_SP:
   case CHAR_CR:
     return true;
   default:
     return false;
   }
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_spcr_BIT;
 #endif
 }

 // Whitespace-or-line-ending test: SP, HT, LF or CR.
 inline CTypeResult
 ParseRules::is_wslfcr(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   // Union of the three narrower classes.
   if (ParseRules::is_ws(c)) {
     return true;
   }
   if (ParseRules::is_splf(c)) {
     return true;
   }
   return ParseRules::is_spcr(c) != 0;
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_wslfcr_BIT;
 #endif
 }

 // "extra" URL characters: ! * ' ( ) ,
 inline CTypeResult
 ParseRules::is_extra(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   return (c == '!' || c == '*' || c == '\'' || c == '(' || c == ')' || c == ',');
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_extra_BIT;
 #endif
 }

 // "safe" URL characters: $ - _ . +
 inline CTypeResult
 ParseRules::is_safe(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   switch (c) {
   case '$':
   case '-':
   case '_':
   case '.':
   case '+':
     return true;
   default:
     return false;
   }
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_safe_BIT;
 #endif
 }

 // "unsafe" URL characters: any control character, plus SP " # % < >
 inline CTypeResult
 ParseRules::is_unsafe(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   return (is_ctl(c) || c == ' ' || c == '\"' || c == '#' || c == '%' || c == '<' || c == '>');
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_unsafe_BIT;
 #endif
 }

 // "reserved" URL characters: ; / ? : @ & =
 inline CTypeResult
 ParseRules::is_reserved(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   return (c == ';' || c == '/' || c == '?' || c == ':' || c == '@' || c == '&' || c == '=');
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_reserved_BIT;
 #endif
 }

 // "national" URL characters: { } | \ ^ ~ [ ] `
 inline CTypeResult
 ParseRules::is_national(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   const bool braces_or_bar = (c == '{' || c == '}' || c == '|');
   const bool brackets      = (c == '[' || c == ']');
   const bool others        = (c == '\\' || c == '^' || c == '~' || c == '`');
   return braces_or_bar || brackets || others;
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_national_BIT;
 #endif
 }

 // "unreserved" URL characters: alpha, digit, safe, extra or national.
 inline CTypeResult
 ParseRules::is_unreserved(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   if (is_alpha(c) || is_digit(c)) {
     return true;
   }
   if (is_safe(c) || is_extra(c)) {
     return true;
   }
   return is_national(c) != 0;
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_unreserved_BIT;
 #endif
 }

 // Punctuation test. The computed set is every printable ASCII punctuation
 // character except '$':  ! " # % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~
 inline CTypeResult
 ParseRules::is_punct(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   // The accepted characters form four contiguous ASCII runs; '$' is the only
   // character inside those runs that is not accepted.
   const bool run_bang_slash  = (c >= '!' && c <= '/' && c != '$');
   const bool run_colon_at    = (c >= ':' && c <= '@');
   const bool run_bracket_tic = (c >= '[' && c <= '`');
   const bool run_brace_tilde = (c >= '{' && c <= '~');
   return run_bang_slash || run_colon_at || run_bracket_tic || run_brace_tilde;
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_punct_BIT;
 #endif
 }

 // URL terminator test: NUL, LF, SP, or any other control character.
 inline CTypeResult
 ParseRules::is_end_of_url(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   switch (c) {
   case '\0':
   case '\n':
   case ' ':
     return true;
   default:
     return ParseRules::is_ctl(c) != 0;
   }
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_end_of_url_BIT;
 #endif
 }

 // Tests whether `seq` starts with a percent-escape: '%' followed by two hex
 // digits. seq[1] and seq[2] are only read when the preceding character
 // matched.
 inline CTypeResult
 ParseRules::is_escape(const char *seq)
 {
   if (seq[0] != '%') {
     return false;
   }
   if (!is_hex(seq[1])) {
     return false;
   }
   return is_hex(seq[2]) != 0;
 }

 // Tests whether `seq` starts with an unreserved character or a
 // percent-escape (see is_escape).
 inline CTypeResult
 ParseRules::is_uchar(const char *seq)
 {
   if (is_unreserved(seq[0])) {
     return true;
   }
   return is_escape(seq) != 0;
 }

 //
 // Path-character test on the sequence starting at `seq`.
 //
 // "Cheat": the two build modes are not symmetric. The table-driven path
 // treats a leading '%' as the start of an escape and requires two hex digits
 // after it; the COMPILE_PARSE_RULES path only classifies the first character
 // (unreserved, or one of : @ & = +).
 //
 inline CTypeResult
 ParseRules::is_pchar(const char *seq)
 {
 #ifdef COMPILE_PARSE_RULES
   if (is_unreserved(seq[0])) {
     return true;
   }
   const char head = seq[0];
   return (head == ':' || head == '@' || head == '&' || head == '=' || head == '+');
 #else
   if (seq[0] == '%') {
     return is_hex(seq[1]) && is_hex(seq[2]);
   }
   return (parseRulesCType[static_cast<uint8_t>(seq[0])] & is_pchar_BIT);
 #endif
 }

 // HTTP "tspecials": ( ) < > @ , ; : \ " / [ ] ? = { } SP HT
 inline CTypeResult
 ParseRules::is_tspecials(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   const bool bracketing  = (c == '(' || c == ')' || c == '<' || c == '>' || c == '[' || c == ']' || c == '{' || c == '}');
   const bool punctuation = (c == '@' || c == ',' || c == ';' || c == ':' || c == '\\' || c == '"' || c == '/' || c == '?' || c == '=');
   const bool blank       = (c == CHAR_SP || c == CHAR_HT);
   return bracketing || punctuation || blank;
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_tspecials_BIT;
 #endif
 }

 // HTTP token character: 7-bit ASCII that is neither a control character nor
 // one of the tspecials.
 inline CTypeResult
 ParseRules::is_token(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   if (!is_char(c)) {
     return false;
   }
   return (!is_ctl(c) && !is_tspecials(c));
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_token_BIT;
 #endif
 }

 // Upper-case conversion. The table path maps through
 // parseRulesCTypeToUpper[]; the computed path shifts a-z to A-Z and leaves
 // every other character untouched.
 inline char
 ParseRules::ink_toupper(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   const int case_gap = 'a' - 'A';
   int mapped         = c;

   if ('a' <= c && c <= 'z') {
     mapped -= case_gap;
   }
   return (mapped);
 #else
   return parseRulesCTypeToUpper[static_cast<unsigned char>(c)];
 #endif
 }

 // Lower-case conversion. The table path maps through
 // parseRulesCTypeToLower[]; the computed path shifts A-Z to a-z and leaves
 // every other character untouched.
 inline char
 ParseRules::ink_tolower(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   const int case_gap = 'a' - 'A';
   int mapped         = c;

   if ('A' <= c && c <= 'Z') {
     mapped += case_gap;
   }
   return (mapped);
 #else
   return parseRulesCTypeToLower[static_cast<unsigned char>(c)];
 #endif
 }

 // End-of-word test: NUL, CR or LF.
 inline CTypeResult
 ParseRules::is_eow(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   switch (c) {
   case '\0':
   case '\r':
   case '\n':
     return true;
   default:
     return false;
   }
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_eow_BIT;
 #endif
 }

 // URI character test: alphanumerics plus  : / ? # [ ] @ ! $ & ' ( ) * + , ; = - . _ ~ %
 inline CTypeResult
 ParseRules::is_uri(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   if (is_alnum(c)) {
     return true;
   }
   // The accepted symbols, grouped as in RFC 3986 plus the escape introducer.
   const bool gen_delim = (c == ':' || c == '/' || c == '?' || c == '#' || c == '[' || c == ']' || c == '@');
   const bool sub_delim = (c == '!' || c == '$' || c == '&' || c == '\'' || c == '(' || c == ')' || c == '*' || c == '+' ||
                           c == ',' || c == ';' || c == '=');
   const bool mark      = (c == '-' || c == '.' || c == '_' || c == '~');
   return gen_delim || sub_delim || mark || c == '%';
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_uri_BIT;
 #endif
 }

 // Separator test: NUL, ',', ':', '!' or any of SP/HT/LF/CR.
 inline CTypeResult
 ParseRules::is_sep(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   switch (c) {
   case '\0':
   case ',':
   case ':':
   case '!':
     return true;
   default:
     return is_wslfcr(c) != 0;
   }
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_sep_BIT;
 #endif
 }

 // "Empty" character test: '#' or any of SP/HT/LF/CR.
 inline CTypeResult
 ParseRules::is_empty(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   if (c == '#') {
     return true;
   }
   return is_wslfcr(c) != 0;
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_empty_BIT;
 #endif
 }

 // Whitespace test: SP, HT, LF, VT, NP (form feed) or CR.
 inline CTypeResult
 ParseRules::is_space(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   return (c == CHAR_SP || c == CHAR_HT || c == CHAR_LF || c == CHAR_VT || c == CHAR_NP || c == CHAR_CR);
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_space_BIT;
 #endif
 }

 // Control character test on the unsigned byte value: 0-31 or 127.
 inline CTypeResult
 ParseRules::is_control(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   const unsigned char byte = static_cast<unsigned char>(c);
   return (byte < 32 || byte == 127);
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_control_BIT;
 #endif
 }

 // MIME separator test: ( ) < > @ , ; \ " / [ ] ? { } SP HT
 inline CTypeResult
 ParseRules::is_mime_sep(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   switch (c) {
   case '(':
   case ')':
   case '<':
   case '>':
   case '@':
   case ',':
   case ';':
   case '\\':
   case '\"':
   case '/':
   case '[':
   case ']':
   case '?':
   case '{':
   case '}':
   case ' ':
   case '\t':
     return true;
   default:
     return false;
   }
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_mime_sep_BIT;
 #endif
 }

 // HTTP field-name character test: rejects ':' and every MIME separator
 // except '@'; everything else is accepted.
 inline CTypeResult
 ParseRules::is_http_field_name(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   const bool rejected = (c == ':') || (c != '@' && is_mime_sep(c));
   return !rejected;
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return flags & is_http_field_name_BIT;
 #endif
 }

 // HTTP field-value character test: everything except CR, LF, '"' and ','.
 inline CTypeResult
 ParseRules::is_http_field_value(char c)
 {
 #ifdef COMPILE_PARSE_RULES
   const bool rejected = (c == CHAR_CR || c == CHAR_LF || c == '\"' || c == ',');
   return !rejected;
 #else
   const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
   return static_cast<CTypeResult>(flags & is_http_field_value_BIT);
 #endif
 }

 //////////////////////////////////////////////////////////////////////////////
 //
 //      inline CTypeResult ParseRules::strncasecmp_eow(s1, s2, count)
 //
 //      Case-insensitively compares <s1> and <s2> over at most <count>
 //      characters, treating end of word (NUL, CR or LF, see is_eow) as the
 //      end of either string.
 //
 //      Returns nonzero when the strings match up to <count> characters or up
 //      to the point where both reach a terminator, zero otherwise. Note the
 //      sense is the opposite of strncasecmp(): nonzero means "equal".
 //
 //////////////////////////////////////////////////////////////////////////////

 inline CTypeResult
 ParseRules::strncasecmp_eow(const char *s1, const char *s2, int count)
 {
   for (int i = 0; i < count; i++) {
     const char a = s1[i];
     const char b = s2[i];

     ///////////////////////////////////////////////////////////////
     // if they are different; only match if both are terminators //
     ///////////////////////////////////////////////////////////////
     if (ink_tolower(a) != ink_tolower(b)) {
       return (is_eow(a) && is_eow(b));
     }

     // Identical terminators end the comparison as a match. Without this the
     // loop would keep reading past the end of both strings whenever <count>
     // exceeds their common length.
     if (is_eow(a) && is_eow(b)) {
       return (true);
     }
   }
   return (true);
 }

 //////////////////////////////////////////////////////////////////////////////
 //
 //  strlen_eow()
 //
 //  Returns the number of characters in <s> that precede the first
 //  end-of-word character (NUL, CR or LF, see is_eow).
 //////////////////////////////////////////////////////////////////////////////
 inline int
 ParseRules::strlen_eow(const char *s)
 {
   int length = 0;
   while (!is_eow(s[length])) {
     ++length;
   }
   return length;
 }

 //////////////////////////////////////////////////////////////////////////////
 //
 //  strstr_eow()
 //
 //  Case-insensitive substring search in which both strings may be terminated
 //  by '\r', '\n' or NUL.
 //  Returns a pointer to the first occurrence of s2 within s1, or nullptr
 //  when there is none.
 //////////////////////////////////////////////////////////////////////////////
 inline const char *
 ParseRules::strstr_eow(const char *s1, const char *s2)
 {
   const int needle_len    = strlen_eow(s2);
   const char needle_first = ink_tolower(s2[0]);

   for (const char *cursor = s1; !is_eow(*cursor); ++cursor) {
     // Cheap first-character filter before the full comparison.
     if (ink_tolower(*cursor) != needle_first) {
       continue;
     }
     if (strncasecmp_eow(cursor, s2, needle_len)) {
       return cursor;
     }
   }
   return nullptr;
 }

 // Case-insensitive substring search over NUL-terminated s1 for s2.
 // Returns a pointer to the first match in s1, or nullptr when there is none.
 // The candidate comparison is delegated to strncasecmp_eow().
 inline const char *
 ParseRules::strcasestr(const char *s1, const char *s2)
 {
   const size_t needle_len = strlen(s2);
   const char needle_first = ink_tolower(s2[0]);

   for (const char *cursor = s1; *cursor != '\0'; ++cursor) {
     // Cheap first-character filter before the full comparison.
     if (ink_tolower(*cursor) != needle_first) {
       continue;
     }
     if (strncasecmp_eow(cursor, s2, static_cast<int>(needle_len))) {
       return cursor;
     }
   }
   return nullptr;
 }

 // Scans the first max_length bytes of s for c.
 // Returns a pointer to the first occurrence, or nullptr when c is absent
 // (or when max_length is not positive).
 inline const char *
 ParseRules::memchr(const char *s, char c, int max_length)
 {
   for (int remaining = max_length; remaining > 0; --remaining, ++s) {
     if (*s == c) {
       return s;
     }
   }
   return nullptr;
 }

 // Scans the NUL-terminated string s for c.
 // Returns a pointer to the first occurrence, or nullptr when c is absent.
 // The terminator itself is never matched, so searching for '\0' yields
 // nullptr.
 inline const char *
 ParseRules::strchr(const char *s, char c)
 {
   for (; *s != '\0'; ++s) {
     if (*s == c) {
       return s;
     }
   }
   return nullptr;
 }

 // Converts one hexadecimal digit to its numeric value.
 // Digits map to 0-9; any other character is lower-cased and measured from
 // 'a' (so 'a'/'A' -> 10). The input is not validated: a character that is
 // not a hex digit produces a meaningless value.
 static inline int
 ink_get_hex(char c)
 {
   if (!ParseRules::is_digit(c)) {
     const char lowered = ParseRules::ink_tolower(c);
     return static_cast<int>((lowered - 'a') + 10);
   }
   return static_cast<int>(c - '0');
 }

 int64_t ink_atoi64(const char *, const char **end = nullptr);
 uint64_t ink_atoui64(const char *);
 int64_t ink_atoi64(const char *, int);

 // Parses str with ink_atoi64() and saturates the result to the int range
 // [INT_MIN, INT_MAX].
 static inline int
 ink_atoi(const char *str)
 {
   const int64_t parsed = ink_atoi64(str);

   if (parsed > INT_MAX) {
     return INT_MAX;
   }
   if (parsed < INT_MIN) {
     return INT_MIN;
   }
   return static_cast<int>(parsed);
 }

 // Parses str/len with the two-argument ink_atoi64() overload and saturates
 // the result to the int range [INT_MIN, INT_MAX].
 static inline int
 ink_atoi(const char *str, int len)
 {
   const int64_t parsed = ink_atoi64(str, len);

   if (parsed > INT_MAX) {
     return INT_MAX;
   }
   if (parsed < INT_MIN) {
     return INT_MIN;
   }
   return static_cast<int>(parsed);
 }

 // Parses str with ink_atoui64() and saturates the result to the unsigned int
 // range [0, UINT_MAX].
 //
 // The previous implementation clamped at INT_MAX and cast through int, so
 // any value in (INT_MAX, UINT_MAX] was reported as INT_MAX even though it is
 // representable in the return type.
 static inline unsigned int
 ink_atoui(const char *str)
 {
   const uint64_t val = ink_atoui64(str);

   if (val > UINT_MAX) {
     return UINT_MAX;
   }
   return static_cast<unsigned int>(val);
 }
	/** @file

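	Table-driven character classification predicates (ParseRules), end-of-word
	aware string helpers, and clamping integer-parsing wrappers.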

	@section license License

	Licensed to the Apache Software Foundation (ASF) under one
	or more contributor license agreements. See the NOTICE file
	distributed with this work for additional information
	regarding copyright ownership. The ASF licenses this file
	to you under the Apache License, Version 2.0 (the
	"License"); you may not use this file except in compliance
	with the License. You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

	Unless required by applicable law or agreed to in writing, software
	distributed under the License is distributed on an "AS IS" BASIS,
	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	See the License for the specific language governing permissions and
	limitations under the License.
	*/

	#pragma once

	#include <cstring>
	// I had to move this here, because I really needed to avoid including ink_platform.h in here,
	// because of a conflict on linux/tcp.h vs netinet/tcp.h.
	#include <limits.h> // NOLINT(modernize-deprecated-headers)

	#include "tscore/ink_defs.h"
	#include "tscore/ink_apidefs.h"

	// Result type of the ParseRules predicates and element type of the
	// parseRulesCType[] classification table declared below.
	typedef unsigned int CTypeResult;

	// Set this to 0 to disable SI
	// decimal multipliers
	#define USE_SI_MULTIPLIERS 1

	// One single-bit mask per character class. The ParseRules::is_*() predicates
	// AND these masks against parseRulesCType[(unsigned char)c].
	#define is_char_BIT (1 << 0)
	#define is_upalpha_BIT (1 << 1)
	#define is_loalpha_BIT (1 << 2)
	#define is_alpha_BIT (1 << 3)
	#define is_digit_BIT (1 << 4)
	#define is_ctl_BIT (1 << 5)
	#define is_ws_BIT (1 << 6)
	#define is_hex_BIT (1 << 7)
	#define is_pchar_BIT (1 << 8)
	#define is_extra_BIT (1 << 9)
	#define is_safe_BIT (1 << 10)
	#define is_unsafe_BIT (1 << 11)
	#define is_national_BIT (1 << 12)
	#define is_reserved_BIT (1 << 13)
	#define is_unreserved_BIT (1 << 14)
	#define is_punct_BIT (1 << 15)
	#define is_end_of_url_BIT (1 << 16)
	#define is_tspecials_BIT (1 << 17)
	#define is_spcr_BIT (1 << 18)
	#define is_splf_BIT (1 << 19)
	#define is_wslfcr_BIT (1 << 20)
	#define is_eow_BIT (1 << 21)
	#define is_token_BIT (1 << 22)
	#define is_uri_BIT (1 << 23)
	#define is_sep_BIT (1 << 24)
	#define is_empty_BIT (1 << 25)
	#define is_alnum_BIT (1 << 26)
	#define is_space_BIT (1 << 27)
	#define is_control_BIT (1 << 28)
	#define is_mime_sep_BIT (1 << 29)
	#define is_http_field_name_BIT (1 << 30)
	/* shut up the DEC compiler */
	// Bit 31 is shifted as an unsigned CTypeResult: shifting a plain (signed)
	// int 1 left by 31 would not fit in a 32-bit int.
	#define is_http_field_value_BIT (((CTypeResult)1) << 31)

	// Classification table, indexed by the character value cast to unsigned char.
	extern ink_undoc_liapi const CTypeResult parseRulesCType[];
	// Case-mapping tables used by ParseRules::ink_toupper()/ink_tolower(),
	// indexed the same way.
	inkcoreapi extern const char parseRulesCTypeToUpper[];
	inkcoreapi extern const char parseRulesCTypeToLower[];

	// Collection of static character/string classification helpers.
	//
	// Unless COMPILE_PARSE_RULES is defined, the single-character predicates are
	// lookups in parseRulesCType[]; the character sets listed in the comments
	// below describe the direct (COMPILE_PARSE_RULES) implementations found in
	// the inline definitions. The class is declared noncopyable.
	//
	// The pointer declarators on the string helpers had been lost (the
	// parameters and return types read `const char` instead of `const char *`),
	// so the declarations did not match their out-of-class definitions; they
	// are restored here.
	class ParseRules
	{
	public:
	  ParseRules();

	  ////////////////////////////
	  // whitespace definitions //
	  ////////////////////////////

	  enum {
	    CHAR_SP = 32, /* space */
	    CHAR_HT = 9,  /* horizontal tab */
	    CHAR_LF = 10, /* line feed */
	    CHAR_VT = 11, /* vertical tab */
	    CHAR_NP = 12, /* new page */
	    CHAR_CR = 13  /* carriage return */
	  };

	  /////////////////////
	  // character tests //
	  /////////////////////

	  // Returns the bits of `bit` that are set for `c` in parseRulesCType[].
	  static CTypeResult is_type(char c, uint32_t bit);

	  static CTypeResult is_char(char c);             // ASCII 0-127
	  static CTypeResult is_upalpha(char c);          // A-Z
	  static CTypeResult is_loalpha(char c);          // a-z
	  static CTypeResult is_alpha(char c);            // A-Z,a-z
	  static CTypeResult is_digit(char c);            // 0-9
	  static CTypeResult is_ctl(char c);              // ASCII 0-31,127 (includes ws)
	  static CTypeResult is_hex(char c);              // 0-9,A-F,a-f
	  static CTypeResult is_ws(char c);               // SP,HT
	  static CTypeResult is_cr(char c);               // CR
	  static CTypeResult is_lf(char c);               // LF
	  static CTypeResult is_spcr(char c);             // SP,CR
	  static CTypeResult is_splf(char c);             // SP,LF
	  static CTypeResult is_wslfcr(char c);           // SP,HT,LF,CR
	  static CTypeResult is_tspecials(char c);        // HTTP chars that need quoting
	  static CTypeResult is_token(char c);            // token (not CTL or specials)
	  static CTypeResult is_extra(char c);            // !,*,QUOT,(,),COMMA
	  static CTypeResult is_safe(char c);             // [$-_.+]
	  static CTypeResult is_unsafe(char c);           // CTL,SP,DBLQUOT,#,%,<,>
	  static CTypeResult is_national(char c);         // {,},|,BACKSLASH,^,~,[,],`
	  static CTypeResult is_reserved(char c);         // ;,/,?,:,@,&,=
	  static CTypeResult is_unreserved(char c);       // alpha,digit,safe,extra,nat.
	  static CTypeResult is_punct(char c);            // !"#%&'()*+,-./:;<=>?@[\]^_`{|}~ (no '$')
	  static CTypeResult is_end_of_url(char c);       // SP or any CTL (incl. NUL,CR,LF)
	  static CTypeResult is_eow(char c);              // NUL,CR,LF
	  static CTypeResult is_uri(char c);              // A-Z,a-z,0-9 :/?#[]@!$&'()*+,;=-._~%
	  static CTypeResult is_sep(char c);              // NUL,COMMA,':','!',wslfcr
	  static CTypeResult is_empty(char c);            // wslfcr,#
	  static CTypeResult is_alnum(char c);            // 0-9,A-Z,a-z
	  static CTypeResult is_space(char c);            // ' ' HT,VT,NP,CR,LF
	  static CTypeResult is_control(char c);          // 0-31 127
	  static CTypeResult is_mime_sep(char c);         // ()<>@,;\"/[]?{} SP HT
	  static CTypeResult is_http_field_name(char c);  // not : or mime_sep except for @
	  static CTypeResult is_http_field_value(char c); // not CR, LF, comma, or "

	  //////////////////
	  // string tests //
	  //////////////////

	  // These read up to three characters starting at `seq`.
	  static CTypeResult is_escape(const char *seq); // %<hex><hex>
	  static CTypeResult is_uchar(const char *seq);  // starts unreserved or is escape
	  static CTypeResult is_pchar(const char *seq);  // uchar,:,@,&,=,+ (see code)

	  ///////////////////
	  // unimplemented //
	  ///////////////////

	  // static CTypeResult is_comment(const char * str);
	  // static CTypeResult is_ctext(const char * str);

	  ////////////////
	  // operations //
	  ////////////////

	  // "eow" = end of word: NUL, CR or LF (see is_eow).
	  static CTypeResult strncasecmp_eow(const char *s1, const char *s2, int n);
	  static const char *strcasestr(const char *s1, const char *s2);
	  static int strlen_eow(const char *s);
	  static const char *strstr_eow(const char *s1, const char *s2);

	  static char ink_toupper(char c);
	  static char ink_tolower(char c);
	  // Hand-written scans; note they shadow the C library names inside this class.
	  static const char *memchr(const char *s, char c, int max_length);
	  static const char *strchr(const char *s, char c);

	  // noncopyable
	  ParseRules(const ParseRules &) = delete;
	  ParseRules &operator=(const ParseRules &) = delete;
	};

	/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
	* inline functions definitions
	* * * * * * * * * * * * * * * * * * * * * * * * * * * */

	// Tests `c` against an arbitrary combination of is_*_BIT masks.
	// Returns the subset of `bitmask` that is set for `c` in parseRulesCType[]
	// (zero when none of the requested bits is set).
	inline CTypeResult
	ParseRules::is_type(char c, uint32_t bitmask)
	{
	  const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
	  return flags & bitmask;
	}

	// 7-bit ASCII test (0-127). Nonzero means "yes"; the table path returns the
	// raw mask bit, the computed path returns 0 or 1.
	inline CTypeResult
	ParseRules::is_char(char c)
	{
	#ifdef COMPILE_PARSE_RULES
	  // Direct computation: the high bit must be clear.
	  const bool high_bit_clear = !(c & 0x80);
	  return high_bit_clear;
	#else
	  const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
	  return flags & is_char_BIT;
	#endif
	}

	// Upper-case letter test (A-Z).
	inline CTypeResult
	ParseRules::is_upalpha(char c)
	{
	#ifdef COMPILE_PARSE_RULES
	  return ('A' <= c && c <= 'Z');
	#else
	  const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
	  return flags & is_upalpha_BIT;
	#endif
	}

	// Lower-case letter test (a-z).
	inline CTypeResult
	ParseRules::is_loalpha(char c)
	{
	#ifdef COMPILE_PARSE_RULES
	  return ('a' <= c && c <= 'z');
	#else
	  const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
	  return flags & is_loalpha_BIT;
	#endif
	}

	// Letter test (A-Z or a-z).
	// The computed branch previously contained a backslash-escaped `||`
	// operator, which is not valid C++.
	inline CTypeResult
	ParseRules::is_alpha(char c)
	{
	#ifdef COMPILE_PARSE_RULES
	  return (is_upalpha(c) || is_loalpha(c));
	#else
	  const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
	  return flags & is_alpha_BIT;
	#endif
	}

	// Decimal digit test (0-9).
	inline CTypeResult
	ParseRules::is_digit(char c)
	{
	#ifdef COMPILE_PARSE_RULES
	  return ('0' <= c && c <= '9');
	#else
	  const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
	  return flags & is_digit_BIT;
	#endif
	}

	// Letter-or-digit test (0-9, A-Z, a-z).
	// The computed branch previously contained a backslash-escaped `||`
	// operator, which is not valid C++.
	inline CTypeResult
	ParseRules::is_alnum(char c)
	{
	#ifdef COMPILE_PARSE_RULES
	  return (is_alpha(c) || is_digit(c));
	#else
	  const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
	  return flags & is_alnum_BIT;
	#endif
	}

	// Control character test: ASCII 0-31 and 127 (DEL).
	// The computed branch previously contained a backslash-escaped `||`
	// operator, which is not valid C++.
	inline CTypeResult
	ParseRules::is_ctl(char c)
	{
	#ifdef COMPILE_PARSE_RULES
	  // The high-bit check keeps bytes >= 0x80 out of the "<= 31" range when
	  // plain char is signed.
	  return ((!(c & 0x80) && c <= 31) || c == 127);
	#else
	  const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
	  return flags & is_ctl_BIT;
	#endif
	}

	// Linear whitespace test: space or horizontal tab.
	// The computed branch previously contained a backslash-escaped `||`
	// operator, which is not valid C++.
	inline CTypeResult
	ParseRules::is_ws(char c)
	{
	#ifdef COMPILE_PARSE_RULES
	  return (c == CHAR_SP || c == CHAR_HT);
	#else
	  const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
	  return flags & is_ws_BIT;
	#endif
	}

	// Hexadecimal digit test (0-9, A-F, a-f).
	// The computed branch previously contained backslash-escaped `||`
	// operators, which are not valid C++.
	inline CTypeResult
	ParseRules::is_hex(char c)
	{
	#ifdef COMPILE_PARSE_RULES
	  return ((c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f') || (c >= '0' && c <= '9'));
	#else
	  const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
	  return flags & is_hex_BIT;
	#endif
	}

	// Carriage-return test; always computed directly (no table lookup).
	inline CTypeResult
	ParseRules::is_cr(char c)
	{
	  const bool matches_cr = (c == CHAR_CR);
	  return matches_cr;
	}

	// Line-feed test; always computed directly (no table lookup).
	inline CTypeResult
	ParseRules::is_lf(char c)
	{
	  const bool matches_lf = (c == CHAR_LF);
	  return matches_lf;
	}

	// Space-or-line-feed test.
	// The computed branch previously contained a backslash-escaped `||`
	// operator, which is not valid C++.
	inline CTypeResult
	ParseRules::is_splf(char c)
	{
	#ifdef COMPILE_PARSE_RULES
	  return (c == CHAR_SP || c == CHAR_LF);
	#else
	  const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
	  return flags & is_splf_BIT;
	#endif
	}

	// Space-or-carriage-return test.
	// The computed branch previously contained a backslash-escaped `||`
	// operator, which is not valid C++.
	inline CTypeResult
	ParseRules::is_spcr(char c)
	{
	#ifdef COMPILE_PARSE_RULES
	  return (c == CHAR_SP || c == CHAR_CR);
	#else
	  const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
	  return flags & is_spcr_BIT;
	#endif
	}

	// Whitespace-or-line-ending test: SP, HT, LF or CR.
	// The computed branch previously contained backslash-escaped `||`
	// operators, which are not valid C++.
	inline CTypeResult
	ParseRules::is_wslfcr(char c)
	{
	#ifdef COMPILE_PARSE_RULES
	  // Union of the three narrower classes.
	  return ParseRules::is_ws(c) || ParseRules::is_splf(c) || ParseRules::is_spcr(c);
	#else
	  const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
	  return flags & is_wslfcr_BIT;
	#endif
	}

	// "extra" URL characters: ! * ' ( ) ,
	inline CTypeResult
	ParseRules::is_extra(char c)
	{
	#ifdef COMPILE_PARSE_RULES
	  return (c == '!' || c == '*' || c == '\'' || c == '(' || c == ')' || c == ',');
	#else
	  const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
	  return flags & is_extra_BIT;
	#endif
	}

	// "safe" URL characters: $ - _ . +
	// The computed branch previously contained backslash-escaped `||`
	// operators, which are not valid C++.
	inline CTypeResult
	ParseRules::is_safe(char c)
	{
	#ifdef COMPILE_PARSE_RULES
	  return (c == '$' || c == '-' || c == '_' || c == '.' || c == '+');
	#else
	  const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
	  return flags & is_safe_BIT;
	#endif
	}

	// "unsafe" URL characters: any control character, plus SP " # % < >
	inline CTypeResult
	ParseRules::is_unsafe(char c)
	{
	#ifdef COMPILE_PARSE_RULES
	  return (is_ctl(c) || c == ' ' || c == '\"' || c == '#' || c == '%' || c == '<' || c == '>');
	#else
	  const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
	  return flags & is_unsafe_BIT;
	#endif
	}

	// "reserved" URL characters: ; / ? : @ & =
	inline CTypeResult
	ParseRules::is_reserved(char c)
	{
	#ifdef COMPILE_PARSE_RULES
	  return (c == ';' || c == '/' || c == '?' || c == ':' || c == '@' || c == '&' || c == '=');
	#else
	  const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
	  return flags & is_reserved_BIT;
	#endif
	}

	// "national" URL characters: { } | \ ^ ~ [ ] `
	// The vertical-bar case label had been garbled into an invalid escape
	// sequence; it is restored to a plain '|' here.
	inline CTypeResult
	ParseRules::is_national(char c)
	{
	#ifdef COMPILE_PARSE_RULES
	  switch (c) {
	  case '{':
	  case '}':
	  case '|':
	  case '\\':
	  case '^':
	  case '~':
	  case '[':
	  case ']':
	  case '`':
	    return (true);
	  }
	  return (false);
	#else
	  const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
	  return flags & is_national_BIT;
	#endif
	}

	// "unreserved" URL characters: alpha, digit, safe, extra or national.
	// The computed branch previously contained backslash-escaped `||`
	// operators, which are not valid C++.
	inline CTypeResult
	ParseRules::is_unreserved(char c)
	{
	#ifdef COMPILE_PARSE_RULES
	  return (is_alpha(c) || is_digit(c) || is_safe(c) || is_extra(c) || is_national(c));
	#else
	  const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
	  return flags & is_unreserved_BIT;
	#endif
	}

	// Punctuation test. The computed set is:
	//   ! " # % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~
	// ('$' is not in the list). The vertical-bar case label had been garbled
	// into an invalid escape sequence; it is restored to a plain '|' here.
	inline CTypeResult
	ParseRules::is_punct(char c)
	{
	#ifdef COMPILE_PARSE_RULES
	  switch (c) {
	  case '!':
	  case '"':
	  case '#':
	  case '%':
	  case '&':
	  case '\'':
	  case '(':
	  case ')':
	  case '*':
	  case '+':
	  case ',':
	  case '-':
	  case '.':
	  case '/':
	  case ':':
	  case ';':
	  case '<':
	  case '=':
	  case '>':
	  case '?':
	  case '@':
	  case '[':
	  case '\\':
	  case ']':
	  case '^':
	  case '_':
	  case '`':
	  case '{':
	  case '|':
	  case '}':
	  case '~':
	    return (true);
	  }
	  return (false);
	#else
	  const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
	  return flags & is_punct_BIT;
	#endif
	}

	// URL terminator test: NUL, LF, SP, or any other control character.
	// The computed branch previously contained backslash-escaped `||`
	// operators, which are not valid C++.
	inline CTypeResult
	ParseRules::is_end_of_url(char c)
	{
	#ifdef COMPILE_PARSE_RULES
	  return (c == '\0' || c == '\n' || c == ' ' || ParseRules::is_ctl(c));
	#else
	  const CTypeResult flags = parseRulesCType[static_cast<unsigned char>(c)];
	  return flags & is_end_of_url_BIT;
	#endif
	}

	// Tells whether <seq> starts with a percent escape: a '%' followed by two
	// characters that both satisfy is_hex().
	//
	// The characters are examined left to right and the check stops at the
	// first one that fails, so seq[1] is only read after a '%' and seq[2] only
	// after a hex digit.
	inline CTypeResult
	ParseRules::is_escape(const char *seq)
	{
	  if (seq[0] != '%') {
	    return false;
	  }
	  if (!is_hex(seq[1])) {
	    return false;
	  }
	  return is_hex(seq[2]) != 0;
	}

	// Tells whether <seq> starts with a "uchar": either its first character is
	// is_unreserved(), or <seq> begins with a percent escape (see is_escape()).
	inline CTypeResult
	ParseRules::is_uchar(const char *seq)
	{
	  return (is_unreserved(seq[0]) || is_escape(seq));
	}

	//
	// have to cheat on this one
	//
	// Tells whether <seq> starts with a "pchar".
	//
	// Without COMPILE_PARSE_RULES: when the first character is '%', the result
	// is whether the next two characters are both hex digits (seq[2] is only
	// read when seq[1] is a hex digit); otherwise it is the is_pchar_BIT
	// portion of the parseRulesCType entry for the first character.
	//
	// With COMPILE_PARSE_RULES only the first character is examined: it must be
	// is_unreserved() or one of ':', '@', '&', '=', '+'.
	inline CTypeResult
	ParseRules::is_pchar(const char *seq)
	{
	#ifndef COMPILE_PARSE_RULES
	  if (*seq == '%') {
	    return is_hex(seq[1]) && is_hex(seq[2]);
	  }
	  return parseRulesCType[static_cast<uint8_t>(*seq)] & is_pchar_BIT;
	#else
	  const char first = seq[0];
	  return is_unreserved(first) || first == ':' || first == '@' || first == '&' || first == '=' || first == '+';
	#endif
	}

	// Tells whether <c> is one of the "tspecials" characters.
	//
	// Without COMPILE_PARSE_RULES the answer is the is_tspecials_BIT portion of
	// the parseRulesCType entry for <c> (non-zero when the bit is set). With
	// COMPILE_PARSE_RULES it is computed directly from the case list below,
	// which also includes CHAR_SP and CHAR_HT.
	inline CTypeResult
	ParseRules::is_tspecials(char c)
	{
	#ifndef COMPILE_PARSE_RULES
	  return parseRulesCType[static_cast<unsigned char>(c)] & is_tspecials_BIT;
	#else
	  switch (c) {
	  // whitespace
	  case CHAR_SP:
	  case CHAR_HT:
	  // bracketing characters
	  case '(':
	  case ')':
	  case '<':
	  case '>':
	  case '[':
	  case ']':
	  case '{':
	  case '}':
	  // remaining separators
	  case '@':
	  case ',':
	  case ';':
	  case ':':
	  case '\\':
	  case '"':
	  case '/':
	  case '?':
	  case '=':
	    return true;
	  default:
	    return false;
	  }
	#endif
	}

	// Tells whether <c> may appear in a token.
	//
	// Without COMPILE_PARSE_RULES the answer is the is_token_BIT portion of the
	// parseRulesCType entry for <c> (non-zero when the bit is set). With
	// COMPILE_PARSE_RULES it is computed directly: <c> must satisfy is_char()
	// and must be neither is_ctl() nor is_tspecials().
	inline CTypeResult
	ParseRules::is_token(char c)
	{
	#ifndef COMPILE_PARSE_RULES
	  return (parseRulesCType[(unsigned char)c] & is_token_BIT);
	#else
	  return (is_char(c) && !(is_ctl(c) || is_tspecials(c)));
	#endif
	}

	// Returns the upper-case form of <c>.
	//
	// Without COMPILE_PARSE_RULES the result is read from the
	// parseRulesCTypeToUpper table. With COMPILE_PARSE_RULES only 'a'..'z' are
	// shifted to 'A'..'Z'; every other character is returned unchanged.
	inline char
	ParseRules::ink_toupper(char c)
	{
	#ifndef COMPILE_PARSE_RULES
	  return parseRulesCTypeToUpper[static_cast<unsigned char>(c)];
	#else
	  const bool is_lower = (c >= 'a' && c <= 'z');
	  return is_lower ? static_cast<char>(c - ('a' - 'A')) : c;
	#endif
	}

	// Returns the lower-case form of <c>.
	//
	// Without COMPILE_PARSE_RULES the result is read from the
	// parseRulesCTypeToLower table. With COMPILE_PARSE_RULES only 'A'..'Z' are
	// shifted to 'a'..'z'; every other character is returned unchanged.
	inline char
	ParseRules::ink_tolower(char c)
	{
	#ifndef COMPILE_PARSE_RULES
	  return parseRulesCTypeToLower[static_cast<unsigned char>(c)];
	#else
	  const bool is_upper = (c >= 'A' && c <= 'Z');
	  return is_upper ? static_cast<char>(c + ('a' - 'A')) : c;
	#endif
	}

	// Tells whether <c> terminates a word/line for the *_eow() helpers.
	//
	// Without COMPILE_PARSE_RULES the answer is the is_eow_BIT portion of the
	// parseRulesCType entry for <c> (non-zero when the bit is set). With
	// COMPILE_PARSE_RULES it is computed directly: '\0', '\r' or '\n'.
	inline CTypeResult
	ParseRules::is_eow(char c)
	{
	#ifndef COMPILE_PARSE_RULES
	  return (parseRulesCType[(unsigned char)c] & is_eow_BIT);
	#else
	  return (c == '\0' || c == '\r' || c == '\n');
	#endif
	}

	// Tells whether <c> is a character accepted in a URI.
	//
	// Without COMPILE_PARSE_RULES the answer is the is_uri_BIT portion of the
	// parseRulesCType entry for <c> (non-zero when the bit is set). With
	// COMPILE_PARSE_RULES it is computed directly: any is_alnum() character or
	// one of the punctuation characters listed in the switch below.
	inline CTypeResult
	ParseRules::is_uri(char c)
	{
	#ifndef COMPILE_PARSE_RULES
	  return parseRulesCType[static_cast<unsigned char>(c)] & is_uri_BIT;
	#else
	  switch (c) {
	  case ':':
	  case '/':
	  case '?':
	  case '#':
	  case '[':
	  case ']':
	  case '@':
	  case '!':
	  case '$':
	  case '&':
	  case '\'':
	  case '(':
	  case ')':
	  case '*':
	  case '+':
	  case ',':
	  case ';':
	  case '=':
	  case '-':
	  case '.':
	  case '_':
	  case '~':
	  case '%':
	    return true;
	  default:
	    // Not in the punctuation list: accepted only if alphanumeric.
	    return is_alnum(c) != 0;
	  }
	#endif
	}

	// Tells whether <c> is a separator.
	//
	// Without COMPILE_PARSE_RULES the answer is the is_sep_BIT portion of the
	// parseRulesCType entry for <c> (non-zero when the bit is set). With
	// COMPILE_PARSE_RULES it is computed directly: '\0', ',', ':', '!' or any
	// is_wslfcr() character.
	inline CTypeResult
	ParseRules::is_sep(char c)
	{
	#ifndef COMPILE_PARSE_RULES
	  return (parseRulesCType[(unsigned char)c] & is_sep_BIT);
	#else
	  return (!c || c == ',' || c == ':' || c == '!' || is_wslfcr(c));
	#endif
	}

	// Tells whether <c> is classified as "empty".
	//
	// Without COMPILE_PARSE_RULES the answer is the is_empty_BIT portion of the
	// parseRulesCType entry for <c> (non-zero when the bit is set). With
	// COMPILE_PARSE_RULES it is computed directly: '#' or any is_wslfcr()
	// character.
	inline CTypeResult
	ParseRules::is_empty(char c)
	{
	#ifndef COMPILE_PARSE_RULES
	  return (parseRulesCType[(unsigned char)c] & is_empty_BIT);
	#else
	  return (c == '#' || is_wslfcr(c));
	#endif
	}

	// Tells whether <c> is a whitespace character.
	//
	// Without COMPILE_PARSE_RULES the answer is the is_space_BIT portion of the
	// parseRulesCType entry for <c> (non-zero when the bit is set). With
	// COMPILE_PARSE_RULES it is computed directly: CHAR_SP, CHAR_HT, CHAR_LF,
	// CHAR_VT, CHAR_NP or CHAR_CR.
	inline CTypeResult
	ParseRules::is_space(char c)
	{
	#ifndef COMPILE_PARSE_RULES
	  return parseRulesCType[static_cast<unsigned char>(c)] & is_space_BIT;
	#else
	  return (c == CHAR_SP || c == CHAR_HT || c == CHAR_LF || c == CHAR_VT || c == CHAR_NP || c == CHAR_CR);
	#endif
	}

	// Tells whether <c> is a control character.
	//
	// Without COMPILE_PARSE_RULES the answer is the is_control_BIT portion of
	// the parseRulesCType entry for <c> (non-zero when the bit is set). With
	// COMPILE_PARSE_RULES it is computed directly: the unsigned value of <c> is
	// below 32 or equal to 127.
	inline CTypeResult
	ParseRules::is_control(char c)
	{
	#ifndef COMPILE_PARSE_RULES
	  return (parseRulesCType[(unsigned char)c] & is_control_BIT);
	#else
	  if (((unsigned char)c) < 32 || ((unsigned char)c) == 127)
	    return true;
	  return false;
	#endif
	}

	// Tells whether <c> is a MIME separator.
	//
	// Without COMPILE_PARSE_RULES the answer is the is_mime_sep_BIT portion of
	// the parseRulesCType entry for <c> (non-zero when the bit is set). With
	// COMPILE_PARSE_RULES it is computed directly from the characters compared
	// below, which include space and tab.
	inline CTypeResult
	ParseRules::is_mime_sep(char c)
	{
	#ifndef COMPILE_PARSE_RULES
	  return (parseRulesCType[(unsigned char)c] & is_mime_sep_BIT);
	#else
	  if ((c == '(') || (c == ')') || (c == '<') || (c == '>') || (c == '@') || (c == ',') || (c == ';') || (c == '\\') ||
	      (c == '\"') || (c == '/') || (c == '[') || (c == ']') || (c == '?') || (c == '{') || (c == '}') || (c == ' ') || (c == '\t'))
	    return true;
	  return false;
	#endif
	}

	// Tells whether <c> is accepted in an HTTP field name.
	//
	// Without COMPILE_PARSE_RULES the answer is the is_http_field_name_BIT
	// portion of the parseRulesCType entry for <c> (non-zero when the bit is
	// set). With COMPILE_PARSE_RULES it is computed directly: everything is
	// accepted except ':' and the is_mime_sep() characters, with '@' being
	// accepted even though it is a MIME separator.
	inline CTypeResult
	ParseRules::is_http_field_name(char c)
	{
	#ifndef COMPILE_PARSE_RULES
	  return (parseRulesCType[(unsigned char)c] & is_http_field_name_BIT);
	#else
	  if ((c == ':') || (is_mime_sep(c) && (c != '@')))
	    return false;
	  return true;
	#endif
	}

	// Tells whether <c> is accepted in an HTTP field value.
	//
	// Without COMPILE_PARSE_RULES the answer is the is_http_field_value_BIT
	// portion of the parseRulesCType entry for <c> (non-zero when the bit is
	// set). With COMPILE_PARSE_RULES it is computed directly: everything is
	// accepted except CHAR_CR, CHAR_LF, '"' and ','.
	inline CTypeResult
	ParseRules::is_http_field_value(char c)
	{
	#ifndef COMPILE_PARSE_RULES
	  return static_cast<CTypeResult>(parseRulesCType[static_cast<unsigned char>(c)] & is_http_field_value_BIT);
	#else
	  const bool excluded = (c == CHAR_CR || c == CHAR_LF || c == '\"' || c == ',');
	  return !excluded;
	#endif
	}

	//////////////////////////////////////////////////////////////////////////////
	//
	// inline CTypeResult ParseRules::strncasecmp_eow(s1, s2, count)
	//
	// This wacky little function compares if two strings <s1> and <s2> match
	// (case-insensitively) up to <count> characters long, stopping not only
	// at the end of string ('\0'), but also at end of line (CR or LF).
	//
	//////////////////////////////////////////////////////////////////////////////

	// Case-insensitively compares the first <count> characters of <s1> and
	// <s2>.
	//
	// Returns true when all <count> positions match after ink_tolower(). At the
	// first position where they differ, the result is true only if both
	// characters are terminators according to is_eow().
	//
	// NOTE: a position where both strings hold the *same* terminator counts as
	// a match and the scan continues, so the caller must make sure <count> does
	// not run past the end of both buffers.
	inline CTypeResult
	ParseRules::strncasecmp_eow(const char *s1, const char *s2, int count)
	{
	  for (int i = 0; i < count; i++) {
	    const char &a = s1[i];
	    const char &b = s2[i];

	    ///////////////////////////////////////////////////////////////
	    // if they are different; only match if both are terminators //
	    ///////////////////////////////////////////////////////////////
	    if (ink_tolower(a) != ink_tolower(b))
	      return (is_eow(a) && is_eow(b));
	  }
	  return (true);
	}

	//////////////////////////////////////////////////////////////////////////////
	//
	// strlen_eow()
	//
	// return the number of characters in s before the first '\0', '\r' or '\n'
	//////////////////////////////////////////////////////////////////////////////
	// Returns the index of the first character of <s> for which is_eow() holds,
	// i.e. the number of characters that precede the terminator. The scan has
	// no other bound, so <s> must contain such a terminator.
	inline int
	ParseRules::strlen_eow(const char *s)
	{
	  int len = 0;
	  while (!is_eow(s[len])) {
	    ++len;
	  }
	  return len;
	}

	//////////////////////////////////////////////////////////////////////////////
	//
	// strstr_eow()
	//
	// This function is like strstr(), except that it compares
	// case-insensitively and accepts strings that are terminated
	// with '\r', '\n' or null.
	// It returns a pointer to the first occurrence of s2 within s1 (or null).
	//////////////////////////////////////////////////////////////////////////////
	// Case-insensitive search for <s2> inside <s1>, where both strings end at
	// the first is_eow() character.
	//
	// Each position of <s1> whose lower-cased character equals the lower-cased
	// first character of <s2> is verified with strncasecmp_eow() over
	// strlen_eow(s2) characters. Returns a pointer into <s1> at the first
	// verified position, or nullptr when there is none.
	inline const char *
	ParseRules::strstr_eow(const char *s1, const char *s2)
	{
	  int i1;

	  int s2_len = strlen_eow(s2);

	  for (i1 = 0; !is_eow(s1[i1]); i1++)
	    if (ink_tolower(s1[i1]) == ink_tolower(s2[0]))
	      if (strncasecmp_eow(&s1[i1], &s2[0], s2_len))
	        return (&s1[i1]);

	  return (nullptr);
	}

	// Case-insensitive search for the NUL-terminated <s2> inside the
	// NUL-terminated <s1>.
	//
	// Each position of <s1> whose lower-cased character equals the lower-cased
	// first character of <s2> is verified with strncasecmp_eow() over
	// strlen(s2) characters. Returns a pointer into <s1> at the first verified
	// position, or nullptr when there is none.
	//
	// NOTE: because verification goes through strncasecmp_eow(), a position
	// where the two strings hold different is_eow() characters is treated as a
	// match.
	inline const char *
	ParseRules::strcasestr(const char *s1, const char *s2)
	{
	  int i1;

	  size_t s2_len = strlen(s2);

	  for (i1 = 0; s1[i1] != '\0'; i1++)
	    if (ink_tolower(s1[i1]) == ink_tolower(s2[0]))
	      if (strncasecmp_eow(&s1[i1], &s2[0], (int)s2_len))
	        return (&s1[i1]);

	  return (nullptr);
	}

	// Returns a pointer to the first occurrence of <c> among the first
	// <max_length> characters of <s>, or nullptr when it does not occur there
	// (including when <max_length> is zero or negative).
	inline const char *
	ParseRules::memchr(const char *s, char c, int max_length)
	{
	  int pos = 0;
	  while (pos < max_length) {
	    if (s[pos] == c) {
	      return s + pos;
	    }
	    ++pos;
	  }
	  return nullptr;
	}

	// Returns a pointer to the first occurrence of <c> in the NUL-terminated
	// string <s>, or nullptr when it does not occur. The terminating '\0' is
	// never examined as a candidate, so searching for '\0' yields nullptr.
	inline const char *
	ParseRules::strchr(const char *s, char c)
	{
	  for (const char *cursor = s; *cursor != '\0'; ++cursor) {
	    if (*cursor == c) {
	      return cursor;
	    }
	  }
	  return nullptr;
	}

	// Converts one hexadecimal digit character to its numeric value.
	//
	// Decimal digits map to 0..9; anything else is lower-cased and measured
	// from 'a', so 'a'/'A' yields 10. No validation is performed: the result
	// is only meaningful when <c> really is a hex digit.
	static inline int
	ink_get_hex(char c)
	{
	  if (!ParseRules::is_digit(c)) {
	    const char lowered = ParseRules::ink_tolower(c);
	    return static_cast<int>(lowered - 'a') + 10;
	  }
	  return static_cast<int>(c - '0');
	}

	// String-to-integer conversion routines. Only the declarations appear
	// here; the inline ink_atoi()/ink_atoui() wrappers below clamp their
	// results to the int range.
	//
	// NOTE(review): <end> is presumably an optional out-parameter receiving the
	// position where parsing stopped -- confirm against the definition.
	int64_t ink_atoi64(const char *, const char **end = nullptr);
	uint64_t ink_atoui64(const char *);
	// Variant taking an explicit length (ink_atoi(str, len) forwards its len).
	int64_t ink_atoi64(const char *, int);

	// Parses <str> with ink_atoi64() and narrows the result to int, saturating
	// at INT_MIN / INT_MAX when the 64-bit value does not fit.
	static inline int
	ink_atoi(const char *str)
	{
	  const int64_t parsed = ink_atoi64(str);

	  if (parsed < INT_MIN) {
	    return INT_MIN;
	  }
	  if (parsed > INT_MAX) {
	    return INT_MAX;
	  }
	  return static_cast<int>(parsed);
	}

	// Parses <str> with the length-taking ink_atoi64(str, len) and narrows the
	// result to int, saturating at INT_MIN / INT_MAX when the 64-bit value does
	// not fit.
	static inline int
	ink_atoi(const char *str, int len)
	{
	  const int64_t parsed = ink_atoi64(str, len);

	  if (parsed < INT_MIN) {
	    return INT_MIN;
	  }
	  if (parsed > INT_MAX) {
	    return INT_MAX;
	  }
	  return static_cast<int>(parsed);
	}

	// Parses <str> with ink_atoui64() and narrows the result to unsigned int.
	//
	// Values above INT_MAX are reported as INT_MAX.
	// NOTE(review): the ceiling is INT_MAX rather than UINT_MAX even though the
	// return type is unsigned -- confirm whether callers rely on this.
	static inline unsigned int
	ink_atoui(const char *str)
	{
	  const uint64_t parsed = ink_atoui64(str);

	  return (parsed > INT_MAX) ? INT_MAX : static_cast<int>(parsed);
	}