| /** @file |
| |
| A brief file description |
| |
| @section license License |
| |
| Licensed to the Apache Software Foundation (ASF) under one |
| or more contributor license agreements. See the NOTICE file |
| distributed with this work for additional information |
| regarding copyright ownership. The ASF licenses this file |
| to you under the Apache License, Version 2.0 (the |
| "License"); you may not use this file except in compliance |
| with the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| */ |
| |
#include "tscore/ink_platform.h"
#include "tscore/HashFNV.h"
#include "tscore/Diags.h"
#include "tscore/ink_memory.h"
#include <cstdio>
#include <strings.h>
#include "tscore/Allocator.h"
#include "HTTP.h"
#include "HdrToken.h"
#include "MIME.h"
#include "tscore/Regex.h"
#include "URL.h"
| |
| /* |
| You SHOULD add to _hdrtoken_commonly_tokenized_strs, with the same ordering |
| ** important, ordering matters ** |
| |
| You want a regexp like 'Accept' after "greedier" choices so it doesn't match 'Accept-Ranges' earlier than |
| it should. The regexp are anchored (^Accept), but I dont see a way with the current system to |
| match the word ONLY without making _hdrtoken_strs a real PCRE, but then that breaks the hashing |
| hdrtoken_hash("^Accept$") != hdrtoken_hash("Accept") |
| |
| So, the current hack is to have "Accept" follow "Accept-.*", lame, I know |
| |
| /ericb |
| */ |
| |
| static const char *_hdrtoken_strs[] = { |
| // MIME Field names |
| "Accept-Charset", "Accept-Encoding", "Accept-Language", "Accept-Ranges", "Accept", "Age", "Allow", |
| "Approved", // NNTP |
| "Authorization", |
| "Bytes", // NNTP |
| "Cache-Control", "Client-ip", "Connection", "Content-Base", "Content-Encoding", "Content-Language", "Content-Length", |
| "Content-Location", "Content-MD5", "Content-Range", "Content-Type", |
| "Control", // NNTP |
| "Cookie", "Date", |
| "Distribution", // NNTP |
| "Etag", "Expect", "Expires", |
| "Followup-To", // NNTP |
| "From", "Host", "If-Match", "If-Modified-Since", "If-None-Match", "If-Range", "If-Unmodified-Since", "Keep-Alive", |
| "Keywords", // NNTP |
| "Last-Modified", |
| "Lines", // NNTP |
| "Location", "Max-Forwards", |
| "Message-ID", // NNTP |
| "MIME-Version", |
| "Newsgroups", // NNTP |
| "Organization", // NNTP |
| "Path", // NNTP |
| "Pragma", "Proxy-Authenticate", "Proxy-Authorization", "Proxy-Connection", "Public", "Range", |
| "References", // NNTP |
| "Referer", |
| "Reply-To", // NNTP |
| "Retry-After", |
| "Sender", // NNTP |
| "Server", "Set-Cookie", |
| "Subject", // NNTP |
| "Summary", // NNTP |
| "Transfer-Encoding", "Upgrade", "User-Agent", "Vary", "Via", "Warning", "Www-Authenticate", |
| "Xref", // NNTP |
| "@Ats-Internal", // Internal Hack |
| |
| // Accept-Encoding |
| "compress", "deflate", "gzip", "identity", |
| |
| // Cache-Control flags |
| "max-age", "max-stale", "min-fresh", "must-revalidate", "no-cache", "no-store", "no-transform", "only-if-cached", "private", |
| "proxy-revalidate", "s-maxage", "need-revalidate-once", |
| |
| // HTTP miscellaneous |
| "none", "chunked", "close", |
| |
| // WS |
| "websocket", "Sec-WebSocket-Key", "Sec-WebSocket-Version", |
| |
| // HTTP/2 cleartext |
| MIME_UPGRADE_H2C_TOKEN, "HTTP2-Settings", |
| |
| // URL schemes |
| "file", "ftp", "gopher", "https", "http", "mailto", "news", "nntp", "prospero", "telnet", "tunnel", "wais", "pnm", "rtspu", |
| "rtsp", "mmsu", "mmst", "mms", "wss", "ws", |
| |
| // HTTP methods |
| "CONNECT", "DELETE", "GET", "POST", "HEAD", "OPTIONS", "PURGE", "PUT", "TRACE", "PUSH", |
| |
| // Header extensions |
| "X-ID", "X-Forwarded-For", "TE", "Strict-Transport-Security", "100-continue", |
| |
| // RFC-2739 |
| "Forwarded", |
| |
| // RFC-8470 |
| "Early-Data"}; |
| |
| static HdrTokenTypeBinding _hdrtoken_strs_type_initializers[] = { |
| {"file", HDRTOKEN_TYPE_SCHEME}, |
| {"ftp", HDRTOKEN_TYPE_SCHEME}, |
| {"gopher", HDRTOKEN_TYPE_SCHEME}, |
| {"http", HDRTOKEN_TYPE_SCHEME}, |
| {"https", HDRTOKEN_TYPE_SCHEME}, |
| {"mailto", HDRTOKEN_TYPE_SCHEME}, |
| {"news", HDRTOKEN_TYPE_SCHEME}, |
| {"nntp", HDRTOKEN_TYPE_SCHEME}, |
| {"prospero", HDRTOKEN_TYPE_SCHEME}, |
| {"telnet", HDRTOKEN_TYPE_SCHEME}, |
| {"tunnel", HDRTOKEN_TYPE_SCHEME}, |
| {"wais", HDRTOKEN_TYPE_SCHEME}, |
| {"pnm", HDRTOKEN_TYPE_SCHEME}, |
| {"rtsp", HDRTOKEN_TYPE_SCHEME}, |
| {"rtspu", HDRTOKEN_TYPE_SCHEME}, |
| {"mms", HDRTOKEN_TYPE_SCHEME}, |
| {"mmsu", HDRTOKEN_TYPE_SCHEME}, |
| {"mmst", HDRTOKEN_TYPE_SCHEME}, |
| {"wss", HDRTOKEN_TYPE_SCHEME}, |
| {"ws", HDRTOKEN_TYPE_SCHEME}, |
| |
| {"CONNECT", HDRTOKEN_TYPE_METHOD}, |
| {"DELETE", HDRTOKEN_TYPE_METHOD}, |
| {"GET", HDRTOKEN_TYPE_METHOD}, |
| {"HEAD", HDRTOKEN_TYPE_METHOD}, |
| {"OPTIONS", HDRTOKEN_TYPE_METHOD}, |
| {"POST", HDRTOKEN_TYPE_METHOD}, |
| {"PURGE", HDRTOKEN_TYPE_METHOD}, |
| {"PUT", HDRTOKEN_TYPE_METHOD}, |
| {"TRACE", HDRTOKEN_TYPE_METHOD}, |
| {"PUSH", HDRTOKEN_TYPE_METHOD}, |
| |
| {"max-age", HDRTOKEN_TYPE_CACHE_CONTROL}, |
| {"max-stale", HDRTOKEN_TYPE_CACHE_CONTROL}, |
| {"min-fresh", HDRTOKEN_TYPE_CACHE_CONTROL}, |
| {"must-revalidate", HDRTOKEN_TYPE_CACHE_CONTROL}, |
| {"no-cache", HDRTOKEN_TYPE_CACHE_CONTROL}, |
| {"no-store", HDRTOKEN_TYPE_CACHE_CONTROL}, |
| {"no-transform", HDRTOKEN_TYPE_CACHE_CONTROL}, |
| {"only-if-cached", HDRTOKEN_TYPE_CACHE_CONTROL}, |
| {"private", HDRTOKEN_TYPE_CACHE_CONTROL}, |
| {"proxy-revalidate", HDRTOKEN_TYPE_CACHE_CONTROL}, |
| {"public", HDRTOKEN_TYPE_CACHE_CONTROL}, |
| {"s-maxage", HDRTOKEN_TYPE_CACHE_CONTROL}, |
| {"need-revalidate-once", HDRTOKEN_TYPE_CACHE_CONTROL}, |
| |
| {(char *)nullptr, static_cast<HdrTokenType>(0)}, |
| }; |
| |
| static HdrTokenFieldInfo _hdrtoken_strs_field_initializers[] = { |
| {"Accept", MIME_SLOTID_ACCEPT, MIME_PRESENCE_ACCEPT, (HTIF_COMMAS | HTIF_MULTVALS)}, |
| {"Accept-Charset", MIME_SLOTID_ACCEPT_CHARSET, MIME_PRESENCE_ACCEPT_CHARSET, (HTIF_COMMAS | HTIF_MULTVALS)}, |
| {"Accept-Encoding", MIME_SLOTID_ACCEPT_ENCODING, MIME_PRESENCE_ACCEPT_ENCODING, (HTIF_COMMAS | HTIF_MULTVALS)}, |
| {"Accept-Language", MIME_SLOTID_ACCEPT_LANGUAGE, MIME_PRESENCE_ACCEPT_LANGUAGE, (HTIF_COMMAS | HTIF_MULTVALS)}, |
| {"Accept-Ranges", MIME_SLOTID_NONE, MIME_PRESENCE_ACCEPT_RANGES, (HTIF_COMMAS | HTIF_MULTVALS)}, |
| {"Age", MIME_SLOTID_AGE, MIME_PRESENCE_AGE, HTIF_NONE}, |
| {"Allow", MIME_SLOTID_NONE, MIME_PRESENCE_ALLOW, (HTIF_COMMAS | HTIF_MULTVALS)}, |
| {"Approved", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE}, |
| {"Authorization", MIME_SLOTID_AUTHORIZATION, MIME_PRESENCE_AUTHORIZATION, HTIF_NONE}, |
| {"Bytes", MIME_SLOTID_NONE, MIME_PRESENCE_BYTES, HTIF_NONE}, |
| {"Cache-Control", MIME_SLOTID_CACHE_CONTROL, MIME_PRESENCE_CACHE_CONTROL, (HTIF_COMMAS | HTIF_MULTVALS)}, |
| {"Client-ip", MIME_SLOTID_CLIENT_IP, MIME_PRESENCE_CLIENT_IP, HTIF_NONE}, |
| {"Connection", MIME_SLOTID_CONNECTION, MIME_PRESENCE_CONNECTION, (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)}, |
| {"Content-Base", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE}, |
| {"Content-Encoding", MIME_SLOTID_CONTENT_ENCODING, MIME_PRESENCE_CONTENT_ENCODING, (HTIF_COMMAS | HTIF_MULTVALS)}, |
| {"Content-Language", MIME_SLOTID_CONTENT_LANGUAGE, MIME_PRESENCE_CONTENT_LANGUAGE, (HTIF_COMMAS | HTIF_MULTVALS)}, |
| {"Content-Length", MIME_SLOTID_CONTENT_LENGTH, MIME_PRESENCE_CONTENT_LENGTH, HTIF_NONE}, |
| {"Content-Location", MIME_SLOTID_NONE, MIME_PRESENCE_CONTENT_LOCATION, HTIF_NONE}, |
| {"Content-MD5", MIME_SLOTID_NONE, MIME_PRESENCE_CONTENT_MD5, HTIF_NONE}, |
| {"Content-Range", MIME_SLOTID_NONE, MIME_PRESENCE_CONTENT_RANGE, HTIF_NONE}, |
| {"Content-Type", MIME_SLOTID_CONTENT_TYPE, MIME_PRESENCE_CONTENT_TYPE, HTIF_NONE}, |
| {"Control", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE}, |
| {"Cookie", MIME_SLOTID_COOKIE, MIME_PRESENCE_COOKIE, (HTIF_MULTVALS)}, |
| {"Date", MIME_SLOTID_DATE, MIME_PRESENCE_DATE, HTIF_NONE}, |
| {"Distribution", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE}, |
| {"Etag", MIME_SLOTID_NONE, MIME_PRESENCE_ETAG, HTIF_NONE}, |
| {"Expires", MIME_SLOTID_EXPIRES, MIME_PRESENCE_EXPIRES, HTIF_NONE}, |
| {"Followup-To", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE}, |
| {"From", MIME_SLOTID_NONE, MIME_PRESENCE_FROM, HTIF_NONE}, |
| {"Host", MIME_SLOTID_NONE, MIME_PRESENCE_HOST, HTIF_NONE}, |
| {"If-Match", MIME_SLOTID_IF_MATCH, MIME_PRESENCE_IF_MATCH, (HTIF_COMMAS | HTIF_MULTVALS)}, |
| {"If-Modified-Since", MIME_SLOTID_IF_MODIFIED_SINCE, MIME_PRESENCE_IF_MODIFIED_SINCE, HTIF_NONE}, |
| {"If-None-Match", MIME_SLOTID_IF_NONE_MATCH, MIME_PRESENCE_IF_NONE_MATCH, (HTIF_COMMAS | HTIF_MULTVALS)}, |
| {"If-Range", MIME_SLOTID_IF_RANGE, MIME_PRESENCE_IF_RANGE, HTIF_NONE}, |
| {"If-Unmodified-Since", MIME_SLOTID_IF_UNMODIFIED_SINCE, MIME_PRESENCE_IF_UNMODIFIED_SINCE, HTIF_NONE}, |
| {"Keep-Alive", MIME_SLOTID_NONE, MIME_PRESENCE_KEEP_ALIVE, (HTIF_HOPBYHOP)}, |
| {"Keywords", MIME_SLOTID_NONE, MIME_PRESENCE_KEYWORDS, HTIF_NONE}, |
| {"Last-Modified", MIME_SLOTID_LAST_MODIFIED, MIME_PRESENCE_LAST_MODIFIED, HTIF_NONE}, |
| {"Lines", MIME_SLOTID_NONE, MIME_PRESENCE_LINES, HTIF_NONE}, |
| {"Location", MIME_SLOTID_NONE, MIME_PRESENCE_LOCATION, (HTIF_MULTVALS)}, |
| {"Max-Forwards", MIME_SLOTID_NONE, MIME_PRESENCE_MAX_FORWARDS, HTIF_NONE}, |
| {"Message-ID", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE}, |
| {"Newsgroups", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE}, |
| {"Organization", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE}, |
| {"Path", MIME_SLOTID_NONE, MIME_PRESENCE_PATH, HTIF_NONE}, |
| {"Pragma", MIME_SLOTID_PRAGMA, MIME_PRESENCE_PRAGMA, (HTIF_COMMAS | HTIF_MULTVALS)}, |
| {"Proxy-Authenticate", MIME_SLOTID_NONE, MIME_PRESENCE_PROXY_AUTHENTICATE, (HTIF_HOPBYHOP | HTIF_PROXYAUTH)}, |
| {"Proxy-Authorization", MIME_SLOTID_NONE, MIME_PRESENCE_PROXY_AUTHORIZATION, (HTIF_HOPBYHOP | HTIF_PROXYAUTH)}, |
| {"Proxy-Connection", MIME_SLOTID_PROXY_CONNECTION, MIME_PRESENCE_PROXY_CONNECTION, (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)}, |
| {"Public", MIME_SLOTID_NONE, MIME_PRESENCE_PUBLIC, (HTIF_COMMAS | HTIF_MULTVALS)}, |
| {"Range", MIME_SLOTID_RANGE, MIME_PRESENCE_RANGE, (HTIF_COMMAS | HTIF_MULTVALS)}, |
| {"References", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE}, |
| {"Referer", MIME_SLOTID_NONE, MIME_PRESENCE_REFERER, HTIF_NONE}, |
| {"Reply-To", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE}, |
| {"Retry-After", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE}, |
| {"Sender", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE}, |
| {"Server", MIME_SLOTID_NONE, MIME_PRESENCE_SERVER, HTIF_NONE}, |
| {"Set-Cookie", MIME_SLOTID_SET_COOKIE, MIME_PRESENCE_SET_COOKIE, (HTIF_MULTVALS)}, |
| {"Strict-Transport-Security", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, (HTIF_MULTVALS)}, |
| {"Subject", MIME_SLOTID_NONE, MIME_PRESENCE_SUBJECT, HTIF_NONE}, |
| {"Summary", MIME_SLOTID_NONE, MIME_PRESENCE_SUMMARY, HTIF_NONE}, |
| {"TE", MIME_SLOTID_TE, MIME_PRESENCE_TE, (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)}, |
| {"Transfer-Encoding", MIME_SLOTID_TRANSFER_ENCODING, MIME_PRESENCE_TRANSFER_ENCODING, |
| (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)}, |
| {"Upgrade", MIME_SLOTID_NONE, MIME_PRESENCE_UPGRADE, (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)}, |
| {"User-Agent", MIME_SLOTID_USER_AGENT, MIME_PRESENCE_USER_AGENT, HTIF_NONE}, |
| {"Vary", MIME_SLOTID_VARY, MIME_PRESENCE_VARY, (HTIF_COMMAS | HTIF_MULTVALS)}, |
| {"Via", MIME_SLOTID_VIA, MIME_PRESENCE_VIA, (HTIF_COMMAS | HTIF_MULTVALS)}, |
| {"Warning", MIME_SLOTID_NONE, MIME_PRESENCE_WARNING, (HTIF_COMMAS | HTIF_MULTVALS)}, |
| {"Www-Authenticate", MIME_SLOTID_WWW_AUTHENTICATE, MIME_PRESENCE_WWW_AUTHENTICATE, HTIF_NONE}, |
| {"Xref", MIME_SLOTID_NONE, MIME_PRESENCE_XREF, HTIF_NONE}, |
| {"X-ID", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)}, |
| {"X-Forwarded-For", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, (HTIF_COMMAS | HTIF_MULTVALS)}, |
| {"Forwarded", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, (HTIF_COMMAS | HTIF_MULTVALS)}, |
| {"Sec-WebSocket-Key", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE}, |
| {"Sec-WebSocket-Version", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE}, |
| {nullptr, 0, 0, 0}, |
| }; |
| |
| const char *_hdrtoken_strs_heap_f = nullptr; // storage first byte |
| const char *_hdrtoken_strs_heap_l = nullptr; // storage last byte |
| |
| int hdrtoken_num_wks = SIZEOF(_hdrtoken_strs); // # of well-known strings |
| |
| const char *hdrtoken_strs[SIZEOF(_hdrtoken_strs)]; // wks_idx -> heap ptr |
| int hdrtoken_str_lengths[SIZEOF(_hdrtoken_strs)]; // wks_idx -> length |
| HdrTokenType hdrtoken_str_token_types[SIZEOF(_hdrtoken_strs)]; // wks_idx -> token type |
| int32_t hdrtoken_str_slotids[SIZEOF(_hdrtoken_strs)]; // wks_idx -> slot id |
| uint64_t hdrtoken_str_masks[SIZEOF(_hdrtoken_strs)]; // wks_idx -> presence mask |
| uint32_t hdrtoken_str_flags[SIZEOF(_hdrtoken_strs)]; // wks_idx -> flags |
| |
| DFA *hdrtoken_strs_dfa = nullptr; |
| |
| /*********************************************************************** |
| * * |
| * H A S H T A B L E * |
| * * |
| ***********************************************************************/ |
| |
// Number of buckets allocated for the well-known-string hash table.
#define HDRTOKEN_HASH_TABLE_SIZE 65536

// One bucket of the hash table. A bucket is empty while wks is null.
struct HdrTokenHashBucket {
  const char *wks; // well-known string stored in this bucket
  uint32_t hash;   // full 32-bit hash of that string
};

// The table itself; cleared and populated by hdrtoken_hash_init().
HdrTokenHashBucket hdrtoken_hash_table[HDRTOKEN_HASH_TABLE_SIZE];
| |
/**
  Bit mask with the low @a x bits set.
**/
#define TINY_MASK(x) (((uint32_t)1 << (x)) - 1)

/**
  Reduce a 32-bit hash to a hash table slot.

  The hash is XOR-folded with itself shifted right by 15 bits and the low
  15 bits of the result are kept, so the returned slot is in [0, 32767].
  NOTE(review): that reaches only half of a 65536-entry table; confirm this
  is intended before changing either constant.
**/
inline uint32_t
hash_to_slot(uint32_t hash)
{
  const uint32_t folded = hash ^ (hash >> 15);
  return folded & TINY_MASK(15);
}
| |
| inline uint32_t |
| hdrtoken_hash(const unsigned char *string, unsigned int length) |
| { |
| ATSHash32FNV1a fnv; |
| fnv.update(string, length, ATSHash::nocase()); |
| fnv.final(); |
| return fnv.get(); |
| } |
| |
| /*------------------------------------------------------------------------- |
| -------------------------------------------------------------------------*/ |
| |
| // WARNING: Indexes into this array are stored on disk for cached objects. New strings must be added at the end of the array to |
| // avoid changing the indexes of pre-existing entries, unless the cache format version number is increased. |
| // |
| static const char *_hdrtoken_commonly_tokenized_strs[] = { |
| // MIME Field names |
| "Accept-Charset", "Accept-Encoding", "Accept-Language", "Accept-Ranges", "Accept", "Age", "Allow", |
| "Approved", // NNTP |
| "Authorization", |
| "Bytes", // NNTP |
| "Cache-Control", "Client-ip", "Connection", "Content-Base", "Content-Encoding", "Content-Language", "Content-Length", |
| "Content-Location", "Content-MD5", "Content-Range", "Content-Type", |
| "Control", // NNTP |
| "Cookie", "Date", |
| "Distribution", // NNTP |
| "Etag", "Expect", "Expires", |
| "Followup-To", // NNTP |
| "From", "Host", "If-Match", "If-Modified-Since", "If-None-Match", "If-Range", "If-Unmodified-Since", "Keep-Alive", |
| "Keywords", // NNTP |
| "Last-Modified", |
| "Lines", // NNTP |
| "Location", "Max-Forwards", |
| "Message-ID", // NNTP |
| "MIME-Version", |
| "Newsgroups", // NNTP |
| "Organization", // NNTP |
| "Path", // NNTP |
| "Pragma", "Proxy-Authenticate", "Proxy-Authorization", "Proxy-Connection", "Public", "Range", |
| "References", // NNTP |
| "Referer", |
| "Reply-To", // NNTP |
| "Retry-After", |
| "Sender", // NNTP |
| "Server", "Set-Cookie", |
| "Subject", // NNTP |
| "Summary", // NNTP |
| "Transfer-Encoding", "Upgrade", "User-Agent", "Vary", "Via", "Warning", "Www-Authenticate", |
| "Xref", // NNTP |
| "@Ats-Internal", // Internal Hack |
| |
| // Accept-Encoding |
| "compress", "deflate", "gzip", "identity", |
| |
| // Cache-Control flags |
| "max-age", "max-stale", "min-fresh", "must-revalidate", "no-cache", "no-store", "no-transform", "only-if-cached", "private", |
| "proxy-revalidate", "s-maxage", "need-revalidate-once", |
| |
| // HTTP miscellaneous |
| "none", "chunked", "close", |
| |
| // WS |
| "websocket", "Sec-WebSocket-Key", "Sec-WebSocket-Version", |
| |
| // HTTP/2 cleartext |
| MIME_UPGRADE_H2C_TOKEN, "HTTP2-Settings", |
| |
| // URL schemes |
| "file", "ftp", "gopher", "https", "http", "mailto", "news", "nntp", "prospero", "telnet", "tunnel", "wais", "pnm", "rtspu", |
| "rtsp", "mmsu", "mmst", "mms", "wss", "ws", |
| |
| // HTTP methods |
| "CONNECT", "DELETE", "GET", "POST", "HEAD", "OPTIONS", "PURGE", "PUT", "TRACE", "PUSH", |
| |
| // Header extensions |
| "X-ID", "X-Forwarded-For", "TE", "Strict-Transport-Security", "100-continue", |
| |
| // RFC-2739 |
| "Forwarded", |
| |
| // RFC-8470 |
| "Early-Data"}; |
| |
| /*------------------------------------------------------------------------- |
| -------------------------------------------------------------------------*/ |
| |
| void |
| hdrtoken_hash_init() |
| { |
| uint32_t i; |
| int num_collisions; |
| |
| memset(hdrtoken_hash_table, 0, sizeof(hdrtoken_hash_table)); |
| num_collisions = 0; |
| |
| for (i = 0; i < static_cast<int> SIZEOF(_hdrtoken_commonly_tokenized_strs); i++) { |
| // convert the common string to the well-known token |
| unsigned const char *wks; |
| int wks_idx = |
| hdrtoken_tokenize_dfa(_hdrtoken_commonly_tokenized_strs[i], static_cast<int>(strlen(_hdrtoken_commonly_tokenized_strs[i])), |
| reinterpret_cast<const char **>(&wks)); |
| ink_release_assert(wks_idx >= 0); |
| |
| uint32_t hash = hdrtoken_hash(wks, hdrtoken_str_lengths[wks_idx]); |
| uint32_t slot = hash_to_slot(hash); |
| |
| if (hdrtoken_hash_table[slot].wks) { |
| printf("ERROR: hdrtoken_hash_table[%u] collision: '%s' replacing '%s'\n", slot, reinterpret_cast<const char *>(wks), |
| hdrtoken_hash_table[slot].wks); |
| ++num_collisions; |
| } |
| hdrtoken_hash_table[slot].wks = reinterpret_cast<const char *>(wks); |
| hdrtoken_hash_table[slot].hash = hash; |
| } |
| |
| if (num_collisions > 0) { |
| abort(); |
| } |
| } |
| |
| /*********************************************************************** |
| * * |
| * M A I N H D R T O K E N C O D E * |
| * * |
| ***********************************************************************/ |
| |
/**
  Round @a n up to the nearest multiple of @a unit.

  @return 0 for n == 0, @a unit for 0 < n <= unit, 2 * unit for
  unit < n <= 2 * unit, and so on. @a unit must be non-zero.
*/

static inline unsigned int
snap_up_to_multiple(unsigned int n, unsigned int unit)
{
  const unsigned int padded      = n + (unit - 1);
  const unsigned int whole_units = padded / unit;

  return whole_units * unit;
}
| |
| /** |
| */ |
| void |
| hdrtoken_init() |
| { |
| static int inited = 0; |
| |
| int i; |
| |
| if (!inited) { |
| inited = 1; |
| |
| hdrtoken_strs_dfa = new DFA; |
| hdrtoken_strs_dfa->compile(_hdrtoken_strs, SIZEOF(_hdrtoken_strs), (RE_CASE_INSENSITIVE)); |
| |
| // all the tokenized hdrtoken strings are placed in a special heap, |
| // and each string is prepended with a HdrTokenHeapPrefix --- |
| // this makes it easy to tell that a string is a tokenized |
| // string (because its address is within the heap), and |
| // makes it easy to find the length, index, flags, mask, and |
| // other info from the prefix. |
| |
| int heap_size = 0; |
| for (i = 0; i < static_cast<int> SIZEOF(_hdrtoken_strs); i++) { |
| hdrtoken_str_lengths[i] = static_cast<int>(strlen(_hdrtoken_strs[i])); |
| int sstr_len = snap_up_to_multiple(hdrtoken_str_lengths[i] + 1, sizeof(HdrTokenHeapPrefix)); |
| int packed_prefix_str_len = sizeof(HdrTokenHeapPrefix) + sstr_len; |
| heap_size += packed_prefix_str_len; |
| } |
| |
| _hdrtoken_strs_heap_f = static_cast<const char *>(ats_malloc(heap_size)); |
| _hdrtoken_strs_heap_l = _hdrtoken_strs_heap_f + heap_size - 1; |
| |
| char *heap_ptr = const_cast<char *>(_hdrtoken_strs_heap_f); |
| |
| for (i = 0; i < static_cast<int> SIZEOF(_hdrtoken_strs); i++) { |
| HdrTokenHeapPrefix prefix; |
| |
| memset(&prefix, 0, sizeof(HdrTokenHeapPrefix)); |
| |
| prefix.wks_idx = i; |
| prefix.wks_length = hdrtoken_str_lengths[i]; |
| prefix.wks_token_type = HDRTOKEN_TYPE_OTHER; // default, can override later |
| prefix.wks_info.name = nullptr; // default, can override later |
| prefix.wks_info.slotid = MIME_SLOTID_NONE; // default, can override later |
| prefix.wks_info.mask = TOK_64_CONST(0); // default, can override later |
| prefix.wks_info.flags = HTIF_MULTVALS; // default, can override later |
| |
| int sstr_len = snap_up_to_multiple(hdrtoken_str_lengths[i] + 1, sizeof(HdrTokenHeapPrefix)); |
| |
| *reinterpret_cast<HdrTokenHeapPrefix *>(heap_ptr) = prefix; // set string prefix |
| heap_ptr += sizeof(HdrTokenHeapPrefix); // advance heap ptr past index |
| hdrtoken_strs[i] = heap_ptr; // record string pointer |
| // coverity[secure_coding] |
| ink_strlcpy(const_cast<char *>(hdrtoken_strs[i]), _hdrtoken_strs[i], |
| heap_size - sizeof(HdrTokenHeapPrefix)); // copy string into heap |
| heap_ptr += sstr_len; // advance heap ptr past string |
| heap_size -= sstr_len; |
| } |
| |
| // Set the token types for certain tokens |
| for (i = 0; _hdrtoken_strs_type_initializers[i].name != nullptr; i++) { |
| int wks_idx; |
| HdrTokenHeapPrefix *prefix; |
| |
| wks_idx = hdrtoken_tokenize_dfa(_hdrtoken_strs_type_initializers[i].name, |
| static_cast<int>(strlen(_hdrtoken_strs_type_initializers[i].name))); |
| |
| ink_assert((wks_idx >= 0) && (wks_idx < (int)SIZEOF(hdrtoken_strs))); |
| // coverity[negative_returns] |
| prefix = hdrtoken_index_to_prefix(wks_idx); |
| prefix->wks_token_type = _hdrtoken_strs_type_initializers[i].type; |
| } |
| |
| // Set special data for field names |
| for (i = 0; _hdrtoken_strs_field_initializers[i].name != nullptr; i++) { |
| int wks_idx; |
| HdrTokenHeapPrefix *prefix; |
| |
| wks_idx = hdrtoken_tokenize_dfa(_hdrtoken_strs_field_initializers[i].name, |
| static_cast<int>(strlen(_hdrtoken_strs_field_initializers[i].name))); |
| |
| ink_assert((wks_idx >= 0) && (wks_idx < (int)SIZEOF(hdrtoken_strs))); |
| prefix = hdrtoken_index_to_prefix(wks_idx); |
| prefix->wks_info.slotid = _hdrtoken_strs_field_initializers[i].slotid; |
| prefix->wks_info.flags = _hdrtoken_strs_field_initializers[i].flags; |
| prefix->wks_info.mask = _hdrtoken_strs_field_initializers[i].mask; |
| } |
| |
| for (i = 0; i < static_cast<int> SIZEOF(_hdrtoken_strs); i++) { |
| HdrTokenHeapPrefix *prefix = hdrtoken_index_to_prefix(i); |
| prefix->wks_info.name = hdrtoken_strs[i]; |
| hdrtoken_str_token_types[i] = prefix->wks_token_type; // parallel array for speed |
| hdrtoken_str_slotids[i] = prefix->wks_info.slotid; // parallel array for speed |
| hdrtoken_str_masks[i] = prefix->wks_info.mask; // parallel array for speed |
| hdrtoken_str_flags[i] = prefix->wks_info.flags; // parallel array for speed |
| } |
| |
| hdrtoken_hash_init(); |
| } |
| } |
| |
| /*------------------------------------------------------------------------- |
| -------------------------------------------------------------------------*/ |
| |
| int |
| hdrtoken_tokenize_dfa(const char *string, int string_len, const char **wks_string_out) |
| { |
| int wks_idx; |
| |
| wks_idx = hdrtoken_strs_dfa->match({string, size_t(string_len)}); |
| |
| if (wks_idx < 0) { |
| wks_idx = -1; |
| } |
| if (wks_string_out) { |
| if (wks_idx >= 0) { |
| *wks_string_out = hdrtoken_index_to_wks(wks_idx); |
| } else { |
| *wks_string_out = nullptr; |
| } |
| } |
| // printf("hdrtoken_tokenize_dfa(%d,*s) - return %d\n",string_len,string,wks_idx); |
| |
| return wks_idx; |
| } |
| |
| /*------------------------------------------------------------------------- |
| -------------------------------------------------------------------------*/ |
| |
| int |
| hdrtoken_tokenize(const char *string, int string_len, const char **wks_string_out) |
| { |
| int wks_idx; |
| HdrTokenHashBucket *bucket; |
| |
| ink_assert(string != nullptr); |
| |
| if (hdrtoken_is_wks(string)) { |
| wks_idx = hdrtoken_wks_to_index(string); |
| if (wks_string_out) { |
| *wks_string_out = string; |
| } |
| return wks_idx; |
| } |
| |
| uint32_t hash = hdrtoken_hash(reinterpret_cast<const unsigned char *>(string), static_cast<unsigned int>(string_len)); |
| uint32_t slot = hash_to_slot(hash); |
| |
| bucket = &(hdrtoken_hash_table[slot]); |
| if ((bucket->wks != nullptr) && (bucket->hash == hash) && (hdrtoken_wks_to_length(bucket->wks) == string_len)) { |
| wks_idx = hdrtoken_wks_to_index(bucket->wks); |
| if (wks_string_out) { |
| *wks_string_out = bucket->wks; |
| } |
| return wks_idx; |
| } |
| |
| Debug("hdr_token", "Did not find a WKS for '%.*s'", string_len, string); |
| return -1; |
| } |
| |
| /*------------------------------------------------------------------------- |
| -------------------------------------------------------------------------*/ |
| |
| const char * |
| hdrtoken_string_to_wks(const char *string) |
| { |
| const char *wks = nullptr; |
| hdrtoken_tokenize(string, static_cast<int>(strlen(string)), &wks); |
| return wks; |
| } |
| |
| /*------------------------------------------------------------------------- |
| -------------------------------------------------------------------------*/ |
| |
| const char * |
| hdrtoken_string_to_wks(const char *string, int length) |
| { |
| const char *wks = nullptr; |
| hdrtoken_tokenize(string, length, &wks); |
| return wks; |
| } |