blob: 5fe2280988aa923826fc6b487e57c89403af7365 [file] [log] [blame]
/** @file
Well-known string (WKS) tokenization for header field names, header values, URL schemes and HTTP methods.
@section license License
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "tscore/ink_platform.h"
#include "tscore/HashFNV.h"
#include "tscore/Diags.h"
#include "tscore/ink_memory.h"
#include <cstdio>
#include "tscore/Allocator.h"
#include "HTTP.h"
#include "HdrToken.h"
#include "MIME.h"
#include "tscore/Regex.h"
#include "URL.h"
/*
Any string added here SHOULD also be added to _hdrtoken_commonly_tokenized_strs, with the same ordering.
** important, ordering matters **
You want a regexp like 'Accept' after "greedier" choices so it doesn't match 'Accept-Ranges' earlier than
it should. The regexps are anchored (^Accept), but I don't see a way with the current system to
match the word ONLY without making _hdrtoken_strs a real PCRE, but then that breaks the hashing:
hdrtoken_hash("^Accept$") != hdrtoken_hash("Accept")
So, the current hack is to have "Accept" follow "Accept-.*", lame, I know
/ericb
*/
// Master list of well-known strings (WKS).
//
// hdrtoken_init() compiles this array into a case-insensitive DFA and copies every
// string into the WKS heap; the position of a string in this array becomes its
// wks_idx. Strings that share a prefix are listed longest first (e.g. "https"
// before "http", "rtspu" before "rtsp", "Accept-*" before "Accept").
static const char *_hdrtoken_strs[] = {
// MIME Field names
"Accept-Charset", "Accept-Encoding", "Accept-Language", "Accept-Ranges", "Accept", "Age", "Allow",
"Approved", // NNTP
"Authorization",
"Bytes", // NNTP
"Cache-Control", "Client-ip", "Connection", "Content-Base", "Content-Encoding", "Content-Language", "Content-Length",
"Content-Location", "Content-MD5", "Content-Range", "Content-Type",
"Control", // NNTP
"Cookie", "Date",
"Distribution", // NNTP
"Etag", "Expect", "Expires",
"Followup-To", // NNTP
"From", "Host", "If-Match", "If-Modified-Since", "If-None-Match", "If-Range", "If-Unmodified-Since", "Keep-Alive",
"Keywords", // NNTP
"Last-Modified",
"Lines", // NNTP
"Location", "Max-Forwards",
"Message-ID", // NNTP
"MIME-Version",
"Newsgroups", // NNTP
"Organization", // NNTP
"Path", // NNTP
"Pragma", "Proxy-Authenticate", "Proxy-Authorization", "Proxy-Connection", "Public", "Range",
"References", // NNTP
"Referer",
"Reply-To", // NNTP
"Retry-After",
"Sender", // NNTP
"Server", "Set-Cookie",
"Subject", // NNTP
"Summary", // NNTP
"Transfer-Encoding", "Upgrade", "User-Agent", "Vary", "Via", "Warning", "Www-Authenticate",
"Xref", // NNTP
"@Ats-Internal", // Internal Hack
// Accept-Encoding
"compress", "deflate", "gzip", "identity",
// Cache-Control flags
"max-age", "max-stale", "min-fresh", "must-revalidate", "no-cache", "no-store", "no-transform", "only-if-cached", "private",
"proxy-revalidate", "s-maxage", "need-revalidate-once",
// HTTP miscellaneous
"none", "chunked", "close",
// WS
"websocket", "Sec-WebSocket-Key", "Sec-WebSocket-Version",
// HTTP/2 cleartext
MIME_UPGRADE_H2C_TOKEN, "HTTP2-Settings",
// URL schemes
"file", "ftp", "gopher", "https", "http", "mailto", "news", "nntp", "prospero", "telnet", "tunnel", "wais", "pnm", "rtspu",
"rtsp", "mmsu", "mmst", "mms", "wss", "ws",
// HTTP methods
"CONNECT", "DELETE", "GET", "POST", "HEAD", "OPTIONS", "PURGE", "PUT", "TRACE", "PUSH",
// Header extensions
"X-ID", "X-Forwarded-For", "TE", "Strict-Transport-Security", "100-continue",
// RFC-7239
"Forwarded",
// RFC-8470
"Early-Data"};
// Token type overrides applied by hdrtoken_init().
//
// Each name is resolved to a wks_idx through the DFA and the matching heap
// prefix gets the given token type; strings not listed here keep the default
// HDRTOKEN_TYPE_OTHER. The table is terminated by an entry with a null name.
//
// NOTE(review): "public" is spelled lower-case here while _hdrtoken_strs only
// contains "Public"; since the DFA is compiled case-insensitively this
// presumably resolves to that entry -- confirm.
static HdrTokenTypeBinding _hdrtoken_strs_type_initializers[] = {
// URL schemes
{"file", HDRTOKEN_TYPE_SCHEME},
{"ftp", HDRTOKEN_TYPE_SCHEME},
{"gopher", HDRTOKEN_TYPE_SCHEME},
{"http", HDRTOKEN_TYPE_SCHEME},
{"https", HDRTOKEN_TYPE_SCHEME},
{"mailto", HDRTOKEN_TYPE_SCHEME},
{"news", HDRTOKEN_TYPE_SCHEME},
{"nntp", HDRTOKEN_TYPE_SCHEME},
{"prospero", HDRTOKEN_TYPE_SCHEME},
{"telnet", HDRTOKEN_TYPE_SCHEME},
{"tunnel", HDRTOKEN_TYPE_SCHEME},
{"wais", HDRTOKEN_TYPE_SCHEME},
{"pnm", HDRTOKEN_TYPE_SCHEME},
{"rtsp", HDRTOKEN_TYPE_SCHEME},
{"rtspu", HDRTOKEN_TYPE_SCHEME},
{"mms", HDRTOKEN_TYPE_SCHEME},
{"mmsu", HDRTOKEN_TYPE_SCHEME},
{"mmst", HDRTOKEN_TYPE_SCHEME},
{"wss", HDRTOKEN_TYPE_SCHEME},
{"ws", HDRTOKEN_TYPE_SCHEME},
// HTTP methods
{"CONNECT", HDRTOKEN_TYPE_METHOD},
{"DELETE", HDRTOKEN_TYPE_METHOD},
{"GET", HDRTOKEN_TYPE_METHOD},
{"HEAD", HDRTOKEN_TYPE_METHOD},
{"OPTIONS", HDRTOKEN_TYPE_METHOD},
{"POST", HDRTOKEN_TYPE_METHOD},
{"PURGE", HDRTOKEN_TYPE_METHOD},
{"PUT", HDRTOKEN_TYPE_METHOD},
{"TRACE", HDRTOKEN_TYPE_METHOD},
{"PUSH", HDRTOKEN_TYPE_METHOD},
// Cache-Control directives
{"max-age", HDRTOKEN_TYPE_CACHE_CONTROL},
{"max-stale", HDRTOKEN_TYPE_CACHE_CONTROL},
{"min-fresh", HDRTOKEN_TYPE_CACHE_CONTROL},
{"must-revalidate", HDRTOKEN_TYPE_CACHE_CONTROL},
{"no-cache", HDRTOKEN_TYPE_CACHE_CONTROL},
{"no-store", HDRTOKEN_TYPE_CACHE_CONTROL},
{"no-transform", HDRTOKEN_TYPE_CACHE_CONTROL},
{"only-if-cached", HDRTOKEN_TYPE_CACHE_CONTROL},
{"private", HDRTOKEN_TYPE_CACHE_CONTROL},
{"proxy-revalidate", HDRTOKEN_TYPE_CACHE_CONTROL},
{"public", HDRTOKEN_TYPE_CACHE_CONTROL},
{"s-maxage", HDRTOKEN_TYPE_CACHE_CONTROL},
{"need-revalidate-once", HDRTOKEN_TYPE_CACHE_CONTROL},
// End-of-table sentinel
{(char *)nullptr, static_cast<HdrTokenType>(0)},
};
// Per-field metadata applied by hdrtoken_init().
//
// Each row lists a field name followed by its slot id (MIME_SLOTID_*), its
// presence mask (MIME_PRESENCE_*) and its flags (HTIF_*). hdrtoken_init()
// resolves the name through the DFA and copies slotid, mask and flags into the
// matching heap prefix. Well-known strings not listed here keep the defaults
// set by hdrtoken_init(): MIME_SLOTID_NONE, a zero mask and HTIF_MULTVALS.
// The table is terminated by an entry with a null name.
static HdrTokenFieldInfo _hdrtoken_strs_field_initializers[] = {
{"Accept", MIME_SLOTID_ACCEPT, MIME_PRESENCE_ACCEPT, (HTIF_COMMAS | HTIF_MULTVALS)},
{"Accept-Charset", MIME_SLOTID_ACCEPT_CHARSET, MIME_PRESENCE_ACCEPT_CHARSET, (HTIF_COMMAS | HTIF_MULTVALS)},
{"Accept-Encoding", MIME_SLOTID_ACCEPT_ENCODING, MIME_PRESENCE_ACCEPT_ENCODING, (HTIF_COMMAS | HTIF_MULTVALS)},
{"Accept-Language", MIME_SLOTID_ACCEPT_LANGUAGE, MIME_PRESENCE_ACCEPT_LANGUAGE, (HTIF_COMMAS | HTIF_MULTVALS)},
{"Accept-Ranges", MIME_SLOTID_NONE, MIME_PRESENCE_ACCEPT_RANGES, (HTIF_COMMAS | HTIF_MULTVALS)},
{"Age", MIME_SLOTID_AGE, MIME_PRESENCE_AGE, HTIF_NONE},
{"Allow", MIME_SLOTID_NONE, MIME_PRESENCE_ALLOW, (HTIF_COMMAS | HTIF_MULTVALS)},
{"Approved", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
{"Authorization", MIME_SLOTID_AUTHORIZATION, MIME_PRESENCE_AUTHORIZATION, HTIF_NONE},
{"Bytes", MIME_SLOTID_NONE, MIME_PRESENCE_BYTES, HTIF_NONE},
{"Cache-Control", MIME_SLOTID_CACHE_CONTROL, MIME_PRESENCE_CACHE_CONTROL, (HTIF_COMMAS | HTIF_MULTVALS)},
{"Client-ip", MIME_SLOTID_CLIENT_IP, MIME_PRESENCE_CLIENT_IP, HTIF_NONE},
{"Connection", MIME_SLOTID_CONNECTION, MIME_PRESENCE_CONNECTION, (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)},
{"Content-Base", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
{"Content-Encoding", MIME_SLOTID_CONTENT_ENCODING, MIME_PRESENCE_CONTENT_ENCODING, (HTIF_COMMAS | HTIF_MULTVALS)},
{"Content-Language", MIME_SLOTID_CONTENT_LANGUAGE, MIME_PRESENCE_CONTENT_LANGUAGE, (HTIF_COMMAS | HTIF_MULTVALS)},
{"Content-Length", MIME_SLOTID_CONTENT_LENGTH, MIME_PRESENCE_CONTENT_LENGTH, HTIF_NONE},
{"Content-Location", MIME_SLOTID_NONE, MIME_PRESENCE_CONTENT_LOCATION, HTIF_NONE},
{"Content-MD5", MIME_SLOTID_NONE, MIME_PRESENCE_CONTENT_MD5, HTIF_NONE},
{"Content-Range", MIME_SLOTID_NONE, MIME_PRESENCE_CONTENT_RANGE, HTIF_NONE},
{"Content-Type", MIME_SLOTID_CONTENT_TYPE, MIME_PRESENCE_CONTENT_TYPE, HTIF_NONE},
{"Control", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
{"Cookie", MIME_SLOTID_COOKIE, MIME_PRESENCE_COOKIE, (HTIF_MULTVALS)},
{"Date", MIME_SLOTID_DATE, MIME_PRESENCE_DATE, HTIF_NONE},
{"Distribution", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
{"Etag", MIME_SLOTID_NONE, MIME_PRESENCE_ETAG, HTIF_NONE},
{"Expires", MIME_SLOTID_EXPIRES, MIME_PRESENCE_EXPIRES, HTIF_NONE},
{"Followup-To", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
{"From", MIME_SLOTID_NONE, MIME_PRESENCE_FROM, HTIF_NONE},
{"Host", MIME_SLOTID_NONE, MIME_PRESENCE_HOST, HTIF_NONE},
{"If-Match", MIME_SLOTID_IF_MATCH, MIME_PRESENCE_IF_MATCH, (HTIF_COMMAS | HTIF_MULTVALS)},
{"If-Modified-Since", MIME_SLOTID_IF_MODIFIED_SINCE, MIME_PRESENCE_IF_MODIFIED_SINCE, HTIF_NONE},
{"If-None-Match", MIME_SLOTID_IF_NONE_MATCH, MIME_PRESENCE_IF_NONE_MATCH, (HTIF_COMMAS | HTIF_MULTVALS)},
{"If-Range", MIME_SLOTID_IF_RANGE, MIME_PRESENCE_IF_RANGE, HTIF_NONE},
{"If-Unmodified-Since", MIME_SLOTID_IF_UNMODIFIED_SINCE, MIME_PRESENCE_IF_UNMODIFIED_SINCE, HTIF_NONE},
{"Keep-Alive", MIME_SLOTID_NONE, MIME_PRESENCE_KEEP_ALIVE, (HTIF_HOPBYHOP)},
{"Keywords", MIME_SLOTID_NONE, MIME_PRESENCE_KEYWORDS, HTIF_NONE},
{"Last-Modified", MIME_SLOTID_LAST_MODIFIED, MIME_PRESENCE_LAST_MODIFIED, HTIF_NONE},
{"Lines", MIME_SLOTID_NONE, MIME_PRESENCE_LINES, HTIF_NONE},
{"Location", MIME_SLOTID_NONE, MIME_PRESENCE_LOCATION, (HTIF_MULTVALS)},
{"Max-Forwards", MIME_SLOTID_NONE, MIME_PRESENCE_MAX_FORWARDS, HTIF_NONE},
{"Message-ID", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
{"Newsgroups", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
{"Organization", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
{"Path", MIME_SLOTID_NONE, MIME_PRESENCE_PATH, HTIF_NONE},
{"Pragma", MIME_SLOTID_PRAGMA, MIME_PRESENCE_PRAGMA, (HTIF_COMMAS | HTIF_MULTVALS)},
{"Proxy-Authenticate", MIME_SLOTID_NONE, MIME_PRESENCE_PROXY_AUTHENTICATE, (HTIF_HOPBYHOP | HTIF_PROXYAUTH)},
{"Proxy-Authorization", MIME_SLOTID_NONE, MIME_PRESENCE_PROXY_AUTHORIZATION, (HTIF_HOPBYHOP | HTIF_PROXYAUTH)},
{"Proxy-Connection", MIME_SLOTID_PROXY_CONNECTION, MIME_PRESENCE_PROXY_CONNECTION, (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)},
{"Public", MIME_SLOTID_NONE, MIME_PRESENCE_PUBLIC, (HTIF_COMMAS | HTIF_MULTVALS)},
{"Range", MIME_SLOTID_RANGE, MIME_PRESENCE_RANGE, (HTIF_COMMAS | HTIF_MULTVALS)},
{"References", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
{"Referer", MIME_SLOTID_NONE, MIME_PRESENCE_REFERER, HTIF_NONE},
{"Reply-To", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
{"Retry-After", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
{"Sender", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
{"Server", MIME_SLOTID_NONE, MIME_PRESENCE_SERVER, HTIF_NONE},
{"Set-Cookie", MIME_SLOTID_SET_COOKIE, MIME_PRESENCE_SET_COOKIE, (HTIF_MULTVALS)},
{"Strict-Transport-Security", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, (HTIF_MULTVALS)},
{"Subject", MIME_SLOTID_NONE, MIME_PRESENCE_SUBJECT, HTIF_NONE},
{"Summary", MIME_SLOTID_NONE, MIME_PRESENCE_SUMMARY, HTIF_NONE},
{"TE", MIME_SLOTID_TE, MIME_PRESENCE_TE, (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)},
{"Transfer-Encoding", MIME_SLOTID_TRANSFER_ENCODING, MIME_PRESENCE_TRANSFER_ENCODING,
(HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)},
{"Upgrade", MIME_SLOTID_NONE, MIME_PRESENCE_UPGRADE, (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)},
{"User-Agent", MIME_SLOTID_USER_AGENT, MIME_PRESENCE_USER_AGENT, HTIF_NONE},
{"Vary", MIME_SLOTID_VARY, MIME_PRESENCE_VARY, (HTIF_COMMAS | HTIF_MULTVALS)},
{"Via", MIME_SLOTID_VIA, MIME_PRESENCE_VIA, (HTIF_COMMAS | HTIF_MULTVALS)},
{"Warning", MIME_SLOTID_NONE, MIME_PRESENCE_WARNING, (HTIF_COMMAS | HTIF_MULTVALS)},
{"Www-Authenticate", MIME_SLOTID_WWW_AUTHENTICATE, MIME_PRESENCE_WWW_AUTHENTICATE, HTIF_NONE},
{"Xref", MIME_SLOTID_NONE, MIME_PRESENCE_XREF, HTIF_NONE},
{"X-ID", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)},
{"X-Forwarded-For", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, (HTIF_COMMAS | HTIF_MULTVALS)},
{"Forwarded", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, (HTIF_COMMAS | HTIF_MULTVALS)},
{"Sec-WebSocket-Key", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
{"Sec-WebSocket-Version", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
// End-of-table sentinel
{nullptr, 0, 0, 0},
};
// Global WKS state, all filled in by hdrtoken_init().
//
// _hdrtoken_strs_heap_f / _hdrtoken_strs_heap_l delimit the single allocation
// that holds every well-known string together with its HdrTokenHeapPrefix.
// The arrays below are indexed by wks_idx (the position of the string in
// _hdrtoken_strs) and mirror the values stored in each heap prefix.
const char *_hdrtoken_strs_heap_f = nullptr; // storage first byte
const char *_hdrtoken_strs_heap_l = nullptr; // storage last byte
int hdrtoken_num_wks = SIZEOF(_hdrtoken_strs); // # of well-known strings
const char *hdrtoken_strs[SIZEOF(_hdrtoken_strs)]; // wks_idx -> heap ptr
int hdrtoken_str_lengths[SIZEOF(_hdrtoken_strs)]; // wks_idx -> length
HdrTokenType hdrtoken_str_token_types[SIZEOF(_hdrtoken_strs)]; // wks_idx -> token type
int32_t hdrtoken_str_slotids[SIZEOF(_hdrtoken_strs)]; // wks_idx -> slot id
uint64_t hdrtoken_str_masks[SIZEOF(_hdrtoken_strs)]; // wks_idx -> presence mask
uint32_t hdrtoken_str_flags[SIZEOF(_hdrtoken_strs)]; // wks_idx -> flags
// DFA compiled from _hdrtoken_strs; allocated in hdrtoken_init()
DFA *hdrtoken_strs_dfa = nullptr;
/***********************************************************************
* *
* H A S H T A B L E *
* *
***********************************************************************/
// Number of buckets in hdrtoken_hash_table.
// NOTE(review): hash_to_slot() masks its result to 15 bits, so only slots
// 0..32767 of the 65536 entries are ever addressed -- confirm this is intended.
#define HDRTOKEN_HASH_TABLE_SIZE 65536
// One direct-mapped bucket: no chaining, at most one well-known string per slot.
struct HdrTokenHashBucket {
const char *wks; // heap pointer of the well-known string, nullptr if the slot is empty
uint32_t hash;   // full 32-bit hash of that string, compared on lookup
};
// Filled by hdrtoken_hash_init() and consulted by hdrtoken_tokenize().
HdrTokenHashBucket hdrtoken_hash_table[HDRTOKEN_HASH_TABLE_SIZE];
/**
Hash helpers: folding a hash into a table slot, and the case-insensitive FNV-1a string hash
**/
// Bit mask with the low x bits set.
#define TINY_MASK(x) (((uint32_t)1 << (x)) - 1)

/**
  Fold a 32-bit hash into a 15-bit hash table slot.

  The hash is XORed with itself shifted right by 15 bits and the low 15 bits
  of the result are kept.

  @param hash full 32-bit hash value
  @return slot index in the range [0, 32767]
 */
inline uint32_t
hash_to_slot(uint32_t hash)
{
  const uint32_t folded = hash ^ (hash >> 15);

  return folded & TINY_MASK(15);
}
inline uint32_t
hdrtoken_hash(const unsigned char *string, unsigned int length)
{
ATSHash32FNV1a fnv;
fnv.update(string, length, ATSHash::nocase());
fnv.final();
return fnv.get();
}
/*-------------------------------------------------------------------------
-------------------------------------------------------------------------*/
// WARNING: Indexes into this array are stored on disk for cached objects. New strings must be added at the end of the array to
// avoid changing the indexes of pre-existing entries, unless the cache format version number is increased.
//
// Strings entered into hdrtoken_hash_table by hdrtoken_hash_init(). Each one is
// first resolved to its well-known string through the DFA, so every entry must
// match something in _hdrtoken_strs (hdrtoken_hash_init() release-asserts this).
static const char *_hdrtoken_commonly_tokenized_strs[] = {
// MIME Field names
"Accept-Charset", "Accept-Encoding", "Accept-Language", "Accept-Ranges", "Accept", "Age", "Allow",
"Approved", // NNTP
"Authorization",
"Bytes", // NNTP
"Cache-Control", "Client-ip", "Connection", "Content-Base", "Content-Encoding", "Content-Language", "Content-Length",
"Content-Location", "Content-MD5", "Content-Range", "Content-Type",
"Control", // NNTP
"Cookie", "Date",
"Distribution", // NNTP
"Etag", "Expect", "Expires",
"Followup-To", // NNTP
"From", "Host", "If-Match", "If-Modified-Since", "If-None-Match", "If-Range", "If-Unmodified-Since", "Keep-Alive",
"Keywords", // NNTP
"Last-Modified",
"Lines", // NNTP
"Location", "Max-Forwards",
"Message-ID", // NNTP
"MIME-Version",
"Newsgroups", // NNTP
"Organization", // NNTP
"Path", // NNTP
"Pragma", "Proxy-Authenticate", "Proxy-Authorization", "Proxy-Connection", "Public", "Range",
"References", // NNTP
"Referer",
"Reply-To", // NNTP
"Retry-After",
"Sender", // NNTP
"Server", "Set-Cookie",
"Subject", // NNTP
"Summary", // NNTP
"Transfer-Encoding", "Upgrade", "User-Agent", "Vary", "Via", "Warning", "Www-Authenticate",
"Xref", // NNTP
"@Ats-Internal", // Internal Hack
// Accept-Encoding
"compress", "deflate", "gzip", "identity",
// Cache-Control flags
"max-age", "max-stale", "min-fresh", "must-revalidate", "no-cache", "no-store", "no-transform", "only-if-cached", "private",
"proxy-revalidate", "s-maxage", "need-revalidate-once",
// HTTP miscellaneous
"none", "chunked", "close",
// WS
"websocket", "Sec-WebSocket-Key", "Sec-WebSocket-Version",
// HTTP/2 cleartext
MIME_UPGRADE_H2C_TOKEN, "HTTP2-Settings",
// URL schemes
"file", "ftp", "gopher", "https", "http", "mailto", "news", "nntp", "prospero", "telnet", "tunnel", "wais", "pnm", "rtspu",
"rtsp", "mmsu", "mmst", "mms", "wss", "ws",
// HTTP methods
"CONNECT", "DELETE", "GET", "POST", "HEAD", "OPTIONS", "PURGE", "PUT", "TRACE", "PUSH",
// Header extensions
"X-ID", "X-Forwarded-For", "TE", "Strict-Transport-Security", "100-continue",
// RFC-7239
"Forwarded",
// RFC-8470
"Early-Data"};
/*-------------------------------------------------------------------------
-------------------------------------------------------------------------*/
void
hdrtoken_hash_init()
{
uint32_t i;
int num_collisions;
memset(hdrtoken_hash_table, 0, sizeof(hdrtoken_hash_table));
num_collisions = 0;
for (i = 0; i < static_cast<int> SIZEOF(_hdrtoken_commonly_tokenized_strs); i++) {
// convert the common string to the well-known token
unsigned const char *wks;
int wks_idx =
hdrtoken_tokenize_dfa(_hdrtoken_commonly_tokenized_strs[i], static_cast<int>(strlen(_hdrtoken_commonly_tokenized_strs[i])),
reinterpret_cast<const char **>(&wks));
ink_release_assert(wks_idx >= 0);
uint32_t hash = hdrtoken_hash(wks, hdrtoken_str_lengths[wks_idx]);
uint32_t slot = hash_to_slot(hash);
if (hdrtoken_hash_table[slot].wks) {
printf("ERROR: hdrtoken_hash_table[%u] collision: '%s' replacing '%s'\n", slot, reinterpret_cast<const char *>(wks),
hdrtoken_hash_table[slot].wks);
++num_collisions;
}
hdrtoken_hash_table[slot].wks = reinterpret_cast<const char *>(wks);
hdrtoken_hash_table[slot].hash = hash;
}
if (num_collisions > 0) {
abort();
}
}
/***********************************************************************
* *
* M A I N H D R T O K E N C O D E *
* *
***********************************************************************/
/**
  Round @a n up to the nearest multiple of @a unit.

  @param n    value to round
  @param unit rounding granularity; must be non-zero
  @return 0 for n == 0, otherwise the smallest multiple of @a unit that is
          >= @a n (e.g. @a unit itself for 0 < n <= unit)
 */
static inline unsigned int
snap_up_to_multiple(unsigned int n, unsigned int unit)
{
  const unsigned int bumped = n + (unit - 1);

  // dropping the remainder is the same as (bumped / unit) * unit
  return bumped - (bumped % unit);
}
/**
*/
void
hdrtoken_init()
{
static int inited = 0;
int i;
if (!inited) {
inited = 1;
hdrtoken_strs_dfa = new DFA;
hdrtoken_strs_dfa->compile(_hdrtoken_strs, SIZEOF(_hdrtoken_strs), (RE_CASE_INSENSITIVE));
// all the tokenized hdrtoken strings are placed in a special heap,
// and each string is prepended with a HdrTokenHeapPrefix ---
// this makes it easy to tell that a string is a tokenized
// string (because its address is within the heap), and
// makes it easy to find the length, index, flags, mask, and
// other info from the prefix.
int heap_size = 0;
for (i = 0; i < static_cast<int> SIZEOF(_hdrtoken_strs); i++) {
hdrtoken_str_lengths[i] = static_cast<int>(strlen(_hdrtoken_strs[i]));
int sstr_len = snap_up_to_multiple(hdrtoken_str_lengths[i] + 1, sizeof(HdrTokenHeapPrefix));
int packed_prefix_str_len = sizeof(HdrTokenHeapPrefix) + sstr_len;
heap_size += packed_prefix_str_len;
}
_hdrtoken_strs_heap_f = static_cast<const char *>(ats_malloc(heap_size));
_hdrtoken_strs_heap_l = _hdrtoken_strs_heap_f + heap_size - 1;
char *heap_ptr = const_cast<char *>(_hdrtoken_strs_heap_f);
for (i = 0; i < static_cast<int> SIZEOF(_hdrtoken_strs); i++) {
HdrTokenHeapPrefix prefix;
memset(&prefix, 0, sizeof(HdrTokenHeapPrefix));
prefix.wks_idx = i;
prefix.wks_length = hdrtoken_str_lengths[i];
prefix.wks_token_type = HDRTOKEN_TYPE_OTHER; // default, can override later
prefix.wks_info.name = nullptr; // default, can override later
prefix.wks_info.slotid = MIME_SLOTID_NONE; // default, can override later
prefix.wks_info.mask = TOK_64_CONST(0); // default, can override later
prefix.wks_info.flags = HTIF_MULTVALS; // default, can override later
int sstr_len = snap_up_to_multiple(hdrtoken_str_lengths[i] + 1, sizeof(HdrTokenHeapPrefix));
*reinterpret_cast<HdrTokenHeapPrefix *>(heap_ptr) = prefix; // set string prefix
heap_ptr += sizeof(HdrTokenHeapPrefix); // advance heap ptr past index
hdrtoken_strs[i] = heap_ptr; // record string pointer
// coverity[secure_coding]
ink_strlcpy(const_cast<char *>(hdrtoken_strs[i]), _hdrtoken_strs[i],
heap_size - sizeof(HdrTokenHeapPrefix)); // copy string into heap
heap_ptr += sstr_len; // advance heap ptr past string
heap_size -= sstr_len;
}
// Set the token types for certain tokens
for (i = 0; _hdrtoken_strs_type_initializers[i].name != nullptr; i++) {
int wks_idx;
HdrTokenHeapPrefix *prefix;
wks_idx = hdrtoken_tokenize_dfa(_hdrtoken_strs_type_initializers[i].name,
static_cast<int>(strlen(_hdrtoken_strs_type_initializers[i].name)));
ink_assert((wks_idx >= 0) && (wks_idx < (int)SIZEOF(hdrtoken_strs)));
// coverity[negative_returns]
prefix = hdrtoken_index_to_prefix(wks_idx);
prefix->wks_token_type = _hdrtoken_strs_type_initializers[i].type;
}
// Set special data for field names
for (i = 0; _hdrtoken_strs_field_initializers[i].name != nullptr; i++) {
int wks_idx;
HdrTokenHeapPrefix *prefix;
wks_idx = hdrtoken_tokenize_dfa(_hdrtoken_strs_field_initializers[i].name,
static_cast<int>(strlen(_hdrtoken_strs_field_initializers[i].name)));
ink_assert((wks_idx >= 0) && (wks_idx < (int)SIZEOF(hdrtoken_strs)));
prefix = hdrtoken_index_to_prefix(wks_idx);
prefix->wks_info.slotid = _hdrtoken_strs_field_initializers[i].slotid;
prefix->wks_info.flags = _hdrtoken_strs_field_initializers[i].flags;
prefix->wks_info.mask = _hdrtoken_strs_field_initializers[i].mask;
}
for (i = 0; i < static_cast<int> SIZEOF(_hdrtoken_strs); i++) {
HdrTokenHeapPrefix *prefix = hdrtoken_index_to_prefix(i);
prefix->wks_info.name = hdrtoken_strs[i];
hdrtoken_str_token_types[i] = prefix->wks_token_type; // parallel array for speed
hdrtoken_str_slotids[i] = prefix->wks_info.slotid; // parallel array for speed
hdrtoken_str_masks[i] = prefix->wks_info.mask; // parallel array for speed
hdrtoken_str_flags[i] = prefix->wks_info.flags; // parallel array for speed
}
hdrtoken_hash_init();
}
}
/*-------------------------------------------------------------------------
-------------------------------------------------------------------------*/
/**
  Look up a string with the well-known string DFA.

  @param string         text to match
  @param string_len     number of bytes of @a string to match
  @param wks_string_out if non-null, receives the well-known string for the
                        match, or nullptr when nothing matched
  @return the wks_idx of the match, or -1 when nothing matched
 */
int
hdrtoken_tokenize_dfa(const char *string, int string_len, const char **wks_string_out)
{
  int matched      = hdrtoken_strs_dfa->match({string, size_t(string_len)});
  const bool found = (matched >= 0);

  // collapse every failure value to -1
  if (!found) {
    matched = -1;
  }

  if (wks_string_out != nullptr) {
    *wks_string_out = found ? hdrtoken_index_to_wks(matched) : nullptr;
  }

  return matched;
}
/*-------------------------------------------------------------------------
-------------------------------------------------------------------------*/
/**
  Compare two byte ranges of equal length, ignoring ASCII letter case.

  @return true when all @a len bytes match after ASCII case folding
 */
static bool
hdrtoken_ascii_nocase_equal(const char *lhs, const char *rhs, int len)
{
  for (int i = 0; i < len; i++) {
    unsigned char a = static_cast<unsigned char>(lhs[i]);
    unsigned char b = static_cast<unsigned char>(rhs[i]);

    if (a >= 'A' && a <= 'Z') {
      a = static_cast<unsigned char>(a + ('a' - 'A'));
    }
    if (b >= 'A' && b <= 'Z') {
      b = static_cast<unsigned char>(b + ('a' - 'A'));
    }
    if (a != b) {
      return false;
    }
  }
  return true;
}

/**
  Map a string to its well-known string (WKS) using the hash table.

  If @a string already is a WKS pointer its index is returned directly.
  Otherwise the string is hashed case-insensitively and the matching bucket is
  accepted only when the hash, the length and the bytes themselves (ignoring
  ASCII case) agree, so an unrelated string whose 32-bit hash happens to
  collide with a well-known string is not mistaken for it.

  @param string         text to tokenize; must not be null
  @param string_len     number of bytes in @a string
  @param wks_string_out if non-null, receives the WKS pointer on success; it is
                        left untouched when no WKS is found
  @return the wks_idx of the well-known string, or -1 if there is none
 */
int
hdrtoken_tokenize(const char *string, int string_len, const char **wks_string_out)
{
  ink_assert(string != nullptr);

  // fast path: the caller handed us a pointer into the WKS heap
  if (hdrtoken_is_wks(string)) {
    int wks_idx = hdrtoken_wks_to_index(string);
    if (wks_string_out) {
      *wks_string_out = string;
    }
    return wks_idx;
  }

  uint32_t hash              = hdrtoken_hash(reinterpret_cast<const unsigned char *>(string), static_cast<unsigned int>(string_len));
  uint32_t slot              = hash_to_slot(hash);
  HdrTokenHashBucket *bucket = &(hdrtoken_hash_table[slot]);

  if ((bucket->wks != nullptr) && (bucket->hash == hash) && (hdrtoken_wks_to_length(bucket->wks) == string_len) &&
      hdrtoken_ascii_nocase_equal(bucket->wks, string, string_len)) {
    int wks_idx = hdrtoken_wks_to_index(bucket->wks);
    if (wks_string_out) {
      *wks_string_out = bucket->wks;
    }
    return wks_idx;
  }

  Debug("hdr_token", "Did not find a WKS for '%.*s'", string_len, string);
  return -1;
}
/*-------------------------------------------------------------------------
-------------------------------------------------------------------------*/
/**
  Find the well-known string for a NUL-terminated string.

  @param string NUL-terminated text to look up
  @return the well-known string pointer, or nullptr if there is none
 */
const char *
hdrtoken_string_to_wks(const char *string)
{
  const char *result = nullptr;
  const int length   = static_cast<int>(strlen(string));

  // the index is not needed here, only the string pointer
  hdrtoken_tokenize(string, length, &result);
  return result;
}
/*-------------------------------------------------------------------------
-------------------------------------------------------------------------*/
/**
  Find the well-known string for a string of explicit length.

  @param string text to look up (does not need to be NUL-terminated here;
                only @a length bytes are passed on)
  @param length number of bytes in @a string
  @return the well-known string pointer, or nullptr if there is none
 */
const char *
hdrtoken_string_to_wks(const char *string, int length)
{
  const char *result = nullptr;

  // the index is not needed here, only the string pointer
  hdrtoken_tokenize(string, length, &result);
  return result;
}