blob: 34cb50c9f36445a5c009a92e486e568f0bfbfd37 [file] [log] [blame]
/** @file
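Scan the directory of a cache stripe, unmarshal the HTTP alternates stored in each document and print (or search) their URLs.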
@section license License
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "CacheScan.h"
#include "../../proxy/hdrs/HTTP.h"
#include "../../proxy/hdrs/HdrHeap.h"
#include "../../proxy/hdrs/MIME.h"
#include "../../proxy/hdrs/URL.h"
// using namespace ct;
// Size of an HTTPCacheAlt structure after ts::round_up, expressed as HdrHeapMarshalBlocks.
// It is subtracted from the remaining buffer length when an alternate is unmarshalled.
constexpr HdrHeapMarshalBlocks HTTP_ALT_MARSHAL_SIZE = ts::round_up(sizeof(HTTPCacheAlt));
namespace ct
{
/** Walk every directory entry of the stripe and hand the stored headers to get_alternates().
 *
 * For each segment and bucket the directory chain is followed. The document an
 * entry refers to is read from the span's file descriptor into a scratch buffer
 * and its header area is passed to get_alternates().
 *
 * @param search Forwarded unchanged to get_alternates().
 * @return An Errata instance; this function does not add messages to it, problems
 *         are reported on the standard streams instead.
 */
Errata
CacheScan::Scan(bool search)
{
  Errata zret;
  std::bitset<65536> dir_bitset;

  // Scratch buffer for a single document. It only ever grows and is reused for every entry.
  int64_t buff_size  = 1048576; // 1M initial guess
  char *stripe_buff2 = static_cast<char *>(ats_memalign(ats_pagesize(), buff_size));

  for (int s = 0; s < this->stripe->_segments; s++) {
    dir_bitset.reset();
    // The segment does not depend on the bucket, look it up once per segment.
    CacheDirEntry *seg = this->stripe->dir_segment(s);
    for (int b = 0; b < this->stripe->_buckets; b++) {
      CacheDirEntry *e = dir_bucket(b, seg);
      if (!dir_offset(e)) {
        continue;
      }
      do {
        // loop detected
        if (dir_bitset[dir_to_offset(e, seg)]) {
          break;
        }
        const int64_t size = dir_approx_size(e);
        if (size > buff_size) {
          ats_free(stripe_buff2);
          stripe_buff2 = static_cast<char *>(ats_memalign(ats_pagesize(), size));
          buff_size    = size; // remember the new capacity so later entries do not reallocate needlessly
        }
        int fd         = this->stripe->_span->_fd;
        int64_t offset = this->stripe->stripe_offset(e);
        ssize_t n      = pread(fd, stripe_buff2, size, offset);
        if (n < 0) {
          std::cout << "Failed to read content from the Stripe. " << strerror(errno) << std::endl;
        } else if (static_cast<size_t>(n) < sizeof(Doc)) {
          // pread may return fewer bytes than requested; without a complete Doc
          // header the buffer would still hold data of a previous entry.
          std::cerr << "Short read from the Stripe: " << n << " bytes" << std::endl;
        } else {
          Doc *doc = reinterpret_cast<Doc *>(stripe_buff2);
          // Only parse the header area if it lies completely inside the bytes just read.
          const int64_t hdr_end = (doc->hdr() - stripe_buff2) + static_cast<int64_t>(doc->hlen);
          if (hdr_end <= n) {
            get_alternates(doc->hdr(), doc->hlen, search);
          } else {
            std::cerr << "Document header exceeds the data read from the Stripe" << std::endl;
          }
        }
        dir_bitset[dir_to_offset(e, seg)] = true;
        e                                 = next_dir(e, seg);
      } while (e);
    }
  }
  ats_free(stripe_buff2);
  return zret;
}
/** Unmarshal the pointer members of an HTTP header object.
 *
 * Which members are processed depends on the polarity of the header: requests
 * carry a method string and a URL object, responses carry a reason string. The
 * MIME field header pointer is processed for both.
 *
 * @param obj    HTTP header object to update in place.
 * @param offset Value handed to the HDR_UNMARSHAL_* macros for every member.
 * @return Empty Errata on success; carries a message if the polarity is neither
 *         request nor response (in which case the MIME pointer is left untouched).
 */
Errata
CacheScan::unmarshal(HTTPHdrImpl *obj, intptr_t offset)
{
  Errata result;

  switch (obj->m_polarity) {
  case HTTP_TYPE_REQUEST: {
    HDR_UNMARSHAL_STR(obj->u.req.m_ptr_method, offset);
    HDR_UNMARSHAL_PTR(obj->u.req.m_url_impl, URLImpl, offset);
    break;
  }
  case HTTP_TYPE_RESPONSE: {
    HDR_UNMARSHAL_STR(obj->u.resp.m_ptr_reason, offset);
    break;
  }
  default: {
    result.push(0, 0, "Unknown Polarity of HTTPHdrImpl* obj");
    return result;
  }
  }

  // Common to requests and responses.
  HDR_UNMARSHAL_PTR(obj->m_fields_impl, MIMEHdrImpl, offset);
  return result;
}
/** Unmarshal a MIME header object.
 *
 * Processes the pointer to the tail of the field block list and then the first
 * field block, which is a member of the header object itself.
 *
 * @param obj    MIME header object to update in place.
 * @param offset Value handed to the HDR_UNMARSHAL_* macros.
 * @return The result of unmarshalling the first field block, so that a problem
 *         detected there is reported to the caller instead of being dropped.
 */
Errata
CacheScan::unmarshal(MIMEHdrImpl *obj, intptr_t offset)
{
  HDR_UNMARSHAL_PTR(obj->m_fblock_list_tail, MIMEFieldBlockImpl, offset);
  return this->unmarshal(&obj->m_first_fblock, offset);
}
/** Unmarshal every string member of a URL object.
 *
 * @param obj    URL object to update in place.
 * @param offset Value handed to HDR_UNMARSHAL_STR for each string member.
 * @return Always an empty Errata.
 */
Errata
CacheScan::unmarshal(URLImpl *obj, intptr_t offset)
{
  // Run HDR_UNMARSHAL_STR on a single string member.
  auto unmarshal_str = [offset](auto &str) {
    HDR_UNMARSHAL_STR(str, offset);
  };

  // URL components, in the same order as they are declared for marshalling.
  unmarshal_str(obj->m_ptr_scheme);
  unmarshal_str(obj->m_ptr_user);
  unmarshal_str(obj->m_ptr_password);
  unmarshal_str(obj->m_ptr_host);
  unmarshal_str(obj->m_ptr_port);
  unmarshal_str(obj->m_ptr_path);
  unmarshal_str(obj->m_ptr_params);
  unmarshal_str(obj->m_ptr_query);
  unmarshal_str(obj->m_ptr_fragment);
  // Cached printed form of the URL.
  unmarshal_str(obj->m_ptr_printed_string);

  return Errata{};
}
/** Unmarshal a block of MIME fields.
 *
 * Processes the link to the next block and then the first @c m_freetop slots of
 * the block. Live slots get their name, value and duplicate pointers
 * unmarshalled; every other slot is marked empty.
 *
 * @param mf     Field block to update in place.
 * @param offset Value handed to the HDR_UNMARSHAL_* macros.
 * @return Empty Errata on success; carries a message as soon as a slot does not
 *         lie completely inside the @c m_length bytes of the block. Slots
 *         processed before that point stay modified.
 */
Errata
CacheScan::unmarshal(MIMEFieldBlockImpl *mf, intptr_t offset)
{
  Errata zret;
  HDR_UNMARSHAL_PTR(mf->m_next, MIMEFieldBlockImpl, offset);
  ts::MemSpan mf_mem(reinterpret_cast<char *>(mf), mf->m_length);
  for (uint32_t index = 0; index < mf->m_freetop; index++) {
    MIMEField *field = &(mf->m_field_slots[index]);
    // The slot is read and written below, so both its first and its last byte
    // have to be inside the block, not just its start address.
    char *slot_first = reinterpret_cast<char *>(field);
    char *slot_last  = reinterpret_cast<char *>(field + 1) - 1;
    if (!mf_mem.contains(slot_first) || !mf_mem.contains(slot_last)) {
      zret.push(0, 0, "Out of bounds memory in the deserialized MIMEFieldBlockImpl");
      return zret;
    }
    if (field->m_readiness == MIME_FIELD_SLOT_READINESS_LIVE) {
      HDR_UNMARSHAL_STR(field->m_ptr_name, offset);
      HDR_UNMARSHAL_STR(field->m_ptr_value, offset);
      if (field->m_next_dup) {
        HDR_UNMARSHAL_PTR(field->m_next_dup, MIMEField, offset);
      }
    } else {
      // Clear out other types of slots
      field->m_readiness = MIME_FIELD_SLOT_READINESS_EMPTY;
    }
  }
  return zret;
}
/** Unmarshal a serialized header heap in place.
 *
 * Validates the heap header, converts the stored heap offsets to pointers and
 * then walks the objects between @c m_data_start and @c m_free_start,
 * unmarshalling each one according to its type.
 *
 * @param hh         Marshalled heap; modified in place.
 * @param buf_length Size of the buffer holding the heap. A heap whose unmarshal
 *                   size exceeds it is rejected.
 * @param obj_type   Object type to look for while walking the heap.
 * @param found_obj  [out] First object whose type equals @a obj_type, nullptr if
 *                   none was seen (always reset on entry).
 * @param block_ref  If not nullptr, handed to the ref count pointer of the first
 *                   read-only heap.
 * @return The heap's unmarshal size rounded up to HdrHeapMarshalBlocks on
 *         success, -1 on every failure.
 */
int
CacheScan::unmarshal(HdrHeap *hh, int buf_length, int obj_type, HdrHeapObjImpl **found_obj, RefCountObj *block_ref)
{
  const int zret = -1;
  *found_obj     = nullptr;

  // Check out this heap and make sure it is OK
  if (hh->m_magic != HDR_BUF_MAGIC_MARSHALED) {
    ink_assert(!"HdrHeap::unmarshal bad magic");
    return zret;
  }
  int unmarshal_size = hh->unmarshal_size();
  if (unmarshal_size > buf_length) {
    ink_assert(!"HdrHeap::unmarshal truncated header");
    return zret;
  }
  hh->m_free_start = nullptr;

  // A marshalled heap is read only, has no free space and has a read-only string heap.
  if (hh->m_writeable != false) {
    std::cerr << "m_writeable has to be false" << std::endl;
    return zret;
  } else if (hh->m_free_size != 0) {
    std::cerr << "m_free_size is not 0" << std::endl;
    return zret;
  } else if (hh->m_ronly_heap[0].m_heap_start == nullptr) {
    std::cerr << "m_ronly_heap is nullptr" << std::endl;
    return zret;
  }

  // Convert Heap offsets to pointers
  char *heap_base                  = reinterpret_cast<char *>(hh);
  hh->m_data_start                 = heap_base + reinterpret_cast<intptr_t>(hh->m_data_start);
  hh->m_free_start                 = heap_base + hh->m_size;
  hh->m_ronly_heap[0].m_heap_start = heap_base + reinterpret_cast<intptr_t>(hh->m_ronly_heap[0].m_heap_start);

  // Crazy Invariant - If we are sitting in a ref counted block,
  // the HdrHeap lifetime is externally determined. Whoever
  // unmarshalls us should keep the block around as long as
  // they want to use the header. However, the strings can
  // live beyond the heap life time because they are copied
  // by reference into other header heap therefore we need
  // to the set the refcount ptr for the strings. We don't
  // actually increase the refcount here since for the header
  // the lifetime is explicit but copies will increase
  // the refcount
  if (block_ref) {
    hh->m_ronly_heap[0].m_ref_count_ptr.swizzle(block_ref);
  }

  // Problems found while unmarshalling a single object are reported but do not
  // abort the walk, so the remaining objects are still processed.
  auto report = [](Errata err) {
    if (err.size()) {
      std::cerr << err << std::endl;
    }
  };

  // Loop over objects and swizzle their pointers to live offsets
  char *obj_data  = hh->m_data_start;
  intptr_t offset = reinterpret_cast<intptr_t>(hh);
  while (obj_data < hh->m_free_start) {
    HdrHeapObjImpl *obj = reinterpret_cast<HdrHeapObjImpl *>(obj_data);
    if (!obj_is_aligned(obj)) {
      std::cout << "Invalid alignment of object of type HdrHeapObjImpl" << std::endl;
      return zret;
    }
    if (obj->m_type == static_cast<unsigned>(obj_type) && *found_obj == nullptr) {
      *found_obj = obj;
    }
    switch (obj->m_type) {
    case HDR_HEAP_OBJ_HTTP_HEADER:
      report(this->unmarshal(reinterpret_cast<HTTPHdrImpl *>(obj), offset));
      break;
    case HDR_HEAP_OBJ_URL:
      report(this->unmarshal(reinterpret_cast<URLImpl *>(obj), offset));
      break;
    case HDR_HEAP_OBJ_FIELD_BLOCK:
      report(this->unmarshal(reinterpret_cast<MIMEFieldBlockImpl *>(obj), offset));
      break;
    case HDR_HEAP_OBJ_MIME_HEADER:
      report(this->unmarshal(reinterpret_cast<MIMEHdrImpl *>(obj), offset));
      break;
    case HDR_HEAP_OBJ_EMPTY:
      // Nothing to do
      break;
    default:
      std::cout << "WARNING: Unmarshal failed due to unknown obj type " << static_cast<int>(obj->m_type) << " after "
                << static_cast<int>(obj_data - reinterpret_cast<char *>(hh)) << " bytes" << std::endl;
      return zret;
    }
    // A zero length would keep the walk on the same object forever; stop here
    // and keep what has been unmarshalled so far.
    if (obj->m_length <= 0) {
      std::cerr << "Invalid object length for deserialization: " << obj->m_length << std::endl;
      break;
    }
    obj_data = obj_data + obj->m_length;
  }
  hh->m_magic = HDR_BUF_MAGIC_ALIVE;
  return HdrHeapMarshalBlocks(ts::round_up(hh->unmarshal_size()));
}
/** Unmarshal a serialized HTTP alternate (HTTPCacheAlt) in place.
 *
 * Marks the alternate as alive, rebuilds the fragment offset table and
 * unmarshals the request and response header heaps that follow the alternate in
 * the buffer. On success @c m_unmarshal_len is set to the number of bytes
 * consumed.
 *
 * @param buf       Start of the marshalled alternate; modified in place.
 * @param len       Number of bytes available starting at @a buf.
 * @param block_ref Forwarded to the header heap unmarshalling.
 * @return Empty Errata on success, or if the alternate was already alive or has
 *         an unknown magic value; carries a message if the fragment table or one
 *         of the header heaps is invalid.
 *
 * NOTE(review): when the fragment count exceeds the integral slots a table is
 * allocated with ats_malloc and stored in @c m_frag_offsets; nothing in this
 * function releases it - confirm who owns that memory.
 */
Errata
CacheScan::unmarshal(char *buf, int len, RefCountObj *block_ref)
{
  Errata zret;
  HTTPCacheAlt *alt = reinterpret_cast<HTTPCacheAlt *>(buf);
  int orig_len      = len;

  if (alt->m_magic == CACHE_ALT_MAGIC_ALIVE) {
    // Already unmarshalled, must be a ram cache
    // it
    ink_assert(alt->m_unmarshal_len > 0);
    ink_assert(alt->m_unmarshal_len <= len);
    return zret;
  } else if (alt->m_magic != CACHE_ALT_MAGIC_MARSHALED) {
    ink_assert(!"HTTPInfo::unmarshal bad magic");
    return zret;
  }
  ink_assert(alt->m_unmarshal_len < 0);
  alt->m_magic = CACHE_ALT_MAGIC_ALIVE;
  ink_assert(alt->m_writeable == 0);
  len -= HTTP_ALT_MARSHAL_SIZE;

  // usually the fragment count is less or equal to 4
  if (alt->m_frag_offset_count > HTTPCacheAlt::N_INTEGRAL_FRAG_OFFSETS) {
    // stuff that didn't fit in the integral slots.
    int extra = sizeof(uint64_t) * alt->m_frag_offset_count - sizeof(alt->m_integral_frag_offsets);
    if (extra >= len || extra < 0) {
      zret.push(0, 0, "Invalid Fragment Count ", extra);
      return zret;
    }
    // In marshalled form the pointer holds the offset of the extra entries from
    // the start of the buffer. It comes from disk, so make sure the copy below
    // stays inside the buffer.
    intptr_t table_offset = reinterpret_cast<intptr_t>(alt->m_frag_offsets);
    if (table_offset < 0 || table_offset > static_cast<intptr_t>(orig_len) - extra) {
      zret.push(0, 0, "Invalid Fragment Offset Table location");
      return zret;
    }
    char *extra_src = buf + table_offset;
    // Actual buffer size, which must be a power of two.
    // Well, technically not, because we never modify an unmarshalled fragment
    // offset table, but it would be a nasty bug should that be done in the
    // future.
    int bcount = HTTPCacheAlt::N_INTEGRAL_FRAG_OFFSETS * 2;
    while (bcount < alt->m_frag_offset_count) {
      bcount *= 2;
    }
    alt->m_frag_offsets = static_cast<uint64_t *>(ats_malloc(bcount * sizeof(uint64_t)));
    memcpy(alt->m_frag_offsets, alt->m_integral_frag_offsets, sizeof(alt->m_integral_frag_offsets));
    memcpy(alt->m_frag_offsets + HTTPCacheAlt::N_INTEGRAL_FRAG_OFFSETS, extra_src, extra);
    len -= extra;
  } else if (alt->m_frag_offset_count > 0) {
    alt->m_frag_offsets = alt->m_integral_frag_offsets;
  } else {
    alt->m_frag_offsets = nullptr; // should really already be zero.
  }

  // Turn a marshalled heap offset into a pointer into the buffer. A zero
  // offset means "no heap"; negative offsets or offsets that are not below the
  // remaining length are treated the same way.
  auto heap_at = [buf, &len](intptr_t heap_offset) -> HdrHeap * {
    if (heap_offset <= 0 || heap_offset >= len) {
      return nullptr;
    }
    return reinterpret_cast<HdrHeap *>(buf + heap_offset);
  };

  HTTPHdrImpl *hh = nullptr;
  int tmp         = 0;

  // request hdrs
  HdrHeap *heap = heap_at(reinterpret_cast<intptr_t>(alt->m_request_hdr.m_heap));
  if (heap != nullptr) {
    tmp = this->unmarshal(heap, len, HDR_HEAP_OBJ_HTTP_HEADER, reinterpret_cast<HdrHeapObjImpl **>(&hh), block_ref);
    if (hh == nullptr || tmp < 0) {
      zret.push(0, 0, "HTTPInfo::request unmarshal failed");
      return zret;
    }
    len -= tmp;
    alt->m_request_hdr.m_heap              = heap;
    alt->m_request_hdr.m_http              = hh;
    alt->m_request_hdr.m_mime              = hh->m_fields_impl;
    alt->m_request_hdr.m_url_cached.m_heap = heap;
  }

  // response hdrs
  heap = heap_at(reinterpret_cast<intptr_t>(alt->m_response_hdr.m_heap));
  if (heap != nullptr) {
    tmp = this->unmarshal(heap, len, HDR_HEAP_OBJ_HTTP_HEADER, reinterpret_cast<HdrHeapObjImpl **>(&hh), block_ref);
    if (hh == nullptr || tmp < 0) {
      zret.push(0, 0, "HTTPInfo::response unmarshal failed");
      return zret;
    }
    len -= tmp;
    alt->m_response_hdr.m_heap = heap;
    alt->m_response_hdr.m_http = hh;
    alt->m_response_hdr.m_mime = hh->m_fields_impl;
  }

  alt->m_unmarshal_len = orig_len - len;
  return zret;
}
/** Check whether a URL object looks valid and is safe to read.
 *
 * All checks on the object itself are done before any of its members is read:
 * the pointer must not be null and the whole object must lie inside @a mem.
 * After that the object header and the strings that get_alternates() prints
 * (scheme, host, port, path, params, query) are validated.
 *
 * @param mem Memory the URL object and its strings have to be located in.
 * @param url URL object to check, may be nullptr.
 * @return true if the object has a positive length, is of type
 *         HDR_HEAP_OBJ_URL and every checked string is either null with length
 *         zero or completely inside @a mem; false otherwise.
 */
bool
CacheScan::check_url(ts::MemSpan<char> &mem, URLImpl *url)
{
  if (url == nullptr) {
    return false;
  }

  // The object must be fully inside the memory before it may be dereferenced.
  char *obj_first = reinterpret_cast<char *>(url);
  char *obj_last  = reinterpret_cast<char *>(url + 1) - 1;
  if (!mem.contains(obj_first) || !mem.contains(obj_last)) {
    return false;
  }

  if (url->m_length <= 0 || url->m_type != HDR_HEAP_OBJ_URL) {
    return false;
  }

  // A string is acceptable if it is absent (null, length 0) or if its first and
  // last byte are both inside the memory.
  auto str_in_bound = [&mem](const char *str, size_t str_len) -> bool {
    if (str == nullptr) {
      return str_len == 0;
    }
    char *first = const_cast<char *>(str);
    if (!mem.contains(first)) {
      return false;
    }
    return str_len == 0 || mem.contains(first + (str_len - 1));
  };

  return str_in_bound(url->m_ptr_scheme, url->m_len_scheme) && str_in_bound(url->m_ptr_host, url->m_len_host) &&
         str_in_bound(url->m_ptr_port, url->m_len_port) && str_in_bound(url->m_ptr_path, url->m_len_path) &&
         str_in_bound(url->m_ptr_params, url->m_len_params) && str_in_bound(url->m_ptr_query, url->m_len_query);
}
/** Unmarshal the alternates stored in a document header and report their URLs.
 *
 * The buffer is processed as a sequence of marshalled HTTPCacheAlt structures.
 * Each one is unmarshalled in place and the URL of its request header is
 * formatted. With @a search set the URL is printed only if the URL matcher
 * accepts it, otherwise every URL is printed together with the stripe hash text.
 * Processing stops at the first structure that is not in marshalled state.
 *
 * @param buf    Start of the header area; must be 4 byte aligned. The memory is
 *               modified although the pointer is const.
 * @param length Size of the header area in bytes.
 * @param search Select search mode instead of print-all mode.
 * @return The Errata of the last unmarshal call; non-empty if that call failed.
 */
Errata
CacheScan::get_alternates(const char *buf, int length, bool search)
{
  Errata zret;
  if (buf == nullptr || length <= 0) {
    return zret; // nothing to scan
  }
  ink_assert(!((reinterpret_cast<intptr_t>(buf)) & 3)); // buf must be aligned
  const char *start      = buf;
  RefCountObj *block_ref = nullptr;
  ts::MemSpan<char> doc_mem(const_cast<char *>(buf), length);

  while (length - (buf - start) > static_cast<int>(sizeof(HTTPCacheAlt))) {
    HTTPCacheAlt *a = reinterpret_cast<HTTPCacheAlt *>(const_cast<char *>(buf));
    if (a->m_magic != CACHE_ALT_MAGIC_MARSHALED) {
      // alternate retrieval failed
      break;
    }

    // Only the bytes from the current alternate to the end of the header area
    // are available to it, not the whole area.
    const int remaining = static_cast<int>(length - (buf - start));
    zret                = this->unmarshal(const_cast<char *>(buf), remaining, block_ref);
    if (zret.size()) {
      std::cerr << zret << std::endl;
      return zret;
    } else if (!a->m_request_hdr.m_http) {
      std::cerr << "no http object found in the request header object" << std::endl;
      return zret;
    } else if (!doc_mem.contains(reinterpret_cast<char *>(a->m_request_hdr.m_http))) {
      std::cerr << "out of bounds request header in the alternate" << std::endl;
      return zret;
    }

    auto *url = a->m_request_hdr.m_http->u.req.m_url_impl;
    if (url != nullptr && check_url(doc_mem, url)) {
      std::string str;
      if (search) {
        ts::bwprint(str, "{}://{}:{}/{};{}?{}", std::string_view(url->m_ptr_scheme, url->m_len_scheme),
                    std::string_view(url->m_ptr_host, url->m_len_host), std::string_view(url->m_ptr_port, url->m_len_port),
                    std::string_view(url->m_ptr_path, url->m_len_path), std::string_view(url->m_ptr_params, url->m_len_params),
                    std::string_view(url->m_ptr_query, url->m_len_query));
        if (u_matcher->match(str.data())) {
          str = this->stripe->hashText + " " + str;
          std::cout << "match found " << str << std::endl;
        }
      } else {
        ts::bwprint(str, "stripe: {} : {}://{}:{}/{};{}?{}", std::string_view(this->stripe->hashText),
                    std::string_view(url->m_ptr_scheme, url->m_len_scheme), std::string_view(url->m_ptr_host, url->m_len_host),
                    std::string_view(url->m_ptr_port, url->m_len_port), std::string_view(url->m_ptr_path, url->m_len_path),
                    std::string_view(url->m_ptr_params, url->m_len_params), std::string_view(url->m_ptr_query, url->m_len_query));
        std::cout << str << std::endl;
      }
    } else {
      std::cerr << "The retrieved url object is invalid" << std::endl;
    }

    // Without forward progress the loop would never terminate.
    if (a->m_unmarshal_len <= 0) {
      std::cerr << "invalid unmarshal length in the alternate" << std::endl;
      break;
    }
    buf += a->m_unmarshal_len;
  }
  return zret;
}
} // end namespace ct