Enhance Connection Collapse in ATS core
Add an option to support cache open read retry on a write lock
failure. With this option, as long as read-while-writer is set
up following the guidelines in the docs, there should be no need
for any plugins to augment the core. Eventual plan is to deprecate
collapsed_forwarding plugin with this new support.
For more context on this, see
https://cwiki.apache.org/confluence/display/TS/Presentations+-+2019?preview=/112821251/132320653/Collapsed%20Forwarding%20.pdf
diff --git a/doc/admin-guide/files/records.config.en.rst b/doc/admin-guide/files/records.config.en.rst
index ea7dfb8..e86b742 100644
--- a/doc/admin-guide/files/records.config.en.rst
+++ b/doc/admin-guide/files/records.config.en.rst
@@ -2289,6 +2289,9 @@
The number of times to attempt a cache open write upon failure to get a write lock.
+ This config is ignored when :ts:cv:`proxy.config.http.cache.open_write_fail_action` is
+ set to ``5``.
+
.. ts:cv:: CONFIG proxy.config.http.cache.open_write_fail_action INT 0
:reloadable:
:overridable:
@@ -2311,6 +2314,12 @@
:ts:cv:`proxy.config.http.cache.max_stale_age`. Otherwise, go to
origin server.
``4`` Return a ``502`` error on either a cache miss or on a revalidation.
+ ``5`` Retry Cache Read on a Cache Write Lock failure. This option together
+ with `proxy.config.cache.enable_read_while_writer` configuration
+ allows to collapse concurrent requests without a need for any plugin.
+ Make sure to configure Read While Writer feature correctly following
+ the docs in Cache Basics section. Note that this option may result in
+ CACHE_LOOKUP_COMPLETE HOOK being called back more than once.
===== ======================================================================
Customizable User Response Pages
diff --git a/proxy/http/HttpCacheSM.cc b/proxy/http/HttpCacheSM.cc
index 731ebf2..0832dbc 100644
--- a/proxy/http/HttpCacheSM.cc
+++ b/proxy/http/HttpCacheSM.cc
@@ -159,7 +159,8 @@
{
STATE_ENTER(&HttpCacheSM::state_cache_open_write, event);
ink_assert(captive_action.cancelled == 0);
- pending_action = nullptr;
+ pending_action = nullptr;
+ bool read_retry_on_write_fail = false;
switch (event) {
case CACHE_EVENT_OPEN_WRITE:
@@ -171,9 +172,26 @@
break;
case CACHE_EVENT_OPEN_WRITE_FAILED:
- if (open_write_tries <= master_sm->t_state.txn_conf->max_cache_open_write_retries) {
+ if (master_sm->t_state.txn_conf->cache_open_write_fail_action == HttpTransact::CACHE_WL_FAIL_ACTION_READ_RETRY) {
+ // fall back to open_read_tries
+ // Note that when CACHE_WL_FAIL_ACTION_READ_RETRY is configured, max_cache_open_write_retries
+ // is automatically ignored. Make sure to not disable max_cache_open_read_retries
+ // with CACHE_WL_FAIL_ACTION_READ_RETRY as this results in proxy'ing to origin
+ // without write retries in both a cache miss or a cache refresh scenario.
+ if (open_write_tries <= master_sm->t_state.txn_conf->max_cache_open_write_retries) {
+ Debug("http_cache", "[%" PRId64 "] [state_cache_open_write] cache open write failure %d. read retry triggered",
+ master_sm->sm_id, open_write_tries);
+ open_read_tries = 0;
+ read_retry_on_write_fail = true;
+ // make sure it doesn't loop indefinitely
+ open_write_tries = master_sm->t_state.txn_conf->max_cache_open_write_retries + 1;
+ }
+ }
+ if (read_retry_on_write_fail || open_write_tries <= master_sm->t_state.txn_conf->max_cache_open_write_retries) {
// Retry open write;
open_write_cb = false;
+ // reset captive_action since HttpSM cancelled it
+ captive_action.cancelled = 0;
do_schedule_in();
} else {
// The cache is hosed or full or something.
@@ -188,18 +206,27 @@
break;
case EVENT_INTERVAL:
- // Retry the cache open write if the number retries is less
- // than or equal to the max number of open write retries
- ink_assert(open_write_tries <= master_sm->t_state.txn_conf->max_cache_open_write_retries);
- Debug("http_cache",
- "[%" PRId64 "] [state_cache_open_write] cache open write failure %d. "
- "retrying cache open write...",
- master_sm->sm_id, open_write_tries);
+ if (master_sm->t_state.txn_conf->cache_open_write_fail_action == HttpTransact::CACHE_WL_FAIL_ACTION_READ_RETRY) {
+ Debug("http_cache",
+ "[%" PRId64 "] [state_cache_open_write] cache open write failure %d. "
+ "falling back to read retry...",
+ master_sm->sm_id, open_write_tries);
+ open_read_cb = false;
+ master_sm->handleEvent(CACHE_EVENT_OPEN_READ, data);
+ } else {
+ Debug("http_cache",
+ "[%" PRId64 "] [state_cache_open_write] cache open write failure %d. "
+ "retrying cache open write...",
+ master_sm->sm_id, open_write_tries);
- open_write(&cache_key, lookup_url, read_request_hdr, master_sm->t_state.cache_info.object_read,
- static_cast<time_t>(
- (master_sm->t_state.cache_control.pin_in_cache_for < 0) ? 0 : master_sm->t_state.cache_control.pin_in_cache_for),
- retry_write, false);
+ // Retry the cache open write if the number retries is less
+ // than or equal to the max number of open write retries
+ ink_assert(open_write_tries <= master_sm->t_state.txn_conf->max_cache_open_write_retries);
+ open_write(&cache_key, lookup_url, read_request_hdr, master_sm->t_state.cache_info.object_read,
+ static_cast<time_t>(
+ (master_sm->t_state.cache_control.pin_in_cache_for < 0) ? 0 : master_sm->t_state.cache_control.pin_in_cache_for),
+ retry_write, false);
+ }
break;
default:
diff --git a/proxy/http/HttpSM.cc b/proxy/http/HttpSM.cc
index 98482ce..c71be4a 100644
--- a/proxy/http/HttpSM.cc
+++ b/proxy/http/HttpSM.cc
@@ -2456,6 +2456,15 @@
// INTENTIONAL FALL THROUGH
// Allow for stale object to be served
case CACHE_EVENT_OPEN_READ:
+ if (!t_state.cache_info.object_read) {
+ t_state.cache_open_write_fail_action = t_state.txn_conf->cache_open_write_fail_action;
+ // Note that CACHE_LOOKUP_COMPLETE may be invoked more than once
+ // if CACHE_WL_FAIL_ACTION_READ_RETRY is configured
+ ink_assert(t_state.cache_open_write_fail_action == HttpTransact::CACHE_WL_FAIL_ACTION_READ_RETRY);
+ t_state.cache_lookup_result = HttpTransact::CACHE_LOOKUP_NONE;
+ t_state.cache_info.write_lock_state = HttpTransact::CACHE_WL_READ_RETRY;
+ break;
+ }
// The write vector was locked and the cache_sm retried
// and got the read vector again.
cache_sm.cache_read_vc->get_http_info(&t_state.cache_info.object_read);
diff --git a/proxy/http/HttpTransact.cc b/proxy/http/HttpTransact.cc
index f5898f0..920b23b 100644
--- a/proxy/http/HttpTransact.cc
+++ b/proxy/http/HttpTransact.cc
@@ -2896,7 +2896,6 @@
}
TRANSACT_RETURN(SM_ACTION_SEND_ERROR_CACHE_NOOP, nullptr);
- return;
default:
s->cache_info.write_status = CACHE_WRITE_LOCK_MISS;
remove_ims = true;
@@ -2904,14 +2903,25 @@
}
break;
case CACHE_WL_READ_RETRY:
- // Write failed but retried and got a vector to read
- // We need to clean up our state so that transact does
- // not assert later on. Then handle the open read hit
- //
s->request_sent_time = UNDEFINED_TIME;
s->response_received_time = UNDEFINED_TIME;
s->cache_info.action = CACHE_DO_LOOKUP;
- remove_ims = true;
+ if (!s->cache_info.object_read) {
+ // Write failed and read retry triggered
+ // Clean up server_request and re-initiate
+ // Cache Lookup
+ ink_assert(s->cache_open_write_fail_action == HttpTransact::CACHE_WL_FAIL_ACTION_READ_RETRY);
+ s->cache_info.write_status = CACHE_WRITE_LOCK_MISS;
+ StateMachineAction_t next;
+ next = SM_ACTION_CACHE_LOOKUP;
+ s->next_action = next;
+ s->hdr_info.server_request.destroy();
+ TRANSACT_RETURN(next, nullptr);
+ }
+ // Write failed but retried and got a vector to read
+ // We need to clean up our state so that transact does
+ // not assert later on. Then handle the open read hit
+ remove_ims = true;
SET_VIA_STRING(VIA_DETAIL_CACHE_TYPE, VIA_DETAIL_CACHE);
break;
case CACHE_WL_INIT:
diff --git a/proxy/http/HttpTransact.h b/proxy/http/HttpTransact.h
index f7bfef5..68ec711 100644
--- a/proxy/http/HttpTransact.h
+++ b/proxy/http/HttpTransact.h
@@ -251,6 +251,7 @@
CACHE_WL_FAIL_ACTION_STALE_ON_REVALIDATE = 0x02,
CACHE_WL_FAIL_ACTION_ERROR_ON_MISS_STALE_ON_REVALIDATE = 0x03,
CACHE_WL_FAIL_ACTION_ERROR_ON_MISS_OR_REVALIDATE = 0x04,
+ CACHE_WL_FAIL_ACTION_READ_RETRY = 0x05,
TOTAL_CACHE_WL_FAIL_ACTION_TYPES
};