Enhance Connection Collapse in ATS core

Add an option to support cache open read retry on a write lock
failure. With this option, as long as read-while-writer is set
up following the guidelines in the docs, there should be no need
for any plugins to augment the core. Eventual plan is to deprecate
collapsed_forwarding plugin with this new support.

For more context on this, see
https://cwiki.apache.org/confluence/display/TS/Presentations+-+2019?preview=/112821251/132320653/Collapsed%20Forwarding%20.pdf
diff --git a/doc/admin-guide/files/records.config.en.rst b/doc/admin-guide/files/records.config.en.rst
index ea7dfb8..e86b742 100644
--- a/doc/admin-guide/files/records.config.en.rst
+++ b/doc/admin-guide/files/records.config.en.rst
@@ -2289,6 +2289,9 @@
 
     The number of times to attempt a cache open write upon failure to get a write lock.
 
+    This config is ignored when :ts:cv:`proxy.config.http.cache.open_write_fail_action` is
+    set to ``5``.
+
 .. ts:cv:: CONFIG proxy.config.http.cache.open_write_fail_action INT 0
    :reloadable:
    :overridable:
@@ -2311,6 +2314,12 @@
          :ts:cv:`proxy.config.http.cache.max_stale_age`. Otherwise, go to
          origin server.
    ``4`` Return a ``502`` error on either a cache miss or on a revalidation.
+   ``5`` Retry Cache Read on a Cache Write Lock failure. This option together
+         with `proxy.config.cache.enable_read_while_writer` configuration
+         allows to collapse concurrent requests without a need for any plugin.
+         Make sure to configure Read While Writer feature correctly following
+         the docs in Cache Basics section. Note that this option may result in
+         CACHE_LOOKUP_COMPLETE HOOK being called back more than once.
    ===== ======================================================================
 
 Customizable User Response Pages
diff --git a/proxy/http/HttpCacheSM.cc b/proxy/http/HttpCacheSM.cc
index 731ebf2..0832dbc 100644
--- a/proxy/http/HttpCacheSM.cc
+++ b/proxy/http/HttpCacheSM.cc
@@ -159,7 +159,8 @@
 {
   STATE_ENTER(&HttpCacheSM::state_cache_open_write, event);
   ink_assert(captive_action.cancelled == 0);
-  pending_action = nullptr;
+  pending_action                = nullptr;
+  bool read_retry_on_write_fail = false;
 
   switch (event) {
   case CACHE_EVENT_OPEN_WRITE:
@@ -171,9 +172,26 @@
     break;
 
   case CACHE_EVENT_OPEN_WRITE_FAILED:
-    if (open_write_tries <= master_sm->t_state.txn_conf->max_cache_open_write_retries) {
+    if (master_sm->t_state.txn_conf->cache_open_write_fail_action == HttpTransact::CACHE_WL_FAIL_ACTION_READ_RETRY) {
+      // fall back to open_read_tries
+      // Note that when CACHE_WL_FAIL_ACTION_READ_RETRY is configured, max_cache_open_write_retries
+      // is automatically ignored. Make sure to not disable max_cache_open_read_retries
+      // with CACHE_WL_FAIL_ACTION_READ_RETRY as this results in proxy'ing to origin
+      // without write retries in both a cache miss or a cache refresh scenario.
+      if (open_write_tries <= master_sm->t_state.txn_conf->max_cache_open_write_retries) {
+        Debug("http_cache", "[%" PRId64 "] [state_cache_open_write] cache open write failure %d. read retry triggered",
+              master_sm->sm_id, open_write_tries);
+        open_read_tries          = 0;
+        read_retry_on_write_fail = true;
+        // make sure it doesn't loop indefinitely
+        open_write_tries = master_sm->t_state.txn_conf->max_cache_open_write_retries + 1;
+      }
+    }
+    if (read_retry_on_write_fail || open_write_tries <= master_sm->t_state.txn_conf->max_cache_open_write_retries) {
       // Retry open write;
       open_write_cb = false;
+      // reset captive_action since HttpSM cancelled it
+      captive_action.cancelled = 0;
       do_schedule_in();
     } else {
       // The cache is hosed or full or something.
@@ -188,18 +206,27 @@
     break;
 
   case EVENT_INTERVAL:
-    // Retry the cache open write if the number retries is less
-    // than or equal to the max number of open write retries
-    ink_assert(open_write_tries <= master_sm->t_state.txn_conf->max_cache_open_write_retries);
-    Debug("http_cache",
-          "[%" PRId64 "] [state_cache_open_write] cache open write failure %d. "
-          "retrying cache open write...",
-          master_sm->sm_id, open_write_tries);
+    if (master_sm->t_state.txn_conf->cache_open_write_fail_action == HttpTransact::CACHE_WL_FAIL_ACTION_READ_RETRY) {
+      Debug("http_cache",
+            "[%" PRId64 "] [state_cache_open_write] cache open write failure %d. "
+            "falling back to read retry...",
+            master_sm->sm_id, open_write_tries);
+      open_read_cb = false;
+      master_sm->handleEvent(CACHE_EVENT_OPEN_READ, data);
+    } else {
+      Debug("http_cache",
+            "[%" PRId64 "] [state_cache_open_write] cache open write failure %d. "
+            "retrying cache open write...",
+            master_sm->sm_id, open_write_tries);
 
-    open_write(&cache_key, lookup_url, read_request_hdr, master_sm->t_state.cache_info.object_read,
-               static_cast<time_t>(
-                 (master_sm->t_state.cache_control.pin_in_cache_for < 0) ? 0 : master_sm->t_state.cache_control.pin_in_cache_for),
-               retry_write, false);
+      // Retry the cache open write if the number retries is less
+      // than or equal to the max number of open write retries
+      ink_assert(open_write_tries <= master_sm->t_state.txn_conf->max_cache_open_write_retries);
+      open_write(&cache_key, lookup_url, read_request_hdr, master_sm->t_state.cache_info.object_read,
+                 static_cast<time_t>(
+                   (master_sm->t_state.cache_control.pin_in_cache_for < 0) ? 0 : master_sm->t_state.cache_control.pin_in_cache_for),
+                 retry_write, false);
+    }
     break;
 
   default:
diff --git a/proxy/http/HttpSM.cc b/proxy/http/HttpSM.cc
index 98482ce..c71be4a 100644
--- a/proxy/http/HttpSM.cc
+++ b/proxy/http/HttpSM.cc
@@ -2456,6 +2456,15 @@
   // INTENTIONAL FALL THROUGH
   // Allow for stale object to be served
   case CACHE_EVENT_OPEN_READ:
+    if (!t_state.cache_info.object_read) {
+      t_state.cache_open_write_fail_action = t_state.txn_conf->cache_open_write_fail_action;
+      // Note that CACHE_LOOKUP_COMPLETE may be invoked more than once
+      // if CACHE_WL_FAIL_ACTION_READ_RETRY is configured
+      ink_assert(t_state.cache_open_write_fail_action == HttpTransact::CACHE_WL_FAIL_ACTION_READ_RETRY);
+      t_state.cache_lookup_result         = HttpTransact::CACHE_LOOKUP_NONE;
+      t_state.cache_info.write_lock_state = HttpTransact::CACHE_WL_READ_RETRY;
+      break;
+    }
     // The write vector was locked and the cache_sm retried
     // and got the read vector again.
     cache_sm.cache_read_vc->get_http_info(&t_state.cache_info.object_read);
diff --git a/proxy/http/HttpTransact.cc b/proxy/http/HttpTransact.cc
index f5898f0..920b23b 100644
--- a/proxy/http/HttpTransact.cc
+++ b/proxy/http/HttpTransact.cc
@@ -2896,7 +2896,6 @@
       }
 
       TRANSACT_RETURN(SM_ACTION_SEND_ERROR_CACHE_NOOP, nullptr);
-      return;
     default:
       s->cache_info.write_status = CACHE_WRITE_LOCK_MISS;
       remove_ims                 = true;
@@ -2904,14 +2903,25 @@
     }
     break;
   case CACHE_WL_READ_RETRY:
-    //  Write failed but retried and got a vector to read
-    //  We need to clean up our state so that transact does
-    //  not assert later on.  Then handle the open read hit
-    //
     s->request_sent_time      = UNDEFINED_TIME;
     s->response_received_time = UNDEFINED_TIME;
     s->cache_info.action      = CACHE_DO_LOOKUP;
-    remove_ims                = true;
+    if (!s->cache_info.object_read) {
+      //  Write failed and read retry triggered
+      //  Clean up server_request and re-initiate
+      //  Cache Lookup
+      ink_assert(s->cache_open_write_fail_action == HttpTransact::CACHE_WL_FAIL_ACTION_READ_RETRY);
+      s->cache_info.write_status = CACHE_WRITE_LOCK_MISS;
+      StateMachineAction_t next;
+      next           = SM_ACTION_CACHE_LOOKUP;
+      s->next_action = next;
+      s->hdr_info.server_request.destroy();
+      TRANSACT_RETURN(next, nullptr);
+    }
+    //  Write failed but retried and got a vector to read
+    //  We need to clean up our state so that transact does
+    //  not assert later on.  Then handle the open read hit
+    remove_ims = true;
     SET_VIA_STRING(VIA_DETAIL_CACHE_TYPE, VIA_DETAIL_CACHE);
     break;
   case CACHE_WL_INIT:
diff --git a/proxy/http/HttpTransact.h b/proxy/http/HttpTransact.h
index f7bfef5..68ec711 100644
--- a/proxy/http/HttpTransact.h
+++ b/proxy/http/HttpTransact.h
@@ -251,6 +251,7 @@
     CACHE_WL_FAIL_ACTION_STALE_ON_REVALIDATE               = 0x02,
     CACHE_WL_FAIL_ACTION_ERROR_ON_MISS_STALE_ON_REVALIDATE = 0x03,
     CACHE_WL_FAIL_ACTION_ERROR_ON_MISS_OR_REVALIDATE       = 0x04,
+    CACHE_WL_FAIL_ACTION_READ_RETRY                        = 0x05,
     TOTAL_CACHE_WL_FAIL_ACTION_TYPES
   };