blob: 3b1afe8cb15edd7f12b32f3646b654a22b083ea7 [file] [log] [blame]
/** @file
Implementation of various round robin nexthop selections strategies.
@section license License
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <mutex>
#include <yaml-cpp/yaml.h>
#include "HttpSM.h"
#include "NextHopRoundRobin.h"
NextHopRoundRobin::~NextHopRoundRobin()
{
NH_Debug(NH_DEBUG_TAG, "destructor called for strategy named: %s", strategy_name.c_str());
}
void
NextHopRoundRobin::findNextHop(TSHttpTxn txnp, void *ih, time_t now)
{
HttpSM *sm = reinterpret_cast<HttpSM *>(txnp);
ParentResult *result = &sm->t_state.parent_result;
HttpRequestData request_info = sm->t_state.request_data;
int64_t sm_id = sm->sm_id;
int64_t fail_threshold = sm->t_state.txn_conf->parent_fail_threshold;
int64_t retry_time = sm->t_state.txn_conf->parent_retry_time;
time_t _now = now;
bool firstcall = true;
bool parentUp = false;
bool parentRetry = false;
bool wrapped = result->wrap_around;
std::vector<bool> wrap_around(groups, false);
uint32_t cur_hst_index = 0;
uint32_t cur_grp_index = 0;
uint32_t hst_size = host_groups[cur_grp_index].size();
uint32_t start_group = 0;
uint32_t start_host = 0;
std::shared_ptr<HostRecord> cur_host;
HostStatus &pStatus = HostStatus::instance();
TSHostStatus host_stat = TSHostStatus::TS_HOST_STATUS_UP;
if (result->line_number != -1 && result->result != PARENT_UNDEFINED) {
firstcall = false;
}
if (firstcall) {
// distance is the index into the strategies map, this is the equivalent to the old line_number in parent.config.
result->line_number = distance;
NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] first call , cur_grp_index: %d, cur_hst_index: %d, distance: %d", sm_id, cur_grp_index,
cur_hst_index, distance);
switch (policy_type) {
case NH_FIRST_LIVE:
result->start_parent = cur_hst_index = 0;
cur_grp_index = 0;
break;
case NH_RR_STRICT: {
std::lock_guard<std::mutex> lock(_mutex);
cur_hst_index = result->start_parent = this->hst_index;
cur_grp_index = 0;
this->hst_index = (this->hst_index + 1) % hst_size;
} break;
case NH_RR_IP:
cur_grp_index = 0;
if (request_info.get_client_ip() != nullptr) {
cur_hst_index = result->start_parent = ntohl(ats_ip_hash(request_info.get_client_ip())) % hst_size;
} else {
cur_hst_index = this->hst_index;
}
break;
case NH_RR_LATCHED:
cur_grp_index = 0;
cur_hst_index = result->start_parent = latched_index;
break;
default:
ink_assert(0);
break;
}
cur_host = host_groups[cur_grp_index][cur_hst_index];
NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] first call, cur_grp_index: %d, cur_hst_index: %d", sm_id, cur_grp_index, cur_hst_index);
} else {
NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] next call, cur_grp_index: %d, cur_hst_index: %d, distance: %d", sm_id, cur_grp_index,
cur_hst_index, distance);
// Move to next parent due to failure
latched_index = cur_hst_index = (result->last_parent + 1) % hst_size;
cur_host = host_groups[cur_grp_index][cur_hst_index];
// Check to see if we have wrapped around
if (static_cast<unsigned int>(cur_hst_index) == result->start_parent) {
// We've wrapped around so bypass if we can
if (go_direct == true) {
result->result = PARENT_DIRECT;
} else {
result->result = PARENT_FAIL;
}
result->hostname = nullptr;
result->port = 0;
result->wrap_around = true;
return;
}
}
start_group = cur_grp_index;
start_host = cur_hst_index;
// Verify that the 'cur_hst' is available or retryable, if not loop through the array of parents seeing if any are up or
// should be retried
do {
HostStatRec *hst = pStatus.getHostStatus(cur_host->hostname.c_str());
host_stat = (hst) ? hst->status : TSHostStatus::TS_HOST_STATUS_UP;
// if the config ignore_self_detect is set to true and the host is down due to SELF_DETECT reason
// ignore the down status and mark it as avaialble
if (ignore_self_detect && (hst && hst->status == TS_HOST_STATUS_DOWN)) {
if (hst->reasons == Reason::SELF_DETECT) {
host_stat = TS_HOST_STATUS_UP;
}
}
NH_Debug(NH_DEBUG_TAG,
"[%" PRIu64 "] Selected a parent, %s, failCount (faileAt: %d failCount: %d), FailThreshold: %" PRIu64
", request_info->xact_start: %ld",
sm_id, cur_host->hostname.c_str(), (unsigned)cur_host->failedAt, cur_host->failCount, fail_threshold,
request_info.xact_start);
// check if 'cur_host' is available, mark it up if it is.
if ((cur_host->failedAt == 0) || (cur_host->failCount < fail_threshold)) {
if (host_stat == TS_HOST_STATUS_UP) {
NH_Debug(NH_DEBUG_TAG,
"[%" PRIu64
"] Selecting a parent, %s, due to little failCount (faileAt: %d failCount: %d), FailThreshold: %" PRIu64,
sm_id, cur_host->hostname.c_str(), (unsigned)cur_host->failedAt, cur_host->failCount, fail_threshold);
parentUp = true;
}
} else { // if not available, check to see if it can be retried. If so, set the retry flag and temporairly mark it as
// available.
_now == 0 ? _now = time(nullptr) : _now = now;
if (((result->wrap_around) || (cur_host->failedAt + retry_time) < static_cast<unsigned>(_now)) &&
host_stat == TS_HOST_STATUS_UP) {
// Reuse the parent
parentUp = true;
parentRetry = true;
NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] NextHop marked for retry %s:%d", sm_id, cur_host->hostname.c_str(),
host_groups[cur_grp_index][cur_hst_index]->getPort(scheme));
} else { // not retryable or available.
parentUp = false;
}
}
NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] parentUp: %s, hostname: %s, host status: %s", sm_id, parentUp ? "true" : "false",
cur_host->hostname.c_str(), HostStatusNames[host_stat]);
// The selected host is available or retryable, return the search result.
if (parentUp == true && host_stat != TS_HOST_STATUS_DOWN) {
NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] status for %s: %s", sm_id, cur_host->hostname.c_str(), HostStatusNames[host_stat]);
result->result = PARENT_SPECIFIED;
result->hostname = cur_host->hostname.c_str();
result->port = cur_host->getPort(scheme);
result->last_parent = cur_hst_index;
result->last_group = cur_grp_index;
result->retry = parentRetry;
ink_assert(result->hostname != nullptr);
ink_assert(result->port != 0);
NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] Chosen parent = %s.%d", sm_id, result->hostname, result->port);
return;
}
// only one host group is available, find another host if we have not wrapped.
if (groups == 1) {
latched_index = cur_hst_index = (cur_hst_index + 1) % hst_size;
if (start_host == cur_hst_index) {
wrap_around[cur_grp_index] = wrapped = result->wrap_around = true;
}
} else { // search the fail over groups.
if (ring_mode == NH_ALTERNATE_RING) { // use alternating ring mode.
cur_grp_index = (cur_grp_index + 1) % groups;
hst_size = host_groups[cur_grp_index].size();
if (cur_grp_index == start_group) {
latched_index = cur_hst_index = (cur_hst_index + 1) % hst_size;
if (cur_hst_index == start_host) {
wrapped = wrap_around[cur_grp_index] = result->wrap_around = true;
}
}
} else { // use the exhaust ring mode.
latched_index = cur_hst_index = (cur_hst_index + 1) % hst_size;
if (cur_hst_index == start_host) {
wrap_around[cur_grp_index] = true;
cur_grp_index = (cur_grp_index + 1) % groups;
if (cur_grp_index == start_group) {
wrapped = wrap_around[cur_grp_index] = result->wrap_around = true;
} else {
start_host = cur_hst_index = 0;
}
}
}
}
cur_host = host_groups[cur_grp_index][cur_hst_index];
NH_Debug(
NH_DEBUG_TAG,
"[%" PRIu64 "] host: %s, groups: %d, cur_grp_index: %d, cur_hst_index: %d, wrapped: %s, start_group: %d, start_host: %d",
sm_id, cur_host->hostname.c_str(), groups, cur_grp_index, cur_hst_index, wrapped ? "true" : "false", start_group, start_host);
} while (!wrapped);
if (go_direct == true) {
result->result = PARENT_DIRECT;
} else {
result->result = PARENT_FAIL;
}
result->hostname = nullptr;
result->port = 0;
}