| /** @file |
| |
| Entry point to the traffic manager. |
| |
| @section license License |
| |
| Licensed to the Apache Software Foundation (ASF) under one |
| or more contributor license agreements. See the NOTICE file |
| distributed with this work for additional information |
| regarding copyright ownership. The ASF licenses this file |
| to you under the Apache License, Version 2.0 (the |
| "License"); you may not use this file except in compliance |
| with the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| */ |
| |
| #include "tscore/ink_sys_control.h" |
| #include "tscore/ink_cap.h" |
| #include "tscore/ink_lockfile.h" |
| #include "tscore/ink_sock.h" |
| #include "tscore/ink_args.h" |
| #include "tscore/ink_syslog.h" |
| #include "tscore/runroot.h" |
| #include "tscore/Filenames.h" |
| |
| #include "WebMgmtUtils.h" |
| #include "MgmtUtils.h" |
| #include "MgmtSocket.h" |
| #include "NetworkUtilsRemote.h" |
| #include "FileManager.h" |
| #include "tscore/I_Layout.h" |
| #include "tscore/I_Version.h" |
| #include "tscore/TextBuffer.h" |
| #include "DiagsConfig.h" |
| #include "HTTP.h" |
| #include "CoreAPI.h" |
| |
| #include "LocalManager.h" |
| #include "TSControlMain.h" |
| #include "EventControlMain.h" |
| |
| // Needs LibRecordsConfigInit() |
| #include "RecordsConfig.h" |
| |
| #include "records/P_RecLocal.h" |
| #include "DerivativeMetrics.h" |
| |
| #include <random> |
| |
| #if TS_USE_POSIX_CAP |
| #include <sys/capability.h> |
| #endif |
| #include <grp.h> |
| #include <atomic> |
| #include "tscore/bwf_std_format.h" |
| |
// Margin added to the current descriptor limit before set_process_limits()
// decides that the connection throttle requires a higher limit.
#define FD_THROTTLE_HEADROOM (128 + 64) // TODO: consolidate with THROTTLE_FD_HEADROOM
// File name passed to DiagsConfig for the manager's diagnostics log.
#define DIAGS_LOG_FILENAME "manager.log"

// sigHupNotifier (below) is stored to from SignalHandler(), so insist on a
// lock free std::atomic<int>.
#if ATOMIC_INT_LOCK_FREE != 2
#error "Need lock free std::atomic<int>"
#endif

using namespace std::literals;

// These globals are still referenced directly by the management API.
LocalManager *lmgmt = nullptr;
FileManager *configFiles;

// Defined elsewhere in this file.
static void fileUpdated(char *fname, char *configName);
static void runAsUser(const char *userName);

#if defined(freebsd)
extern "C" int getpwnam_r(const char *name, struct passwd *result, char *buffer, size_t buflen, struct passwd **resptr);
#endif

static AppVersionInfo appVersionInfo; // Build info for this application

// Command line state filled in by process_args() in main().
static inkcoreapi DiagsConfig *diagsConfig = nullptr;
static char debug_tags[1024] = "";
static char action_tags[1024] = "";
static int proxy_off = false;
static int listen_off = false;
static char bind_stdout[512] = "";
static char bind_stderr[512] = "";
static const char *mgmt_path = nullptr;

// Records configuration file name; the "recordsConf" command line option
// can point this somewhere else.
static const char *recs_conf = ts::filename::RECORDS;

// Last RLIMIT_NOFILE soft limit successfully applied by set_process_limits().
static int fds_limit;

// TODO: Use positive instead of negative selection.
// This should just be #if defined(solaris)
#if !defined(linux) && !defined(freebsd) && !defined(darwin)
static void SignalHandler(int sig, siginfo_t *t, void *f);
static void SignalAlrmHandler(int sig, siginfo_t *t, void *f);
#else
static void SignalHandler(int sig);
static void SignalAlrmHandler(int sig);
#endif

// Set to 1 by SignalHandler() on SIGHUP and polled by the main loop.
static std::atomic<int> sigHupNotifier;
static void SigChldHandler(int sig);
| |
| static void |
| rotateLogs() |
| { |
| // First, let us synchronously update the rolling config values for both diagslog |
| // and outputlog. Note that the config values for outputlog in traffic_server |
| // are never updated past the original instantiation of Diags. This shouldn't |
| // be an issue since we're never rolling outputlog from traffic_server anyways. |
| // The reason being is that it is difficult to send a notification from TS to |
| // TM, informing TM that outputlog has been rolled. It is much easier sending |
| // a notification (in the form of SIGUSR2) from TM -> TS. |
| int output_log_roll_int = (int)REC_ConfigReadInteger("proxy.config.output.logfile.rolling_interval_sec"); |
| int output_log_roll_size = (int)REC_ConfigReadInteger("proxy.config.output.logfile.rolling_size_mb"); |
| int output_log_roll_enable = (int)REC_ConfigReadInteger("proxy.config.output.logfile.rolling_enabled"); |
| int diags_log_roll_int = (int)REC_ConfigReadInteger("proxy.config.diags.logfile.rolling_interval_sec"); |
| int diags_log_roll_size = (int)REC_ConfigReadInteger("proxy.config.diags.logfile.rolling_size_mb"); |
| int diags_log_roll_enable = (int)REC_ConfigReadInteger("proxy.config.diags.logfile.rolling_enabled"); |
| diags->config_roll_diagslog((RollingEnabledValues)diags_log_roll_enable, diags_log_roll_int, diags_log_roll_size); |
| diags->config_roll_outputlog((RollingEnabledValues)output_log_roll_enable, output_log_roll_int, output_log_roll_size); |
| |
| // Now we can actually roll the logs (if necessary) |
| if (diags->should_roll_diagslog()) { |
| mgmt_log("Rotated %s", DIAGS_LOG_FILENAME); |
| } |
| |
| if (diags->should_roll_outputlog()) { |
| // send a signal to TS to reload traffic.out, so the logfile is kept |
| // synced across processes |
| mgmt_log("Sending SIGUSR2 to TS"); |
| pid_t tspid = lmgmt->watched_process_pid; |
| if (tspid <= 0) { |
| return; |
| } |
| if (kill(tspid, SIGUSR2) != 0) { |
| mgmt_log("Could not send SIGUSR2 to TS: %s", strerror(errno)); |
| } else { |
| mgmt_log("Successfully sent SIGUSR2 to TS!"); |
| } |
| } |
| } |
| |
| static bool |
| is_server_idle() |
| { |
| RecInt active = 0; |
| RecInt threshold = 0; |
| |
| if (RecGetRecordInt("proxy.config.restart.active_client_threshold", &threshold) != REC_ERR_OKAY) { |
| return false; |
| } |
| |
| if (RecGetRecordInt("proxy.process.http.current_active_client_connections", &active) != REC_ERR_OKAY) { |
| return false; |
| } |
| |
| Debug("lm", "%" PRId64 " active clients, threshold is %" PRId64, active, threshold); |
| return active <= threshold; |
| } |
| |
| static bool |
| is_server_idle_from_new_connection() |
| { |
| RecInt active = 0; |
| RecInt threshold = 0; |
| // TODO implement with the right metric |
| |
| Debug("lm", "%" PRId64 " active clients, threshold is %" PRId64, active, threshold); |
| |
| return active <= threshold; |
| } |
| |
| static bool |
| is_server_draining() |
| { |
| RecInt draining = 0; |
| if (RecGetRecordInt("proxy.node.config.draining", &draining) != REC_ERR_OKAY) { |
| return false; |
| } |
| return draining != 0; |
| } |
| |
| static bool |
| waited_enough() |
| { |
| RecInt timeout = 0; |
| if (RecGetRecordInt("proxy.config.stop.shutdown_timeout", &timeout) != REC_ERR_OKAY) { |
| return false; |
| } |
| |
| return (timeout ? (lmgmt->mgmt_shutdown_triggered_at + timeout <= time(nullptr)) : false); |
| } |
| |
| static void |
| check_lockfile() |
| { |
| std::string rundir(RecConfigReadRuntimeDir()); |
| char lockfile[PATH_NAME_MAX]; |
| int err; |
| pid_t holding_pid; |
| |
| ////////////////////////////////////// |
| // test for presence of server lock // |
| ////////////////////////////////////// |
| Layout::relative_to(lockfile, sizeof(lockfile), rundir, SERVER_LOCK); |
| Lockfile server_lockfile(lockfile); |
| err = server_lockfile.Open(&holding_pid); |
| if (err == 1) { |
| server_lockfile.Close(); // no server running |
| } else { |
| char *reason = strerror(-err); |
| if (err == 0) { |
| fprintf(stderr, "FATAL: Lockfile '%s' says server already running as PID %ld\n", lockfile, static_cast<long>(holding_pid)); |
| mgmt_log("FATAL: Lockfile '%s' says server already running as PID %d\n", lockfile, holding_pid); |
| } else { |
| fprintf(stderr, "FATAL: Can't open server lockfile '%s' (%s)\n", lockfile, (reason ? reason : "Unknown Reason")); |
| mgmt_log("FATAL: Can't open server lockfile '%s' (%s)\n", lockfile, (reason ? reason : "Unknown Reason")); |
| } |
| exit(1); |
| } |
| |
| /////////////////////////////////////////// |
| // try to get the exclusive manager lock // |
| /////////////////////////////////////////// |
| Layout::relative_to(lockfile, sizeof(lockfile), rundir, MANAGER_LOCK); |
| Lockfile manager_lockfile(lockfile); |
| err = manager_lockfile.Get(&holding_pid); |
| if (err != 1) { |
| char *reason = strerror(-err); |
| fprintf(stderr, "FATAL: Can't acquire manager lockfile '%s'", lockfile); |
| mgmt_log("FATAL: Can't acquire manager lockfile '%s'", lockfile); |
| if (err == 0) { |
| fprintf(stderr, " (Lock file held by process ID %ld)\n", static_cast<long>(holding_pid)); |
| mgmt_log(" (Lock file held by process ID %d)\n", holding_pid); |
| } else if (reason) { |
| fprintf(stderr, " (%s)\n", reason); |
| mgmt_log(" (%s)\n", reason); |
| } else { |
| fprintf(stderr, "\n"); |
| } |
| exit(1); |
| |
| fprintf(stderr, "unable to acquire manager lock [%d]\n", -err); |
| exit(1); |
| } |
| } |
| |
| static void |
| initSignalHandlers() |
| { |
| struct sigaction sigHandler, sigChldHandler, sigAlrmHandler; |
| sigset_t sigsToBlock; |
| |
| // Set up the signal handler |
| #if !defined(linux) && !defined(freebsd) && !defined(darwin) |
| sigHandler.sa_handler = nullptr; |
| sigHandler.sa_sigaction = SignalHandler; |
| #else |
| sigHandler.sa_handler = SignalHandler; |
| #endif |
| sigemptyset(&sigHandler.sa_mask); |
| |
| // We want the handler to remain in place on |
| // SIGHUP to avoid any races with the signals |
| // coming too quickly. Also restart systems calls |
| // after the signal since not all calls are wrapped |
| // to check errno for EINTR |
| sigHandler.sa_flags = SA_RESTART; |
| sigaction(SIGHUP, &sigHandler, nullptr); |
| sigaction(SIGUSR2, &sigHandler, nullptr); |
| |
| // Don't block the signal on entry to the signal |
| // handler so we can reissue it and get a core |
| // file in the appropriate circumstances |
| #if !defined(linux) && !defined(freebsd) && !defined(darwin) |
| sigHandler.sa_flags = SA_RESETHAND | SA_SIGINFO; |
| #else |
| sigHandler.sa_flags = SA_RESETHAND; |
| #endif |
| sigaction(SIGINT, &sigHandler, nullptr); |
| sigaction(SIGQUIT, &sigHandler, nullptr); |
| sigaction(SIGILL, &sigHandler, nullptr); |
| sigaction(SIGBUS, &sigHandler, nullptr); |
| sigaction(SIGSEGV, &sigHandler, nullptr); |
| sigaction(SIGTERM, &sigHandler, nullptr); |
| |
| #if !defined(linux) && !defined(freebsd) && !defined(darwin) |
| sigAlrmHandler.sa_handler = nullptr; |
| sigAlrmHandler.sa_sigaction = SignalAlrmHandler; |
| #else |
| sigAlrmHandler.sa_handler = SignalAlrmHandler; |
| #endif |
| |
| sigemptyset(&sigAlrmHandler.sa_mask); |
| #if !defined(linux) && !defined(freebsd) && !defined(darwin) |
| sigAlrmHandler.sa_flags = SA_SIGINFO; |
| #else |
| sigAlrmHandler.sa_flags = 0; |
| #endif |
| sigaction(SIGALRM, &sigAlrmHandler, nullptr); |
| |
| // Block the delivery of any signals we are not catching |
| // |
| // except for SIGALRM since we use it |
| // to break out of deadlock on semaphore |
| // we share with the proxy |
| // |
| sigfillset(&sigsToBlock); |
| sigdelset(&sigsToBlock, SIGHUP); |
| sigdelset(&sigsToBlock, SIGUSR2); |
| sigdelset(&sigsToBlock, SIGINT); |
| sigdelset(&sigsToBlock, SIGQUIT); |
| sigdelset(&sigsToBlock, SIGILL); |
| sigdelset(&sigsToBlock, SIGABRT); |
| sigdelset(&sigsToBlock, SIGBUS); |
| sigdelset(&sigsToBlock, SIGSEGV); |
| sigdelset(&sigsToBlock, SIGTERM); |
| sigdelset(&sigsToBlock, SIGALRM); |
| ink_thread_sigsetmask(SIG_SETMASK, &sigsToBlock, nullptr); |
| |
| // Set up the SIGCHLD handler so we do not get into |
| // a problem with Solaris 2.6 and strange waitpid() |
| // behavior |
| sigChldHandler.sa_handler = SigChldHandler; |
| sigChldHandler.sa_flags = SA_RESTART; |
| sigemptyset(&sigChldHandler.sa_mask); |
| sigaction(SIGCHLD, &sigChldHandler, nullptr); |
| } |
| |
| static void |
| init_dirs() |
| { |
| std::string rundir(RecConfigReadRuntimeDir()); |
| std::string sysconfdir(RecConfigReadConfigDir()); |
| |
| if (access(sysconfdir.c_str(), R_OK) == -1) { |
| mgmt_elog(0, "unable to access() config directory '%s': %d, %s\n", sysconfdir.c_str(), errno, strerror(errno)); |
| mgmt_elog(0, "please set the 'TS_ROOT' environment variable\n"); |
| ::exit(1); |
| } |
| |
| if (access(rundir.c_str(), R_OK) == -1) { |
| mgmt_elog(0, "unable to access() local state directory '%s': %d, %s\n", rundir.c_str(), errno, strerror(errno)); |
| mgmt_elog(0, "please set 'proxy.config.local_state_dir'\n"); |
| ::exit(1); |
| } |
| } |
| |
| static void |
| chdir_root() |
| { |
| std::string prefix = Layout::get()->prefix; |
| |
| if (chdir(prefix.c_str()) < 0) { |
| mgmt_elog(0, "unable to change to root directory \"%s\" [%d '%s']\n", prefix.c_str(), errno, strerror(errno)); |
| mgmt_elog(0, " please set correct path in env variable TS_ROOT \n"); |
| exit(1); |
| } else { |
| mgmt_log("[TrafficManager] using root directory '%s'\n", prefix.c_str()); |
| } |
| } |
| |
| static void |
| set_process_limits(RecInt fds_throttle) |
| { |
| struct rlimit lim; |
| rlim_t maxfiles; |
| |
| // Set needed rlimits (root) |
| ink_max_out_rlimit(RLIMIT_NOFILE); |
| ink_max_out_rlimit(RLIMIT_STACK); |
| ink_max_out_rlimit(RLIMIT_DATA); |
| ink_max_out_rlimit(RLIMIT_FSIZE); |
| #ifdef RLIMIT_RSS |
| ink_max_out_rlimit(RLIMIT_RSS); |
| #endif |
| |
| maxfiles = ink_get_max_files(); |
| if (maxfiles != RLIM_INFINITY) { |
| float file_max_pct = 0.9; |
| |
| REC_ReadConfigFloat(file_max_pct, "proxy.config.system.file_max_pct"); |
| if (file_max_pct > 1.0) { |
| file_max_pct = 1.0; |
| } |
| |
| lim.rlim_cur = lim.rlim_max = static_cast<rlim_t>(maxfiles * file_max_pct); |
| if (setrlimit(RLIMIT_NOFILE, &lim) == 0 && getrlimit(RLIMIT_NOFILE, &lim) == 0) { |
| fds_limit = static_cast<int>(lim.rlim_cur); |
| syslog(LOG_NOTICE, "NOTE: RLIMIT_NOFILE(%d):cur(%d),max(%d)", RLIMIT_NOFILE, static_cast<int>(lim.rlim_cur), |
| static_cast<int>(lim.rlim_max)); |
| } |
| } |
| |
| if (getrlimit(RLIMIT_NOFILE, &lim) == 0) { |
| if (fds_throttle > (int)(lim.rlim_cur + FD_THROTTLE_HEADROOM)) { |
| lim.rlim_cur = (lim.rlim_max = (rlim_t)fds_throttle); |
| if (!setrlimit(RLIMIT_NOFILE, &lim) && !getrlimit(RLIMIT_NOFILE, &lim)) { |
| fds_limit = static_cast<int>(lim.rlim_cur); |
| syslog(LOG_NOTICE, "NOTE: RLIMIT_NOFILE(%d):cur(%d),max(%d)", RLIMIT_NOFILE, static_cast<int>(lim.rlim_cur), |
| static_cast<int>(lim.rlim_max)); |
| } |
| } |
| } |
| } |
| |
| #if TS_HAS_WCCP |
| static void |
| Errata_Logger(ts::Errata const &err) |
| { |
| size_t n; |
| static size_t const SIZE = 4096; |
| char buff[SIZE]; |
| if (err.size()) { |
| ts::Errata::Code code = err.top().getCode(); |
| n = err.write(buff, SIZE, 1, 0, 2, "> "); |
| // strip trailing newlines. |
| while (n && (buff[n - 1] == '\n' || buff[n - 1] == '\r')) |
| buff[--n] = 0; |
| // log it. |
| if (code > 1) |
| mgmt_elog(0, "[WCCP]%s", buff); |
| else if (code > 0) |
| mgmt_log("[WCCP]%s", buff); |
| else |
| Debug("WCCP", "%s", buff); |
| } |
| } |
| |
| static void |
| Init_Errata_Logging() |
| { |
| ts::Errata::registerSink(&Errata_Logger); |
| } |
| #endif |
| |
/**
 * Sleep for @a ms milliseconds using nanosleep().
 *
 * @param ms Duration in milliseconds.
 */
static void
millisleep(int ms)
{
  const int whole_seconds = ms / 1000;
  const int leftover_ms   = ms % 1000;

  struct timespec request;
  request.tv_sec  = whole_seconds;
  request.tv_nsec = static_cast<long>(leftover_ms) * 1000L * 1000L;

  // nanosleep is used rather than sleep because it does not interact with signals
  nanosleep(&request, nullptr);
}
| |
| bool |
| api_socket_is_restricted() |
| { |
| RecInt intval; |
| |
| // If the socket is not administratively restricted, check whether we have platform |
| // support. Otherwise, default to making it restricted. |
| if (RecGetRecordInt("proxy.config.admin.api.restricted", &intval) == REC_ERR_OKAY) { |
| if (intval == 0) { |
| return !mgmt_has_peereid(); |
| } |
| } |
| |
| return true; |
| } |
| |
| int |
| main(int argc, const char **argv) |
| { |
| const long MAX_LOGIN = ink_login_name_max(); |
| |
| runroot_handler(argv); |
| |
| // Before accessing file system initialize Layout engine |
| Layout::create(); |
| mgmt_path = Layout::get()->sysconfdir.c_str(); |
| |
| // Set up the application version info |
| appVersionInfo.setup(PACKAGE_NAME, "traffic_manager", PACKAGE_VERSION, __DATE__, __TIME__, BUILD_MACHINE, BUILD_PERSON, ""); |
| |
| bool found = false; |
| int just_started = 0; |
| // TODO: This seems completely incomplete, disabled for now |
| // int dump_config = 0, dump_process = 0, dump_node = 0, dump_local = 0; |
| char *proxy_port = nullptr; |
| char *tsArgs = nullptr; |
| int disable_syslog = false; |
| char userToRunAs[MAX_LOGIN + 1]; |
| RecInt fds_throttle = -1; |
| bool printed_unrecoverable = false; |
| |
| ArgumentDescription argument_descriptions[] = { |
| {"proxyOff", '-', "Disable proxy", "F", &proxy_off, nullptr, nullptr}, |
| {"listenOff", '-', "Disable traffic manager listen to proxy ports", "F", &listen_off, nullptr, nullptr}, |
| {"path", '-', "Path to the management socket", "S*", &mgmt_path, nullptr, nullptr}, |
| {"recordsConf", '-', "Path to records.config", "S*", &recs_conf, nullptr, nullptr}, |
| {"tsArgs", '-', "Additional arguments for traffic_server", "S*", &tsArgs, nullptr, nullptr}, |
| {"proxyPort", '-', "HTTP port descriptor", "S*", &proxy_port, nullptr, nullptr}, |
| {"maxRecords", 'm', "Max number of librecords metrics and configurations (default & minimum: 1600)", "I", &max_records_entries, |
| "PROXY_MAX_RECORDS", nullptr}, |
| {TM_OPT_BIND_STDOUT, '-', "Regular file to bind stdout to", "S512", &bind_stdout, "PROXY_BIND_STDOUT", nullptr}, |
| {TM_OPT_BIND_STDERR, '-', "Regular file to bind stderr to", "S512", &bind_stderr, "PROXY_BIND_STDERR", nullptr}, |
| #if TS_USE_DIAGS |
| {"debug", 'T', "Vertical-bar-separated Debug Tags", "S1023", debug_tags, nullptr, nullptr}, |
| {"action", 'B', "Vertical-bar-separated Behavior Tags", "S1023", action_tags, nullptr, nullptr}, |
| #endif |
| {"nosyslog", '-', "Do not log to syslog", "F", &disable_syslog, nullptr, nullptr}, |
| HELP_ARGUMENT_DESCRIPTION(), |
| VERSION_ARGUMENT_DESCRIPTION(), |
| RUNROOT_ARGUMENT_DESCRIPTION() |
| }; |
| |
| // Process command line arguments and dump into variables |
| process_args(&appVersionInfo, argument_descriptions, countof(argument_descriptions), argv); |
| |
| // change the directory to the "root" directory |
| chdir_root(); |
| |
| // Line buffer standard output & standard error |
| int status; |
| status = setvbuf(stdout, nullptr, _IOLBF, 0); |
| if (status != 0) { |
| perror("WARNING: can't line buffer stdout"); |
| } |
| status = setvbuf(stderr, nullptr, _IOLBF, 0); |
| if (status != 0) { |
| perror("WARNING: can't line buffer stderr"); |
| } |
| |
| initSignalHandlers(); |
| |
| // Bootstrap with LOG_DAEMON until we've read our configuration |
| if (!disable_syslog) { |
| openlog("traffic_manager", LOG_PID | LOG_NDELAY | LOG_NOWAIT, LOG_DAEMON); |
| mgmt_use_syslog(); |
| syslog(LOG_NOTICE, "NOTE: --- Manager Starting ---"); |
| syslog(LOG_NOTICE, "NOTE: Manager Version: %s", appVersionInfo.FullVersionInfoStr); |
| } |
| |
| // Bootstrap the Diags facility so that we can use it while starting |
| // up the manager |
| diagsConfig = new DiagsConfig("Manager", DIAGS_LOG_FILENAME, debug_tags, action_tags, false); |
| diags->set_std_output(StdStream::STDOUT, bind_stdout); |
| diags->set_std_output(StdStream::STDERR, bind_stderr); |
| |
| RecLocalInit(); |
| LibRecordsConfigInit(); |
| |
| init_dirs(); // setup critical directories, needs LibRecords |
| |
| if (RecGetRecordString("proxy.config.admin.user_id", userToRunAs, sizeof(userToRunAs)) != REC_ERR_OKAY || |
| strlen(userToRunAs) == 0) { |
| mgmt_fatal(0, "proxy.config.admin.user_id is not set\n"); |
| } |
| |
| RecGetRecordInt("proxy.config.net.connections_throttle", &fds_throttle); |
| RecInt listen_per_thread = 0; |
| RecGetRecordInt("proxy.config.exec_thread.listen", &listen_per_thread); |
| if (listen_per_thread > 0) { // Turn off listening. Traffic server is going to listen on all the threads. |
| listen_off = true; |
| } |
| |
| set_process_limits(fds_throttle); // as root |
| |
| // A user of #-1 means to not attempt to switch user. Yes, it's documented ;) |
| if (strcmp(userToRunAs, "#-1") != 0) { |
| runAsUser(userToRunAs); |
| } |
| |
| EnableCoreFile(true); |
| check_lockfile(); |
| |
| url_init(); |
| mime_init(); |
| http_init(); |
| |
| #if TS_HAS_WCCP |
| Init_Errata_Logging(); |
| #endif |
| ts_host_res_global_init(); |
| ts_session_protocol_well_known_name_indices_init(); |
| lmgmt = new LocalManager(proxy_off == false, listen_off == false); |
| RecLocalInitMessage(); |
| lmgmt->initAlarm(); |
| |
| // INKqa11968: need to set up callbacks and diags data structures |
| // using configuration in records.config |
| DiagsConfig *old_diagsconfig = diagsConfig; |
| diagsConfig = new DiagsConfig("Manager", DIAGS_LOG_FILENAME, debug_tags, action_tags, true); |
| if (old_diagsconfig) { |
| delete old_diagsconfig; |
| old_diagsconfig = nullptr; |
| } |
| |
| RecSetDiags(diags); |
| diags->set_std_output(StdStream::STDOUT, bind_stdout); |
| diags->set_std_output(StdStream::STDERR, bind_stderr); |
| |
| if (is_debug_tag_set("diags")) { |
| diags->dump(); |
| } |
| diags->cleanup_func = mgmt_cleanup; |
| |
| // Setup the exported manager version records. |
| RecSetRecordString("proxy.node.version.manager.short", appVersionInfo.VersionStr, REC_SOURCE_DEFAULT); |
| RecSetRecordString("proxy.node.version.manager.long", appVersionInfo.FullVersionInfoStr, REC_SOURCE_DEFAULT); |
| RecSetRecordString("proxy.node.version.manager.build_number", appVersionInfo.BldNumStr, REC_SOURCE_DEFAULT); |
| RecSetRecordString("proxy.node.version.manager.build_time", appVersionInfo.BldTimeStr, REC_SOURCE_DEFAULT); |
| RecSetRecordString("proxy.node.version.manager.build_date", appVersionInfo.BldDateStr, REC_SOURCE_DEFAULT); |
| RecSetRecordString("proxy.node.version.manager.build_machine", appVersionInfo.BldMachineStr, REC_SOURCE_DEFAULT); |
| RecSetRecordString("proxy.node.version.manager.build_person", appVersionInfo.BldPersonStr, REC_SOURCE_DEFAULT); |
| |
| if (!disable_syslog) { |
| char sys_var[] = "proxy.config.syslog_facility"; |
| char *facility_str = nullptr; |
| int facility_int; |
| |
| facility_str = REC_readString(sys_var, &found); |
| ink_assert(found); |
| |
| if (!found) { |
| mgmt_elog(0, "Could not read %s. Defaulting to LOG_DAEMON\n", sys_var); |
| facility_int = LOG_DAEMON; |
| } else { |
| facility_int = facility_string_to_int(facility_str); |
| ats_free(facility_str); |
| if (facility_int < 0) { |
| mgmt_elog(0, "Bad syslog facility specified. Defaulting to LOG_DAEMON\n"); |
| facility_int = LOG_DAEMON; |
| } |
| } |
| |
| // NOTE: do NOT call closelog() here. Solaris gets confused. |
| openlog("traffic_manager", LOG_PID | LOG_NDELAY | LOG_NOWAIT, facility_int); |
| |
| lmgmt->syslog_facility = facility_int; |
| } else { |
| lmgmt->syslog_facility = -1; |
| } |
| |
| // Find out our hostname so we can use it as part of the initialization |
| setHostnameVar(); |
| |
| // Initialize the Config Object bindings before |
| // starting any other threads |
| lmgmt->configFiles = configFiles = new FileManager(); |
| initializeRegistry(); |
| configFiles->registerCallback(fileUpdated); |
| |
| // RecLocal's 'sync_thr' depends on 'configFiles', so we can't |
| // stat the 'sync_thr' until 'configFiles' has been initialized. |
| RecLocalStart(configFiles); |
| |
| // TS needs to be started up with the same outputlog bindings each time, |
| // so we append the outputlog location to the persistent proxy options |
| // |
| // TS needs them to be able to create BaseLogFiles for each value |
| ts::bwprint(lmgmt->proxy_options, "{}{}{}", ts::bwf::OptionalAffix(tsArgs), |
| ts::bwf::OptionalAffix(bind_stdout, " "sv, "--bind_stdout "sv), |
| ts::bwf::OptionalAffix(bind_stderr, " "sv, "--bind_stderr "sv)); |
| |
| if (proxy_port) { |
| HttpProxyPort::loadValue(lmgmt->m_proxy_ports, proxy_port); |
| } |
| |
| lmgmt->initMgmtProcessServer(); /* Setup p-to-p process server */ |
| |
| lmgmt->listenForProxy(); |
| |
| // Setup the API and event sockets |
| std::string rundir(RecConfigReadRuntimeDir()); |
| std::string apisock(Layout::relative_to(rundir, MGMTAPI_MGMT_SOCKET_NAME)); |
| std::string eventsock(Layout::relative_to(rundir, MGMTAPI_EVENT_SOCKET_NAME)); |
| |
| Debug("lm", "using main socket file '%s'", apisock.c_str()); |
| Debug("lm", "using event socket file '%s'", eventsock.c_str()); |
| |
| mode_t oldmask = umask(0); |
| mode_t newmode = api_socket_is_restricted() ? 00700 : 00777; |
| |
| int mgmtapiFD = -1; // FD for the api interface to issue commands |
| int eventapiFD = -1; // FD for the api and clients to handle event callbacks |
| |
| mgmtapiFD = bind_unix_domain_socket(apisock.c_str(), newmode); |
| if (mgmtapiFD == -1) { |
| mgmt_log("[WebIntrMain] Unable to set up socket for handling management API calls. API socket path = %s\n", apisock.c_str()); |
| } |
| |
| eventapiFD = bind_unix_domain_socket(eventsock.c_str(), newmode); |
| if (eventapiFD == -1) { |
| mgmt_log("[WebIntrMain] Unable to set up so for handling management API event calls. Event Socket path: %s\n", |
| eventsock.c_str()); |
| } |
| |
| umask(oldmask); |
| ink_thread_create(nullptr, ts_ctrl_main, &mgmtapiFD, 0, 0, nullptr); |
| ink_thread_create(nullptr, event_callback_main, &eventapiFD, 0, 0, nullptr); |
| |
| mgmt_log("[TrafficManager] Setup complete\n"); |
| |
| RecRegisterStatInt(RECT_NODE, "proxy.node.config.reconfigure_time", time(nullptr), RECP_NON_PERSISTENT); |
| RecRegisterStatInt(RECT_NODE, "proxy.node.config.reconfigure_required", 0, RECP_NON_PERSISTENT); |
| |
| RecRegisterStatInt(RECT_NODE, "proxy.node.config.restart_required.proxy", 0, RECP_NON_PERSISTENT); |
| RecRegisterStatInt(RECT_NODE, "proxy.node.config.restart_required.manager", 0, RECP_NON_PERSISTENT); |
| |
| RecRegisterStatInt(RECT_NODE, "proxy.node.config.draining", 0, RECP_NON_PERSISTENT); |
| |
| int sleep_time = 0; // sleep_time given in sec |
| uint64_t last_start_epoc_s = 0; // latest start attempt in seconds since epoc |
| |
| std::random_device rd; |
| std::mt19937 gen(rd()); |
| std::uniform_real_distribution<> dis(0.0, 0.5); |
| |
| RecInt sleep_ceiling = 60; |
| RecGetRecordInt("proxy.node.config.manager_exponential_sleep_ceiling", &sleep_ceiling); |
| RecInt retry_cap = 0; // 0 means no cap. |
| RecGetRecordInt("proxy.node.config.manager_retry_cap", &retry_cap); |
| bool ignore_retry_cap{retry_cap <= 0}; |
| |
| DerivativeMetrics derived; // This is simple class to calculate some useful derived metrics |
| |
| for (;;) { |
| lmgmt->processEventQueue(); |
| lmgmt->pollMgmtProcessServer(); |
| |
| // Handle rotation of output log (aka traffic.out) as well as DIAGS_LOG_FILENAME (aka manager.log) |
| rotateLogs(); |
| |
| // Check for a SIGHUP |
| if (sigHupNotifier) { |
| mgmt_log("[main] Reading Configuration Files due to SIGHUP\n"); |
| Reconfigure(); |
| sigHupNotifier = 0; |
| mgmt_log("[main] Reading Configuration Files Reread\n"); |
| } |
| |
| // Update the derived metrics. ToDo: this runs once a second, that might be excessive, maybe it should be |
| // done more like every config_update_interval_ms (proxy.config.config_update_interval_ms) ? |
| derived.Update(); |
| |
| if (lmgmt->mgmt_shutdown_outstanding != MGMT_PENDING_NONE) { |
| Debug("lm", "pending shutdown %d", lmgmt->mgmt_shutdown_outstanding); |
| } |
| switch (lmgmt->mgmt_shutdown_outstanding) { |
| case MGMT_PENDING_RESTART: |
| lmgmt->mgmtShutdown(); |
| ::exit(0); |
| break; |
| case MGMT_PENDING_IDLE_RESTART: |
| if (!is_server_draining()) { |
| lmgmt->processDrain(); |
| } |
| if (is_server_idle() || waited_enough()) { |
| lmgmt->mgmtShutdown(); |
| ::exit(0); |
| } |
| break; |
| case MGMT_PENDING_BOUNCE: |
| lmgmt->processBounce(); |
| lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_NONE; |
| break; |
| case MGMT_PENDING_IDLE_BOUNCE: |
| if (!is_server_draining()) { |
| lmgmt->processDrain(); |
| } |
| if (is_server_idle() || waited_enough()) { |
| lmgmt->processBounce(); |
| lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_NONE; |
| } |
| break; |
| case MGMT_PENDING_STOP: |
| lmgmt->processShutdown(); |
| lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_NONE; |
| break; |
| case MGMT_PENDING_IDLE_STOP: |
| if (!is_server_draining()) { |
| lmgmt->processDrain(); |
| } |
| if (is_server_idle() || waited_enough()) { |
| lmgmt->processShutdown(); |
| lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_NONE; |
| } |
| break; |
| case MGMT_PENDING_DRAIN: |
| if (!is_server_draining()) { |
| lmgmt->processDrain(); |
| } |
| lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_NONE; |
| break; |
| case MGMT_PENDING_IDLE_DRAIN: |
| if (is_server_idle_from_new_connection()) { |
| lmgmt->processDrain(); |
| lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_NONE; |
| } |
| break; |
| case MGMT_PENDING_UNDO_DRAIN: |
| if (is_server_draining()) { |
| lmgmt->processDrain(0); |
| lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_NONE; |
| } |
| break; |
| default: |
| break; |
| } |
| |
| if (lmgmt->run_proxy && !lmgmt->processRunning() && lmgmt->proxy_recoverable && |
| (retry_cap > 0 || ignore_retry_cap)) { /* Make sure we still have a proxy up */ |
| const uint64_t now = static_cast<uint64_t>(time(nullptr)); |
| if (sleep_time && ((now - last_start_epoc_s) < static_cast<uint64_t>(sleep_ceiling))) { |
| const auto variance = dis(gen); |
| // We add a bit of variance to the regular sleep time. |
| const int mod_sleep_time = sleep_time + static_cast<int>(sleep_time * variance); |
| mgmt_log("Relaunching proxy after %d sec..", mod_sleep_time); |
| if (!ignore_retry_cap && sleep_time >= sleep_ceiling) { |
| --retry_cap; |
| } |
| millisleep((1000 * mod_sleep_time)); // we use millisleep instead of sleep because it doesnt interfere with signals |
| sleep_time = std::min<int>(sleep_time * 2, sleep_ceiling); |
| } else { |
| sleep_time = 1; |
| } |
| if (ProxyStateSet(TS_PROXY_ON, TS_CACHE_CLEAR_NONE) == TS_ERR_OKAY) { |
| just_started = 0; |
| last_start_epoc_s = static_cast<uint64_t>(time(nullptr)); |
| } else { |
| just_started++; |
| } |
| } else { |
| // Even if we shouldn't try to start the proxy again, leave manager around to |
| // avoid external automated restarts |
| if (!lmgmt->proxy_recoverable && !printed_unrecoverable) { |
| mgmt_log("[main] Proxy is un-recoverable. Proxy will not be relaunched.\n"); |
| printed_unrecoverable = true; |
| } |
| |
| just_started++; |
| } |
| |
| /* This will catch the case were the proxy dies before it can connect to manager */ |
| if (lmgmt->proxy_launch_outstanding && !lmgmt->processRunning() && just_started >= 120) { |
| just_started = 0; |
| lmgmt->proxy_launch_outstanding = false; |
| if (lmgmt->proxy_launch_pid != -1) { |
| int res; |
| kill(lmgmt->proxy_launch_pid, 9); |
| waitpid(lmgmt->proxy_launch_pid, &res, 0); |
| if (WIFSIGNALED(res)) { |
| int sig = WTERMSIG(res); |
| #ifdef NEED_PSIGNAL |
| mgmt_log("[main] Proxy terminated due to Sig %d. Relaunching after %d sec...\n", sig, sleep_time); |
| #else |
| mgmt_log("[main] Proxy terminated due to Sig %d: %s. Relaunching after %d sec...\n", sig, strsignal(sig), sleep_time); |
| #endif /* NEED_PSIGNAL */ |
| } |
| } |
| mgmt_log("[main] Proxy launch failed, retrying after %d sec...\n", sleep_time); |
| } |
| } |
| |
| // ToDo: Here we should delete anything related to calculated metrics. |
| |
| #ifndef MGMT_SERVICE |
| return 0; |
| #endif |
| |
| } /* End main */ |
| |
| #if !defined(linux) && !defined(freebsd) && !defined(darwin) |
| static void |
| SignalAlrmHandler(int /* sig ATS_UNUSED */, siginfo_t *t, void * /* c ATS_UNUSED */) |
| #else |
| static void |
| SignalAlrmHandler(int /* sig ATS_UNUSED */) |
| #endif |
| { |
| /* |
| fprintf("[TrafficManager] ==> SIGALRM received\n"); |
| mgmt_elog(0, "[TrafficManager] ==> SIGALRM received\n"); |
| */ |
| #if !defined(linux) && !defined(freebsd) && !defined(darwin) |
| if (t) { |
| if (t->si_code <= 0) { |
| fprintf(stderr, "[TrafficManager] ==> User Alarm from pid: %ld uid: %d\n", (long)t->si_pid, t->si_uid); |
| mgmt_log("[TrafficManager] ==> User Alarm from pid: %d uid: %d\n", t->si_pid, t->si_uid); |
| } else { |
| fprintf(stderr, "[TrafficManager] ==> Kernel Alarm Reason: %d\n", t->si_code); |
| mgmt_log("[TrafficManager] ==> Kernel Alarm Reason: %d\n", t->si_code); |
| } |
| } |
| #endif |
| |
| return; |
| } |
| |
| #if !defined(linux) && !defined(freebsd) && !defined(darwin) |
| static void |
| SignalHandler(int sig, siginfo_t *t, void *c) |
| #else |
| static void |
| SignalHandler(int sig) |
| #endif |
| { |
| static int clean = 0; |
| int status; |
| |
| #if !defined(linux) && !defined(freebsd) && !defined(darwin) |
| if (t) { |
| if (t->si_code <= 0) { |
| fprintf(stderr, "[TrafficManager] ==> User Sig %d from pid: %ld uid: %d\n", sig, (long)t->si_pid, t->si_uid); |
| mgmt_log("[TrafficManager] ==> User Sig %d from pid: %ld uid: %d\n", sig, (long)t->si_pid, t->si_uid); |
| } else { |
| fprintf(stderr, "[TrafficManager] ==> Kernel Sig %d; Reason: %d\n", sig, t->si_code); |
| mgmt_log("[TrafficManager] ==> Kernel Sig %d; Reason: %d\n", sig, t->si_code); |
| } |
| } |
| #endif |
| |
| if (sig == SIGHUP) { |
| sigHupNotifier = 1; |
| return; |
| } |
| |
| if (sig == SIGUSR2) { |
| fprintf(stderr, "[TrafficManager] ==> received SIGUSR2, rotating the logs.\n"); |
| mgmt_log("[TrafficManager] ==> received SIGUSR2, rotating the logs.\n"); |
| if (lmgmt && lmgmt->watched_process_pid != -1) { |
| kill(lmgmt->watched_process_pid, sig); |
| } |
| diags->set_std_output(StdStream::STDOUT, bind_stdout); |
| diags->set_std_output(StdStream::STDERR, bind_stderr); |
| if (diags->reseat_diagslog()) { |
| Note("Reseated %s", DIAGS_LOG_FILENAME); |
| } else { |
| Note("Could not reseat %s", DIAGS_LOG_FILENAME); |
| } |
| return; |
| } |
| |
| fprintf(stderr, "[TrafficManager] ==> Cleaning up and reissuing signal #%d\n", sig); |
| mgmt_log("[TrafficManager] ==> Cleaning up and reissuing signal #%d\n", sig); |
| |
| if (lmgmt && !clean) { |
| clean = 1; |
| if (lmgmt->watched_process_pid != -1) { |
| if (sig == SIGTERM || sig == SIGINT) { |
| kill(lmgmt->watched_process_pid, sig); |
| waitpid(lmgmt->watched_process_pid, &status, 0); |
| } |
| } |
| lmgmt->mgmtCleanup(); |
| } |
| |
| switch (sig) { |
| case SIGQUIT: |
| case SIGILL: |
| case SIGTRAP: |
| #if !defined(linux) |
| case SIGEMT: |
| case SIGSYS: |
| #endif |
| case SIGFPE: |
| case SIGBUS: |
| case SIGSEGV: |
| case SIGXCPU: |
| case SIGXFSZ: |
| abort(); |
| default: |
| fprintf(stderr, "[TrafficManager] ==> signal #%d\n", sig); |
| mgmt_log("[TrafficManager] ==> signal #%d\n", sig); |
| ::exit(sig); |
| } |
| fprintf(stderr, "[TrafficManager] ==> signal2 #%d\n", sig); |
| mgmt_log("[TrafficManager] ==> signal2 #%d\n", sig); |
| ::exit(sig); |
| } /* End SignalHandler */ |
| |
// void SigChldHandler(int)
//
//   Deliberately empty SIGCHLD handler: it exists only so that the
//   signal is caught instead of ignored.
//
//   On Solaris 2.6, ignoring SIGCHLD changes how waitpid() behaves:
//   with no unwaited children it blocks until every child has turned
//   into a zombie, which is not acceptable for the manager.
//
static void
SigChldHandler(int /* sig ATS_UNUSED */)
{
  // Nothing to do, being installed is the whole point.
}
| |
| void |
| fileUpdated(char *fname, char *configName) |
| { |
| // If there is no config name recorded, assume this file is not reloadable |
| // Just log a message |
| if (configName == nullptr || configName[0] == '\0') { |
| mgmt_log("[fileUpdated] %s changed, need restart", fname); |
| } else { |
| // Signal based on the config entry that has the changed file name |
| lmgmt->signalFileChange(configName); |
| } |
| return; |
| } /* End fileUpdate */ |
| |
#if TS_USE_POSIX_CAP
/** Restore capabilities after a user id change.

    Turns a fixed set of LINUX capabilities back on in the effective set, so
    that this process can still perform certain privileged operations when it
    is no longer running as a privileged user.

    Each capability is raised and applied on its own. If applying fails, that
    one capability is cleared again so it does not poison the later attempts.
    The resulting state of every capability is then logged.

    @internal
    The original author notes that
    @code
    prctl(PR_SET_KEEPCAPS, 1);
    @endcode
    had no effect even though the call reported success, and that only explicit
    capability manipulation was effective. It also did not appear to be
    necessary to set the capabilities on the executable if originally run as
    root. That may be needed if started as a user without that capability.

    @return The result of the final cap_set_proc() call.
*/

int
restoreCapabilities()
{
  // The capabilities that should end up effective.
  cap_value_t wanted[] = {
    CAP_NET_ADMIN,        ///< Set socket transparency.
    CAP_NET_BIND_SERVICE, ///< Low port (e.g. 80) binding.
    CAP_IPC_LOCK          ///< Lock IPC objects.
  };
  static int const WANTED_COUNT = sizeof(wanted) / sizeof(wanted[0]);

  cap_t caps = cap_get_proc(); // capabilities the process has right now.

  // Raise and apply the capabilities one at a time.
  for (int idx = 0; idx < WANTED_COUNT; ++idx) {
    cap_value_t *entry = &wanted[idx];

    if (cap_set_flag(caps, CAP_EFFECTIVE, 1, entry, CAP_SET) < 0) {
      Warning("restore CAP_EFFECTIVE failed for option %d", idx);
    }
    if (cap_set_proc(caps) == -1) {
      // Could not be applied, take this one out of the set again.
      cap_set_flag(caps, CAP_EFFECTIVE, 1, entry, CAP_CLEAR);
    }
  }

  // Report what each capability ended up as. Entries that cannot be queried are skipped.
  for (int idx = 0; idx < WANTED_COUNT; ++idx) {
    int const capability = wanted[idx];
    cap_flag_value_t state;

    if (cap_get_flag(caps, capability, CAP_EFFECTIVE, &state) >= 0) {
      Warning("CAP_EFFECTIVE offiset %d is %s", capability, state == CAP_SET ? "set" : "unset");
    }
  }

  int const result = cap_set_proc(caps);
  cap_free(caps);
  return result;
}
#endif
| |
| // void runAsUser(...) |
| // |
| // If we are root, switched to user to run as |
| // specified in records.config |
| // |
| // If we are not root, do nothing |
| // |
| void |
| runAsUser(const char *userName) |
| { |
| if (getuid() == 0 || geteuid() == 0) { |
| ImpersonateUser(userName, IMPERSONATE_EFFECTIVE); |
| |
| #if TS_USE_POSIX_CAP |
| if (0 != restoreCapabilities()) { |
| mgmt_log("[runAsUser] Error: Failed to restore capabilities after switch to user %s.\n", userName); |
| } |
| #endif |
| } |
| } /* End runAsUser() */ |