blob: 6318f48fc3c56d2414a8a50fd2d9debe74fddeb9 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "linux/systemd.hpp"
#include <string>
#include <vector>
#include <process/once.hpp>
#include <stout/os.hpp>
#include <stout/strings.hpp>
#include <stout/try.hpp>
#include "linux/cgroups.hpp"
using process::Once;
using std::string;
using std::vector;
namespace systemd {
// Version threshold used by `exists()`: a detected systemd version below this
// value only triggers a warning about the `Delegate` flag, never a failure.
int DELEGATE_MINIMUM_VERSION{218};
// Registers every systemd-related flag along with its help text and default.
Flags::Flags()
{
  // Master switch for systemd support; on by default.
  add(&Flags::enabled,
      "enabled",
      "Top level control of systemd support. When enabled, features such as\n"
      "processes life-time extension are enabled unless there is an explicit\n"
      "flag to disable these (see other flags).",
      true);

  // Directory in which the executor slice unit file is looked up / created.
  add(&Flags::runtime_directory,
      "runtime_directory",
      "The path to the systemd system run time directory\n",
      "/run/systemd/system");

  // Root of the cgroups mount; `hierarchy()` appends "systemd" to it.
  add(&Flags::cgroups_hierarchy,
      "cgroups_hierarchy",
      "The path to the cgroups hierarchy root\n",
      "/sys/fs/cgroup");
}
// File-local copy of the flags handed to `initialize()`; null until that call
// assigns it. Read through `flags()`.
static Flags* systemd_flags{nullptr};
// Returns the flags stored by `initialize()`. The null check aborts if the
// flags have not been stored yet.
const Flags& flags()
{
  Flags* const current = CHECK_NOTNULL(systemd_flags);
  return *current;
}
namespace mesos {
// Moves `child` into the `MESOS_EXECUTORS_SLICE` cgroup under the systemd
// hierarchy.
//
// NOTE: Returning an Error implies the child process will be killed.
Try<Nothing> extendLifetime(pid_t child)
{
  // Precondition 1: the init system must be systemd.
  if (!systemd::exists()) {
    return Error("Failed to contain process on systemd: "
                 "systemd does not exist on this system");
  }

  // Precondition 2: systemd support must be initialized and switched on.
  if (!systemd::enabled()) {
    return Error("Failed to contain process on systemd: "
                 "systemd is not configured as enabled on this system");
  }

  const Try<Nothing> assigned = cgroups::assign(
      hierarchy(),
      systemd::mesos::MESOS_EXECUTORS_SLICE,
      child);

  if (!assigned.isError()) {
    LOG(INFO) << "Assigned child process '" << child << "' to '"
              << systemd::mesos::MESOS_EXECUTORS_SLICE << "'";

    return Nothing();
  }

  return Error("Failed to contain process on systemd: "
               "Failed to assign process to its systemd executor slice: " +
               assigned.error());
}
} // namespace mesos {
// One-time setup of systemd support.
//
// Stores a copy of `flags`, and (when support is enabled) makes sure the
// `MESOS_EXECUTORS_SLICE` unit file exists, starts the slice, and verifies
// that the matching cgroup is present in the systemd hierarchy.
//
// Returns `Nothing()` on success, when support is disabled, or when
// `initialized->once()` reports true; returns an `Error` describing the first
// step that failed otherwise.
//
// NOTE(review): the error returns below leave `initialized` without a call
// to `done()`. Presumably callers treat a failure here as fatal; confirm how
// `Once::once()` behaves for a later caller in that state.
Try<Nothing> initialize(const Flags& flags)
{
  static Once* initialized = new Once();

  if (initialized->once()) {
    return Nothing();
  }

  if (!systemd::exists()) {
    return Error("systemd does not exist on this system");
  }

  systemd_flags = new Flags(flags);

  // With systemd support switched off there is no further state to set up.
  if (!systemd_flags->enabled) {
    initialized->done();
    return Nothing();
  }

  // Without the runtime directory we have nowhere to look for (or create)
  // the slice unit file, so give up early.
  const auto& runtimeDir = CHECK_NOTNULL(systemd_flags)->runtime_directory;
  if (!os::exists(runtimeDir)) {
    return Error("Failed to locate systemd runtime directory: " + runtimeDir);
  }

  // Background (see MESOS-3352): on systemd environments executor pids, and
  // processes that must live alongside the executor, are migrated into a
  // separate executor slice so that they can outlive the slave. This
  // function is responsible for creating and starting that slice. A
  // `Subprocess::ParentHook` injected into `subprocess` migrates pids into
  // the slice when the `EXTEND_LIFETIME` option is set on the call.
  //
  // TODO(jmlvanre): Prevent racing between multiple agents for this creation
  // logic.
  //
  // The unit file is only written when it is missing. An existing file is
  // deliberately left alone so that operators can override the settings we
  // would otherwise provide.
  const Path unitFile(path::join(
      systemd::runtimeDirectory(),
      mesos::MESOS_EXECUTORS_SLICE));

  if (!systemd::slices::exists(unitFile)) {
    // Minimal unit definition; enough to let us start a new slice.
    string contents = "[Unit]\nDescription=Mesos Executors Slice\n";

    Try<Nothing> created = systemd::slices::create(unitFile, contents);
    if (created.isError()) {
      return Error("Failed to create systemd slice '" +
                   stringify(mesos::MESOS_EXECUTORS_SLICE) +
                   "': " + created.error());
    }
  }

  // Start the slice whether or not we just wrote its unit file; starting an
  // already running slice is safe (a no-op).
  Try<Nothing> started = systemd::slices::start(mesos::MESOS_EXECUTORS_SLICE);
  if (started.isError()) {
    return Error("Failed to start '" +
                 stringify(mesos::MESOS_EXECUTORS_SLICE) +
                 "': " + started.error());
  }

  // The slice should now accept pids. Probe the cgroups hierarchy to confirm
  // that later assignments have a target.
  Try<bool> present = cgroups::exists(
      systemd::hierarchy(),
      mesos::MESOS_EXECUTORS_SLICE);

  if (present.isError() || !present.get()) {
    const string reason =
      present.isError() ? present.error() : "does not exist";

    return Error("Failed to locate systemd cgroups hierarchy: " + reason);
  }

  initialized->done();

  return Nothing();
}
bool exists()
{
// This is static as the init system should not change while we are running.
static const bool exists = []() -> bool {
// (1) Test whether `/sbin/init` links to systemd.
const Result<string> realpath = os::realpath("/sbin/init");
if (realpath.isError() || realpath.isNone()) {
LOG(WARNING) << "Failed to test /sbin/init for systemd environment: "
<< (realpath.isError() ? realpath.error()
: "does not exist");
return false;
}
CHECK_SOME(realpath);
// (2) Testing whether we have a systemd version.
const string command = realpath.get() + " --version";
Try<string> versionCommand = os::shell(command);
if (versionCommand.isError()) {
LOG(WARNING) << "Failed to test command '" << command << "': "
<< versionCommand.error();
return false;
}
vector<string> tokens = strings::tokenize(versionCommand.get(), " \n");
// We need at least a name and a version number to match systemd.
if (tokens.size() < 2) {
return false;
}
if (tokens[0] != "systemd") {
return false;
}
Try<int> version = numify<int>(tokens[1]);
if (version.isError()) {
LOG(WARNING) << "Failed to parse systemd version '" << tokens[1] << "'";
return false;
}
LOG(INFO) << "systemd version `" << version.get() << "` detected";
// We log a warning if the version is below 218. This is because the
// `Delegate` flag was introduced in version 218. Some systems, like RHEL 7,
// have patched versions that are below 218 but still have the `Delegate`
// flag. This is why we warn / inform users rather than failing. See
// MESOS-3352.
if (version.get() < DELEGATE_MINIMUM_VERSION) {
LOG(WARNING)
<< "Required functionality `Delegate` was introduced in Version `"
<< DELEGATE_MINIMUM_VERSION << "`. Your system may not function"
<< " properly; however since some distributions have patched systemd"
<< " packages, your system may still be functional. This is why we keep"
<< " running. See MESOS-3352 for more information";
}
return true;
}();
return exists;
}
// True only when flags have been stored, the `enabled` flag is set, and
// systemd was detected on this system.
bool enabled()
{
  // Guard first: `flags()` must not be called before flags are stored.
  if (systemd_flags == nullptr) {
    return false;
  }

  return flags().enabled && exists();
}
// Returns the `runtime_directory` flag value wrapped in a `Path`.
Path runtimeDirectory()
{
  const Flags& current = flags();
  return Path(current.runtime_directory);
}
// Returns the "systemd" entry beneath the `cgroups_hierarchy` flag value.
Path hierarchy()
{
  const string joined = path::join(flags().cgroups_hierarchy, "systemd");
  return Path(joined);
}
// Runs `systemctl daemon-reload`, returning an `Error` if the command fails.
Try<Nothing> daemonReload()
{
  const Try<string> reloaded = os::shell("systemctl daemon-reload");

  if (!reloaded.isError()) {
    return Nothing();
  }

  return Error("Failed to reload systemd daemon: " + reloaded.error());
}
namespace slices {
// Reports whether the slice file at `path` is present on disk.
bool exists(const Path& path)
{
  const bool present = os::exists(path);
  return present;
}
// Writes `data` to the slice file at `path` and then reloads the systemd
// daemon. Returns an `Error` if either the write or the reload fails.
Try<Nothing> create(const Path& path, const string& data)
{
  const Try<Nothing> written = os::write(path, data);
  if (written.isError()) {
    return Error("Failed to write systemd slice `" + path.string() + "`: " +
                 written.error());
  }

  LOG(INFO) << "Created systemd slice: `" << path << "`";

  // Reload after writing the file; a failed reload is reported as a failure
  // to create the slice.
  const Try<Nothing> reloaded = daemonReload();
  if (reloaded.isError()) {
    return Error("Failed to create systemd slice `" + path.string() + "`: " +
                 reloaded.error());
  }

  return Nothing();
}
// Runs `systemctl start <name>`, returning an `Error` if the command fails.
Try<Nothing> start(const string& name)
{
  const Try<string> started = os::shell("systemctl start " + name);

  if (!started.isError()) {
    LOG(INFO) << "Started systemd slice `" << name << "`";
    return Nothing();
  }

  return Error(
      "Failed to start systemd slice `" + name + "`: " + started.error());
}
} // namespace slices {
} // namespace systemd {