blob: bbc9c190b9dd19862197be0b4f2edb37e0bbc15a [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "slave/containerizer/mesos/isolators/linux/devices.hpp"
#include <sys/mount.h>
#include <process/id.hpp>
#include <stout/os.hpp>
#include <stout/path.hpp>
#include <stout/strings.hpp>
#include <stout/os/posix/chown.hpp>
#include "common/protobuf_utils.hpp"
#include "slave/containerizer/mesos/paths.hpp"
using process::Failure;
using process::Future;
using process::Owned;
using mesos::slave::ContainerConfig;
using mesos::slave::ContainerLaunchInfo;
using mesos::slave::ContainerMountInfo;
using mesos::slave::Isolator;
using std::string;
namespace mesos {
namespace internal {
namespace slave {
// Validates the agent flags and constructs the Linux devices isolator.
//
// Returns an Error unless the effective UID is 0, the launcher flag is
// "linux" and the isolation flag contains "filesystem/linux". When the
// `allowed_devices` flag is set, every listed device is inspected via
// `os::stat::rdev` and `os::stat::mode`, and recorded in a map keyed by
// the device path with a leading "/dev/" prefix removed.
Try<Isolator*> LinuxDevicesIsolatorProcess::create(const Flags& flags)
{
  if (::geteuid() != 0) {
    return Error("Linux devices isolator requires root permissions");
  }

  if (flags.launcher != "linux") {
    return Error("'linux' launcher must be used");
  }

  if (!strings::contains(flags.isolation, "filesystem/linux")) {
    return Error("'filesystem/linux' isolator must be used");
  }

  // Maps the device path (without the "/dev/" prefix) to the device ID
  // and node mode gathered below.
  hashmap<string, Device> devices;

  if (flags.allowed_devices.isSome()) {
    foreach (const DeviceAccess& entry,
             flags.allowed_devices->allowed_devices()) {
      if (!entry.device().has_path()) {
        return Error("Whitelisted device has no device path provided");
      }

      const string& devicePath = entry.device().path();

      Try<dev_t> deviceId = os::stat::rdev(devicePath);
      if (deviceId.isError()) {
        return Error("Failed to obtain device ID for '" + devicePath +
                     "': " + deviceId.error());
      }

      Try<mode_t> statMode = os::stat::mode(devicePath);
      if (statMode.isError()) {
        return Error("Failed to obtain device mode for '" + devicePath +
                     "': " + statMode.error());
      }

      // The owner always gets read-write permission. The file type bit
      // is stored in the same mode value alongside the permission bits,
      // and only block and character devices are accepted.
      mode_t nodeMode = S_IRUSR | S_IWUSR;

      if (S_ISBLK(statMode.get())) {
        nodeMode |= S_IFBLK;
      } else if (S_ISCHR(statMode.get())) {
        nodeMode |= S_IFCHR;
      } else {
        return Error("'" + devicePath + "' is not a block or character device");
      }

      // Set the desired access for the device. Access is controlled at
      // container granularity, which is consistent with the devices cgroup
      // policy. This means that if we populate a read-write device into a
      // container, then every process in that container should have access,
      // regardless of the credential of that process.
      if (entry.access().read()) {
        nodeMode |= (S_IRGRP | S_IROTH);
      }

      if (entry.access().write()) {
        nodeMode |= (S_IWGRP | S_IWOTH);
      }

      Device device = {deviceId.get(), nodeMode};

      const string key = strings::remove(devicePath, "/dev/", strings::PREFIX);

      devices.put(key, device);
    }
  }

  Owned<MesosIsolatorProcess> process(
      new LinuxDevicesIsolatorProcess(flags.runtime_dir, devices));

  return new MesosIsolator(process);
}
// Stores copies of the runtime directory and the whitelisted device map,
// and registers the process under a generated "linux-devices-isolator" ID.
LinuxDevicesIsolatorProcess::LinuxDevicesIsolatorProcess(
    const string& _runtimeDirectory,
    const hashmap<string, Device>& _whitelistedDevices)
  : ProcessBase(process::ID::generate("linux-devices-isolator")),
    runtimeDirectory(_runtimeDirectory),
    whitelistedDevices(_whitelistedDevices)
{
}
// Reports that this isolator can be used with nested containers.
bool LinuxDevicesIsolatorProcess::supportsNesting()
{
  return true;
}
// Reports that this isolator can be used with standalone containers.
bool LinuxDevicesIsolatorProcess::supportsStandalone()
{
  return true;
}
// Creates the whitelisted device nodes in the container's devices
// directory and requests a bind mount of each one under `<rootfs>/dev`.
//
// Returns None when the container has no rootfs or when no devices are
// whitelisted. Returns a Failure if the devices directory is missing, or
// if creating a parent directory, creating a device node, or changing its
// permissions fails. Otherwise returns a ContainerLaunchInfo holding one
// mount per whitelisted device.
Future<Option<ContainerLaunchInfo>> LinuxDevicesIsolatorProcess::prepare(
    const ContainerID& containerId,
    const ContainerConfig& containerConfig)
{
  // Without a rootfs we won't be building a private `/dev`, and without
  // whitelisted devices there is nothing to populate.
  if (!containerConfig.has_rootfs() || whitelistedDevices.empty()) {
    return None();
  }

  const string devicesDir = containerizer::paths::getContainerDevicesPath(
      runtimeDirectory, containerId);

  // The `filesystem/linux` isolator is responsible for creating the
  // devices directory and ordered to run before we do. Here, we can
  // just assert that the devices directory is still present.
  if (!os::exists(devicesDir)) {
    return Failure("Missing container devices directory '" + devicesDir + "'");
  }

  ContainerLaunchInfo launchInfo;

  // Import the whitelisted devices to all containers.
  foreachpair (const string& relativePath,
               const Device& device,
               whitelistedDevices) {
    const string source = path::join(devicesDir, relativePath);

    // The device may live in a subdirectory, so make sure its parent
    // directory exists before creating the node.
    Try<Nothing> parent = os::mkdir(Path(source).dirname());
    if (parent.isError()) {
      return Failure(
          "Failed to create parent directory for device '" +
          source + "': " + parent.error());
    }

    Try<Nothing> node = os::mknod(source, device.mode, device.dev);
    if (node.isError()) {
      return Failure(
          "Failed to create device '" + source + "': " + node.error());
    }

    // We have to chmod the device to make sure that the umask doesn't filter
    // the permissions defined by the whitelist. The file type bits are
    // masked off so that only the permission bits are passed.
    Try<Nothing> permissions = os::chmod(source, device.mode & ~S_IFMT);
    if (permissions.isError()) {
      return Failure(
          "Failed to chmod device '" + source + "': " + permissions.error());
    }

    const string target =
      path::join(containerConfig.rootfs(), "dev", relativePath);

    launchInfo.add_mounts()->CopyFrom(
        protobuf::slave::createContainerMount(source, target, MS_BIND));
  }

  // TODO(jpeach) Define Task API to let schedulers specify the container
  // devices and automatically populate the right devices cgroup entries.

  return launchInfo;
}
} // namespace slave {
} // namespace internal {
} // namespace mesos {