// blob: cf261d13f83ed13edeaa5114b06dd3985bd050c5 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "slave/containerizer/mesos/provisioner/backends/overlay.hpp"
#include <process/dispatch.hpp>
#include <process/id.hpp>
#include <process/process.hpp>
#include <stout/adaptor.hpp>
#include <stout/foreach.hpp>
#include <stout/fs.hpp>
#include <stout/os.hpp>
#include <stout/os/realpath.hpp>
#include "linux/fs.hpp"
#include "slave/paths.hpp"
#include "slave/containerizer/mesos/provisioner/constants.hpp"
using process::Failure;
using process::Future;
using process::Owned;
using process::Process;
using process::Shared;
using process::dispatch;
using process::spawn;
using process::wait;
using std::string;
using std::vector;
namespace mesos {
namespace internal {
namespace slave {
// Process that performs the actual overlay provisioning work on behalf
// of `OverlayBackend`, which forwards its `provision()` and `destroy()`
// calls to this class through `dispatch`.
class OverlayBackendProcess : public Process<OverlayBackendProcess>
{
public:
// Initializes the process base with an ID produced by
// `process::ID::generate` from the "overlay-provisioner-backend" prefix.
OverlayBackendProcess()
: ProcessBase(process::ID::generate("overlay-provisioner-backend")) {}
// Overlay-mounts `layers` onto `rootfs`, keeping the overlay `upperdir`
// and `workdir` under `<backendDir>/scratch/<basename of rootfs>/`.
// On success the future holds the paths of `upperdir` and `workdir`;
// it fails if `layers` is empty or if any directory creation, symlink
// creation or mount step reports an error.
Future<Option<vector<Path>>> provision(
const vector<string>& layers,
const string& rootfs,
const string& backendDir);
// Unmounts `rootfs` if it appears as a target in the mount table and
// removes the temporary layer-link directory referenced by
// `<backendDir>/scratch/<basename of rootfs>/links`. The future holds
// `true` when a matching mount was found and handled, `false` when no
// mount table entry targets `rootfs`.
Future<bool> destroy(
const string& rootfs,
const string& backendDir);
};
// Lists the entries that live under the per-rootfs scratch directories
// of every container found in the provisioner directory of `workDir`.
//
// `provision()` lays the scratch area out as
//   <backendDir>/scratch/<rootfs ID>/{upperdir,workdir,links}
// so the glob pattern needs one path component for the rootfs ID and
// one for the entries underneath it:
//   <provisioner dir>/containers/*/backends/<OVERLAY_BACKEND>/scratch/*/*
//
// Returns the result of `os::glob` for that pattern (the matching paths,
// or the error reported by `os::glob`).
Try<std::list<std::string>> OverlayBackend::listEphemeralVolumes(
    const string& workDir)
{
  return os::glob(path::join(
      paths::getProvisionerDir(workDir),
      "containers",
      "*",              /* ContainerID */
      "backends",
      OVERLAY_BACKEND,  /* backendDir */
      // NOTE: The comma after "scratch" is essential. Without it the
      // adjacent string literals are concatenated into "scratch*" and
      // the pattern ends up one component short, matching the rootfs ID
      // directories instead of the entries inside them.
      "scratch",
      "*",              /* rootfs ID */
      "*"));
}
// Factory for the overlay backend. Refuses to create the backend unless
// the effective user ID is 0; otherwise wraps a freshly allocated
// `OverlayBackendProcess` in a new `OverlayBackend`. The flags argument
// is not used.
Try<Owned<Backend>> OverlayBackend::create(const Flags&)
{
  const bool isRoot = (geteuid() == 0);
  if (!isRoot) {
    return Error("OverlayBackend requires root privileges");
  }

  Owned<OverlayBackendProcess> backendProcess(new OverlayBackendProcess());

  return Owned<Backend>(new OverlayBackend(backendProcess));
}
// Terminates the backing process and then waits on it before the
// backend object goes away.
OverlayBackend::~OverlayBackend()
{
  OverlayBackendProcess* const backend = process.get();

  terminate(backend);
  wait(backend);
}
// Takes ownership of `_process` and spawns it. The process pointer is
// checked to be non-null before it is handed to `spawn`.
OverlayBackend::OverlayBackend(Owned<OverlayBackendProcess> _process)
  : process(_process)
{
  OverlayBackendProcess* const backend = CHECK_NOTNULL(process.get());
  spawn(backend);
}
// Forwards the provisioning request to
// `OverlayBackendProcess::provision` through `dispatch` and returns the
// resulting future unchanged.
Future<Option<vector<Path>>> OverlayBackend::provision(
    const vector<string>& layers,
    const string& rootfs,
    const string& backendDir)
{
  OverlayBackendProcess* const backend = process.get();

  return dispatch(
      backend, &OverlayBackendProcess::provision, layers, rootfs, backendDir);
}
// Forwards the destroy request to `OverlayBackendProcess::destroy`
// through `dispatch` and returns the resulting future unchanged.
Future<bool> OverlayBackend::destroy(
    const string& rootfs,
    const string& backendDir)
{
  OverlayBackendProcess* const backend = process.get();

  return dispatch(backend, &OverlayBackendProcess::destroy, rootfs, backendDir);
}
// Provisions the root filesystem `rootfs` by overlay-mounting `layers`
// onto it.
//
// Parameters:
//   layers     - Paths of the image layers; the first element becomes
//                the bottom most overlay layer. Must not be empty.
//   rootfs     - Mount point to create and mount the overlay on. Its
//                basename is used as the rootfs ID.
//   backendDir - Backend directory; the overlay `upperdir`, `workdir`
//                and the `links` symlink are created under
//                `<backendDir>/scratch/<rootfs ID>/`.
//
// Returns the paths of `upperdir` and `workdir` on success. Returns a
// failure if no layer is given or if creating a directory, creating a
// symlink, or any of the mount calls fails.
//
// If a step fails after the temporary link directory has been created
// but before the overlay mount succeeds, the temporary directory (and
// the `links` symlink, if already created) is removed on a best-effort
// basis before the failure is returned.
Future<Option<vector<Path>>> OverlayBackendProcess::provision(
    const vector<string>& layers,
    const string& rootfs,
    const string& backendDir)
{
  if (layers.empty()) {
    return Failure("No filesystem layer provided");
  }

  Try<Nothing> mkdir = os::mkdir(rootfs);
  if (mkdir.isError()) {
    return Failure(
        "Failed to create container rootfs at '" +
        rootfs + "': " + mkdir.error());
  }

  const string rootfsId = Path(rootfs).basename();
  const string scratchDir = path::join(backendDir, "scratch", rootfsId);
  const string upperdir = path::join(scratchDir, "upperdir");
  const string workdir = path::join(scratchDir, "workdir");

  mkdir = os::mkdir(upperdir);
  if (mkdir.isError()) {
    return Failure(
        "Failed to create overlay upperdir at '" +
        upperdir + "': " + mkdir.error());
  }

  mkdir = os::mkdir(workdir);
  if (mkdir.isError()) {
    return Failure(
        "Failed to create overlay workdir at '" +
        workdir + "': " + mkdir.error());
  }

  // We create a symlink with a shorter path to each of the base layers.
  Try<string> mktemp = os::mkdtemp();
  if (mktemp.isError()) {
    return Failure(
        "Failed to create temporary directory for symlinks to layers: " +
        mktemp.error());
  }

  const string tempDir = mktemp.get();
  const string tempLink = path::join(scratchDir, "links");

  // Best-effort removal of the temporary link directory and, when
  // requested, of the `links` symlink pointing at it. `destroy()` only
  // cleans these up once it finds `rootfs` in the mount table, so a
  // failure before the overlay mount succeeds has to undo them here or
  // they would be left behind. Cleanup errors are only logged so that
  // they do not mask the provisioning failure being returned.
  auto discardLinks = [&tempDir, &tempLink](bool removeTempLink) {
    Try<Nothing> rmdir = os::rmdir(tempDir);
    if (rmdir.isError()) {
      LOG(WARNING) << "Failed to remove temporary directory '" << tempDir
                   << "' after provisioning failure: " << rmdir.error();
    }

    if (removeTempLink) {
      Try<Nothing> rm = os::rm(tempLink);
      if (rm.isError()) {
        LOG(WARNING) << "Failed to remove symlink '" << tempLink
                     << "' after provisioning failure: " << rm.error();
      }
    }
  };

  Try<Nothing> symlink = ::fs::symlink(tempDir, tempLink);
  if (symlink.isError()) {
    // The `links` symlink was not created, only the directory exists.
    discardLinks(false);

    return Failure(
        "Failed to create symlink '" + tempLink +
        "' -> '" + tempDir + "': " + symlink.error());
  }

  VLOG(1) << "Created symlink '" << tempLink << "' -> '" << tempDir << "'";

  vector<string> links;
  links.reserve(layers.size());

  // We create symlinks with file name 0, 1, ..., N-1 in tempDir which
  // point to the corresponding layers in the same order.
  size_t idx = 0;
  foreach (const string& layer, layers) {
    const string link = path::join(tempDir, std::to_string(idx++));

    Try<Nothing> layerLink = ::fs::symlink(layer, link);
    if (layerLink.isError()) {
      discardLinks(true);

      return Failure(
          "Failed to create symlink at '" + link +
          "' -> '" + layer + "': " + layerLink.error());
    }

    links.push_back(link);
  }

  // For overlayfs, the specified lower directories will be stacked
  // beginning from the rightmost one and going left. But we need the
  // first layer in the vector to be the bottom most layer.
  string options = "lowerdir=" + strings::join(":", adaptor::reverse(links));
  options += ",upperdir=" + upperdir;
  options += ",workdir=" + workdir;

  VLOG(1) << "Provisioning image rootfs with overlayfs: '" << options << "'";

  Try<Nothing> mount = fs::mount(
      "overlay",
      rootfs,
      "overlay",
      0,
      options);

  if (mount.isError()) {
    discardLinks(true);

    return Failure(
        "Failed to mount rootfs '" + rootfs +
        "' with overlayfs: " + mount.error());
  }

  // NOTE: From this point on `rootfs` is mounted, so `destroy()` is able
  // to locate it in the mount table and clean up the link directory.
  // The failure paths below therefore leave it in place.

  // Mark the mount as shared+slave: first as a slave mount, then
  // additionally as a shared mount.
  mount = fs::mount(
      None(),
      rootfs,
      None(),
      MS_SLAVE,
      nullptr);

  if (mount.isError()) {
    return Failure(
        "Failed to mark mount '" + rootfs +
        "' as a slave mount: " + mount.error());
  }

  mount = fs::mount(
      None(),
      rootfs,
      None(),
      MS_SHARED,
      nullptr);

  if (mount.isError()) {
    return Failure(
        "Failed to mark mount '" + rootfs +
        "' as a shared mount: " + mount.error());
  }

  // Note that both upperdir and workdir are ephemeral. The `disk/xfs`
  // isolator needs this because XFS will error with EXDEV when renaming
  // a file into a tree with a different project ID (see xfs_rename).
  return vector<Path>{Path(upperdir), Path(workdir)};
}
// Tears down the overlay rootfs mounted at `rootfs`.
//
// Scans the mount table for an entry whose target equals `rootfs`. For
// the first such entry the mount is lazily unmounted, the mount point
// directory is removed (best effort) and the temporary directory that
// `<backendDir>/scratch/<basename of rootfs>/links` points to is
// removed together with the symlink itself.
//
// Returns `true` once a matching mount has been handled (including the
// case where the `links` symlink does not exist), `false` if no mount
// table entry targets `rootfs`, and a failure if the mount table cannot
// be read, the unmount fails, `links` is not a symlink, or removing the
// temporary directory or the symlink fails.
Future<bool> OverlayBackendProcess::destroy(
    const string& rootfs,
    const string& backendDir)
{
  Try<fs::MountInfoTable> table = fs::MountInfoTable::read();
  if (table.isError()) {
    return Failure("Failed to read mount table: " + table.error());
  }

  const string rootfsId = Path(rootfs).basename();

  foreach (const fs::MountInfoTable::Entry& mountEntry, table->entries) {
    // Skip every mount that is not the rootfs we were asked to destroy.
    if (mountEntry.target != rootfs) {
      continue;
    }

    // NOTE: MNT_DETACH lets the unmount succeed even while processes
    // still hold files or directories inside the rootfs; the kernel
    // cleans the mount up once the last reference is dropped.
    Try<Nothing> detached = fs::unmount(mountEntry.target, MNT_DETACH);
    if (detached.isError()) {
      return Failure(
          "Failed to destroy overlay-mounted rootfs '" + rootfs + "': " +
          detached.error());
    }

    Try<Nothing> mountPointRemoved = os::rmdir(rootfs);
    if (mountPointRemoved.isError()) {
      // NOTE: Because of MNT_DETACH, `rmdir` may fail with EBUSY on
      // some old kernels when mounts in other mount namespaces still
      // sit on this mount point (https://lwn.net/Articles/570338/).
      // This is deliberately not a hard failure: the directory will be
      // removed later and re-attempted on agent recovery.
      //
      // TODO(jieyu): Consider only ignore EBUSY error.
      LOG(ERROR) << "Failed to remove rootfs mount point "
                 << "'" << rootfs << "': " << mountPointRemoved.error();
    }

    // Clean up the temporary directory holding the image layer links.
    const string tempLink =
      path::join(backendDir, "scratch", rootfsId, "links");

    if (!os::exists(tempLink)) {
      // TODO(zhitao): This should be converted into a failure after
      // deprecation cycle started by 1.1.0.
      VLOG(1) << "Cannot find symlink to temporary directory '" << tempLink
              << "' for image links";

      return true;
    }

    if (!os::stat::islink(tempLink)) {
      return Failure("Invalid symlink '" + tempLink + "'");
    }

    // NOTE: The symlink may be dangling, e.g. if the agent crashed
    // after the temporary directory was removed but before the symlink
    // itself was removed. In that case only the symlink is left to
    // delete.
    Result<string> linkTarget = os::realpath(tempLink);
    if (linkTarget.isSome()) {
      Try<Nothing> tempDirRemoved = os::rmdir(linkTarget.get());
      if (tempDirRemoved.isError()) {
        return Failure(
            "Failed to remove temporary directory for symlinks at "
            "'" + linkTarget.get() + "': " + tempDirRemoved.error());
      }

      VLOG(1) << "Removed temporary directory '" << linkTarget.get()
              << "' pointed by '" << tempLink << "'";
    }

    Try<Nothing> linkRemoved = os::rm(tempLink);
    if (linkRemoved.isError()) {
      return Failure("Failed to remove symlink at '" + tempLink +
                     "': " + linkRemoved.error());
    }

    return true;
  }

  // No mount table entry targets `rootfs`.
  return false;
}
} // namespace slave {
} // namespace internal {
} // namespace mesos {