src/slave/containerizer/mesos/isolators/linux/capabilities.cpp - mesos - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #include <unistd.h>

 #include <sys/types.h>

 #include <algorithm>
 #include <set>

 #include <stout/set.hpp>

 #include "linux/capabilities.hpp"

 #include "slave/containerizer/mesos/isolators/linux/capabilities.hpp"

 using std::set;

 using process::Failure;
 using process::Future;
 using process::Owned;

 using mesos::internal::capabilities::Capabilities;
 using mesos::internal::capabilities::Capability;
 using mesos::internal::capabilities::convert;

 using mesos::slave::ContainerConfig;
 using mesos::slave::ContainerLaunchInfo;
 using mesos::slave::Isolator;

 namespace mesos {
 namespace internal {
 namespace slave {

 Try<Isolator*> LinuxCapabilitiesIsolatorProcess::create(const Flags& flags)
 {
   if (geteuid() != 0) {
     return Error("Linux capabilities isolator requires root permissions");
   }

   Try<Capabilities> create = Capabilities::create();
   if (create.isError()) {
     return Error("Failed to initialize capabilities: " + create.error());
   }

   if (flags.effective_capabilities.isSome() &&
       flags.bounding_capabilities.isSome()) {
     const set<Capability> bounding = convert(
         flags.bounding_capabilities.get());
     const set<Capability> effective = convert(
         flags.effective_capabilities.get());

     if ((effective & bounding).size() != effective.size()) {
       return Error(
           "Allowed capabilities are not a subset of the bounding capabilites");
     }
   }

   return new MesosIsolator(
       Owned<MesosIsolatorProcess>(new LinuxCapabilitiesIsolatorProcess(flags)));
 }


 bool LinuxCapabilitiesIsolatorProcess::supportsNesting()
 {
   return true;
 }


 bool LinuxCapabilitiesIsolatorProcess::supportsStandalone()
 {
   return true;
 }


 Future<Option<ContainerLaunchInfo>> LinuxCapabilitiesIsolatorProcess::prepare(
     const ContainerID& containerId,
     const ContainerConfig& containerConfig)
 {
   Option<CapabilityInfo> effective = None();
   Option<CapabilityInfo> bounding = None();

   // If effective capabilities are specified, those become
   // both the effective and bounding sets, because we guarantee
   // that the effective set is at least as restrictive as the
   // bounding set.
   if (containerConfig.has_container_info() &&
       containerConfig.container_info().has_linux_info()) {
     const auto& linuxInfo = containerConfig.container_info().linux_info();

     if (linuxInfo.has_capability_info() &&
         linuxInfo.has_effective_capabilities()) {
       return Failure(
           "Only one of 'capability_info' or 'effective_capabilities' "
           "is allowed");
     }

     if (linuxInfo.has_capability_info()) {
       effective = linuxInfo.capability_info();
     }

     if (linuxInfo.has_effective_capabilities()) {
       effective = linuxInfo.effective_capabilities();
     }

     if (linuxInfo.has_bounding_capabilities()) {
       bounding = linuxInfo.bounding_capabilities();
     }
   }

   // If the framework didn't specify, use the operator effective set.
   if (effective.isNone()) {
     effective = flags.effective_capabilities;
   }

   // If the framework specified a bounding set, test it against
   // flags.bounding_capabilities since that defines the limits of
   // what the operator is willing to allow.
   if (bounding.isSome() && flags.bounding_capabilities.isSome()) {
     const set<Capability> requested = convert(bounding.get());
     const set<Capability> allowed = convert(flags.bounding_capabilities.get());

     if ((requested & allowed).size() != requested.size()) {
       return Failure(
           "Bounding capabilities '" + stringify(requested) + "', "
           "but only '" + stringify(allowed) + "' are allowed");
     }
   }

   // If the framework didn't specify, use the operator bounding set and fall
   // back to the effective set if necessary.
   if (bounding.isNone()) {
     bounding = flags.bounding_capabilities;
   }

   if (effective.isSome() && bounding.isNone()) {
     bounding = effective;
   }

   // Require the effective task capabilities to be within the bounding set.
   if (effective.isSome()) {
     CHECK_SOME(bounding);

     const set<Capability> requested = convert(effective.get());
     const set<Capability> allowed = convert(bounding.get());

     if ((requested & allowed).size() != requested.size()) {
       return Failure(
           "Requested capabilities '" + stringify(requested) + "', "
           "but only '" + stringify(allowed) + "' are allowed");
     }
   }

   // If no capabilities need to be set for the container, we do
   // not need to modify the container launch info.
   if (effective.isNone() && bounding.isNone()) {
     return None();
   }

   // The bounding set is always present, but the effective set
   // may be absent.
   CHECK_SOME(bounding);

   ContainerLaunchInfo launchInfo;

   if (containerConfig.has_task_info()) {
     // Command task case.
     // 1) If the command task specifies a root filesystem, we need the
     //    command executor to be run with full privileges to perform
     //    operations like 'pivot_root'. Therefore, we set the command
     //    executor flags and the command executor will set the
     //    capabilities when it launches the task.
     // 2) If the command task does not specify a root filesystem, we
     //    will set the capabilities before we execute the command
     //    executor. Command executor itself does not require any
     //    capabilities to execute the user task in that case.
     //
     // TODO(jieyu): The caveat in case 2) is that the command executor
     // should be executable after the process drops capabilities.  For
     // containers that want to run under root, if the requested
     // capability for the container does not have CAP_DAC_READ_SEARCH,
     // the exec will fail with EACCESS, which is not the typical
     // behavior a user would expect from a root user.
     if (containerConfig.has_rootfs()) {
       launchInfo.mutable_command()->add_arguments(
           "--bounding_capabilities=" +
           stringify(JSON::protobuf(bounding.get())));
       if (effective.isSome()) {
         launchInfo.mutable_command()->add_arguments(
             "--effective_capabilities=" +
             stringify(JSON::protobuf(effective.get())));
       }
     } else {
       launchInfo.mutable_bounding_capabilities()->CopyFrom(bounding.get());
       if (effective.isSome()) {
         launchInfo.mutable_effective_capabilities()->CopyFrom(effective.get());
       }
     }
   } else {
     // Custom executor or nested container.
     launchInfo.mutable_bounding_capabilities()->CopyFrom(bounding.get());
     if (effective.isSome()) {
       launchInfo.mutable_effective_capabilities()->CopyFrom(effective.get());
     }
   }

   return launchInfo;
 }

 } // namespace slave {
 } // namespace internal {
 } // namespace mesos {
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	#include <unistd.h>

	#include <sys/types.h>

	#include <algorithm>
	#include <set>

	#include <stout/set.hpp>

	#include "linux/capabilities.hpp"

	#include "slave/containerizer/mesos/isolators/linux/capabilities.hpp"

	using std::set;

	using process::Failure;
	using process::Future;
	using process::Owned;

	using mesos::internal::capabilities::Capabilities;
	using mesos::internal::capabilities::Capability;
	using mesos::internal::capabilities::convert;

	using mesos::slave::ContainerConfig;
	using mesos::slave::ContainerLaunchInfo;
	using mesos::slave::Isolator;

	namespace mesos {
	namespace internal {
	namespace slave {

	Try<Isolator*> LinuxCapabilitiesIsolatorProcess::create(const Flags& flags)
	{
	if (geteuid() != 0) {
	return Error("Linux capabilities isolator requires root permissions");
	}

	Try<Capabilities> create = Capabilities::create();
	if (create.isError()) {
	return Error("Failed to initialize capabilities: " + create.error());
	}

	if (flags.effective_capabilities.isSome() &&
	flags.bounding_capabilities.isSome()) {
	const set<Capability> bounding = convert(
	flags.bounding_capabilities.get());
	const set<Capability> effective = convert(
	flags.effective_capabilities.get());

	if ((effective & bounding).size() != effective.size()) {
	return Error(
	"Allowed capabilities are not a subset of the bounding capabilites");
	}
	}

	return new MesosIsolator(
	Owned<MesosIsolatorProcess>(new LinuxCapabilitiesIsolatorProcess(flags)));
	}


	bool LinuxCapabilitiesIsolatorProcess::supportsNesting()
	{
	return true;
	}


	bool LinuxCapabilitiesIsolatorProcess::supportsStandalone()
	{
	return true;
	}


	Future<Option<ContainerLaunchInfo>> LinuxCapabilitiesIsolatorProcess::prepare(
	const ContainerID& containerId,
	const ContainerConfig& containerConfig)
	{
	Option<CapabilityInfo> effective = None();
	Option<CapabilityInfo> bounding = None();

	// If effective capabilities are specified, those become
	// both the effective and bounding sets, because we guarantee
	// that the effective set is at least as restrictive as the
	// bounding set.
	if (containerConfig.has_container_info() &&
	containerConfig.container_info().has_linux_info()) {
	const auto& linuxInfo = containerConfig.container_info().linux_info();

	if (linuxInfo.has_capability_info() &&
	linuxInfo.has_effective_capabilities()) {
	return Failure(
	"Only one of 'capability_info' or 'effective_capabilities' "
	"is allowed");
	}

	if (linuxInfo.has_capability_info()) {
	effective = linuxInfo.capability_info();
	}

	if (linuxInfo.has_effective_capabilities()) {
	effective = linuxInfo.effective_capabilities();
	}

	if (linuxInfo.has_bounding_capabilities()) {
	bounding = linuxInfo.bounding_capabilities();
	}
	}

	// If the framework didn't specify, use the operator effective set.
	if (effective.isNone()) {
	effective = flags.effective_capabilities;
	}

	// If the framework specified a bounding set, test it against
	// flags.bounding_capabilities since that defines the limits of
	// what the operator is willing to allow.
	if (bounding.isSome() && flags.bounding_capabilities.isSome()) {
	const set<Capability> requested = convert(bounding.get());
	const set<Capability> allowed = convert(flags.bounding_capabilities.get());

	if ((requested & allowed).size() != requested.size()) {
	return Failure(
	"Bounding capabilities '" + stringify(requested) + "', "
	"but only '" + stringify(allowed) + "' are allowed");
	}
	}

	// If the framework didn't specify, use the operator bounding set and fall
	// back to the effective set if necessary.
	if (bounding.isNone()) {
	bounding = flags.bounding_capabilities;
	}

	if (effective.isSome() && bounding.isNone()) {
	bounding = effective;
	}

	// Require the effective task capabilities to be within the bounding set.
	if (effective.isSome()) {
	CHECK_SOME(bounding);

	const set<Capability> requested = convert(effective.get());
	const set<Capability> allowed = convert(bounding.get());

	if ((requested & allowed).size() != requested.size()) {
	return Failure(
	"Requested capabilities '" + stringify(requested) + "', "
	"but only '" + stringify(allowed) + "' are allowed");
	}
	}

	// If no capabilities need to be set for the container, we do
	// not need to modify the container launch info.
	if (effective.isNone() && bounding.isNone()) {
	return None();
	}

	// The bounding set is always present, but the effective set
	// may be absent.
	CHECK_SOME(bounding);

	ContainerLaunchInfo launchInfo;

	if (containerConfig.has_task_info()) {
	// Command task case.
	// 1) If the command task specifies a root filesystem, we need the
	// command executor to be run with full privileges to perform
	// operations like 'pivot_root'. Therefore, we set the command
	// executor flags and the command executor will set the
	// capabilities when it launches the task.
	// 2) If the command task does not specify a root filesystem, we
	// will set the capabilities before we execute the command
	// executor. Command executor itself does not require any
	// capabilities to execute the user task in that case.
	//
	// TODO(jieyu): The caveat in case 2) is that the command executor
	// should be executable after the process drops capabilities. For
	// containers that want to run under root, if the requested
	// capability for the container does not have CAP_DAC_READ_SEARCH,
	// the exec will fail with EACCESS, which is not the typical
	// behavior a user would expect from a root user.
	if (containerConfig.has_rootfs()) {
	launchInfo.mutable_command()->add_arguments(
	"--bounding_capabilities=" +
	stringify(JSON::protobuf(bounding.get())));
	if (effective.isSome()) {
	launchInfo.mutable_command()->add_arguments(
	"--effective_capabilities=" +
	stringify(JSON::protobuf(effective.get())));
	}
	} else {
	launchInfo.mutable_bounding_capabilities()->CopyFrom(bounding.get());
	if (effective.isSome()) {
	launchInfo.mutable_effective_capabilities()->CopyFrom(effective.get());
	}
	}
	} else {
	// Custom executor or nested container.
	launchInfo.mutable_bounding_capabilities()->CopyFrom(bounding.get());
	if (effective.isSome()) {
	launchInfo.mutable_effective_capabilities()->CopyFrom(effective.get());
	}
	}

	return launchInfo;
	}

	} // namespace slave {
	} // namespace internal {
	} // namespace mesos {