src/tests/containerizer/isolator_tests.cpp - mesos - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #include <unistd.h>

 #include <functional>
 #include <iostream>
 #include <string>
 #include <vector>

 #include <gmock/gmock.h>

 #include <mesos/resources.hpp>

 #include <mesos/module/isolator.hpp>

 #include <mesos/slave/isolator.hpp>

 #include <process/future.hpp>
 #include <process/io.hpp>
 #include <process/owned.hpp>
 #include <process/reap.hpp>

 #include <stout/abort.hpp>
 #include <stout/duration.hpp>
 #include <stout/gtest.hpp>
 #include <stout/hashset.hpp>
 #include <stout/os.hpp>
 #include <stout/path.hpp>

 #ifdef __linux__
 #include "linux/ns.hpp"
 #endif // __linux__

 #include "master/master.hpp"

 #include "slave/flags.hpp"
 #include "slave/gc.hpp"
 #include "slave/slave.hpp"

 #ifdef __linux__
 #include "slave/containerizer/mesos/isolators/cgroups/constants.hpp"
 #include "slave/containerizer/mesos/isolators/cgroups/cpushare.hpp"
 #include "slave/containerizer/mesos/isolators/cgroups/mem.hpp"
 #include "slave/containerizer/mesos/isolators/cgroups/net_cls.hpp"
 #include "slave/containerizer/mesos/isolators/cgroups/perf_event.hpp"
 #include "slave/containerizer/mesos/isolators/filesystem/shared.hpp"
 #endif // __linux__
 #include "slave/containerizer/mesos/isolators/posix.hpp"

 #include "slave/containerizer/mesos/launcher.hpp"
 #ifdef __linux__
 #include "slave/containerizer/fetcher.hpp"
 #include "slave/containerizer/mesos/containerizer.hpp"
 #include "slave/containerizer/mesos/launch.hpp"
 #include "slave/containerizer/mesos/linux_launcher.hpp"
 #endif // __linux__

 #include "tests/flags.hpp"
 #include "tests/mesos.hpp"
 #include "tests/module.hpp"
 #include "tests/utils.hpp"

 #include "tests/containerizer/memory_test_helper.hpp"

 using namespace process;

 using mesos::internal::master::Master;
 #ifdef __linux__
 using mesos::internal::slave::CgroupsCpushareIsolatorProcess;
 using mesos::internal::slave::CgroupsMemIsolatorProcess;
 using mesos::internal::slave::CgroupsNetClsIsolatorProcess;
 using mesos::internal::slave::CgroupsPerfEventIsolatorProcess;
 using mesos::internal::slave::CPU_SHARES_PER_CPU_REVOCABLE;
 using mesos::internal::slave::Fetcher;
 using mesos::internal::slave::LinuxLauncher;
 using mesos::internal::slave::NetClsHandle;
 using mesos::internal::slave::NetClsHandleManager;
 using mesos::internal::slave::SharedFilesystemIsolatorProcess;
 #endif // __linux__
 using mesos::internal::slave::Launcher;
 using mesos::internal::slave::MesosContainerizer;
 using mesos::internal::slave::PosixLauncher;
 using mesos::internal::slave::PosixCpuIsolatorProcess;
 using mesos::internal::slave::PosixMemIsolatorProcess;
 using mesos::internal::slave::Slave;

 using mesos::master::detector::MasterDetector;

 using mesos::slave::ContainerConfig;
 using mesos::slave::ContainerLaunchInfo;
 using mesos::slave::Isolator;

 using process::http::OK;
 using process::http::Response;

 using std::ostringstream;
 using std::set;
 using std::string;
 using std::vector;

 namespace mesos {
 namespace internal {
 namespace tests {

 // Flag describing whether a the isolatePid parentHook should check the cgroups
 // before and after isolation.
 enum CheckCgroups
 {
   CHECK_CGROUPS,
   NO_CHECK_CGROUPS,
 };

 // A hook that is executed in the parent process. It attempts to isolate
 // a process and can optionally check the cgroups before and after isolation.
 //
 // NOTE: The child process is blocked by the hook infrastructure while
 // these hooks are executed.
 // NOTE: Returning an Error implies the child process will be killed.
 Try<Nothing> isolatePid(
     pid_t child,
     const Owned<Isolator>& isolator,
     const ContainerID& containerId,
     const CheckCgroups checkCgroups = NO_CHECK_CGROUPS)
 {
   if (checkCgroups == CHECK_CGROUPS) {
     // Before isolation, the cgroup is empty.
     Future<ResourceStatistics> usage = isolator->usage(containerId);

     // Note this is following the implementation of AWAIT_READY.
     if (!process::internal::await(usage, Seconds(15))) {
       return Error("Could check cgroup usage");
     }
     if (usage.isDiscarded() || usage.isFailed()) {
       return Error("Could check cgroup usage");
     }

     if (0U != usage.get().processes()) {
       return Error("Cgroups processes not empty before isolation");
     }
     if (0U != usage.get().threads()) {
       return Error("Cgroups threads not empty before isolation");
     }
   }

   // Isolate process.
   process::Future<Nothing> isolate = isolator->isolate(containerId, child);

   // Note this is following the implementation of AWAIT_READY.
   if (!process::internal::await(isolate, Seconds(15))) {
     return Error("Could not isolate pid");
   }
   if (isolate.isDiscarded() || isolate.isFailed()) {
     return Error("Could not isolate pid");
   }

   if (checkCgroups == CHECK_CGROUPS) {
     // After isolation, there should be one process in the cgroup.
     Future<ResourceStatistics> usage = isolator->usage(containerId);

     // Note this is following the implementation of AWAIT_READY.
     if (!process::internal::await(usage, Seconds(15))) {
       return Error("Could check cgroup usage");
     }
     if (usage.isDiscarded() || usage.isFailed()) {
       return Error("Could check cgroup usage");
     }

     if (1U != usage.get().processes()) {
       return Error("Cgroups processes empty after isolation");
     }
     if (1U != usage.get().threads()) {
       return Error("Cgroups threads empty after isolation");
     }
   }

   return Nothing();
 }


 template <typename T>
 class CpuIsolatorTest : public MesosTest {};


 typedef ::testing::Types<
     PosixCpuIsolatorProcess,
 #ifdef __linux__
     CgroupsCpushareIsolatorProcess,
 #endif // __linux__
     tests::Module<Isolator, TestCpuIsolator>> CpuIsolatorTypes;


 TYPED_TEST_CASE(CpuIsolatorTest, CpuIsolatorTypes);


 TYPED_TEST(CpuIsolatorTest, UserCpuUsage)
 {
   slave::Flags flags;

   Try<Isolator*> _isolator = TypeParam::create(flags);
   ASSERT_SOME(_isolator);
   Owned<Isolator> isolator(_isolator.get());

   // A PosixLauncher is sufficient even when testing a cgroups isolator.
   Try<Launcher*> _launcher = PosixLauncher::create(flags);
   ASSERT_SOME(_launcher);
   Owned<Launcher> launcher(_launcher.get());

   ExecutorInfo executorInfo;
   executorInfo.mutable_resources()->CopyFrom(
       Resources::parse("cpus:1.0").get());

   ContainerID containerId;
   containerId.set_value(UUID::random().toString());

   // Use a relative temporary directory so it gets cleaned up
   // automatically with the test.
   Try<string> dir = os::mkdtemp(path::join(os::getcwd(), "XXXXXX"));
   ASSERT_SOME(dir);

   ContainerConfig containerConfig;
   containerConfig.mutable_executor_info()->CopyFrom(executorInfo);
   containerConfig.set_directory(dir.get());

   AWAIT_READY(isolator->prepare(
       containerId,
       containerConfig));

   const string& file = path::join(dir.get(), "mesos_isolator_test_ready");

   // Max out a single core in userspace. This will run for at most onesecond.
   string command = "while true ; do true ; done &"
     "touch " + file + "; " // Signals the command is running.
     "sleep 60";

   vector<string> argv(3);
   argv[0] = "sh";
   argv[1] = "-c";
   argv[2] = command;

   vector<Subprocess::Hook> parentHooks;

   // Create parent Hook to isolate child.
   //
   // NOTE: We can safely use references here as the hook will be executed
   // during the call to `fork`.
   const lambda::function<Try<Nothing>(pid_t)> isolatePidHook = lambda::bind(
       isolatePid,
       lambda::_1,
       std::cref(isolator),
       std::cref(containerId),
       NO_CHECK_CGROUPS);

   parentHooks.emplace_back(Subprocess::Hook(isolatePidHook));

   Try<pid_t> pid = launcher->fork(
       containerId,
       "sh",
       argv,
       Subprocess::FD(STDIN_FILENO),
       Subprocess::FD(STDOUT_FILENO),
       Subprocess::FD(STDERR_FILENO),
       None(),
       None(),
       None(),
       parentHooks);

   ASSERT_SOME(pid);

   // Reap the forked child.
   Future<Option<int> > status = process::reap(pid.get());

   // Wait for the command to start.
   while (!os::exists(file));

   // Wait up to 1 second for the child process to induce 1/8 of a second of
   // user cpu time.
   ResourceStatistics statistics;
   Duration waited = Duration::zero();
   do {
     Future<ResourceStatistics> usage = isolator->usage(containerId);
     AWAIT_READY(usage);

     statistics = usage.get();

     // If we meet our usage expectations, we're done!
     if (statistics.cpus_user_time_secs() >= 0.125) {
       break;
     }

     os::sleep(Milliseconds(200));
     waited += Milliseconds(200);
   } while (waited < Seconds(1));

   EXPECT_LE(0.125, statistics.cpus_user_time_secs());

   // Ensure all processes are killed.
   AWAIT_READY(launcher.get()->destroy(containerId));

   // Make sure the child was reaped.
   AWAIT_READY(status);

   // Let the isolator clean up.
   AWAIT_READY(isolator->cleanup(containerId));
 }


 TYPED_TEST(CpuIsolatorTest, SystemCpuUsage)
 {
   slave::Flags flags;

   Try<Isolator*> _isolator = TypeParam::create(flags);
   ASSERT_SOME(_isolator);
   Owned<Isolator> isolator(_isolator.get());

   // A PosixLauncher is sufficient even when testing a cgroups isolator.
   Try<Launcher*> _launcher = PosixLauncher::create(flags);
   ASSERT_SOME(_launcher);
   Owned<Launcher> launcher(_launcher.get());

   ExecutorInfo executorInfo;
   executorInfo.mutable_resources()->CopyFrom(
       Resources::parse("cpus:1.0").get());

   ContainerID containerId;
   containerId.set_value(UUID::random().toString());

   // Use a relative temporary directory so it gets cleaned up
   // automatically with the test.
   Try<string> dir = os::mkdtemp(path::join(os::getcwd(), "XXXXXX"));
   ASSERT_SOME(dir);

   ContainerConfig containerConfig;
   containerConfig.mutable_executor_info()->CopyFrom(executorInfo);
   containerConfig.set_directory(dir.get());

   AWAIT_READY(isolator->prepare(
       containerId,
       containerConfig));

   const string& file = path::join(dir.get(), "mesos_isolator_test_ready");

   // Generating random numbers is done by the kernel and will max out a single
   // core and run almost exclusively in the kernel, i.e., system time.
   string command = "cat /dev/urandom > /dev/null & "
     "touch " + file + "; " // Signals the command is running.
     "sleep 60";

   vector<string> argv(3);
   argv[0] = "sh";
   argv[1] = "-c";
   argv[2] = command;

   vector<Subprocess::Hook> parentHooks;

   // Create parent Hook to isolate child.
   //
   // NOTE: We can safely use references here as the hook will be executed
   // during the call to `fork`.
   const lambda::function<Try<Nothing>(pid_t)> isolatePidHook = lambda::bind(
       isolatePid,
       lambda::_1,
       std::cref(isolator),
       std::cref(containerId),
       NO_CHECK_CGROUPS);

   parentHooks.emplace_back(Subprocess::Hook(isolatePidHook));

   Try<pid_t> pid = launcher->fork(
       containerId,
       "sh",
       argv,
       Subprocess::FD(STDIN_FILENO),
       Subprocess::FD(STDOUT_FILENO),
       Subprocess::FD(STDERR_FILENO),
       None(),
       None(),
       None(),
       parentHooks);

   ASSERT_SOME(pid);

   // Reap the forked child.
   Future<Option<int> > status = process::reap(pid.get());

   // Wait for the command to start.
   while (!os::exists(file));

   // Wait up to 1 second for the child process to induce 1/8 of a second of
   // system cpu time.
   ResourceStatistics statistics;
   Duration waited = Duration::zero();
   do {
     Future<ResourceStatistics> usage = isolator->usage(containerId);
     AWAIT_READY(usage);

     statistics = usage.get();

     // If we meet our usage expectations, we're done!
     if (statistics.cpus_system_time_secs() >= 0.125) {
       break;
     }

     os::sleep(Milliseconds(200));
     waited += Milliseconds(200);
   } while (waited < Seconds(1));

   EXPECT_LE(0.125, statistics.cpus_system_time_secs());

   // Ensure all processes are killed.
   AWAIT_READY(launcher.get()->destroy(containerId));

   // Make sure the child was reaped.
   AWAIT_READY(status);

   // Let the isolator clean up.
   AWAIT_READY(isolator->cleanup(containerId));
 }


 #ifdef __linux__
 class NetClsHandleManagerTest : public testing::Test {};


 // Tests the ability of the `NetClsHandleManager` class to allocate
 // and free secondary handles from a range of primary handles.
 TEST_F(NetClsHandleManagerTest, AllocateFreeHandles)
 {
   NetClsHandleManager manager(IntervalSet<uint32_t>(
       (Bound<uint32_t>::closed(0x0002),
        Bound<uint32_t>::closed(0x0003))));

   Try<NetClsHandle> handle = manager.alloc(0x0003);
   ASSERT_SOME(handle);

   EXPECT_SOME_TRUE(manager.isUsed(handle.get()));

   ASSERT_SOME(manager.free(handle.get()));

   EXPECT_SOME_FALSE(manager.isUsed(handle.get()));
 }


 // Make sure allocation of secondary handles for invalid primary
 // handles results in an error.
 TEST_F(NetClsHandleManagerTest, AllocateInvalidPrimary)
 {
   NetClsHandleManager manager(IntervalSet<uint32_t>(
       (Bound<uint32_t>::closed(0x0002),
        Bound<uint32_t>::closed(0x0003))));

   ASSERT_ERROR(manager.alloc(0x0001));
 }


 // Tests that we can reserve secondary handles for a given primary
 // handle so that they won't be allocated out later.
 TEST_F(NetClsHandleManagerTest, ReserveHandles)
 {
   NetClsHandleManager manager(IntervalSet<uint32_t>(
       (Bound<uint32_t>::closed(0x0002),
        Bound<uint32_t>::closed(0x0003))));

   NetClsHandle handle(0x0003, 0xffff);

   ASSERT_SOME(manager.reserve(handle));

   EXPECT_SOME_TRUE(manager.isUsed(handle));
 }


 // Tests that secondary handles are allocated only from a given range,
 // when the range is specified.
 TEST_F(NetClsHandleManagerTest, SecondaryHandleRange)
 {
   NetClsHandleManager manager(
       IntervalSet<uint32_t>(
         (Bound<uint32_t>::closed(0x0002),
          Bound<uint32_t>::closed(0x0003))),
       IntervalSet<uint32_t>(
         (Bound<uint32_t>::closed(0xffff),
          Bound<uint32_t>::closed(0xffff))));

   Try<NetClsHandle> handle = manager.alloc(0x0003);
   ASSERT_SOME(handle);

   EXPECT_SOME_TRUE(manager.isUsed(handle.get()));

   // Try allocating another handle. This should fail, since we don't
   // have any more secondary handles left.
   EXPECT_ERROR(manager.alloc(0x0003));

   ASSERT_SOME(manager.free(handle.get()));

   ASSERT_SOME(manager.reserve(handle.get()));

   EXPECT_SOME_TRUE(manager.isUsed(handle.get()));

   // Make sure you cannot reserve a secondary handle that is out of
   // range.
   EXPECT_ERROR(manager.reserve(NetClsHandle(0x0003, 0x0001)));
 }
 #endif // __linux__


 #ifdef __linux__
 class RevocableCpuIsolatorTest : public MesosTest {};


 TEST_F(RevocableCpuIsolatorTest, ROOT_CGROUPS_RevocableCpu)
 {
   slave::Flags flags;

   Try<Isolator*> _isolator = CgroupsCpushareIsolatorProcess::create(flags);
   ASSERT_SOME(_isolator);
   Owned<Isolator> isolator(_isolator.get());

   Try<Launcher*> _launcher = PosixLauncher::create(flags);
   ASSERT_SOME(_launcher);
   Owned<Launcher> launcher(_launcher.get());

   // Include revocable CPU in the executor's resources.
   Resource cpu = Resources::parse("cpus", "1", "*").get();
   cpu.mutable_revocable();

   ExecutorInfo executorInfo;
   executorInfo.add_resources()->CopyFrom(cpu);

   ContainerID containerId;
   containerId.set_value(UUID::random().toString());

   ContainerConfig containerConfig;
   containerConfig.mutable_executor_info()->CopyFrom(executorInfo);
   containerConfig.set_directory(os::getcwd());

   AWAIT_READY(isolator->prepare(
         containerId,
         containerConfig));

   vector<string> argv{"sleep", "100"};

   Try<pid_t> pid = launcher->fork(
       containerId,
       "/bin/sleep",
       argv,
       Subprocess::PATH("/dev/null"),
       Subprocess::PATH("/dev/null"),
       Subprocess::PATH("/dev/null"),
       None(),
       None(),
       None());

   ASSERT_SOME(pid);

   AWAIT_READY(isolator->isolate(containerId, pid.get()));

   // Executor should have proper cpu.shares for revocable containers.
   Result<string> cpuHierarchy = cgroups::hierarchy("cpu");
   ASSERT_SOME(cpuHierarchy);

   Result<string> cpuCgroup = cgroups::cpu::cgroup(pid.get());
   ASSERT_SOME(cpuCgroup);

   EXPECT_SOME_EQ(
       CPU_SHARES_PER_CPU_REVOCABLE,
       cgroups::cpu::shares(cpuHierarchy.get(), cpuCgroup.get()));

   // Kill the container and clean up.
   Future<Option<int>> status = process::reap(pid.get());

   AWAIT_READY(launcher.get()->destroy(containerId));

   AWAIT_READY(status);

   AWAIT_READY(isolator->cleanup(containerId));
 }
 #endif // __linux__


 #ifdef __linux__
 class LimitedCpuIsolatorTest : public MesosTest {};


 TEST_F(LimitedCpuIsolatorTest, ROOT_CGROUPS_CFS_Enable_Cfs)
 {
   slave::Flags flags;

   // Enable CFS to cap CPU utilization.
   flags.cgroups_enable_cfs = true;

   Try<Isolator*> _isolator = CgroupsCpushareIsolatorProcess::create(flags);
   ASSERT_SOME(_isolator);
   Owned<Isolator> isolator(_isolator.get());

   Try<Launcher*> _launcher = LinuxLauncher::create(flags);
   ASSERT_SOME(_launcher);
   Owned<Launcher> launcher(_launcher.get());

   // Set the executor's resources to 0.5 cpu.
   ExecutorInfo executorInfo;
   executorInfo.mutable_resources()->CopyFrom(
       Resources::parse("cpus:0.5").get());

   ContainerID containerId;
   containerId.set_value(UUID::random().toString());

   // Use a relative temporary directory so it gets cleaned up
   // automatically with the test.
   Try<string> dir = os::mkdtemp(path::join(os::getcwd(), "XXXXXX"));
   ASSERT_SOME(dir);

   ContainerConfig containerConfig;
   containerConfig.mutable_executor_info()->CopyFrom(executorInfo);
   containerConfig.set_directory(dir.get());

   Future<Option<ContainerLaunchInfo>> prepare =
     isolator->prepare(
         containerId,
         containerConfig);

   AWAIT_READY(prepare);

   // Generate random numbers to max out a single core. We'll run this for 0.5
   // seconds of wall time so it should consume approximately 250 ms of total
   // cpu time when limited to 0.5 cpu. We use /dev/urandom to prevent blocking
   // on Linux when there's insufficient entropy.
   string command = "cat /dev/urandom > /dev/null & "
     "export MESOS_TEST_PID=$! && "
     "sleep 0.5 && "
     "kill $MESOS_TEST_PID";

   vector<string> argv(3);
   argv[0] = "sh";
   argv[1] = "-c";
   argv[2] = command;

   vector<Subprocess::Hook> parentHooks;

   // Create parent Hook to isolate child.
   //
   // NOTE: We can safely use references here as the hook will be executed
   // during the call to `fork`.
   const lambda::function<Try<Nothing>(pid_t)> isolatePidHook = lambda::bind(
       isolatePid,
       lambda::_1,
       std::cref(isolator),
       std::cref(containerId),
       NO_CHECK_CGROUPS);

   parentHooks.emplace_back(Subprocess::Hook(isolatePidHook));

   Try<pid_t> pid = launcher->fork(
       containerId,
       "sh",
       argv,
       Subprocess::FD(STDIN_FILENO),
       Subprocess::FD(STDOUT_FILENO),
       Subprocess::FD(STDERR_FILENO),
       None(),
       None(),
       prepare.get().isSome() ? prepare.get().get().namespaces() : 0,
       parentHooks);

   ASSERT_SOME(pid);

   // Reap the forked child.
   Future<Option<int> > status = process::reap(pid.get());

   // Wait for the command to complete.
   AWAIT_READY(status);

   Future<ResourceStatistics> usage = isolator->usage(containerId);
   AWAIT_READY(usage);

   // Expect that no more than 300 ms of cpu time has been consumed. We also
   // check that at least 50 ms of cpu time has been consumed so this test will
   // fail if the host system is very heavily loaded. This behavior is correct
   // because under such conditions we aren't actually testing the CFS cpu
   // limiter.
   double cpuTime = usage.get().cpus_system_time_secs() +
                    usage.get().cpus_user_time_secs();

   EXPECT_GE(0.30, cpuTime);
   EXPECT_LE(0.05, cpuTime);

   // Ensure all processes are killed.
   AWAIT_READY(launcher.get()->destroy(containerId));

   // Let the isolator clean up.
   AWAIT_READY(isolator->cleanup(containerId));
 }


 // This test verifies that we can successfully launch a container with
 // a big (>= 10 cpus) cpu quota. This is to catch the regression
 // observed in MESOS-1049.
 // TODO(vinod): Revisit this if/when the isolator restricts the number
 // of cpus that an executor can use based on the slave cpus.
 TEST_F(LimitedCpuIsolatorTest, ROOT_CGROUPS_CFS_Big_Quota)
 {
   slave::Flags flags;

   // Enable CFS to cap CPU utilization.
   flags.cgroups_enable_cfs = true;

   Try<Isolator*> _isolator = CgroupsCpushareIsolatorProcess::create(flags);
   ASSERT_SOME(_isolator);
   Owned<Isolator> isolator(_isolator.get());

   Try<Launcher*> _launcher = LinuxLauncher::create(flags);
   ASSERT_SOME(_launcher);
   Owned<Launcher> launcher(_launcher.get());

   // Set the executor's resources to 100.5 cpu.
   ExecutorInfo executorInfo;
   executorInfo.mutable_resources()->CopyFrom(
       Resources::parse("cpus:100.5").get());

   ContainerID containerId;
   containerId.set_value(UUID::random().toString());

   // Use a relative temporary directory so it gets cleaned up
   // automatically with the test.
   Try<string> dir = os::mkdtemp(path::join(os::getcwd(), "XXXXXX"));
   ASSERT_SOME(dir);

   ContainerConfig containerConfig;
   containerConfig.mutable_executor_info()->CopyFrom(executorInfo);
   containerConfig.set_directory(dir.get());

   Future<Option<ContainerLaunchInfo>> prepare =
     isolator->prepare(
         containerId,
         containerConfig);

   AWAIT_READY(prepare);

   vector<string> argv(3);
   argv[0] = "sh";
   argv[1] = "-c";
   argv[2] = "exit 0";

   vector<Subprocess::Hook> parentHooks;

   // Create parent Hook to isolate child.
   // Note: We can safely use references here as we are sure that the life time
   // of the parent will exceed the child.
   const lambda::function<Try<Nothing>(pid_t)> isolatePidHook = lambda::bind(
       isolatePid,
       lambda::_1,
       std::cref(isolator),
       std::cref(containerId),
       NO_CHECK_CGROUPS);

   parentHooks.emplace_back(Subprocess::Hook(isolatePidHook));

   Try<pid_t> pid = launcher->fork(
       containerId,
       "sh",
       argv,
       Subprocess::FD(STDIN_FILENO),
       Subprocess::FD(STDOUT_FILENO),
       Subprocess::FD(STDERR_FILENO),
       None(),
       None(),
       prepare.get().isSome() ? prepare.get().get().namespaces() : 0,
       parentHooks);

   ASSERT_SOME(pid);

   // Reap the forked child.
   Future<Option<int> > status = process::reap(pid.get());

   // Wait for the command to complete successfully.
   AWAIT_READY(status);
   ASSERT_SOME_EQ(0, status.get());

   // Ensure all processes are killed.
   AWAIT_READY(launcher.get()->destroy(containerId));

   // Let the isolator clean up.
   AWAIT_READY(isolator->cleanup(containerId));
 }


 // A test to verify the number of processes and threads in a
 // container.
 TEST_F(LimitedCpuIsolatorTest, ROOT_CGROUPS_Pids_and_Tids)
 {
   slave::Flags flags;
   flags.cgroups_cpu_enable_pids_and_tids_count = true;

   Try<Isolator*> _isolator = CgroupsCpushareIsolatorProcess::create(flags);
   ASSERT_SOME(_isolator);
   Owned<Isolator> isolator(_isolator.get());

   Try<Launcher*> _launcher = LinuxLauncher::create(flags);
   ASSERT_SOME(_launcher);
   Owned<Launcher> launcher(_launcher.get());

   ExecutorInfo executorInfo;
   executorInfo.mutable_resources()->CopyFrom(
       Resources::parse("cpus:0.5;mem:512").get());

   ContainerID containerId;
   containerId.set_value(UUID::random().toString());

   // Use a relative temporary directory so it gets cleaned up
   // automatically with the test.
   Try<string> dir = os::mkdtemp(path::join(os::getcwd(), "XXXXXX"));
   ASSERT_SOME(dir);

   ContainerConfig containerConfig;
   containerConfig.mutable_executor_info()->CopyFrom(executorInfo);
   containerConfig.set_directory(dir.get());

   Future<Option<ContainerLaunchInfo>> prepare =
     isolator->prepare(
         containerId,
         containerConfig);

   AWAIT_READY(prepare);

   // Right after the creation of the cgroup, which happens in
   // 'prepare', we check that it is empty.
   Future<ResourceStatistics> usage = isolator->usage(containerId);
   AWAIT_READY(usage);
   EXPECT_EQ(0U, usage.get().processes());
   EXPECT_EQ(0U, usage.get().threads());

   // Use these to communicate with the test process after it has
   // exec'd to make sure it is running.
   int inputPipes[2];
   ASSERT_NE(-1, ::pipe(inputPipes));

   int outputPipes[2];
   ASSERT_NE(-1, ::pipe(outputPipes));

   vector<string> argv(1);
   argv[0] = "cat";

   vector<Subprocess::Hook> parentHooks;

   // Create parent Hook to isolate child.
   // Note: We can safely use references here as we are sure that the life time
   // of the parent will exceed the child.
   const lambda::function<Try<Nothing>(pid_t)> isolatePidHook = lambda::bind(
       isolatePid,
       lambda::_1,
       std::cref(isolator),
       std::cref(containerId),
       CHECK_CGROUPS);

   parentHooks.emplace_back(Subprocess::Hook(isolatePidHook));

   Try<pid_t> pid = launcher->fork(
       containerId,
       "cat",
       argv,
       Subprocess::FD(inputPipes[0], Subprocess::IO::OWNED),
       Subprocess::FD(outputPipes[1], Subprocess::IO::OWNED),
       Subprocess::FD(STDERR_FILENO),
       None(),
       None(),
       prepare.get().isSome() ? prepare.get().get().namespaces() : 0,
       parentHooks);

   ASSERT_SOME(pid);

   // Reap the forked child.
   Future<Option<int>> status = process::reap(pid.get());

   // Write to the test process and wait for an echoed result.
   // This observation ensures that the "cat" process has
   // completed its part of the exec() procedure and is now
   // executing normally.
   AWAIT_READY(io::write(inputPipes[1], "foo")
     .then([outputPipes]() -> Future<short> {
       return io::poll(outputPipes[0], io::READ);
     }));

   // Process count should be 1 since 'cat' is still idling.
   usage = isolator->usage(containerId);
   AWAIT_READY(usage);
   EXPECT_EQ(1U, usage.get().processes());
   EXPECT_EQ(1U, usage.get().threads());

   // Ensure all processes are killed.
   AWAIT_READY(launcher.get()->destroy(containerId));

   // Clean up the extra pipes created for synchronization.
   EXPECT_SOME(os::close(inputPipes[1]));
   EXPECT_SOME(os::close(outputPipes[0]));

   // Wait for the command to complete.
   AWAIT_READY(status);

   // After the process is killed, the cgroup should be empty again.
   usage = isolator->usage(containerId);
   AWAIT_READY(usage);
   EXPECT_EQ(0U, usage.get().processes());
   EXPECT_EQ(0U, usage.get().threads());

   // Let the isolator clean up.
   AWAIT_READY(isolator->cleanup(containerId));
 }
 #endif // __linux__


 template <typename T>
 class MemIsolatorTest : public MesosTest {};


 typedef ::testing::Types<
     PosixMemIsolatorProcess,
 #ifdef __linux__
     CgroupsMemIsolatorProcess,
 #endif // __linux__
     tests::Module<Isolator, TestMemIsolator>> MemIsolatorTypes;


 TYPED_TEST_CASE(MemIsolatorTest, MemIsolatorTypes);


 TYPED_TEST(MemIsolatorTest, MemUsage)
 {
   slave::Flags flags;

   Try<Isolator*> _isolator = TypeParam::create(flags);
   ASSERT_SOME(_isolator);
   Owned<Isolator> isolator(_isolator.get());

   ExecutorInfo executorInfo;
   executorInfo.mutable_resources()->CopyFrom(
       Resources::parse("mem:1024").get());

   ContainerID containerId;
   containerId.set_value(UUID::random().toString());

   // Use a relative temporary directory so it gets cleaned up
   // automatically with the test.
   Try<string> dir = os::mkdtemp(path::join(os::getcwd(), "XXXXXX"));
   ASSERT_SOME(dir);

   ContainerConfig containerConfig;
   containerConfig.mutable_executor_info()->CopyFrom(executorInfo);
   containerConfig.set_directory(dir.get());

   AWAIT_READY(isolator->prepare(
       containerId,
       containerConfig));

   MemoryTestHelper helper;
   ASSERT_SOME(helper.spawn());
   ASSERT_SOME(helper.pid());

   // Set up the reaper to wait on the subprocess.
   Future<Option<int>> status = process::reap(helper.pid().get());

   // Isolate the subprocess.
   AWAIT_READY(isolator->isolate(containerId, helper.pid().get()));

   const Bytes allocation = Megabytes(128);
   EXPECT_SOME(helper.increaseRSS(allocation));

   Future<ResourceStatistics> usage = isolator->usage(containerId);
   AWAIT_READY(usage);

   EXPECT_GE(usage.get().mem_rss_bytes(), allocation.bytes());

   // Ensure the process is killed.
   helper.cleanup();

   // Make sure the subprocess was reaped.
   AWAIT_READY(status);

   // Let the isolator clean up.
   AWAIT_READY(isolator->cleanup(containerId));
 }


 #ifdef __linux__
 class NetClsIsolatorTest : public MesosTest {};


 // This tests the create, prepare, isolate and cleanup methods of the
 // 'CgroupNetClsIsolatorProcess'. The test first creates a 'MesosContainerizer'
 // with net_cls cgroup isolator enabled. The net_cls cgroup isolator is
 // implemented in the 'CgroupNetClsIsolatorProcess' class. The test then
 // launches a task in a mesos container and checks to see if the container has
 // been added to the right net_cls cgroup. Finally, the test kills the task and
 // makes sure that the 'CgroupNetClsIsolatorProcess' cleans up the net_cls
 // cgroup created for the container.
 TEST_F(NetClsIsolatorTest, ROOT_CGROUPS_NetClsIsolate)
 {
   Try<Owned<cluster::Master>> master = StartMaster();
   ASSERT_SOME(master);

   uint16_t primary = 0x0012;

   slave::Flags flags = CreateSlaveFlags();
   flags.isolation = "cgroups/net_cls";
   flags.cgroups_net_cls_primary_handle = stringify(primary);
   flags.cgroups_net_cls_secondary_handles = "0xffff,0xffff";

   Fetcher fetcher;

   Try<MesosContainerizer*> _containerizer =
     MesosContainerizer::create(flags, true, &fetcher);

   ASSERT_SOME(_containerizer);
   Owned<MesosContainerizer> containerizer(_containerizer.get());

   Owned<MasterDetector> detector = master.get()->createDetector();

   Try<Owned<cluster::Slave>> slave =
     StartSlave(detector.get(), containerizer.get(), flags);
   ASSERT_SOME(slave);

   MockScheduler sched;

   MesosSchedulerDriver driver(
       &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

   Future<Nothing> schedRegistered;
   EXPECT_CALL(sched, registered(_, _, _))
     .WillOnce(FutureSatisfy(&schedRegistered));

   Future<vector<Offer>> offers;
   EXPECT_CALL(sched, resourceOffers(_, _))
     .WillOnce(FutureArg<1>(&offers))
     .WillRepeatedly(Return());      // Ignore subsequent offers.

   driver.start();

   AWAIT_READY(schedRegistered);

   AWAIT_READY(offers);
   EXPECT_EQ(1u, offers.get().size());

   // Create a task to be launched in the mesos-container. We will be
   // explicitly killing this task to perform the cleanup test.
   TaskInfo task = createTask(offers.get()[0], "sleep 1000");

   Future<TaskStatus> status;
   EXPECT_CALL(sched, statusUpdate(_, _))
     .WillOnce(FutureArg<1>(&status));

   driver.launchTasks(offers.get()[0].id(), {task});

   // Capture the update to verify that the task has been launched.
   AWAIT_READY(status);
   ASSERT_EQ(TASK_RUNNING, status.get().state());

   // Task is ready.  Make sure there is exactly 1 container in the hashset.
   Future<hashset<ContainerID>> containers = containerizer->containers();
   AWAIT_READY(containers);
   EXPECT_EQ(1u, containers.get().size());

   const ContainerID& containerID = *(containers.get().begin());

   Result<string> hierarchy = cgroups::hierarchy("net_cls");
   ASSERT_SOME(hierarchy);

   // Check if the net_cls cgroup for this container exists, by
   // checking for the processes associated with this cgroup.
   string cgroup = path::join(
       flags.cgroups_root,
       containerID.value());

   Try<set<pid_t>> pids = cgroups::processes(hierarchy.get(), cgroup);
   ASSERT_SOME(pids);

   // There should be at least one TGID associated with this cgroup.
   EXPECT_LE(1u, pids.get().size());

   // Read the `net_cls.classid` to verify that the handle has been set.
   Try<uint32_t> classid = cgroups::net_cls::classid(hierarchy.get(), cgroup);
   EXPECT_SOME(classid);

   if (classid.isSome()) {
     // Make sure the primary handle is the same as the one set in
     // `--cgroup_net_cls_primary_handle`.
     EXPECT_EQ(primary, (classid.get() & 0xffff0000) >> 16);

     // Make sure the secondary handle is 0xffff.
     EXPECT_EQ(0xffffu, classid.get() & 0xffff);
   }

   // Isolator cleanup test: Killing the task should cleanup the cgroup
   // associated with the container.
   Future<TaskStatus> killStatus;
   EXPECT_CALL(sched, statusUpdate(_, _))
     .WillOnce(FutureArg<1>(&killStatus));

   // Wait for the executor to exit. We are using 'gc.schedule' as a proxy event
   // to monitor the exit of the executor.
   Future<Nothing> gcSchedule = FUTURE_DISPATCH(
       _, &slave::GarbageCollectorProcess::schedule);

   driver.killTask(status.get().task_id());

   AWAIT_READY(gcSchedule);

   // If the cleanup is successful the net_cls cgroup for this container should
   // not exist.
   ASSERT_FALSE(os::exists(cgroup));

   driver.stop();
   driver.join();
 }


 // This test verifies that we are able to retrieve the `net_cls` handle
 // from `/state`.
 TEST_F(NetClsIsolatorTest, ROOT_CGROUPS_ContainerStatus)
 {
   Try<Owned<cluster::Master>> master = StartMaster();
   ASSERT_SOME(master);

   slave::Flags flags = CreateSlaveFlags();
   flags.isolation = "cgroups/net_cls";
   flags.cgroups_net_cls_primary_handle = stringify(0x0012);
   flags.cgroups_net_cls_secondary_handles = "0x0011,0x0012";

   Fetcher fetcher;

   Try<MesosContainerizer*> _containerizer =
     MesosContainerizer::create(flags, true, &fetcher);

   ASSERT_SOME(_containerizer);
   Owned<MesosContainerizer> containerizer(_containerizer.get());

   Owned<MasterDetector> detector = master.get()->createDetector();

   Try<Owned<cluster::Slave>> slave =
     StartSlave(detector.get(), containerizer.get(), flags);
   ASSERT_SOME(slave);

   MockScheduler sched;

   MesosSchedulerDriver driver(
       &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);

   Future<Nothing> schedRegistered;
   EXPECT_CALL(sched, registered(_, _, _))
     .WillOnce(FutureSatisfy(&schedRegistered));

   Future<vector<Offer>> offers;
   EXPECT_CALL(sched, resourceOffers(_, _))
     .WillOnce(FutureArg<1>(&offers))
     .WillRepeatedly(Return());      // Ignore subsequent offers.

   driver.start();

   AWAIT_READY(schedRegistered);

   AWAIT_READY(offers);
   EXPECT_EQ(1u, offers.get().size());

   // Create a task to be launched in the mesos-container.
   TaskInfo task = createTask(offers.get()[0], "sleep 1000");

   Future<TaskStatus> status;
   EXPECT_CALL(sched, statusUpdate(_, _))
     .WillOnce(FutureArg<1>(&status));

   driver.launchTasks(offers.get()[0].id(), {task});

   AWAIT_READY(status);
   ASSERT_EQ(TASK_RUNNING, status.get().state());

   // Task is ready. Verify `ContainerStatus` is present in slave state.
   Future<Response> response = http::get(
       slave.get()->pid,
       "state",
       None(),
       createBasicAuthHeaders(DEFAULT_CREDENTIAL));

   AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);
   AWAIT_EXPECT_RESPONSE_HEADER_EQ(APPLICATION_JSON, "Content-Type", response);

   Try<JSON::Object> parse = JSON::parse<JSON::Object>(response.get().body);
   ASSERT_SOME(parse);

   Result<JSON::Object> netCls = parse->find<JSON::Object>(
       "frameworks[0].executors[0].tasks[0].statuses[0]."
       "container_status.cgroup_info.net_cls");
   ASSERT_SOME(netCls);

   uint32_t classid =
     netCls->values["classid"].as<JSON::Number>().as<uint32_t>();

   // Check the primary and the secondary handle.
   EXPECT_EQ(0x0012u, classid >> 16);
   EXPECT_EQ(0x0011u, classid & 0xffff);

   driver.stop();
   driver.join();
 }
 #endif


 #ifdef __linux__
 class PerfEventIsolatorTest : public MesosTest {};


 TEST_F(PerfEventIsolatorTest, ROOT_CGROUPS_Sample)
 {
   slave::Flags flags;

   flags.perf_events = "cycles,task-clock";
   flags.perf_duration = Milliseconds(250);
   flags.perf_interval = Milliseconds(500);

   Try<Isolator*> _isolator = CgroupsPerfEventIsolatorProcess::create(flags);
   ASSERT_SOME(_isolator);
   Owned<Isolator> isolator(_isolator.get());

   ExecutorInfo executorInfo;

   ContainerID containerId;
   containerId.set_value(UUID::random().toString());

   // Use a relative temporary directory so it gets cleaned up
   // automatically with the test.
   Try<string> dir = os::mkdtemp(path::join(os::getcwd(), "XXXXXX"));
   ASSERT_SOME(dir);

   ContainerConfig containerConfig;
   containerConfig.mutable_executor_info()->CopyFrom(executorInfo);
   containerConfig.set_directory(dir.get());

   AWAIT_READY(isolator->prepare(
       containerId,
       containerConfig));

   // This first sample is likely to be empty because perf hasn't
   // completed yet but we should still have the required fields.
   Future<ResourceStatistics> statistics1 = isolator->usage(containerId);
   AWAIT_READY(statistics1);
   ASSERT_TRUE(statistics1.get().has_perf());
   EXPECT_TRUE(statistics1.get().perf().has_timestamp());
   EXPECT_TRUE(statistics1.get().perf().has_duration());

   // Wait until we get the next sample. We use a generous timeout of
   // two seconds because we currently have a one second reap interval;
   // when running perf with perf_duration of 250ms we won't notice the
   // exit for up to one second.
   ResourceStatistics statistics2;
   Duration waited = Duration::zero();
   do {
     Future<ResourceStatistics> statistics = isolator->usage(containerId);
     AWAIT_READY(statistics);

     statistics2 = statistics.get();

     ASSERT_TRUE(statistics2.has_perf());

     if (statistics1.get().perf().timestamp() !=
         statistics2.perf().timestamp()) {
       break;
     }

     os::sleep(Milliseconds(250));
     waited += Milliseconds(250);
   } while (waited < Seconds(2));

   sleep(2);

   EXPECT_NE(statistics1.get().perf().timestamp(),
             statistics2.perf().timestamp());

   EXPECT_TRUE(statistics2.perf().has_cycles());
   EXPECT_LE(0u, statistics2.perf().cycles());

   EXPECT_TRUE(statistics2.perf().has_task_clock());
   EXPECT_LE(0.0, statistics2.perf().task_clock());

   AWAIT_READY(isolator->cleanup(containerId));
 }


 class SharedFilesystemIsolatorTest : public MesosTest {};


 // Test that a container can create a private view of a system
 // directory (/var/tmp). Check that a file written by a process inside
 // the container doesn't appear on the host filesystem but does appear
 // under the container's work directory.
 // This test is disabled since we're planning to remove the shared
 // filesystem isolator and this test is not working on other distros
 // such as CentOS 7.1
 // TODO(tnachen): Remove this test when shared filesystem isolator
 // is removed.
 TEST_F(SharedFilesystemIsolatorTest, DISABLED_ROOT_RelativeVolume)
 {
   slave::Flags flags = CreateSlaveFlags();
   flags.isolation = "filesystem/shared";

   Try<Isolator*> _isolator = SharedFilesystemIsolatorProcess::create(flags);
   ASSERT_SOME(_isolator);
   Owned<Isolator> isolator(_isolator.get());

   Try<Launcher*> _launcher = LinuxLauncher::create(flags);
   ASSERT_SOME(_launcher);
   Owned<Launcher> launcher(_launcher.get());

   // Use /var/tmp so we don't mask the work directory (under /tmp).
   const string containerPath = "/var/tmp";
   ASSERT_TRUE(os::stat::isdir(containerPath));

   // Use a host path relative to the container work directory.
   const string hostPath = strings::remove(containerPath, "/", strings::PREFIX);

   ContainerInfo containerInfo;
   containerInfo.set_type(ContainerInfo::MESOS);
   containerInfo.add_volumes()->CopyFrom(
       CREATE_VOLUME(containerPath, hostPath, Volume::RW));

   ExecutorInfo executorInfo;
   executorInfo.mutable_container()->CopyFrom(containerInfo);

   ContainerID containerId;
   containerId.set_value(UUID::random().toString());

   ContainerConfig containerConfig;
   containerConfig.mutable_executor_info()->CopyFrom(executorInfo);
   containerConfig.set_directory(flags.work_dir);

   Future<Option<ContainerLaunchInfo> > prepare =
     isolator->prepare(
         containerId,
         containerConfig);

   AWAIT_READY(prepare);
   ASSERT_SOME(prepare.get());
   ASSERT_EQ(1, prepare.get().get().pre_exec_commands().size());
   EXPECT_TRUE(prepare.get().get().has_namespaces());

   // The test will touch a file in container path.
   const string file = path::join(containerPath, UUID::random().toString());
   ASSERT_FALSE(os::exists(file));

   // Manually run the isolator's preparation command first, then touch
   // the file.
   vector<string> args;
   args.push_back("sh");
   args.push_back("-x");
   args.push_back("-c");
   args.push_back(
       prepare.get().get().pre_exec_commands(0).value() + " && touch " + file);

   Try<pid_t> pid = launcher->fork(
       containerId,
       "sh",
       args,
       Subprocess::FD(STDIN_FILENO),
       Subprocess::FD(STDOUT_FILENO),
       Subprocess::FD(STDERR_FILENO),
       None(),
       None(),
       prepare.get().get().namespaces());
   ASSERT_SOME(pid);

   // Set up the reaper to wait on the forked child.
   Future<Option<int> > status = process::reap(pid.get());

   AWAIT_READY(status);
   EXPECT_SOME_EQ(0, status.get());

   // Check the correct hierarchy was created under the container work
   // directory.
   string dir = "/";
   foreach (const string& subdir, strings::tokenize(containerPath, "/")) {
     dir = path::join(dir, subdir);

     struct stat hostStat;
     EXPECT_EQ(0, ::stat(dir.c_str(), &hostStat));

     struct stat containerStat;
     EXPECT_EQ(0,
               ::stat(path::join(flags.work_dir, dir).c_str(), &containerStat));

     EXPECT_EQ(hostStat.st_mode, containerStat.st_mode);
     EXPECT_EQ(hostStat.st_uid, containerStat.st_uid);
     EXPECT_EQ(hostStat.st_gid, containerStat.st_gid);
   }

   // Check it did *not* create a file in the host namespace.
   EXPECT_FALSE(os::exists(file));

   // Check it did create the file under the container's work directory
   // on the host.
   EXPECT_TRUE(os::exists(path::join(flags.work_dir, file)));
 }


 // This test is disabled since we're planning to remove the shared
 // filesystem isolator and this test is not working on other distros
 // such as CentOS 7.1
 // TODO(tnachen): Remove this test when shared filesystem isolator
 // is removed.
 TEST_F(SharedFilesystemIsolatorTest, DISABLED_ROOT_AbsoluteVolume)
 {
   slave::Flags flags = CreateSlaveFlags();
   flags.isolation = "filesystem/shared";

   Try<Isolator*> _isolator = SharedFilesystemIsolatorProcess::create(flags);
   ASSERT_SOME(_isolator);
   Owned<Isolator> isolator(_isolator.get());

   Try<Launcher*> _launcher = LinuxLauncher::create(flags);
   ASSERT_SOME(_launcher);
   Owned<Launcher> launcher(_launcher.get());

   // We'll mount the absolute test work directory as /var/tmp in the
   // container.
   const string hostPath = flags.work_dir;
   const string containerPath = "/var/tmp";

   ContainerInfo containerInfo;
   containerInfo.set_type(ContainerInfo::MESOS);
   containerInfo.add_volumes()->CopyFrom(
       CREATE_VOLUME(containerPath, hostPath, Volume::RW));

   ExecutorInfo executorInfo;
   executorInfo.mutable_container()->CopyFrom(containerInfo);

   ContainerID containerId;
   containerId.set_value(UUID::random().toString());

   ContainerConfig containerConfig;
   containerConfig.mutable_executor_info()->CopyFrom(executorInfo);
   containerConfig.set_directory(flags.work_dir);

   Future<Option<ContainerLaunchInfo> > prepare =
     isolator->prepare(
         containerId,
         containerConfig);

   AWAIT_READY(prepare);
   ASSERT_SOME(prepare.get());
   ASSERT_EQ(1, prepare.get().get().pre_exec_commands().size());
   EXPECT_TRUE(prepare.get().get().has_namespaces());

   // Test the volume mounting by touching a file in the container's
   // /tmp, which should then be in flags.work_dir.
   const string filename = UUID::random().toString();
   ASSERT_FALSE(os::exists(path::join(containerPath, filename)));

   vector<string> args;
   args.push_back("sh");
   args.push_back("-x");
   args.push_back("-c");
   args.push_back(prepare.get().get().pre_exec_commands(0).value() +
                  " && touch " +
                  path::join(containerPath, filename));

   Try<pid_t> pid = launcher->fork(
       containerId,
       "sh",
       args,
       Subprocess::FD(STDIN_FILENO),
       Subprocess::FD(STDOUT_FILENO),
       Subprocess::FD(STDERR_FILENO),
       None(),
       None(),
       prepare.get().get().namespaces());
   ASSERT_SOME(pid);

   // Set up the reaper to wait on the forked child.
   Future<Option<int> > status = process::reap(pid.get());

   AWAIT_READY(status);
   EXPECT_SOME_EQ(0, status.get());

   // Check the file was created in flags.work_dir.
   EXPECT_TRUE(os::exists(path::join(hostPath, filename)));

   // Check it didn't get created in the host's view of containerPath.
   EXPECT_FALSE(os::exists(path::join(containerPath, filename)));
 }


 class NamespacesPidIsolatorTest : public MesosTest {};


 TEST_F(NamespacesPidIsolatorTest, ROOT_PidNamespace)
 {
   slave::Flags flags = CreateSlaveFlags();
   flags.isolation = "namespaces/pid";

   string directory = os::getcwd(); // We're inside a temporary sandbox.

   Fetcher fetcher;

   Try<MesosContainerizer*> _containerizer =
     MesosContainerizer::create(flags, false, &fetcher);

   ASSERT_SOME(_containerizer);
   Owned<MesosContainerizer> containerizer(_containerizer.get());

   ContainerID containerId;
   containerId.set_value(UUID::random().toString());

   // Write the command's pid namespace inode and init name to files.
   const string command =
     "stat -c %i /proc/self/ns/pid > ns && (cat /proc/1/comm > init)";

   process::Future<bool> launch = containerizer->launch(
       containerId,
       CREATE_EXECUTOR_INFO("executor", command),
       directory,
       None(),
       SlaveID(),
       process::PID<Slave>(),
       false);
   AWAIT_READY(launch);
   ASSERT_TRUE(launch.get());

   // Wait on the container.
   process::Future<containerizer::Termination> wait =
     containerizer->wait(containerId);
   AWAIT_READY(wait);

   // Check the executor exited correctly.
   EXPECT_TRUE(wait.get().has_status());
   EXPECT_EQ(0, wait.get().status());

   // Check that the command was run in a different pid namespace.
   Try<ino_t> testPidNamespace = ns::getns(::getpid(), "pid");
   ASSERT_SOME(testPidNamespace);

   Try<string> containerPidNamespace = os::read(path::join(directory, "ns"));
   ASSERT_SOME(containerPidNamespace);

   EXPECT_NE(stringify(testPidNamespace.get()),
             strings::trim(containerPidNamespace.get()));

   // Check that 'sh' is the container's 'init' process.
   // This verifies that /proc has been correctly mounted for the container.
   Try<string> init = os::read(path::join(directory, "init"));
   ASSERT_SOME(init);

   EXPECT_EQ("sh", strings::trim(init.get()));
 }


 // Username for the unprivileged user that will be created to test
 // unprivileged cgroup creation. It will be removed after the tests.
 // It is presumed this user does not normally exist.
 const string UNPRIVILEGED_USERNAME = "mesos.test.unprivileged.user";


 template <typename T>
 class UserCgroupIsolatorTest
   : public ContainerizerTest<slave::MesosContainerizer>
 {
 public:
   static void SetUpTestCase()
   {
     ContainerizerTest<slave::MesosContainerizer>::SetUpTestCase();

     // Remove the user in case it wasn't cleaned up from a previous
     // test.
     os::system("userdel -r " + UNPRIVILEGED_USERNAME + " > /dev/null");

     ASSERT_EQ(0, os::system("useradd " + UNPRIVILEGED_USERNAME));
   }


   static void TearDownTestCase()
   {
     ContainerizerTest<slave::MesosContainerizer>::TearDownTestCase();

     ASSERT_EQ(0, os::system("userdel -r " + UNPRIVILEGED_USERNAME));
   }
 };


 // Test all isolators that use cgroups.
 typedef ::testing::Types<
   CgroupsMemIsolatorProcess,
   CgroupsCpushareIsolatorProcess,
   CgroupsPerfEventIsolatorProcess> CgroupsIsolatorTypes;


 TYPED_TEST_CASE(UserCgroupIsolatorTest, CgroupsIsolatorTypes);


 TYPED_TEST(UserCgroupIsolatorTest, ROOT_CGROUPS_UserCgroup)
 {
   slave::Flags flags = UserCgroupIsolatorTest<TypeParam>::CreateSlaveFlags();
   flags.perf_events = "cpu-cycles"; // Needed for CgroupsPerfEventIsolator.

   Try<Isolator*> _isolator = TypeParam::create(flags);
   ASSERT_SOME(_isolator);
   Owned<Isolator> isolator(_isolator.get());

   ExecutorInfo executorInfo;
   executorInfo.mutable_resources()->CopyFrom(
       Resources::parse("mem:1024;cpus:1").get()); // For cpu/mem isolators.

   ContainerID containerId;
   containerId.set_value(UUID::random().toString());

   ContainerConfig containerConfig;
   containerConfig.mutable_executor_info()->CopyFrom(executorInfo);
   containerConfig.set_directory(os::getcwd());
   containerConfig.set_user(UNPRIVILEGED_USERNAME);

   AWAIT_READY(isolator->prepare(
       containerId,
       containerConfig));

   // Isolators don't provide a way to determine the cgroups they use
   // so we'll inspect the cgroups for an isolated dummy process.
   pid_t pid = fork();
   if (pid == 0) {
     // Child just sleeps.
     ::sleep(100);

     ABORT("Child process should not reach here");
   }
   ASSERT_GT(pid, 0);

   AWAIT_READY(isolator->isolate(containerId, pid));

   // Get the container's cgroups from /proc/$PID/cgroup. We're only
   // interested in the cgroups that this isolator has created which we
   // can do explicitly by selecting those that have the path that
   // corresponds to the 'cgroups_root' slave flag. For example:
   //
   //   $ cat /proc/pid/cgroup
   //   6:blkio:/
   //   5:perf_event:/
   //   4:memory:/mesos/b7410ed8-c85b-445e-b50e-3a1698d0e18c
   //   3:freezer:/
   //   2:cpuacct:/
   //   1:cpu:/
   //
   // Our 'grep' will only select the 'memory' line and then 'awk' will
   // output 'memory/mesos/b7410ed8-c85b-445e-b50e-3a1698d0e18c'.
   Try<string> grepOut = os::shell(
       "grep '" + path::join("/", flags.cgroups_root) + "' /proc/" +
        stringify(pid) + "/cgroup | awk -F ':' '{print $2$3}'");

   ASSERT_SOME(grepOut);

   // Kill the dummy child process.
   ::kill(pid, SIGKILL);
   int exitStatus;
   EXPECT_NE(-1, ::waitpid(pid, &exitStatus, 0));

   vector<string> cgroups = strings::tokenize(grepOut.get(), "\n");
   ASSERT_FALSE(cgroups.empty());

   foreach (string cgroup, cgroups) {
     if (!os::exists(path::join(flags.cgroups_hierarchy, cgroup)) &&
         strings::startsWith(cgroup, "cpuacct,cpu")) {
       // An existing bug in CentOS 7.x causes 'cpuacct,cpu' cgroup to
       // be under 'cpu,cpuacct'. Actively detect this here to
       // work around this problem.
       vector<string> parts = strings::split(cgroup, "/");
       parts[0] = "cpu,cpuacct";
       cgroup = strings::join("/", parts);
     }

     // Check the user cannot manipulate the container's cgroup control
     // files.
     EXPECT_NE(0, os::system(
           "su - " + UNPRIVILEGED_USERNAME +
           " -c 'echo $$ >" +
           path::join(flags.cgroups_hierarchy, cgroup, "cgroup.procs") +
           "'"));

     // Check the user can create a cgroup under the container's
     // cgroup.
     string userCgroup = path::join(cgroup, "user");

     EXPECT_EQ(0, os::system(
           "su - " +
           UNPRIVILEGED_USERNAME +
           " -c 'mkdir " +
           path::join(flags.cgroups_hierarchy, userCgroup) +
           "'"));

     // Check the user can manipulate control files in the created
     // cgroup.
     EXPECT_EQ(0, os::system(
           "su - " +
           UNPRIVILEGED_USERNAME +
           " -c 'echo $$ >" +
           path::join(flags.cgroups_hierarchy, userCgroup, "cgroup.procs") +
           "'"));
   }

   // Clean up the container. This will also remove the nested cgroups.
   AWAIT_READY(isolator->cleanup(containerId));
 }
 #endif // __linux__

 } // namespace tests {
 } // namespace internal {
 } // namespace mesos {