src/buildstream/storage/_casbaseddirectory.py - buildstream - Git at Google

 #
 #  Copyright (C) 2018 Bloomberg LP
 #
 #  This program is free software; you can redistribute it and/or
 #  modify it under the terms of the GNU Lesser General Public
 #  License as published by the Free Software Foundation; either
 #  version 2 of the License, or (at your option) any later version.
 #
 #  This library is distributed in the hope that it will be useful,
 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
 #  Lesser General Public License for more details.
 #
 #  You should have received a copy of the GNU Lesser General Public
 #  License along with this library. If not, see <http://www.gnu.org/licenses/>.
 #
 #  Authors:
 #        Jim MacArthur <jim.macarthur@codethink.co.uk>

 """
 CasBasedDirectory
 =================

 Implementation of the Directory class which backs onto a Merkle-tree based content
 addressable storage system.

 See also: :ref:`sandboxing`.
 """

 import os
 import stat
 import tarfile as tarfilelib
 from contextlib import contextmanager
 from io import StringIO
 from google.protobuf import timestamp_pb2

 from .. import utils
 from .._protos.build.bazel.remote.execution.v2 import remote_execution_pb2
 from .directory import Directory, VirtualDirectoryError, _FileType
 from ._filebaseddirectory import FileBasedDirectory
 from ..utils import FileListResult, BST_ARBITRARY_TIMESTAMP


 class IndexEntry:
     """A single named entry held in ``CasBasedDirectory.index``.

     An entry describes a regular file, a symlink or a subdirectory. A
     subdirectory is referenced either by ``digest`` or, once it has been
     instantiated, by ``buildstream_object``.
     """

     def __init__(
         self,
         name,
         entrytype,
         *,
         digest=None,
         target=None,
         is_executable=False,
         buildstream_object=None,
         modified=False,
         mtime=None
     ):
         # What the entry is called and what kind of node it is
         self.name = name
         self.type = entrytype

         # Content reference: a digest, a symlink target or a directory object
         self.digest = digest
         self.target = target
         self.buildstream_object = buildstream_object

         # Per-entry metadata
         self.is_executable = is_executable
         self.mtime = mtime
         self.modified = modified

     def get_directory(self, parent):
         """Return the CasBasedDirectory for this entry, creating it on first use.

         When the object is created here it takes over the entry's digest,
         which is then cleared on the entry itself.
         """
         if self.buildstream_object:
             return self.buildstream_object

         assert self.type == _FileType.DIRECTORY
         self.buildstream_object = CasBasedDirectory(
             parent.cas_cache, digest=self.digest, parent=parent, filename=self.name
         )
         self.digest = None

         return self.buildstream_object

     def get_digest(self):
         """Return the digest of this entry, asking the directory object if there is one."""
         directory = self.buildstream_object
         if not directory:
             # regular file, symlink or directory without buildstream object
             return self.digest

         # directory with buildstream object
         return directory._get_digest()

     # clone():
     #
     # Create a copy of this entry which does not share a directory
     # object with the original: directories are carried over by
     # digest only. The `modified` flag is not copied.
     #
     def clone(self) -> "IndexEntry":
         # For a directory, the digest is turned back into a directory
         # object later if necessary. For other non-file types the
         # digest is always None.
         current_digest = self.get_digest()

         return IndexEntry(
             self.name,
             self.type,
             digest=current_digest,
             target=self.target,
             is_executable=self.is_executable,
             mtime=self.mtime,
         )

     def __eq__(self, other: object) -> bool:
         if not isinstance(other, IndexEntry):
             return NotImplemented

         # Entries are equivalent when these properties all match; the
         # `modified` flag and the directory object identity are ignored.
         mine, theirs = (
             (e.name, e.type, e.target, e.is_executable, e.mtime, e.get_digest()) for e in (self, other)
         )
         return mine == theirs


 # CasBasedDirectory intentionally doesn't call its superclass constructor,
 # which is meant to be unimplemented.
 # pylint: disable=super-init-not-called


 class CasBasedDirectory(Directory):
     """
     CAS-based directories can have two names; one is a 'common name' which has no effect
     on functionality, and the 'filename'. If a CasBasedDirectory has a parent, then 'filename'
     must be the name of an entry in the parent directory's index which points to this object.
     This is used to inform a parent directory that it must update the given hash for this
     object when this object changes.

     Typically a top-level CasBasedDirectory will have a common_name and no filename, and
     subdirectories will have a filename and no common_name. common_name can be used to identify
     CasBasedDirectory objects in a log file, since they have no unique position in a file
     system.
     """

     # Two constants which define the separators used by the remote execution API.
     _pb2_path_sep = "/"
     _pb2_absolute_path_prefix = "/"

     def __init__(self, cas_cache, *, digest=None, parent=None, common_name="untitled", filename=None):
         self.filename = filename
         self.common_name = common_name
         self.cas_cache = cas_cache
         self.__digest = digest
         self.index = {}
         self.parent = parent
         self.__subtree_read_only = None

         if digest:
             self._populate_index(digest)

     # _clear():
     #
     # Remove all entries from this directory.
     #
     def _clear(self):
         # Invalidate the cached digest first, then start over with an empty
         # index. NOTE(review): __invalidate_digest() is defined elsewhere in
         # this class; it presumably also notifies the parent - confirm.
         self.__invalidate_digest()
         self.index = {}

     # _reset():
     #
     # Replace the contents of this directory with the entries from the specified
     # directory digest.
     #
     # Args:
     #     digest (Digest): The digest of the replacement directory
     #
     def _reset(self, *, digest=None):
         # Always start from an empty directory.
         self._clear()

         # Without a replacement digest the directory simply stays empty.
         if not digest:
             return

         self.__digest = digest
         self._populate_index(digest)

     def _populate_index(self, digest):
         """Fill ``self.index`` from the Directory proto stored under ``digest``.

         Raises:
            VirtualDirectoryError: if the object file for ``digest`` does not exist
                                   in the local cache.
         """
         try:
             pb2_directory = remote_execution_pb2.Directory()
             with open(self.cas_cache.objpath(digest), "rb") as blob:
                 pb2_directory.ParseFromString(blob.read())
         except FileNotFoundError as exc:
             raise VirtualDirectoryError("Directory not found in local cache: {}".format(exc)) from exc

         # The only directory-level property picked up here is SubtreeReadOnly.
         for node_property in pb2_directory.node_properties.properties:
             if node_property.name != "SubtreeReadOnly":
                 continue
             self.__subtree_read_only = node_property.value == "true"

         index = self.index

         for subdir in pb2_directory.directories:
             index[subdir.name] = IndexEntry(subdir.name, _FileType.DIRECTORY, digest=subdir.digest)

         for filenode in pb2_directory.files:
             # mtime is optional; only carry it over when the field is set.
             mtime = filenode.node_properties.mtime if filenode.node_properties.HasField("mtime") else None
             index[filenode.name] = IndexEntry(
                 filenode.name,
                 _FileType.REGULAR_FILE,
                 digest=filenode.digest,
                 is_executable=filenode.is_executable,
                 mtime=mtime,
             )

         for link in pb2_directory.symlinks:
             index[link.name] = IndexEntry(link.name, _FileType.SYMLINK, target=link.target)

     def _find_self_in_parent(self):
         assert self.parent is not None
         parent = self.parent
         for (k, v) in parent.index.items():
             if v.buildstream_object == self:
                 return k
         return None

     def _add_directory(self, name):
         """Create an empty subdirectory called ``name`` and return its object.

         ``name`` must not already be present in the index.
         """
         assert name not in self.index

         child = CasBasedDirectory(self.cas_cache, parent=self, filename=name)
         child_entry = IndexEntry(name, _FileType.DIRECTORY, buildstream_object=child)
         self.index[name] = child_entry

         self.__invalidate_digest()

         return child

     def _add_file(self, name, path, modified=False, can_link=False, properties=None):
         """Add the file at ``path`` to the CAS and index it here as ``name``.

         The entry is flagged as modified when ``modified`` is set or when an
         entry called ``name`` already existed. The file's mtime is recorded
         only when ``properties`` contains "mtime". ``can_link`` is accepted
         but not used.
         """
         digest = self.cas_cache.add_object(path=path)
         executable = os.access(path, os.X_OK)

         recorded_mtime = None
         wants_mtime = bool(properties) and "mtime" in properties
         if wants_mtime:
             recorded_mtime = timestamp_pb2.Timestamp()
             utils._get_file_protobuf_mtimestamp(recorded_mtime, path)

         # Has to be evaluated before the index is updated below.
         replaces_existing = name in self.index

         self.index[name] = IndexEntry(
             name,
             _FileType.REGULAR_FILE,
             digest=digest,
             is_executable=executable,
             modified=modified or replaces_existing,
             mtime=recorded_mtime,
         )

         self.__invalidate_digest()

     def _add_entry(self, entry: IndexEntry):
         # Store a copy, so that the caller's entry is not shared with this index.
         copy = entry.clone()
         self.index[copy.name] = copy
         self.__invalidate_digest()

     def _contains_entry(self, entry: IndexEntry) -> bool:
         return entry == self.index.get(entry.name)

     # _apply_changes():
     #
     # Apply changes from dir_a to dir_b to this directory. The use
     # case for this is to merge changes between different workspace
     # versions into a buildtree.
     #
     # If a change was made both to this directory, as well as between
     # the given directories, it is applied, overwriting any changes to
     # this directory. This is desirable because we want to keep user
     # changes, however it may need to be re-considered for other use
     # cases.
     #
     # We perform this computation this way, instead of with a _diff
     # method and a subsequent _apply_diff, because it prevents leaking
     # IndexEntry objects, which contain mutable references and may
     # therefore cause problems if used outside of this class.
     #
     # Args:
     #     dir_a: The directory from which to start computing differences.
     #     dir_b: The directory whose changes to apply
     #
     def _apply_changes(self, dir_a: "CasBasedDirectory", dir_b: "CasBasedDirectory"):
         """Replay onto this directory the changes that turn ``dir_a`` into ``dir_b``.

         Entries of ``dir_b`` that differ from ``dir_a`` are copied in (merging
         recursively where both this directory and ``dir_b`` hold a directory
         of that name); entries of ``dir_a`` that are absent from ``dir_b`` are
         removed from this directory.
         """
         # If the digests are the same, the directories are the same
         # (child properties affect the digest). We can skip any work
         # in such a case.
         if dir_a._get_digest() == dir_b._get_digest():
             return

         # Helper: the CasBasedDirectory object for `entry.name` inside `directory`.
         def get_subdir(entry: IndexEntry, directory: CasBasedDirectory) -> CasBasedDirectory:
             return directory.index[entry.name].get_directory(directory)

         # Helper: whether `directory` holds a directory under `entry.name`.
         # Raises KeyError if the name is absent, so callers check membership first.
         def is_dir_in(entry: IndexEntry, directory: CasBasedDirectory) -> bool:
             return directory.index[entry.name].type == _FileType.DIRECTORY

         # We first check which files were added, and add them to our
         # directory.
         for entry in dir_b.index.values():
             if self._contains_entry(entry):
                 # We can short-circuit checking entries from b that
                 # already exist in our index.
                 continue

             # Only entries that differ between dir_a and dir_b are changes.
             if not dir_a._contains_entry(entry):
                 if entry.name in self.index and is_dir_in(entry, self) and is_dir_in(entry, dir_b):
                     # If the entry changed, and is a directory in both
                     # the current and to-merge-into tree, we need to
                     # merge recursively.

                     # If the entry is not a directory in dir_a, we
                     # want to overwrite the file, but we need an empty
                     # directory for recursion.
                     if entry.name in dir_a.index and is_dir_in(entry, dir_a):
                         sub_a = get_subdir(entry, dir_a)
                     else:
                         sub_a = CasBasedDirectory(dir_a.cas_cache)

                     subdir = get_subdir(entry, self)
                     subdir._apply_changes(sub_a, get_subdir(entry, dir_b))
                 else:
                     # In any other case, we just add/overwrite the file/directory
                     self._add_entry(entry)

         # We can't iterate and remove entries at the same time
         to_remove = [entry for entry in dir_a.index.values() if entry.name not in dir_b.index]
         # NOTE(review): remove() raises FileNotFoundError when the name is not
         # present in this directory - confirm every removed name is expected
         # to exist here.
         for entry in to_remove:
             self.remove(entry.name, recursive=True)

         self.__invalidate_digest()

     def _add_new_link_direct(self, name, target):
         # A symlink that replaces an existing entry counts as a modification;
         # this has to be checked before the index is updated.
         replaces_existing = name in self.index
         self.index[name] = IndexEntry(name, _FileType.SYMLINK, target=target, modified=replaces_existing)

         self.__invalidate_digest()

     def remove(self, *path, recursive=False):
         """Remove the entry addressed by ``path`` from this directory tree.

         Arguments:
         * *path (str): Path components; all but the last are descended into.
         * recursive (bool): Allow removing a directory that is not empty.

         Raises:
            FileNotFoundError: if the final component is not in its directory.
            VirtualDirectoryError: if the entry is a non-empty directory and
                                   ``recursive`` is not set.
         """
         if len(path) > 1:
             # The entry lives further down, let the containing directory handle it
             self.descend(*path[:-1]).remove(path[-1], recursive=recursive)
             return

         name = path[0]
         self.__validate_path_component(name)

         entry = self.index.get(name)
         if not entry:
             raise FileNotFoundError("{} not found in {}".format(name, str(self)))

         if not recursive and entry.type == _FileType.DIRECTORY:
             subdir = entry.get_directory(self)
             if not subdir.is_empty():
                 raise VirtualDirectoryError("{} is not empty".format(str(subdir)))

         del self.index[name]
         self.__invalidate_digest()

     def rename(self, src, dest):
         """Move the entry at path ``src`` to path ``dest``.

         Both arguments are sequences of path components; the containing
         directories of both paths are looked up with descend().
         """
         # Locate the entry to move
         source_dir = self.descend(*src[:-1])
         moved = source_dir._entry_from_path(src[-1])

         # Locate where it is going and check the new name
         target_dir = self.descend(*dest[:-1])
         self.__validate_path_component(dest[-1])

         # Take it out of the old location, then add it under the new name
         source_dir.remove(src[-1], recursive=True)
         moved.name = dest[-1]
         target_dir._add_entry(moved)

     def descend(self, *paths, create=False, follow_symlinks=False):
         """Walk down through the given path components and return the
         Directory object reached.

         Arguments:
         * *paths (str): Directory names, one per level. Empty components
           are skipped; "." and ".." are honoured when no entry of that
           name exists.
         * create (boolean): Create directories which do not exist yet.
         * follow_symlinks (boolean): Resolve symlinks met on the way;
           absolute targets are resolved from the root of this tree.

         Raises:
            VirtualDirectoryError: if a component is missing and `create`
            is not set, or if a component is not a directory (nor a
            symlink being followed).

         Note: At the moment, creating a directory by descending does
         not update this object in the CAS cache. However, performing
         an import_files() into a subdirectory of any depth obtained by
         descending from this object *will* cause this directory to be
         updated and stored.

         """

         node = self

         for component in paths:
             # Skip empty path segments
             if not component:
                 continue

             self.__validate_path_component(component)

             entry = node.index.get(component)

             if not entry:
                 if component == ".":
                     continue
                 if component == "..":
                     # In POSIX /.. == / so just stay at the root dir
                     if node.parent is not None:
                         node = node.parent
                     continue
                 if not create:
                     error = "'{}' not found in {}"
                     raise VirtualDirectoryError(error.format(component, str(node)), reason="directory-not-found")
                 node = node._add_directory(component)
                 continue

             if entry.type == _FileType.DIRECTORY:
                 node = entry.get_directory(node)
             elif follow_symlinks and entry.type == _FileType.SYMLINK:
                 link_target = entry.target
                 target_components = link_target.split(os.path.sep)
                 # Absolute targets restart from the root, relative ones from here
                 start = node._find_root() if os.path.isabs(link_target) else node
                 node = start.descend(*target_components, follow_symlinks=True)
             else:
                 error = "Cannot descend into {}, which is a '{}' in the directory {}"
                 raise VirtualDirectoryError(
                     error.format(component, entry.type, node), reason="not-a-directory"
                 )

         return node

     def _check_replacement(self, name, relative_pathname, fileListResult):
         """ Decide whether an import may write an entry called 'name' here.

         Nothing in the way: returns True. An existing file, symlink or empty
         directory is removed, 'relative_pathname' is appended to
         fileListResult.overwritten and True is returned. A non-empty directory
         is left alone, 'relative_pathname' is appended to
         fileListResult.ignored and False is returned. """
         existing_entry = self.index.get(name)
         if existing_entry is None:
             return True

         if existing_entry.type == _FileType.DIRECTORY:
             # If 'name' maps to a DirectoryNode, then there must be an entry in index
             # pointing to another Directory.
             subdir = existing_entry.get_directory(self)
             if not subdir.is_empty():
                 # We can't overwrite a non-empty directory, so we just ignore it.
                 fileListResult.ignored.append(relative_pathname)
                 return False

         # Files, symlinks and empty directories all get replaced the same way
         self.remove(name)
         fileListResult.overwritten.append(relative_pathname)
         return True

     def _partial_import_cas_into_cas(self, source_directory, filter_callback, *, path_prefix="", origin=None, result):
         """ Import files from a CAS-based directory.

         Arguments:
           source_directory (CasBasedDirectory): directory whose index is imported
           filter_callback (callable or None): called with each relative path;
                           entries for which it returns a false value are skipped
           path_prefix (str): path of this directory relative to where the import started
           origin (CasBasedDirectory): directory where the import started; defaults
                           to this directory. A source subdirectory equal to it is skipped.
           result (FileListResult): updated in place with the written, overwritten
                           and ignored paths
         """
         if origin is None:
             origin = self

         for name, entry in source_directory.index.items():
             # The destination filename, relative to the root where the import started
             relative_pathname = os.path.join(path_prefix, name)

             is_dir = entry.type == _FileType.DIRECTORY

             if is_dir:
                 create_subdir = name not in self.index

                 if create_subdir and not filter_callback:
                     # If subdirectory does not exist yet and there is no filter,
                     # we can import the whole source directory by digest instead
                     # of importing each directory entry individually.
                     subdir_digest = entry.get_digest()
                     dest_entry = IndexEntry(name, _FileType.DIRECTORY, digest=subdir_digest)
                     self.index[name] = dest_entry
                     self.__invalidate_digest()

                     # However, we still need to iterate over the directory entries
                     # to fill in `result.files_written`.

                     # Use source subdirectory object if it already exists,
                     # otherwise create object for destination subdirectory.
                     # This is based on the assumption that the destination
                     # subdirectory is more likely to be modified later on
                     # (e.g., by further import_files() calls).
                     if entry.buildstream_object:
                         subdir = entry.buildstream_object
                     else:
                         subdir = dest_entry.get_directory(self)

                     subdir.__add_files_to_result(path_prefix=relative_pathname, result=result)
                 else:
                     src_subdir = source_directory.descend(name)
                     # Skip the directory the import started from.
                     if src_subdir == origin:
                         continue

                     try:
                         dest_subdir = self.descend(name, create=create_subdir)
                     except VirtualDirectoryError:
                         # descend() failed, so `name` exists here but is not a directory
                         filetype = self.index[name].type
                         raise VirtualDirectoryError(
                             "Destination is a {}, not a directory: /{}".format(filetype, relative_pathname)
                         )

                     dest_subdir._partial_import_cas_into_cas(
                         src_subdir, filter_callback, path_prefix=relative_pathname, origin=origin, result=result
                     )

             # `dest_subdir` is only read below when a filter is set, in which
             # case the recursive branch above has assigned it.
             if filter_callback and not filter_callback(relative_pathname):
                 if is_dir and create_subdir and dest_subdir.is_empty():
                     # Complete subdirectory has been filtered out, remove it
                     self.remove(name)

                 # Entry filtered out, move to next
                 continue

             if not is_dir:
                 if self._check_replacement(name, relative_pathname, result):
                     if entry.type == _FileType.REGULAR_FILE:
                         self._add_entry(entry)
                         # _add_entry() stores a clone, which starts out unmodified
                         self.index[entry.name].modified = True
                     else:
                         assert entry.type == _FileType.SYMLINK
                         self._add_new_link_direct(name=name, target=entry.target)
                     result.files_written.append(relative_pathname)

     def import_files(
         self,
         external_pathspec,
         *,
         filter_callback=None,
         report_written=True,
         update_mtime=None,
         can_link=False,
         properties=None
     ):
         """ See superclass Directory for arguments """

         result = FileListResult()
         source = external_pathspec

         # A file based directory is imported through its path on disk
         if isinstance(source, FileBasedDirectory):
             source = source._get_underlying_directory()

         if isinstance(source, str):
             # Import files from local filesystem by first importing complete
             # directory into CAS (using buildbox-casd) and then importing its
             # content into this CasBasedDirectory using CAS-to-CAS import
             # to write the report, handle possible conflicts (if the target
             # directory is not empty) and apply the optional filter.
             imported_digest = self.cas_cache.import_directory(source, properties=properties)
             source = CasBasedDirectory(self.cas_cache, digest=imported_digest)

         assert isinstance(source, CasBasedDirectory)
         self._partial_import_cas_into_cas(source, filter_callback, result=result)

         # TODO: No notice is taken of report_written or update_mtime.
         # Current behaviour is to fully populate the report, which is inefficient,
         # but still correct.

         return result

     def import_single_file(self, external_pathspec, properties=None):
         """Import one file from the local filesystem under its base name.

         Returns a FileListResult; the file is skipped when a non-empty
         directory of the same name is in the way.
         """
         result = FileListResult()
         basename = os.path.basename(external_pathspec)
         dirname = os.path.dirname(external_pathspec)

         # NOTE(review): the dirname is what _check_replacement() records in
         # result.overwritten, so the `basename in result.overwritten` test
         # below only holds when the two happen to be equal - confirm intent.
         if self._check_replacement(basename, dirname, result):
             self._add_file(
                 basename,
                 external_pathspec,
                 modified=basename in result.overwritten,
                 properties=properties,
             )
             result.files_written.append(external_pathspec)

         return result

     def set_deterministic_user(self):
         """ Sets all files in this directory to the current user's euid/egid.
         We also don't store user data, so this can be ignored.
         """

     def export_files(self, to_directory, *, can_link=False, can_destroy=False):
         """Check out the contents of this directory into to_directory, which
         must be the name of a traditional filesystem directory.

         Arguments:

         to_directory (string): a path outside this directory object
         where the contents will be copied to.

         can_link (bool): Whether we can create hard links in to_directory
         instead of copying.

         can_destroy (bool): Whether we can destroy elements in this
         directory to export them (e.g. by renaming them as the
         target). Accepted but not used here.

         """

         directory_digest = self._get_digest()
         self.cas_cache.checkout(to_directory, directory_digest, can_link=can_link)

     def export_to_tar(self, tarfile, destination_dir, mtime=BST_ARBITRARY_TIMESTAMP):
         """Add every entry of this directory, recursively, to an open tar file.

         Arguments:
           tarfile: the tar archive object to add members to
           destination_dir (str): path prefix of the members inside the archive
           mtime: modification time stamped on every member

         Raises:
            VirtualDirectoryError: for an entry which is neither a directory,
                                   a regular file nor a symlink.
         """
         for filename, entry in sorted(self.index.items()):
             arcname = os.path.join(destination_dir, filename)
             kind = entry.type

             # Every kind of member starts from the same name and timestamp
             member = tarfilelib.TarInfo(arcname)
             member.mtime = mtime

             if kind == _FileType.DIRECTORY:
                 member.type = tarfilelib.DIRTYPE
                 member.mode = 0o755
                 tarfile.addfile(member)
                 self.descend(filename).export_to_tar(tarfile, arcname, mtime)
             elif kind == _FileType.REGULAR_FILE:
                 source_name = self.cas_cache.objpath(entry.digest)
                 if entry.is_executable:
                     member.mode |= stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
                 member.size = os.path.getsize(source_name)
                 with open(source_name, "rb") as content:
                     tarfile.addfile(member, content)
             elif kind == _FileType.SYMLINK:
                 member.mode = 0o777
                 member.linkname = entry.target
                 member.type = tarfilelib.SYMTYPE
                 link_stream = StringIO(entry.target)
                 tarfile.addfile(member, link_stream)
             else:
                 raise VirtualDirectoryError("can not export file type {} to tar".format(entry.type))

     def _mark_changed(self):
         """ It should not be possible to externally modify a CAS-based
         directory at the moment."""
         raise NotImplementedError()

     def is_empty(self):
         """ Return true if this directory has no files, subdirectories or links in it.
         """
         return len(self.index) == 0

     def _mark_directory_unmodified(self):
         # Clears the modified flag on every entry of this directory and,
         # recursively, on the entries of all instantiated child directories.
         for entry in self.index.values():
             entry.modified = False

             child = entry.buildstream_object
             if entry.type == _FileType.DIRECTORY and child:
                 child._mark_directory_unmodified()

     def _mark_entry_unmodified(self, name):
         # Marks an entry as unmodified. If the entry is a directory, it will
         # recursively mark all its tree as unmodified.
         self.index[name].modified = False
         if self.index[name].buildstream_object:
             self.index[name].buildstream_object._mark_directory_unmodified()

     def mark_unmodified(self):
         """ Marks all files in this directory (recursively) as unmodified.
         If we have a parent, we mark our own entry as unmodified in that parent's
         index.
         """
         if self.parent:
             self.parent._mark_entry_unmodified(self._find_self_in_parent())
         else:
             self._mark_directory_unmodified()

     def _lightweight_resolve_to_index(self, path):
         """A lightweight function for transforming paths into IndexEntry
         objects. This does not follow symlinks.

         path: The string to resolve. This should be a series of path
         components separated by the protocol buffer path separator
         _pb2_path_sep.

         Returns: the IndexEntry found, or None if any of the path components
         were not present or an intermediate component is not a directory.

         """
         *parents, leaf = path.split(CasBasedDirectory._pb2_path_sep)

         directory = self
         for component in parents:
             entry = directory.index.get(component)
             if entry is None or entry.type != _FileType.DIRECTORY:
                 return None
             # The parent handed to get_directory() must be the directory which
             # actually holds the entry, not the directory where the lookup
             # started: a subdirectory object created here keeps that parent.
             directory = entry.get_directory(directory)

         return directory.index.get(leaf)

     def list_modified_paths(self):
         """Provide a list of relative paths which have been modified since the
         last call to mark_unmodified.

         Return value: List(str) - list of modified paths
         """

         for p in self.list_relative_paths():
             i = self._lightweight_resolve_to_index(p)
             if i and i.modified:
                 yield p

     def list_relative_paths(self):
         """Provide a list of all relative paths.

         Yields:
           (List(str)) - list of all files with relative paths.

         """
         yield from self._list_prefixed_relative_paths()

     def _list_prefixed_relative_paths(self, prefix=""):
         """Provide a list of all relative paths.

         Arguments:
           prefix (str): an optional prefix to the relative paths, this is
                         also emitted by itself.

         Yields:
           (List(str)) - list of all files with relative paths.

         """

         file_list = list(filter(lambda i: i[1].type != _FileType.DIRECTORY, self.index.items()))
         directory_list = filter(lambda i: i[1].type == _FileType.DIRECTORY, self.index.items())

         if prefix != "":
             yield prefix

         for (k, v) in sorted(file_list):
             yield os.path.join(prefix, k)

         for (k, v) in sorted(directory_list):
             subdir = v.get_directory(self)
             yield from subdir._list_prefixed_relative_paths(prefix=os.path.join(prefix, k))

     def walk(self):
         """Provide a list of dictionaries containing information about the files.

         Yields:
           info (dict) - a dictionary containing name, type and size of the files.

         """
         yield from self._walk()

     def _walk(self, prefix=""):
         """ Walk through the files, collecting the required data

         Arguments:
           prefix (str): an optional prefix prepended to the name of every
                         entry.

         Yields:
           info (dict) - a dictionary containing "name" and "type" for every
                         entry, plus:
                         * regular file: "executable" and "size" (size in
                           bytes of the file's own blob)
                         * symlink: "target" and "size" (length of the target)
                         * directory: "size" (number of entries it holds);
                           the directory is followed by its own contents.

           """
         for leaf in sorted(self.index):
             entry = self.index[leaf]
             info = {"name": os.path.join(prefix, leaf), "type": entry.type}

             if entry.type == _FileType.REGULAR_FILE:
                 info["executable"] = entry.is_executable
                 # Size of this file only. Using self.get_size() here would
                 # report the recursive size of the whole containing directory
                 # for every file, and recompute it once per file.
                 info["size"] = entry.digest.size_bytes
             elif entry.type == _FileType.SYMLINK:
                 info["target"] = entry.target
                 info["size"] = len(entry.target)

             if entry.type == _FileType.DIRECTORY:
                 directory = entry.get_directory(self)
                 info["size"] = len(directory.index)
                 yield info
                 yield from directory._walk(os.path.join(prefix, leaf))
             else:
                 yield info

     def get_size(self):
         """Return the size in bytes of this directory tree: the serialized
         directory itself plus, recursively, its files and subdirectories.
         """
         total = self._get_digest().size_bytes

         for entry in self.index.values():
             kind = entry.type
             if kind == _FileType.REGULAR_FILE:
                 total += entry.digest.size_bytes
             elif kind == _FileType.DIRECTORY:
                 total += entry.get_directory(self).get_size()
             # Symlink nodes are encoded as part of the directory serialization.

         return total

     def _get_identifier(self):
         path = ""
         if self.parent:
             path = self.parent._get_identifier()
         if self.filename:
             path += "/" + self.filename
         else:
             path += "/" + self.common_name
         return path

     @contextmanager
     def open_file(self, *path: str, mode: str = "r"):
         """Context manager yielding a file object for the file at ``path``.

         Read modes open the CAS object directly. Write and exclusive-create
         modes yield a temporary file, which is added to the containing
         directory once the ``with`` body has finished.

         Raises:
            VirtualDirectoryError: if the path names something that is not a regular file.
            ValueError: if ``mode`` is not one of the supported modes.
            FileNotFoundError: when reading a file which does not exist.
            FileExistsError: when exclusively creating a file which exists.
         """
         subdir = self.descend(*path[:-1])
         filename = path[-1]
         self.__validate_path_component(filename)
         entry = subdir.index.get(filename)

         if entry and entry.type != _FileType.REGULAR_FILE:
             raise VirtualDirectoryError("{} in {} is not a file".format(filename, str(subdir)))

         supported_modes = ("r", "rb", "w", "wb", "w+", "w+b", "x", "xb", "x+", "x+b")
         if mode not in supported_modes:
             raise ValueError("Unsupported mode: `{}`".format(mode))

         # Text modes are always UTF-8, binary modes take no encoding
         encoding = None if "b" in mode else "utf-8"

         if "r" not in mode:
             if "x" in mode and entry:
                 raise FileExistsError("{} already exists in {}".format(filename, str(subdir)))

             with utils._tempnamedfile(mode, encoding=encoding, dir=self.cas_cache.tmpdir) as tmp:
                 # Make sure the temporary file is readable by buildbox-casd
                 os.chmod(tmp.name, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH)
                 yield tmp
                 # Import written temporary file into CAS
                 tmp.flush()
                 subdir._add_file(filename, tmp.name, modified=True)
             return

         if not entry:
             raise FileNotFoundError("{} not found in {}".format(filename, str(subdir)))

         # Read-only access, allow direct access to CAS object
         with open(self.cas_cache.objpath(entry.digest), mode, encoding=encoding) as stored:
             yield stored

     def __str__(self):
         return "[CAS:{}]".format(self._get_identifier())

     def _get_underlying_directory(self):
         """ A CAS-backed directory has no underlying directory, so this
         always raises VirtualDirectoryError. """
         message = (
             "_get_underlying_directory was called on a CAS-backed directory, which has no underlying directory."
         )
         raise VirtualDirectoryError(message)

     def _find_root(self):
         """ Finds the root of this directory tree by following 'parent' until there is
         no parent. """
         if self.parent:
             return self.parent._find_root()
         else:
             return self

     # _get_digest():
     #
     # Return the Digest for this directory, computing and caching it first
     # if no digest is currently cached.
     #
     # When no digest is cached, a fresh Directory protobuf is built from
     # self.index, serialized, and added to the CAS through
     # self.cas_cache.add_object(); the value returned by that call is cached
     # and returned.
     #
     # Returns:
     #   (Digest): The Digest protobuf object for the Directory protobuf
     #
     def _get_digest(self):
         if not self.__digest:
             # Create updated Directory proto
             pb2_directory = remote_execution_pb2.Directory()

             # The SubtreeReadOnly node property is only emitted when the flag
             # has been set explicitly; None leaves the property out entirely.
             if self.__subtree_read_only is not None:
                 node_property = pb2_directory.node_properties.properties.add()
                 node_property.name = "SubtreeReadOnly"
                 node_property.value = "true" if self.__subtree_read_only else "false"

             # Index keys are the entry names, so sorting the items orders the
             # nodes by name within each of the three node lists.
             for name, entry in sorted(self.index.items()):
                 if entry.type == _FileType.DIRECTORY:
                     dirnode = pb2_directory.directories.add()
                     dirnode.name = name

                     # Update digests for subdirectories in DirectoryNodes.
                     # No need to call entry.get_directory().
                     # If it hasn't been instantiated, digest must be up-to-date.
                     subdir = entry.buildstream_object
                     if subdir:
                         dirnode.digest.CopyFrom(subdir._get_digest())
                     else:
                         dirnode.digest.CopyFrom(entry.digest)
                 elif entry.type == _FileType.REGULAR_FILE:
                     filenode = pb2_directory.files.add()
                     filenode.name = name
                     filenode.digest.CopyFrom(entry.digest)
                     filenode.is_executable = entry.is_executable
                     # mtime is optional; it is only recorded when the entry has one.
                     if entry.mtime is not None:
                         filenode.node_properties.mtime.CopyFrom(entry.mtime)
                 elif entry.type == _FileType.SYMLINK:
                     symlinknode = pb2_directory.symlinks.add()
                     symlinknode.name = name
                     symlinknode.target = entry.target

             # Store the serialized proto in the CAS and cache the result.
             self.__digest = self.cas_cache.add_object(buffer=pb2_directory.SerializeToString())

         return self.__digest

     def _entry_from_path(self, *path, follow_symlinks=False):
         """Look up the index entry at the given path.

         All components but the last are descended into; the last one is
         looked up in the resulting directory's index. With follow_symlinks
         set, a symlink entry is resolved by looking up its target: from the
         root of the tree for an absolute target, otherwise from the
         directory containing the link.

         Raises:
            FileNotFoundError: if the final component is not in the index.
         """
         container = self.descend(*path[:-1], follow_symlinks=follow_symlinks)
         leaf = path[-1]
         self.__validate_path_component(leaf)
         entry = container.index.get(leaf)
         if entry is None:
             raise FileNotFoundError("{} not found in {}".format(leaf, str(container)))

         if not follow_symlinks or entry.type != _FileType.SYMLINK:
             return entry

         link_target = entry.target
         components = link_target.split(os.path.sep)
         # Absolute targets restart the lookup at the root of this tree.
         start = container._find_root() if os.path.isabs(link_target) else container
         return start._entry_from_path(*components, follow_symlinks=True)

     def exists(self, *path, follow_symlinks=False):
         """Return True if an entry can be looked up at the given path.

         A lookup that raises VirtualDirectoryError or FileNotFoundError
         is reported as False; any other exception propagates.
         """
         try:
             self._entry_from_path(*path, follow_symlinks=follow_symlinks)
         except (VirtualDirectoryError, FileNotFoundError):
             return False
         return True

     def stat(self, *path, follow_symlinks=False):
         """Return an ``os.stat_result`` for the entry at the given path.

         Args:
            *path: Path components leading to the entry, relative to this
                   directory.
            follow_symlinks: Passed on to the entry lookup; when true, a
                             symlink is resolved and its target is described
                             instead of the link itself.

         Returns:
            (os.stat_result): Only the mode, link count, size and the three
                              timestamps carry information; the inode,
                              device, uid and gid fields are always 0.

         Raises:
            FileNotFoundError: if the final path component does not exist.
            VirtualDirectoryError: if the entry has an unsupported file type.
         """
         entry = self._entry_from_path(*path, follow_symlinks=follow_symlinks)

         # Every entry is reported as rw-r--r--; execute bits are added below.
         st_mode = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH
         st_nlink = 1

         if entry.type == _FileType.REGULAR_FILE:
             st_mode |= stat.S_IFREG
             st_size = entry.get_digest().size_bytes
         elif entry.type == _FileType.DIRECTORY:
             st_mode |= stat.S_IFDIR
             st_size = 0
         elif entry.type == _FileType.SYMLINK:
             st_mode |= stat.S_IFLNK
             # lstat() reports the length of the link target in bytes, not in
             # characters; the two differ for non-ASCII targets, so measure
             # the encoded form.
             st_size = len(entry.target.encode("utf-8"))
         else:
             raise VirtualDirectoryError("Unsupported file type {}".format(entry.type))

         # Directories are always reported as executable (searchable).
         if entry.type == _FileType.DIRECTORY or entry.is_executable:
             st_mode |= stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH

         # Entries without a recorded mtime report BST_ARBITRARY_TIMESTAMP.
         if entry.mtime is not None:
             st_mtime = utils._parse_protobuf_timestamp(entry.mtime)
         else:
             st_mtime = BST_ARBITRARY_TIMESTAMP

         # Field order: mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime
         return os.stat_result((st_mode, 0, 0, st_nlink, 0, 0, st_size, st_mtime, st_mtime, st_mtime))

     def file_digest(self, *path):
         """Return the hash string of the digest of the regular file at path.

         Symlinks are not followed.

         Raises:
            VirtualDirectoryError: if the entry is not a regular file.
         """
         entry = self._entry_from_path(*path)
         if entry.type == _FileType.REGULAR_FILE:
             return entry.digest.hash

         raise VirtualDirectoryError("Unsupported file type for digest: {}".format(entry.type))

     def readlink(self, *path):
         """Return the target of the symlink at path.

         Raises:
            VirtualDirectoryError: if the entry is not a symlink.
         """
         entry = self._entry_from_path(*path)
         if entry.type == _FileType.SYMLINK:
             return entry.target

         raise VirtualDirectoryError("Unsupported file type for readlink: {}".format(entry.type))

     def __iter__(self):
         yield from self.index.keys()

     def _set_subtree_read_only(self, read_only):
         """Set the subtree read-only flag of this directory.

         The flag is written into the Directory proto by _get_digest(), so
         the cached digest is invalidated after changing it.
         """
         self.__subtree_read_only = read_only
         self.__invalidate_digest()

     def __invalidate_digest(self):
         """Drop the cached digest of this directory and of its ancestors.

         Nothing happens when no digest is cached; in particular the parent
         is not visited in that case.
         """
         if not self.__digest:
             return

         self.__digest = None
         if self.parent:
             self.parent.__invalidate_digest()

     def __add_files_to_result(self, *, path_prefix="", result):
         """Append the path of every non-directory entry in this tree to
         result.files_written.

         Paths are built by joining path_prefix with the entry names, and
         subdirectories are visited recursively.
         """
         for name, entry in self.index.items():
             # The destination filename, relative to the root where the import started
             destination = os.path.join(path_prefix, name)

             if entry.type != _FileType.DIRECTORY:
                 result.files_written.append(destination)
                 continue

             child = self.descend(name)
             child.__add_files_to_result(path_prefix=destination, result=result)

     def __validate_path_component(self, path):
         if "/" in path:
             raise VirtualDirectoryError("Invalid path component: '{}'".format(path))