Merge branch 'juerg/vdirectory' into 'master'

storage: Improve Directory API

See merge request BuildStream/buildstream!1878
diff --git a/src/buildstream/element.py b/src/buildstream/element.py
index 8563eee..9efe97c 100644
--- a/src/buildstream/element.py
+++ b/src/buildstream/element.py
@@ -659,7 +659,7 @@
             # Hard link it into the staging area
             #
             vbasedir = sandbox.get_virtual_directory()
-            vstagedir = vbasedir if path is None else vbasedir.descend(*path.lstrip(os.sep).split(os.sep))
+            vstagedir = vbasedir if path is None else vbasedir.descend(*path.lstrip(os.sep).split(os.sep), create=True)
 
             split_filter = self.__split_filter_func(include, exclude, orphans)
 
diff --git a/src/buildstream/storage/_casbaseddirectory.py b/src/buildstream/storage/_casbaseddirectory.py
index e33bdc3..c83918e 100644
--- a/src/buildstream/storage/_casbaseddirectory.py
+++ b/src/buildstream/storage/_casbaseddirectory.py
@@ -289,24 +289,47 @@
         # We can't iterate and remove entries at the same time
         to_remove = [entry for entry in dir_a.index.values() if entry.name not in dir_b.index]
         for entry in to_remove:
-            self.delete_entry(entry.name)
+            self.remove(entry.name, recursive=True)
 
         self.__invalidate_digest()
 
-    def _copy_link_from_filesystem(self, basename, filename):
-        self._add_new_link_direct(filename, os.readlink(os.path.join(basename, filename)))
-
     def _add_new_link_direct(self, name, target):
         self.index[name] = IndexEntry(name, _FileType.SYMLINK, target=target, modified=name in self.index)
 
         self.__invalidate_digest()
 
-    def delete_entry(self, name):
-        if name in self.index:
-            del self.index[name]
+    def remove(self, *path, recursive=False):
+        if len(path) > 1:
+            # Delegate remove to subdirectory
+            subdir = self.descend(*path[:-1])
+            subdir.remove(path[-1], recursive=recursive)
+            return
 
+        name = path[0]
+        self.__validate_path_component(name)
+        entry = self.index.get(name)
+        if not entry:
+            raise FileNotFoundError("{} not found in {}".format(name, str(self)))
+
+        if entry.type == _FileType.DIRECTORY and not recursive:
+            subdir = entry.get_directory(self)
+            if not subdir.is_empty():
+                raise VirtualDirectoryError("{} is not empty".format(str(subdir)))
+
+        del self.index[name]
         self.__invalidate_digest()
 
+    def rename(self, src, dest):
+        srcdir = self.descend(*src[:-1])
+        entry = srcdir._entry_from_path(src[-1])
+
+        destdir = self.descend(*dest[:-1])
+        self.__validate_path_component(dest[-1])
+
+        srcdir.remove(src[-1], recursive=True)
+        entry.name = dest[-1]
+        destdir._add_entry(entry)
+
     def descend(self, *paths, create=False, follow_symlinks=False):
         """Descend one or more levels of directory hierarchy and return a new
         Directory object for that directory.
@@ -332,6 +355,8 @@
             if not path:
                 continue
 
+            self.__validate_path_component(path)
+
             entry = current_dir.index.get(path)
 
             if entry:
@@ -379,7 +404,7 @@
             # pointing to another Directory.
             subdir = existing_entry.get_directory(self)
             if subdir.is_empty():
-                self.delete_entry(name)
+                self.remove(name)
                 fileListResult.overwritten.append(relative_pathname)
                 return True
             else:
@@ -387,7 +412,7 @@
                 fileListResult.ignored.append(relative_pathname)
                 return False
         else:
-            self.delete_entry(name)
+            self.remove(name)
             fileListResult.overwritten.append(relative_pathname)
             return True
 
@@ -448,7 +473,7 @@
             if filter_callback and not filter_callback(relative_pathname):
                 if is_dir and create_subdir and dest_subdir.is_empty():
                     # Complete subdirectory has been filtered out, remove it
-                    self.delete_entry(name)
+                    self.remove(name)
 
                 # Entry filtered out, move to next
                 continue
@@ -729,12 +754,13 @@
     @contextmanager
     def open_file(self, *path: str, mode: str = "r"):
         subdir = self.descend(*path[:-1])
+        self.__validate_path_component(path[-1])
         entry = subdir.index.get(path[-1])
 
         if entry and entry.type != _FileType.REGULAR_FILE:
             raise VirtualDirectoryError("{} in {} is not a file".format(path[-1], str(subdir)))
 
-        if mode not in ["r", "rb", "w", "wb", "x", "xb"]:
+        if mode not in ["r", "rb", "w", "wb", "w+", "w+b", "x", "xb", "x+", "x+b"]:
             raise ValueError("Unsupported mode: `{}`".format(mode))
 
         if "b" in mode:
@@ -825,22 +851,74 @@
 
         return self.__digest
 
+    def _entry_from_path(self, *path, follow_symlinks=False):
+        subdir = self.descend(*path[:-1], follow_symlinks=follow_symlinks)
+        self.__validate_path_component(path[-1])
+        target = subdir.index.get(path[-1])
+        if target is None:
+            raise FileNotFoundError("{} not found in {}".format(path[-1], str(subdir)))
+
+        if follow_symlinks and target.type == _FileType.SYMLINK:
+            linklocation = target.target
+            newpath = linklocation.split(os.path.sep)
+            if os.path.isabs(linklocation):
+                return subdir._find_root()._entry_from_path(*newpath, follow_symlinks=True)
+            return subdir._entry_from_path(*newpath, follow_symlinks=True)
+        else:
+            return target
+
     def exists(self, *path, follow_symlinks=False):
         try:
-            subdir = self.descend(*path[:-1], follow_symlinks=follow_symlinks)
-            target = subdir.index.get(path[-1])
-            if target is not None:
-                if follow_symlinks and target.type == _FileType.SYMLINK:
-                    linklocation = target.target
-                    newpath = linklocation.split(os.path.sep)
-                    if os.path.isabs(linklocation):
-                        return subdir._find_root().exists(*newpath, follow_symlinks=True)
-                    return subdir.exists(*newpath, follow_symlinks=True)
-                else:
-                    return True
+            self._entry_from_path(*path, follow_symlinks=follow_symlinks)
+            return True
+        except (VirtualDirectoryError, FileNotFoundError):
             return False
-        except VirtualDirectoryError:
-            return False
+
+    def stat(self, *path, follow_symlinks=False):
+        entry = self._entry_from_path(*path, follow_symlinks=follow_symlinks)
+
+        st_mode = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH
+        st_nlink = 1
+        st_mtime = BST_ARBITRARY_TIMESTAMP
+
+        if entry.type == _FileType.REGULAR_FILE:
+            st_mode |= stat.S_IFREG
+            st_size = entry.get_digest().size_bytes
+        elif entry.type == _FileType.DIRECTORY:
+            st_mode |= stat.S_IFDIR
+            st_size = 0
+        elif entry.type == _FileType.SYMLINK:
+            st_mode |= stat.S_IFLNK
+            st_size = len(entry.target)
+        else:
+            raise VirtualDirectoryError("Unsupported file type {}".format(entry.type))
+
+        if entry.type == _FileType.DIRECTORY or entry.is_executable:
+            st_mode |= stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
+
+        if entry.node_properties:
+            for prop in entry.node_properties:
+                if prop.name == "MTime" and prop.value:
+                    st_mtime = utils._parse_timestamp(prop.value)
+
+        return os.stat_result((st_mode, 0, 0, st_nlink, 0, 0, st_size, st_mtime, st_mtime, st_mtime))
+
+    def file_digest(self, *path):
+        entry = self._entry_from_path(*path)
+        if entry.type != _FileType.REGULAR_FILE:
+            raise VirtualDirectoryError("Unsupported file type for digest: {}".format(entry.type))
+
+        return entry.digest.hash
+
+    def readlink(self, *path):
+        entry = self._entry_from_path(*path)
+        if entry.type != _FileType.SYMLINK:
+            raise VirtualDirectoryError("Unsupported file type for readlink: {}".format(entry.type))
+
+        return entry.target
+
+    def __iter__(self):
+        yield from self.index.keys()
 
     def _set_subtree_read_only(self, read_only):
         self.__node_properties = list(filter(lambda prop: prop.name != "SubtreeReadOnly", self.__node_properties))
@@ -867,3 +945,7 @@
                 subdir.__add_files_to_result(path_prefix=relative_pathname, result=result)
             else:
                 result.files_written.append(relative_pathname)
+
+    def __validate_path_component(self, path):
+        if "/" in path:
+            raise VirtualDirectoryError("Invalid path component: '{}'".format(path))
diff --git a/src/buildstream/storage/_filebaseddirectory.py b/src/buildstream/storage/_filebaseddirectory.py
index c492c41..1231229 100644
--- a/src/buildstream/storage/_filebaseddirectory.py
+++ b/src/buildstream/storage/_filebaseddirectory.py
@@ -236,28 +236,50 @@
     def get_size(self):
         return utils._get_dir_size(self.external_directory)
 
+    def stat(self, *path, follow_symlinks=False):
+        subdir = self.descend(*path[:-1], follow_symlinks=follow_symlinks)
+        newpath = os.path.join(subdir.external_directory, path[-1])
+        st = os.lstat(newpath)
+        if follow_symlinks and stat.S_ISLNK(st.st_mode):
+            linklocation = os.readlink(newpath)
+            newpath = linklocation.split(os.path.sep)
+            if os.path.isabs(linklocation):
+                return subdir._find_root().stat(*newpath, follow_symlinks=True)
+            return subdir.stat(*newpath, follow_symlinks=True)
+        else:
+            return st
+
     def exists(self, *path, follow_symlinks=False):
         try:
-            subdir = self.descend(*path[:-1], follow_symlinks=follow_symlinks)
-            newpath = os.path.join(subdir.external_directory, path[-1])
-            st = os.lstat(newpath)
-            if follow_symlinks and stat.S_ISLNK(st.st_mode):
-                linklocation = os.readlink(newpath)
-                newpath = linklocation.split(os.path.sep)
-                if os.path.isabs(linklocation):
-                    return subdir._find_root().exists(*newpath, follow_symlinks=True)
-                return subdir.exists(*newpath, follow_symlinks=True)
-            else:
-                return True
+            self.stat(*path, follow_symlinks=follow_symlinks)
+            return True
         except (VirtualDirectoryError, FileNotFoundError):
             return False
 
+    def file_digest(self, *path):
+        # Use descend() to avoid following symlinks (potentially escaping the sandbox)
+        subdir = self.descend(*path[:-1])
+        if subdir.exists(path[-1]) and not subdir.isfile(path[-1]):
+            raise VirtualDirectoryError("Unsupported file type for digest")
+
+        newpath = os.path.join(subdir.external_directory, path[-1])
+        return utils.sha256sum(newpath)
+
+    def readlink(self, *path):
+        # Use descend() to avoid following symlinks (potentially escaping the sandbox)
+        subdir = self.descend(*path[:-1])
+        if subdir.exists(path[-1]) and not subdir.islink(path[-1]):
+            raise VirtualDirectoryError("Unsupported file type for readlink")
+
+        newpath = os.path.join(subdir.external_directory, path[-1])
+        return os.readlink(newpath)
+
     def open_file(self, *path: str, mode: str = "r"):
         # Use descend() to avoid following symlinks (potentially escaping the sandbox)
         subdir = self.descend(*path[:-1])
         newpath = os.path.join(subdir.external_directory, path[-1])
 
-        if mode not in ["r", "rb", "w", "wb", "x", "xb"]:
+        if mode not in ["r", "rb", "w", "wb", "w+", "w+b", "x", "xb", "x+", "x+b"]:
             raise ValueError("Unsupported mode: `{}`".format(mode))
 
         if "b" in mode:
@@ -268,8 +290,42 @@
         if "r" in mode:
             return open(newpath, mode=mode, encoding=encoding)
         else:
+            if "x" in mode:
+                # This check is not atomic, however, we're operating with a
+                # single thread in a private directory tree.
+                if subdir.exists(path[-1]):
+                    raise FileExistsError("{} already exists in {}".format(path[-1], str(subdir)))
+                mode = "w" + mode[1:]
+
             return utils.save_file_atomic(newpath, mode=mode, encoding=encoding)
 
+    def remove(self, *path, recursive=False):
+        # Use descend() to avoid following symlinks (potentially escaping the sandbox)
+        subdir = self.descend(*path[:-1])
+        newpath = os.path.join(subdir.external_directory, path[-1])
+
+        if subdir._get_filetype(path[-1]) == _FileType.DIRECTORY:
+            if recursive:
+                shutil.rmtree(newpath)
+            else:
+                os.rmdir(newpath)
+        else:
+            os.unlink(newpath)
+
+    def rename(self, src, dest):
+        # Use descend() to avoid following symlinks (potentially escaping the sandbox)
+        srcdir = self.descend(*src[:-1])
+        destdir = self.descend(*dest[:-1])
+        srcpath = os.path.join(srcdir.external_directory, src[-1])
+        destpath = os.path.join(destdir.external_directory, dest[-1])
+
+        if destdir.exists(dest[-1]):
+            destdir.remove(dest[-1])
+        os.rename(srcpath, destpath)
+
+    def __iter__(self):
+        yield from os.listdir(self.external_directory)
+
     def __str__(self):
         # This returns the whole path (since we don't know where the directory started)
         # which exposes the sandbox directory; we will have to assume for the time being
diff --git a/src/buildstream/storage/directory.py b/src/buildstream/storage/directory.py
index bb9d78f..f9ea404 100644
--- a/src/buildstream/storage/directory.py
+++ b/src/buildstream/storage/directory.py
@@ -32,6 +32,8 @@
 """
 
 
+import os
+import stat
 from typing import Callable, Optional, Union, List
 
 from .._exceptions import BstError
@@ -208,6 +210,66 @@
         """
         raise NotImplementedError()
 
+    def stat(self, *paths: str, follow_symlinks: bool = False) -> os.stat_result:
+        """ Get the status of a file.
+
+        Args:
+          *paths: A list of strings which are all path components.
+          follow_symlinks: True to follow symlinks.
+
+        Returns:
+          A `os.stat_result` object.
+        """
+        raise NotImplementedError()
+
+    def isfile(self, *paths: str, follow_symlinks: bool = False) -> bool:
+        """ Check whether the specified path is an existing regular file.
+
+        Args:
+          *paths: A list of strings which are all path components.
+          follow_symlinks: True to follow symlinks.
+
+        Returns:
+          True if the path is an existing regular file, False otherwise.
+        """
+        try:
+            st = self.stat(*paths, follow_symlinks=follow_symlinks)
+            return stat.S_ISREG(st.st_mode)
+        except (VirtualDirectoryError, FileNotFoundError):
+            return False
+
+    def isdir(self, *paths: str, follow_symlinks: bool = False) -> bool:
+        """ Check whether the specified path is an existing directory.
+
+        Args:
+          *paths: A list of strings which are all path components.
+          follow_symlinks: True to follow symlinks.
+
+        Returns:
+          True if the path is an existing directory, False otherwise.
+        """
+        try:
+            st = self.stat(*paths, follow_symlinks=follow_symlinks)
+            return stat.S_ISDIR(st.st_mode)
+        except (VirtualDirectoryError, FileNotFoundError):
+            return False
+
+    def islink(self, *paths: str, follow_symlinks: bool = False) -> bool:
+        """ Check whether the specified path is an existing symlink.
+
+        Args:
+          *paths: A list of strings which are all path components.
+          follow_symlinks: True to follow symlinks.
+
+        Returns:
+          True if the path is an existing symlink, False otherwise.
+        """
+        try:
+            st = self.stat(*paths, follow_symlinks=follow_symlinks)
+            return stat.S_ISLNK(st.st_mode)
+        except (VirtualDirectoryError, FileNotFoundError):
+            return False
+
     def open_file(self, *paths: str, mode: str = "r"):
         """ Open file and return a corresponding file object. In text mode,
         UTF-8 is used as encoding.
@@ -218,6 +280,42 @@
         """
         raise NotImplementedError()
 
+    def file_digest(self, *paths: str) -> str:
+        """ Return a digest of a file. The digest algorithm is implementation-
+        defined.
+
+        Args:
+          *paths: A list of strings which are all path components.
+        """
+        raise NotImplementedError()
+
+    def readlink(self, *paths: str) -> str:
+        """ Return a string representing the path to which the symbolic link points.
+
+        Args:
+          *paths: A list of strings which are all path components.
+        """
+        raise NotImplementedError()
+
+    def remove(self, *paths: str, recursive: bool = False):
+        """ Remove a file, symlink or directory. Symlinks are not followed.
+
+        Args:
+          *paths: A list of strings which are all path components.
+          recursive: True to delete non-empty directories.
+        """
+        raise NotImplementedError()
+
+    def rename(self, src: List[str], dest: List[str]):
+        """ Rename a file, symlink or directory. If destination path exists
+        already and is a file or empty directory, it will be replaced.
+
+        Args:
+          *src: Source path components.
+          *dest: Destination path components.
+        """
+        raise NotImplementedError()
+
     def _create_empty_file(self, *paths):
         with self.open_file(*paths, mode="w"):
             pass
diff --git a/tests/internals/storage.py b/tests/internals/storage.py
index 2e636cc..ea3f59b 100644
--- a/tests/internals/storage.py
+++ b/tests/internals/storage.py
@@ -2,7 +2,9 @@
 import os
 import pprint
 import shutil
+import stat
 import glob
+import hashlib
 from pathlib import Path
 from typing import List, Optional
 
@@ -11,7 +13,7 @@
 from buildstream._cas import CASCache
 from buildstream.storage._casbaseddirectory import CasBasedDirectory
 from buildstream.storage._filebaseddirectory import FileBasedDirectory
-from buildstream.storage.directory import _FileType
+from buildstream.storage.directory import _FileType, VirtualDirectoryError
 
 DATA_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "storage")
 
@@ -19,7 +21,9 @@
 @contextmanager
 def setup_backend(backend_class, tmpdir):
     if backend_class == FileBasedDirectory:
-        yield backend_class(os.path.join(tmpdir, "vdir"))
+        path = os.path.join(tmpdir, "vdir")
+        os.mkdir(path)
+        yield backend_class(path)
     else:
         cas_cache = CASCache(os.path.join(tmpdir, "cas"), log_directory=os.path.join(tmpdir, "logs"))
         try:
@@ -216,6 +220,94 @@
         )
 
 
+@pytest.mark.parametrize("backend", [FileBasedDirectory, CasBasedDirectory])
+@pytest.mark.datafiles(DATA_DIR)
+def test_file_types(tmpdir, datafiles, backend):
+    with setup_backend(backend, str(tmpdir)) as c:
+        c.import_files(os.path.join(str(datafiles), "merge-link"))
+
+        # Test __iter__
+        assert set(c) == {"link", "root-file", "subdirectory"}
+
+        assert c.exists("root-file")
+        assert c.isfile("root-file")
+        assert not c.isdir("root-file")
+        assert not c.islink("root-file")
+
+        st = c.stat("root-file")
+        assert stat.S_ISREG(st.st_mode)
+
+        assert c.exists("link")
+        assert c.islink("link")
+        assert not c.isfile("link")
+        assert c.readlink("link") == "root-file"
+
+        st = c.stat("link")
+        assert stat.S_ISLNK(st.st_mode)
+
+        assert c.exists("subdirectory")
+        assert c.isdir("subdirectory")
+        assert not c.isfile("subdirectory")
+        subdir = c.descend("subdirectory")
+        assert set(subdir) == {"subdir-file"}
+
+        st = c.stat("subdirectory")
+        assert stat.S_ISDIR(st.st_mode)
+
+
+@pytest.mark.parametrize("backend", [FileBasedDirectory, CasBasedDirectory])
+@pytest.mark.datafiles(DATA_DIR)
+def test_open_file(tmpdir, datafiles, backend):
+    with setup_backend(backend, str(tmpdir)) as c:
+        assert not c.isfile("hello")
+
+        with c.open_file("hello", mode="w") as f:
+            f.write("world")
+        assert c.isfile("hello")
+
+        assert c.file_digest("hello") == hashlib.sha256(b"world").hexdigest()
+
+        with c.open_file("hello", mode="r") as f:
+            assert f.read() == "world"
+
+
+@pytest.mark.parametrize("backend", [FileBasedDirectory, CasBasedDirectory])
+@pytest.mark.datafiles(DATA_DIR)
+def test_remove(tmpdir, datafiles, backend):
+    with setup_backend(backend, str(tmpdir)) as c:
+        c.import_files(os.path.join(str(datafiles), "merge-link"))
+
+        with pytest.raises((OSError, VirtualDirectoryError)):
+            c.remove("subdirectory")
+
+        with pytest.raises(FileNotFoundError):
+            c.remove("subdirectory", "does-not-exist")
+
+        # Check that `remove()` doesn't follow symlinks
+        c.remove("link")
+        assert not c.exists("link")
+        assert c.exists("root-file")
+
+        c.remove("subdirectory", recursive=True)
+        assert not c.exists("subdirectory")
+
+        # Removing an empty directory does not require recursive=True
+        c.descend("empty-directory", create=True)
+        c.remove("empty-directory")
+
+
+@pytest.mark.parametrize("backend", [FileBasedDirectory, CasBasedDirectory])
+@pytest.mark.datafiles(DATA_DIR)
+def test_rename(tmpdir, datafiles, backend):
+    with setup_backend(backend, str(tmpdir)) as c:
+        c.import_files(os.path.join(str(datafiles), "original"))
+
+        c.rename(["bin", "hello"], ["bin", "hello2"])
+        c.rename(["bin"], ["bin2"])
+
+        assert c.isfile("bin2", "hello2")
+
+
 # This is purely for error output; lists relative paths and
 # their digests so differences are human-grokkable
 def list_relative_paths(directory):