Merge pull request #1522 from apache/tristan/shell-checkout-always-pull

Unconditional pull of artifacts in `bst shell` and `bst artifact checkout`
diff --git a/NEWS b/NEWS
index 18c81c4..74d32bc 100644
--- a/NEWS
+++ b/NEWS
@@ -12,6 +12,9 @@
 ---
   o BREAKING CHANGE: Removed `--deps plan` from all commands
 
+  o The `--pull` option is removed from the `bst shell` and `bst artifact checkout`
+    commands, which will now unconditionally try to pull required artifacts.
+
 
 ==================
 buildstream 1.93.6
diff --git a/src/buildstream/_frontend/cli.py b/src/buildstream/_frontend/cli.py
index 4c3df63..d2fcc62 100644
--- a/src/buildstream/_frontend/cli.py
+++ b/src/buildstream/_frontend/cli.py
@@ -633,12 +633,10 @@
     "cli_buildtree",
     is_flag=True,
     help=(
-        "Stage a buildtree. Will fail if a buildtree is not available."
-        " --pull and pull-buildtrees configuration is needed "
-        "if trying to query for remotely cached buildtrees."
+        "Stage a buildtree. Will fail if a buildtree is not available. "
+        "pull-buildtrees configuration is needed if the buildtree is not available locally."
     ),
 )
-@click.option("--pull", "pull_", is_flag=True, help="Attempt to pull missing or incomplete artifacts")
 @click.option(
     "--artifact-remote",
     "artifact_remotes",
@@ -672,7 +670,6 @@
     isolate,
     build_,
     cli_buildtree,
-    pull_,
     artifact_remotes,
     source_remotes,
     ignore_project_artifact_remotes,
@@ -723,7 +720,6 @@
                 isolate=isolate,
                 command=command,
                 usebuildtree=cli_buildtree,
-                pull_=pull_,
                 artifact_remotes=artifact_remotes,
                 source_remotes=source_remotes,
                 ignore_project_artifact_remotes=ignore_project_artifact_remotes,
@@ -1273,7 +1269,6 @@
     type=click.Choice(["gz", "xz", "bz2"]),
     help="The compression option of the tarball created.",
 )
-@click.option("--pull", "pull_", is_flag=True, help="Pull the artifact if it's missing or incomplete.")
 @click.option(
     "--directory", default=None, type=click.Path(file_okay=False), help="The directory to checkout the artifact to"
 )
@@ -1299,7 +1294,6 @@
     hardlinks,
     tar,
     compression,
-    pull_,
     directory,
     artifact_remotes,
     ignore_project_artifact_remotes,
@@ -1361,7 +1355,6 @@
             selection=deps,
             integrate=True if integrate is None else integrate,
             hardlinks=hardlinks,
-            pull=pull_,
             compression=compression,
             tar=bool(tar),
             artifact_remotes=artifact_remotes,
diff --git a/src/buildstream/_stream.py b/src/buildstream/_stream.py
index a1a12fe..6fcfd12 100644
--- a/src/buildstream/_stream.py
+++ b/src/buildstream/_stream.py
@@ -246,7 +246,6 @@
     #    isolate (bool): Whether to isolate the environment like we do in builds
     #    command (list): An argv to launch in the sandbox, or None
     #    usebuildtree (bool): Whether to use a buildtree as the source, given cli option
-    #    pull_ (bool): Whether to attempt to pull missing or incomplete artifacts
     #    artifact_remotes: Artifact cache remotes specified on the commmand line
     #    source_remotes: Source cache remotes specified on the commmand line
     #    ignore_project_artifact_remotes: Whether to ignore artifact remotes specified by projects
@@ -266,7 +265,6 @@
         isolate: bool = False,
         command: Optional[List[str]] = None,
         usebuildtree: bool = False,
-        pull_: bool = False,
         artifact_remotes: Iterable[RemoteSpec] = (),
         source_remotes: Iterable[RemoteSpec] = (),
         ignore_project_artifact_remotes: bool = False,
@@ -283,7 +281,7 @@
             elements = self.load_selection(
                 (target,),
                 selection=selection,
-                connect_artifact_cache=pull_,
+                connect_artifact_cache=True,
                 connect_source_cache=True,
                 artifact_remotes=artifact_remotes,
                 source_remotes=source_remotes,
@@ -297,17 +295,12 @@
             element = self.targets[0]
             element._set_required(scope)
 
-            self.query_cache([element] + elements)
-
-            if pull_:
-                self._reset()
-                self._add_queue(PullQueue(self._scheduler))
-
-                # Pull the toplevel element regardless of whether it is in scope
-                plan = elements if element in elements else [element] + elements
-
-                self._enqueue_plan(plan)
-                self._run()
+            # Check whether the required elements are cached, and then
+            # try to pull them if they are not already cached.
+            #
+            pull_elements = [element] + elements
+            self.query_cache(pull_elements)
+            self._pull_missing_artifacts(pull_elements)
 
         missing_deps = [dep for dep in _pipeline.dependencies([element], scope) if not dep._cached()]
         if missing_deps:
@@ -320,12 +313,11 @@
         # Check if we require a pull queue attempt, with given artifact state and context
         if usebuildtree:
             if not element._cached_buildtree():
-                remotes_message = " or in available remotes" if pull_ else ""
                 if not element._cached():
-                    message = "Artifact not cached locally" + remotes_message
+                    message = "Artifact not cached locally or in available remotes"
                     reason = "missing-buildtree-artifact-not-cached"
                 elif element._buildtree_exists():
-                    message = "Buildtree is not cached locally" + remotes_message
+                    message = "Buildtree is not cached locally or in available remotes"
                     reason = "missing-buildtree-artifact-buildtree-not-cached"
                 else:
                     message = "Artifact was created without buildtree"
@@ -655,8 +647,6 @@
     #         will be placed at the given location. If true and
     #         location is '-', the tarball will be dumped on the
     #         standard output.
-    #    pull: If true will attempt to pull any missing or incomplete
-    #          artifacts.
     #    artifact_remotes: Artifact cache remotes specified on the commmand line
     #    ignore_project_artifact_remotes: Whether to ignore artifact remotes specified by projects
     #
@@ -670,7 +660,6 @@
         integrate: bool = True,
         hardlinks: bool = False,
         compression: str = "",
-        pull: bool = False,
         tar: bool = False,
         artifact_remotes: Iterable[RemoteSpec] = (),
         ignore_project_artifact_remotes: bool = False,
@@ -681,7 +670,7 @@
             selection=selection,
             load_artifacts=True,
             attempt_artifact_metadata=True,
-            connect_artifact_cache=pull,
+            connect_artifact_cache=True,
             artifact_remotes=artifact_remotes,
             ignore_project_artifact_remotes=ignore_project_artifact_remotes,
         )
@@ -694,15 +683,11 @@
 
         self._check_location_writable(location, force=force, tar=tar)
 
+        # Check whether the required elements are cached, and then
+        # try to pull them if they are not already cached.
+        #
         self.query_cache(elements)
-
-        uncached_elts = [elt for elt in elements if elt._pull_pending()]
-        if uncached_elts and pull:
-            self._context.messenger.info("Attempting to fetch missing or incomplete artifact")
-            self._reset()
-            self._add_queue(PullQueue(self._scheduler))
-            self._enqueue_plan(uncached_elts)
-            self._run(announce_session=True)
+        self._pull_missing_artifacts(elements)
 
         try:
             scope = {
@@ -1453,6 +1438,27 @@
                 element._cached_remotely()
                 task.add_current_progress()
 
+    # _pull_missing_artifacts()
+    #
+    # Pull missing artifacts from available remotes, this runs the scheduler
+    # just to pull the artifacts if any of the artifacts are missing locally,
+    # and is used in commands which need to use the artifacts.
+    #
+    # This function requires Stream.query_cache() to be called in advance
+    # in order to determine which artifacts to try and pull.
+    #
+    # Args:
+    #    elements (list [Element]): The selected list of required elements
+    #
+    def _pull_missing_artifacts(self, elements):
+        uncached_elts = [elt for elt in elements if elt._pull_pending()]
+        if uncached_elts:
+            self._context.messenger.info("Attempting to fetch missing or incomplete artifact(s)")
+            self._reset()
+            self._add_queue(PullQueue(self._scheduler))
+            self._enqueue_plan(uncached_elts)
+            self._run(announce_session=True)
+
     # _load_tracking()
     #
     # A variant of _load() to be used when the elements should be used
diff --git a/tests/frontend/artifact_checkout.py b/tests/frontend/artifact_checkout.py
index e8231b8..962b31f 100644
--- a/tests/frontend/artifact_checkout.py
+++ b/tests/frontend/artifact_checkout.py
@@ -63,8 +63,7 @@
 
         # Now checkout the artifact
         result = cli.run(
-            project=project,
-            args=["artifact", "checkout", "--directory", checkout, "--pull", "--deps", deps, artifact_name],
+            project=project, args=["artifact", "checkout", "--directory", checkout, "--deps", deps, artifact_name],
         )
 
         if deps in ["all", "run"]:
diff --git a/tests/frontend/buildcheckout.py b/tests/frontend/buildcheckout.py
index 330b9cd..b25e313 100644
--- a/tests/frontend/buildcheckout.py
+++ b/tests/frontend/buildcheckout.py
@@ -1070,13 +1070,13 @@
         os.unlink(objpath)
 
         # Verify that the build-only dependency is not (complete) in the local cache
+        cli.configure({"artifacts": {}})
         result = cli.run(project=project, args=["artifact", "checkout", input_name, "--directory", checkout_dir])
         result.assert_main_error(ErrorDomain.STREAM, "uncached-checkout-attempt")
 
         # Verify that the pull method fetches relevant artifacts in order to stage
-        result = cli.run(
-            project=project, args=["artifact", "checkout", "--pull", input_name, "--directory", checkout_dir]
-        )
+        cli.configure({"artifacts": {"servers": [{"url": share.repo, "push": True}]}})
+        result = cli.run(project=project, args=["artifact", "checkout", input_name, "--directory", checkout_dir])
         result.assert_success()
 
         # should have pulled whatever was deleted previous
@@ -1093,7 +1093,7 @@
 
         cli.configure({"artifacts": {"servers": [{"url": share.repo, "push": True}]}})
 
-        res = cli.run(project=project, args=["artifact", "checkout", "--pull", build_elt, "--directory", checkout_dir])
+        res = cli.run(project=project, args=["artifact", "checkout", build_elt, "--directory", checkout_dir])
         res.assert_main_error(ErrorDomain.STREAM, "uncached-checkout-attempt")
         assert re.findall(r"Remote \((\S+)\) does not have artifact (\S+) cached", res.stderr)
 
diff --git a/tests/integration/shell.py b/tests/integration/shell.py
index ed7ed65..4e96fb8 100644
--- a/tests/integration/shell.py
+++ b/tests/integration/shell.py
@@ -391,12 +391,14 @@
         objpath = os.path.join(cachedir, "cas", "objects", digest[:2], digest[2:])
         os.unlink(objpath)
 
-        # check shell doesn't work
+        # check shell doesn't work when it cannot pull the missing bits
+        cli.configure({"artifacts": {}})
         result = cli.run(project=project, args=["shell", element_name, "--", "hello"])
         result.assert_main_error(ErrorDomain.APP, "shell-missing-deps")
 
-        # check the artifact gets completed with '--pull' specified
-        result = cli.run(project=project, args=["shell", "--pull", element_name, "--", "hello"])
+        # check the artifact gets completed with access to the remote
+        cli.configure({"artifacts": {"servers": [{"url": share.repo, "push": True}]}})
+        result = cli.run(project=project, args=["shell", element_name, "--", "hello"])
         result.assert_success()
         assert "autotools/amhello.bst" in result.get_pulled_elements()
 
diff --git a/tests/integration/shellbuildtrees.py b/tests/integration/shellbuildtrees.py
index 4c42551..d7e6bc3 100644
--- a/tests/integration/shellbuildtrees.py
+++ b/tests/integration/shellbuildtrees.py
@@ -247,6 +247,11 @@
     # Optionally pull the buildtree along with `bst artifact pull`
     maybe_pull_deps(cli, project, element_name, pull_deps, pull_buildtree)
 
+    # Disable access to the artifact server after pulling, so that `bst shell` cannot automatically
+    # pull the missing bits, this should be equivalent to the missing bits being missing in a
+    # remote server
+    cli.configure({"artifacts": {}})
+
     # Run the shell without asking it to pull any buildtree, just asking to use a buildtree
     result = cli.run(project=project, args=["shell", "--build", element_name, "--use-buildtree", "--", "cat", "test"])
 
@@ -258,9 +263,10 @@
 
 
 #
-# Test behavior of launching a shell and requesting to use a buildtree, while
-# also requesting to download any missing bits from the artifact server on the fly,
-# again with various states of local cache (ranging from nothing cached to everything cached)
+# Test behavior of launching a shell and requesting to use a buildtree, while allowing
+# BuildStream to download any missing bits from the artifact server on the fly (which
+# it will do by default) again with various states of local cache (ranging from nothing
+# cached to everything cached)
 #
 @pytest.mark.datafiles(DATA_DIR)
 @pytest.mark.skipif(not HAVE_SANDBOX, reason="Only available with a functioning sandbox")
@@ -290,17 +296,7 @@
     # Run the shell and request that required artifacts and buildtrees should be pulled
     result = cli.run(
         project=project,
-        args=[
-            "--pull-buildtrees",
-            "shell",
-            "--build",
-            element_name,
-            "--pull",
-            "--use-buildtree",
-            "--",
-            "cat",
-            "test",
-        ],
+        args=["--pull-buildtrees", "shell", "--build", element_name, "--use-buildtree", "--", "cat", "test",],
     )
 
     # In this case, we should succeed every time, regardless of what was
@@ -331,36 +327,3 @@
 
     # Sorry, a buildtree was never cached for this element
     result.assert_main_error(ErrorDomain.APP, "missing-buildtree-artifact-created-without-buildtree")
-
-
-#
-# Test behavior of launching a shell and requesting to use a buildtree.
-#
-# In this case we download everything we need first, but the buildtree was never cached at build time
-#
-@pytest.mark.datafiles(DATA_DIR)
-@pytest.mark.skipif(not HAVE_SANDBOX, reason="Only available with a functioning sandbox")
-def test_shell_pull_uncached_buildtree(share_without_buildtrees, datafiles, cli):
-    project = str(datafiles)
-    element_name = "build-shell/buildtree.bst"
-
-    cli.configure({"artifacts": {"servers": [{"url": share_without_buildtrees.repo}]}})
-
-    # Run the shell and request that required artifacts and buildtrees should be pulled
-    result = cli.run(
-        project=project,
-        args=[
-            "--pull-buildtrees",
-            "shell",
-            "--build",
-            element_name,
-            "--pull",
-            "--use-buildtree",
-            "--",
-            "cat",
-            "test",
-        ],
-    )
-
-    # Sorry, a buildtree was never cached for this element
-    result.assert_main_error(ErrorDomain.APP, "missing-buildtree-artifact-created-without-buildtree")