Merge pull request #1524 from apache/tristan/disambiguate-artifact-globs

Revert to expecting '.bst' on the command line to disambiguate elements and artifacts
diff --git a/NEWS b/NEWS
index 74d32bc..fd72514 100644
--- a/NEWS
+++ b/NEWS
@@ -15,6 +15,10 @@
   o The `--pull` option is removed from the `bst shell` and `bst artifact checkout`
     commands, which will now unconditionally try to pull required artifacts.
 
+  o BREAKING CHANGE: Now the artifact commands only accept element names if specified
+    with a `.bst` suffix (including wildcard expressions), and otherwise assumes the
+    command means to specify an artifact name.
+
 
 ==================
 buildstream 1.93.6
diff --git a/src/buildstream/_frontend/cli.py b/src/buildstream/_frontend/cli.py
index d2fcc62..9ce716d 100644
--- a/src/buildstream/_frontend/cli.py
+++ b/src/buildstream/_frontend/cli.py
@@ -1190,20 +1190,28 @@
 
     \b
     - artifact refs: triples of the form <project name>/<element name>/<cache key>
-    - element paths
+    - element names
 
-    When elements are given, the artifact corresponding to the element is used.
+    When elements are given, the artifact is looked up by observing the element
+    and it's current cache key.
 
     The commands also support shell-style wildcard expansion: `?` matches a
-    single character, and `*` matches zero or more. The patterns are matched
-    against artifact refs by default. If the pattern ends with `.bst` then
-    it matches element paths instead. Some example arguments are:
+    single character, `*` matches zero or more characters but does not match the `/`
+    path separator, and `**` matches zero or more characters including `/` path separators.
+
+    If the wildcard expression ends with `.bst`, then it will be used to search
+    element names found in the project, otherwise, it will be used to search artifacts
+    that are present in the local artifact cache.
+
+    Some example arguments are:
 
     \b
     - `myproject/hello/8276376b077eda104c812e6ec2f488c7c9eea211ce572c83d734c10bf241209f`
     - `myproject/he*/827637*`
-    - `*.bst` (all elements)
-    - `myproject/*` (all artifacts from myproject)
+    - `core/*.bst` (all elements in the core directory)
+    - `**.bst` (all elements)
+    - `myproject/**` (all artifacts from myproject)
+    - `myproject/myelement/*` (all cached artifacts for a specific element)
     """
     # Note that the backticks in the above docstring are important for the
     # generated docs. When sphinx is generating rst output from the help output
diff --git a/src/buildstream/_loader/loader.py b/src/buildstream/_loader/loader.py
index ea73afd..71fff51 100644
--- a/src/buildstream/_loader/loader.py
+++ b/src/buildstream/_loader/loader.py
@@ -298,18 +298,6 @@
 
                 raise LoadError(message, LoadErrorReason.MISSING_FILE, detail=detail) from e
 
-            if e.reason == LoadErrorReason.LOADING_DIRECTORY:
-                # If a <directory>.bst file exists in the element path,
-                # let's suggest this as a plausible alternative.
-                message = str(e)
-                if provenance_node:
-                    message = "{}: {}".format(provenance_node.get_provenance(), message)
-                detail = None
-                if os.path.exists(os.path.join(self._basedir, filename + ".bst")):
-                    element_name = filename + ".bst"
-                    detail = "Did you mean '{}'?\n".format(element_name)
-                raise LoadError(message, LoadErrorReason.LOADING_DIRECTORY, detail=detail) from e
-
             # Otherwise, we don't know the reason, so just raise
             raise
 
diff --git a/src/buildstream/_stream.py b/src/buildstream/_stream.py
index 6fcfd12..ca4a098 100644
--- a/src/buildstream/_stream.py
+++ b/src/buildstream/_stream.py
@@ -1040,7 +1040,11 @@
                 if todo_elements:
                     # This output should make creating the remaining workspaces as easy as possible.
                     todo_elements = "\nDid not try to create workspaces for " + todo_elements
-                raise StreamError("Failed to create workspace directory: {}".format(e) + todo_elements) from e
+                raise StreamError(
+                    "Failed to create workspace directory: {}".format(e),
+                    reason="workspace-directory-failure",
+                    detail=todo_elements,
+                ) from e
 
             workspaces.create_workspace(target, directory, checkout=not no_checkout)
             self._context.messenger.info("Created a workspace for element: {}".format(target._get_full_name()))
@@ -1999,80 +2003,94 @@
     def _expand_and_classify_targets(
         self, targets: Iterable[str], valid_artifact_names: bool = False
     ) -> Tuple[List[str], List[str]]:
-        initial_targets = []
-        element_targets = []
-        artifact_names = []
-        globs = {}  # Count whether a glob matched elements and artifacts
+        #
+        # We use dicts here instead of sets, in order to deduplicate any possibly duplicate
+        # entries, while also retaining the original order of element specification/discovery,
+        # (which we cannot do with sets).
+        #
+        element_names = {}
+        artifact_names = {}
+        element_globs = {}
+        artifact_globs = {}
 
-        # First extract the globs
+        # First sort out globs and targets
         for target in targets:
             if any(c in "*?[" for c in target):
-                globs[target] = 0
-            else:
-                initial_targets.append(target)
-
-        # Filter out any targets which are found to be artifact names
-        if valid_artifact_names:
-            for target in initial_targets:
-                try:
-                    verify_artifact_ref(target)
-                except ArtifactElementError:
-                    element_targets.append(target)
+                if target.endswith(".bst"):
+                    element_globs[target] = True
                 else:
-                    artifact_names.append(target)
-        else:
-            element_targets = initial_targets
+                    artifact_globs[target] = True
+            elif target.endswith(".bst"):
+                element_names[target] = True
+            else:
+                artifact_names[target] = True
+
+        # Bail out in commands which don't support artifacts if any of the targets
+        # or globs did not end with the expected '.bst' suffix.
+        #
+        if (artifact_names or artifact_globs) and not valid_artifact_names:
+            raise StreamError(
+                "Invalid element names or element glob patterns were specified: {}".format(
+                    ", ".join(list(artifact_names) + list(artifact_globs))
+                ),
+                reason="invalid-element-names",
+                detail="Element names and element glob expressions must end in '.bst'",
+            )
+
+        # Verify targets which were not classified as elements
+        for artifact_name in artifact_names:
+            try:
+                verify_artifact_ref(artifact_name)
+            except ArtifactElementError as e:
+                raise StreamError(
+                    "Specified target does not appear to be an artifact or element name: {}".format(artifact_name),
+                    reason="unrecognized-target-format",
+                    detail="Element names and element glob expressions must end in '.bst'",
+                ) from e
 
         # Expand globs for elements
-        if self._project:
+        if element_globs:
+
+            # Bail out if an element glob is specified without providing a project directory
+            if not self._project:
+                raise StreamError(
+                    "Element glob expressions were specified without any project directory: {}".format(
+                        ", ".join(element_globs)
+                    ),
+                    reason="glob-elements-without-project",
+                )
+
+            # Collect a list of `all_elements` in the project, stripping out the leading
+            # project directory and element path prefix, to produce only element names.
+            #
             all_elements = []
             element_path_length = len(self._project.element_path) + 1
             for dirpath, _, filenames in os.walk(self._project.element_path):
                 for filename in filenames:
                     if filename.endswith(".bst"):
-                        element_path = os.path.join(dirpath, filename)
-                        element_path = element_path[element_path_length:]  # Strip out the element_path
-                        all_elements.append(element_path)
+                        element_name = os.path.join(dirpath, filename)
+                        element_name = element_name[element_path_length:]
+                        all_elements.append(element_name)
 
-            for glob in globs:
-                matched = False
-                for element_path in utils.glob(all_elements, glob):
-                    element_targets.append(element_path)
-                    matched = True
-                if matched:
-                    globs[glob] = globs[glob] + 1
+            # Glob the elements and add the results to the set
+            #
+            for glob in element_globs:
+                glob_results = list(utils.glob(all_elements, glob))
+                for element_name in glob_results:
+                    element_names[element_name] = True
+                if not glob_results:
+                    self._context.messenger.warn("No elements matched the glob expression: {}".format(glob))
 
-        # Expand globs for artifact names
-        if valid_artifact_names:
-            for glob in globs:
-                matches = self._artifacts.list_artifacts(glob=glob)
-                if matches:
-                    artifact_names.extend(matches)
-                    globs[glob] = globs[glob] + 1
+        # Glob the artifact names and add the results to the set
+        #
+        for glob in artifact_globs:
+            glob_results = self._artifacts.list_artifacts(glob=glob)
+            for artifact_name in glob_results:
+                artifact_names[artifact_name] = True
+            if not glob_results:
+                self._context.messenger.warn("No artifact names matched the glob expression: {}".format(glob))
 
-        # Issue warnings and errors
-        unmatched = [glob for glob, glob_count in globs.items() if glob_count == 0]
-        doubly_matched = [glob for glob, glob_count in globs.items() if glob_count > 1]
-
-        # Warn the user if any of the provided globs did not match anything
-        if unmatched:
-            if valid_artifact_names:
-                message = "No elements or artifacts matched the following glob expression(s): {}".format(
-                    ", ".join(unmatched)
-                )
-            else:
-                message = "No elements matched the following glob expression(s): {}".format(", ".join(unmatched))
-            self._context.messenger.warn(message)
-
-        if doubly_matched:
-            raise StreamError(
-                "The provided glob expression(s) matched both element names and artifact names: {}".format(
-                    ", ".join(doubly_matched)
-                ),
-                reason="glob-elements-and-artifacts",
-            )
-
-        return element_targets, artifact_names
+        return list(element_names), list(artifact_names)
 
 
 # _handle_compression()
diff --git a/tests/format/elementnames.py b/tests/format/elementnames.py
index f059d54..2e34952 100644
--- a/tests/format/elementnames.py
+++ b/tests/format/elementnames.py
@@ -11,19 +11,33 @@
 
 
 @pytest.mark.parametrize(
-    "target,reason,provenance",
+    "target,domain,reason,provenance",
     [
-        ("farm.pony", LoadErrorReason.BAD_ELEMENT_SUFFIX, None),
-        ('The "quoted" pony.bst', LoadErrorReason.BAD_CHARACTERS_IN_NAME, None),
-        ("bad-suffix-dep.bst", LoadErrorReason.BAD_ELEMENT_SUFFIX, "bad-suffix-dep.bst [line 3 column 2]"),
-        ("bad-chars-dep.bst", LoadErrorReason.BAD_CHARACTERS_IN_NAME, "bad-chars-dep.bst [line 3 column 2]"),
+        # When specifying a bad suffix on the command line we get a different error, we
+        # catch this error earlier on in the load sequence while sorting out element and
+        # artifact names and glob expressions.
+        #
+        ("farm.pony", ErrorDomain.STREAM, "invalid-element-names", None),
+        ('The "quoted" pony.bst', ErrorDomain.LOAD, LoadErrorReason.BAD_CHARACTERS_IN_NAME, None),
+        (
+            "bad-suffix-dep.bst",
+            ErrorDomain.LOAD,
+            LoadErrorReason.BAD_ELEMENT_SUFFIX,
+            "bad-suffix-dep.bst [line 3 column 2]",
+        ),
+        (
+            "bad-chars-dep.bst",
+            ErrorDomain.LOAD,
+            LoadErrorReason.BAD_CHARACTERS_IN_NAME,
+            "bad-chars-dep.bst [line 3 column 2]",
+        ),
     ],
     ids=["toplevel-bad-suffix", "toplevel-bad-chars", "dependency-bad-suffix", "dependency-bad-chars"],
 )
 @pytest.mark.datafiles(DATA_DIR)
-def test_invalid_element_names(cli, datafiles, target, reason, provenance):
+def test_invalid_element_names(cli, datafiles, target, domain, reason, provenance):
     project = os.path.join(str(datafiles), "elementnames")
     result = cli.run(project=project, silent=True, args=["show", target])
-    result.assert_main_error(ErrorDomain.LOAD, reason)
+    result.assert_main_error(domain, reason)
     if provenance:
         assert provenance in result.stderr
diff --git a/tests/frontend/artifact_show.py b/tests/frontend/artifact_show.py
index 652adfb..6810847 100644
--- a/tests/frontend/artifact_show.py
+++ b/tests/frontend/artifact_show.py
@@ -150,30 +150,6 @@
         assert found, "Expected result {} not found".format(expected_prefix)
 
 
-# Test artifact show glob behaviors
-@pytest.mark.datafiles(SIMPLE_DIR)
-@pytest.mark.parametrize(
-    "pattern",
-    [
-        # Catch all glob will match everything, that is an error since the glob matches
-        # both elements and artifacts
-        #
-        "**",
-        # This glob is more selective but will also match both artifacts and elements
-        #
-        "**import-bin**",
-    ],
-)
-def test_artifact_show_doubly_matched_glob_error(cli, tmpdir, datafiles, pattern):
-    project = str(datafiles)
-
-    result = cli.run(project=project, args=["build", "target.bst"])
-    result.assert_success()
-
-    result = cli.run(project=project, args=["artifact", "show", pattern])
-    result.assert_main_error(ErrorDomain.STREAM, "glob-elements-and-artifacts")
-
-
 # Test artifact show artifact in remote
 @pytest.mark.datafiles(DATA_DIR)
 def test_artifact_show_element_available_remotely(cli, tmpdir, datafiles):
diff --git a/tests/frontend/show.py b/tests/frontend/show.py
index 456123b..baca10e 100644
--- a/tests/frontend/show.py
+++ b/tests/frontend/show.py
@@ -55,9 +55,6 @@
 @pytest.mark.parametrize(
     "pattern,expected_elements",
     [
-        # Use catch all glob. This should report all elements.
-        #
-        ("**", ["import-bin.bst", "import-dev.bst", "compose-all.bst", "target.bst", "subdir/target.bst"]),
         # Only bst files, same as "**" for `bst show`
         #
         ("**.bst", ["import-bin.bst", "import-dev.bst", "compose-all.bst", "target.bst", "subdir/target.bst"]),
@@ -67,18 +64,15 @@
         ("*.bst", ["import-bin.bst", "import-dev.bst", "compose-all.bst", "target.bst"]),
         # Report only targets in the subdirectory
         #
-        ("subdir/*", ["subdir/target.bst"]),
+        ("subdir/*.bst", ["subdir/target.bst"]),
         # Report both targets which end in "target.bst"
         #
         ("**target.bst", ["target.bst", "subdir/target.bst"]),
         # All elements starting with the prefix "import"
         #
-        ("import*", ["import-bin.bst", "import-dev.bst"]),
-        # Glob would match artifact refs, but `bst show` does not accept these as input.
-        #
-        ("test/**", []),
+        ("import*.bst", ["import-bin.bst", "import-dev.bst"]),
     ],
-    ids=["**", "**.bst", "*.bst", "subdir/*", "**target.bst", "import*", "test/**"],
+    ids=["**.bst", "*.bst", "subdir/*", "**target.bst", "import*"],
 )
 def test_show_glob(cli, tmpdir, datafiles, pattern, expected_elements):
     project = str(datafiles)
diff --git a/tests/frontend/workspace.py b/tests/frontend/workspace.py
index 26c0a99..bd64a24 100644
--- a/tests/frontend/workspace.py
+++ b/tests/frontend/workspace.py
@@ -215,7 +215,7 @@
         # Using this finally to make sure we always put thing back how they should be.
         os.chmod(workspace_object.workspace_cmd, cwdstat.st_mode)
 
-    result.assert_main_error(ErrorDomain.STREAM, None)
+    result.assert_main_error(ErrorDomain.STREAM, "workspace-directory-failure")
     # Normally we avoid checking stderr in favour of using the mechine readable result.assert_main_error
     # But Tristan was very keen that the names of the elements left needing workspaces were present in the out put
     assert " ".join([element_name for element_name, workspace_dir_suffix in element_tuples[1:]]) in result.stderr
diff --git a/tests/internals/loader.py b/tests/internals/loader.py
index 2da0172..fd4a357 100644
--- a/tests/internals/loader.py
+++ b/tests/internals/loader.py
@@ -90,7 +90,8 @@
 def test_invalid_directory_load(datafiles):
 
     basedir = str(datafiles)
+    os.makedirs(os.path.join(basedir, "element.bst"))
     with make_loader(basedir) as loader, pytest.raises(LoadError) as exc:
-        loader.load(["elements/"])
+        loader.load(["element.bst"])
 
     assert exc.value.reason == LoadErrorReason.LOADING_DIRECTORY