Merge pull request #1524 from apache/tristan/disambiguate-artifact-globs
Revert to expecting '.bst' on the command line to disambiguate elements and artifacts
diff --git a/NEWS b/NEWS
index 74d32bc..fd72514 100644
--- a/NEWS
+++ b/NEWS
@@ -15,6 +15,10 @@
o The `--pull` option is removed from the `bst shell` and `bst artifact checkout`
commands, which will now unconditionally try to pull required artifacts.
+ o BREAKING CHANGE: Now the artifact commands only accept element names if specified
+ with a `.bst` suffix (including wildcard expressions), and otherwise assumes the
+ command means to specify an artifact name.
+
==================
buildstream 1.93.6
diff --git a/src/buildstream/_frontend/cli.py b/src/buildstream/_frontend/cli.py
index d2fcc62..9ce716d 100644
--- a/src/buildstream/_frontend/cli.py
+++ b/src/buildstream/_frontend/cli.py
@@ -1190,20 +1190,28 @@
\b
- artifact refs: triples of the form <project name>/<element name>/<cache key>
- - element paths
+ - element names
- When elements are given, the artifact corresponding to the element is used.
+ When elements are given, the artifact is looked up by observing the element
+ and it's current cache key.
The commands also support shell-style wildcard expansion: `?` matches a
- single character, and `*` matches zero or more. The patterns are matched
- against artifact refs by default. If the pattern ends with `.bst` then
- it matches element paths instead. Some example arguments are:
+ single character, `*` matches zero or more characters but does not match the `/`
+ path separator, and `**` matches zero or more characters including `/` path separators.
+
+ If the wildcard expression ends with `.bst`, then it will be used to search
+ element names found in the project, otherwise, it will be used to search artifacts
+ that are present in the local artifact cache.
+
+ Some example arguments are:
\b
- `myproject/hello/8276376b077eda104c812e6ec2f488c7c9eea211ce572c83d734c10bf241209f`
- `myproject/he*/827637*`
- - `*.bst` (all elements)
- - `myproject/*` (all artifacts from myproject)
+ - `core/*.bst` (all elements in the core directory)
+ - `**.bst` (all elements)
+ - `myproject/**` (all artifacts from myproject)
+ - `myproject/myelement/*` (all cached artifacts for a specific element)
"""
# Note that the backticks in the above docstring are important for the
# generated docs. When sphinx is generating rst output from the help output
diff --git a/src/buildstream/_loader/loader.py b/src/buildstream/_loader/loader.py
index ea73afd..71fff51 100644
--- a/src/buildstream/_loader/loader.py
+++ b/src/buildstream/_loader/loader.py
@@ -298,18 +298,6 @@
raise LoadError(message, LoadErrorReason.MISSING_FILE, detail=detail) from e
- if e.reason == LoadErrorReason.LOADING_DIRECTORY:
- # If a <directory>.bst file exists in the element path,
- # let's suggest this as a plausible alternative.
- message = str(e)
- if provenance_node:
- message = "{}: {}".format(provenance_node.get_provenance(), message)
- detail = None
- if os.path.exists(os.path.join(self._basedir, filename + ".bst")):
- element_name = filename + ".bst"
- detail = "Did you mean '{}'?\n".format(element_name)
- raise LoadError(message, LoadErrorReason.LOADING_DIRECTORY, detail=detail) from e
-
# Otherwise, we don't know the reason, so just raise
raise
diff --git a/src/buildstream/_stream.py b/src/buildstream/_stream.py
index 6fcfd12..ca4a098 100644
--- a/src/buildstream/_stream.py
+++ b/src/buildstream/_stream.py
@@ -1040,7 +1040,11 @@
if todo_elements:
# This output should make creating the remaining workspaces as easy as possible.
todo_elements = "\nDid not try to create workspaces for " + todo_elements
- raise StreamError("Failed to create workspace directory: {}".format(e) + todo_elements) from e
+ raise StreamError(
+ "Failed to create workspace directory: {}".format(e),
+ reason="workspace-directory-failure",
+ detail=todo_elements,
+ ) from e
workspaces.create_workspace(target, directory, checkout=not no_checkout)
self._context.messenger.info("Created a workspace for element: {}".format(target._get_full_name()))
@@ -1999,80 +2003,94 @@
def _expand_and_classify_targets(
self, targets: Iterable[str], valid_artifact_names: bool = False
) -> Tuple[List[str], List[str]]:
- initial_targets = []
- element_targets = []
- artifact_names = []
- globs = {} # Count whether a glob matched elements and artifacts
+ #
+ # We use dicts here instead of sets, in order to deduplicate any possibly duplicate
+ # entries, while also retaining the original order of element specification/discovery,
+ # (which we cannot do with sets).
+ #
+ element_names = {}
+ artifact_names = {}
+ element_globs = {}
+ artifact_globs = {}
- # First extract the globs
+ # First sort out globs and targets
for target in targets:
if any(c in "*?[" for c in target):
- globs[target] = 0
- else:
- initial_targets.append(target)
-
- # Filter out any targets which are found to be artifact names
- if valid_artifact_names:
- for target in initial_targets:
- try:
- verify_artifact_ref(target)
- except ArtifactElementError:
- element_targets.append(target)
+ if target.endswith(".bst"):
+ element_globs[target] = True
else:
- artifact_names.append(target)
- else:
- element_targets = initial_targets
+ artifact_globs[target] = True
+ elif target.endswith(".bst"):
+ element_names[target] = True
+ else:
+ artifact_names[target] = True
+
+ # Bail out in commands which don't support artifacts if any of the targets
+ # or globs did not end with the expected '.bst' suffix.
+ #
+ if (artifact_names or artifact_globs) and not valid_artifact_names:
+ raise StreamError(
+ "Invalid element names or element glob patterns were specified: {}".format(
+ ", ".join(list(artifact_names) + list(artifact_globs))
+ ),
+ reason="invalid-element-names",
+ detail="Element names and element glob expressions must end in '.bst'",
+ )
+
+ # Verify targets which were not classified as elements
+ for artifact_name in artifact_names:
+ try:
+ verify_artifact_ref(artifact_name)
+ except ArtifactElementError as e:
+ raise StreamError(
+ "Specified target does not appear to be an artifact or element name: {}".format(artifact_name),
+ reason="unrecognized-target-format",
+ detail="Element names and element glob expressions must end in '.bst'",
+ ) from e
# Expand globs for elements
- if self._project:
+ if element_globs:
+
+ # Bail out if an element glob is specified without providing a project directory
+ if not self._project:
+ raise StreamError(
+ "Element glob expressions were specified without any project directory: {}".format(
+ ", ".join(element_globs)
+ ),
+ reason="glob-elements-without-project",
+ )
+
+ # Collect a list of `all_elements` in the project, stripping out the leading
+ # project directory and element path prefix, to produce only element names.
+ #
all_elements = []
element_path_length = len(self._project.element_path) + 1
for dirpath, _, filenames in os.walk(self._project.element_path):
for filename in filenames:
if filename.endswith(".bst"):
- element_path = os.path.join(dirpath, filename)
- element_path = element_path[element_path_length:] # Strip out the element_path
- all_elements.append(element_path)
+ element_name = os.path.join(dirpath, filename)
+ element_name = element_name[element_path_length:]
+ all_elements.append(element_name)
- for glob in globs:
- matched = False
- for element_path in utils.glob(all_elements, glob):
- element_targets.append(element_path)
- matched = True
- if matched:
- globs[glob] = globs[glob] + 1
+ # Glob the elements and add the results to the set
+ #
+ for glob in element_globs:
+ glob_results = list(utils.glob(all_elements, glob))
+ for element_name in glob_results:
+ element_names[element_name] = True
+ if not glob_results:
+ self._context.messenger.warn("No elements matched the glob expression: {}".format(glob))
- # Expand globs for artifact names
- if valid_artifact_names:
- for glob in globs:
- matches = self._artifacts.list_artifacts(glob=glob)
- if matches:
- artifact_names.extend(matches)
- globs[glob] = globs[glob] + 1
+ # Glob the artifact names and add the results to the set
+ #
+ for glob in artifact_globs:
+ glob_results = self._artifacts.list_artifacts(glob=glob)
+ for artifact_name in glob_results:
+ artifact_names[artifact_name] = True
+ if not glob_results:
+ self._context.messenger.warn("No artifact names matched the glob expression: {}".format(glob))
- # Issue warnings and errors
- unmatched = [glob for glob, glob_count in globs.items() if glob_count == 0]
- doubly_matched = [glob for glob, glob_count in globs.items() if glob_count > 1]
-
- # Warn the user if any of the provided globs did not match anything
- if unmatched:
- if valid_artifact_names:
- message = "No elements or artifacts matched the following glob expression(s): {}".format(
- ", ".join(unmatched)
- )
- else:
- message = "No elements matched the following glob expression(s): {}".format(", ".join(unmatched))
- self._context.messenger.warn(message)
-
- if doubly_matched:
- raise StreamError(
- "The provided glob expression(s) matched both element names and artifact names: {}".format(
- ", ".join(doubly_matched)
- ),
- reason="glob-elements-and-artifacts",
- )
-
- return element_targets, artifact_names
+ return list(element_names), list(artifact_names)
# _handle_compression()
diff --git a/tests/format/elementnames.py b/tests/format/elementnames.py
index f059d54..2e34952 100644
--- a/tests/format/elementnames.py
+++ b/tests/format/elementnames.py
@@ -11,19 +11,33 @@
@pytest.mark.parametrize(
- "target,reason,provenance",
+ "target,domain,reason,provenance",
[
- ("farm.pony", LoadErrorReason.BAD_ELEMENT_SUFFIX, None),
- ('The "quoted" pony.bst', LoadErrorReason.BAD_CHARACTERS_IN_NAME, None),
- ("bad-suffix-dep.bst", LoadErrorReason.BAD_ELEMENT_SUFFIX, "bad-suffix-dep.bst [line 3 column 2]"),
- ("bad-chars-dep.bst", LoadErrorReason.BAD_CHARACTERS_IN_NAME, "bad-chars-dep.bst [line 3 column 2]"),
+ # When specifying a bad suffix on the command line we get a different error, we
+ # catch this error earlier on in the load sequence while sorting out element and
+ # artifact names and glob expressions.
+ #
+ ("farm.pony", ErrorDomain.STREAM, "invalid-element-names", None),
+ ('The "quoted" pony.bst', ErrorDomain.LOAD, LoadErrorReason.BAD_CHARACTERS_IN_NAME, None),
+ (
+ "bad-suffix-dep.bst",
+ ErrorDomain.LOAD,
+ LoadErrorReason.BAD_ELEMENT_SUFFIX,
+ "bad-suffix-dep.bst [line 3 column 2]",
+ ),
+ (
+ "bad-chars-dep.bst",
+ ErrorDomain.LOAD,
+ LoadErrorReason.BAD_CHARACTERS_IN_NAME,
+ "bad-chars-dep.bst [line 3 column 2]",
+ ),
],
ids=["toplevel-bad-suffix", "toplevel-bad-chars", "dependency-bad-suffix", "dependency-bad-chars"],
)
@pytest.mark.datafiles(DATA_DIR)
-def test_invalid_element_names(cli, datafiles, target, reason, provenance):
+def test_invalid_element_names(cli, datafiles, target, domain, reason, provenance):
project = os.path.join(str(datafiles), "elementnames")
result = cli.run(project=project, silent=True, args=["show", target])
- result.assert_main_error(ErrorDomain.LOAD, reason)
+ result.assert_main_error(domain, reason)
if provenance:
assert provenance in result.stderr
diff --git a/tests/frontend/artifact_show.py b/tests/frontend/artifact_show.py
index 652adfb..6810847 100644
--- a/tests/frontend/artifact_show.py
+++ b/tests/frontend/artifact_show.py
@@ -150,30 +150,6 @@
assert found, "Expected result {} not found".format(expected_prefix)
-# Test artifact show glob behaviors
-@pytest.mark.datafiles(SIMPLE_DIR)
-@pytest.mark.parametrize(
- "pattern",
- [
- # Catch all glob will match everything, that is an error since the glob matches
- # both elements and artifacts
- #
- "**",
- # This glob is more selective but will also match both artifacts and elements
- #
- "**import-bin**",
- ],
-)
-def test_artifact_show_doubly_matched_glob_error(cli, tmpdir, datafiles, pattern):
- project = str(datafiles)
-
- result = cli.run(project=project, args=["build", "target.bst"])
- result.assert_success()
-
- result = cli.run(project=project, args=["artifact", "show", pattern])
- result.assert_main_error(ErrorDomain.STREAM, "glob-elements-and-artifacts")
-
-
# Test artifact show artifact in remote
@pytest.mark.datafiles(DATA_DIR)
def test_artifact_show_element_available_remotely(cli, tmpdir, datafiles):
diff --git a/tests/frontend/show.py b/tests/frontend/show.py
index 456123b..baca10e 100644
--- a/tests/frontend/show.py
+++ b/tests/frontend/show.py
@@ -55,9 +55,6 @@
@pytest.mark.parametrize(
"pattern,expected_elements",
[
- # Use catch all glob. This should report all elements.
- #
- ("**", ["import-bin.bst", "import-dev.bst", "compose-all.bst", "target.bst", "subdir/target.bst"]),
# Only bst files, same as "**" for `bst show`
#
("**.bst", ["import-bin.bst", "import-dev.bst", "compose-all.bst", "target.bst", "subdir/target.bst"]),
@@ -67,18 +64,15 @@
("*.bst", ["import-bin.bst", "import-dev.bst", "compose-all.bst", "target.bst"]),
# Report only targets in the subdirectory
#
- ("subdir/*", ["subdir/target.bst"]),
+ ("subdir/*.bst", ["subdir/target.bst"]),
# Report both targets which end in "target.bst"
#
("**target.bst", ["target.bst", "subdir/target.bst"]),
# All elements starting with the prefix "import"
#
- ("import*", ["import-bin.bst", "import-dev.bst"]),
- # Glob would match artifact refs, but `bst show` does not accept these as input.
- #
- ("test/**", []),
+ ("import*.bst", ["import-bin.bst", "import-dev.bst"]),
],
- ids=["**", "**.bst", "*.bst", "subdir/*", "**target.bst", "import*", "test/**"],
+ ids=["**.bst", "*.bst", "subdir/*", "**target.bst", "import*"],
)
def test_show_glob(cli, tmpdir, datafiles, pattern, expected_elements):
project = str(datafiles)
diff --git a/tests/frontend/workspace.py b/tests/frontend/workspace.py
index 26c0a99..bd64a24 100644
--- a/tests/frontend/workspace.py
+++ b/tests/frontend/workspace.py
@@ -215,7 +215,7 @@
# Using this finally to make sure we always put thing back how they should be.
os.chmod(workspace_object.workspace_cmd, cwdstat.st_mode)
- result.assert_main_error(ErrorDomain.STREAM, None)
+ result.assert_main_error(ErrorDomain.STREAM, "workspace-directory-failure")
# Normally we avoid checking stderr in favour of using the mechine readable result.assert_main_error
# But Tristan was very keen that the names of the elements left needing workspaces were present in the out put
assert " ".join([element_name for element_name, workspace_dir_suffix in element_tuples[1:]]) in result.stderr
diff --git a/tests/internals/loader.py b/tests/internals/loader.py
index 2da0172..fd4a357 100644
--- a/tests/internals/loader.py
+++ b/tests/internals/loader.py
@@ -90,7 +90,8 @@
def test_invalid_directory_load(datafiles):
basedir = str(datafiles)
+ os.makedirs(os.path.join(basedir, "element.bst"))
with make_loader(basedir) as loader, pytest.raises(LoadError) as exc:
- loader.load(["elements/"])
+ loader.load(["element.bst"])
assert exc.value.reason == LoadErrorReason.LOADING_DIRECTORY