blob: 25bf9876d8e47f83cc6b46e76adb5a92a771c9ab [file] [log] [blame]
#
# Copyright (C) 2020 Codethink Limited
# Copyright (C) 2019 Bloomberg L.P.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library. If not, see <http://www.gnu.org/licenses/>.
#
# Authors:
# Tristan Van Berkom <tristan.vanberkom@codethink.co.uk>
# Daniel Silverstone <daniel.silverstone@codethink.co.uk>
# Benjamin Schubert <bschubert@bloomberg.net>
import re
import sys
import itertools
from ._exceptions import LoadError
from .exceptions import LoadErrorReason
from .node cimport MappingNode, Node, ScalarNode, SequenceNode, ProvenanceInformation
# Variables are allowed to have dashes here
#
PARSE_EXPANSION = re.compile(r"\%\{([a-zA-Z][a-zA-Z0-9_-]*)\}")
cdef Py_ssize_t MAX_RECURSION_DEPTH = 200
# Throughout this code you will see variables named things like `expstr`.
# These hold data structures called "expansion strings" and are the parsed
# form of the strings which are the input to this subsystem. Strings
# such as "Hello %{name}, how are you?" are parsed into the form:
# ["Hello ", "name", ", how are you?"]
# i.e. a list which consists of one or more strings.
# Strings in even indices of the list (0, 2, 4, etc) are constants which
# are copied into the output of the expansion algorithm. Strings in the
# odd indices (1, 3, 5, etc) are the names of further expansions to make.
# In the example above, first "Hello " is copied, then "name" is expanded
# and so must be another named expansion string passed in to the constructor
# of the Variables class, and whatever is yielded from the expansion of "name"
# is added to the concatenation for the result. Finally ", how are you?" is
# copied in and the whole lot concatenated for return.
#
# To see how strings are parsed, see `_parse_value_expression()` after the class, and
# to see how expansion strings are expanded, see `_expand_value_expression()` after that.
# The Variables helper object will resolve the variable references in
# the given dictionary, expecting that any dictionary values which contain
# variable references can be resolved from the same dictionary.
#
# Each Element creates its own Variables instance to track the configured
# variable settings for the element.
#
# Args:
# node (Node): A node loaded and composited with yaml tools
#
# Raises:
# LoadError, if unresolved variables, or cycles in resolution, occur.
#
cdef class Variables:
cdef MappingNode _original
cdef dict _values
#################################################################
# Magic Methods #
#################################################################
def __init__(self, MappingNode node):
# The original MappingNode, we need to keep this
# around for proper error reporting.
#
self._original = node
# The value map, this dictionary contains either unresolved
# value expressions, or resolved values.
#
# Each mapping value is a list, in the case that the value
# is resolved, then the list is only 1 element long.
#
self._values = self._init_values(node)
# __getitem__()
#
# Fetches a resolved variable by it's name, allows
# addressing the Variables instance like a dictionary.
#
# Args:
# name (str): The name of the variable
#
# Returns:
# (str): The resolved variable value
#
# Raises:
# (LoadError): In the case of an undefined variable or
# a cyclic variable reference
#
def __getitem__(self, str name):
if name not in self._values:
raise KeyError(name)
return self._expand_var(name)
# __contains__()
#
# Checks whether a given variable exists, allows
# supporting `if 'foo' in variables` expressions.
#
# Args:
# name (str): The name of the variable to check for
#
# Returns:
# (bool): True if `name` is a valid variable
#
def __contains__(self, str name):
return name in self._values
# __iter__()
#
# Provide an iterator for all variables effective values
#
# Returns:
# (Iterator[Tuple[str, str]])
#
def __iter__(self):
return _VariablesIterator(self)
#################################################################
# Public API #
#################################################################
# check()
#
# Assert that all variables declared on this Variables
# instance have been resolved properly, and reports errors
# for undefined references and circular references.
#
# Raises:
# (LoadError): In the case of an undefined variable or
# a cyclic variable reference
#
cpdef check(self):
cdef object key
# Just resolve all variables.
for key in self._values.keys():
self._expand_var(<str> key)
# get()
#
# Expand definition of variable by name. If the variable is not
# defined, it will return None instead of failing.
#
# Args:
# name (str): Name of the variable to expand
#
# Returns:
# (str|None): The expanded value for the variable or None variable was not defined.
#
cpdef str get(self, str name):
if name not in self._values:
return None
return self[name]
# expand()
#
# Expand all the variables found in the given Node, recursively.
# This does the change in place, modifying the node. If you want to keep
# the node untouched, you should use `node.clone()` beforehand
#
# Args:
# (Node): A node for which to substitute the values
#
# Raises:
# (LoadError): In the case of an undefined variable or
# a cyclic variable reference
#
cpdef expand(self, Node node):
if isinstance(node, ScalarNode):
(<ScalarNode> node).value = self.subst(<ScalarNode> node)
elif isinstance(node, SequenceNode):
for entry in (<SequenceNode> node).value:
self.expand(entry)
elif isinstance(node, MappingNode):
for entry in (<MappingNode> node).value.values():
self.expand(entry)
else:
assert False, "Unknown 'Node' type"
# subst():
#
# Substitutes any variables in 'string' and returns the result.
#
# Args:
# node (ScalarNode): The ScalarNode to substitute variables in
#
# Returns:
# (str): The new string with any substitutions made
#
# Raises:
# (LoadError): In the case of an undefined variable or
# a cyclic variable reference
#
cpdef str subst(self, ScalarNode node):
value_expression = _parse_value_expression(node.as_str())
return self._expand_value_expression(value_expression, node)
#################################################################
# Private API #
#################################################################
# Variable resolving code
#
# Here we resolve all of our inputs into a dictionary, ready for use
# in subst()
cdef dict _init_values(self, MappingNode node):
# Special case, if notparallel is specified in the variables for this
# element, then override max-jobs to be 1.
# Initialize it as a string as all variables are processed as strings.
#
if node.get_bool('notparallel', False):
node['max-jobs'] = str(1)
cdef dict ret = {}
cdef str key
cdef str value
for key in node.keys():
value = node.get_str(key)
ret[sys.intern(key)] = _parse_value_expression(value)
return ret
#################################################################
# Resolution algorithm #
#################################################################
#
# This is split into a fast path and a slower path, with a small
# calling layer in between for expanding variables and for expanding
# value expressions which refer to variables expected to be defined
# in this Variables instance.
#
# The fast path is initially used, it supports limited variable
# expansion depth due to it's recursive nature, and does not support
# full error reporting.
#
# The fallback slower path is non-recursive and reports user facing
# errors, it is called in the case that KeyError or RecursionError
# are reported from the faster path.
#
# _expand_var()
#
# Helper to expand and cache a variable definition.
#
# Args:
# name (str): Name of the variable to expand
#
# Returns:
# (str): The expanded value of variable
#
# Raises:
# KeyError, if any expansion is missing
# RecursionError, if recursion required for evaluation is too deep
#
cdef str _expand_var(self, str name):
try:
return self._fast_expand_var(name)
except (KeyError, RecursionError):
return self._slow_expand_var(name)
# _expand_value_expression()
#
# Expands a given top level expansion string.
#
# Args:
# value_expression (list): The parsed value expression to be expanded
# node (ScalarNode): The toplevel ScalarNode who is asking for an expansion
#
# Returns:
# (str): The expanded value expression
#
# Raises:
# KeyError, if any expansion is missing
# RecursionError, if recursion required for evaluation is too deep
#
cdef str _expand_value_expression(self, list value_expression, ScalarNode node):
try:
return self._fast_expand_value_expression(value_expression)
except (KeyError, RecursionError):
return self._slow_expand_value_expression(None, value_expression, node)
#################################################################
# Resolution algorithm: fast path #
#################################################################
cdef str _fast_expand_var(self, str name, int counter = 0):
cdef str sub
cdef list value_expression
value_expression = <list> self._values[name]
if len(value_expression) > 1:
sub = self._fast_expand_value_expression(value_expression, counter)
value_expression = [sys.intern(sub)]
self._values[name] = value_expression
return <str> value_expression[0]
cdef str _fast_expand_value_expression(self, list value, int counter = 0):
if counter > MAX_RECURSION_DEPTH:
raise RecursionError()
cdef Py_ssize_t idx
cdef object val
cdef list acc = []
for idx, val in enumerate(value):
if (idx % 2) == 0:
acc.append(val)
else:
acc.append(self._fast_expand_var(<str> val, counter + 1))
return "".join(acc)
#################################################################
# Resolution algorithm: slow path #
#################################################################
# _get_checked_value_expression()
#
# Fetches a value expression from the value table and raises a user
# facing error if the value is undefined.
#
# Args:
# varname (str): The variable name to fetch
# referee (str): The variable name referring to `varname`, or None
# node (ScalarNode): The ScalarNode for which we need to resolve `name`
#
# Returns:
# (list): The value expression for varname
#
# Raises:
# (LoadError): An appropriate error in case of undefined variables
#
cdef list _get_checked_value_expression(self, str varname, str referee, ScalarNode node):
cdef ProvenanceInformation provenance = None
cdef Node referee_value
cdef str error_message
#
# Fetch the value and detect undefined references
#
try:
return <list> self._values[varname]
except KeyError as e:
# Either the provenance is the toplevel calling provenance,
# or it is the provenance of the direct referee
referee_node = self._original.get_node(referee, allowed_types=None, allow_none=True)
if referee_node:
provenance = referee_node.get_provenance()
elif node:
provenance = node.get_provenance()
error_message = "Reference to undefined variable '{}'".format(varname)
if provenance:
error_message = "{}: {}".format(provenance, error_message)
raise LoadError(error_message, LoadErrorReason.UNRESOLVED_VARIABLE) from e
cdef str _slow_expand_var(self, str name):
cdef list value_expression
cdef str expanded
value_expression = self._get_checked_value_expression(name, None, None)
if len(value_expression) > 1:
expanded = self._slow_expand_value_expression(name, value_expression, None)
value_expression = [sys.intern(expanded)]
self._values[name] = value_expression
return <str> value_expression[0]
cdef str _slow_expand_value_expression(self, str varname, list value_expression, ScalarNode node):
cdef ResolutionStep step
cdef ResolutionStep new_step
cdef ResolutionStep this_step
cdef list iter_value_expression
cdef Py_ssize_t idx = 0
cdef object value
cdef str resolved_varname
cdef str resolved_value = None
# We will collect the varnames and value expressions which need
# to be resolved in the loop, sorted by dependency, and then
# finally reverse through them resolving them one at a time
#
cdef list resolved_varnames = []
cdef list resolved_values = []
step = ResolutionStep()
step.init(varname, value_expression, None)
while step:
# Keep a hold of the current overall step
this_step = step
step = step.prev
# Check for circular dependencies
this_step.check_circular(self._original)
for idx, value in enumerate(this_step.value_expression):
# Skip literal parts of the value expression
if (idx % 2) == 0:
continue
iter_value_expression = self._get_checked_value_expression(<str> value, this_step.referee, node)
# Queue up this value.
#
# Even if the value was already resolved, we need it in context to resolve
# previously enqueued variables
resolved_values.append(iter_value_expression)
resolved_varnames.append(value)
# Queue up the values dependencies.
#
if len(iter_value_expression) > 1:
new_step = ResolutionStep()
new_step.init(<str> value, iter_value_expression, this_step)
# Link it to the end of the stack
new_step.prev = step
step = new_step
# We've now constructed the dependencies queue such that
# later dependencies are on the right, we can now safely peddle
# backwards and the last (leftmost) resolved value is the one
# we want to return.
#
idx = len(resolved_values) -1
while idx >= 0:
# Values in, strings out
#
iter_value_expression = <list> resolved_values[idx]
resolved_varname = <str> resolved_varnames[idx]
# Resolve as needed
#
if len(iter_value_expression) > 1:
resolved_value = self._resolve_value_expression(iter_value_expression)
iter_value_expression = [resolved_value]
if resolved_varname is not None:
self._values[resolved_varname] = iter_value_expression
idx -= 1
return resolved_value
cdef str _resolve_value_expression(self, list value_expression):
cdef Py_ssize_t idx
cdef object value
cdef list acc = []
for idx, value in enumerate(value_expression):
if (idx % 2) == 0:
acc.append(value)
else:
acc.append(self._values[value][0])
return "".join(acc)
# ResolutionStep()
#
# The context for a single iteration in variable resolution.
#
# This only exists for better performance than constructing
# and unpacking tuples.
#
cdef class ResolutionStep:
cdef str referee
cdef list value_expression
cdef ResolutionStep parent
cdef ResolutionStep prev
# init()
#
# Initialize this ResolutionStep
#
# Args:
# referee (str): The name of the referring variable
# value_expression (list): The parsed value expression to be expanded
# parent (ResolutionStep): The parent ResolutionStep
#
cdef init(self, str referee, list value_expression, ResolutionStep parent):
self.referee = referee
self.value_expression = value_expression
self.parent = parent
self.prev = None
# check_circular()
#
# Check for circular references in this step.
#
# Args:
# original_values (MappingNode): The original MappingNode for the Variables
#
# Raises:
# (LoadError): Will raise a user facing LoadError with
# LoadErrorReason.CIRCULAR_REFERENCE_VARIABLE in case
# circular references were encountered.
#
cdef check_circular(self, MappingNode original_values):
cdef ResolutionStep step = self.parent
while step:
if self.referee is step.referee:
self._raise_circular_reference_error(step, original_values)
step = step.parent
# _raise_circular_reference_error()
#
# Helper function to construct a full report and raise the circular reference error.
#
cdef _raise_circular_reference_error(self, ResolutionStep conflict, MappingNode original_values):
cdef list error_lines = []
cdef ResolutionStep step = self
cdef ScalarNode node
cdef str referee
while step is not conflict:
if step.parent:
referee = step.parent.referee
else:
referee = self.referee
node = original_values.get_scalar(referee)
error_lines.append("{}: Variable '{}' refers to variable '{}'".format(node.get_provenance(), referee, step.referee))
step = step.parent
raise LoadError("Circular dependency detected on variable '{}'".format(self.referee),
LoadErrorReason.CIRCULAR_REFERENCE_VARIABLE,
detail="\n".join(reversed(error_lines)))
# Cache for the parsed expansion strings. While this is nominally
# something which might "waste" memory, in reality each of these
# will live as long as the element which uses it, which is the
# vast majority of the memory usage across the execution of BuildStream.
cdef dict VALUE_EXPRESSION_CACHE = {
# Prime the cache with the empty string since otherwise that can
# cause issues with the parser, complications to which cause slowdown
"": [""],
}
# Helper to parse a string into an expansion string tuple, caching
# the results so that future parse requests don't need to think about
# the string
cdef list _parse_value_expression(str instr):
cdef list ret
try:
return <list> VALUE_EXPRESSION_CACHE[instr]
except KeyError:
# This use of the regex turns a string like "foo %{bar} baz" into
# a list ["foo ", "bar", " baz"]
splits = PARSE_EXPANSION.split(instr)
# If an expansion ends the string, we get an empty string on the end
# which we can optimise away, making the expansion routines not need
# a test for this.
if splits[-1] == '':
del splits [-1]
# Cache an interned copy of this. We intern it to try and reduce the
# memory impact of the cache. It seems odd to cache the list length
# but this is measurably cheaper than calculating it each time during
# string expansion.
ret = [sys.intern(<str> s) for s in <list> splits]
VALUE_EXPRESSION_CACHE[instr] = ret
return ret
# Iterator for all flatten variables.
# Used by Variables.__iter__
cdef class _VariablesIterator:
cdef Variables _variables
cdef object _iter
def __init__(self, Variables variables):
self._variables = variables
self._iter = iter(variables._values)
def __iter__(self):
return self
def __next__(self):
name = next(self._iter)
return name, self._variables._expand_var(name)