src/buildstream/_yaml.pyx - buildstream - Git at Google

 #
 #  Copyright (C) 2018 Codethink Limited
 #  Copyright (C) 2019 Bloomberg LLP
 #
 #  This program is free software; you can redistribute it and/or
 #  modify it under the terms of the GNU Lesser General Public
 #  License as published by the Free Software Foundation; either
 #  version 2 of the License, or (at your option) any later version.
 #
 #  This library is distributed in the hope that it will be useful,
 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
 #  Lesser General Public License for more details.
 #
 #  You should have received a copy of the GNU Lesser General Public
 #  License along with this library. If not, see <http://www.gnu.org/licenses/>.
 #
 #  Authors:
 #        Tristan Van Berkom <tristan.vanberkom@codethink.co.uk>
 #        Daniel Silverstone <daniel.silverstone@codethink.co.uk>
 #        James Ennis <james.ennis@codethink.co.uk>
 #        Benjamin Schubert <bschubert@bloomberg.net>

 import datetime
 import sys
 from contextlib import ExitStack
 from collections import OrderedDict
 from collections.abc import Mapping

 from ruamel import yaml

 from ._exceptions import LoadError
 from .exceptions import LoadErrorReason
 from . cimport node
 from .node cimport MappingNode, ScalarNode, SequenceNode


 # These exceptions are intended to be caught entirely within
 # the BuildStream framework, hence they do not reside in the
 # public exceptions.py

 class YAMLLoadError(Exception):
     """Internal error raised while turning YAML events into Nodes.

     It is raised by the Representer and converted into a LoadError
     by load_data(), so it does not escape this module.
     """


 # Represents the various states in which the Representer can be
 # while parsing yaml.
 cdef enum RepresenterState:
     # Inside a document: the top level mapping may start, or the document may end
     doc
     # Initial state: only the start of the stream is accepted
     init
     # Inside the stream: a document may start, or the stream may end
     stream
     # Inside a mapping: the next key, or the end of the mapping, is expected
     wait_key
     # Inside a sequence: the next item, or the end of the sequence, is expected
     wait_list_item
     # A mapping key has been read: the value for that key is expected
     wait_value


 # Signature shared by every Representer event handler: it is given the
 # Representer and the YAML event, and returns the state to move to next.
 ctypedef RepresenterState (*representer_action)(Representer, object)

 # Representer for YAML events comprising input to the BuildStream format.
 #
 # All streams MUST represent a single document which must be a Mapping.
 # Anything else is considered an error.
 #
 # Mappings must only have string keys, values are always represented as
 # strings if they are scalar, or else as simple dictionaries and lists.
 #
 cdef class Representer:

     # Index of the YAML file being parsed, handed to every Node created here
     cdef int _file_index
     # Current state of the event-driven state machine
     cdef RepresenterState state
     # output: stack of the container nodes currently being filled, with the
     #         top level mapping at index 0
     # keys:   stack of pending keys; mapping keys are pushed as read, and an
     #         integer position is pushed for a sequence nested in a sequence
     cdef list output, keys

     # Initialise a new representer
     #
     # The file index is used to store into the Node instances so that the
     # provenance of the YAML can be tracked.
     #
     # Args:
     #   file_index (int): The index of this YAML file
     def __init__(self, int file_index):
         self._file_index = file_index
         self.state = RepresenterState.init
         self.output = []
         self.keys = []

     # Handle a YAML parse event
     #
     # The event is validated, then dispatched to the handler matching the
     # current state; the value returned by the handler becomes the new state.
     #
     # Args:
     #   event (YAML Event): The event to be handled
     #
     # Raises:
     #   YAMLLoadError: Something went wrong.
     cdef void handle_event(self, event) except *:
         # getattr() tolerates events which have no anchor attribute at all
         if getattr(event, "anchor", None) is not None:
             raise YAMLLoadError("Anchors are disallowed in BuildStream at line {} column {}"
                                 .format(event.start_mark.line, event.start_mark.column))

         cdef str event_name = event.__class__.__name__

         # Scalars may only carry tags from the "tag:yaml.org,2002:" namespace
         if event_name == "ScalarEvent":
             if event.tag is not None:
                 if not event.tag.startswith("tag:yaml.org,2002:"):
                     raise YAMLLoadError(
                         "Non-core tag expressed in input.  " +
                         "This is disallowed in BuildStream. At line {} column {}"
                         .format(event.start_mark.line, event.start_mark.column))

         # A NULL handler means this event is not permitted in the current state
         cdef representer_action handler = self._get_handler_for_event(event_name)
         if not handler:
             raise YAMLLoadError(
                 "Invalid input detected. No handler for {} in state {} at line {} column {}"
                 .format(event, self.state, event.start_mark.line, event.start_mark.column))

         # Cython weirdness here, we need to pass self to the function
         self.state = <RepresenterState> handler(self, event)  # pylint: disable=not-callable

     # Get the output of the YAML parse
     #
     # Returns:
     #   (Node or None): Return the Node instance of the top level mapping or
     #                   None if there wasn't one.
     cdef MappingNode get_output(self):
         # The bottom of the output stack is the top level mapping
         if len(self.output):
             return self.output[0]
         return None

     # Select the handler for an event, given the current state
     #
     # Args:
     #   event_name (str): The class name of the event to be handled
     #
     # Returns:
     #   (representer_action): The handler to call, or NULL when the event is
     #                         not accepted in the current state
     cdef representer_action _get_handler_for_event(self, str event_name):
         if self.state == RepresenterState.wait_list_item:
             if event_name == "ScalarEvent":
                 return self._handle_wait_list_item_ScalarEvent
             elif event_name == "MappingStartEvent":
                 return self._handle_wait_list_item_MappingStartEvent
             elif event_name == "SequenceStartEvent":
                 return self._handle_wait_list_item_SequenceStartEvent
             elif event_name == "SequenceEndEvent":
                 return self._handle_wait_list_item_SequenceEndEvent
         elif self.state == RepresenterState.wait_value:
             if event_name == "ScalarEvent":
                 return self._handle_wait_value_ScalarEvent
             elif event_name == "MappingStartEvent":
                 return self._handle_wait_value_MappingStartEvent
             elif event_name == "SequenceStartEvent":
                 return self._handle_wait_value_SequenceStartEvent
         elif self.state == RepresenterState.wait_key:
             if event_name == "ScalarEvent":
                 return self._handle_wait_key_ScalarEvent
             elif event_name == "MappingEndEvent":
                 return self._handle_wait_key_MappingEndEvent
         elif self.state == RepresenterState.stream:
             if event_name == "DocumentStartEvent":
                 return self._handle_stream_DocumentStartEvent
             elif event_name == "StreamEndEvent":
                 return self._handle_stream_StreamEndEvent
         elif self.state == RepresenterState.doc:
             if event_name == "MappingStartEvent":
                 return self._handle_doc_MappingStartEvent
             elif event_name == "DocumentEndEvent":
                 return self._handle_doc_DocumentEndEvent
         elif self.state == RepresenterState.init and event_name == "StreamStartEvent":
             return self._handle_init_StreamStartEvent
         # Any other state/event combination has no handler
         return NULL

     # The stream has started, a document is expected next
     cdef RepresenterState _handle_init_StreamStartEvent(self, object ev):
         return RepresenterState.stream

     # A document has started, its top level mapping is expected next
     cdef RepresenterState _handle_stream_DocumentStartEvent(self, object ev):
         return RepresenterState.doc

     # Push a new, empty MappingNode on the output stack and wait for its
     # first key. The other MappingStartEvent handlers reuse this one.
     cdef RepresenterState _handle_doc_MappingStartEvent(self, object ev):
         newmap = MappingNode.__new__(MappingNode, self._file_index, ev.start_mark.line, ev.start_mark.column, {})
         self.output.append(newmap)
         return RepresenterState.wait_key

     # Remember the key until its value arrives
     cdef RepresenterState _handle_wait_key_ScalarEvent(self, object ev):
         self.keys.append(ev.value)
         return RepresenterState.wait_value

     # Store a scalar value under the pending key of the current mapping
     cdef RepresenterState _handle_wait_value_ScalarEvent(self, object ev):
         key = self.keys.pop()
         (<MappingNode> self.output[-1]).value[key] = \
             ScalarNode.__new__(ScalarNode, self._file_index, ev.start_mark.line, ev.start_mark.column, ev.value)
         return RepresenterState.wait_key

     # Start a nested mapping and store it under the pending key of its parent
     cdef RepresenterState _handle_wait_value_MappingStartEvent(self, object ev):
         cdef RepresenterState new_state = self._handle_doc_MappingStartEvent(ev)
         key = self.keys.pop()
         # output[-1] is the mapping just pushed, output[-2] is its parent
         (<MappingNode> self.output[-2]).value[key] = self.output[-1]
         return new_state

     cdef RepresenterState _handle_wait_key_MappingEndEvent(self, object ev):
         # We've finished a mapping, so pop it off the output stack
         # unless it's the last one in which case we leave it
         if len(self.output) > 1:
             self.output.pop()
             # The container now on top of the stack decides what comes next
             if type(self.output[-1]) is SequenceNode:
                 return RepresenterState.wait_list_item
             else:
                 return RepresenterState.wait_key
         else:
             return RepresenterState.doc

     # Start a sequence as the value of the pending key. The key is left on
     # the keys stack, it is popped when the sequence ends.
     cdef RepresenterState _handle_wait_value_SequenceStartEvent(self, object ev):
         self.output.append(SequenceNode.__new__(
             SequenceNode, self._file_index, ev.start_mark.line, ev.start_mark.column, []))
         (<MappingNode> self.output[-2]).value[self.keys[-1]] = self.output[-1]
         return RepresenterState.wait_list_item

     # Start a sequence nested in a sequence. Its position in the parent is
     # pushed as an integer key, which tells the matching SequenceEndEvent
     # that the parent is a sequence.
     cdef RepresenterState _handle_wait_list_item_SequenceStartEvent(self, object ev):
         self.keys.append(len((<SequenceNode> self.output[-1]).value))
         self.output.append(SequenceNode.__new__(
             SequenceNode, self._file_index, ev.start_mark.line, ev.start_mark.column, []))
         (<SequenceNode> self.output[-2]).value.append(self.output[-1])
         return RepresenterState.wait_list_item

     cdef RepresenterState _handle_wait_list_item_SequenceEndEvent(self, object ev):
         # When ending a sequence, we need to pop a key because we retain the
         # key until the end so that if we need to mutate the underlying entry
         # we can.
         key = self.keys.pop()
         self.output.pop()
         # An integer key means the parent is a sequence, otherwise a mapping
         if type(key) is int:
             return RepresenterState.wait_list_item
         else:
             return RepresenterState.wait_key

     # Append a scalar item to the current sequence
     cdef RepresenterState _handle_wait_list_item_ScalarEvent(self, object ev):
         (<SequenceNode> self.output[-1]).value.append(
            ScalarNode.__new__(ScalarNode, self._file_index, ev.start_mark.line, ev.start_mark.column, ev.value))
         return RepresenterState.wait_list_item

     # Start a mapping as an item of the current sequence
     cdef RepresenterState _handle_wait_list_item_MappingStartEvent(self, object ev):
         cdef RepresenterState new_state = self._handle_doc_MappingStartEvent(ev)
         # output[-1] is the mapping just pushed, output[-2] is the sequence
         (<SequenceNode> self.output[-2]).value.append(self.output[-1])
         return new_state

     # The document has ended, exactly one top level mapping must remain
     #
     # NOTE(review): this cdef method returns a C enum and declares no
     # `except` clause; confirm that the Cython version in use propagates
     # the exception raised here to handle_event().
     cdef RepresenterState _handle_doc_DocumentEndEvent(self, object ev):
         if len(self.output) != 1:
             raise YAMLLoadError("Zero, or more than one document found in YAML stream")
         return RepresenterState.stream

     # The stream has ended, go back to the initial state
     cdef RepresenterState _handle_stream_StreamEndEvent(self, object ev):
         return RepresenterState.init


 # load()
 #
 # Parse a YAML file from disk into a MappingNode carrying provenance.
 #
 # Args:
 #    filename (str): Path of the YAML file to read
 #    shortname (str): Shorthand name used for error reporting; when empty
 #                     or None, `filename` is used instead
 #    copy_tree (bool): Forwarded to load_data(); when True a clone of the
 #                      parsed tree is returned
 #    project (Project): The (optional) project to associate the parsed YAML with
 #
 # Returns:
 #    (MappingNode): The top level mapping of the file
 #
 # Raises:
 #    (LoadError): If the file is missing, is a directory, or fails to load
 #
 cpdef MappingNode load(str filename, str shortname, bint copy_tree=False, object project=None):
     cdef str displayname
     cdef Py_ssize_t file_number

     # Fall back to the full path when no shorthand was given
     shortname = shortname or filename

     # Files reached through a junction are displayed as "junction:shortname"
     if project is None or project.junction is None:
         displayname = shortname
     else:
         displayname = "{}:{}".format(project.junction.name, shortname)

     # Register the file first, the returned index is stored in the nodes
     file_number = node._create_new_file(filename, shortname, displayname, project)

     try:
         with open(filename) as stream:
             text = stream.read()

         return load_data(text,
                          file_index=file_number,
                          file_name=filename,
                          copy_tree=copy_tree)
     except FileNotFoundError as e:
         raise LoadError("Could not find file at {}".format(filename),
                         LoadErrorReason.MISSING_FILE) from e
     except IsADirectoryError as e:
         raise LoadError("{} is a directory".format(filename),
                         LoadErrorReason.LOADING_DIRECTORY) from e
     except LoadError as e:
         # Prefix errors coming out of load_data() with the display name
         raise LoadError("{}: {}".format(displayname, e), e.reason) from e


 # load_data()
 #
 # Like load(), but doesn't require the data to be in a file
 #
 # Args:
 #    data (str): The YAML text to parse
 #    file_index (int): The file index recorded in the created nodes, defaults
 #                      to node._SYNTHETIC_FILE_INDEX
 #    file_name (str): Optional filename, only used in error reports
 #    copy_tree (bool): Whether to return a clone of the parsed tree, leaving
 #                      the original registered as the root node of the file
 #
 # Returns:
 #    (MappingNode): The top level mapping, empty if the YAML had no content
 #
 # Raises:
 #    (LoadError): With LoadErrorReason.INVALID_YAML when parsing fails or
 #                 the content is not a mapping
 #
 cpdef MappingNode load_data(str data, int file_index=node._SYNTHETIC_FILE_INDEX, str file_name=None, bint copy_tree=False):
     cdef Representer rep

     try:
         rep = Representer(file_index)
         parser = yaml.CParser(data)

         # Feed every parse event to the representer, and always release the
         # parser, even when an event is rejected
         try:
             while parser.check_event():
                 rep.handle_event(parser.get_event())
         finally:
             parser.dispose()

         contents = rep.get_output()
     except YAMLLoadError as e:
         # Input rejected by the Representer
         raise LoadError("Malformed YAML:\n\n{}\n\n".format(e),
                         LoadErrorReason.INVALID_YAML) from e
     except Exception as e:
         # Anything else raised while parsing, including by the parser itself
         raise LoadError("Severely malformed YAML:\n\n{}\n\n".format(e),
                         LoadErrorReason.INVALID_YAML) from e

     if type(contents) != MappingNode:
         # Special case allowance for None, when the loaded file has only comments in it.
         if contents is None:
             contents = MappingNode.__new__(MappingNode, file_index, 0, 0, {})
         else:
             # Report the type of the object itself; indexing it with [0]
             # would fail or name the wrong type.
             raise LoadError("YAML file has content of type '{}' instead of expected type 'dict': {}"
                             .format(type(contents).__name__, file_name),
                             LoadErrorReason.INVALID_YAML)

     # Store this away because we'll use it later for "top level" provenance
     node._set_root_node_for_file(file_index, contents)

     if copy_tree:
         contents = contents.clone()
     return contents


 ###############################################################################

 # Roundtrip code

 # Represent Nodes automatically

 def represent_mapping(self, MappingNode mapping):
     # A MappingNode is dumped as the plain dictionary it wraps
     plain_dict = mapping.value
     return self.represent_dict(plain_dict)

 def represent_scalar(self, ScalarNode scalar):
     # A ScalarNode is dumped as the string it wraps
     text = scalar.value
     return self.represent_str(text)

 def represent_sequence(self, SequenceNode sequence):
     # A SequenceNode is dumped as the plain list it wraps
     items = sequence.value
     return self.represent_list(items)


 # Register one representer per Node type with the roundtrip representer,
 # so that Node trees can be passed directly to the dumper.
 yaml.RoundTripRepresenter.add_representer(MappingNode, represent_mapping)
 yaml.RoundTripRepresenter.add_representer(ScalarNode, represent_scalar)
 yaml.RoundTripRepresenter.add_representer(SequenceNode, represent_sequence)

 # Represent simple types as strings

 def represent_as_str(self, value):
     # Convert to text first, then let the string representer emit it
     text = str(value)
     return self.represent_str(text)

 # None, numbers, booleans and dates are all dumped as the text given by str()
 yaml.RoundTripRepresenter.add_representer(type(None), represent_as_str)
 yaml.RoundTripRepresenter.add_representer(int, represent_as_str)
 yaml.RoundTripRepresenter.add_representer(float, represent_as_str)
 yaml.RoundTripRepresenter.add_representer(bool, represent_as_str)
 yaml.RoundTripRepresenter.add_representer(datetime.datetime, represent_as_str)
 yaml.RoundTripRepresenter.add_representer(datetime.date, represent_as_str)

 # Always represent things consistently:

 # OrderedDict is dumped with the SafeRepresenter dictionary representer
 # NOTE(review): presumably so that it comes out as a plain mapping rather
 # than a tagged ordered map - confirm against ruamel.yaml
 yaml.RoundTripRepresenter.add_representer(OrderedDict,
                                           yaml.SafeRepresenter.represent_dict)

 # Always parse things consistently

 # Scalars resolved to the int, float, bool, null and timestamp core tags
 # are all constructed with the string constructor, so they load as strings.
 yaml.RoundTripConstructor.add_constructor(u'tag:yaml.org,2002:int',
                                           yaml.RoundTripConstructor.construct_yaml_str)
 yaml.RoundTripConstructor.add_constructor(u'tag:yaml.org,2002:float',
                                           yaml.RoundTripConstructor.construct_yaml_str)
 yaml.RoundTripConstructor.add_constructor(u'tag:yaml.org,2002:bool',
                                           yaml.RoundTripConstructor.construct_yaml_str)
 yaml.RoundTripConstructor.add_constructor(u'tag:yaml.org,2002:null',
                                           yaml.RoundTripConstructor.construct_yaml_str)
 yaml.RoundTripConstructor.add_constructor(u'tag:yaml.org,2002:timestamp',
                                           yaml.RoundTripConstructor.construct_yaml_str)


 # HardlineDumper
 #
 # This is a dumper used during roundtrip_dump which forces every scalar to be
 # a plain string, in order to match the output format to the input format.
 #
 # If you discover something is broken, please add a test case to the roundtrip
 # test in tests/internals/yaml/roundtrip-test.yaml
 #
 class HardlineDumper(yaml.RoundTripDumper):
     def __init__(self, *args, **kwargs):
         yaml.RoundTripDumper.__init__(self, *args, **kwargs)

         # Resolve every scalar as a plain string: the catch-all pattern is
         # registered for YAML 1.1, for YAML 1.2, and for the `None` version
         string_tag = u'tag:yaml.org,2002:str'
         yaml_versions = ((1, 1), (1, 2), None)
         for yaml_version in yaml_versions:
             match_anything = yaml.util.RegExp(r'.*')
             self.add_version_implicit_resolver(yaml_version, string_tag, match_anything, None)


 # roundtrip_load()
 #
 # Load a YAML file into memory in a form which allows roundtripping as best
 # as ruamel permits.
 #
 # Note, the returned objects can be treated as Mappings and Lists and Strings
 # but replacing content wholesale with plain dicts and lists may result
 # in a loss of comments and formatting.
 #
 # Args:
 #    filename (str): The file to load in
 #    allow_missing (bool): Optionally set this to True to allow missing files
 #
 # Returns:
 #    (Mapping): The loaded YAML mapping.
 #
 # Raises:
 #    (LoadError): If the file is missing, or a directory, this is raised.
 #                 Also if the YAML is malformed.
 #
 def roundtrip_load(filename, *, allow_missing=False):
     try:
         with open(filename, "r") as stream:
             text = stream.read()
         loaded = roundtrip_load_data(text, filename=filename)
     except FileNotFoundError as e:
         # Only a tolerated missing file avoids the error
         if not allow_missing:
             raise LoadError("Could not find file at {}".format(filename),
                             LoadErrorReason.MISSING_FILE) from e
         # Missing files are always empty dictionaries
         return {}
     except IsADirectoryError as e:
         raise LoadError("{} is a directory.".format(filename),
                         LoadErrorReason.LOADING_DIRECTORY) from e
     else:
         return loaded


 # roundtrip_load_data()
 #
 # Parse the given contents as YAML, returning them as a roundtrippable data
 # structure.
 #
 # A lack of content will be returned as an empty mapping.
 #
 # Args:
 #    contents (str): The contents to be parsed as YAML
 #    filename (str): Optional filename to be used in error reports
 #
 # Returns:
 #    (Mapping): The loaded YAML mapping
 #
 # Raises:
 #    (LoadError): Raised on invalid YAML, or YAML which parses to something other
 #                 than a Mapping
 #
 def roundtrip_load_data(contents, *, filename=None):
     syntax_errors = (yaml.scanner.ScannerError, yaml.composer.ComposerError, yaml.parser.ParserError)

     try:
         parsed = yaml.load(contents, yaml.RoundTripLoader, preserve_quotes=True)
     except syntax_errors as e:
         raise LoadError("Malformed YAML:\n\n{}\n\n{}\n".format(e.problem, e.problem_mark),
                         LoadErrorReason.INVALID_YAML) from e

     # Empty input becomes an empty mapping, like the main Node loader
     if parsed is None:
         return {}

     # Anything which is a mapping is returned as loaded
     if isinstance(parsed, Mapping):
         return parsed

     raise LoadError("YAML file has content of type '{}' instead of expected type 'dict': {}"
                     .format(type(parsed).__name__, filename), LoadErrorReason.INVALID_YAML)


 # roundtrip_dump()
 #
 # Dumps the given contents as a YAML file.  Ideally the contents came from
 # parsing with `roundtrip_load` or `roundtrip_load_data` so that they will be
 # dumped in the same form as they came from.
 #
 # If `file` is a string, it is the filename to write to; if `file` has a
 # `write` method, it is treated as a stream; otherwise output goes to stdout.
 #
 # Args:
 #    contents (Mapping or list): The content to write out as YAML.
 #    file (any): The file to write to
 #
 def roundtrip_dump(contents, file=None):
     with ExitStack() as stack:
         if type(file) is not str:
             # An object with a write() method is used as is, anything else
             # (including None) sends the output to stdout
             target = file if hasattr(file, 'write') else sys.stdout
         else:
             # A filename: the file is written through save_file_atomic(),
             # whose context is closed when the stack unwinds
             from . import utils
             target = stack.enter_context(utils.save_file_atomic(file, 'w'))
         yaml.round_trip_dump(contents, target, Dumper=HardlineDumper)
	#
	# Copyright (C) 2018 Codethink Limited
	# Copyright (C) 2019 Bloomberg LLP
	#
	# This program is free software; you can redistribute it and/or
	# modify it under the terms of the GNU Lesser General Public
	# License as published by the Free Software Foundation; either
	# version 2 of the License, or (at your option) any later version.
	#
	# This library is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	# Lesser General Public License for more details.
	#
	# You should have received a copy of the GNU Lesser General Public
	# License along with this library. If not, see <http://www.gnu.org/licenses/>.
	#
	# Authors:
	# Tristan Van Berkom <tristan.vanberkom@codethink.co.uk>
	# Daniel Silverstone <daniel.silverstone@codethink.co.uk>
	# James Ennis <james.ennis@codethink.co.uk>
	# Benjamin Schubert <bschubert@bloomberg.net>

	import datetime
	import sys
	from contextlib import ExitStack
	from collections import OrderedDict
	from collections.abc import Mapping

	from ruamel import yaml

	from ._exceptions import LoadError
	from .exceptions import LoadErrorReason
	from . cimport node
	from .node cimport MappingNode, ScalarNode, SequenceNode


	# These exceptions are intended to be caught entirely within
	# the BuildStream framework, hence they do not reside in the
	# public exceptions.py

	class YAMLLoadError(Exception):
	    """Internal error raised while turning YAML events into Nodes.

	    It is raised by the Representer and converted into a LoadError
	    by load_data(), so it does not escape this module.
	    """


	# Represents the various states in which the Representer can be
	# while parsing yaml.
	cdef enum RepresenterState:
	    # Inside a document: the top level mapping may start, or the document may end
	    doc
	    # Initial state: only the start of the stream is accepted
	    init
	    # Inside the stream: a document may start, or the stream may end
	    stream
	    # Inside a mapping: the next key, or the end of the mapping, is expected
	    wait_key
	    # Inside a sequence: the next item, or the end of the sequence, is expected
	    wait_list_item
	    # A mapping key has been read: the value for that key is expected
	    wait_value


	# Signature shared by every Representer event handler: it is given the
	# Representer and the YAML event, and returns the state to move to next.
	ctypedef RepresenterState (*representer_action)(Representer, object)

	# Representer for YAML events comprising input to the BuildStream format.
	#
	# All streams MUST represent a single document which must be a Mapping.
	# Anything else is considered an error.
	#
	# Mappings must only have string keys, values are always represented as
	# strings if they are scalar, or else as simple dictionaries and lists.
	#
	cdef class Representer:

	    # Index of the YAML file being parsed, handed to every Node created here
	    cdef int _file_index
	    # Current state of the event-driven state machine
	    cdef RepresenterState state
	    # output: stack of the container nodes currently being filled, with the
	    #         top level mapping at index 0
	    # keys:   stack of pending keys; mapping keys are pushed as read, and an
	    #         integer position is pushed for a sequence nested in a sequence
	    cdef list output, keys

	    # Initialise a new representer
	    #
	    # The file index is used to store into the Node instances so that the
	    # provenance of the YAML can be tracked.
	    #
	    # Args:
	    #   file_index (int): The index of this YAML file
	    def __init__(self, int file_index):
	        self._file_index = file_index
	        self.state = RepresenterState.init
	        self.output = []
	        self.keys = []

	    # Handle a YAML parse event
	    #
	    # The event is validated, then dispatched to the handler matching the
	    # current state; the value returned by the handler becomes the new state.
	    #
	    # Args:
	    #   event (YAML Event): The event to be handled
	    #
	    # Raises:
	    #   YAMLLoadError: Something went wrong.
	    cdef void handle_event(self, event) except *:
	        # getattr() tolerates events which have no anchor attribute at all
	        if getattr(event, "anchor", None) is not None:
	            raise YAMLLoadError("Anchors are disallowed in BuildStream at line {} column {}"
	                                .format(event.start_mark.line, event.start_mark.column))

	        cdef str event_name = event.__class__.__name__

	        # Scalars may only carry tags from the "tag:yaml.org,2002:" namespace
	        if event_name == "ScalarEvent":
	            if event.tag is not None:
	                if not event.tag.startswith("tag:yaml.org,2002:"):
	                    raise YAMLLoadError(
	                        "Non-core tag expressed in input. " +
	                        "This is disallowed in BuildStream. At line {} column {}"
	                        .format(event.start_mark.line, event.start_mark.column))

	        # A NULL handler means this event is not permitted in the current state
	        cdef representer_action handler = self._get_handler_for_event(event_name)
	        if not handler:
	            raise YAMLLoadError(
	                "Invalid input detected. No handler for {} in state {} at line {} column {}"
	                .format(event, self.state, event.start_mark.line, event.start_mark.column))

	        # Cython weirdness here, we need to pass self to the function
	        self.state = <RepresenterState> handler(self, event)  # pylint: disable=not-callable

	    # Get the output of the YAML parse
	    #
	    # Returns:
	    #   (Node or None): Return the Node instance of the top level mapping or
	    #                   None if there wasn't one.
	    cdef MappingNode get_output(self):
	        # The bottom of the output stack is the top level mapping
	        if len(self.output):
	            return self.output[0]
	        return None

	    # Select the handler for an event, given the current state
	    #
	    # Args:
	    #   event_name (str): The class name of the event to be handled
	    #
	    # Returns:
	    #   (representer_action): The handler to call, or NULL when the event is
	    #                         not accepted in the current state
	    cdef representer_action _get_handler_for_event(self, str event_name):
	        if self.state == RepresenterState.wait_list_item:
	            if event_name == "ScalarEvent":
	                return self._handle_wait_list_item_ScalarEvent
	            elif event_name == "MappingStartEvent":
	                return self._handle_wait_list_item_MappingStartEvent
	            elif event_name == "SequenceStartEvent":
	                return self._handle_wait_list_item_SequenceStartEvent
	            elif event_name == "SequenceEndEvent":
	                return self._handle_wait_list_item_SequenceEndEvent
	        elif self.state == RepresenterState.wait_value:
	            if event_name == "ScalarEvent":
	                return self._handle_wait_value_ScalarEvent
	            elif event_name == "MappingStartEvent":
	                return self._handle_wait_value_MappingStartEvent
	            elif event_name == "SequenceStartEvent":
	                return self._handle_wait_value_SequenceStartEvent
	        elif self.state == RepresenterState.wait_key:
	            if event_name == "ScalarEvent":
	                return self._handle_wait_key_ScalarEvent
	            elif event_name == "MappingEndEvent":
	                return self._handle_wait_key_MappingEndEvent
	        elif self.state == RepresenterState.stream:
	            if event_name == "DocumentStartEvent":
	                return self._handle_stream_DocumentStartEvent
	            elif event_name == "StreamEndEvent":
	                return self._handle_stream_StreamEndEvent
	        elif self.state == RepresenterState.doc:
	            if event_name == "MappingStartEvent":
	                return self._handle_doc_MappingStartEvent
	            elif event_name == "DocumentEndEvent":
	                return self._handle_doc_DocumentEndEvent
	        elif self.state == RepresenterState.init and event_name == "StreamStartEvent":
	            return self._handle_init_StreamStartEvent
	        # Any other state/event combination has no handler
	        return NULL

	    # The stream has started, a document is expected next
	    cdef RepresenterState _handle_init_StreamStartEvent(self, object ev):
	        return RepresenterState.stream

	    # A document has started, its top level mapping is expected next
	    cdef RepresenterState _handle_stream_DocumentStartEvent(self, object ev):
	        return RepresenterState.doc

	    # Push a new, empty MappingNode on the output stack and wait for its
	    # first key. The other MappingStartEvent handlers reuse this one.
	    cdef RepresenterState _handle_doc_MappingStartEvent(self, object ev):
	        newmap = MappingNode.__new__(MappingNode, self._file_index, ev.start_mark.line, ev.start_mark.column, {})
	        self.output.append(newmap)
	        return RepresenterState.wait_key

	    # Remember the key until its value arrives
	    cdef RepresenterState _handle_wait_key_ScalarEvent(self, object ev):
	        self.keys.append(ev.value)
	        return RepresenterState.wait_value

	    # Store a scalar value under the pending key of the current mapping
	    cdef RepresenterState _handle_wait_value_ScalarEvent(self, object ev):
	        key = self.keys.pop()
	        (<MappingNode> self.output[-1]).value[key] = \
	            ScalarNode.__new__(ScalarNode, self._file_index, ev.start_mark.line, ev.start_mark.column, ev.value)
	        return RepresenterState.wait_key

	    # Start a nested mapping and store it under the pending key of its parent
	    cdef RepresenterState _handle_wait_value_MappingStartEvent(self, object ev):
	        cdef RepresenterState new_state = self._handle_doc_MappingStartEvent(ev)
	        key = self.keys.pop()
	        # output[-1] is the mapping just pushed, output[-2] is its parent
	        (<MappingNode> self.output[-2]).value[key] = self.output[-1]
	        return new_state

	    cdef RepresenterState _handle_wait_key_MappingEndEvent(self, object ev):
	        # We've finished a mapping, so pop it off the output stack
	        # unless it's the last one in which case we leave it
	        if len(self.output) > 1:
	            self.output.pop()
	            # The container now on top of the stack decides what comes next
	            if type(self.output[-1]) is SequenceNode:
	                return RepresenterState.wait_list_item
	            else:
	                return RepresenterState.wait_key
	        else:
	            return RepresenterState.doc

	    # Start a sequence as the value of the pending key. The key is left on
	    # the keys stack, it is popped when the sequence ends.
	    cdef RepresenterState _handle_wait_value_SequenceStartEvent(self, object ev):
	        self.output.append(SequenceNode.__new__(
	            SequenceNode, self._file_index, ev.start_mark.line, ev.start_mark.column, []))
	        (<MappingNode> self.output[-2]).value[self.keys[-1]] = self.output[-1]
	        return RepresenterState.wait_list_item

	    # Start a sequence nested in a sequence. Its position in the parent is
	    # pushed as an integer key, which tells the matching SequenceEndEvent
	    # that the parent is a sequence.
	    cdef RepresenterState _handle_wait_list_item_SequenceStartEvent(self, object ev):
	        self.keys.append(len((<SequenceNode> self.output[-1]).value))
	        self.output.append(SequenceNode.__new__(
	            SequenceNode, self._file_index, ev.start_mark.line, ev.start_mark.column, []))
	        (<SequenceNode> self.output[-2]).value.append(self.output[-1])
	        return RepresenterState.wait_list_item

	    cdef RepresenterState _handle_wait_list_item_SequenceEndEvent(self, object ev):
	        # When ending a sequence, we need to pop a key because we retain the
	        # key until the end so that if we need to mutate the underlying entry
	        # we can.
	        key = self.keys.pop()
	        self.output.pop()
	        # An integer key means the parent is a sequence, otherwise a mapping
	        if type(key) is int:
	            return RepresenterState.wait_list_item
	        else:
	            return RepresenterState.wait_key

	    # Append a scalar item to the current sequence
	    cdef RepresenterState _handle_wait_list_item_ScalarEvent(self, object ev):
	        (<SequenceNode> self.output[-1]).value.append(
	            ScalarNode.__new__(ScalarNode, self._file_index, ev.start_mark.line, ev.start_mark.column, ev.value))
	        return RepresenterState.wait_list_item

	    # Start a mapping as an item of the current sequence
	    cdef RepresenterState _handle_wait_list_item_MappingStartEvent(self, object ev):
	        cdef RepresenterState new_state = self._handle_doc_MappingStartEvent(ev)
	        # output[-1] is the mapping just pushed, output[-2] is the sequence
	        (<SequenceNode> self.output[-2]).value.append(self.output[-1])
	        return new_state

	    # The document has ended, exactly one top level mapping must remain
	    #
	    # NOTE(review): this cdef method returns a C enum and declares no
	    # `except` clause; confirm that the Cython version in use propagates
	    # the exception raised here to handle_event().
	    cdef RepresenterState _handle_doc_DocumentEndEvent(self, object ev):
	        if len(self.output) != 1:
	            raise YAMLLoadError("Zero, or more than one document found in YAML stream")
	        return RepresenterState.stream

	    # The stream has ended, go back to the initial state
	    cdef RepresenterState _handle_stream_StreamEndEvent(self, object ev):
	        return RepresenterState.init


	# load()
	#
	# Parse a YAML file from disk into a MappingNode carrying provenance.
	#
	# Args:
	#    filename (str): Path of the YAML file to read
	#    shortname (str): Shorthand name used for error reporting; when empty
	#                     or None, `filename` is used instead
	#    copy_tree (bool): Forwarded to load_data(); when True a clone of the
	#                      parsed tree is returned
	#    project (Project): The (optional) project to associate the parsed YAML with
	#
	# Returns:
	#    (MappingNode): The top level mapping of the file
	#
	# Raises:
	#    (LoadError): If the file is missing, is a directory, or fails to load
	#
	cpdef MappingNode load(str filename, str shortname, bint copy_tree=False, object project=None):
	    cdef str displayname
	    cdef Py_ssize_t file_number

	    # Fall back to the full path when no shorthand was given
	    shortname = shortname or filename

	    # Files reached through a junction are displayed as "junction:shortname"
	    if project is None or project.junction is None:
	        displayname = shortname
	    else:
	        displayname = "{}:{}".format(project.junction.name, shortname)

	    # Register the file first, the returned index is stored in the nodes
	    file_number = node._create_new_file(filename, shortname, displayname, project)

	    try:
	        with open(filename) as stream:
	            text = stream.read()

	        return load_data(text,
	                         file_index=file_number,
	                         file_name=filename,
	                         copy_tree=copy_tree)
	    except FileNotFoundError as e:
	        raise LoadError("Could not find file at {}".format(filename),
	                        LoadErrorReason.MISSING_FILE) from e
	    except IsADirectoryError as e:
	        raise LoadError("{} is a directory".format(filename),
	                        LoadErrorReason.LOADING_DIRECTORY) from e
	    except LoadError as e:
	        # Prefix errors coming out of load_data() with the display name
	        raise LoadError("{}: {}".format(displayname, e), e.reason) from e


	# load_data()
	#
	# Like load(), but doesn't require the data to be in a file
	#
	# Args:
	#    data (str): The YAML text to parse
	#    file_index (int): The file index recorded in the created nodes, defaults
	#                      to node._SYNTHETIC_FILE_INDEX
	#    file_name (str): Optional filename, only used in error reports
	#    copy_tree (bool): Whether to return a clone of the parsed tree, leaving
	#                      the original registered as the root node of the file
	#
	# Returns:
	#    (MappingNode): The top level mapping, empty if the YAML had no content
	#
	# Raises:
	#    (LoadError): With LoadErrorReason.INVALID_YAML when parsing fails or
	#                 the content is not a mapping
	#
	cpdef MappingNode load_data(str data, int file_index=node._SYNTHETIC_FILE_INDEX, str file_name=None, bint copy_tree=False):
	    cdef Representer rep

	    try:
	        rep = Representer(file_index)
	        parser = yaml.CParser(data)

	        # Feed every parse event to the representer, and always release the
	        # parser, even when an event is rejected
	        try:
	            while parser.check_event():
	                rep.handle_event(parser.get_event())
	        finally:
	            parser.dispose()

	        contents = rep.get_output()
	    except YAMLLoadError as e:
	        # Input rejected by the Representer
	        raise LoadError("Malformed YAML:\n\n{}\n\n".format(e),
	                        LoadErrorReason.INVALID_YAML) from e
	    except Exception as e:
	        # Anything else raised while parsing, including by the parser itself
	        raise LoadError("Severely malformed YAML:\n\n{}\n\n".format(e),
	                        LoadErrorReason.INVALID_YAML) from e

	    if type(contents) != MappingNode:
	        # Special case allowance for None, when the loaded file has only comments in it.
	        if contents is None:
	            contents = MappingNode.__new__(MappingNode, file_index, 0, 0, {})
	        else:
	            # Report the type of the object itself; indexing it with [0]
	            # would fail or name the wrong type.
	            raise LoadError("YAML file has content of type '{}' instead of expected type 'dict': {}"
	                            .format(type(contents).__name__, file_name),
	                            LoadErrorReason.INVALID_YAML)

	    # Store this away because we'll use it later for "top level" provenance
	    node._set_root_node_for_file(file_index, contents)

	    if copy_tree:
	        contents = contents.clone()
	    return contents


	###############################################################################

	# Roundtrip code

	# Represent Nodes automatically

	def represent_mapping(self, MappingNode mapping):
	    # A MappingNode is dumped as the plain dictionary it wraps
	    plain_dict = mapping.value
	    return self.represent_dict(plain_dict)

	def represent_scalar(self, ScalarNode scalar):
	    # A ScalarNode is dumped as the string it wraps
	    text = scalar.value
	    return self.represent_str(text)

	def represent_sequence(self, SequenceNode sequence):
	    # Emit a SequenceNode by handing its wrapped `value` to the
	    # representer's own represent_list().
	    wrapped = sequence.value
	    return self.represent_list(wrapped)


	# Teach the roundtrip representer how to emit each of the node types
	for _node_type, _node_representer in (
	    (MappingNode, represent_mapping),
	    (ScalarNode, represent_scalar),
	    (SequenceNode, represent_sequence),
	):
	    yaml.RoundTripRepresenter.add_representer(_node_type, _node_representer)
	# Do not leave the loop variables behind in the module namespace
	del _node_type, _node_representer

	# Represent simple types as strings

	def represent_as_str(self, value):
	    # Stringify the value first, so that it is always emitted through
	    # the representer's represent_str().
	    text = str(value)
	    return self.represent_str(text)

	# Every one of these simple types is emitted through represent_as_str()
	for _simple_type in (
	    type(None),
	    int,
	    float,
	    bool,
	    datetime.datetime,
	    datetime.date,
	):
	    yaml.RoundTripRepresenter.add_representer(_simple_type, represent_as_str)
	# Do not leave the loop variable behind in the module namespace
	del _simple_type

	# Always represent things consistently:

	yaml.RoundTripRepresenter.add_representer(OrderedDict, yaml.SafeRepresenter.represent_dict)

	# Always parse things consistently: each of these tags is constructed
	# with the string constructor

	for _scalar_tag in (
	    u'tag:yaml.org,2002:int',
	    u'tag:yaml.org,2002:float',
	    u'tag:yaml.org,2002:bool',
	    u'tag:yaml.org,2002:null',
	    u'tag:yaml.org,2002:timestamp',
	):
	    yaml.RoundTripConstructor.add_constructor(_scalar_tag,
	                                              yaml.RoundTripConstructor.construct_yaml_str)
	# Do not leave the loop variable behind in the module namespace
	del _scalar_tag


	# HardlineDumper
	#
	# This is a dumper used during roundtrip_dump which forces every scalar to be
	# a plain string, in order to match the output format to the input format.
	#
	# The constructor accepts and forwards any positional and keyword arguments
	# to yaml.RoundTripDumper unchanged.
	#
	# If you discover something is broken, please add a test case to the roundtrip
	# test in tests/internals/yaml/roundtrip-test.yaml
	#
	class HardlineDumper(yaml.RoundTripDumper):
	    def __init__(self, *args, **kwargs):
	        # The signature must be fully variadic: a fixed `args` parameter
	        # cannot receive keyword options and would forward the positional
	        # arguments to the base class incorrectly.
	        yaml.RoundTripDumper.__init__(self, *args, **kwargs)
	        # For each of YAML 1.1 and 1.2, force everything to be a plain string
	        for version in [(1, 1), (1, 2), None]:
	            self.add_version_implicit_resolver(
	                version,
	                u'tag:yaml.org,2002:str',
	                yaml.util.RegExp(r'.*'),
	                None)


	# roundtrip_load()
	#
	# Read a YAML file and parse it with roundtrip_load_data(), giving a
	# structure which can be roundtripped as best as ruamel permits.
	#
	# Note, the returned objects can be treated as Mappings and Lists and Strings
	# but replacing content wholesale with plain dicts and lists may result
	# in a loss of comments and formatting.
	#
	# Args:
	#    filename (str): The file to load in
	#    allow_missing (bool): Optionally set this to True to get an empty
	#                          dictionary back for a missing file
	#
	# Returns:
	#    (Mapping): The loaded YAML mapping.
	#
	# Raises:
	#    (LoadError): If the file is missing (and allow_missing is not set), or
	#                 is a directory. Also if the YAML is malformed.
	#
	def roundtrip_load(filename, *, allow_missing=False):
	    try:
	        with open(filename, "r") as stream:
	            text = stream.read()
	        loaded = roundtrip_load_data(text, filename=filename)
	    except FileNotFoundError as e:
	        # Missing files are always empty dictionaries
	        if allow_missing:
	            return {}
	        raise LoadError("Could not find file at {}".format(filename),
	                        LoadErrorReason.MISSING_FILE) from e
	    except IsADirectoryError as e:
	        raise LoadError("{} is a directory.".format(filename),
	                        LoadErrorReason.LOADING_DIRECTORY) from e
	    return loaded


	# roundtrip_load_data()
	#
	# Parse a string of YAML into a roundtrippable data structure.
	#
	# When parsing yields nothing at all, an empty mapping is returned.
	#
	# Args:
	#    contents (str): The contents to be parsed as YAML
	#    filename (str): Optional filename to be used in error reports
	#
	# Returns:
	#    (Mapping): The loaded YAML mapping
	#
	# Raises:
	#    (LoadError): Raised on invalid YAML, or YAML which parses to something other
	#                 than a Mapping
	#
	def roundtrip_load_data(contents, *, filename=None):
	    try:
	        parsed = yaml.load(contents, yaml.RoundTripLoader, preserve_quotes=True)
	    except (yaml.scanner.ScannerError, yaml.composer.ComposerError, yaml.parser.ParserError) as e:
	        message = "Malformed YAML:\n\n{}\n\n{}\n".format(e.problem, e.problem_mark)
	        raise LoadError(message, LoadErrorReason.INVALID_YAML) from e

	    # Special case empty files: hand back an empty mapping, like the main
	    # Node loader does
	    if parsed is None:
	        return {}

	    if isinstance(parsed, Mapping):
	        return parsed

	    # Anything which is not a mapping at the top level is rejected
	    raise LoadError("YAML file has content of type '{}' instead of expected type 'dict': {}"
	                    .format(type(parsed).__name__, filename), LoadErrorReason.INVALID_YAML)


	# roundtrip_dump()
	#
	# Write the given contents out as YAML using the HardlineDumper. Ideally the
	# contents came from parsing with `roundtrip_load` or `roundtrip_load_data`
	# so that they will be dumped in the same form as they came from.
	#
	# The destination depends on `file`:
	#   - a string is taken as the name of the file to write, which is saved
	#     through utils.save_file_atomic()
	#   - an object with a `write` attribute is used as the output stream
	#   - anything else sends the output to stdout
	#
	# Args:
	#    contents (Mapping or list): The content to write out as YAML.
	#    file (any): The file to write to
	#
	def roundtrip_dump(contents, file=None):
	    with ExitStack() as cleanup:
	        if type(file) is str:
	            # Imported here, as in the original, instead of at module level
	            from . import utils
	            out = cleanup.enter_context(utils.save_file_atomic(file, 'w'))
	        else:
	            out = file if hasattr(file, 'write') else sys.stdout
	        yaml.round_trip_dump(contents, out, Dumper=HardlineDumper)