build-support/iwyu/fix_includes.py - kudu - Git at Google

 #!/usr/bin/env python

 ##===--- fix_includes.py - rewrite source files based on iwyu output ------===##
 #
 #                     The LLVM Compiler Infrastructure
 #
 # This file is distributed under the University of Illinois Open Source
 # License. See LICENSE.TXT for details.
 #
 ##===----------------------------------------------------------------------===##

 # Modified for Kudu to suit a few idiosyncrasies of the Kudu style.
 # For a detailed changelog, refer to the git log on this file.

 from __future__ import print_function

 """Update files with the 'correct' #include and forward-declare lines.

 Given the output of include_what_you_use on stdin -- when run at the
 (default) --v=1 verbosity level or higher -- modify the files
 mentioned in the output, removing their old #include lines and
 replacing them with the lines given by the include_what_you_use
 script.

 This script runs in four stages.  In the first, it groups physical
 lines together to form 'move spans'.  A 'move span' is the atomic unit
 for moving or deleting code.  A move span is either a) an #include
 line, along with any comment lines immediately preceding it; b) a
 forward-declare line -- or more if it's a multi-line forward declare
 -- along with preceding comments; c) any other single line.  Example:

    // I really am glad I'm forward-declaring this class!
    // If I didn't, I'd have to #include the entire world.
    template<typename A, typename B, typename C, typename D>
    class MyClass;

 Then, it groups move spans together into 'reorder spans'.  These are
 spans of code that consist entirely of #includes and forward-declares,
 maybe separated by blank lines and comments.  We assume that we can
 arbitrarily reorder #includes and forward-declares within a reorder
 span, without affecting correctness.  Things like #ifdefs, #defines,
 namespace declarations, static variable declarations, class
 definitions, etc -- just about anything -- break up reorder spans.

 In stage 3 it deletes all #include and forward-declare lines that iwyu
 says to delete.  iwyu includes line numbers for deletion, making this
 part easy.  If this step results in "empty" #ifdefs or namespaces
 (#ifdefs or namespaces with no code inside them), we delete those as
 well.  We recalculate the reorder spans, which may have gotten bigger
 due to the deleted code.

 In stage 4 it adds new iwyu-dictated #includes and forward-declares
 after the last existing #includes and forward-declares.  Then it
 reorders the #includes and forward-declares to match the order
 specified by iwyu.  It follows iwyu's instructions as much as
 possible, modulo the constraint that an #include or forward-declare
 cannot leave its current reorder span.

 All this moving messes up the blank lines, which we then need to fix
 up.  Then we're done!
 """

 __author__ = 'csilvers@google.com (Craig Silverstein)'

 import difflib
 import optparse
 import os
 import re
 import sys
 from collections import OrderedDict

 _USAGE = """\
 %prog [options] [filename] ... < <output from include-what-you-use script>
     OR %prog -s [other options] <filename> ...

 %prog reads the output from the include-what-you-use
 script on stdin -- run with --v=1 (default) verbose or above -- and,
 unless --sort_only or --dry_run is specified,
 modifies the files mentioned in the output, removing their old
 #include lines and replacing them with the lines given by the
 include_what_you_use script.  It also sorts the #include and
 forward-declare lines.

 All files mentioned in the include-what-you-use script are modified,
 unless filenames are specified on the commandline, in which case only
 those files are modified.

 The exit code is the number of files that were modified (or that would
 be modified if --dry_run was specified) unless that number exceeds 100,
 in which case 100 is returned.

 ------------------------------------------------------------
 NOTE: For usage in Kudu, typically this script should be run using the
 wrapper in build-support/fix_includes.py which sets up the arguments
 appropriately for common tasks.
 ------------------------------------------------------------
 """

 _COMMENT_RE = re.compile(r'\s*//.*')

 # These are the types of lines a file can have.  These are matched
 # using re.match(), so don't need a leading ^.
 _C_COMMENT_START_RE = re.compile(r'\s*/\*')
 _C_COMMENT_END_RE = re.compile(r'.*\*/\s*(.*)$')
 _COMMENT_LINE_RE = re.compile(r'\s*//')
 _PRAGMA_ONCE_LINE_RE = re.compile(r'\s*#\s*pragma\s+once')
 _BLANK_LINE_RE = re.compile(r'\s*$')
 _IF_RE = re.compile(r'\s*#\s*if')               # compiles #if/ifdef/ifndef
 _ELSE_RE = re.compile(r'\s*#\s*(else|elif)\b')  # compiles #else/elif
 _ENDIF_RE = re.compile(r'\s*#\s*endif\b')
 # This is used to delete 'empty' namespaces after fwd-decls are removed.
 # Some third-party libraries use macros to start/end namespaces.
 _NAMESPACE_START_RE = re.compile(r'\s*(namespace\b[^{]*{\s*)+(//.*)?$|'
                                  r'\s*(U_NAMESPACE_BEGIN)|'
                                  r'\s*(HASH_NAMESPACE_DECLARATION_START)')
 _NAMESPACE_END_RE = re.compile(r'\s*(})|'
                                r'\s*(U_NAMESPACE_END)|'
                                r'\s*(HASH_NAMESPACE_DECLARATION_END)')
 # The group (in parens) holds the unique 'key' identifying this #include.
 _INCLUDE_RE = re.compile(r'\s*#\s*include\s+([<"][^"">]+[>"])')
 # We don't need this to actually match forward-declare lines (we get
 # that information from the iwyu input), but we do need an RE here to
 # serve as an index to _LINE_TYPES.  So we use an RE that never matches.
 _FORWARD_DECLARE_RE = re.compile(r'$.FORWARD_DECLARE_RE')
 # Likewise, used to mark an '#ifdef' line of a header guard, or other
 # #ifdef that covers an entire file.
 _HEADER_GUARD_RE = re.compile(r'$.HEADER_GUARD_RE')
 # Marks the '#define' line that comes after a header guard.  Since we
 # know the previous line was a header-guard line, we're not that picky
 # about this one.
 _HEADER_GUARD_DEFINE_RE = re.compile(r'\s*#\s*define\s+')

 # Infixes used in test filenames like 'foo-test.cc'. These will be
 # canonicalized to be equivalent to  'foo.cc' and thus ensure that 'foo.h'
 # be the first include.
 _TEST_INFIX_RE = re.compile(r'([-_]unittest|[-_]regtest|[-_]test)$')

 # We annotate every line in the source file by the re it matches, or None.
 # Note that not all of the above RE's are represented here; for instance,
 # we fold _C_COMMENT_START_RE and _C_COMMENT_END_RE into _COMMENT_LINE_RE.
 _LINE_TYPES = [_COMMENT_LINE_RE, _BLANK_LINE_RE,
                _NAMESPACE_START_RE, _NAMESPACE_END_RE,
                _IF_RE, _ELSE_RE, _ENDIF_RE,
                _INCLUDE_RE, _FORWARD_DECLARE_RE,
                _HEADER_GUARD_RE, _HEADER_GUARD_DEFINE_RE,
                _PRAGMA_ONCE_LINE_RE,
               ]

 # A regexp matching #include lines that should be a barrier for
 # sorting -- that is, we should never reorganize the code so an
 # #include that used to come before this line now comes after, or vice
 # versa.  This can be used for 'fragile' #includes that require other
 # #includes to happen before them to function properly.
 # (Note that the barrier has no effect on where new #includes are
 # added; it just affects the reordering of existing #includes.)
 _BARRIER_INCLUDES = re.compile(r'^\s*#\s*include\s+(<linux/)')

 # A list of all known extensions for C++ source files, used to
 # guess if a filename is a source file or a header.
 # Please keep this in sync with source_extensions in iwyu_path_util.cc.
 _SOURCE_EXTENSIONS = [".c", ".C", ".cc", ".CC", ".cxx", ".CXX",
                       ".cpp", ".CPP", ".c++", ".C++", ".cp"]

 # Lines matching this pattern will not be deleted even if they look
 # like duplicate includes.
 _IWYU_PRAGMA_KEEP_RE = re.compile(r'IWYU pragma:\s+keep')


 # Adapt Python 2 iterators to Python 3 syntax
 if sys.version_info[0] < 3:
   def next(i):
     return i.next()


 class OrderedSet(object):
   """ Sometimes sets affect order of outputs, which hinders testing. This
   (naive) set implementation preserves order to avoid that problem. """
   def __init__(self, iterable=None):
     iterable = iterable or []
     self.storage = OrderedDict((a, None) for a in iterable)

   def add(self, value):
     self.storage[value] = None

   def intersection_update(self, other):
     self.storage = OrderedDict(
         (k, None) for k in self.storage if k in other.storage)

   def update(self, other):
     self.storage.update(other.storage)

   def difference(self, other):
     diff_values = (v for v in self if v not in other)
     return OrderedSet(diff_values)

   def __iter__(self):
     return self.storage.keys().__iter__()

   def __contains__(self, value):
     return value in self.storage

   def __len__(self):
     return len(self.storage)


 def _MayBeHeaderFile(filename):
   """Tries to figure out if filename is a C++ header file.  Defaults to yes."""
   # Header files have all sorts of extensions: .h, .hpp, .hxx, or no
   # extension at all.  So we say everything is a header file unless it
   # has a known extension that's not.
   extension = os.path.splitext(filename)[1]
   return extension not in _SOURCE_EXTENSIONS


 class FixIncludesError(Exception):
   pass


 class IWYUOutputRecord(object):
   """Information that the iwyu output file has about one source file."""

   def __init__(self, filename):
     self.filename = filename

     # A set of integers.
     self.lines_to_delete = set()

     # A set of integer line-numbers, for each #include iwyu saw that
     # is marked with a line number.  This is usually not an exhaustive
     # list of include-lines, but that's ok because we only use this
     # data structure for sanity checking: we double-check with our own
     # analysis that these lines are all # #include lines.  If not, we
     # know the iwyu data is likely out of date, and we complain.  So
     # more data here is always welcome, but not essential.
     self.some_include_lines = set()

     # A set of integer line-number spans [start_line, end_line), for
     # each forward-declare iwyu saw.  iwyu reports line numbers for
     # every forward-declare it sees in the source code.  (It won't
     # report, though, forward-declares inside '#if 0' or similar.)
     self.seen_forward_declare_lines = set()

     # A set of each line in the iwyu 'add' section.
     self.includes_and_forward_declares_to_add = OrderedSet()

     # A map from the include filename (including ""s or <>s) to the
     # full line as given by iwyu, which includes comments that iwyu
     # has put next to the #include.  This holds both 'to-add' and
     # 'to-keep' #includes.  If flags.comments is False, the comments
     # are removed before adding to this list.
     self.full_include_lines = OrderedDict()

   def Merge(self, other):
     """Merges other with this one.  They must share a filename.

     This function is intended to be used when we see two iwyu records
     in the input, both for the same file.  We can merge the two together.
     We are conservative: we union the lines to add, and intersect the
     lines to delete.

     Arguments:
       other: an IWYUOutputRecord to merge into this one.
         It must have the same value for filename that self does.
     """
     assert self.filename == other.filename, "Can't merge distinct files"
     self.lines_to_delete.intersection_update(other.lines_to_delete)
     self.some_include_lines.update(other.some_include_lines)
     self.seen_forward_declare_lines.update(other.seen_forward_declare_lines)
     self.includes_and_forward_declares_to_add.update(
         other.includes_and_forward_declares_to_add)
     self.full_include_lines.update(other.full_include_lines)

   def HasContentfulChanges(self):
     """Returns true iff this record has at least one add or delete."""
     return (self.includes_and_forward_declares_to_add or
             self.lines_to_delete)

   def __str__(self):
     return ('--- iwyu record ---\n  FILENAME: %s\n  LINES TO DELETE: %s\n'
             '  (SOME) INCLUDE LINES: %s\n  (SOME) FWD-DECL LINES: %s\n'
             '  TO ADD: %s\n  ALL INCLUDES: %s\n---\n'
             % (self.filename, self.lines_to_delete,
                self.some_include_lines, self.seen_forward_declare_lines,
                self.includes_and_forward_declares_to_add,
                self.full_include_lines))


 class IWYUOutputParser(object):
   """Parses the lines in iwyu output corresponding to one source file."""

   # iwyu adds this comment to some lines to map them to the source file.
   _LINE_NUMBERS_COMMENT_RE = re.compile(r'\s*// lines ([0-9]+)-([0-9]+)')

   # The output of include-what-you-use has sections that indicate what
   # #includes and forward-declares should be added to the output file,
   # what should be removed, and what the end result is.  The first line
   # of each section also has the filename.
   _ADD_SECTION_RE = re.compile(r'^(.*) should add these lines:$')
   _REMOVE_SECTION_RE = re.compile(r'^(.*) should remove these lines:$')
   _TOTAL_SECTION_RE = re.compile(r'^The full include-list for (.*):$')
   _SECTION_END_RE = re.compile(r'^---$')

   # Alternately, if a file does not need any iwyu modifications (though
   # it still may need its #includes sorted), iwyu will emit this:
   _NO_EDITS_RE = re.compile(r'^\((.*) has correct #includes/fwd-decls\)$')

   _RE_TO_NAME = {_ADD_SECTION_RE: 'add',
                  _REMOVE_SECTION_RE: 'remove',
                  _TOTAL_SECTION_RE: 'total',
                  _SECTION_END_RE: 'end',
                  _NO_EDITS_RE: 'no_edits',
                 }
   # A small state-transition machine.  key==None indicates the start
   # state.  value==None means that the key is an end state (that is,
   # its presence indicates the record is finished).
   _EXPECTED_NEXT_RE = {
       None:               frozenset([_ADD_SECTION_RE, _NO_EDITS_RE]),
       _ADD_SECTION_RE:    frozenset([_REMOVE_SECTION_RE]),
       _REMOVE_SECTION_RE: frozenset([_TOTAL_SECTION_RE]),
       _TOTAL_SECTION_RE:  frozenset([_SECTION_END_RE]),
       _SECTION_END_RE:    None,
       _NO_EDITS_RE:       None,
   }

   def __init__(self):
     # This is set to one of the 'section' REs above.  None is the start-state.
     self.current_section = None
     self.filename = '<unknown file>'
     self.lines_by_section = {}     # key is an RE, value is a list of lines

   def _ProcessOneLine(self, line):
     """Reads one line of input, updates self, and returns False at EORecord.

     If the line matches one of the hard-coded section names, updates
     self.filename and self.current_section.  Otherwise, the line is
     taken to be a member of the currently active section, and is added
     to self.lines_by_section.

     Arguments:
       line: one line from the iwyu input file.

     Returns:
       False if the line is the end-of-section marker, True otherwise.

     Raises:
       FixIncludesError: if there is an out-of-order section or
       mismatched filename.
     """
     line = line.rstrip()     # don't worry about line endings
     if not line:             # just ignore blank lines
       return True

     for (section_re, section_name) in self._RE_TO_NAME.items():
       m = section_re.search(line)
       if m:
         # Check or set the filename (if the re has a group, it's for filename).
         if section_re.groups >= 1:
           this_filename = m.group(1)
           if (self.current_section is not None and
               this_filename != self.filename):
             raise FixIncludesError('"%s" section for %s comes after "%s" for %s'
                                    % (section_name, this_filename,
                                       self._RE_TO_NAME[self.current_section],
                                       self.filename))
           self.filename = this_filename

         # Check and set the new section we're entering.
         if section_re not in self._EXPECTED_NEXT_RE[self.current_section]:
           if self.current_section is None:
             raise FixIncludesError('%s: "%s" section unexpectedly comes first'
                                    % (self.filename, section_name))
           else:
             raise FixIncludesError('%s: "%s" section unexpectedly follows "%s"'
                                    % (self.filename, section_name,
                                       self._RE_TO_NAME[self.current_section]))
         self.current_section = section_re
         # We're done parsing this record if this section has nothing after it.
         return self._EXPECTED_NEXT_RE[self.current_section] is not None

     # We're not starting a new section, so just add to the current section.
     # We ignore lines before section-start, they're probably things like
     # compiler messages ("Compiling file foo").
     if self.current_section is not None:
       self.lines_by_section.setdefault(self.current_section, []).append(line)
     return True

   def ParseOneRecord(self, iwyu_output, flags):
     """Given a file object with output from an iwyu run, return per file info.

     For each source file that iwyu_output mentions (because iwyu was run on
     it), we return a structure holding the information in IWYUOutputRecord:
     1) What file these changes apply to
     2) What line numbers hold includes/fwd-declares to remove
     3) What includes/fwd-declares to add
     4) Ordering information for includes and fwd-declares

     Arguments:
       iwyu_output: a File object returning lines from an iwyu run
       flags: commandline flags, as parsed by optparse.  We use
          flags.comments, which controls whether we output comments
          generated by iwyu.
     Returns:
        An IWYUOutputRecord object, or None at EOF.

     Raises:
        FixIncludesError: for malformed-looking lines in the iwyu output.
     """
     for line in iwyu_output:
       if not self._ProcessOneLine(line):   # returns False at end-of-record
         break
     else:                                  # for/else
       return None                          # at EOF

     # Now set up all the fields in an IWYUOutputRecord.
     # IWYUOutputRecord.filename
     retval = IWYUOutputRecord(self.filename)

     # IWYUOutputRecord.lines_to_delete
     for line in self.lines_by_section.get(self._REMOVE_SECTION_RE, []):
       m = self._LINE_NUMBERS_COMMENT_RE.search(line)
       if not m:
         raise FixIncludesError('line "%s" (for %s) has no line number'
                                % (line, self.filename))
       # The RE is of the form [start_line, end_line], inclusive.
       for line_number in range(int(m.group(1)), int(m.group(2)) + 1):
         retval.lines_to_delete.add(line_number)

     # IWYUOutputRecord.some_include_lines
     for line in (self.lines_by_section.get(self._REMOVE_SECTION_RE, []) +
                  self.lines_by_section.get(self._TOTAL_SECTION_RE, [])):
       if not _INCLUDE_RE.match(line):
         continue
       m = self._LINE_NUMBERS_COMMENT_RE.search(line)
       if not m:
         continue   # not all #include lines have line numbers, but some do
       for line_number in range(int(m.group(1)), int(m.group(2)) + 1):
         retval.some_include_lines.add(line_number)

     # IWYUOutputRecord.seen_forward_declare_lines
     for line in (self.lines_by_section.get(self._REMOVE_SECTION_RE, []) +
                  self.lines_by_section.get(self._TOTAL_SECTION_RE, [])):
       # Everything that's not an #include is a forward-declare.
       if line.startswith('- '):    # the 'remove' lines all start with '- '.
         line = line[len('- '):]
       if _INCLUDE_RE.match(line):
         continue
       m = self._LINE_NUMBERS_COMMENT_RE.search(line)
       if m:
         retval.seen_forward_declare_lines.add((int(m.group(1)),
                                                int(m.group(2))+1))

     # IWYUOutputRecord.includes_and_forward_declares_to_add
     for line in self.lines_by_section.get(self._ADD_SECTION_RE, []):
       line = _COMMENT_RE.sub('', line)
       retval.includes_and_forward_declares_to_add.add(line)

     # IWYUOutputRecord.full_include_lines
     for line in self.lines_by_section.get(self._TOTAL_SECTION_RE, []):
       m = _INCLUDE_RE.match(line)
       if m:
         if not flags.comments:
           line = _COMMENT_RE.sub('', line)  # pretend there were no comments
         else:
           # Just remove '// line XX': that's iwyu metadata, not a real comment
           line = self._LINE_NUMBERS_COMMENT_RE.sub('', line)
         retval.full_include_lines[m.group(1)] = line

     return retval


 class LineInfo(object):
   """Information about a single line of a source file."""

   def __init__(self, line):
     """Initializes the content of the line, but no ancillary fields."""
     # The content of the line in the input file
     self.line = line

     # The 'type' of the line.  The 'type' is one of the regular
     # expression objects in _LINE_TYPES, or None for any line that
     # does not match any regular expression in _LINE_TYPES.
     self.type = None

     # True if no lines processed before this one have the same type
     # as this line.
     self.is_first_line_of_this_type = False

     # Set to true if we want to delete/ignore this line in the output
     # (for instance, because iwyu says to delete this line).  At the
     # start, the only line to delete is the 'dummy' line 0.
     self.deleted = self.line is None

     # If this line is an #include or a forward-declare, gives a
     # [begin,end) pair saying the 'span' this line is part of.  We do
     # this for two types of span: the move span (an #include or
     # forward declare, along with any preceding comments) and the
     # reorder span (a continguous block of move-spans, connected only
     # by blank lines and comments).  For lines that are not an
     # #include or forward-declare, these may have an arbitrary value.
     self.move_span = None
     self.reorder_span = None

     # If this line is an #include or a forward-declare, gives the
     # 'key' of the line.  For #includes it is the filename included,
     # including the ""s or <>s.  For a forward-declare it's the name
     # of the class/struct.  For other types of lines, this is None.
     self.key = None

   def __str__(self):
     if self.deleted:
       line = 'XX-%s-XX' % self.line
     else:
       line = '>>>%s<<<' % self.line
     if self.type is None:
       type_id = None
     else:
       type_id = _LINE_TYPES.index(self.type)
     return ('%s\n  -- type: %s (key: %s).  move_span: %s.  reorder_span: %s'
             % (line, type_id, self.key, self.move_span, self.reorder_span))


 class FileInfo(object):
   """ Details about a file's storage encoding  """
   DEFAULT_LINESEP = os.linesep
   DEFAULT_ENCODING = 'utf-8'

   def __init__(self, linesep, encoding):
     self.linesep = linesep
     self.encoding = encoding

   @staticmethod
   def parse(filename):
     """ Return a FileInfo object describing file encoding details. """
     with open(filename, 'rb') as f:
       content = f.read()

     linesep = FileInfo.guess_linesep(content)
     encoding = FileInfo.guess_encoding(content)
     return FileInfo(linesep, encoding)

   @staticmethod
   def guess_linesep(bytebuf):
     """ Return most frequent line separator of buffer. """
     win = bytebuf.count(b'\r\n')
     unix = bytebuf.count(b'\n') - win
     if win > unix:
       return '\r\n'
     elif unix > win:
       return '\n'

     return FileInfo.DEFAULT_LINESEP

   @staticmethod
   def guess_encoding(bytebuf):
     """ Return approximate encoding for buffer.

     This is heavily heuristic, and will return any supported encoding that can
     describe the file without losing information, not necessarily the *right*
     encoding. This is usually OK, because IWYU typically only adds ASCII content
     (or content pulled from the file itself).
     """
     def try_decode(buf, encoding):
       try:
         buf.decode(encoding, errors='strict')
       except UnicodeError:
         return False
       return True

     # Special-case UTF-8 BOM
     if bytebuf[0:3] == b'\xef\xbb\xbf':
       if try_decode(bytebuf, 'utf-8'):
         return 'utf-8'

     encodings = ['ascii', 'utf-8', 'windows-1250', 'windows-1252']
     for encoding in encodings:
       if try_decode(bytebuf, encoding):
         return encoding

     return FileInfo.DEFAULT_ENCODING


 def _ReadFile(filename, fileinfo):
   """Read from filename and return a list of file lines."""
   try:
     with open(filename, 'rb') as f:
       content = f.read()
       return content.decode(fileinfo.encoding).splitlines()
   except (IOError, OSError) as why:
     print("Skipping '%s': %s" % (filename, why))
   return None


 def _WriteFile(filename, fileinfo, file_lines):
   """Write the given file-lines to the file."""
   try:
     with open(filename, 'wb') as f:
       content = fileinfo.linesep.join(file_lines) + fileinfo.linesep
       content = content.encode(fileinfo.encoding)
       f.write(content)
   except (IOError, OSError) as why:
     print("Error writing '%s': %s" % (filename, why))


 def PrintFileDiff(old_file_contents, new_file_contents):
   """Print a unified diff between files, specified as lists of lines."""
   diff = difflib.unified_diff(old_file_contents, new_file_contents)
   # skip the '--- <filename>/+++ <filename>' lines at the start
   try:
     next(diff)
     next(diff)
     print('\n'.join(l.rstrip() for l in diff))
   except StopIteration:
     pass


 def _MarkHeaderGuardIfPresent(file_lines):
   """If any line in file_lines is a header-guard, mark it in file_lines.

   We define a header-guard as follows: an #ifdef where there is
   nothing contentful before or after the #ifdef.  Also, the #ifdef
   should have no #elif in it (though we don't currently test that).
   This catches the common case of an 'ifdef guard' in .h file, such
   as '#ifndef FOO_H\n#define FOO_H\n...contents...\n#endif', but it
   can also catch other whole-program #ifdefs, such as
   '#ifdef __linux\n...\n#endif'.  The issue here is that if an #ifdef
   encloses the entire file, then we are willing to put new
   #includes/fwd-declares inside the #ifdef (which normally we
   wouldn't do).  So we want to mark such #ifdefs with a special label.

   If we find such an #ifdef line -- and a single file can have at most
   one -- we change its type to a special type for header guards.

   Arguments:
     file_lines: an array of LineInfo objects with .type filled in.
   """
   # Pass over blank lines, pragmas and comments at the top of the file.
   i = 0
   for i in range(len(file_lines)):
     if (not file_lines[i].deleted and
         file_lines[i].type not in [_COMMENT_LINE_RE, _BLANK_LINE_RE,
                                    _PRAGMA_ONCE_LINE_RE]):
       break
   else:     # for/else: got to EOF without finding any non-blank/comment lines
     return

   # This next line is the candidate header guard-line.
   ifdef_start = i
   if file_lines[ifdef_start].type != _IF_RE:
     # Not a header guard, just return without doing anything.
     return

   # Find the end of this ifdef, to see if it's really a header guard..
   ifdef_depth = 0
   for ifdef_end in range(ifdef_start, len(file_lines)):
     if file_lines[ifdef_end].deleted:
       continue
     if file_lines[ifdef_end].type == _IF_RE:
       ifdef_depth += 1
     elif file_lines[ifdef_end].type == _ENDIF_RE:
       ifdef_depth -= 1
       if ifdef_depth == 0:   # The end of our #ifdef!
         break
   else:                      # for/else
     return False             # Weird: never found a close to this #ifdef

   # Finally, all the lines after the end of the ifdef must be blank or comments.
   for i in range(ifdef_end + 1, len(file_lines)):
     if (not file_lines[i].deleted and
         file_lines[i].type not in [_COMMENT_LINE_RE, _BLANK_LINE_RE]):
       return

   # We passed the gauntlet!
   file_lines[ifdef_start].type = _HEADER_GUARD_RE

   # And the line after the header guard #ifdef is the '#define' (usually).
   if _HEADER_GUARD_DEFINE_RE.match(file_lines[ifdef_start + 1].line):
     file_lines[ifdef_start+1].type = _HEADER_GUARD_DEFINE_RE


 def _CalculateLineTypesAndKeys(file_lines, iwyu_record):
   """Fills file_line's type and key fields, where the 'type' is a regexp object.

   We match each line (line_info.line) against every regexp in
   _LINE_TYPES, and assign the first that matches, or None if none
   does.  We also use iwyu_record's some_include_lines and
   seen_forward_declare_lines to identify those lines.  In fact,
   that's the only data source we use for forward-declare lines.

   Sets file_line.type and file_line.is_first_line_of_this_type for
   each file_line in file_lines.

   Arguments:
     file_lines: an array of LineInfo objects with .line fields filled in.
     iwyu_record: the IWYUOutputRecord struct for this source file.

   Raises:
     FixIncludesError: if iwyu_record's line-number information is
       is inconsistent with what we see in the file.  (For instance,
       it says line 12 is an #include, but we say it's a blank line,
       or the file only has 11 lines.)
   """
   seen_types = set()
   in_c_style_comment = False
   for line_info in file_lines:
     if line_info.line is None:
       line_info.type = None
     elif _C_COMMENT_START_RE.match(line_info.line):
       # Note: _C_COMMENT_START_RE only matches a comment at the start
       # of a line.  Comments in the middle of a line are ignored.
       # This can cause problems with multi-line comments that start
       # in the middle of the line, but that's hopefully quite rare.
       # TODO(csilvers): check for that case.
       m = _C_COMMENT_END_RE.match(line_info.line)
       if not m:             # comment continues onto future lines
         line_info.type = _COMMENT_LINE_RE
         in_c_style_comment = True
       elif not m.group(1):  # comment extends across entire line (only)
         line_info.type = _COMMENT_LINE_RE
       else:                 # comment takes only part of line, treat as content
         # TODO(csilvers): this mis-diagnoses lines like '/*comment*/class Foo;'
         line_info.type = None
     elif in_c_style_comment and _C_COMMENT_END_RE.match(line_info.line):
       line_info.type = _COMMENT_LINE_RE
       in_c_style_comment = False
     elif in_c_style_comment:
       line_info.type = _COMMENT_LINE_RE
     else:
       for type_re in _LINE_TYPES:
         # header-guard-define-re has a two-part decision criterion: it
         # matches the RE, *and* it comes after a header guard line.
         # That's too complex to figure out now, so we skip over it now
         # and fix it up later in _MarkHeaderGuardIfPresent().
         if type_re in (_HEADER_GUARD_DEFINE_RE,):
           continue
         m = type_re.match(line_info.line)
         if m:
           line_info.type = type_re
           if type_re == _INCLUDE_RE:
             line_info.key = m.group(1)   # get the 'key' for the #include.
           break
       else:    # for/else
         line_info.type = None   # means we didn't match any re

     line_info.is_first_line_of_this_type = (line_info.type not in seen_types)
     seen_types.add(line_info.type)

   # Now double-check against iwyu that we got all the #include lines right.
   for line_number in iwyu_record.some_include_lines:
     if file_lines[line_number].type != _INCLUDE_RE:
       raise FixIncludesError('iwyu line number %s:%d (%s) is not an #include'
                              % (iwyu_record.filename, line_number,
                                 file_lines[line_number].line))

   # We depend entirely on the iwyu_record for the forward-declare lines.
   for (start_line, end_line) in iwyu_record.seen_forward_declare_lines:
     for line_number in range(start_line, end_line):
       if line_number >= len(file_lines):
         raise FixIncludesError('iwyu line number %s:%d is past file-end'
                                % (iwyu_record.filename, line_number))
       file_lines[line_number].type = _FORWARD_DECLARE_RE

   # While we're at it, let's do a bit more sanity checking on iwyu_record.
   for line_number in iwyu_record.lines_to_delete:
     if line_number >= len(file_lines):
       raise FixIncludesError('iwyu line number %s:%d is past file-end'
                              % (iwyu_record.filename, line_number))
     elif file_lines[line_number].type not in (_INCLUDE_RE,
                                               _FORWARD_DECLARE_RE):
       raise FixIncludesError('iwyu line number %s:%d (%s) is not'
                              ' an #include or forward declare'
                              % (iwyu_record.filename, line_number,
                                 file_lines[line_number].line))

   # Check if this file has a header guard, which for our purposes is
   # an #ifdef (or #if) that covers an entire source file.  Usually
   # this will be a standard .h header-guard, but it could be something
   # like '#if __linux/#endif'.  The point here is that if an #ifdef
   # encloses the entire file, then we are willing to put new
   # #includes/fwd-declares inside the #ifdef (which normally we
   # wouldn't do).  So we mark such #ifdefs with a special label.
   _MarkHeaderGuardIfPresent(file_lines)


 def _PreviousNondeletedLine(file_lines, line_number):
   """Returns the line number of the previous not-deleted line, or None."""
   for line_number in range(line_number - 1, -1, -1):
     if not file_lines[line_number].deleted:
       return line_number
   return None


 def _NextNondeletedLine(file_lines, line_number):
   """Returns the line number of the next not-deleted line, or None."""
   for line_number in range(line_number + 1, len(file_lines)):
     if not file_lines[line_number].deleted:
       return line_number
   return None


 def _LineNumberStartingPrecedingComments(file_lines, line_number):
   """Returns the line-number for the comment-lines preceding the given linenum.

   Looking at file_lines, look at the lines immediately preceding the
   given line-number.  If they're comment lines, return the first line
   of the comment lines preceding the given line.  Otherwise, return
   the given line number.

   As a special case, if the comments go all the way up to the first
   line of the file (line 1), we assume they're comment lines, which
   are special -- they're not associated with any source code line --
   and we return line_number in that case.

   Arguments:
     file_lines: an array of LineInfo objects, with .type fields filled in.
     line_number: an index into file_lines.

   Returns:
     The first line number of the preceding comments, or line_number
       if there are no preceding comments or they appear to be a
       top-of-file copyright notice.
   """
   retval = line_number
   while retval > 0 and file_lines[retval - 1].type == _COMMENT_LINE_RE:
     retval -= 1
   if retval <= 1:          # top-of-line comments
     retval = line_number   # so ignore all the comment lines
   return retval


 def _CalculateMoveSpans(file_lines, forward_declare_spans):
   """Fills each input_line's move_span field.

   A 'move span' is a range of lines (from file_lines) that includes
   an #include or forward-declare, and all the comments preceding it.
   It is the unit we would move if we decided to move (or delete) this
   #include or forward-declare.

   For lines of type _INCLUDE_RE or _FORWARD_DECLARE_RE, the move span
   is set to the tuple [start_of_span, end_of_span).  All other lines
   have the move span kept at None.

   Arguments:
     file_lines: an array of LineInfo objects, with .type fields filled in.
     forward_declare_spans: a set of line-number pairs
        [start_line, end_line), each representing a single namespace.
        In practice this comes from iwyu_record.seen_forward_declare_lines.
   """
   # First let's do #includes.
   for line_number in range(len(file_lines)):
     if file_lines[line_number].type == _INCLUDE_RE:
       span_begin = _LineNumberStartingPrecedingComments(file_lines, line_number)
       for i in range(span_begin, line_number + 1):
         file_lines[i].move_span = (span_begin, line_number + 1)

   # Now forward-declares.  These spans come as input to this function.
   for (span_begin, span_end) in forward_declare_spans:
     span_begin = _LineNumberStartingPrecedingComments(file_lines, span_begin)
     for i in range(span_begin, span_end):
       file_lines[i].move_span = (span_begin, span_end)


 def _ContainsBarrierInclude(file_lines, line_range):
   """Returns true iff some line in [line_range[0], line_range[1]) is BARRIER."""
   for line_number in range(*line_range):
     if (not file_lines[line_number].deleted and
         _BARRIER_INCLUDES.search(file_lines[line_number].line)):
       return True
   return False


 def _LinesAreAllBlank(file_lines, start_line, end_line):
   """Returns true iff all lines in [start_line, end_line) are blank/deleted."""
   for line_number in range(start_line, end_line):
     if (not file_lines[line_number].deleted and
         file_lines[line_number].type != _BLANK_LINE_RE):
       return False
   return True


 def _CalculateReorderSpans(file_lines):
   """Fills each input_line's reorder_span field.

   A 'reorder span' is a range of lines (from file_lines) that only has
   #includes and forward-declares in it (and maybe blank lines, and
   comments associated with #includes or forward-declares).  In
   particular, it does not include any "real code" besides #includes
   and forward-declares: no functions, no static variable assignment,
   no macro #defines, no nothing.  We are willing to reorder #includes
   and namespaces freely inside a reorder span.

   Calculating reorder_span is easy: they're just the union of
   contiguous move-spans (with perhaps blank lines and comments
   thrown in), because move-spans share the 'no actual code'
   requirement.

   There's one exception: if any move-span matches the
   _BARRIER_INCLUDES regexp, it means that we should consider that
   move-span to be a 'barrier': nothing should get reordered from one
   side of that move-span to the other.  (This is used for #includes
   that depend on other #includes being before them to function
   properly.)  We do that by putting them into their own reorder span.

   For lines of type _INCLUDE_RE or _FORWARD_DECLARE_RE, the reorder
   span is set to the tuple [start_of_span, end_of_span).  All other
   lines have an arbitrary value for the reorder span.

   Arguments:
     file_lines: an array of LineInfo objects with .type and .move_span
        fields filled in.
   """
   # Happily, move_spans are disjoint. Just make sure they're sorted and unique.
   move_spans = [s.move_span for s in file_lines if s.move_span is not None]
   sorted_move_spans = sorted(set(move_spans))

   i = 0
   while i < len(sorted_move_spans):
     reorder_span_start = sorted_move_spans[i][0]

     # If we're a 'nosort' include, we're always in a reorder span of
     # our own.  Otherwise, add in the next move span if we're
     # connected to it only by blank lines.
     if not _ContainsBarrierInclude(file_lines, sorted_move_spans[i]):
       while i < len(sorted_move_spans) - 1:
         move_span_end = sorted_move_spans[i][1]
         next_move_span_start = sorted_move_spans[i+1][0]
         if (_LinesAreAllBlank(file_lines, move_span_end, next_move_span_start)
             and not _ContainsBarrierInclude(file_lines, sorted_move_spans[i+1])):
           i += 1
         else:
           break
     reorder_span_end = sorted_move_spans[i][1]
     # We'll map every line in the span to the span-extent.
     for line_number in range(reorder_span_start, reorder_span_end):
       file_lines[line_number].reorder_span = (reorder_span_start,
                                               reorder_span_end)
     i += 1


 def ParseOneFile(f, iwyu_record):
   """Given a file object, read and classify the lines of the file.

   For each file that iwyu_output mentions, we return a list of LineInfo
   objects, which is a parsed version of each line, including not only
   its content but its 'type', its 'key', etc.

   Arguments:
     f: an iterable object returning lines from a file.
     iwyu_record: the IWYUOutputRecord struct for this source file.

   Returns:
      An array of LineInfo objects.  The first element is always a dummy
      element, so the first line of the file is at retval[1], matching
      the way iwyu counts line numbers.
   """
   file_lines = [LineInfo(None)]
   for line in f:
     file_lines.append(LineInfo(line))
   _CalculateLineTypesAndKeys(file_lines, iwyu_record)
   _CalculateMoveSpans(file_lines, iwyu_record.seen_forward_declare_lines)
   _CalculateReorderSpans(file_lines)
   return file_lines


 def _DeleteEmptyNamespaces(file_lines):
   """Delete namespaces with nothing in them.

   Empty namespaces could be caused by transformations that removed
   forward-declarations:
         namespace foo {
         class Myclass;
         }
      ->
         namespace foo {
         }
   We want to get rid of the 'empty' namespace in this case.

   This routine 'deletes' lines by setting their 'deleted' field to True.

   Arguments:
     file_lines: an array of LineInfo objects with .type fields filled in.

   Returns:
     The number of namespaces deleted.
   """
   num_namespaces_deleted = 0
   start_line = 0
   while start_line < len(file_lines):
     line_info = file_lines[start_line]
     if line_info.deleted or line_info.type != _NAMESPACE_START_RE:
       start_line += 1
       continue
     # Because multiple namespaces can be on one line
     # ("namespace foo { namespace bar { ..."), we need to count.
     # We use the max because line may have 0 '{'s if it's a macro.
     # TODO(csilvers): ignore { in comments.
     namespace_depth = max(line_info.line.count('{'), 1)
     end_line = start_line + 1
     while end_line < len(file_lines):
       line_info = file_lines[end_line]
       if line_info.deleted:
         end_line += 1
       elif line_info.type in (_COMMENT_LINE_RE, _BLANK_LINE_RE):
         end_line += 1                # ignore blank lines
       elif line_info.type == _NAMESPACE_START_RE:     # nested namespace
         namespace_depth += max(line_info.line.count('{'), 1)
         end_line += 1
       elif line_info.type == _NAMESPACE_END_RE:
         namespace_depth -= max(line_info.line.count('}'), 1)
         end_line += 1
         if namespace_depth <= 0:
           # Delete any comments preceding this namespace as well.
           start_line = _LineNumberStartingPrecedingComments(file_lines,
                                                             start_line)
           # And also blank lines.
           while (start_line > 0 and
                  file_lines[start_line-1].type == _BLANK_LINE_RE):
             start_line -= 1
           for line_number in range(start_line, end_line):
             file_lines[line_number].deleted = True
           num_namespaces_deleted += 1
           break
       else:   # bail: we're at a line indicating this isn't an empty namespace
         end_line = start_line + 1  # rewind to try again with nested namespaces
         break
     start_line = end_line

   return num_namespaces_deleted


 def _DeleteEmptyIfdefs(file_lines):
   """Deletes ifdefs with nothing in them.

   This could be caused by transformations that removed #includes:
         #ifdef OS_WINDOWS
         # include <windows.h>
         #endif
      ->
         #ifdef OS_WINDOWS
         #endif
   We want to get rid of the 'empty' #ifdef in this case.
   We also handle 'empty' #ifdefs with #else, if both sides of
   the #else are empty.  We also handle #ifndef and #if.

   This routine 'deletes' lines by replacing their content with None.

   Arguments:
     file_lines: an array of LineInfo objects with .type fields filled in.

   Returns:
     The number of ifdefs deleted.
   """
   num_ifdefs_deleted = 0
   start_line = 0
   while start_line < len(file_lines):
     if file_lines[start_line].type not in (_IF_RE, _HEADER_GUARD_RE):
       start_line += 1
       continue
     end_line = start_line + 1
     while end_line < len(file_lines):
       line_info = file_lines[end_line]
       if line_info.deleted:
         end_line += 1
       elif line_info.type in (_ELSE_RE, _COMMENT_LINE_RE, _BLANK_LINE_RE):
         end_line += 1                # ignore blank lines
       elif line_info.type == _ENDIF_RE:
         end_line += 1
         # Delete any comments preceding this #ifdef as well.
         start_line = _LineNumberStartingPrecedingComments(file_lines,
                                                           start_line)
         # And also blank lines.
         while (start_line > 0 and
                file_lines[start_line-1].type == _BLANK_LINE_RE):
           start_line -= 1
         for line_number in range(start_line, end_line):
           file_lines[line_number].deleted = True
         num_ifdefs_deleted += 1
         break
       else:   # bail: we're at a line indicating this isn't an empty ifdef
         end_line = start_line + 1  # rewind to try again with nested #ifdefs
         break
     start_line = end_line

   return num_ifdefs_deleted


 def _DeleteDuplicateLines(file_lines, line_ranges):
   """Goes through all lines in line_ranges, and if any are dups, deletes them.

   For all lines in line_ranges, if any is the same as a previously
   seen line, set its deleted bit to True.  The purpose of line_ranges
   is to avoid lines in #ifdefs and namespaces, that may be identical
   syntactically but have different semantics.  Ideally, line_ranges
   should include only 'top-level' lines.

   We ignore lines that consist only of comments (or are blank).  We
   ignore end-of-line comments when comparing lines for equality.
   NOTE: Because our comment-finding RE is primitive, it's best if
   line_ranges covers only #include and forward-declare lines.  In
   particular, it should not cover lines that may have C literal
   strings in them.

   Arguments:
     file_lines: an array of LineInfo objects.
     line_ranges: a list of [start_line, end_line) pairs.
   """
   seen_lines = set()
   for line_range in line_ranges:
     for line_number in range(*line_range):
       if file_lines[line_number].type in (_BLANK_LINE_RE, _COMMENT_LINE_RE):
         continue
       if _IWYU_PRAGMA_KEEP_RE.search(file_lines[line_number].line):
         continue
       uncommented_line = _COMMENT_RE.sub('', file_lines[line_number].line)
       if uncommented_line in seen_lines:
         file_lines[line_number].deleted = True
       elif not file_lines[line_number].deleted:
         seen_lines.add(uncommented_line)


 def _DeleteExtraneousBlankLines(file_lines, line_range):
   """Deletes extraneous blank lines caused by line deletion.

   Here's a example file:
      class Foo { ... };

      class Bar;

      class Baz { ... }

   If we delete the "class Bar;" line, we also want to delete one of
   the blank lines around it, otherwise we leave two blank lines
   between Foo and Baz which looks bad.  The idea is that if we have
   whitespace on both sides of a deleted span of code, the whitespace
   on one of the sides is 'extraneous'.  In this case, we should delete
   not only 'class Bar;' but also the whitespace line below it.  That
   leaves one blank line between Foo and Bar, like people would expect.

   We're careful to only delete the minimum of the number of blank
   lines that show up on either side.  If 'class Bar' had one blank
   line before it, and one hundred after it, we'd only delete one blank
   line when we delete 'class Bar'.  This matches user's expecatations.

   The situation can get tricky when two deleted spans touch (we might
   think it's safe to delete the whitespace between them when it's
   not).  To be safe, we only do this check when an entire reorder-span
   has been deleted.  So we check the given line_range, and only do
   blank-line deletion if every line in the range is deleted.

   Arguments:
     file_lines: an array of LineInfo objects, with .type filled in.
     line_range: a range [start_line, end_line).  It should correspond
        to a reorder-span.
   """
   # First make sure the entire span is deleted.
   for line_number in range(*line_range):
     if not file_lines[line_number].deleted:
       return

   before_line = _PreviousNondeletedLine(file_lines, line_range[0])
   after_line = _NextNondeletedLine(file_lines, line_range[1] - 1)
   while (before_line and file_lines[before_line].type == _BLANK_LINE_RE and
          after_line and file_lines[after_line].type == _BLANK_LINE_RE):
     # OK, we've got whitespace on both sides of a deleted span.  We
     # only want to keep whitespace on one side, so delete on the other.
     file_lines[after_line].deleted = True
     before_line = _PreviousNondeletedLine(file_lines, before_line)
     after_line = _NextNondeletedLine(file_lines, after_line)


 def _ShouldInsertBlankLine(decorated_move_span, next_decorated_move_span,
                            file_lines, flags):
   """Returns true iff we should insert a blank line between the two spans.

   Given two decorated move-spans, of the form
      (reorder_range, kind, noncomment_lines, all_lines)
   returns true if we should insert a blank line between them.  We
   always put a blank line when transitioning from an #include to a
   forward-declare and back.  When the appropriate commandline flag is
   set, we also put a blank line between the 'main' includes (foo.h)
   and the C/C++ system includes, and another between the system
   includes and the rest of the Google includes.

   If the two move spans are in different reorder_ranges, that means
   the first move_span is at the end of a reorder range.  In that case,
   a different rule for blank lines applies: if the next line is
   contentful (eg 'static int x = 5;'), or a namespace start, we want
   to insert a blank line to separate the move-span from the next
   block.  When figuring out if the next line is contentful, we skip
   over comments.

   Arguments:
     decorated_move_span: a decorated_move_span we may want to put a blank
        line after.
     next_decorated_move_span: the next decorated_move_span, which may
        be a sentinel decorated_move_span at end-of-file.
     file_lines: an array of LineInfo objects with .deleted filled in.
     flags: commandline flags, as parsed by optparse.  We use
        flags.blank_lines, which controls whether we put blank
        lines between different 'kinds' of #includes.

   Returns:
      true if we should insert a blank line after decorated_move_span.
   """
   # First handle the 'at the end of a reorder range' case.
   if decorated_move_span[0] != next_decorated_move_span[0]:
     next_line = _NextNondeletedLine(file_lines, decorated_move_span[0][1] - 1)
     # Skip over comments to figure out if the next line is contentful.
     while (next_line and next_line < len(file_lines) and
            file_lines[next_line].type == _COMMENT_LINE_RE):
       next_line += 1
     return (next_line and next_line < len(file_lines) and
             file_lines[next_line].type in (_NAMESPACE_START_RE, None))

   # We never insert a blank line between two spans of the same kind.
   # Nor do we ever insert a blank line at EOF.
   (this_kind, next_kind) = (decorated_move_span[1], next_decorated_move_span[1])
   if this_kind == next_kind or next_kind == _EOF_KIND:
     return False

   # Unless explicitly requested, we do not insert a blank line between C
   # and C++-style #includes.
   if (not flags.blank_line_between_c_and_cxx_includes and
       this_kind in [_C_SYSTEM_INCLUDE_KIND, _CXX_SYSTEM_INCLUDE_KIND] and
       next_kind in [_C_SYSTEM_INCLUDE_KIND, _CXX_SYSTEM_INCLUDE_KIND]):
     return False

   # Handle the case we're going from an include to fwd declare or
   # back.  If we get here, we can't both be fwd-declares, so it
   # suffices to check if either of us is.
   if this_kind == _FORWARD_DECLARE_KIND or next_kind == _FORWARD_DECLARE_KIND:
     return True

   # Now, depending on the flag, we insert a blank line whenever the
   # kind changes (we handled the one case where a changing kind
   # doesn't introduce a blank line, above).
   if flags.blank_lines:
     return this_kind != next_kind

   return False


 def _GetToplevelReorderSpans(file_lines):
   """Returns a sorted list of all reorder_spans not inside an #ifdef/namespace.

   This routine looks at all the reorder_spans in file_lines, ignores
   reorder spans inside #ifdefs and namespaces -- except for the 'header
   guard' ifdef that encapsulates an entire .h file -- and returns the
   rest in sorted order.

   Arguments:
     file_lines: an array of LineInfo objects with .type and
        .reorder_span filled in.

   Returns:
     A list of [start_line, end_line) reorder_spans.
   """
   in_ifdef = [False] * len(file_lines)   # lines inside an #if
   ifdef_depth = 0
   for line_number in range(len(file_lines)):
     line_info = file_lines[line_number]
     if line_info.deleted:
       continue
     if line_info.type == _IF_RE:  # does not cover the header-guard ifdef
       ifdef_depth += 1
     elif line_info.type == _ENDIF_RE:
       ifdef_depth -= 1
     if ifdef_depth > 0:
       in_ifdef[line_number] = True

   # Figuring out whether a } ends a namespace or some other languague
   # construct is hard, so as soon as we see any 'contentful' line
   # inside a namespace, we assume the entire rest of the file is in
   # the namespace.
   in_namespace = [False] * len(file_lines)
   namespace_depth = 0
   for line_number in range(len(file_lines)):
     line_info = file_lines[line_number]
     if line_info.deleted:
       continue
     if line_info.type == _NAMESPACE_START_RE:
       # The 'max' is because the namespace-re may be a macro.
       namespace_depth += max(line_info.line.count('{'), 1)
     elif line_info.type == _NAMESPACE_END_RE:
       namespace_depth -= max(line_info.line.count('}'), 1)
     if namespace_depth > 0:
       in_namespace[line_number] = True
       if line_info.type is None:
         for i in range(line_number, len(file_lines)):  # rest of file
           in_namespace[i] = True
         break

   reorder_spans = [fl.reorder_span for fl in file_lines if fl.reorder_span]
   reorder_spans = sorted(set(reorder_spans))
   good_reorder_spans = []
   for reorder_span in reorder_spans:
     for line_number in range(*reorder_span):
       if in_ifdef[line_number] or in_namespace[line_number]:
         break
     else:   # for/else
       good_reorder_spans.append(reorder_span)    # never in ifdef or namespace

   return good_reorder_spans


 def _GetFirstNamespaceLevelReorderSpan(file_lines):
   """Returns the first reorder-span inside a namespace, if it's easy to do.

   This routine is meant to handle the simple case where code consists
   of includes and forward-declares, and then a 'namespace
   my_namespace'.  We return the reorder span of the inside-namespace
   forward-declares, which is a good place to insert new
   inside-namespace forward-declares (rather than putting these new
   forward-declares at the top level).

   So it goes through the top of the file, stopping at the first
   'contentful' line.  If that line has the form 'namespace <foo> {',
   it then continues until it finds a forward-declare line, or a
   non-namespace contentful line.  In the former case, it figures out
   the reorder-span this forward-declare line is part of, while in the
   latter case it creates a new reorder-span.  It returns
   (enclosing_namespaces, reorder_span).

   Arguments:
     file_lines: an array of LineInfo objects with .type and
        .reorder_span filled in.

   Returns:
     (None, None) if we could not find a first namespace-level
     reorder-span, or (enclosing_namespaces, reorder_span), where
     enclosing_namespaces is a string that looks like (for instance)
     'namespace ns1 { namespace ns2 {', and reorder-span is a
     [start_line, end_line) pair.
   """
   simple_namespace_re = re.compile(r'^\s*namespace\s+([^{\s]+)\s*\{\s*(//.*)?$')
   namespace_prefix = ''

   for line_number in range(len(file_lines)):
     line_info = file_lines[line_number]

     if line_info.deleted:
       continue

     # If we're an empty line, just ignore us.  Likewise with #include
     # lines, which aren't 'contentful' for our purposes, and the
     # header guard, which is (by definition) the only kind of #ifdef
     # that we can be inside and still considered at the "top level".
     if line_info.type in (_COMMENT_LINE_RE, _BLANK_LINE_RE, _INCLUDE_RE,
                           _HEADER_GUARD_RE, _HEADER_GUARD_DEFINE_RE,
                           _PRAGMA_ONCE_LINE_RE):
       continue

     # If we're a 'contentful' line such as a (non-header-guard) #ifdef, bail.
     elif line_info.type in (_IF_RE, _NAMESPACE_END_RE, _ELSE_RE, _ENDIF_RE,
                             None):    # None is a 'normal' contentful line
       # TODO(csilvers): we could probably keep going if there are no
       # braces on the line.  We could also keep track of our #ifdef
       # depth instead of bailing on #else and #endif, and only accept
       # the fwd-decl-inside-namespace if it's at ifdef-depth 0.
       break

     elif line_info.type == _NAMESPACE_START_RE:
       # Only handle the simple case of 'namespace <foo> {'
       m = simple_namespace_re.match(line_info.line)
       if not m:
         break
       namespace_prefix += ('namespace %s { ' % m.group(1).strip())

     elif line_info.type == _FORWARD_DECLARE_RE:
       # If we're not in a namespace, keep going.  Otherwise, this is
       # just the situation we're looking for!
       if namespace_prefix:
         return (namespace_prefix, line_info.reorder_span)

     else:
       # We should have handled all the cases above!
       assert False, ('unknown line-info type',
                      _LINE_TYPES.index(line_info.type))

   # We stopped because we hit a contentful line (or, possibly, a
   # weird-looking namespace).  If we're inside the first-namespace,
   # return this position as a good place to insert forward-declares.
   if namespace_prefix:
     return (namespace_prefix, (line_number, line_number))
   return (None, None)


 # These are potential 'kind' arguments to _FirstReorderSpanWith.
 # We also sort our output in this order, to the extent possible.
 _MAIN_CU_INCLUDE_KIND = 1         # e.g. #include "foo.h" when editing foo.cc
 _C_SYSTEM_INCLUDE_KIND = 2        # e.g. #include <stdio.h>
 _CXX_SYSTEM_INCLUDE_KIND = 3      # e.g. #include <vector>
 _NONSYSTEM_INCLUDE_KIND = 4       # e.g. #include "bar.h"
 _PROJECT_INCLUDE_KIND = 5         # e.g. #include "myproject/quux.h"
 _FORWARD_DECLARE_KIND = 6         # e.g. class Baz;
 _EOF_KIND = 7                     # used at eof


 def _IsSystemInclude(line_info):
   """Given a line-info, return true iff the line is a <>-style #include."""
   # The key for #includes includes the <> or "", so this is easy. :-)
   return line_info.type == _INCLUDE_RE and line_info.key[0] == '<'

 def _IsThirdpartyInclude(line_info, thirdparty_include_dirs):
   if line_info.type != _INCLUDE_RE:
     return False
   inc_path = line_info.key[1:-1]
   return any(os.path.exists(os.path.join(inc_dir, inc_path))
              for inc_dir in thirdparty_include_dirs)

 def _IsMainCUInclude(line_info, filename):
   """Given a line-info, return true iff the line is a 'main-CU' #include line.

   A 'main-CU' #include line is one that is related to the file being edited.
   For instance, if we are editing foo.cc, foo.h is a main-CU #include, as
   is foo-inl.h.  The same holds if we are editing foo_test.cc.

   The algorithm is like so: first, canonicalize the includee by removing the
   following suffixes:
      -inl.h  .h

   Then canonicalize the includer by removing file extension and then the
   following suffixes:
      _unittest  _regtest  _test

   Rule 1: If the canonical names (filenames after removal) match --
   including all directories -- the .h file is a main-cu #include.

   Rule 2: If the basenames of the canonnical names match -- that is,
   ignoring all directories -- the .h file is a main-cu #include *if*
   it is the first #include seen.

   Arguments:
     line_info: a LineInfo structure with .type,
        .is_first_line_of_this_type, and .key filled in.
     filename: the name of the file being edited.

   Returns:
     True if line_info is an #include of a main_CU file, False else.
   """
   if line_info.type != _INCLUDE_RE or _IsSystemInclude(line_info):
     return False
   # First, normalize the includee by getting rid of -inl.h and .h
   # suffixes (for the #include) and the "'s around the #include line.
   canonical_include = re.sub(r'(-inl\.h|\.h|\.H)$',
                              '', line_info.key.replace('"', ''))
   # Then normalize includer by stripping extension and Google's test suffixes.
   canonical_file, _ = os.path.splitext(filename)
   canonical_file = re.sub(_TEST_INFIX_RE, '', canonical_file)
   # .h files in /public/ match .cc files in /internal/.
   canonical_include2 = re.sub(r'/public/', '/internal/', canonical_include)

   # Rule 1:
   if canonical_file in (canonical_include, canonical_include2):
     return True
   # Rule 2:
   if (line_info.is_first_line_of_this_type and
       os.path.basename(canonical_file) == os.path.basename(canonical_include)):
     return True

   return False


 def _GetPathRoot(path):
   """ Return the root of a path, i.e. the first path component.
   We allow / as an alternative path separator on Windows because it helps with
   testing and forward slashes are common even on Windows in portable codebases.
   """
   first_sep = path.find(os.path.sep)
   if os.path.sep != '/' and first_sep == -1:
     first_sep = path.find('/')

   if first_sep == -1:
     return None

   return path[0:first_sep]


 def _IsSameProject(line_info, edited_file, project):
   """Return true if included file and edited file are in the same project.

   An included_file is in project 'project' if the project is a prefix of the
   included_file.  'project' should end with /.

   As a special case, if project is '<tld>', then the project is defined to
   be the top-level directory of edited_file.

   Arguments:
     line_info: a LineInfo structure with .key containing the file that is
       being included.
     edited_file: the name of the file being edited.
     project: if '<tld>', set the project path to be the top-level directory
       name of the file being edited.  If not '<tld>', this value is used to
       specify the project directory.

   Returns:
     True if line_info and filename belong in the same project, False otherwise.
   """
   included_file = line_info.key[1:]
   if project != '<tld>':
     return included_file.startswith(project)
   included_root = _GetPathRoot(included_file)
   edited_root = _GetPathRoot(edited_file)
   return (included_root and edited_root and included_root == edited_root)


 def _GetLineKind(file_line, filename, flags):
   """Given a file_line + file being edited, return best *_KIND value or None.

   Arguments:
     file_line: the LineInfo structure to be analyzed
     filename: the file which contains the line to be analyzed
     flags: the program flags. Uses 'separate_project_includes' and 'thirdparty_include_dirs'
   """
   if flags.source_root:
     filename = os.path.relpath(filename, flags.source_root)
   line_without_coments = _COMMENT_RE.sub('', file_line.line)
   if file_line.deleted:
     return None
   elif _IsMainCUInclude(file_line, filename):
     return _MAIN_CU_INCLUDE_KIND
   elif _IsSystemInclude(file_line) and \
          not _IsThirdpartyInclude(file_line, flags.thirdparty_include_dirs):
     if '.' in line_without_coments:
       # e.g. <string.h>
       return _C_SYSTEM_INCLUDE_KIND
     else:
       # e.g. <string>
       return _CXX_SYSTEM_INCLUDE_KIND
   elif file_line.type == _INCLUDE_RE:
     if (flags.separate_project_includes and
         _IsSameProject(file_line, filename, flags.separate_project_includes)):
       return _PROJECT_INCLUDE_KIND
     return _NONSYSTEM_INCLUDE_KIND
   elif file_line.type == _FORWARD_DECLARE_RE:
     return _FORWARD_DECLARE_KIND
   else:
     return None


 def _FirstReorderSpanWith(file_lines, good_reorder_spans, kind, filename,
                           flags):
   """Returns [start_line,end_line) of 1st reorder_span with a line of kind kind.

   This function iterates over all the reorder_spans in file_lines, and
   calculates the first one that has a line of the given kind in it.
   If no such reorder span is found, it takes the last span of 'lower'
   kinds (main-cu kind is lowest, forward-declare is highest).  If no
   such reorder span is found, it takes the first span of 'higher'
   kind, but not considering the forward-declare kind (we don't want to
   put an #include with the first forward-declare, because it may be
   inside a class or something weird).  If there's *still* no match, we
   return the first line past leading comments, whitespace, and #ifdef
   guard lines.  If there's *still* no match, we just insert at
   end-of-file.

   As a special case, we never return a span for forward-declares that is
   after 'contentful' code, even if other forward-declares are there.
   For instance:
      using Foo::Bar;
      class Bang;
   We want to make sure to put 'namespace Foo { class Bar; }'
   *before* the using line!

   kind is one of the following enums, with examples:
      _MAIN_CU_INCLUDE_KIND:    #include "foo.h" when editing foo.cc
      _C_SYSTEM_INCLUDE_KIND:   #include <stdio.h>
      _CXX_SYSTEM_INCLUDE_KIND: #include <vector>
      _NONSYSTEM_INCLUDE_KIND:  #include "bar.h"
      _PROJECT_INCLUDE_KIND:    #include "myproject/quux.h"
      _FORWARD_DECLARE_KIND:    class Baz;

   Arguments:
     file_lines: an array of LineInfo objects with .type and
        .reorder_span filled in.
     good_reorder_spans: a sorted list of reorder_spans to consider
        (should not include reorder_spans inside #ifdefs or
        namespaces).
     kind: one of *_KIND values.
     filename: the name of the file that file_lines comes from.
        This is passed to _GetLineKind (are we a main-CU #include?)
     flags: commandline flags, as parsed by optparse.  We use
        flags.separate_project_includes to sort the #includes for the
        current project separately from other #includes.

   Returns:
     A pair of line numbers, [start_line, end_line), that is the 'best'
     reorder_span in file_lines for the given kind.
   """
   assert kind in (_MAIN_CU_INCLUDE_KIND, _C_SYSTEM_INCLUDE_KIND,
                   _CXX_SYSTEM_INCLUDE_KIND, _NONSYSTEM_INCLUDE_KIND,
                   _PROJECT_INCLUDE_KIND, _FORWARD_DECLARE_KIND), kind
   # Figure out where the first 'contentful' line is (after the first
   # 'good' span, so we skip past header guards and the like).  Basically,
   # the first contentful line is a line not in any reorder span.
   for i in range(len(good_reorder_spans) - 1):
     if good_reorder_spans[i][1] != good_reorder_spans[i+1][0]:
       first_contentful_line = good_reorder_spans[i][1]
       break
   else:     # got to the end of the file without finding a break in the spans
     if good_reorder_spans:
       first_contentful_line = good_reorder_spans[-1][1]
     else:
       first_contentful_line = 0

   # Let's just find the first and last span for each kind.
   first_reorder_spans = {}
   last_reorder_spans = {}
   for reorder_span in good_reorder_spans:
     for line_number in range(*reorder_span):
       line_kind = _GetLineKind(file_lines[line_number], filename, flags)

       # Ignore forward-declares that come after 'contentful' code; we
       # never want to insert new forward-declares there.
       if (line_kind == _FORWARD_DECLARE_KIND and
           line_number > first_contentful_line):
         continue
       if line_kind is not None:
         first_reorder_spans.setdefault(line_kind, reorder_span)
         last_reorder_spans[line_kind] = reorder_span

   # Find the first span of our kind.
   if kind in first_reorder_spans:
     return first_reorder_spans[kind]

   # Second choice: last span of the kinds above us:
   for backup_kind in range(kind - 1, _MAIN_CU_INCLUDE_KIND - 1, -1):
     if backup_kind in last_reorder_spans:
       return last_reorder_spans[backup_kind]

   # Third choice: first span of the kinds below us, but not counting
   # _FORWARD_DECLARE_KIND.
   for backup_kind in range(kind + 1, _FORWARD_DECLARE_KIND):
     if backup_kind in first_reorder_spans:
       return first_reorder_spans[backup_kind]

   # There are no reorder-spans at all, or they are only
   # _FORWARD_DECLARE spans.  Return the first line past the leading
   # comments, whitespace, and #ifdef guard lines, or the beginning
   # of the _FORWARD_DECLARE span, whichever is smaller.
   line_number = 0
   seen_header_guard = False
   while line_number < len(file_lines):
     if file_lines[line_number].deleted:
       line_number += 1
     elif file_lines[line_number].type == _HEADER_GUARD_RE:
       seen_header_guard = True
       line_number += 2    # skip over the header guard
     elif file_lines[line_number].type == _BLANK_LINE_RE:
       line_number += 1
     elif file_lines[line_number].type == _PRAGMA_ONCE_LINE_RE:
       seen_header_guard = True
       line_number += 1
     elif (file_lines[line_number].type == _COMMENT_LINE_RE
           and not seen_header_guard):
       # We put #includes after top-of-file comments.  But comments
       # inside the header guard are no longer top-of-file comments;
       # #includes go before them.
       line_number += 1
     else:
       # If the "first line" we would return is inside the forward-declare
       # reorder span, just return that span, rather than creating a new
       # span inside the existing one.
       if first_reorder_spans:
         assert list(first_reorder_spans.keys()) == [_FORWARD_DECLARE_KIND], \
             first_reorder_spans
         if line_number >= first_reorder_spans[_FORWARD_DECLARE_KIND][0]:
           return first_reorder_spans[_FORWARD_DECLARE_KIND]
       return (line_number, line_number)

   # OK, I guess just insert at the end of the file
   return (len(file_lines), len(file_lines))


 def _RemoveNamespacePrefix(fwd_decl_iwyu_line, namespace_prefix):
   """Return a version of the input line with namespace_prefix removed, or None.

   If fwd_decl_iwyu_line is
      namespace ns1 { namespace ns2 { namespace ns3 { foo } } }
   and namespace_prefix = 'namespace ns1 { namespace ns2 {', then
   this function returns 'namespace ns3 { foo }'.  It removes the
   namespace_prefix, and any } }'s at the end of the line.  If line
   does not fit this form, then this function returns None.

   Arguments:
     line: a line from iwyu about a forward-declare line to add
     namespace_prefix: a non-empty string of the form
       namespace <ns1> { namespace <ns2> { [...]

   Returns:
     A version of the input line with the namespaces in namespace
     prefix removed, or None if this is not possible because the input
     line is not of the right form.
   """
   assert namespace_prefix, "_RemoveNamespaces requires a non-empty prefix"
   if not fwd_decl_iwyu_line.startswith(namespace_prefix):
     return None

   # Remove the prefix
   fwd_decl_iwyu_line = fwd_decl_iwyu_line[len(namespace_prefix):].lstrip()

   # Remove the matching trailing }'s, preserving comments.
   num_braces = namespace_prefix.count('{')
   ending_braces_re = re.compile(r'(\s*\}){%d}\s*$' % num_braces)
   m = ending_braces_re.search(fwd_decl_iwyu_line)
   if not m:
     return None
   fwd_decl_iwyu_line = fwd_decl_iwyu_line[:m.start(0)]

   return fwd_decl_iwyu_line


 def _DecoratedMoveSpanLines(iwyu_record, file_lines, move_span_lines, flags):
   """Given a span of lines from file_lines, returns a "decorated" result.

   First, we construct the actual contents of the move-span, as a list
   of strings (one per line).  If we see an #include in the move_span,
   we replace its comments with the ones in iwyu_record, if present
   (iwyu_record will never have any comments if flags.comments is
   False).

   Second, we construct a string, of the 'contentful' part of the
   move_span -- that is, without the leading comments -- with
   whitespace removed, and a few other changes made.  This is used for
   sorting (we remove whitespace so '# include <foo>' compares properly
   against '#include <bar>').

   Third, we figure out the 'kind' of this span: system include,
   main-cu include, etc.

   We return all of these together in a tuple, along with the
   reorder-span this move span is inside.  We pick the best
   reorder-span if one isn't already present (because it's an
   #include we're adding in, for instance.)  This allows us to sort
   all the moveable content.

   Arguments:
     iwyu_record: the IWYUOutputRecord struct for this source file.
     file_lines: a list of LineInfo objects holding the parsed output of
       the file in iwyu_record.filename
     move_span_lines: A list of LineInfo objects.  For #includes and
       forward-declares already in the file, this will be a sub-list
       of file_lines.  For #includes and forward-declares we're adding
       in, it will be a newly created list.
     flags: commandline flags, as parsed by optparse.  We use
       flags.separate_project_includes to sort the #includes for the
       current project separately from other #includes.

   Returns:
     A tuple (reorder_span, kind, sort_key, all_lines_as_list)
     sort_key is the 'contentful' part of the move_span, which whitespace
       removed, and -inl.h changed to _inl.h (so it sorts later).
     all_lines_as_list is a list of strings, not of LineInfo objects.
     Returns None if the move-span has been deleted, or for some other
       reason lacks an #include or forward-declare line.
   """
   # Get to the first contentful line.
   for i in range(len(move_span_lines)):
     if (not move_span_lines[i].deleted and
         move_span_lines[i].type in (_INCLUDE_RE, _FORWARD_DECLARE_RE)):
       first_contentful_line = i
       break
   else:       # for/else
     # No include or forward-declare line seen, must be a deleted span.
     return None

   firstline = move_span_lines[first_contentful_line]
   m = _INCLUDE_RE.match(firstline.line)
   if m:
     # If we're an #include, the contentful lines are easy.  But we have
     # to do the comment-replacing first.
     sort_key = firstline.line
     iwyu_version = iwyu_record.full_include_lines.get(m.group(1), '')
     if _COMMENT_LINE_RE.search(iwyu_version):  # the iwyu version has comments
       sort_key = iwyu_version                  # replace the comments
     all_lines = ([li.line for li in move_span_lines[:-1] if not li.deleted] +
                  [sort_key])
   else:
     # We're a forward-declare.  Also easy.
     contentful_list = [li.line for li in move_span_lines[first_contentful_line:]
                        if not li.deleted]
     sort_key = ''.join(contentful_list)
     all_lines = [li.line for li in move_span_lines if not li.deleted]

   # Get rid of whitespace in the contentful_lines
   sort_key = re.sub(r'\s+', '', sort_key)
   # Replace -inl.h with _inl.h so foo-inl.h sorts after foo.h in #includes.
   sort_key = sort_key.replace('-inl.h', '_inl.h')

   # Next figure out the kind.
   kind = _GetLineKind(firstline, iwyu_record.filename, flags)

   # All we're left to do is the reorder-span we're in.  Hopefully it's easy.
   reorder_span = firstline.reorder_span
   if reorder_span is None:     # must be a new #include we're adding
     # If we're a forward-declare inside a namespace, see if there's a
     # reorder span inside the same namespace we can fit into.
     if kind == _FORWARD_DECLARE_KIND:
       (namespace_prefix, possible_reorder_span) = \
           _GetFirstNamespaceLevelReorderSpan(file_lines)
       if (namespace_prefix and possible_reorder_span and
           firstline.line.startswith(namespace_prefix)):
         # Great, we can go into this reorder_span.  We also need to
         # modify all-lines because this line doesn't need the
         # namespace prefix anymore.  Make sure we can do that before
         # succeeding.
         new_firstline = _RemoveNamespacePrefix(firstline.line, namespace_prefix)
         if new_firstline:
           assert all_lines[first_contentful_line] == firstline.line
           all_lines[first_contentful_line] = new_firstline
           reorder_span = possible_reorder_span

     # If that didn't work out, find a top-level reorder span to go into.
     if reorder_span is None:
       # TODO(csilvers): could make this more efficient by storing, per-kind.
       toplevel_reorder_spans = _GetToplevelReorderSpans(file_lines)
       reorder_span = _FirstReorderSpanWith(file_lines, toplevel_reorder_spans,
                                            kind, iwyu_record.filename, flags)

   return (reorder_span, kind, sort_key, all_lines)


 def _CommonPrefixLength(a, b):
   """Given two lists, returns the index of 1st element not common to both."""
   end = min(len(a), len(b))
   for i in range(end):
     if a[i] != b[i]:
       return i
   return end


 def _NormalizeNamespaceForwardDeclareLines(lines):
   """'Normalize' namespace lines in a list of output lines and return new list.

   When suggesting new forward-declares to insert, iwyu uses the following
   format, putting each class on its own line with all namespaces:
      namespace foo { namespace bar { class A; } }
      namespace foo { namespace bar { class B; } }
      namespace foo { namespace bang { class C; } }
   We convert this to 'normalized' form, which puts namespaces on their
   own line and collects classes together:
      namespace foo {
      namespace bar {
      class A;
      class B;
      }  // namespace bar
      namespace bang {
      class C;
      }  // namespace bang
      }  // namespace foo

   Non-namespace lines are left alone.  Only adjacent namespace lines
   from the input are merged.

   Arguments:
     lines: a list of output-lines -- that is, lines that are ready to
        be emitted as-is to the output file.

   Returns:
     A new version of lines, with namespace lines normalized as above.
   """
   # iwyu input is very regular, which is nice.
   iwyu_namespace_re = re.compile(r'namespace ([^{]*) { ')
   iwyu_classname_re = re.compile(r'{ ([^{}]*) }')

   retval = []
   current_namespaces = []
   # We append a blank line so the final namespace-closing happens "organically".
   for line in lines + ['']:
     namespaces_in_line = iwyu_namespace_re.findall(line)
     differ_pos = _CommonPrefixLength(namespaces_in_line, current_namespaces)
     namespaces_to_close = reversed(current_namespaces[differ_pos:])
     namespaces_to_open = namespaces_in_line[differ_pos:]
     retval.extend('}  // namespace %s' % ns for ns in namespaces_to_close)
     retval.extend('namespace %s {' % ns for ns in namespaces_to_open)
     current_namespaces = namespaces_in_line
     # Now add the current line.  If we were a namespace line, it's the
     # 'class' part of the line (everything but the 'namespace {'s).
     if namespaces_in_line:
       m = iwyu_classname_re.search(line)
       if not m:
         raise FixIncludesError('Malformed namespace line from iwyu: %s', line)
       retval.append(m.group(1))
     else:
       retval.append(line)

   assert retval and retval[-1] == '', 'What happened to our sentinel line?'
   return retval[:-1]


 def _DeleteLinesAccordingToIwyu(iwyu_record, file_lines):
   """Deletes all lines that iwyu_record tells us to, and cleans up after."""
   for line_number in iwyu_record.lines_to_delete:
     # Delete the entire move-span (us and our preceding comments).
     for i in range(*file_lines[line_number].move_span):
       file_lines[i].deleted = True

   while True:
     num_deletes = _DeleteEmptyNamespaces(file_lines)
     num_deletes += _DeleteEmptyIfdefs(file_lines)
     if num_deletes == 0:
       break

   # Also delete any duplicate lines in the input.  To avoid trouble
   # (accidentally deleting inside an #ifdef, for instance), we only
   # check 'top-level' #includes and forward-declares.
   toplevel_reorder_spans = _GetToplevelReorderSpans(file_lines)
   _DeleteDuplicateLines(file_lines, toplevel_reorder_spans)

   # If a whole reorder span was deleted, check if it has extra
   # whitespace on both sides that we could trim.  We've already
   # deleted extra blank lines inside #ifdefs and namespaces,
   # so looking at toplevel spans is enough.
   for reorder_span in toplevel_reorder_spans:
     _DeleteExtraneousBlankLines(file_lines, reorder_span)


 def _GetSymbolNameFromForwardDeclareLine(line):
   """Given a forward declare line to add from iwyu output, get symbol.

   Two possibilities: In or not in namespace(s).
   If in namespaces, then return foo::bar::sym.
   Else just sym.
   """
   iwyu_namespace_re = re.compile(r'namespace ([^{]*) { ')
   symbolname_re = re.compile(r'([A-Za-z0-9_]+)')
   namespaces_in_line = iwyu_namespace_re.findall(line)
   symbols_in_line = symbolname_re.findall(line)
   symbol_name = symbols_in_line[-1]
   if (namespaces_in_line):
     symbol_name = '::'.join(namespaces_in_line) + '::' + symbol_name
   return symbol_name


 def FixFileLines(iwyu_record, file_lines, flags):
   """Applies one block of lines from the iwyu output script.

   Called once we have read all the lines from the iwyu output script
   pertaining to a single source file, and parsed them into an
   iwyu_record.  At that point we edit the source file, remove the old
   #includes and forward-declares, insert the #includes and
   forward-declares, and reorder the lot, all as specified by the iwyu
   output script.  The resulting source code lines are returned.

   Arguments:
     iwyu_record: an IWYUOutputRecord object holding the parsed output
       of the include-what-you-use script (run at verbose level 1 or
       higher) pertaining to a single source file.
     file_lines: a list of LineInfo objects holding the parsed output of
       the file in iwyu_record.filename
     flags: commandline flags, as parsed by optparse.  We use
        flags.safe_headers to turn off deleting lines, and use the
        other flags indirectly (via calls to other routines).

   Returns:
     An array of 'fixed' source code lines, after modifications as
     specified by iwyu.
   """
   # First delete the includes and forward-declares that we should delete.
   # This is easy since iwyu tells us the line numbers.
   if not (flags.safe_headers and _MayBeHeaderFile(iwyu_record.filename)):
     _DeleteLinesAccordingToIwyu(iwyu_record, file_lines)

   # With these deletions, we may be able to merge together some
   # reorder-spans.  Recalculate them to see.
   _CalculateReorderSpans(file_lines)

   # For every move-span in our file -- that's every #include and
   # forward-declare we saw -- 'decorate' the move-range to allow us
   # to sort them.
   move_spans = set([fl.move_span for fl in file_lines if fl.move_span])
   decorated_move_spans = []
   for (start_line, end_line) in move_spans:
     decorated_span = _DecoratedMoveSpanLines(iwyu_record, file_lines,
                                              file_lines[start_line:end_line],
                                              flags)
     if decorated_span:
       decorated_move_spans.append(decorated_span)

   # Now let's add in a decorated move-span for all the new #includes
   # and forward-declares.
   symbol_names_seen = set()
   for line in iwyu_record.includes_and_forward_declares_to_add:
     line_info = LineInfo(line)
     m = _INCLUDE_RE.match(line)
     if m:
       line_info.type = _INCLUDE_RE
       line_info.key = m.group(1)
     else:
       # Avoid duplicates that can arise if different template args
       # were suggested by different iwyu analyses for this file.
       symbol_name = _GetSymbolNameFromForwardDeclareLine(line)
       if symbol_name in symbol_names_seen:
         continue
       symbol_names_seen.add(symbol_name)
       line_info.type = _FORWARD_DECLARE_RE
     decorated_span = _DecoratedMoveSpanLines(iwyu_record, file_lines,
                                              [line_info], flags)
     assert decorated_span, 'line to add is not an #include or fwd-decl?'
     decorated_move_spans.append(decorated_span)

   # Add a sentinel decorated move-span, to make life easy, and sort.
   decorated_move_spans.append(((len(file_lines), len(file_lines)),
                                _EOF_KIND, '', []))
   decorated_move_spans.sort()

   # Now go through all the lines of the input file and construct the
   # output file.  Before we get to the next reorder-span, we just
   # copy lines over verbatim (ignoring deleted lines, of course).
   # In a reorder-span, we just print the sorted content, introducing
   # blank lines when appropriate.
   output_lines = []
   line_number = 0
   while line_number < len(file_lines):
     current_reorder_span = decorated_move_spans[0][0]

     # Just copy over all the lines until the next reorder span.
     while line_number < current_reorder_span[0]:
       if not file_lines[line_number].deleted:
         output_lines.append(file_lines[line_number].line)
       line_number += 1

     # Now fill in the contents of the reorder-span from decorated_move_spans
     new_lines = []
     while (decorated_move_spans and
            decorated_move_spans[0][0] == current_reorder_span):
       new_lines.extend(decorated_move_spans[0][3])   # the full content
       if (len(decorated_move_spans) > 1 and
           _ShouldInsertBlankLine(decorated_move_spans[0],
                                  decorated_move_spans[1], file_lines, flags)):
         new_lines.append('')
       decorated_move_spans = decorated_move_spans[1:]   # pop

     if not flags.keep_iwyu_namespace_format:
       # Now do the munging to convert namespace lines from the iwyu input
       # format to the 'official style' format:
       #    'namespace foo { class Bar; }\n' -> 'namespace foo {\nclass Bar;\n}'
       # along with collecting multiple classes in the same namespace.
       new_lines = _NormalizeNamespaceForwardDeclareLines(new_lines)
     output_lines.extend(new_lines)
     line_number = current_reorder_span[1]               # go to end of span

   return [line for line in output_lines if line is not None]


 def FixOneFile(iwyu_record, file_contents, flags):
   """Parse a file guided by an iwyu_record and flags and apply IWYU fixes.
   Returns two lists of lines (old, fixed).
   """
   file_lines = ParseOneFile(file_contents, iwyu_record)
   old_lines = [fl.line for fl in file_lines
                if fl is not None and fl.line is not None]
   fixed_lines = FixFileLines(iwyu_record, file_lines, flags)
   return old_lines, fixed_lines


 def FixManyFiles(iwyu_records, flags):
   """Given a list of iwyu_records, fix each file listed in the record.

   For each iwyu record in the input, which lists the #includes and
   forward-declares to add, remove, and re-sort, loads the file, makes
   the fixes, and writes the fixed file to disk.  The flags affect the
   details of the fixing.

   Arguments:
     iwyu_records: a collection of IWYUOutputRecord objects holding
       the parsed output of the include-what-you-use script (run at
       verbose level 1 or higher) pertaining to a single source file.
       iwyu_record.filename indicates what file to edit.
     flags: commandline flags, as parsed by optparse..

   Returns:
     The number of files fixed (as opposed to ones that needed no fixing).
   """
   files_fixed = 0
   for iwyu_record in iwyu_records:
     try:
       fileinfo = FileInfo.parse(iwyu_record.filename)

       file_contents = _ReadFile(iwyu_record.filename, fileinfo)
       if not file_contents:
         continue

       old_lines, fixed_lines = FixOneFile(iwyu_record, file_contents, flags)
       if old_lines == fixed_lines:
         if not flags.quiet:
           print("No changes in file %s" % iwyu_record.filename)
         continue
       print(">>> Fixing #includes in '%s'" % iwyu_record.filename)
       if flags.dry_run:
         PrintFileDiff(old_lines, fixed_lines)
       else:
         _WriteFile(iwyu_record.filename, fileinfo, fixed_lines)

       files_fixed += 1
     except FixIncludesError as why:
       print('ERROR: %s - skipping file %s' % (why, iwyu_record.filename))

   print('IWYU %sedited %d files on your behalf.\n' % (
     flags.dry_run and 'would have ' or '', files_fixed))
   return files_fixed


 def ParseAndMergeIWYUOutput(f, files_to_process, flags):
   """
   Given a file object that has the output of the include_what_you_use
   script, parse this output into a list of IWYUOutputRecord objects.
   The records are merged so that files mentioned multiple times only
   result in a single record.

   Arguments:
     files_to_process: A set of filenames, or None.  If not None, we
        ignore files mentioned in f that are not in files_to_process.
     flags: commandline flags, as parsed by optparse.  The only flag
        we use directly is flags.ignore_re, to indicate files not to
        process; we also pass the flags to other routines.

   Returns:
     The list of IWYUOutputRecord objects.
   """
   # First collect all the iwyu data from stdin.

   # Maintain sort order by using OrderedDict instead of dict
   iwyu_output_records = OrderedDict()  # IWYUOutputRecords keyed by filename
   while True:
     iwyu_output_parser = IWYUOutputParser()
     try:
       iwyu_record = iwyu_output_parser.ParseOneRecord(f, flags)
       if not iwyu_record:
         break
     except FixIncludesError as why:
       print('ERROR: %s' % why)
       continue
     filename = iwyu_record.filename
     if files_to_process is not None and filename not in files_to_process:
       print('(skipping %s: not listed on commandline)' % filename)
       continue
     if flags.ignore_re and re.search(flags.ignore_re, filename):
       print('(skipping %s: it matches --ignore_re, which is %s)' % (
           filename, flags.ignore_re))
       continue

     if filename in iwyu_output_records:
       iwyu_output_records[filename].Merge(iwyu_record)
     else:
       iwyu_output_records[filename] = iwyu_record

   # Now ignore all the files that never had any contentful changes
   # seen for them.  (We have to wait until we're all done, since a .h
   # file may have a contentful change when #included from one .cc
   # file, but not another, and we need to have merged them above.)
   for filename in iwyu_output_records:
     if not iwyu_output_records[filename].HasContentfulChanges():
       if not flags.quiet:
         print('(skipping %s: iwyu reports no contentful changes)' % filename)
       # Mark that we're skipping this file by setting the record to None
       iwyu_output_records[filename] = None

   return [ior for ior in iwyu_output_records.values() if ior]


 def ProcessIWYUOutput(f, files_to_process, flags):
   """Fix the #include and forward-declare lines as directed by f.

   Given a file object that has the output of the include_what_you_use
   script, see every file to be edited and edit it, if appropriate.

   Arguments:
     f: an iterable object that is the output of include_what_you_use.
     files_to_process: A set of filenames, or None.  If not None, we
        ignore files mentioned in f that are not in files_to_process.
     flags: commandline flags, as parsed by optparse.  The only flag
        we use directly is flags.ignore_re, to indicate files not to
        process; we also pass the flags to other routines.

   Returns:
     The number of files that had to be modified (because they weren't
     already all correct).  In dry_run mode, returns the number of
     files that would have been modified.
   """
   records = ParseAndMergeIWYUOutput(f, files_to_process, flags)
   # Now do all the fixing, and return the number of files modified
   return FixManyFiles(records, flags)


 def SortIncludesInFiles(files_to_process, flags):
   """For each file in files_to_process, sort its #includes.

   This reads each input file, sorts the #include lines, and replaces
   the input file with the result. SortIncludesInFiles does not add
   or remove any #includes.  It also ignores forward-declares.

   Arguments:
     files_to_process: a list (or set) of filenames.
     flags: commandline flags, as parsed by optparse.  We do not use
        any flags directly, but pass them to other routines.

   Returns:
     The number of files that had to be modified (because they weren't
     already all correct, that is, already in sorted order).
   """
   sort_only_iwyu_records = []
   for filename in files_to_process:
     # An empty iwyu record has no adds or deletes, so its only effect
     # is to cause us to sort the #include lines.  (Since fix_includes
     # gets all its knowledge of where forward-declare lines are from
     # the iwyu input, with an empty iwyu record it just ignores all
     # the forward-declare lines entirely.)
     sort_only_iwyu_records.append(IWYUOutputRecord(filename))
   return FixManyFiles(sort_only_iwyu_records, flags)

 def ParseArgs(args):
   """ Parse the command line. """
   parser = optparse.OptionParser(usage=_USAGE)
   parser.add_option('-b', '--blank_lines', action='store_true', default=True,
                     help=('Put a blank line between primary header file and'
                           ' C/C++ system #includes, and another blank line'
                           ' between system #includes and google #includes'
                           ' [default]'))
   parser.add_option('--noblank_lines', action='store_false', dest='blank_lines')

   parser.add_option('--blank_line_between_c_and_cxx_includes',
                     action='store_true', default=False,
                     help=('Put a blank line between the group of C system '
                           'includes and C++ system includes. Not enabled '
                           'by default.'))

   parser.add_option('--comments', action='store_true', default=False,
                     help='Put comments after the #include lines')
   parser.add_option('--nocomments', action='store_false', dest='comments')

   parser.add_option('--safe_headers', action='store_true', default=True,
                     help=('Do not remove unused #includes/fwd-declares from'
                           ' header files; just add new ones [default]'))
   parser.add_option('--nosafe_headers', action='store_false',
                     dest='safe_headers')

   parser.add_option('-s', '--sort_only', action='store_true',
                     help=('Just sort #includes of files listed on cmdline;'
                           ' do not add or remove any #includes'))

   parser.add_option('-n', '--dry_run', action='store_true', default=False,
                     help=('Do not actually edit any files; just print diffs.'
                           ' Return code is 0 if no changes are needed,'
                           ' else min(the number of files that would be'
                           ' modified, 100)'))

   parser.add_option('-q', '--quiet', action='store_true', default=False,
                     help=('Do not output anything about files that do not '
                           'need any changes.'))

   parser.add_option('--ignore_re', default=None,
                     help=('fix_includes.py will skip editing any file whose'
                           ' name matches this regular expression.'))

   parser.add_option('--separate_project_includes', default=None,
                     help=('Sort #includes for current project separately'
                           ' from all other #includes.  This flag specifies'
                           ' the root directory of the current project.'
                           ' If the value is "<tld>", #includes that share the'
                           ' same top-level directory are assumed to be in the'
                           ' same project.  If not specified, project #includes'
                           ' will be sorted with other non-system #includes.'))

   parser.add_option('--thirdparty_include_dir', action="append",
                     dest="thirdparty_include_dirs",
                     default=[],
                     metavar="dir",
                     help=('Any includes which are found in <dir> are considered '
                           '"other-project" includes rather than "system" includes.'))

   parser.add_option('--source_root', type='str',
                     metavar="dir",
                     help=('The path to the root of the source tree. When processing '
                           'changes to a file /project/src/bar/baz.cc, setting '
                           '--source_root to /project/src will ensure that an include '
                           'of "bar/baz.h" is properly identified as the corresponding '
                           'main-compilation-unit include.'))

   parser.add_option('--invoking_command_line', default=None,
                     help=('Internal flag used by iwyu.py, It should be the'
                           ' command line used to invoke iwyu.py'))

   parser.add_option('-m', '--keep_iwyu_namespace_format', action='store_true',
                     default=False,
                     help=('Keep forward-declaration namespaces in IWYU format, '
                           'eg. namespace n1 { namespace n2 { class c1; } }.'
                           ' Do not convert to "normalized" Google format: '
                           'namespace n1 {\\nnamespace n2 {\\n class c1;'
                           '\\n}\\n}.'))
   parser.add_option('--nokeep_iwyu_namespace_format', action='store_false',
                     dest='keep_iwyu_namespace_format')

   (flags, files_to_modify) = parser.parse_args(args)
   return flags, files_to_modify

 def main(argv):
   (flags, files_to_modify) = ParseArgs(argv[1:])
   if files_to_modify:
     files_to_modify = set(files_to_modify)
   else:
     files_to_modify = None

   if (flags.separate_project_includes and
       not flags.separate_project_includes.startswith('<') and  # 'special' vals
       not flags.separate_project_includes.endswith(os.path.sep) and
       not flags.separate_project_includes.endswith('/')):
     flags.separate_project_includes += os.path.sep

   if flags.sort_only:
     if not files_to_modify:
       sys.exit('FATAL ERROR: -s flag requires a list of filenames')
     return SortIncludesInFiles(files_to_modify, flags)
   else:
     return ProcessIWYUOutput(sys.stdin, files_to_modify, flags)


 if __name__ == '__main__':
   num_files_fixed = main(sys.argv)
   sys.exit(min(num_files_fixed, 100))