blob: af72a5652115965b83677a2b14fd308ce9bf3216 [file] [log] [blame]
# encoding: utf-8
#
# This is a copy of source code from Pathspec 0.5.9
# (https://pypi.org/project/pathspec/) which is
# available under an Mozilla Public License 2.0
# (https://www.mozilla.org/en-US/MPL/2.0/).
# A copy of the license is also available in
# ../../licenses/LICENSE-pathspec.txt.
#
"""
This module implements Git's wildmatch pattern matching which itself is
derived from Rsync's wildmatch. Git uses wildmatch for its ".gitignore"
files.
"""
from __future__ import unicode_literals
import re
import warnings
import util
from compat import unicode
from pattern import RegexPattern
#: The encoding to use when parsing a byte string pattern.
_BYTES_ENCODING = 'latin1'
class GitWildMatchPattern(RegexPattern):
"""
The :class:`GitWildMatchPattern` class represents a compiled Git
wildmatch pattern.
"""
# Keep the dict-less class hierarchy.
__slots__ = ()
@classmethod
def pattern_to_regex(cls, pattern):
"""
Convert the pattern into a regular expression.
*pattern* (:class:`unicode` or :class:`bytes`) is the pattern to
convert into a regular expression.
Returns the uncompiled regular expression (:class:`unicode`, :class:`bytes`,
or :data:`None`), and whether matched files should be included
(:data:`True`), excluded (:data:`False`), or if it is a
null-operation (:data:`None`).
"""
if isinstance(pattern, unicode):
return_type = unicode
elif isinstance(pattern, bytes):
return_type = bytes
pattern = pattern.decode(_BYTES_ENCODING)
else:
raise TypeError("pattern:{!r} is not a unicode or byte string.".format(pattern))
pattern = pattern.strip()
if pattern.startswith('#'):
# A pattern starting with a hash ('#') serves as a comment
# (neither includes nor excludes files). Escape the hash with a
# back-slash to match a literal hash (i.e., '\#').
regex = None
include = None
elif pattern == '/':
# EDGE CASE: According to `git check-ignore` (v2.4.1), a single
# '/' does not match any file.
regex = None
include = None
elif pattern:
if pattern.startswith('!'):
# A pattern starting with an exclamation mark ('!') negates the
# pattern (exclude instead of include). Escape the exclamation
# mark with a back-slash to match a literal exclamation mark
# (i.e., '\!').
include = False
# Remove leading exclamation mark.
pattern = pattern[1:]
else:
include = True
if pattern.startswith('\\'):
# Remove leading back-slash escape for escaped hash ('#') or
# exclamation mark ('!').
pattern = pattern[1:]
# Split pattern into segments.
pattern_segs = pattern.split('/')
# Normalize pattern to make processing easier.
if not pattern_segs[0]:
# A pattern beginning with a slash ('/') will only match paths
# directly on the root directory instead of any descendant
# paths. So, remove empty first segment to make pattern relative
# to root.
del pattern_segs[0]
elif len(pattern_segs) == 1 or (len(pattern_segs) == 2 and not pattern_segs[1]):
# A single pattern without a beginning slash ('/') will match
# any descendant path. This is equivalent to "**/{pattern}". So,
# prepend with double-asterisks to make pattern relative to
# root.
# EDGE CASE: This also holds for a single pattern with a
# trailing slash (e.g. dir/).
if pattern_segs[0] != '**':
pattern_segs.insert(0, '**')
else:
# EDGE CASE: A pattern without a beginning slash ('/') but
# contains at least one prepended directory (e.g.
# "dir/{pattern}") should not match "**/dir/{pattern}",
# according to `git check-ignore` (v2.4.1).
pass
if not pattern_segs[-1] and len(pattern_segs) > 1:
# A pattern ending with a slash ('/') will match all descendant
# paths if it is a directory but not if it is a regular file.
# This is equivalent to "{pattern}/**". So, set last segment to
# double asterisks to include all descendants.
pattern_segs[-1] = '**'
# Build regular expression from pattern.
output = ['^']
need_slash = False
end = len(pattern_segs) - 1
for i, seg in enumerate(pattern_segs):
if seg == '**':
if i == 0 and i == end:
# A pattern consisting solely of double-asterisks ('**')
# will match every path.
output.append('.+')
elif i == 0:
# A normalized pattern beginning with double-asterisks
# ('**') will match any leading path segments.
output.append('(?:.+/)?')
need_slash = False
elif i == end:
# A normalized pattern ending with double-asterisks ('**')
# will match any trailing path segments.
output.append('/.*')
else:
# A pattern with inner double-asterisks ('**') will match
# multiple (or zero) inner path segments.
output.append('(?:/.+)?')
need_slash = True
elif seg == '*':
# Match single path segment.
if need_slash:
output.append('/')
output.append('[^/]+')
need_slash = True
else:
# Match segment glob pattern.
if need_slash:
output.append('/')
output.append(cls._translate_segment_glob(seg))
if i == end and include is True:
# A pattern ending without a slash ('/') will match a file
# or a directory (with paths underneath it). E.g., "foo"
# matches "foo", "foo/bar", "foo/bar/baz", etc.
# EDGE CASE: However, this does not hold for exclusion cases
# according to `git check-ignore` (v2.4.1).
output.append('(?:/.*)?')
need_slash = True
output.append('$')
regex = ''.join(output)
else:
# A blank pattern is a null-operation (neither includes nor
# excludes files).
regex = None
include = None
if regex is not None and return_type is bytes:
regex = regex.encode(_BYTES_ENCODING)
return regex, include
@staticmethod
def _translate_segment_glob(pattern):
"""
Translates the glob pattern to a regular expression. This is used in
the constructor to translate a path segment glob pattern to its
corresponding regular expression.
*pattern* (:class:`str`) is the glob pattern.
Returns the regular expression (:class:`str`).
"""
# NOTE: This is derived from `fnmatch.translate()` and is similar to
# the POSIX function `fnmatch()` with the `FNM_PATHNAME` flag set.
escape = False
regex = ''
i, end = 0, len(pattern)
while i < end:
# Get next character.
char = pattern[i]
i += 1
if escape:
# Escape the character.
escape = False
regex += re.escape(char)
elif char == '\\':
# Escape character, escape next character.
escape = True
elif char == '*':
# Multi-character wildcard. Match any string (except slashes),
# including an empty string.
regex += '[^/]*'
elif char == '?':
# Single-character wildcard. Match any single character (except
# a slash).
regex += '[^/]'
elif char == '[':
# Bracket expression wildcard. Except for the beginning
# exclamation mark, the whole bracket expression can be used
# directly as regex but we have to find where the expression
# ends.
# - "[][!]" matchs ']', '[' and '!'.
# - "[]-]" matchs ']' and '-'.
# - "[!]a-]" matchs any character except ']', 'a' and '-'.
j = i
# Pass bracket expression negation.
if j < end and pattern[j] == '!':
j += 1
# Pass first closing bracket if it is at the beginning of the
# expression.
if j < end and pattern[j] == ']':
j += 1
# Find closing bracket. Stop once we reach the end or find it.
while j < end and pattern[j] != ']':
j += 1
if j < end:
# Found end of bracket expression. Increment j to be one past
# the closing bracket:
#
# [...]
# ^ ^
# i j
#
j += 1
expr = '['
if pattern[i] == '!':
# Bracket expression needs to be negated.
expr += '^'
i += 1
elif pattern[i] == '^':
# POSIX declares that the regex bracket expression negation
# "[^...]" is undefined in a glob pattern. Python's
# `fnmatch.translate()` escapes the caret ('^') as a
# literal. To maintain consistency with undefined behavior,
# I am escaping the '^' as well.
expr += '\\^'
i += 1
# Build regex bracket expression. Escape slashes so they are
# treated as literal slashes by regex as defined by POSIX.
expr += pattern[i:j].replace('\\', '\\\\')
# Add regex bracket expression to regex result.
regex += expr
# Set i to one past the closing bracket.
i = j
else:
# Failed to find closing bracket, treat opening bracket as a
# bracket literal instead of as an expression.
regex += '\\['
else:
# Regular character, escape it for regex.
regex += re.escape(char)
return regex
util.register_pattern('gitwildmatch', GitWildMatchPattern)
class GitIgnorePattern(GitWildMatchPattern):
"""
The :class:`GitIgnorePattern` class is deprecated by :class:`GitWildMatchPattern`.
This class only exists to maintain compatibility with v0.4.
"""
def __init__(self, *args, **kw):
"""
Warn about deprecation.
"""
self._deprecated()
return super(GitIgnorePattern, self).__init__(*args, **kw)
@staticmethod
def _deprecated():
"""
Warn about deprecation.
"""
warnings.warn("GitIgnorePattern ('gitignore') is deprecated. Use GitWildMatchPattern ('gitwildmatch') instead.", DeprecationWarning, stacklevel=3)
@classmethod
def pattern_to_regex(cls, *args, **kw):
"""
Warn about deprecation.
"""
cls._deprecated()
return super(GitIgnorePattern, cls).pattern_to_regex(*args, **kw)
# Register `GitIgnorePattern` as "gitignore" for backward compatibility
# with v0.4.
util.register_pattern('gitignore', GitIgnorePattern)