| """Handwritten parser of dependency specifiers. |
| |
The docstring for each __parse_* function contains EBNF-inspired grammar representing
| the implementation. |
| """ |
| |
| import ast |
| from typing import Any, List, NamedTuple, Optional, Tuple, Union |
| |
| from ._tokenizer import DEFAULT_RULES, Tokenizer |
| |
| |
class Node:
    """Base class for nodes of a parsed marker expression tree.

    Each node wraps a single string value; subclasses define how the
    value is rendered when the marker is serialized back to text.
    """

    def __init__(self, value: str) -> None:
        self.value = value

    def __str__(self) -> str:
        return self.value

    def __repr__(self) -> str:
        return f"<{type(self).__name__}('{self}')>"

    def serialize(self) -> str:
        # Rendering is subclass-specific; the base class has no sensible default.
        raise NotImplementedError
| |
| |
class Variable(Node):
    """A marker variable (e.g. ``os_name``); serializes without quoting."""

    def serialize(self) -> str:
        return self.value
| |
| |
class Value(Node):
    """A literal string in a marker; serializes wrapped in double quotes."""

    def serialize(self) -> str:
        return '"{}"'.format(self.value)
| |
| |
class Op(Node):
    """A marker operator (e.g. ``==``, ``in``); serializes as-is."""

    def serialize(self) -> str:
        return self.value
| |
| |
# A marker operand: either an environment variable or a literal value.
MarkerVar = Union[Variable, Value]
# A single comparison: (left operand, operator, right operand).
MarkerItem = Tuple[MarkerVar, Op, MarkerVar]
# MarkerAtom = Union[MarkerItem, List["MarkerAtom"]]
# MarkerList = List[Union["MarkerList", MarkerAtom, str]]
# mypy does not support recursive type definition
# https://github.com/python/mypy/issues/731
MarkerAtom = Any
MarkerList = List[Any]
| |
| |
class ParsedRequirement(NamedTuple):
    """Structured result of parsing one dependency specifier string."""

    name: str  # package name
    url: str  # direct-reference URL; "" when the requirement has none
    extras: List[str]  # extra names from the bracketed list; [] when absent
    specifier: str  # version specifier text; "" when absent
    marker: Optional[MarkerList]  # parsed environment marker, or None
| |
| |
| # -------------------------------------------------------------------------------------- |
| # Recursive descent parser for dependency specifier |
| # -------------------------------------------------------------------------------------- |
def parse_requirement(source: str) -> ParsedRequirement:
    """Parse *source* as a dependency specifier string.

    Raises a tokenizer syntax error if *source* is not a valid specifier.
    """
    tokenizer = Tokenizer(source, rules=DEFAULT_RULES)
    return _parse_requirement(tokenizer)
| |
| |
def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement:
    """
    requirement = WS? IDENTIFIER WS? extras WS? requirement_details
    """
    tokenizer.consume("WS")

    # The package name must come first; anything else is a syntax error.
    token = tokenizer.expect(
        "IDENTIFIER", expected="package name at the start of dependency specifier"
    )
    tokenizer.consume("WS")

    parsed_extras = _parse_extras(tokenizer)
    tokenizer.consume("WS")

    url, specifier, marker = _parse_requirement_details(tokenizer)
    # Everything must have been consumed by now.
    tokenizer.expect("END", expected="end of dependency specifier")

    return ParsedRequirement(token.text, url, parsed_extras, specifier, marker)
| |
| |
def _parse_requirement_details(
    tokenizer: Tokenizer,
) -> Tuple[str, str, Optional[MarkerList]]:
    """
    requirement_details = AT URL (WS requirement_marker?)?
                        | specifier WS? (requirement_marker)?

    Returns ``(url, specifier, marker)``; ``url`` and ``specifier`` are
    mutually exclusive and default to ``""``, ``marker`` to ``None``.
    """

    specifier = ""
    url = ""
    marker = None

    if tokenizer.check("AT"):
        # Direct reference form: "name @ <url>" — no version specifier here.
        tokenizer.read()
        tokenizer.consume("WS")

        url_start = tokenizer.position
        url = tokenizer.expect("URL", expected="URL after @").text
        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        # Whitespace is required between the URL and whatever follows it.
        tokenizer.expect("WS", expected="whitespace after URL")

        # The input might end after whitespace.
        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        marker = _parse_requirement_marker(
            tokenizer, span_start=url_start, after="URL and whitespace"
        )
    else:
        # Regular form: optional version specifier, then optional marker.
        specifier_start = tokenizer.position
        specifier = _parse_specifier(tokenizer)
        tokenizer.consume("WS")

        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        marker = _parse_requirement_marker(
            tokenizer,
            span_start=specifier_start,
            after=(
                "version specifier"
                if specifier
                else "name and no valid version specifier"
            ),
        )

    return (url, specifier, marker)
| |
| |
def _parse_requirement_marker(
    tokenizer: Tokenizer, *, span_start: int, after: str
) -> MarkerList:
    """
    requirement_marker = SEMICOLON marker WS?

    *span_start*/*after* describe the preceding construct, for error reporting.
    """
    if not tokenizer.check("SEMICOLON"):
        tokenizer.raise_syntax_error(
            f"Expected end or semicolon (after {after})",
            span_start=span_start,
        )
    tokenizer.read()  # consume the semicolon

    parsed_marker = _parse_marker(tokenizer)
    tokenizer.consume("WS")
    return parsed_marker
| |
| |
def _parse_extras(tokenizer: Tokenizer) -> List[str]:
    """
    extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)?
    """
    # No opening bracket means the requirement has no extras at all.
    if not tokenizer.check("LEFT_BRACKET", peek=True):
        return []

    with tokenizer.enclosing_tokens(
        "LEFT_BRACKET",
        "RIGHT_BRACKET",
        around="extras",
    ):
        tokenizer.consume("WS")
        names = _parse_extras_list(tokenizer)
        tokenizer.consume("WS")

    return names
| |
| |
def _parse_extras_list(tokenizer: Tokenizer) -> List[str]:
    """
    extras_list = identifier (wsp* ',' wsp* identifier)*
    """
    extras: List[str] = []

    # An empty bracket pair is accepted: no identifiers, no extras.
    if not tokenizer.check("IDENTIFIER"):
        return extras

    extras.append(tokenizer.read().text)

    while True:
        tokenizer.consume("WS")
        if tokenizer.check("IDENTIFIER", peek=True):
            # Two identifiers in a row: the separating comma is missing.
            tokenizer.raise_syntax_error("Expected comma between extra names")
        elif not tokenizer.check("COMMA"):
            # Anything other than a comma ends the list.
            break

        tokenizer.read()  # consume the comma
        tokenizer.consume("WS")

        # A trailing comma is not allowed: a name must follow each comma.
        extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma")
        extras.append(extra_token.text)

    return extras
| |
| |
def _parse_specifier(tokenizer: Tokenizer) -> str:
    """
    specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
              | WS? version_many WS?
    """
    # The enclosing parentheses are optional; the context manager handles
    # matching (or absent) delimiters around the version clauses.
    with tokenizer.enclosing_tokens(
        "LEFT_PARENTHESIS",
        "RIGHT_PARENTHESIS",
        around="version specifier",
    ):
        tokenizer.consume("WS")
        specifiers_text = _parse_version_many(tokenizer)
        tokenizer.consume("WS")

    return specifiers_text
| |
| |
def _parse_version_many(tokenizer: Tokenizer) -> str:
    """
    version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)?

    Returns the comma-joined specifier text; "" when no SPECIFIER is present.
    """
    parsed_specifiers = ""
    while tokenizer.check("SPECIFIER"):
        span_start = tokenizer.position
        parsed_specifiers += tokenizer.read().text
        # A leftover ".*" trail after the SPECIFIER token means the operator
        # did not permit a prefix match; report it with a targeted span.
        if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                ".* suffix can only be used with `==` or `!=` operators",
                span_start=span_start,
                span_end=tokenizer.position + 1,
            )
        # Same for a leftover local version label ("+...").
        if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                "Local version label can only be used with `==` or `!=` operators",
                span_start=span_start,
                span_end=tokenizer.position,
            )
        tokenizer.consume("WS")
        if not tokenizer.check("COMMA"):
            break
        # Keep the comma itself in the reassembled specifier text.
        parsed_specifiers += tokenizer.read().text
        tokenizer.consume("WS")

    return parsed_specifiers
| |
| |
| # -------------------------------------------------------------------------------------- |
| # Recursive descent parser for marker expression |
| # -------------------------------------------------------------------------------------- |
def parse_marker(source: str) -> MarkerList:
    """Parse *source* as a standalone environment marker expression."""
    tokenizer = Tokenizer(source, rules=DEFAULT_RULES)
    return _parse_marker(tokenizer)
| |
| |
def _parse_marker(tokenizer: Tokenizer) -> MarkerList:
    """
    marker = marker_atom (BOOLOP marker_atom)*

    Returns a flat list alternating atoms and boolean-operator strings,
    e.g. ``[atom, "and", atom, "or", atom]``.
    """
    result: MarkerList = [_parse_marker_atom(tokenizer)]
    while tokenizer.check("BOOLOP"):
        op_token = tokenizer.read()
        result.append(op_token.text)
        result.append(_parse_marker_atom(tokenizer))
    return result
| |
| |
def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
    """
    marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS?
                | WS? marker_item WS?
    """
    tokenizer.consume("WS")
    if not tokenizer.check("LEFT_PARENTHESIS", peek=True):
        # Bare comparison, no grouping parentheses.
        atom: MarkerAtom = _parse_marker_item(tokenizer)
    else:
        # Parenthesized group: recurse into a full marker expression.
        with tokenizer.enclosing_tokens(
            "LEFT_PARENTHESIS",
            "RIGHT_PARENTHESIS",
            around="marker expression",
        ):
            tokenizer.consume("WS")
            atom = _parse_marker(tokenizer)
            tokenizer.consume("WS")
    tokenizer.consume("WS")
    return atom
| |
| |
def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem:
    """
    marker_item = WS? marker_var WS? marker_op WS? marker_var WS?

    Returns the ``(left, op, right)`` triple for one comparison.
    """
    tokenizer.consume("WS")
    lhs = _parse_marker_var(tokenizer)
    tokenizer.consume("WS")
    operator = _parse_marker_op(tokenizer)
    tokenizer.consume("WS")
    rhs = _parse_marker_var(tokenizer)
    tokenizer.consume("WS")
    return (lhs, operator, rhs)
| |
| |
def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar:
    """
    marker_var = VARIABLE | QUOTED_STRING
    """
    if tokenizer.check("VARIABLE"):
        # Dotted variable names are normalized to underscores before lookup.
        name = tokenizer.read().text.replace(".", "_")
        return process_env_var(name)
    if tokenizer.check("QUOTED_STRING"):
        return process_python_str(tokenizer.read().text)
    tokenizer.raise_syntax_error(
        message="Expected a marker variable or quoted string"
    )
| |
| |
def process_env_var(env_var: str) -> Variable:
    """Wrap an environment-variable name in a :class:`Variable`.

    ``python_implementation`` is treated as an alias of
    ``platform_python_implementation``.
    """
    if env_var in ("platform_python_implementation", "python_implementation"):
        return Variable("platform_python_implementation")
    return Variable(env_var)
| |
| |
def process_python_str(python_str: str) -> Value:
    """Evaluate a quoted Python string literal and wrap it in a :class:`Value`."""
    evaluated = ast.literal_eval(python_str)
    return Value(str(evaluated))
| |
| |
def _parse_marker_op(tokenizer: Tokenizer) -> Op:
    """
    marker_op = IN | NOT IN | OP
    """
    if tokenizer.check("IN"):
        tokenizer.read()
        return Op("in")
    if tokenizer.check("NOT"):
        # "not" must be followed by whitespace and then "in".
        tokenizer.read()
        tokenizer.expect("WS", expected="whitespace after 'not'")
        tokenizer.expect("IN", expected="'in' after 'not'")
        return Op("not in")
    if tokenizer.check("OP"):
        return Op(tokenizer.read().text)
    return tokenizer.raise_syntax_error(
        "Expected marker operator, one of "
        "<=, <, !=, ==, >=, >, ~=, ===, in, not in"
    )