blob: b29f0911302019c154ea373a561cf7f79c08eaef [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import ast
import os
import re
from glob import glob
from itertools import chain
from typing import Iterable, List, Optional, Set
from docs.exts.docs_build.docs_builder import ALL_PROVIDER_YAMLS
from docs.exts.docs_build.errors import DocBuildError
ROOT_PROJECT_DIR = os.path.abspath(
os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir)
)
ROOT_PACKAGE_DIR = os.path.join(ROOT_PROJECT_DIR, "airflow")
DOCS_DIR = os.path.join(ROOT_PROJECT_DIR, "docs")
def find_existing_guide_operator_names(src_dir_pattern: str) -> Set[str]:
"""
Find names of existing operators.
:return names of existing operators.
"""
operator_names = set()
paths = glob(src_dir_pattern, recursive=True)
for path in paths:
with open(path) as f:
operator_names |= set(re.findall(".. _howto/operator:(.+?):", f.read()))
return operator_names
def extract_ast_class_def_by_name(ast_tree, class_name):
"""
Extracts class definition by name
:param ast_tree: AST tree
:param class_name: name of the class.
:return: class node found
"""
for node in ast.walk(ast_tree):
if isinstance(node, ast.ClassDef) and node.name == class_name:
return node
return None
def _generate_missing_guide_error(path, line_no, operator_name):
return DocBuildError(
file_path=path,
line_no=line_no,
message=(
f"Link to the guide is missing in operator's description: {operator_name}.\n"
f"Please add link to the guide to the description in the following form:\n"
f"\n"
f".. seealso::\n"
f" For more information on how to use this operator, take a look at the guide:\n"
f" :ref:`howto/operator:{operator_name}`\n"
),
)
def check_guide_links_in_operator_descriptions() -> List[DocBuildError]:
"""Check if there are links to guides in operator's descriptions."""
build_errors = []
build_errors.extend(
_check_missing_guide_references(
operator_names=find_existing_guide_operator_names(
f"{DOCS_DIR}/apache-airflow/howto/operator/**/*.rst"
),
python_module_paths=chain(
glob(f"{ROOT_PACKAGE_DIR}/operators/*.py"),
glob(f"{ROOT_PACKAGE_DIR}/sensors/*.py"),
),
)
)
for provider in ALL_PROVIDER_YAMLS:
operator_names = {
*find_existing_guide_operator_names(f"{DOCS_DIR}/{provider['package-name']}/operators/**/*.rst"),
*find_existing_guide_operator_names(f"{DOCS_DIR}/{provider['package-name']}/operators.rst"),
}
# Extract all potential python modules that can contain operators
python_module_paths = chain(
glob(f"{provider['package-dir']}/**/operators/*.py", recursive=True),
glob(f"{provider['package-dir']}/**/sensors/*.py", recursive=True),
glob(f"{provider['package-dir']}/**/transfers/*.py", recursive=True),
)
build_errors.extend(
_check_missing_guide_references(
operator_names=operator_names, python_module_paths=python_module_paths
)
)
return build_errors
def _check_missing_guide_references(operator_names, python_module_paths) -> List[DocBuildError]:
build_errors = []
for py_module_path in python_module_paths:
with open(py_module_path) as f:
py_content = f.read()
if "This module is deprecated" in py_content:
continue
for existing_operator in operator_names:
if f"class {existing_operator}" not in py_content:
continue
# This is a potential file with necessary class definition.
# To make sure it's a real Python class definition, we build AST tree
ast_tree = ast.parse(py_content)
class_def = extract_ast_class_def_by_name(ast_tree, existing_operator)
if class_def is None:
continue
docstring = ast.get_docstring(class_def)
if docstring:
if "This class is deprecated." in docstring:
continue
if f":ref:`howto/operator:{existing_operator}`" in docstring:
continue
build_errors.append(
_generate_missing_guide_error(py_module_path, class_def.lineno, existing_operator)
)
return build_errors
def assert_file_not_contains(
*, file_path: str, pattern: str, message: Optional[str] = None
) -> Optional[DocBuildError]:
"""
Asserts that file does not contain the pattern. Return message error if it does.
:param file_path: file
:param pattern: pattern
:param message: message to return
"""
return _extract_file_content(file_path, message, pattern, False)
def assert_file_contains(
*, file_path: str, pattern: str, message: Optional[str] = None
) -> Optional[DocBuildError]:
"""
Asserts that file does contain the pattern. Return message error if it does not.
:param file_path: file
:param pattern: pattern
:param message: message to return
"""
return _extract_file_content(file_path, message, pattern, True)
def _extract_file_content(file_path: str, message: Optional[str], pattern: str, expected_contain: bool):
if not message:
message = f"Pattern '{pattern}' could not be found in '{file_path}' file."
with open(file_path, "rb", 0) as doc_file:
pattern_compiled = re.compile(pattern)
found = False
for num, line in enumerate(doc_file, 1):
line_decode = line.decode()
result = re.search(pattern_compiled, line_decode)
if not expected_contain and result:
return DocBuildError(file_path=file_path, line_no=num, message=message)
elif expected_contain and result:
found = True
if expected_contain and not found:
return DocBuildError(file_path=file_path, line_no=None, message=message)
return None
def filter_file_list_by_pattern(file_paths: Iterable[str], pattern: str) -> List[str]:
"""
Filters file list to those that content matches the pattern
:param file_paths: file paths to check
:param pattern: pattern to match
:return: list of files matching the pattern
"""
output_paths = []
pattern_compiled = re.compile(pattern)
for file_path in file_paths:
with open(file_path, "rb", 0) as text_file:
text_file_content = text_file.read().decode()
if re.findall(pattern_compiled, text_file_content):
output_paths.append(file_path)
return output_paths
def find_modules(deprecated_only: bool = False) -> Set[str]:
"""
Finds all modules.
:param deprecated_only: whether only deprecated modules should be found.
:return: set of all modules found
"""
file_paths = glob(f"{ROOT_PACKAGE_DIR}/**/*.py", recursive=True)
# Exclude __init__.py
file_paths = [f for f in file_paths if not f.endswith("__init__.py")]
if deprecated_only:
file_paths = filter_file_list_by_pattern(file_paths, r"This module is deprecated.")
# Make path relative
file_paths = [os.path.relpath(f, ROOT_PROJECT_DIR) for f in file_paths]
# Convert filename to module
modules_names = {file_path.rpartition(".")[0].replace("/", ".") for file_path in file_paths}
return modules_names
def check_exampleinclude_for_example_dags() -> List[DocBuildError]:
"""Checks all exampleincludes for example dags."""
all_docs_files = glob(f"{DOCS_DIR}/**/*.rst", recursive=True)
build_errors = []
for doc_file in all_docs_files:
build_error = assert_file_not_contains(
file_path=doc_file,
pattern=r"literalinclude::.+(?:example_dags|tests/system/)",
message=(
"literalinclude directive is prohibited for example DAGs. \n"
"You should use the exampleinclude directive to include example DAGs."
),
)
if build_error:
build_errors.append(build_error)
return build_errors
def check_enforce_code_block() -> List[DocBuildError]:
"""Checks all code:: blocks."""
all_docs_files = glob(f"{DOCS_DIR}/**/*.rst", recursive=True)
build_errors = []
for doc_file in all_docs_files:
build_error = assert_file_not_contains(
file_path=doc_file,
pattern=r"^.. code::",
message=(
"We recommend using the code-block directive instead of the code directive. "
"The code-block directive is more feature-full."
),
)
if build_error:
build_errors.append(build_error)
return build_errors
def find_example_dags(provider_dir):
system_tests_dir = provider_dir.replace(f"{ROOT_PACKAGE_DIR}/", "")
yield from glob(f"{provider_dir}/**/*example_dags", recursive=True)
yield from glob(f"{ROOT_PROJECT_DIR}/tests/system/{system_tests_dir}/*/", recursive=True)
def check_example_dags_in_provider_tocs() -> List[DocBuildError]:
"""Checks that each documentation for provider packages has a link to example DAGs in the TOC."""
build_errors = []
for provider in ALL_PROVIDER_YAMLS:
example_dags_dirs = list(find_example_dags(provider['package-dir']))
if not example_dags_dirs:
continue
doc_file_path = f"{DOCS_DIR}/{provider['package-name']}/index.rst"
if len(example_dags_dirs) == 1:
package_rel_path = os.path.relpath(example_dags_dirs[0], start=ROOT_PROJECT_DIR)
expected_text = f"Example DAGs <https://github.com/apache/airflow/tree/.*/{package_rel_path}>"
suggested_text = f"Example DAGs <https://github.com/apache/airflow/tree/main/{package_rel_path}>"
else:
expected_text = "Example DAGs <example-dags>"
suggested_text = "Example DAGs <example-dags>"
build_error = assert_file_contains(
file_path=doc_file_path,
pattern=expected_text,
message=(
f"A link to the example DAGs in table of contents is missing. Can you please add it?\n\n"
f" {suggested_text}"
),
)
if build_error:
build_errors.append(build_error)
return build_errors
def check_pypi_repository_in_provider_tocs() -> List[DocBuildError]:
"""Checks that each documentation for provider packages has a link to PyPI files in the TOC."""
build_errors = []
for provider in ALL_PROVIDER_YAMLS:
doc_file_path = f"{DOCS_DIR}/{provider['package-name']}/index.rst"
expected_text = f"PyPI Repository <https://pypi.org/project/{provider['package-name']}/>"
build_error = assert_file_contains(
file_path=doc_file_path,
pattern=re.escape(expected_text),
message=(
f"A link to the PyPI in table of contents is missing. Can you please add it?\n\n"
f" {expected_text}"
),
)
if build_error:
build_errors.append(build_error)
return build_errors
def run_all_check() -> List[DocBuildError]:
"""Run all checks from this module"""
general_errors = []
general_errors.extend(check_guide_links_in_operator_descriptions())
general_errors.extend(check_enforce_code_block())
general_errors.extend(check_exampleinclude_for_example_dags())
general_errors.extend(check_example_dags_in_provider_tocs())
general_errors.extend(check_pypi_repository_in_provider_tocs())
return general_errors