blob: e81d6ac869dd298862f35576cdf529476408de9b [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import gzip
import os
from pathlib import Path
import click
from .command import Bash, Command, default_bin
from .cmake import CMake
from .git import git
from .logger import logger
from ..lang.cpp import CppCMakeDefinition, CppConfiguration
from ..lang.rust import Cargo
from ..lang.python import Autopep8, Flake8, NumpyDoc
from .rat import Rat, exclusion_from_globs
from .tmpdir import tmpdir
class LintValidationException(Exception):
    """Raised by ``LintResult.ok()`` when a lint result is not successful."""
class LintResult:
    """Outcome of a single linter invocation.

    Parameters
    ----------
    success : bool
        Whether the linter passed.
    reason : object, optional
        Free-form explanation of a failure. It is kept on the instance and,
        when not None, passed to the exception raised by :meth:`ok`.
    """

    def __init__(self, success, reason=None):
        self.success = success
        # Keep the reason so callers can report why a linter failed.
        self.reason = reason

    def ok(self):
        """Raise LintValidationException unless this result is successful."""
        if self.success:
            return
        if self.reason is None:
            raise LintValidationException
        raise LintValidationException(self.reason)

    @staticmethod
    def from_cmd(command_result):
        """Build a result from an object exposing ``returncode``.

        A return code of 0 is treated as success, anything else as failure.
        """
        return LintResult(command_result.returncode == 0)
def cpp_linter(src, build_dir, clang_format=True, cpplint=True,
               clang_tidy=False, iwyu=False, iwyu_all=False,
               fix=False):
    """Run the enabled C++ lint targets and yield one LintResult per target.

    Targets are executed through a cmake build created in ``build_dir``.
    If the cmake binary is not available an error is logged and nothing is
    yielded.

    Parameters
    ----------
    src : object
        Source tree handle; only ``src.cpp`` is read here.
    build_dir : str
        Directory in which the cmake build is generated.
    clang_format, cpplint, clang_tidy, iwyu : bool
        Select which families of targets are run.
    iwyu_all : bool
        With ``iwyu``, run the "iwyu-all" target instead of "iwyu".
    fix : bool
        With ``clang_format``, run "format" instead of "check-format".
    """
    logger.info("Running C++ linters")

    cmake = CMake()
    if not cmake.available:
        logger.error("cpp linter requested but cmake binary not found.")
        return

    # The build directory serves two purposes: it produces the
    # `compile_commands.json` that clang-tidy needs, and it exposes the
    # format/lint tools as ordinary build targets (ninja/make).
    # A lint-only configuration exits early and does not produce
    # `compile_commands.json`, so it is only used when neither iwyu nor
    # clang-tidy was requested.
    needs_compile_commands = iwyu or clang_tidy
    configuration = CppConfiguration(
        with_python=False, with_lint_only=not needs_compile_commands)
    build = CppCMakeDefinition(src.cpp, configuration).build(build_dir)

    def run_target(name):
        # check=False: a failing target is reported through the LintResult.
        return LintResult.from_cmd(build.run(name, check=False))

    if clang_format:
        yield run_target("format" if fix else "check-format")

    if cpplint:
        yield run_target("lint")
        yield run_target("lint_cpp_cli")

    if clang_tidy:
        yield run_target("check-clang-tidy")

    if iwyu:
        yield run_target("iwyu-all" if iwyu_all else "iwyu")
class CMakeFormat(Command):
    """Command whose executable is the given cmake-format script."""

    def __init__(self, cmake_format_bin):
        # Unlike Hadolint in this module, no default is resolved: the caller
        # must pass the executable path explicitly.
        self.bin = cmake_format_bin
def cmake_linter(src, fix=False):
    """Run run-cmake-format.py on all CMakeLists.txt and yield its result.

    Parameters
    ----------
    src : object
        Source tree handle; ``src.path`` is the directory that contains
        ``run-cmake-format.py``.
    fix : bool
        Only controls whether a warning is logged; the script is always
        invoked with ``--check``.

    Yields
    ------
    LintResult
        Successful when the script exits with return code 0.
    """
    logger.info("Running cmake-format linters")

    if not fix:
        # Logger.warn is a deprecated alias of Logger.warning.
        logger.warning("run-cmake-format modifies files, regardless of --fix")

    arrow_cmake_format = os.path.join(src.path, "run-cmake-format.py")
    cmake_format = CMakeFormat(cmake_format_bin=arrow_cmake_format)
    # check=False, like every other linter in this module: a non-zero exit
    # should surface as a failed LintResult (presumably the command wrapper
    # would otherwise raise and abort the remaining linters).
    yield LintResult.from_cmd(cmake_format("--check", check=False))
def python_linter(src, fix=False):
    """Run Python linters on python/pyarrow, python/examples, setup.py
    and dev/.

    autopep8 runs first, then flake8 (twice: once with the default
    configuration, once with the Cython configuration on pyarrow).

    Parameters
    ----------
    src : object
        Source tree handle; ``src.path``, ``src.python``, ``src.pyarrow``
        and ``src.dev`` are read.
    fix : bool
        When true autopep8 rewrites files in place; otherwise its diff is
        captured and a non-empty diff counts as a failure.

    Yields
    ------
    LintResult
        One result for autopep8 and two for flake8. If autopep8 is missing
        nothing is yielded; if flake8 is missing only the autopep8 result is.
    """
    setup_py = os.path.join(src.python, "setup.py")
    setup_cfg = os.path.join(src.python, "setup.cfg")

    logger.info("Running Python formatter (autopep8)")

    autopep8 = Autopep8()
    if not autopep8.available:
        logger.error(
            "Python formatter requested but autopep8 binary not found. "
            "Please run `pip install -r dev/archery/requirements-lint.txt`")
        return

    # Gather files for autopep8
    patterns = ["python/pyarrow/**/*.py",
                "python/pyarrow/**/*.pyx",
                "python/pyarrow/**/*.pxd",
                "python/pyarrow/**/*.pxi",
                "python/examples/**/*.py",
                "dev/archery/**/*.py",
                ]
    files = [setup_py]
    root = Path(src.path)
    for pattern in patterns:
        files.extend(str(path) for path in root.glob(pattern))
    files.sort()

    base_args = ['--global-config', setup_cfg, '--ignore-local-config', '-j0']
    if fix:
        # check=False, like the flake8 calls below: a non-zero exit should
        # be reported as a failed LintResult (presumably the command wrapper
        # would otherwise raise and abort the remaining linters).
        yield LintResult.from_cmd(
            autopep8(*base_args, '--in-place', *files, check=False))
    else:
        # XXX `-j0` doesn't work well with `--exit-code`, so instead
        # we capture the diff and check whether it's empty
        # (https://github.com/hhatto/autopep8/issues/543)
        diff = autopep8.run_captured(*base_args, '--diff', *files)
        if diff:
            print(diff.decode('utf8'))
            yield LintResult(success=False)
        else:
            yield LintResult(success=True)

    # Run flake8 after autopep8 (the latter may have modified some files)
    logger.info("Running Python linter (flake8)")

    flake8 = Flake8()
    if not flake8.available:
        logger.error(
            "Python linter requested but flake8 binary not found. "
            "Please run `pip install -r dev/archery/requirements-lint.txt`")
        return

    flake8_exclude = ['.venv*']

    yield LintResult.from_cmd(
        flake8("--extend-exclude=" + ','.join(flake8_exclude),
               setup_py, src.pyarrow, os.path.join(src.python, "examples"),
               src.dev, check=False))

    # Second pass: Cython sources, using the dedicated flake8 configuration.
    config = os.path.join(src.python, ".flake8.cython")
    yield LintResult.from_cmd(
        flake8("--config=" + config, src.pyarrow, check=False))
def python_numpydoc(symbols=None, allow_rules=None, disallow_rules=None):
    """Run numpydoc linter on python.

    Pyarrow must be available for import.

    Parameters
    ----------
    symbols : collection of str, optional
        Names handed to NumpyDoc. When empty or None a default set of
        pyarrow modules is used.
    allow_rules, disallow_rules : optional
        Forwarded unchanged to ``NumpyDoc.validate``.

    Yields
    ------
    LintResult
        Exactly one result: failed when NumpyDoc raises RuntimeError on
        construction or when validation returns any entries, successful
        otherwise. Violations are echoed through click.
    """
    logger.info("Running Python docstring linters")
    # by default try to run on all pyarrow package
    symbols = symbols or {
        'pyarrow',
        'pyarrow.compute',
        'pyarrow.csv',
        'pyarrow.dataset',
        'pyarrow.feather',
        'pyarrow.flight',
        'pyarrow.fs',
        'pyarrow.gandiva',
        'pyarrow.ipc',
        'pyarrow.json',
        'pyarrow.orc',
        'pyarrow.parquet',
        'pyarrow.plasma',
        'pyarrow.types',
    }
    try:
        numpydoc = NumpyDoc(symbols)
    except RuntimeError as e:
        logger.error(str(e))
        yield LintResult(success=False)
        return

    results = numpydoc.validate(
        # limit the validation scope to the pyarrow package
        from_package='pyarrow',
        allow_rules=allow_rules,
        disallow_rules=disallow_rules
    )

    if len(results) == 0:
        yield LintResult(success=True)
        return

    number_of_violations = 0
    for obj, result in results:
        errors = result['errors']

        # inspect doesn't play nice with cython generated source code,
        # to use a hacky way to represent a proper __qualname__
        doc = getattr(obj, '__doc__', '')
        name = getattr(obj, '__name__', '')
        qualname = getattr(obj, '__qualname__', '')
        module = getattr(obj, '__module__', '')
        instance = getattr(obj, '__self__', '')
        klass = instance.__class__.__name__ if instance else ''

        # The first docstring line is taken as the signature. __doc__ may be
        # None or empty, in which case there is no signature to show.
        doc_lines = doc.splitlines() if isinstance(doc, str) else []
        cython_signature = doc_lines[0] if doc_lines else ''

        desc = '.'.join(filter(None, [module, klass, qualname or name]))

        click.echo()
        click.echo(click.style(desc, bold=True, fg='yellow'))
        if cython_signature:
            # Drop a missing module (None or '') rather than joining it:
            # str.join raises TypeError on None and '' gives a leading dot.
            qualname_with_signature = '.'.join(
                filter(None, [module, cython_signature]))
            click.echo(
                click.style(
                    '-> {}'.format(qualname_with_signature),
                    fg='yellow'
                )
            )

        for error in errors:
            number_of_violations += 1
            click.echo('{}: {}'.format(*error))

    msg = 'Total number of docstring violations: {}'.format(
        number_of_violations
    )
    click.echo()
    click.echo(click.style(msg, fg='red'))

    yield LintResult(success=False)
def rat_linter(src, root):
    """Run the apache-rat license check and yield a single LintResult.

    Parameters
    ----------
    src : object
        Source tree handle; ``src.dev`` locates the exclusion list and
        ``src.archive`` produces the archive handed to apache-rat.
    root : str
        Directory in which the temporary archive is written.
    """
    logger.info("Running apache-rat linter")

    exclude_list = os.path.join(src.dev, "release", "rat_exclude_files.txt")
    exclusion = exclusion_from_globs(exclude_list)

    # apache-rat expects a gzip compressed tar archive, so the sources are
    # exported as one before the report is generated.
    archive_path = os.path.join(root, "apache-arrow.tar.gz")
    src.archive(archive_path, compressor=gzip.compress)
    report = Rat().report(archive_path)

    violations = list(report.validate(exclusion=exclusion))
    for item in violations:
        print("apache-rat license violation: {}".format(item))

    yield LintResult(not violations)
def r_linter(src):
    """Run ``lint.sh`` from the R directory through Bash; yield its result."""
    logger.info("Running R linter")
    script = os.path.join(src.r, "lint.sh")
    # check=False: a non-zero exit is reported through the LintResult.
    outcome = Bash().run(script, check=False)
    yield LintResult.from_cmd(outcome)
def rust_linter(src):
    """Run ``cargo +stable fmt --all -- --check`` in the Rust directory.

    Yields one LintResult for the cargo invocation. If cargo is not
    available an error is logged and nothing is yielded.
    """
    logger.info("Running Rust linter")
    cargo = Cargo()

    if cargo.available:
        fmt_args = ("+stable", "fmt", "--all", "--", "--check")
        outcome = cargo.run(*fmt_args, cwd=src.rust, check=False)
        yield LintResult.from_cmd(outcome)
    else:
        logger.error("Rust linter requested but cargo executable not found.")
class Hadolint(Command):
    """Command wrapper for the hadolint Dockerfile linter."""

    def __init__(self, hadolint_bin=None):
        # The executable is whatever default_bin returns for the optional
        # override and the name "hadolint".
        self.bin = default_bin(hadolint_bin, "hadolint")
def is_docker_image(path):
    """Return True if ``path`` names a Dockerfile that should be linted.

    A path qualifies when its file name starts with "Dockerfile" and its
    directory part does not start with "dev" or "python/manylinux".
    """
    directory, filename = os.path.split(path)
    if not filename.startswith("Dockerfile"):
        return False
    return not directory.startswith(("dev", "python/manylinux"))
def docker_linter(src):
    """Run hadolint on every tracked Dockerfile accepted by is_docker_image.

    Yields one LintResult per linted file. If hadolint is not available an
    error is logged and nothing is yielded.
    """
    logger.info("Running Docker linter")

    hadolint = Hadolint()
    if not hadolint.available:
        logger.error("hadolint linter requested but hadolint binary not found.")
        return

    # Paths come from git, so only files tracked in the repository are seen.
    dockerfiles = filter(is_docker_image, git.ls_files(git_dir=src.path))
    for dockerfile in dockerfiles:
        outcome = hadolint.run(dockerfile, check=False, cwd=src.path)
        yield LintResult.from_cmd(outcome)
def linter(src, fix=False, *, clang_format=False, cpplint=False,
           clang_tidy=False, iwyu=False, iwyu_all=False,
           python=False, numpydoc=False, cmake_format=False, rat=False,
           r=False, rust=False, docker=False):
    """Run every requested linter, then fail if any of them failed.

    Parameters
    ----------
    src : object
        Source tree handle passed on to the individual linters.
    fix : bool
        Forwarded to the linters that accept it (C++, Python, cmake-format).
    clang_format, cpplint, clang_tidy, iwyu : bool
        Any of these enables the C++ linter; all are forwarded to it,
        together with ``iwyu_all``.
    python, numpydoc, cmake_format, rat, r, rust, docker : bool
        Enable the corresponding linter.

    Raises
    ------
    LintValidationException
        Raised via ``LintResult.ok()`` for the first failed result, after
        all enabled linters have run.
    """
    run_cpp = clang_format or cpplint or clang_tidy or iwyu

    with tmpdir(prefix="arrow-lint-") as root:
        build_dir = os.path.join(root, "cpp-build")

        # Each entry pairs an "enabled" flag with a factory for the linter's
        # generator. The order is the order in which linters are executed.
        stages = [
            (run_cpp, lambda: cpp_linter(src, build_dir,
                                         clang_format=clang_format,
                                         cpplint=cpplint,
                                         clang_tidy=clang_tidy,
                                         iwyu=iwyu,
                                         iwyu_all=iwyu_all,
                                         fix=fix)),
            (python, lambda: python_linter(src, fix=fix)),
            (numpydoc, lambda: python_numpydoc()),
            (cmake_format, lambda: cmake_linter(src, fix=fix)),
            (rat, lambda: rat_linter(src, root)),
            (r, lambda: r_linter(src)),
            (rust, lambda: rust_linter(src)),
            (docker, lambda: docker_linter(src)),
        ]

        # Linters yield LintResult objects instead of raising on failure, so
        # all of them run in one pass and every error is shown to the user.
        results = []
        for enabled, make_results in stages:
            if enabled:
                results.extend(make_results())

        # Only now turn a failure into an exception, so that calling code
        # can exit with a non-zero status.
        for outcome in results:
            outcome.ok()