dev/archery/archery/lang/python.py - arrow-experimental-rs-arrow2 - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 import inspect
 import tokenize
 from contextlib import contextmanager

 try:
     from numpydoc.validate import Docstring, validate
 except ImportError:
     have_numpydoc = False
 else:
     have_numpydoc = True

 from ..utils.command import Command, capture_stdout, default_bin


 class Flake8(Command):
     def __init__(self, flake8_bin=None):
         self.bin = default_bin(flake8_bin, "flake8")


 class Autopep8(Command):
     def __init__(self, autopep8_bin=None):
         self.bin = default_bin(autopep8_bin, "autopep8")

     @capture_stdout()
     def run_captured(self, *args, **kwargs):
         return self.run(*args, **kwargs)


 def _tokenize_signature(s):
     lines = s.encode('ascii').splitlines()
     generator = iter(lines).__next__
     return tokenize.tokenize(generator)


 def _convert_typehint(tokens):
     names = []
     opening_bracket_reached = False
     for token in tokens:
         # omit the tokens before the opening bracket
         if not opening_bracket_reached:
             if token.string == '(':
                 opening_bracket_reached = True
             else:
                 continue

         if token.type == 1:  # type 1 means NAME token
             names.append(token)
         else:
             if len(names) == 1:
                 yield (names[0].type, names[0].string)
             elif len(names) == 2:
                 # two "NAME" tokens follow each other which means a cython
                 # typehint like `bool argument`, so remove the typehint
                 # note that we could convert it to python typehints, but hints
                 # are not supported by _signature_fromstr
                 yield (names[1].type, names[1].string)
             elif len(names) > 2:
                 raise ValueError('More than two NAME tokens follow each other')
             names = []
             yield (token.type, token.string)


 def inspect_signature(obj):
     """
     Custom signature inspection primarily for cython generated callables.

     Cython puts the signatures to the first line of the docstrings, which we
     can reuse to parse the python signature from, but some gymnastics are
     required, like removing the cython typehints.

     It converts the cython signature:
         array(obj, type=None, mask=None, size=None, from_pandas=None,
               bool safe=True, MemoryPool memory_pool=None)
     To:
         <Signature (obj, type=None, mask=None, size=None, from_pandas=None,
                     safe=True, memory_pool=None)>
     """
     cython_signature = obj.__doc__.splitlines()[0]
     cython_tokens = _tokenize_signature(cython_signature)
     python_tokens = _convert_typehint(cython_tokens)
     python_signature = tokenize.untokenize(python_tokens)
     return inspect._signature_fromstr(inspect.Signature, obj, python_signature)


 class NumpyDoc:

     def __init__(self, symbols=None):
         if not have_numpydoc:
             raise RuntimeError(
                 'Numpydoc is not available, install the development version '
                 'with command: pip install '
                 'git+https://github.com/numpy/numpydoc'
             )
         self.symbols = set(symbols or {'pyarrow'})

     def traverse(self, fn, obj, from_package):
         """Apply a function on publicly exposed API components.

         Recursively iterates over the members of the passed object. It omits
         any '_' prefixed and thirdparty (non pyarrow) symbols.

         Parameters
         ----------
         obj : Any
         from_package : string, default 'pyarrow'
             Predicate to only consider objects from this package.
         """
         todo = [obj]
         seen = set()

         while todo:
             obj = todo.pop()
             if obj in seen:
                 continue
             else:
                 seen.add(obj)

             fn(obj)

             for name in dir(obj):
                 if name.startswith('_'):
                     continue

                 member = getattr(obj, name)
                 module = getattr(member, '__module__', None)
                 if not (module and module.startswith(from_package)):
                     continue

                 todo.append(member)

     @contextmanager
     def _apply_patches(self):
         """
         Patch Docstring class to bypass loading already loaded python objects.
         """
         orig_load_obj = Docstring._load_obj
         orig_signature = inspect.signature

         @staticmethod
         def _load_obj(obj):
             # By default it expects a qualname and import the object, but we
             # have already loaded object after the API traversal.
             if isinstance(obj, str):
                 return orig_load_obj(obj)
             else:
                 return obj

         def signature(obj):
             # inspect.signature tries to parse __text_signature__ if other
             # properties like __signature__ doesn't exists, but cython
             # doesn't set that property despite that embedsignature cython
             # directive is set. The only way to inspect a cython compiled
             # callable's signature to parse it from __doc__ while
             # embedsignature directive is set during the build phase.
             # So path inspect.signature function to attempt to parse the first
             # line of callable.__doc__ as a signature.
             try:
                 return orig_signature(obj)
             except Exception as orig_error:
                 try:
                     return inspect_signature(obj)
                 except Exception:
                     raise orig_error

         try:
             Docstring._load_obj = _load_obj
             inspect.signature = signature
             yield
         finally:
             Docstring._load_obj = orig_load_obj
             inspect.signature = orig_signature

     def validate(self, from_package='', allow_rules=None,
                  disallow_rules=None):
         results = []

         def callback(obj):
             result = validate(obj)

             errors = []
             for errcode, errmsg in result.get('errors', []):
                 if allow_rules and errcode not in allow_rules:
                     continue
                 if disallow_rules and errcode in disallow_rules:
                     continue
                 errors.append((errcode, errmsg))

             if len(errors):
                 result['errors'] = errors
                 results.append((obj, result))

         with self._apply_patches():
             for symbol in self.symbols:
                 try:
                     obj = Docstring._load_obj(symbol)
                 except (ImportError, AttributeError):
                     print('{} is not available for import'.format(symbol))
                 else:
                     self.traverse(callback, obj, from_package=from_package)

         return results
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	import inspect
	import tokenize
	from contextlib import contextmanager

	try:
	from numpydoc.validate import Docstring, validate
	except ImportError:
	have_numpydoc = False
	else:
	have_numpydoc = True

	from ..utils.command import Command, capture_stdout, default_bin


	class Flake8(Command):
	def __init__(self, flake8_bin=None):
	self.bin = default_bin(flake8_bin, "flake8")


	class Autopep8(Command):
	def __init__(self, autopep8_bin=None):
	self.bin = default_bin(autopep8_bin, "autopep8")

	@capture_stdout()
	def run_captured(self, args, *kwargs):
	return self.run(args, *kwargs)


	def _tokenize_signature(s):
	lines = s.encode('ascii').splitlines()
	generator = iter(lines).__next__
	return tokenize.tokenize(generator)


	def _convert_typehint(tokens):
	names = []
	opening_bracket_reached = False
	for token in tokens:
	# omit the tokens before the opening bracket
	if not opening_bracket_reached:
	if token.string == '(':
	opening_bracket_reached = True
	else:
	continue

	if token.type == 1: # type 1 means NAME token
	names.append(token)
	else:
	if len(names) == 1:
	yield (names[0].type, names[0].string)
	elif len(names) == 2:
	# two "NAME" tokens follow each other which means a cython
	# typehint like `bool argument`, so remove the typehint
	# note that we could convert it to python typehints, but hints
	# are not supported by _signature_fromstr
	yield (names[1].type, names[1].string)
	elif len(names) > 2:
	raise ValueError('More than two NAME tokens follow each other')
	names = []
	yield (token.type, token.string)


	def inspect_signature(obj):
	"""
	Custom signature inspection primarily for cython generated callables.

	Cython puts the signatures to the first line of the docstrings, which we
	can reuse to parse the python signature from, but some gymnastics are
	required, like removing the cython typehints.

	It converts the cython signature:
	array(obj, type=None, mask=None, size=None, from_pandas=None,
	bool safe=True, MemoryPool memory_pool=None)
	To:
	<Signature (obj, type=None, mask=None, size=None, from_pandas=None,
	safe=True, memory_pool=None)>
	"""
	cython_signature = obj.__doc__.splitlines()[0]
	cython_tokens = _tokenize_signature(cython_signature)
	python_tokens = _convert_typehint(cython_tokens)
	python_signature = tokenize.untokenize(python_tokens)
	return inspect._signature_fromstr(inspect.Signature, obj, python_signature)


	class NumpyDoc:

	def __init__(self, symbols=None):
	if not have_numpydoc:
	raise RuntimeError(
	'Numpydoc is not available, install the development version '
	'with command: pip install '
	'git+https://github.com/numpy/numpydoc'
	)
	self.symbols = set(symbols or {'pyarrow'})

	def traverse(self, fn, obj, from_package):
	"""Apply a function on publicly exposed API components.

	Recursively iterates over the members of the passed object. It omits
	any '_' prefixed and thirdparty (non pyarrow) symbols.

	Parameters
	----------
	obj : Any
	from_package : string, default 'pyarrow'
	Predicate to only consider objects from this package.
	"""
	todo = [obj]
	seen = set()

	while todo:
	obj = todo.pop()
	if obj in seen:
	continue
	else:
	seen.add(obj)

	fn(obj)

	for name in dir(obj):
	if name.startswith('_'):
	continue

	member = getattr(obj, name)
	module = getattr(member, '__module__', None)
	if not (module and module.startswith(from_package)):
	continue

	todo.append(member)

	@contextmanager
	def _apply_patches(self):
	"""
	Patch Docstring class to bypass loading already loaded python objects.
	"""
	orig_load_obj = Docstring._load_obj
	orig_signature = inspect.signature

	@staticmethod
	def _load_obj(obj):
	# By default it expects a qualname and import the object, but we
	# have already loaded object after the API traversal.
	if isinstance(obj, str):
	return orig_load_obj(obj)
	else:
	return obj

	def signature(obj):
	# inspect.signature tries to parse __text_signature__ if other
	# properties like __signature__ doesn't exists, but cython
	# doesn't set that property despite that embedsignature cython
	# directive is set. The only way to inspect a cython compiled
	# callable's signature to parse it from __doc__ while
	# embedsignature directive is set during the build phase.
	# So path inspect.signature function to attempt to parse the first
	# line of callable.__doc__ as a signature.
	try:
	return orig_signature(obj)
	except Exception as orig_error:
	try:
	return inspect_signature(obj)
	except Exception:
	raise orig_error

	try:
	Docstring._load_obj = _load_obj
	inspect.signature = signature
	yield
	finally:
	Docstring._load_obj = orig_load_obj
	inspect.signature = orig_signature

	def validate(self, from_package='', allow_rules=None,
	disallow_rules=None):
	results = []

	def callback(obj):
	result = validate(obj)

	errors = []
	for errcode, errmsg in result.get('errors', []):
	if allow_rules and errcode not in allow_rules:
	continue
	if disallow_rules and errcode in disallow_rules:
	continue
	errors.append((errcode, errmsg))

	if len(errors):
	result['errors'] = errors
	results.append((obj, result))

	with self._apply_patches():
	for symbol in self.symbols:
	try:
	obj = Docstring._load_obj(symbol)
	except (ImportError, AttributeError):
	print('{} is not available for import'.format(symbol))
	else:
	self.traverse(callback, obj, from_package=from_package)

	return results