| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| # Miscellaneous utility code |
| |
| import os |
| import contextlib |
| import functools |
| import gc |
| import socket |
| import sys |
| import textwrap |
| import types |
| import warnings |
| |
| |
# Template for deprecation warnings. The positional placeholders are filled,
# in order, with: the deprecated name, the version of the deprecation, and
# the name of the replacement.
_DEPR_MSG = (
    "pyarrow.{} is deprecated as of {}, "
    "please use pyarrow.{} instead."
)
| |
| |
def doc(*docstrings, **params):
    """
    Build the docstring of a callable from templates and its own docstring.

    The given pieces are concatenated in order, followed by the decorated
    callable's own (dedented) docstring. String pieces are formatted with
    ``params`` when at least one parameter is given; pieces that are
    callables contribute their dedented ``__doc__`` without formatting.

    The unformatted pieces are stored on the decorated callable as
    ``_docstring_components``. When a callable carrying that attribute is
    later passed as one of *docstrings*, its stored pieces are reused
    instead of its final ``__doc__``.

    This decorator cannot be used on Cython classes due to a CPython
    constraint, which enforces the __doc__ attribute to be read-only.
    See https://github.com/python/cpython/issues/91309

    Parameters
    ----------
    *docstrings : None, str, or callable
        The string / docstring / docstring template to be prepended in order
        before the default docstring under the callable. ``None`` entries
        are skipped, as are non-string objects with an empty ``__doc__``.
    **params
        The key/value pairs used to format the docstring template.

    Returns
    -------
    callable
        A decorator that sets ``__doc__`` and ``_docstring_components`` on
        the object it is applied to and returns that same object.
    """

    def decorator(decorated):
        pieces = []

        # Gather the raw (unformatted) pieces in the order they were given.
        for item in docstrings:
            if item is None:
                continue
            if hasattr(item, "_docstring_components"):
                # Already processed by ``doc``: reuse its original templates.
                pieces.extend(item._docstring_components)
            elif isinstance(item, str) or item.__doc__:
                pieces.append(item)

        # The decorated callable's own docstring always comes last.
        own_doc = decorated.__doc__
        if own_doc:
            pieces.append(textwrap.dedent(own_doc))

        def render(piece):
            # Callables are stored as objects; only their text is needed here.
            if not isinstance(piece, str):
                return textwrap.dedent(piece.__doc__ or "")
            # Templates are only formatted when there is something to insert.
            if params:
                return piece.format(**params)
            return piece

        decorated.__doc__ = "".join(render(piece) for piece in pieces)
        decorated._docstring_components = pieces
        return decorated

    return decorator
| |
| |
def _deprecate_api(old_name, new_name, api, next_version, type=FutureWarning):
    """
    Wrap *api* so that every call first emits a deprecation warning.

    Parameters
    ----------
    old_name : str
        Name of the deprecated API, inserted into the warning message.
    new_name : str
        Name of the replacement API, inserted into the warning message.
    api : callable
        The callable to invoke; all arguments are forwarded to it unchanged.
    next_version : str
        Version inserted into the message after "deprecated as of".
    type : Warning subclass, default FutureWarning
        Category of the emitted warning.

    Returns
    -------
    callable
        A function that warns and then returns ``api(*args, **kwargs)``.
    """
    # The message only depends on the arguments above, so build it once.
    msg = _DEPR_MSG.format(old_name, next_version, new_name)

    def wrapper(*args, **kwargs):
        # stacklevel=2 attributes the warning to the code that called the
        # deprecated name instead of to this wrapper, matching what
        # _deprecate_class does for isinstance checks.
        warnings.warn(msg, type, stacklevel=2)
        return api(*args, **kwargs)
    return wrapper
| |
| |
def _deprecate_class(old_name, new_class, next_version,
                     instancecheck=True):
    """
    Create a deprecated alias class for *new_class*.

    The returned class is named *old_name* and derives from *new_class*.
    Using it in an ``isinstance`` check emits a FutureWarning and then
    reports whether the object is an instance of *new_class*.

    Parameters
    ----------
    old_name : str
        Name given to the returned class and used in the warning message.
    new_class : type
        The replacement class; it is the single base of the returned class.
    next_version : str
        Version inserted into the message after "deprecated as of".
    instancecheck : bool, default True
        Accepted for compatibility; this function does not consult it.

    Returns
    -------
    type
        The deprecated alias class.
    """
    class _DeprecatedMeta(type):
        def __instancecheck__(cls, instance):
            message = _DEPR_MSG.format(
                old_name, next_version, new_class.__name__
            )
            # stacklevel=2 points the warning at the isinstance() caller.
            warnings.warn(message, FutureWarning, stacklevel=2)
            return isinstance(instance, new_class)

    deprecated_class = _DeprecatedMeta(old_name, (new_class,), {})
    return deprecated_class
| |
| |
def _is_iterable(obj):
    """
    Return True if ``iter(obj)`` succeeds, False if it raises TypeError.
    """
    try:
        iter(obj)
    except TypeError:
        return False
    else:
        return True
| |
| |
def _is_path_like(path):
    """
    Return True if *path* is a str or exposes a ``__fspath__`` attribute.
    """
    if isinstance(path, str):
        return True
    return hasattr(path, '__fspath__')
| |
| |
def _stringify_path(path):
    """
    Return *path* as a string with a leading ``~`` user component expanded.

    Accepts a str or any object providing ``__fspath__``.

    Raises
    ------
    TypeError
        If *path* is neither a str nor an object with ``__fspath__``.
    """
    if not isinstance(path, str):
        # Objects implementing the filesystem path protocol; anything
        # lacking ``__fspath__`` falls through to the TypeError below.
        with contextlib.suppress(AttributeError):
            return os.path.expanduser(path.__fspath__())
        raise TypeError("not a path-like object")

    return os.path.expanduser(path)
| |
| |
def product(seq):
    """
    Multiply all items of *seq* together, starting from 1.

    An empty *seq* therefore yields 1.
    """
    result = 1
    for item in seq:
        result = result * item
    return result
| |
| |
def get_contiguous_span(shape, strides, itemsize):
    """
    Return a contiguous span of N-D array data.

    Parameters
    ----------
    shape : tuple
        Number of items along each dimension.
    strides : tuple
        Step between consecutive items along each dimension, in the same
        unit as *itemsize*. If empty (or otherwise falsy), the span is
        simply ``itemsize * product(shape)`` starting at 0.
    itemsize : int
        Size of a single item.

    Returns
    -------
    start, end : int
        The span end points, as offsets relative to the item at index 0
        along every dimension.

    Raises
    ------
    ValueError
        If the span covered by the strides is not exactly
        ``itemsize * product(shape)``, i.e. the data is non-contiguous.
    """
    if not strides:
        return 0, itemsize * product(shape)

    lo, hi = 0, itemsize
    for axis, extent in enumerate(shape):
        if extent == 0:
            # No items at all: the span is empty.
            lo = hi = 0
            break
        step = strides[axis]
        reach = step * (extent - 1)
        if step > 0:
            # Positive strides push the upper end point further out.
            hi += reach
        elif step < 0:
            # Negative strides move the lower end point before offset 0.
            lo += reach

    if hi - lo != itemsize * product(shape):
        raise ValueError('array data is non-contiguous')
    return lo, hi
| |
| |
def find_free_port():
    """
    Return a TCP port number that was free at the time of the call.

    A temporary IPv4 stream socket is bound to port 0, the port number it
    ended up with is read back, and the socket is closed before returning,
    so nothing keeps the port reserved afterwards.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        probe.bind(('', 0))
        probe.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        _host, port = probe.getsockname()
        return port
| |
| |
def guid():
    """
    Return a new random UUID (version 4) as a 32-character hex string.
    """
    import uuid

    return uuid.uuid4().hex
| |
| |
def _break_traceback_cycle_from_frame(frame):
    """
    Clear the locals of the frames reached by following referrers of *frame*.

    Starting from *frame*, look through ``gc.get_referrers`` for another
    frame object (ignoring this function's own frame), clear it, and repeat
    the search from that frame until no referring frame is found.

    Parameters
    ----------
    frame : frame object
        Starting point of the walk. Only frames found among the referrers
        are cleared; the starting frame itself is not.
    """
    # Clear local variables in all inner frames, so as to break the
    # reference cycle.
    # This function's own frame refers to `frame` through its locals, so it
    # must be excluded from the search below.
    this_frame = sys._getframe(0)
    refs = gc.get_referrers(frame)
    while refs:
        # Note: the loop variable deliberately rebinds `frame`, so after a
        # successful search it names the frame that was found.
        for frame in refs:
            if frame is not this_frame and isinstance(frame, types.FrameType):
                break
        else:
            # No frame found in referrers (finished?)
            break
        # Drop the referrers list before clearing (presumably so that it
        # does not itself keep the referring objects alive).
        refs = None
        # Clear the frame locals, to try and break the cycle (it is
        # somewhere along the chain of execution frames).
        frame.clear()
        # To visit the inner frame, we need to find it among the
        # referrers of this frame (while `frame.f_back` would let
        # us visit the outer frame).
        refs = gc.get_referrers(frame)
    # Release every local reference to frames and referrers before returning.
    refs = frame = this_frame = None
| |
| |
def download_tzdata_on_windows():
    r"""
    Download and extract latest IANA timezone database into the
    location expected by Arrow which is %USERPROFILE%\Downloads\tzdata.

    The ``windowsZones.xml`` file from the Unicode CLDR repository is
    downloaded into the same directory.

    Raises
    ------
    TypeError
        If the current platform is not Windows (``sys.platform != 'win32'``).
    """
    if sys.platform != 'win32':
        raise TypeError(f"Timezone database is already provided by {sys.platform}")

    import tarfile
    from urllib.request import urlopen

    tzdata_path = os.path.expandvars(r"%USERPROFILE%\Downloads\tzdata")
    tzdata_compressed = os.path.join(tzdata_path, "tzdata.tar.gz")
    os.makedirs(tzdata_path, exist_ok=True)

    with urlopen('https://data.iana.org/time-zones/tzdata-latest.tar.gz') as response:
        with open(tzdata_compressed, 'wb') as f:
            f.write(response.read())

    # Use the archive as a context manager so that its file handle is closed
    # deterministically once extraction is done (or fails).
    with tarfile.open(tzdata_compressed) as archive:
        if hasattr(tarfile, 'data_filter'):
            # The archive comes from the network: where extraction filters
            # are available, refuse members that would escape tzdata_path.
            archive.extractall(tzdata_path, filter='data')
        else:
            archive.extractall(tzdata_path)

    with urlopen('https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml') as response_zones:  # noqa
        with open(os.path.join(tzdata_path, "windowsZones.xml"), 'wb') as f:
            f.write(response_zones.read())