# Copyright 2015 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).
import contextlib
import hashlib
import os
import shutil
import uuid
from abc import abstractmethod
from email import message_from_string
from .common import safe_mkdtemp, safe_open
from .compatibility import PY3, AbstractClass
from .tracer import TRACER
from .variables import ENV
from .version import __version__ as PEX_VERSION
try:
import requests
except ImportError:
requests = None
try:
from cachecontrol import CacheControl
from cachecontrol.caches import FileCache
# Newer versions of CacheControl move the importing of `lockfile` to
# class instantiation, and raise an ImportError in the __init__ method.
#
# https://github.com/ionrock/cachecontrol/commit/a561ac9c1ef15db9bc6ead5d08443e6198cc69e1
FileCache(directory=None) # noqa
except ImportError:
CacheControl = FileCache = None
if PY3:
import urllib.request as urllib_request
else:
import urllib2 as urllib_request
try:
# The hashlib.algorithms_guaranteed attribute is available in >=2.7.9 and >=3.2.
HASHLIB_ALGORITHMS = frozenset(hashlib.algorithms_guaranteed)
except AttributeError:
# And the hashlib.algorithms attribute covers the rest of the 2.7 versions we support.
HASHLIB_ALGORITHMS = frozenset(hashlib.algorithms)
class Context(AbstractClass):
"""Encapsulate the networking necessary to do requirement resolution.
At a minimum, the Context must implement ``open(link)`` by returning a
file-like object. Reference implementations of ``read(link)`` and
``fetch(link)`` are provided based upon ``open(link)`` but may be further
specialized by individual implementations.
"""
DEFAULT_ENCODING = 'iso-8859-1'
class Error(Exception):
"""Error base class for Contexts to wrap application-specific exceptions."""
pass
_REGISTRY = []
@classmethod
def register(cls, context_impl):
"""Register a concrete implementation of a :class:`Context` to be recognized."""
cls._REGISTRY.insert(0, context_impl)
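# NB: register() prepends, so the implementation registered last (see the Context.register()
# calls at module scope below) is the one get() tries first.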
@classmethod
def get(cls):
for context_class in cls._REGISTRY:
try:
context = context_class()
TRACER.log('Constructed %s context %r' % (context.__class__.__name__, context), V=4)
return context
except cls.Error:
continue
raise cls.Error('Could not initialize a request context.')
@abstractmethod
def open(self, link):
"""Return an open file-like object to the link.
:param link: The :class:`Link` to open.
"""
def read(self, link):
"""Return the binary content associated with the link.
:param link: The :class:`Link` to read.
"""
with contextlib.closing(self.open(link)) as fp:
return fp.read()
def content(self, link):
"""Return the encoded content associated with the link.
:param link: The :class:`Link` to read.
"""
def fetch(self, link, into=None):
"""Fetch the binary content associated with the link and write to a file.
:param link: The :class:`Link` to fetch.
:keyword into: If specified, write into the directory ``into``. If ``None``, creates a new
temporary directory that persists for the duration of the interpreter.
"""
target = os.path.join(into or safe_mkdtemp(), link.filename)
if os.path.exists(target):
# Assume that if the local file already exists, it is safe to use.
return target
with TRACER.timed('Fetching %s' % link.url, V=2):
target_tmp = '%s.%s' % (target, uuid.uuid4())
with contextlib.closing(self.open(link)) as in_fp:
with safe_open(target_tmp, 'wb') as out_fp:
shutil.copyfileobj(in_fp, out_fp)
os.rename(target_tmp, target)
return target
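# Illustrative usage sketch only (assumes pex's Link wrapper, e.g. `from pex.link import Link`):
#
#   context = Context.get()
#   path = context.fetch(Link('https://example.com/foo-1.0.tar.gz'), into='/tmp/downloads')
#
# The payload is written to '<target>.<uuid4>' first and then os.rename()d into place, so a
# partially downloaded file never appears at the final path.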
def resolve(self, link):
"""Resolves final link throught all the redirections.
:param link: The :class:`Link` to open.
"""
return link
class UrllibContext(Context):
"""Default Python standard library Context."""
def open(self, link):
return urllib_request.urlopen(link.url)
def content(self, link):
if link.local:
raise self.Error('Context.content only works with remote URLs.')
with contextlib.closing(self.open(link)) as fp:
encoding = message_from_string(str(fp.headers)).get_content_charset(self.DEFAULT_ENCODING)
return fp.read().decode(encoding, 'replace')
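# resolve() issues a HEAD request (via the get_method override below) and lets urllib follow
# redirects, so the wrapped Link that is returned carries the final, post-redirect URL.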
def resolve(self, link):
request = urllib_request.Request(link.url)
request.get_method = lambda: 'HEAD'
with contextlib.closing(urllib_request.urlopen(request)) as response:
return link.wrap(response.url)
def __init__(self, *args, **kw):
TRACER.log('Warning: using a UrllibContext, which is known to be flaky.')
TRACER.log('Please build pex with the requests module for more reliable downloads.')
super(UrllibContext, self).__init__(*args, **kw)
Context.register(UrllibContext)
class StreamFilelike(object):
"""A file-like object wrapper around requests streams that performs hash validation."""
@classmethod
def detect_algorithm(cls, link):
"""Detect the hashing algorithm from the fragment in the link, if any."""
if any(link.fragment.startswith('%s=' % algorithm) for algorithm in HASHLIB_ALGORITHMS):
algorithm, value = link.fragment.split('=', 2)
try:
return hashlib.new(algorithm), value
except ValueError: # unsupported algorithm
return None, None
return None, None
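# For example, a link like 'https://host/pkg-1.0.tar.gz#sha256=<hexdigest>' yields
# (hashlib.sha256(), '<hexdigest>'); a link without a recognized '<algorithm>=' fragment yields
# (None, None), in which case no checksum validation is performed.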
def __init__(self, request, link, chunk_size=16384):
self._iterator = request.iter_content(chunk_size)
self.encoding = request.encoding
self._bytes = b''
self._link = link
self._hasher, self._hash_value = self.detect_algorithm(link)
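# read() drains the streaming response chunk by chunk, feeding the hasher as bytes arrive; when
# the stream is exhausted, _validate() compares the accumulated digest against the fragment value.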
def read(self, length=None):
while length is None or len(self._bytes) < length:
try:
next_chunk = next(self._iterator)
if self._hasher:
self._hasher.update(next_chunk)
self._bytes += next_chunk
except StopIteration:
self._validate()
break
if length is None:
length = len(self._bytes)
chunk, self._bytes = self._bytes[:length], self._bytes[length:]
return chunk
def _validate(self):
if self._hasher:
if self._hash_value != self._hasher.hexdigest():
raise Context.Error('%s failed checksum!' % (self._link.url))
else:
TRACER.log('Validated %s (%s)' % (self._link.filename, self._link.fragment), V=3)
def close(self):
pass
class RequestsContext(Context):
"""A requests-based Context."""
USER_AGENT = 'pex/%s' % PEX_VERSION
@staticmethod
def _create_session(max_retries):
session = requests.session()
retrying_adapter = requests.adapters.HTTPAdapter(max_retries=max_retries)
session.mount('http://', retrying_adapter)
session.mount('https://', retrying_adapter)
return session
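# NB: max_retries on the HTTPAdapter only covers connection-level failures; read timeouts are
# retried explicitly in open() below.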
def __init__(self, session=None, verify=True, env=ENV):
if requests is None:
raise RuntimeError('requests is not available. Cannot use a RequestsContext.')
self._verify = verify
max_retries = env.PEX_HTTP_RETRIES
if max_retries < 0:
raise ValueError('max_retries may not be negative.')
self._max_retries = max_retries
self._session = session or self._create_session(self._max_retries)
def open(self, link):
# requests does not support file:// -- so we must short-circuit manually
if link.local:
return open(link.local_path, 'rb') # noqa: T802
for attempt in range(self._max_retries + 1):
try:
return StreamFilelike(self._session.get(
link.url, verify=self._verify, stream=True, headers={'User-Agent': self.USER_AGENT}),
link)
except requests.exceptions.ReadTimeout:
# Connect timeouts are handled by the HTTPAdapter, but read timeouts unfortunately are not,
# so we retry them ourselves.
TRACER.log('Read timeout trying to fetch %s, retrying. %d retries remain.' % (
link.url,
self._max_retries - attempt))
except requests.exceptions.RequestException as e:
raise self.Error(e)
raise self.Error(
requests.packages.urllib3.exceptions.MaxRetryError(
None,
link,
'Exceeded max retries of %d' % self._max_retries))
def content(self, link):
if link.local:
raise self.Error('Context.content only works with remote URLs.')
with contextlib.closing(self.open(link)) as request:
return request.read().decode(request.encoding or self.DEFAULT_ENCODING, 'replace')
def resolve(self, link):
return link.wrap(self._session.head(
link.url, verify=self._verify, allow_redirects=True,
headers={'User-Agent': self.USER_AGENT}
).url)
if requests:
Context.register(RequestsContext)
class CachedRequestsContext(RequestsContext):
"""A requests-based Context with CacheControl support."""
DEFAULT_CACHE = os.path.join(ENV.PEX_ROOT, 'cache')
def __init__(self, cache=None, **kw):
self._cache = os.path.realpath(os.path.expanduser(cache or self.DEFAULT_CACHE))
super(CachedRequestsContext, self).__init__(
CacheControl(requests.session(), cache=FileCache(self._cache)), **kw)
if CacheControl:
Context.register(CachedRequestsContext)
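# Illustrative sketch only, not part of pex's public surface: how a caller might pick a Context
# and download a distribution. The relative `Link` import is an assumption based on pex's link
# module; the URL and target directory are caller-supplied.
def _example_fetch(url, into=None):
  from .link import Link  # assumed location of pex's Link wrapper

  context = Context.get()  # CachedRequestsContext, then RequestsContext, then UrllibContext
  link = context.resolve(Link(url))  # follow any redirects to the final URL
  return context.fetch(link, into=into)  # local path of the downloaded file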