#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

"""
DownloadableFileSource - Abstract class for sources downloaded from a URI
=========================================================================

This DownloadableFileSource class is a convenience class one can derive from
when implementing sources that get downloaded from a URI.

It provides utilities around handling mirrors, tracking and fetching the source.

Any derived classes must write their own stage() and get_unique_key()
implementation.


SourceMirror extra data "http-auth"
--------------------------------------------
The DownloadableFileSource, and consequently any :class:`Source <buildstream.source.Source>`
implementations which derive from DownloadableFileSource, support the "http-auth"
extra data returned by :class:`SourceMirror <buildstream.sourcemirror.SourceMirror>` plugins
through :func:`Source.translate_url() <buildstream.source.Source.translate_url>`.

This functionality is available **Since: 2.2**.

This allows one to use :class:`SourceMirror <buildstream.sourcemirror.SourceMirror>` plugins
to add an authorization header to the ``GET`` requests.


**Example:**

.. code:: python

   class MySourceMirror(SourceMirror):

        def translate_url(
            self,
            *,
            alias: str,
            alias_url: str,
            source_url: str,
            extra_data: Optional[Dict[str, Any]],
        ) -> str:

            #
            # Set the "http-auth" extra data
            #
            if extra_data is not None:
                extra_data["http-auth"] = "bearer"

            # ...

Only the "http-auth" value ``bearer`` is supported.


**Example:**

If the URL reported by :func:`SourceMirror.translate_url() <buildstream.sourcemirror.SourceMirror.translate_url>`
is ``http://flying-ponies.com/downloads/pony.tgz``, then a corresponding entry will be expected in the
user's ``~/.netrc``:

.. code::

   machine flying-ponies.com
       password 1234

DownloadableFileSource will add the following header to the ``GET`` request to download the file:

.. code::

   Authorization: Bearer 1234

"""


import os
import urllib.request
import urllib.error
import contextlib
import shutil
import netrc

from .source import Source, SourceError
from . import utils


class _NetrcFTPOpener(urllib.request.FTPHandler):
    """FTP handler which takes credentials from netrc data when the URL has none."""

    def __init__(self, netrc_config):
        # Parsed netrc data, or None when no netrc data is available
        self.netrc = netrc_config

    def _unsplit(self, host, port, user, passwd):
        """Assemble a ``[user[:passwd]@]host[:port]`` string from its parts.

        Parts which are falsy are left out, and ``passwd`` is only considered
        when ``user`` is given.
        """
        netloc = "{}:{}".format(host, port) if port else host
        if not user:
            return netloc

        credentials = "{}:{}".format(user, passwd) if passwd else user
        return "{}@{}".format(credentials, netloc)

    def ftp_open(self, req):
        """Open an FTP request, filling in netrc credentials if needed.

        Credentials embedded in the URL take precedence; the netrc data is
        only looked up when the URL carries no user name.
        """
        parsed = urllib.parse.urlparse(req.full_url)
        username, password = parsed.username, parsed.password

        if parsed.username is None and self.netrc:
            credentials = self.netrc.authenticators(parsed.hostname)
            if credentials:
                username, _, password = credentials

        # Rewrite the request host so the base handler sees the credentials
        req.host = self._unsplit(parsed.hostname, parsed.port, username, password)

        return super().ftp_open(req)


class _NetrcPasswordManager:
    """Password manager which answers credential lookups from netrc data.

    Provides the ``add_password()`` / ``find_user_password()`` pair of
    methods so it can be handed to ``urllib.request.HTTPBasicAuthHandler``.
    """

    def __init__(self, netrc_config):
        # Parsed netrc data, or None when no netrc data is available
        self.netrc = netrc_config

    def add_password(self, realm, uri, user, passwd):
        """Ignore the request; credentials only ever come from the netrc data."""
        return None

    def find_user_password(self, realm, authuri):
        """Return the ``(login, password)`` for the host of ``authuri``.

        ``(None, None)`` is returned when there is no netrc data or when it
        has no entry matching the host. The ``realm`` is not considered.
        """
        if self.netrc:
            hostname = urllib.parse.urlsplit(authuri).hostname
            credentials = self.netrc.authenticators(hostname)
            if credentials:
                login, _account, password = credentials
                return login, password

        return None, None


def _download_file(opener_creator, url, etag, directory, bearer_auth):
    """Download ``url`` into ``directory`` and report the outcome as a tuple.

    Failures are reported through the returned tuple rather than raised, so
    that the caller can turn them into its own error type.

    Args:
       opener_creator: Object providing ``get_url_opener(bearer_auth)`` and a
                       ``netrc_config`` attribute (parsed netrc data or None)
       url (str): The URL to download
       etag (str): The ETag of an already downloaded copy, or None
       directory (str): The directory to write the downloaded file into
       bearer_auth (bool): Whether to send the netrc password for the host
                           as an ``Authorization: Bearer`` header

    Returns:
       (tuple): A ``(local_file, etag, error)`` tuple which is one of:

       * ``(path, etag, None)`` when a file was downloaded, where ``etag``
         is the value of the response's ETag header (possibly None)
       * ``(None, None, None)`` when the server reported the file as
         unchanged with regard to the given ``etag``
       * ``(None, None, message)`` when the download failed
    """
    opener = opener_creator.get_url_opener(bearer_auth)
    default_name = os.path.basename(url)

    try:
        # Constructing the request validates the URL and raises ValueError
        # for unknown url types, so this must happen inside the try block
        # for such URLs to be reported as an error instead of raising.
        request = urllib.request.Request(url)
        request.add_header("Accept", "*/*")
        request.add_header("User-Agent", "BuildStream/2")

        if opener_creator.netrc_config and bearer_auth:
            parts = urllib.parse.urlsplit(url)
            entry = opener_creator.netrc_config.authenticators(parts.hostname)
            if entry:
                _, _, password = entry
                auth_header = "Bearer " + password
                request.add_header("Authorization", auth_header)

        if etag is not None:
            request.add_header("If-None-Match", etag)

        with contextlib.closing(opener.open(request)) as response:
            info = response.info()

            # some servers don't honor the 'If-None-Match' header
            if etag and info["ETag"] == etag:
                return None, None, None

            etag = info["ETag"]
            length = info.get("Content-Length")

            # Only keep the last path component of the reported file name,
            # so that the file always lands directly inside the directory.
            filename = info.get_filename(default_name)
            filename = os.path.basename(filename)
            local_file = os.path.join(directory, filename)
            with open(local_file, "wb") as dest:
                shutil.copyfileobj(response, dest)

                # Detect connections which were closed before the
                # advertised amount of data was received.
                actual_length = dest.tell()
                if length and actual_length < int(length):
                    raise ValueError(f"Partial file {actual_length}/{length}")

    except urllib.error.HTTPError as e:
        if e.code == 304:
            # 304 Not Modified.
            # Because we use etag only for matching ref, currently specified ref is what
            # we would have downloaded.
            return None, None, None

        return None, None, str(e)
    except (urllib.error.URLError, OSError, ValueError) as e:
        # ValueError covers both unknown/malformed URLs detected while
        # building the request and the partial download check above.
        return None, None, str(e)

    return local_file, etag, None


class DownloadableFileSource(Source):
    """Base class for sources which download a single file from a URL.

    The downloaded file is kept in a mirror directory specific to the
    configured URL, under a file name equal to its sha256 checksum, which
    is also what is used as the ref. When the server reports an ETag, it is
    stored next to the file as ``<sha256>.etag`` so that an unchanged file
    does not need to be downloaded again.
    """

    # pylint: disable=attribute-defined-outside-init

    COMMON_CONFIG_KEYS = Source.COMMON_CONFIG_KEYS + ["url", "ref"]

    # Cached path of the mirror file for the current ref, computed lazily
    # by _get_mirror_file() and reset whenever the ref is (re)assigned.
    __default_mirror_file = None

    def configure(self, node):
        """Load the ``url`` and ``ref`` configuration and derive the mirror location."""
        self.original_url = node.get_str("url")
        self.ref = node.get_str("ref", None)
        self.__default_mirror_file = None

        # translate_url() may fill in extra data, of which only the
        # "http-auth" value "bearer" is recognized here.
        extra_data = {}
        self.url = self.translate_url(self.original_url, extra_data=extra_data)
        self.bearer_auth = extra_data.get("http-auth") == "bearer"

        # The mirror directory is derived from the untranslated URL
        self._mirror_dir = os.path.join(self.get_mirror_directory(), utils.url_directory_name(self.original_url))

    def preflight(self):
        """Nothing to check ahead of time."""
        return

    def get_unique_key(self):
        """Return the untranslated URL and the ref as the unique key."""
        return [self.original_url, self.ref]

    def is_cached(self) -> bool:
        """Tell whether the file for the current ref exists in the mirror directory."""
        return os.path.isfile(self._get_mirror_file())

    def load_ref(self, node):
        """Load the ref from ``node``, leaving it as None if absent."""
        self.ref = node.get_str("ref", None)
        # The cached mirror file path belongs to the previous ref
        self.__default_mirror_file = None

    def get_ref(self):
        """Return the current ref, which may be None."""
        return self.ref

    def set_ref(self, ref, node):
        """Set the ref both on this source and in ``node``."""
        node["ref"] = self.ref = ref
        # The cached mirror file path belongs to the previous ref
        self.__default_mirror_file = None

    def track(self):  # pylint: disable=arguments-differ
        """Download the file and return its sha256 checksum as the new ref.

        A warning is issued if a ref was already set and the new one differs.
        """
        # there is no 'track' field in the source to determine what/whether
        # or not to update refs, because tracking a ref is always a conscious
        # decision by the user.
        new_ref = self._ensure_mirror("Tracking {}".format(self.url))

        if self.ref and self.ref != new_ref:
            detail = (
                "When tracking, new ref differs from current ref:\n"
                + "  Tracked URL: {}\n".format(self.url)
                + "  Current ref: {}\n".format(self.ref)
                + "  New ref: {}\n".format(new_ref)
            )
            self.warn("Potential man-in-the-middle attack!", detail=detail)

        return new_ref

    def fetch(self):  # pylint: disable=arguments-differ
        """Download the file into the mirror directory.

        Raises:
           SourceError: If the checksum of the downloaded file is not the ref
        """

        # Just a defensive check, it is impossible for the
        # file to be already cached because Source.fetch() will
        # not be called if the source is already cached.
        #
        if os.path.isfile(self._get_mirror_file()):
            return  # pragma: nocover

        # Download the file, raise hell if the sha256sums don't match,
        # and mirror the file otherwise.
        sha256 = self._ensure_mirror(
            "Fetching {}".format(self.url),
        )
        if sha256 != self.ref:
            raise SourceError(
                "File downloaded from {} has sha256sum '{}', not '{}'!".format(self.url, sha256, self.ref)
            )

    def _get_etag(self, ref):
        """Return the ETag stored for ``ref``, or None if there is none."""
        etagfilename = os.path.join(self._mirror_dir, "{}.etag".format(ref))
        if os.path.exists(etagfilename):
            with open(etagfilename, "r", encoding="utf-8") as etagfile:
                return etagfile.read()

        return None

    def _store_etag(self, ref, etag):
        """Store ``etag`` for ``ref`` in the mirror directory."""
        etagfilename = os.path.join(self._mirror_dir, "{}.etag".format(ref))
        with utils.save_file_atomic(etagfilename) as etagfile:
            etagfile.write(etag)

    def _ensure_mirror(self, activity_name: str):
        """Download the file and store it in the mirror directory.

        Args:
           activity_name: The name to run the download activity under

        Returns:
           The sha256 checksum of the downloaded file, or the current ref
           if the download reported the cached file as unchanged

        Raises:
           SourceError: If the download failed, or if the mirror directory
                        path exists but is not a directory
        """
        # Downloads from the url and caches it according to its sha256sum.
        with self.tempdir() as td:
            # We do not use etag in case what we have in cache is
            # not matching ref in order to be able to recover from
            # corrupted download.
            if self.ref and self.is_cached():
                # Do not re-download the file if the ETag matches.
                etag = self._get_etag(self.ref)
            else:
                etag = None

            url_opener_creator = _UrlOpenerCreator(self._parse_netrc())

            local_file, new_etag, error = self.blocking_activity(
                _download_file, (url_opener_creator, self.url, etag, td, self.bearer_auth), activity_name
            )

            if error:
                raise SourceError("{}: Error mirroring {}: {}".format(self, self.url, error), temporary=True)

            # No file and no error means the file we have is up to date
            if local_file is None:
                return self.ref

            # Make sure url-specific mirror dir exists.
            try:
                os.makedirs(self._mirror_dir, exist_ok=True)
            except FileExistsError as e:
                raise SourceError(
                    "{}: Mirror directory exists but is not a directory: {}".format(self, self._mirror_dir)
                ) from e

            # Store by sha256sum
            sha256 = utils.sha256sum(local_file)
            # Even if the file already exists, move the new file over.
            # In case the old file was corrupted somehow.
            os.rename(local_file, self._get_mirror_file(sha256))

            if new_etag:
                self._store_etag(sha256, new_etag)
            return sha256

    def _parse_netrc(self):
        """Parse the user's netrc file.

        Returns:
           The parsed netrc data, or None if the file could not be read or
           parsed; a warning is issued in the latter case
        """
        netrc_config = None
        try:
            netrc_config = netrc.netrc()
        except OSError:
            # If the .netrc file was not found, FileNotFoundError will be
            # raised, but OSError will be raised directly by the netrc package
            # in the case that $HOME is not set.
            #
            # This will catch both cases.
            pass
        except netrc.NetrcParseError as e:
            self.warn("{}: While reading .netrc: {}".format(self, e))
        return netrc_config

    def _get_mirror_file(self, sha=None):
        """Return the path of a file in the mirror directory.

        Args:
           sha: The checksum naming the file, or None for the current ref

        Returns:
           The path of the file named ``sha`` in the mirror directory
        """
        if sha is not None:
            return os.path.join(self._mirror_dir, sha)

        # The path for the current ref is cached, the cache is reset
        # wherever this class assigns the ref.
        if self.__default_mirror_file is None:
            self.__default_mirror_file = os.path.join(self._mirror_dir, self.ref)

        return self.__default_mirror_file


class _UrlOpenerCreator:
    """Creates url openers which authenticate using netrc data where applicable."""

    def __init__(self, netrc_config):
        # Parsed netrc data, or None when no netrc data is available
        self.netrc_config = netrc_config

    def get_url_opener(self, bearer_auth):
        """Return an opener suitable for the requested kind of authentication.

        The netrc backed HTTP basic auth and FTP handlers are only installed
        when there is netrc data and bearer authentication is not requested;
        otherwise a default opener is returned.
        """
        use_netrc_handlers = bool(self.netrc_config) and not bearer_auth
        if not use_netrc_handlers:
            return urllib.request.build_opener()

        handlers = [
            urllib.request.HTTPBasicAuthHandler(_NetrcPasswordManager(self.netrc_config)),
            _NetrcFTPOpener(self.netrc_config),
        ]
        return urllib.request.build_opener(*handlers)
