# -*-python-*-
#
# Copyright (C) 1999-2023 The ViewCVS Group. All Rights Reserved.
#
# By using this file, you agree to the terms and conditions set forth in
# the LICENSE.html file which can be found at the top level of the ViewVC
# distribution or at http://viewvc.org/license-1.html.
#
# For more information, visit http://viewvc.org/
#
# -----------------------------------------------------------------------
#
# accept.py: parse/handle the various Accept headers from the client
#
# -----------------------------------------------------------------------

import re


def language(hdr):
    """Build a language selector from an Accept-Language header value.

    hdr is the raw header string. An empty or missing (None) value gives a
    selector without any requested languages. The returned
    _LanguageSelector holds one entry per item found in the header, in
    header order.

    AcceptLanguageParseError (raised by _parse) propagates when the header
    cannot be read.
    """
    selector = _LanguageSelector()
    return _parse(hdr, selector)


# Patterns used by _parse() to walk through a header value.
#
# _re_token: an item name -- optional whitespace, then one or more runs of
# either plain characters (anything but whitespace, ";", "," and '"') or a
# double-quoted string, then optional whitespace. The group is repeated, so
# group(1) holds the last run that matched.
_re_token = re.compile(r'\s*([^\s;,"]+|"[^"]*")+\s*')

# _re_param: one ";"-introduced parameter following an item name. Plain runs
# may contain whitespace here; they stop at ";", "," or '"'.
_re_param = re.compile(r';\s*([^;,"]+|"[^"]*")+\s*')

# _re_split_param: splits "name = value" into group(1) (the name) and
# group(2) (everything after the "="). The name must be allowed to span
# several characters; a single-character class would only ever recognise
# one-letter names such as "q" and silently drop "qs", "level" and "charset".
_re_split_param = re.compile(r"([^\s=]+)\s*=\s*(.*)")


def _parse(hdr, result):
    """Parse the Accept-style header string hdr into result.

    The header is read as a sequence of items, optionally separated by ",".
    Each item is a name followed by zero or more ";"-introduced parameters.
    For every item an instance of result.item_class is created from the
    lower-cased name, updated from its parameters, and passed to
    result.append().

    Parameter names (as split out by _re_split_param, compared
    case-insensitively) are handled as follows:

      q, qs    -> item.quality, a float; only values within 0.0 .. 1.0
                  are accepted
      level    -> item.level, a float
      charset  -> item.charset, lower-cased

    Parameters that cannot be split into name and value, parameters with
    other names, and values that are not usable numbers are ignored, which
    leaves the item's defaults in place.

    Returns result (untouched when hdr is empty or None).
    Raises AcceptLanguageParseError when no item name can be read at the
    current position.
    """
    # quick exit for empty or not-supplied header
    if not hdr:
        return result

    pos = 0
    while pos < len(hdr):
        name = _re_token.match(hdr, pos)
        if not name:
            raise AcceptLanguageParseError()
        a = result.item_class(name.group(1).lower())
        pos = name.end()

        while True:
            # are we looking at a parameter?
            match = _re_param.match(hdr, pos)
            if not match:
                break
            param = match.group(1)
            pos = match.end()

            # split up the pieces of the parameter
            match = _re_split_param.match(param)
            if not match:
                # the "=" was probably missing
                continue

            pname = match.group(1).lower()
            value = match.group(2)
            if pname in ("q", "qs"):
                try:
                    quality = float(value)
                except ValueError:
                    # bad float literal
                    continue
                # float() also accepts "nan", "inf" and numbers outside the
                # 0..1 weight range. Such a value would outrank (or, for
                # NaN, scramble the ordering of) every valid weight when the
                # selector compares qvalues, so it is ignored just like an
                # unparsable literal. The chained comparison is False for
                # NaN.
                if 0.0 <= quality <= 1.0:
                    a.quality = quality
            elif pname == "level":
                try:
                    a.level = float(value)
                except ValueError:
                    # bad float literal
                    pass
            elif pname == "charset":
                a.charset = value.lower()

        result.append(a)

        # step over the separator between items, if there is one
        if hdr[pos : pos + 1] == ",":
            pos += 1

    return result


class _AcceptItem:
    """One item of an Accept-style header: a name plus its parameters.

    A new item starts with quality 1.0, level 0.0 and an empty charset.
    """

    def __init__(self, name):
        self.name = name
        self.quality = 1.0
        self.level = 0.0
        self.charset = ""

    def __str__(self):
        """Render the item as the name followed by its non-default parameters.

        Parameters appear in the order q, level, charset; numeric values are
        written with three decimals. An item with only default values
        renders as the bare name.
        """
        params = ""
        if self.quality != 1.0:
            params += ";q=%.3f" % (self.quality,)
        if self.level != 0.0:
            params += ";level=%.3f" % (self.level,)
        if self.charset:
            params += ";charset=%s" % (self.charset,)

        if not params:
            return self.name
        return "%s%s" % (self.name, params)


class _LanguageRange(_AcceptItem):
    """An _AcceptItem that can be compared against language tags."""

    def matches(self, tag):
        """Return this range's qvalue if it covers tag, otherwise None.

        The range covers a tag that is identical to the range's name, and
        any tag that starts with the name followed by "-" (the range is a
        prefix of a more specific tag).
        """
        if tag == self.name or tag.startswith(self.name + "-"):
            return self.quality
        return None


class _LanguageSelector:
    """Choose an available language according to the client's request.

    The language ranges of the request are collected with append(); they
    are expected to be _LanguageRange instances. Once they are all in
    place, select_from() picks the available language that suits the
    request best.

    Nothing in here is really specific to languages: the selection works
    purely on qvalues. Plugging a different 'item_class' into a similar
    selector should be most of what is needed for _parse() to build a
    selector for another kind of header; _LanguageRange is the _AcceptItem
    flavour that knows how to match language tags.
    """

    item_class = _LanguageRange

    def __init__(self):
        # the requested items, in the order they were appended
        self.requested = []

    def append(self, item):
        """Record one more requested item."""
        self.requested.append(item)

    def select_from(self, avail):
        """Return the entry of avail that best satisfies the request.

        avail is a list of language-tag strings naming the available
        languages. Its first entry acts as the default and is returned when
        nothing in avail is acceptable. When several entries share the best
        qvalue, the requested ranges are consulted in request order and the
        first one that matches any of those entries decides.
        """
        # (qvalue, language-tag) for every available tag that is wanted
        scored = []

        for tag in avail:
            # the most specific (longest) matching range determines the
            # qvalue of this tag; among equally long ranges the one
            # requested first is kept
            best_len = 0
            best_q = 0.0
            for want in self.requested:
                q = want.matches(tag)
                if q is None or len(want.name) <= best_len:
                    continue
                best_q = q
                best_len = len(want.name)

            # a zero qvalue means "not acceptable"
            if best_q:
                scored.append((best_q, tag))

        # nothing acceptable: fall back to the default language tag
        if not scored:
            return avail[0]

        # ascending order, so the highest qvalue ends up last
        scored.sort()
        top_q, top_tag = scored[-1]

        # a zero qvalue at the top means there is no valid match either
        if not top_q:
            return avail[0]

        # a single winner needs no further consideration
        if len(scored) < 2 or scored[-2][0] != top_q:
            return top_tag

        # Several entries share the best qvalue. Skip the leading entries
        # that score lower and keep the rest as candidates.
        start = 0
        while scored[start][0] != top_q:
            start += 1
        tied = scored[start:]

        # Walk the request in order; the first range matching one of the
        # candidates wins.
        #
        # NOTE: that range may well match other candidates too, and which
        # of them is returned then only depends on the sort order above.
        # This normally means choosing between *sub* languages of the same
        # range, so no further search (e.g. by position in 'avail') is
        # attempted; a client that cares can send qvalues to refine.
        for want in self.requested:
            for _, candidate in tied:
                if want.matches(candidate):
                    return candidate

        # NOTREACHED in practice; if it ever is, the top entry is returned
        return top_tag


class AcceptLanguageParseError(Exception):
    """Raised by _parse() when no item name can be read from the header."""


def _test():
    """Self-check: run sample Accept-Language headers through the selector.

    Every header is parsed once with language(); the resulting selector is
    then asked to choose from several lists of available languages. An
    AssertionError is raised at the first choice that differs from the
    expected one.
    """
    # (header, [(available languages, expected choice), ...])
    cases = [
        (
            "en",
            [
                (["en"], "en"),
                (["en", "de"], "en"),
                (["de", "en"], "en"),
            ],
        ),
        # Netscape 4.x and early version of Mozilla may not send a q value
        (
            "en, ja",
            [
                (["en", "ja"], "en"),
            ],
        ),
        (
            "fr, de;q=0.9, en-gb;q=0.7, en;q=0.6, en-gb-foo;q=0.8",
            [
                (["en"], "en"),
                (["en-gb-foo"], "en-gb-foo"),
                (["de", "fr"], "fr"),
                (["de", "en-gb"], "de"),
                (["en-gb", "en-gb-foo"], "en-gb-foo"),
                (["en-bar"], "en-bar"),
                (["en-gb-bar", "en-gb-foo"], "en-gb-foo"),
            ],
        ),
    ]

    for header, expectations in cases:
        s = language(header)
        for avail, expected in expectations:
            assert s.select_from(avail) == expected
