| # -*-python-*- |
| # |
| # Copyright (C) 1999-2023 The ViewCVS Group. All Rights Reserved. |
| # |
| # By using this file, you agree to the terms and conditions set forth in |
| # the LICENSE.html file which can be found at the top level of the ViewVC |
| # distribution or at http://viewvc.org/license-1.html. |
| # |
| # For more information, visit http://viewvc.org/ |
| # |
| # ----------------------------------------------------------------------- |
| # |
| # accept.py: parse/handle the various Accept headers from the client |
| # |
| # ----------------------------------------------------------------------- |
| |
| import re |
| |
| |
def language(hdr):
    """Parse an Accept-Language header.

    hdr is the raw header value; an empty or missing value is accepted.
    The ranges found in it are collected in a fresh _LanguageSelector,
    which is handed to _parse() and whose result is returned to the caller.
    """
    selector = _LanguageSelector()
    return _parse(hdr, selector)
| |
| |
# Patterns used by _parse() to walk through a header value.
#
# _re_token matches one list element name (e.g. "en-gb"), together with any
# whitespace around it.  The name is either a run of characters that are not
# whitespace, ";", "," or a double quote, or a double-quoted string.
_re_token = re.compile(r'\s*([^\s;,"]+|"[^"]*")+\s*')

# _re_param matches one ";name=value" parameter following a token.  Group 1
# is the text after the ";" (leading whitespace skipped), up to the next
# ";" or ",".
_re_param = re.compile(r';\s*([^;,"]+|"[^"]*")+\s*')

# _re_split_param splits the text captured by _re_param into name (group 1)
# and value (group 2) around the "=".  The name must be allowed to span
# several characters: with a single-character name only "q" could ever be
# split, and multi-character names such as "qs", "level" and "charset"
# would silently fail to match.
_re_split_param = re.compile(r"([^\s=]+)\s*=\s*(.*)")
| |
| |
def _parse(hdr, result):
    """Parse the header value hdr, adding one item per element to result.

    result must provide an item_class attribute (called with the lowercased
    element name to build each item) and an append() method (called once
    per finished item).  The same result object is returned.

    Parameters named "q" or "qs" set the item's quality, "level" sets its
    level and "charset" sets its charset; any other parameter, and any
    parameter that cannot be split into name=value, is ignored.  Values
    that are not valid float literals are ignored as well.

    Raises AcceptLanguageParseError when no element name can be matched at
    the current position in the header.
    """
    # nothing to do for an empty or not-supplied header
    if not hdr:
        return result

    # parameter names holding a float, mapped to the item attribute they set
    float_attrs = {"q": "quality", "qs": "quality", "level": "level"}

    end = len(hdr)
    cursor = 0
    while cursor < end:
        token = _re_token.match(hdr, cursor)
        if token is None:
            raise AcceptLanguageParseError()
        item = result.item_class(token.group(1).lower())
        cursor = token.end()

        # consume every ";..." parameter that follows the element name
        found = _re_param.match(hdr, cursor)
        while found is not None:
            cursor = found.end()

            # break the parameter into its name and value; when that fails
            # (the "=" was probably missing) the parameter is just skipped
            pieces = _re_split_param.match(found.group(1))
            if pieces is not None:
                key = pieces.group(1).lower()
                value = pieces.group(2)
                attr = float_attrs.get(key)
                if attr is not None:
                    try:
                        setattr(item, attr, float(value))
                    except ValueError:
                        # bad float literal: keep the item's current value
                        pass
                elif key == "charset":
                    item.charset = value.lower()

            found = _re_param.match(hdr, cursor)

        result.append(item)

        # step over the separator before the next element, if there is one
        if hdr[cursor : cursor + 1] == ",":
            cursor += 1

    return result
| |
| |
class _AcceptItem:
    """One named element of an Accept-style header plus its parameters."""

    def __init__(self, name):
        self.name = name
        # parameter defaults, used until something overrides them
        self.quality, self.level, self.charset = 1.0, 0.0, ""

    def __str__(self):
        """Render the item as name[;q=...][;level=...][;charset=...].

        A parameter is only written when it differs from its default
        (quality 1.0, level 0.0, empty charset).
        """
        optional_parts = (
            (self.quality != 1.0, "%s;q=%.3f", self.quality),
            (self.level != 0.0, "%s;level=%.3f", self.level),
            (bool(self.charset), "%s;charset=%s", self.charset),
        )
        text = self.name
        for wanted, template, value in optional_parts:
            if wanted:
                text = template % (text, value)
        return text
| |
| |
class _LanguageRange(_AcceptItem):
    """A language-range requested by the client (e.g. "en" or "en-gb")."""

    def matches(self, tag):
        """Match the tag against self.

        tag is a language-tag string.  It matches when it is equal to this
        range's name, or when the name followed by "-" is a prefix of it
        (so "en" matches "en-gb" but not "eng").  Language tags are
        case-insensitive, so both sides are compared in lowercase; this lets
        an available tag written as "en-GB" match a request for "en-gb".

        Returns the qvalue, or None if non-matching.
        """
        wanted = self.name.lower()
        candidate = tag.lower()

        # exact match, or we are a prefix of the available language-tag
        if candidate == wanted or candidate.startswith(wanted + "-"):
            return self.quality
        return None
| |
| |
class _LanguageSelector:
    """Choose the best available language for a client's request.

    The requested language ranges are added one at a time with append();
    they are expected to be item_class instances (_LanguageRange).  Once
    they are all in, select_from() compares them against a list of available
    language tags and returns the best candidate.

    Little of this is tied to languages: select_from() only needs each
    requested item to have a name and a matches() method returning a qvalue
    or None.  A selector for another kind of header should mostly need a
    different 'item_class', which is what _parse() instantiates for every
    element it reads.
    """

    item_class = _LanguageRange

    def __init__(self):
        # requested items, in the order they were appended
        self.requested = []

    def append(self, item):
        """Record one more requested item."""
        self.requested.append(item)

    def select_from(self, avail):
        """Return the entry of avail that best satisfies the request.

        avail is a list of language-tag strings of available languages.

        For each available tag, the qvalue of the longest-named request
        matching it is used, since the most specific range "wins".  The tag
        with the highest non-zero qvalue is returned.  When nothing matches
        with a non-zero qvalue, the first entry of avail is the default.
        When several tags share the best qvalue, the requests are walked in
        order and the first one matching a tied tag decides the result.
        """
        # (qvalue, language-tag) pairs for every tag with a usable match
        scored = []
        for candidate in avail:
            best_q = 0.0
            best_len = 0
            for wanted in self.requested:
                q = wanted.matches(candidate)
                if q is None:
                    continue
                # only a strictly longer range replaces the current winner
                width = len(wanted.name)
                if width > best_len:
                    best_q, best_len = q, width

            # a zero qvalue means "no match" or "explicitly not wanted"
            if best_q:
                scored.append((best_q, candidate))

        # nothing usable: fall back to the default language tag
        if not scored:
            return avail[0]

        # ascending sort leaves the highest qvalue in the last position
        scored.sort()
        top_q, top_tag = scored[-1]
        if not top_q:
            return avail[0]

        # a single best candidate needs no tie-breaking
        if len(scored) < 2 or scored[-2][0] != top_q:
            return top_tag

        # several candidates share the best qvalue: keep just those
        first_best = 0
        while scored[first_best][0] != top_q:
            first_best += 1
        tied = scored[first_best:]

        # honour the client's ordering: the earliest request that matches
        # one of the tied tags decides.  That request may match more than
        # one of them (typically several sub-languages of one language);
        # the first such tag in sorted order is returned, with no further
        # attempt to rank them by their position in 'avail'.  A client that
        # cares can send distinct qvalues.
        for wanted in self.requested:
            for _, candidate in tied:
                if wanted.matches(candidate):
                    return candidate

        # not expected to be reached: every tied tag was matched by some
        # request with a non-zero qvalue
        return top_tag
| |
| |
class AcceptLanguageParseError(Exception):
    """Raised by _parse() when no element name can be read from the header."""
| |
| |
def _test():
    """Self-check: parse sample headers and assert the selected language.

    Each scenario is a header value plus a list of
    (available tags, expected selection) pairs checked against it.
    """
    scenarios = (
        (
            "en",
            (
                (["en"], "en"),
                (["en", "de"], "en"),
                (["de", "en"], "en"),
            ),
        ),
        # Netscape 4.x and early version of Mozilla may not send a q value
        (
            "en, ja",
            (
                (["en", "ja"], "en"),
            ),
        ),
        (
            "fr, de;q=0.9, en-gb;q=0.7, en;q=0.6, en-gb-foo;q=0.8",
            (
                (["en"], "en"),
                (["en-gb-foo"], "en-gb-foo"),
                (["de", "fr"], "fr"),
                (["de", "en-gb"], "de"),
                (["en-gb", "en-gb-foo"], "en-gb-foo"),
                (["en-bar"], "en-bar"),
                (["en-gb-bar", "en-gb-foo"], "en-gb-foo"),
            ),
        ),
    )

    for header, expectations in scenarios:
        selector = language(header)
        for available, expected in expectations:
            assert selector.select_from(available) == expected