| #!/usr/bin/python |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| |
| import argparse |
| import json |
| import os |
| import re |
| import sys |
| |
#
# Command-line interface. Arguments are registered in the order in which they
# should appear in the generated usage/help text. ORIGINAL is optional
# (nargs='?'), while TRANSLATED is always required.
#

parser = argparse.ArgumentParser(
    description=(
        'Compares two JSON translation '
        'files, as used by the Apache Guacamole web application, listing '
        'the strings which appear to be missing or incorrect.'
    )
)

# Checks which are enabled by default and may be switched off
parser.add_argument(
    '--no-missing',
    action='store_false',
    dest='check_missing',
    help=(
        'Disables checking for strings which are present in ORIGINAL but '
        'are missing from TRANSLATED. Assuming ORIGINAL represents the set of '
        'strings actually used by the web application, these strings are '
        'those which are missing and need to be defined for the translation '
        'to be complete. By default, the comparison will check for missing '
        'translations.'
    )
)

parser.add_argument(
    '--no-unused',
    action='store_false',
    dest='check_unused',
    help=(
        'Disables checking for strings which are present in TRANSLATED '
        'but not in ORIGINAL. Assuming ORIGINAL represents the set of strings '
        'actually used by the web application, these strings are those which '
        'are defined by the translation but unused. By default, the '
        'comparison will check for unused translations.'
    )
)

# Check which is disabled by default and must be requested explicitly
parser.add_argument(
    '--check-copied',
    action='store_true',
    help=(
        'Enables '
        'checking for strings defined in TRANSLATED which are identical to '
        'the corresponding strings in ORIGINAL. Such strings may have been '
        'incorrectly copied verbatim from the original without being '
        'translated at all. It is also possible that both languages simply '
        'use the same text for that string, and the string is correct. As '
        'this test can produce false positives, it is disabled by default.'
    )
)

# Input files
parser.add_argument(
    'ORIGINAL',
    nargs='?',
    help=(
        'The JSON file which should '
        'be used as the basis for comparison. This should be JSON which can '
        'be expected to contain every string used by the web application and '
        'no others. Typically, this will be the primary, original language of '
        'the web application. In the case of Apache Guacamole, this should be '
        'English. If omitted, the file "en.json" within the same directory '
        'as TRANSLATED will be used by default.'
    )
)

parser.add_argument(
    'TRANSLATED',
    help=(
        'The JSON file which should be '
        'compared against ORIGINAL. This should be the JSON which has been '
        'translated from ORIGINAL, and thus should contain the same set of '
        'strings if the translation is complete.'
    )
)

args = parser.parse_args()
| |
# Resolve the set of text types once, so that the same code runs on both
# Python 2 (where "unicode" and "basestring" exist) and Python 3 (where they
# do not, and all text is "str").
try:
    _STRING_TYPES = (basestring,)  # noqa: F821 - Python 2 only
except NameError:
    _STRING_TYPES = (str,)


def flatten_strings(translation, prefix=u''):
    """Reads all translation strings from the given JSON, taking into account
    namespacing, flattening nested namespaces into a single set of key/value
    pairs.

    For example, the following call:

        flatten_strings({
            u'TOP' : {
                u'LETTERS' : {
                    u'A' : u'A',
                    u'B' : u'B'
                },
                u'NUMBERS' : {
                    u'ONE'   : u'1',
                    u'TWO'   : u'2',
                    u'THREE' : u'3'
                }
            }
        })

    would return:

        {
            u'TOP.LETTERS.A'     : u'A',
            u'TOP.LETTERS.B'     : u'B',
            u'TOP.NUMBERS.ONE'   : u'1',
            u'TOP.NUMBERS.TWO'   : u'2',
            u'TOP.NUMBERS.THREE' : u'3'
        }

    Parameters
    ----------
    translation : dict or string
        The dict object to read translation strings from, where each key is a
        translation key or namespace and each value is a translation string or
        a dict containing the translations nested within that namespace. If
        this object is simply a string, it will be assumed to be the value of
        a translation string, and the prefix provided will be assumed to be
        the name.

    prefix : string, optional
        The namespace prefix to apply to all translation strings within the
        given object, if any. This parameter is optional. If omitted, an empty
        string will be used.

    Returns
    -------
    dict
        A dict whose keys are the fully-qualified (period-separated) names of
        all translation strings contained within the given object, and whose
        values are the corresponding translation strings.

    """

    # If the provided object is a string, the prefix is the string name
    if isinstance(translation, _STRING_TYPES):
        return {prefix: translation}

    # Otherwise, children of a named namespace are separated from that name
    # by a period. Top-level keys (empty prefix) receive no leading period.
    child_prefix = prefix + u'.' if prefix else prefix

    # Merge the flattened contents of each child namespace / string
    strings = {}
    for key, child in translation.items():
        strings.update(flatten_strings(child, child_prefix + key))

    return strings
| |
class Translation(object):
    """A set of namespaced translation strings read from a JSON file, as
    supported by angular-translate and used by Apache Guacamole.

    Attributes
    ----------
    lang_key : str
        The unique key identifying the JSON translation file and the language
        within that file. This will simply be the filename without its
        extension (normally ".json").
    lang_name : string or None
        The name of the language as defined within the JSON translation file by
        the special "NAME" key. Not all translations will define a "NAME", as
        some translations (those provided by Guacamole extensions) are used as
        overlays for the base translation for that language defined at the web
        application level. If no "NAME" key is present, `lang_name` will be
        `None`.
    strings : dict
        The flattened set of translation key/value pairs. Each key will contain
        all applicable namespaces, separated by periods, as produced by
        `flatten_strings()`. There will be no nested keys.

    """

    def __init__(self, path):
        """Parses the details and contents of the JSON translation file at the
        given path.

        Parameters
        ----------
        path : str
            The path to the JSON file containing the translation to be read.

        Raises
        ------
        IOError / OSError
            If the file cannot be opened or read.
        ValueError
            If the file does not contain valid JSON.

        """

        # Imported locally so this class does not depend on changes to the
        # file-level import list. io.open() accepts an explicit encoding on
        # both Python 2 and Python 3.
        import io

        filename = os.path.basename(path)

        # Decode explicitly as UTF-8 rather than relying on the locale's
        # default encoding, and make sure the file handle is always closed
        with io.open(path, encoding='utf-8') as json_file:
            json_data = json_file.read()

        self.lang_key = os.path.splitext(filename)[0]
        self.strings = flatten_strings(json.loads(json_data))
        self.lang_name = self.strings.get(u'NAME')

    def get_missing(self, expected):
        """Returns a list of translation keys which are present in the given
        translation but missing from this translation.

        Parameters
        ----------
        expected : Translation
            The translation to compare this translation against.

        Returns
        -------
        list
            A list of translation keys which are present in the given
            translation but are NOT present in this translation.

        """
        return [key for key in expected.strings if key not in self.strings]

    def get_identical(self, other):
        """Returns a list of translation keys which map to the same exact value
        in both this translation and the given translation.

        Parameters
        ----------
        other : Translation
            The translation to compare this translation against.

        Returns
        -------
        list
            A list of translation keys which map to the same exact value in
            both translations.

        """
        return [key for key, value in self.strings.items()
                if key in other.strings and other.strings[key] == value]
| |
#
# Keys which every translation is expected to inherit from the base
# translation. These should never be defined by a translation, and so are
# not reported as missing.
#

expected_missing = set((u'APP.NAME', u'APP.VERSION'))

#
# Pattern matching string values which may legitimately be identical in the
# original and the translation, and so should not be reported as copied
#

expected_copied = re.compile(
    '|'.join((
        r'^$',                            # Empty string
        r'^@:',                           # References to other strings
        r'^\d+$',                         # Numbers
        r'^(VNC|RDP|SSH|SFTP|Telnet)$',   # Protocol names
        r'^(Apache )?Guacamole$',         # Guacamole itself
    ))
)
| |
#
# Read provided input files
#

# If ORIGINAL was omitted, fall back to "en.json" in the same directory as
# TRANSLATED. os.path.join() is used so that a TRANSLATED path with no
# directory component (e.g. "de.json") resolves to "en.json" in the current
# directory rather than "/en.json" in the filesystem root.
orig = Translation(args.ORIGINAL
        or os.path.join(os.path.dirname(args.TRANSLATED), 'en.json'))

trans = Translation(args.TRANSLATED)

# NOTE: print() is always called with exactly one argument so that the output
# is identical whether interpreted as a Python 2 print statement or as the
# Python 3 print function.
print(u'Original language: {} ({})'.format(orig.lang_key, orig.lang_name))
print(u'Translation language: {} ({})'.format(trans.lang_key, trans.lang_name))

# Ignore keys that are expected to be missing. Removing them from the
# original means they are never reported as missing from the translation, and
# ARE reported as unused if the translation defines them anyway.
orig.strings = { key:value for key, value in orig.strings.items()
        if key not in expected_missing }

#
# Perform requested tests
#

missing = trans.get_missing(orig) if args.check_missing else []
unused = orig.get_missing(trans) if args.check_unused else []
copied = orig.get_identical(trans) if args.check_copied else []

# Exclude keys which are expected to be copied
copied = [ key for key in copied
        if not expected_copied.match(orig.strings[key]) ]

#
# Group any errors encountered by type
#

if missing:
    print('\nThe following strings are missing from the translation and '
          'should be added:\n')
    for name in sorted(missing):
        print(' {}'.format(name))

if unused:
    print('\nThe following strings are either NOT defined for the original '
          'language or are expected to be inherited from the original '
          'language and should be removed:\n')
    for name in sorted(unused):
        print(' {}'.format(name))

if copied:
    print('\nThe following strings are identical to the original language '
          'and MIGHT be untranslated:\n')
    for name in sorted(copied):
        print(' {}'.format(name))

#
# Count total number of errors and summarize result
#

errors = len(missing) + len(unused) + len(copied)

# A non-zero exit status signals failure to calling scripts / build tooling
if errors:
    print('\n{} error(s) total.'.format(errors))
    sys.exit(1)

print('\nCheck completed successfully. No errors.')
| |