# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import argparse
import io
import json
import os
import re
import sys
# Command-line interface: one required positional file (TRANSLATED), one
# optional positional file (ORIGINAL), and switches selecting which of the
# three comparisons (missing / unused / copied) are performed.
parser = argparse.ArgumentParser(
    description='Compares two JSON translation '
    'files, as used by the Apache Guacamole web application, listing '
    'the strings which appear to be missing or incorrect.')

# Enabled by default; the flag switches the check off
parser.add_argument(
    '--no-missing', dest='check_missing', action='store_false',
    help='Disables checking for strings which are present in ORIGINAL but '
    'are missing from TRANSLATED. Assuming ORIGINAL represents the set of '
    'strings actually used by the web application, these strings are '
    'those which are missing and need to be defined for the translation '
    'to be complete. By default, the comparison will check for missing '
    'strings.')

# Enabled by default; the flag switches the check off
parser.add_argument(
    '--no-unused', dest='check_unused', action='store_false',
    help='Disables checking for strings which are present in TRANSLATED '
    'but not in ORIGINAL. Assuming ORIGINAL represents the set of strings '
    'actually used by the web application, these strings are those which '
    'are defined by the translation but unused. By default, the '
    'comparison will check for unused translations.')

# Disabled by default; the flag switches the check on
parser.add_argument(
    '--check-copied', action='store_true', help='Enables '
    'checking for strings defined in TRANSLATED which are identical to '
    'the corresponding strings in ORIGINAL. Such strings may have been '
    'incorrectly copied verbatim from the original without being '
    'translated at all. It is also possible that both languages simply '
    'use the same text for that string, and the string is correct. As '
    'this test can produce false positives, it is disabled by default.')

# Optional positional: None when omitted, in which case the caller derives a
# default location from TRANSLATED
parser.add_argument(
    'ORIGINAL', nargs='?', help='The JSON file which should '
    'be used as the basis for comparison. This should be JSON which can '
    'be expected to contain every string used by the web application and '
    'no others. Typically, this will be the primary, original language of '
    'the web application. In the case of Apache Guacamole, this should be '
    'English. If omitted, the file "en.json" within the same directory '
    'as TRANSLATED will be used by default.')

parser.add_argument(
    'TRANSLATED', help='The JSON file which should be '
    'compared against ORIGINAL. This should be the JSON which has been '
    'translated from ORIGINAL, and thus should contain the same set of '
    'strings if the translation is complete.')

args = parser.parse_args()
# Text type used for translation strings: `unicode` on Python 2, `str` on
# Python 3 (where the name `unicode` no longer exists).
try:
    _TEXT_TYPE = unicode  # noqa: F821 (Python 2 only)
except NameError:
    _TEXT_TYPE = str


def flatten_strings(translation, prefix=u''):
    """Reads all translation strings from the given JSON, taking into account
    namespacing, flattening nested namespaces into a single set of key/value
    pairs.

    For example, the following call:

        flatten_strings({
            u'TOP' : {
                u'LETTERS' : {
                    u'A' : u'A',
                    u'B' : u'B'
                },
                u'NUMBERS' : {
                    u'ONE' : u'1',
                    u'TWO' : u'2',
                    u'THREE' : u'3'
                }
            }
        })

    would return:

        {
            u'TOP.LETTERS.A' : u'A',
            u'TOP.LETTERS.B' : u'B',
            u'TOP.NUMBERS.ONE' : u'1',
            u'TOP.NUMBERS.TWO' : u'2',
            u'TOP.NUMBERS.THREE' : u'3'
        }

    Parameters
    ----------
    translation : dict or text string
        The dict object to read translation strings from, where each key is a
        translation key or namespace and each value is a translation string or
        a dict containing the translations nested within that namespace. If
        this object is simply a text string (`unicode` on Python 2, `str` on
        Python 3), it will be assumed to be the value of a translation string,
        and the prefix provided will be assumed to be the name.

    prefix : text string, optional
        The namespace prefix to apply to all translation strings within the
        given object, if any. This parameter is optional. If omitted, an empty
        string will be used.

    Returns
    -------
    dict
        A dict whose keys are the fully-qualified names of all translation
        strings contained within the given object, and whose values are the
        corresponding translation strings.
    """

    # If the provided object is a string, the prefix is the string name
    if isinstance(translation, _TEXT_TYPE):
        return {prefix: translation}

    # Otherwise, if the prefix is non-empty, append a period for children
    if prefix:
        prefix += u'.'

    # Merge the flattened contents of every child, each under its own prefix
    strings = {}
    for key, child in translation.items():
        strings.update(flatten_strings(child, prefix + key))

    return strings
class Translation(object):
    """A set of namespaced translation strings read from a JSON file, as
    supported by angular-translate and used by Apache Guacamole.

    Attributes
    ----------
    lang_key : str
        The unique key identifying the JSON translation file and the language
        within that file. This will simply be the filename without the ".json"
        extension.

    lang_name : text string or None
        The name of the language as defined within the JSON translation file by
        the special "NAME" key. Not all translations will define a "NAME", as
        some translations (those provided by Guacamole extensions) are used as
        overlays for the base translation for that language defined at the web
        application level. If no "NAME" key is present, `lang_name` will be
        None.

    strings : dict
        The flattened set of translation key/value pairs. Each key will contain
        all applicable namespaces, separated by periods, as produced by
        `flatten_strings()`. There will be no nested keys.
    """

    def __init__(self, path):
        """Parses the details and contents of the JSON translation file at the
        given path.

        Parameters
        ----------
        path : str
            The path to the JSON file containing the translation to be read.
        """

        # Decode explicitly as UTF-8 rather than relying on the locale default
        # (Python 2's json.loads already assumed UTF-8 for byte strings), and
        # use a context manager so the file handle is always closed.
        with io.open(path, encoding='utf-8') as json_file:
            self.strings = flatten_strings(json.load(json_file))

        # The language key is the filename with its extension removed
        filename = os.path.basename(path)
        self.lang_key = os.path.splitext(filename)[0]

        self.lang_name = self.strings.get(u'NAME')

    def get_missing(self, expected):
        """Returns a list of translation keys which are present in the given
        translation but missing from this translation.

        Parameters
        ----------
        expected : Translation
            The translation to compare this translation against.

        Returns
        -------
        list
            A list of translation keys which are present in the given
            translation but are NOT present in this translation.
        """
        return [key for key in expected.strings if key not in self.strings]

    def get_identical(self, other):
        """Returns a list of translation keys which map to the same exact value
        in both this translation and the given translation.

        Parameters
        ----------
        other : Translation
            The translation to compare this translation against.

        Returns
        -------
        list
            A list of translation keys which map to the same exact value in
            both translations.
        """
        return [key for key, value in self.strings.items()
                if key in other.strings and other.strings[key] == value]
# Translation keys which are expected to always be inherited from the base
# translation and thus should be missing from all translations. Keys in this
# set are filtered out of the original translation's strings before any
# comparison is performed.
# NOTE(review): neither the members of this set literal nor its closing brace
# are visible at this point in the file -- confirm the intended key list.
expected_missing = {
# Regular expression which matches strings that are expected to be copied
# verbatim from the original language. It is applied with match(), and every
# alternative carries its own anchors so that only whole-string matches (or,
# for "@:", a leading reference marker) count.
expected_copied = re.compile('|'.join([
    r'^$',                           # Empty string
    r'^@:',                          # References to other strings
    r'^\d+$',                        # Numbers
    r'^(VNC|RDP|SSH|SFTP|Telnet)$',  # Protocol names
    r'^(Apache )?Guacamole$',        # Guacamole itself
]))
# Read provided input files. If ORIGINAL was not given, fall back to the
# "en.json" located in the same directory as TRANSLATED. os.path.join is used
# so that a TRANSLATED path with no directory component resolves to "en.json"
# in the current directory rather than "/en.json".
orig = Translation(args.ORIGINAL
                   or os.path.join(os.path.dirname(args.TRANSLATED),
                                   'en.json'))
trans = Translation(args.TRANSLATED)

# Single-argument parenthesised print works identically as a Python 2 print
# statement and as a Python 3 print() call.
print(u'Original language: {} ({})'.format(orig.lang_key, orig.lang_name))
print(u'Translation language: {} ({})'.format(trans.lang_key,
                                               trans.lang_name))

# Ignore keys that are expected to be missing
orig.strings = {key: value for key, value in orig.strings.items()
                if key not in expected_missing}

# Perform requested tests
missing = trans.get_missing(orig) if args.check_missing else []
unused = orig.get_missing(trans) if args.check_unused else []
copied = orig.get_identical(trans) if args.check_copied else []

# Exclude keys which are expected to be copied
copied = [key for key in copied
          if not expected_copied.match(orig.strings[key])]

# Group any errors encountered by type, printing each non-empty group under
# its own heading with the affected keys in sorted order
for names, heading in (
        (missing,
         '\nThe following strings are missing from the translation and '
         'should be added:\n'),
        (unused,
         '\nThe following strings are either NOT defined for the original '
         'language or are expected to be inherited from the original '
         'language and should be removed:\n'),
        (copied,
         '\nThe following strings are identical to the original language '
         'and MIGHT be untranslated:\n')):
    if names:
        print(heading)
        for name in sorted(names):
            print(' {}'.format(name))

# Count total number of errors and summarize result
# NOTE(review): the process exit status is not changed when errors are found,
# although `sys` is imported by this file -- confirm whether a non-zero exit
# is intended here.
errors = len(missing) + len(unused) + len(copied)
if errors:
    print('\n{} error(s) total.'.format(errors))
else:
    print('\nCheck completed successfully. No errors.')