blob: ec2c7d4ace546f2afe7e664818b9658d371168da [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* $Id$ */
package org.apache.fop.complexscripts.util;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
// CSOFF: LineLengthCheck
/**
* <p>Implementation of Number to String Conversion algorithm specified by
* XSL Transformations (XSLT) Version 2.0, W3C Recommendation, 23 January 2007.</p>
*
* <p>This algorithm differs from that specified in XSLT 1.0 in the following
* ways:</p>
* <ul>
* <li>input numbers are greater than or equal to zero rather than greater than zero;</li>
* <li>introduces format tokens { w, W, Ww };</li>
* <li>introduces ordinal parameter to generate ordinal numbers;</li>
* </ul>
*
* <p>Implementation Defaults and Limitations</p>
* <ul>
* <li>If language parameter is unspecified (null or empty string), then the value
* of DEFAULT_LANGUAGE is used, which is defined below as "eng" (English).</li>
* <li>Only English, French, and Spanish word numerals are supported, and only if less than one trillion (1,000,000,000,000).</li>
* <li>Ordinal word numerals are supported for French and Spanish only when less than or equal to ten (10).</li>
* </ul>
*
* <p>Implementation Notes</p>
* <ul>
* <li>In order to handle format tokens outside the Unicode BMP, all processing is
* done in Unicode Scalar Values represented with Integer and Integer[]
* types. Without affecting behavior, this may be subsequently optimized to
* use int and int[] types.</li>
* <li>In order to communicate various sub-parameters, including ordinalization, a <em>features</em>
* string is employed, which consists of comma separated name and optional value tokens, where name and value
* are separated by an equals '=' sign.</li>
* <li>Ordinal numbers are selected by specifying a word based format token in combination with an 'ordinal' feature with no value, in which case
* the features 'male' and 'female' may be used to specify gender for gender sensitive languages. For example, the feature string "ordinal,female"
* selects female ordinals.</li>
* </ul>
*
* <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p>
*/
public class NumberConverter {
/** letter value: alphabetic */
public static final int LETTER_VALUE_ALPHABETIC = 1;
/** letter value: traditional */
public static final int LETTER_VALUE_TRADITIONAL = 2;
/** no token type (initial state while parsing a format) */
private static final int TOKEN_NONE = 0;
/** alphanumeric token type */
private static final int TOKEN_ALPHANUMERIC = 1;
/** non-alphanumeric token type */
private static final int TOKEN_NONALPHANUMERIC = 2;
/** default token, the single scalar '1' */
private static final Integer[] DEFAULT_TOKEN = new Integer[] { (int) '1' };
/** default separator, the single scalar '.' */
private static final Integer[] DEFAULT_SEPARATOR = new Integer[] { (int) '.' };
/** default language */
private static final String DEFAULT_LANGUAGE = "eng";
/** prefix token (null if none) */
private Integer[] prefix;
/** suffix token (null if none) */
private Integer[] suffix;
/** sequence of tokens, as parsed from format */
private Integer[][] tokens;
/** sequence of separators, as parsed from format */
private Integer[][] separators;
/** grouping separator */
private int groupingSeparator;
/** grouping size */
private int groupingSize;
/** letter value */
private int letterValue;
/** features (comma separated feature sub-parameters, may be null) */
private String features;
/** language (lower-cased by the constructor, may be null) */
private String language;
/** country (lower-cased by the constructor, may be null) */
private String country;
/**
 * Construct parameterized number converter.
 * @param format format for the page number (may be null or empty, in which case "1" is used)
 * @param groupingSeparator grouping separator (if zero, then no grouping separator applies)
 * @param groupingSize grouping size (if zero or negative, then no grouping size applies)
 * @param letterValue letter value (must be one of the above letter value enumeration values)
 * @param features features (feature sub-parameters)
 * @param language language code (may be null); stored in lower case
 * @param country country code (may be null); stored in lower case
 * @throws IllegalArgumentException if format is not a valid UTF-16 string (e.g., has unpaired surrogate)
 */
public NumberConverter(String format, int groupingSeparator, int groupingSize, int letterValue, String features, String language, String country)
    throws IllegalArgumentException {
    this.groupingSeparator = groupingSeparator;
    this.groupingSize = groupingSize;
    this.letterValue = letterValue;
    this.features = features;
    // Language and country codes are compared against ASCII literals, so they are
    // lower-cased with a fixed locale; the default locale must not influence the
    // result (e.g. a Turkish default locale maps 'I' to dotless 'i').
    this.language = (language != null) ? language.toLowerCase(Locale.ENGLISH) : null;
    this.country = (country != null) ? country.toLowerCase(Locale.ENGLISH) : null;
    parseFormatTokens(format);
}
/**
 * Convert a single number to a string according to the conversion parameters.
 * @param number number to convert
 * @return string representing converted number
 */
public String convert(long number) {
    // delegate to the list based variant using a one element list
    List<Long> single = new ArrayList<Long>(1);
    single.add(Long.valueOf(number));
    return convert(single);
}
/**
 * Convert a list of numbers to a string according to the conversion parameters.
 * The result is the prefix (if any), followed by the converted numbers,
 * followed by the suffix (if any).
 * @param numbers list of numbers to convert
 * @return string representing converted list of numbers
 */
public String convert(List<Long> numbers) {
    final List<Integer> output = new ArrayList<Integer>();
    final boolean hasPrefix = prefix != null;
    final boolean hasSuffix = suffix != null;
    if (hasPrefix) {
        appendScalars(output, prefix);
    }
    convertNumbers(output, numbers);
    if (hasSuffix) {
        appendScalars(output, suffix);
    }
    return scalarsToString(output);
}
/**
 * Split FORMAT into maximal runs of alphanumeric characters (format tokens) and
 * maximal runs of non-alphanumeric characters (separators), then record prefix,
 * suffix, interior separators and format tokens in the corresponding fields.
 * A leading separator becomes the prefix only if the format actually starts with
 * a non-alphanumeric run, and a trailing separator becomes the suffix only if the
 * format actually ends with one; all other separators remain interior separators.
 * @param format format string (null or empty is treated as "1")
 * @throws IllegalArgumentException if format is not a valid UTF-16 string
 */
private void parseFormatTokens(String format) throws IllegalArgumentException {
    List<Integer[]> tokenList = new ArrayList<Integer[]>();
    List<Integer[]> separatorList = new ArrayList<Integer[]>();
    if ((format == null) || (format.length() == 0)) {
        format = "1";
    }
    int firstType = TOKEN_NONE;     // type of the first run in the format
    int currentType = TOKEN_NONE;   // type of the run being collected (the last run after the loop)
    List<Integer> current = new ArrayList<Integer>();
    Integer[] ca = UTF32.toUTF32(format, 0, true);
    for (Integer c : ca) {
        int type = isAlphaNumeric(c) ? TOKEN_ALPHANUMERIC : TOKEN_NONALPHANUMERIC;
        if (type != currentType) {
            if (!current.isEmpty()) {
                // the run that just ended is complete, so file it under its own type
                if (currentType == TOKEN_ALPHANUMERIC) {
                    tokenList.add(current.toArray(new Integer[current.size()]));
                } else {
                    separatorList.add(current.toArray(new Integer[current.size()]));
                }
                current.clear();
            }
            if (firstType == TOKEN_NONE) {
                firstType = type;
            }
            currentType = type;
        }
        current.add(c);
    }
    if (!current.isEmpty()) {
        if (currentType == TOKEN_ALPHANUMERIC) {
            tokenList.add(current.toArray(new Integer[current.size()]));
        } else {
            separatorList.add(current.toArray(new Integer[current.size()]));
        }
    }
    // Only a separator located before the first format token is a prefix.
    if ((firstType == TOKEN_NONALPHANUMERIC) && !separatorList.isEmpty()) {
        this.prefix = separatorList.remove(0);
    }
    // Only a separator located after the last format token is a suffix; a format
    // consisting of a single separator run has already been consumed as prefix.
    if ((currentType == TOKEN_NONALPHANUMERIC) && !separatorList.isEmpty()) {
        this.suffix = separatorList.remove(separatorList.size() - 1);
    }
    this.separators = separatorList.toArray(new Integer[separatorList.size()][]);
    this.tokens = tokenList.toArray(new Integer[tokenList.size()][]);
}
/**
 * Determine if scalar C is alphanumeric, i.e., if its Unicode general category
 * is one of Nd, Nl, No, Lu, Ll, Lt, Lm, or Lo.
 * @param c unicode scalar value
 * @return true if alphanumeric
 */
private static boolean isAlphaNumeric(int c) {
    final int category = Character.getType(c);
    final boolean number = (category == Character.DECIMAL_DIGIT_NUMBER)
        || (category == Character.LETTER_NUMBER)
        || (category == Character.OTHER_NUMBER);
    final boolean letter = (category == Character.UPPERCASE_LETTER)
        || (category == Character.LOWERCASE_LETTER)
        || (category == Character.TITLECASE_LETTER)
        || (category == Character.MODIFIER_LETTER)
        || (category == Character.OTHER_LETTER);
    return number || letter;
}
/**
 * Convert each of NUMBERS and append the result to SCALARS. The nth number is
 * formatted with the nth format token; if there are more numbers than tokens,
 * the last token used (or the default token if there are none) formats the
 * remaining numbers. Every number after the first is preceded by a separator:
 * the next parsed separator while tokens remain (or the default separator if
 * none is left), and afterwards the separator most recently used (the default
 * separator if none has been used yet).
 * @param scalars output list of unicode scalars
 * @param numbers numbers to convert
 */
private void convertNumbers(List<Integer> scalars, List<Long> numbers) {
    Integer[] lastToken = DEFAULT_TOKEN;
    Integer[] lastSeparator = DEFAULT_SEPARATOR;
    int tokenIndex = 0;
    int separatorIndex = 0;
    boolean first = true;
    for (Long number : numbers) {
        final boolean tokenAvailable = tokenIndex < tokens.length;
        Integer[] separator = null;
        if (!first) {
            if (tokenAvailable) {
                separator = (separatorIndex < separators.length) ? separators[separatorIndex++] : DEFAULT_SEPARATOR;
                lastSeparator = separator;
            } else {
                // tokens are exhausted: keep separating numbers instead of running them together
                separator = lastSeparator;
            }
        }
        Integer[] token = tokenAvailable ? tokens[tokenIndex++] : lastToken;
        appendScalars(scalars, convertNumber(number, separator, token));
        lastToken = token;
        first = false;
    }
}
/**
 * Convert a single number, emitting the optional separator first and then the
 * number formatted according to TOKEN (nothing is emitted for a null token).
 * @param number number to convert
 * @param separator separator to emit before the number, or null for none
 * @param token format token, or null
 * @return separator and formatted number as array of unicode scalars
 */
private Integer[] convertNumber(long number, Integer[] separator, Integer[] token) {
    final List<Integer> pieces = new ArrayList<Integer>();
    if (separator != null) {
        Collections.addAll(pieces, separator);
    }
    if (token != null) {
        Collections.addAll(pieces, formatNumber(number, token));
    }
    Integer[] result = new Integer[pieces.size()];
    return pieces.toArray(result);
}
/**
 * Format NUMBER according to format TOKEN. Single character tokens select a
 * decimal digit sequence, word numerals ('W', 'w'), an alphabetic sequence, or
 * a special numeral system; the token "Ww" selects title case word numerals;
 * a token of zero digits followed by a one digit selects zero padded decimal.
 * If the selected formatter produces no result, the default token is used.
 * @param number to be formatted (must be non-negative)
 * @param token format token (non-empty)
 * @return formatted number as array of unicode scalars (never null)
 * @throws IllegalArgumentException if number is negative or token is invalid
 */
private Integer[] formatNumber(long number, Integer[] token) {
    assert token.length > 0;
    if (number < 0) {
        throw new IllegalArgumentException("number must be non-negative");
    }
    final int length = token.length;
    Integer[] formatted;
    if (length == 1) {
        final int start = token[0].intValue();
        if (start == (int) '1') {
            formatted = formatNumberAsDecimal(number, (int) '1', 1);
        } else if (start == (int) 'W') {
            formatted = formatNumberAsWord(number, Character.UPPERCASE_LETTER);
        } else if (start == (int) 'w') {
            formatted = formatNumberAsWord(number, Character.LOWERCASE_LETTER);
        } else if (isStartOfDecimalSequence(start)) {
            formatted = formatNumberAsDecimal(number, start, 1);
        } else if (isStartOfAlphabeticSequence(start)) {
            // includes 'A' and 'a'
            formatted = formatNumberAsSequence(number, start, getSequenceBase(start), null);
        } else if (isStartOfNumericSpecial(start)) {
            // includes 'I' and 'i'
            formatted = formatNumberAsSpecial(number, start);
        } else {
            formatted = null;
        }
    } else if ((length == 2) && (token[0].intValue() == (int) 'W') && (token[1].intValue() == (int) 'w')) {
        formatted = formatNumberAsWord(number, Character.TITLECASE_LETTER);
    } else if (isPaddedOne(token)) {
        formatted = formatNumberAsDecimal(number, token[length - 1].intValue(), length);
    } else {
        throw new IllegalArgumentException("invalid format token: \"" + UTF32.fromUTF32(token) + "\"");
    }
    if (formatted == null) {
        // formatter could not represent the number: fall back to plain decimal
        formatted = formatNumber(number, DEFAULT_TOKEN);
    }
    assert formatted != null;
    return formatted;
}
/**
 * Format NUMBER as decimal using characters denoting digits that start at ONE,
 * adding one or more (zero) padding characters as needed to fill out field WIDTH.
 * Grouping is applied only if the grouping size is positive and the grouping
 * separator is non-zero.
 * @param number to be formatted
 * @param one unicode scalar value denoting numeric value 1
 * @param width non-negative integer denoting field width of number, possibly including padding
 * @return formatted number as array of unicode scalars
 */
private Integer[] formatNumberAsDecimal(long number, int one, int width) {
    assert Character.getNumericValue(one) == 1;
    assert Character.getNumericValue(one - 1) == 0;
    assert Character.getNumericValue(one + 8) == 9;
    List<Integer> sl = new ArrayList<Integer>();
    int zero = one - 1;
    while (number > 0) {
        long digit = number % 10;
        sl.add(0, zero + (int) digit);
        number = number / 10;
    }
    // padding also supplies the single zero digit when NUMBER is zero and WIDTH is one
    while (width > sl.size()) {
        sl.add(0, zero);
    }
    // a zero or negative grouping size means that no grouping applies
    if ((groupingSize > 0) && (groupingSeparator != 0)) {
        sl = performGrouping(sl, groupingSize, groupingSeparator);
    }
    return sl.toArray(new Integer[sl.size()]);
}
/**
 * Insert GROUPINGSEPARATOR between groups of GROUPINGSIZE scalars, counting
 * groups from the end of SL. If SL has no more than GROUPINGSIZE elements, then
 * SL itself is returned, otherwise a new list is returned.
 * @param sl list of scalars to group
 * @param groupingSize number of scalars in each group
 * @param groupingSeparator scalar to insert between groups
 * @return grouped list of scalars
 */
private static List<Integer> performGrouping(List<Integer> sl, int groupingSize, int groupingSeparator) {
    assert groupingSize > 0;
    assert groupingSeparator != 0;
    final int count = sl.size();
    if (count <= groupingSize) {
        return sl;
    }
    // collect from last to first, then reverse
    List<Integer> reversed = new ArrayList<Integer>();
    int run = 0;
    for (int k = count - 1; k >= 0; k--) {
        if (run == groupingSize) {
            reversed.add(Integer.valueOf(groupingSeparator));
            run = 1;
        } else {
            run++;
        }
        reversed.add(sl.get(k));
    }
    Collections.reverse(reversed);
    return reversed;
}
/**
 * Format NUMBER using a sequence of characters that starts at ONE and has
 * BASE elements (bijective numeration, e.g., A..Z, AA..AZ, ...).
 * @param number to be formatted
 * @param one unicode scalar value denoting start of sequence (numeric value 1)
 * @param base number of elements in sequence
 * @param map if non-null, then maps sequence indices to unicode scalars
 * @return formatted number as array of unicode scalars, or null if number is zero
 */
private Integer[] formatNumberAsSequence(long number, int one, int base, int[] map) {
    assert base > 1;
    assert (map == null) || (map.length >= base);
    if (number == 0) {
        return null;
    }
    final List<Integer> digits = new ArrayList<Integer>();
    for (long rest = number; rest > 0; rest = (rest - 1) / base) {
        final int index = (int) ((rest - 1) % (long) base);
        final int scalar = (map == null) ? (one + index) : map[index];
        digits.add(0, Integer.valueOf(scalar));
    }
    return digits.toArray(new Integer[digits.size()]);
}
/**
 * Format NUMBER using the special numeral system that starts at ONE.
 * @param number to be formatted
 * @param one unicode scalar value denoting start of system (numeric value 1)
 * @return formatted number as array of unicode scalars, or null if no special
 * formatter applies or the formatter produces no result
 */
private Integer[] formatNumberAsSpecial(long number, int one) {
    final SpecialNumberFormatter formatter = getSpecialFormatter(one, letterValue, features, language, country);
    return (formatter == null)
        ? null
        : formatter.format(number, one, letterValue, features, language, country);
}
/**
 * Format NUMBER as word according to CASETYPE, which must be either
 * Character.UPPERCASE_LETTER, Character.LOWERCASE_LETTER, or
 * Character.TITLECASE_LETTER. The language of the word is determined by
 * this.language: Spanish and French select their respective formatters,
 * while English and any other (or no) language select the English formatter.
 * @param number to be formatted
 * @param caseType unicode character type for case conversion
 * @return formatted number as array of unicode scalars
 */
private Integer[] formatNumberAsWord(long number, int caseType) {
    final SpecialNumberFormatter formatter;
    if (isLanguage("spa")) {
        formatter = new SpanishNumberAsWordFormatter(caseType);
    } else if (isLanguage("fra")) {
        formatter = new FrenchNumberAsWordFormatter(caseType);
    } else {
        formatter = new EnglishNumberAsWordFormatter(caseType);
    }
    return formatter.format(number, 0, letterValue, features, language, country);
}
/**
 * Determine if this converter's language is the language denoted by ISO3CODE,
 * either literally or by way of an equivalent language code.
 * @param iso3Code three letter language code
 * @return true if language matches; false if not or if no language is set
 */
private boolean isLanguage(String iso3Code) {
    return (language != null)
        && (language.equals(iso3Code) || isSameLanguage(iso3Code, language));
}
/** groups of equivalent language codes; the first entry of each group is the key */
private static String[][] equivalentLanguages = {
    { "eng", "en" },
    { "fra", "fre", "fr" },
    { "spa", "es" },
};
/**
 * Determine if language code LC belongs to the group of equivalent language
 * codes whose key (first entry) is I3C.
 * @param i3c key of language group
 * @param lc language code to test
 * @return true if LC is a member of the group keyed by I3C
 */
private static boolean isSameLanguage(String i3c, String lc) {
    for (int g = 0; g < equivalentLanguages.length; g++) {
        final String[] group = equivalentLanguages[g];
        assert group.length >= 2;
        if (!group[0].equals(i3c)) {
            continue;
        }
        // only the first group with a matching key is consulted
        boolean member = false;
        for (int m = 0; !member && (m < group.length); m++) {
            member = group[m].equals(lc);
        }
        return member;
    }
    return false;
}
/**
 * Determine if feature string FEATURES contains a feature named FEATURE.
 * FEATURES is a comma separated list of entries, each being a name optionally
 * followed by '=' and a value; only the name is compared. White space around a
 * name is ignored, and an entry without a name (e.g. "=") never matches.
 * @param features feature string (may be null)
 * @param feature name of feature to look for (non-empty)
 * @return true if FEATURES names FEATURE
 */
private static boolean hasFeature(String features, String feature) {
    if (features == null) {
        return false;
    }
    assert feature != null;
    assert feature.length() != 0;
    for (String entry : features.split(",")) {
        // use indexOf rather than split("="), which yields an empty array for "="
        int eq = entry.indexOf('=');
        String name = ((eq >= 0) ? entry.substring(0, eq) : entry).trim();
        if (name.equals(feature)) {
            return true;
        }
    }
    return false;
}
/* not yet used
private static String getFeatureValue ( String features, String feature ) {
if ( features != null ) {
assert feature != null;
assert feature.length() != 0;
String[] fa = features.split(",");
for ( String f : fa ) {
String[] fp = f.split("=");
assert fp.length > 0;
String fn = fp[0];
String fv = ( fp.length > 1 ) ? fp[1] : "";
if ( fn.equals ( feature ) ) {
return fv;
}
}
}
return "";
}
*/
/**
 * Append all elements of SA, in order, to SCALARS.
 * @param scalars list to append to
 * @param sa scalars to append
 */
private static void appendScalars(List<Integer> scalars, Integer[] sa) {
    for (Integer scalar : sa) {
        scalars.add(scalar);
    }
}
/**
 * Convert list of unicode scalars to a string by way of UTF32.fromUTF32.
 * @param scalars list of unicode scalars
 * @return resulting string
 */
private static String scalarsToString(List<Integer> scalars) {
    return UTF32.fromUTF32(scalars.toArray(new Integer[scalars.size()]));
}
/**
 * Determine if TOKEN consists of zero or more decimal digits of value zero
 * followed by a single decimal digit of value one (e.g., "001").
 * @param token format token (non-empty)
 * @return true if token is a (possibly) padded one
 */
private static boolean isPaddedOne(Integer[] token) {
    final int last = token.length - 1;
    // the final scalar is examined first, then the padding from left to right
    boolean padded = getDecimalValue(token[last]) == 1;
    for (int i = 0; padded && (i < last); i++) {
        padded = getDecimalValue(token[i]) == 0;
    }
    return padded;
}
/**
 * Obtain numeric value of SCALAR if it is a decimal digit (category Nd).
 * @param scalar unicode scalar value
 * @return numeric value of decimal digit, or -1 if not a decimal digit
 */
private static int getDecimalValue(Integer scalar) {
    final int codePoint = scalar.intValue();
    final boolean decimalDigit = Character.getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
    return decimalDigit ? Character.getNumericValue(codePoint) : -1;
}
/**
 * Determine if S starts a decimal digit sequence, i.e., if S has numeric
 * value 1, its predecessor has numeric value 0, and the scalar eight
 * positions after S has numeric value 9.
 * @param s unicode scalar value
 * @return true if S is the digit one of a contiguous run of decimal digits
 */
private static boolean isStartOfDecimalSequence(int s) {
    if (Character.getNumericValue(s) != 1) {
        return false;
    }
    if (Character.getNumericValue(s - 1) != 0) {
        return false;
    }
    return Character.getNumericValue(s + 8) == 9;
}
/** supported alphabetic sequences, each entry being { start scalar, number of elements } */
private static int[][] supportedAlphabeticSequences = {
    { 'A', 26 },        // A...Z
    { 'a', 26 },        // a...z
};
/**
 * Determine if S is the start scalar of a supported alphabetic sequence.
 * @param s unicode scalar value
 * @return true if S starts a supported alphabetic sequence
 */
private static boolean isStartOfAlphabeticSequence(int s) {
    boolean found = false;
    for (int i = 0; !found && (i < supportedAlphabeticSequences.length); i++) {
        final int[] sequence = supportedAlphabeticSequences[i];
        assert sequence.length >= 2;
        found = sequence[0] == s;
    }
    return found;
}
/**
 * Obtain the number of elements of the supported alphabetic sequence that
 * starts at S.
 * @param s unicode scalar value denoting start of sequence
 * @return number of elements in sequence, or zero if S starts no supported sequence
 */
private static int getSequenceBase(int s) {
    for (int i = 0; i < supportedAlphabeticSequences.length; i++) {
        final int[] sequence = supportedAlphabeticSequences[i];
        assert sequence.length >= 2;
        if (sequence[0] == s) {
            return sequence[1];
        }
    }
    return 0;
}
/** start scalars of the supported special numeral systems, one per entry */
private static int[][] supportedSpecials = {
    { 'I' },            // latin - uppercase roman numerals
    { 'i' },            // latin - lowercase roman numerals
    { '\u0391' },       // greek - uppercase isopsephy numerals
    { '\u03B1' },       // greek - lowercase isopsephy numerals
    { '\u05D0' },       // hebrew - gematria numerals
    { '\u0623' },       // arabic - abjadi numerals
    { '\u0627' },       // arabic - either abjadi or hijai alphabetic sequence
    { '\u0E01' },       // thai - default alphabetic sequence
    { '\u3042' },       // kana - hiragana (gojuon) - default alphabetic sequence
    { '\u3044' },       // kana - hiragana (iroha)
    { '\u30A2' },       // kana - katakana (gojuon) - default alphabetic sequence
    { '\u30A4' },       // kana - katakana (iroha)
};
/**
 * Determine if S is the start scalar of a supported special numeral system.
 * @param s unicode scalar value
 * @return true if S starts a supported special numeral system
 */
private static boolean isStartOfNumericSpecial(int s) {
    boolean found = false;
    for (int i = 0; !found && (i < supportedSpecials.length); i++) {
        final int[] special = supportedSpecials[i];
        assert special.length >= 1;
        found = special[0] == s;
    }
    return found;
}
/**
 * Obtain a new formatter for the special numeral system that starts at ONE.
 * Only ONE is consulted when selecting the formatter.
 * @param one unicode scalar value denoting start of system
 * @param letterValue letter value
 * @param features features (feature sub-parameters)
 * @param language denotes applicable language
 * @param country denotes applicable country
 * @return new formatter, or null if ONE starts no supported special system
 */
private SpecialNumberFormatter getSpecialFormatter(int one, int letterValue, String features, String language, String country) {
    switch (one) {
    case 'I':
    case 'i':
        return new RomanNumeralsFormatter();
    case '\u0391':
    case '\u03B1':
        return new IsopsephryNumeralsFormatter();
    case '\u05D0':
        return new GematriaNumeralsFormatter();
    case '\u0623':
    case '\u0627':
        return new ArabicNumeralsFormatter();
    case '\u0E01':
        return new ThaiNumeralsFormatter();
    case '\u3042':
    case '\u3044':
    case '\u30A2':
    case '\u30A4':
        return new KanaNumeralsFormatter();
    default:
        return null;
    }
}
/**
 * Convert each scalar of SA to upper case, in place.
 * @param sa array of unicode scalars (modified)
 * @return SA itself
 */
private static Integer[] toUpperCase(Integer[] sa) {
    assert sa != null;
    int index = 0;
    for (Integer scalar : sa) {
        sa[index++] = Integer.valueOf(Character.toUpperCase(scalar.intValue()));
    }
    return sa;
}
/**
 * Convert each scalar of SA to lower case, in place.
 * @param sa array of unicode scalars (modified)
 * @return SA itself
 */
private static Integer[] toLowerCase(Integer[] sa) {
    assert sa != null;
    int index = 0;
    for (Integer scalar : sa) {
        sa[index++] = Integer.valueOf(Character.toLowerCase(scalar.intValue()));
    }
    return sa;
}
/* not yet used
private static Integer[] toTitleCase ( Integer[] sa ) {
assert sa != null;
if ( sa.length > 0 ) {
sa [ 0 ] = Character.toTitleCase ( sa [ 0 ] );
}
return sa;
}
*/
/**
 * Convert the case of each word of WORDS according to CASETYPE.
 * @param words list of words (not modified)
 * @param caseType unicode character type for case conversion
 * @return new list containing the case converted words
 */
private static List<String> convertWordCase(List<String> words, int caseType) {
    final List<String> converted = new ArrayList<String>();
    for (int i = 0, n = words.size(); i < n; i++) {
        converted.add(convertWordCase(words.get(i), caseType));
    }
    return converted;
}
/**
 * Convert the case of WORD according to CASETYPE: all upper case for
 * Character.UPPERCASE_LETTER, all lower case for Character.LOWERCASE_LETTER,
 * first character upper case and remaining characters lower case for
 * Character.TITLECASE_LETTER; for any other value WORD is returned unchanged.
 * Case mapping uses a fixed locale so that the result does not depend on the
 * default locale of the JVM (e.g., Turkish dotted/dotless i mappings).
 * @param word word to convert
 * @param caseType unicode character type for case conversion
 * @return case converted word
 */
private static String convertWordCase(String word, int caseType) {
    if (caseType == Character.UPPERCASE_LETTER) {
        return word.toUpperCase(Locale.ENGLISH);
    } else if (caseType == Character.LOWERCASE_LETTER) {
        return word.toLowerCase(Locale.ENGLISH);
    } else if (caseType == Character.TITLECASE_LETTER) {
        StringBuilder sb = new StringBuilder();
        // characters are mapped one at a time, as before, so only the locale changes
        for (int i = 0, n = word.length(); i < n; i++) {
            String s = word.substring(i, i + 1);
            if (i == 0) {
                sb.append(s.toUpperCase(Locale.ENGLISH));
            } else {
                sb.append(s.toLowerCase(Locale.ENGLISH));
            }
        }
        return sb.toString();
    } else {
        return word;
    }
}
/**
 * Concatenate WORDS, placing SEPARATOR before each word that is appended
 * while the accumulated result is non-empty.
 * @param words words to join
 * @param separator separator to place between words
 * @return joined words
 */
private static String joinWords(List<String> words, String separator) {
    final StringBuilder joined = new StringBuilder();
    for (int i = 0, n = words.size(); i < n; i++) {
        // test the accumulated length (not the index), as leading empty words add no separator
        if (joined.length() != 0) {
            joined.append(separator);
        }
        joined.append(words.get(i));
    }
    return joined.toString();
}
/**
* Special number formatter: converts a number to an array of unicode scalars
* using a particular numeral system (word numerals, roman numerals, etc.).
*/
interface SpecialNumberFormatter {
/**
* Format number with special numeral system.
* @param number to be formatted
* @param one unicode scalar value denoting numeric value 1
* @param letterValue letter value (must be one of the above letter value enumeration values)
* @param features features (feature sub-parameters)
* @param language denotes applicable language
* @param country denotes applicable country
* @return formatted number as array of unicode scalars, or null if the number
* is not formatted by this formatter (formatNumber then falls back to the default token)
*/
Integer[] format(long number, int one, int letterValue, String features, String language, String country);
}
/**
 * English Word Numerals: cardinal and ordinal word tables, indexed by digit
 * value (ones, teens, tens) or by scale (others: hundred, thousand, million, billion).
 */
private static String[] englishWordOnes = { "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine" };
private static String[] englishWordTeens = { "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen" };
private static String[] englishWordTens = { "", "ten", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety" };
private static String[] englishWordOthers = { "hundred", "thousand", "million", "billion" };
private static String[] englishWordOnesOrd = { "none", "first", "second", "third", "fourth", "fifth", "sixth", "seventh", "eighth", "ninth" };
private static String[] englishWordTeensOrd = { "tenth", "eleventh", "twelfth", "thirteenth", "fourteenth", "fifteenth", "sixteenth", "seventeenth", "eighteenth", "nineteenth" };
// last entry corrected from the misspelling "ninetith"
private static String[] englishWordTensOrd = { "", "tenth", "twentieth", "thirtieth", "fortieth", "fiftieth", "sixtieth", "seventieth", "eightieth", "ninetieth" };
private static String[] englishWordOthersOrd = { "hundredth", "thousandth", "millionth", "billionth" };
/**
 * Formats numbers less than one trillion as English cardinal words, or as
 * ordinal words if the 'ordinal' feature is specified.
 */
private static class EnglishNumberAsWordFormatter implements SpecialNumberFormatter {
    /** unicode character type selecting the case of the generated words */
    private int caseType = Character.UPPERCASE_LETTER;
    EnglishNumberAsWordFormatter(int caseType) {
        this.caseType = caseType;
    }
    /**
     * Format NUMBER as space separated English words.
     * @return words as array of unicode scalars, or null if NUMBER is one trillion or more
     */
    public Integer[] format(long number, int one, int letterValue, String features, String language, String country) {
        if (number >= 1000000000000L) {
            return null;
        }
        List<String> words = new ArrayList<String>();
        final boolean ordinal = hasFeature(features, "ordinal");
        if (number == 0) {
            words.add(englishWordOnes[0]);
        } else if (ordinal && (number < 10)) {
            words.add(englishWordOnesOrd[(int) number]);
        } else {
            // walk the three digit groups from billions (scale 3) down to thousands (scale 1);
            // scale also indexes the scale word tables
            long divisor = 1000000000L;
            for (int scale = 3; scale >= 1; scale--) {
                final int group = (int) ((number / divisor) % 1000);
                if (group > 0) {
                    words = formatOnesInThousand(words, group);
                    // the scale word is ordinal only if nothing follows it
                    final boolean ordinalHere = ordinal && ((number % divisor) == 0);
                    words.add(ordinalHere ? englishWordOthersOrd[scale] : englishWordOthers[scale]);
                }
                divisor /= 1000;
            }
            final int units = (int) (number % 1000);
            if (units > 0) {
                words = formatOnesInThousand(words, units, ordinal);
            }
        }
        words = convertWordCase(words, caseType);
        return UTF32.toUTF32(joinWords(words, " "), 0, true);
    }
    /** Append cardinal words for NUMBER (less than 1000) to WL. */
    private List<String> formatOnesInThousand(List<String> wl, int number) {
        return formatOnesInThousand(wl, number, false);
    }
    /**
     * Append words for NUMBER (less than 1000) to WL; if ORDINAL, then the final
     * word appended is in ordinal form.
     */
    private List<String> formatOnesInThousand(List<String> wl, int number, boolean ordinal) {
        assert number < 1000;
        final int ones = number % 10;
        final int tens = (number / 10) % 10;
        final int hundreds = (number / 100) % 10;
        if (hundreds > 0) {
            wl.add(englishWordOnes[hundreds]);
            final boolean exactHundreds = ordinal && ((number % 100) == 0);
            wl.add(exactHundreds ? englishWordOthersOrd[0] : englishWordOthers[0]);
        }
        if (tens == 1) {
            wl.add(ordinal ? englishWordTeensOrd[ones] : englishWordTeens[ones]);
        } else {
            if (tens > 1) {
                final boolean exactTens = ordinal && (ones == 0);
                wl.add(exactTens ? englishWordTensOrd[tens] : englishWordTens[tens]);
            }
            if (ones > 0) {
                wl.add(ordinal ? englishWordOnesOrd[ones] : englishWordOnes[ones]);
            }
        }
        return wl;
    }
}
/**
* French Word Numerals: cardinal word tables indexed by digit value (ones,
* teens, tens), scale words (others), and ordinal word tables.
*/
private static String[] frenchWordOnes = { "z\u00e9ro", "un", "deux", "trois", "quatre", "cinq", "six", "sept", "huit", "neuf" };
private static String[] frenchWordTeens = { "dix", "onze", "douze", "treize", "quatorze", "quinze", "seize", "dix-sept", "dix-huit", "dix-neuf" };
private static String[] frenchWordTens = { "", "dix", "vingt", "trente", "quarante", "cinquante", "soixante", "soixante-dix", "quatre-vingt", "quatre-vingt-dix" };
// singular and plural scale words: cent(s), mille, million(s), milliard(s)
private static String[] frenchWordOthers = { "cent", "cents", "mille", "million", "millions", "milliard", "milliards" };
// NOTE: unlike the cardinal tables, the ordinal tables have no entry for zero:
// index 0 holds the ordinal for one and index 9 holds the ordinal for ten
private static String[] frenchWordOnesOrdMale = { "premier", "deuxi\u00e8me", "troisi\u00e8me", "quatri\u00e8me", "cinqui\u00e8me", "sixi\u00e8me", "septi\u00e8me", "huiti\u00e8me", "neuvi\u00e8me", "dixi\u00e8me" };
private static String[] frenchWordOnesOrdFemale = { "premi\u00e8re", "deuxi\u00e8me", "troisi\u00e8me", "quatri\u00e8me", "cinqui\u00e8me", "sixi\u00e8me", "septi\u00e8me", "huiti\u00e8me", "neuvi\u00e8me", "dixi\u00e8me" };
/**
 * Formats numbers less than one trillion as French cardinal words. If the
 * 'ordinal' feature is specified, then numbers from one through ten are
 * formatted as ordinal words (feminine if the 'female' feature is specified);
 * larger numbers are always formatted as cardinals.
 */
private static class FrenchNumberAsWordFormatter implements SpecialNumberFormatter {
    /** unicode character type selecting the case of the generated words */
    private int caseType = Character.UPPERCASE_LETTER;
    FrenchNumberAsWordFormatter(int caseType) {
        this.caseType = caseType;
    }
    /**
     * Format NUMBER as space separated French words.
     * @return words as array of unicode scalars, or null if NUMBER is one trillion or more
     */
    public Integer[] format(long number, int one, int letterValue, String features, String language, String country) {
        List<String> wl = new ArrayList<String>();
        if (number >= 1000000000000L) {
            return null;
        } else {
            boolean ordinal = hasFeature(features, "ordinal");
            if (number == 0) {
                wl.add(frenchWordOnes[0]);
            } else if (ordinal && (number <= 10)) {
                // the ordinal tables start at "first" (index 0), so 1..10 map to 0..9
                int index = (int) number - 1;
                boolean female = hasFeature(features, "female");
                if (female) {
                    wl.add(frenchWordOnesOrdFemale[index]);
                } else {
                    wl.add(frenchWordOnesOrdMale[index]);
                }
            } else {
                int ones = (int) (number % 1000);
                int thousands = (int) ((number / 1000) % 1000);
                int millions = (int) ((number / 1000000) % 1000);
                int billions = (int) ((number / 1000000000) % 1000);
                if (billions > 0) {
                    wl = formatOnesInThousand(wl, billions);
                    if (billions == 1) {
                        wl.add(frenchWordOthers[5]);
                    } else {
                        wl.add(frenchWordOthers[6]);
                    }
                }
                if (millions > 0) {
                    wl = formatOnesInThousand(wl, millions);
                    if (millions == 1) {
                        wl.add(frenchWordOthers[3]);
                    } else {
                        wl.add(frenchWordOthers[4]);
                    }
                }
                if (thousands > 0) {
                    // one thousand is plain "mille", with no leading "un"
                    if (thousands > 1) {
                        wl = formatOnesInThousand(wl, thousands);
                    }
                    wl.add(frenchWordOthers[2]);
                }
                if (ones > 0) {
                    wl = formatOnesInThousand(wl, ones);
                }
            }
            wl = convertWordCase(wl, caseType);
            return UTF32.toUTF32(joinWords(wl, " "), 0, true);
        }
    }
    /** Append cardinal words for NUMBER (less than 1000) to WL. */
    private List<String> formatOnesInThousand(List<String> wl, int number) {
        assert number < 1000;
        int ones = number % 10;
        int tens = (number / 10) % 10;
        int hundreds = (number / 100) % 10;
        if (hundreds > 0) {
            // one hundred is plain "cent", with no leading "un"
            if (hundreds > 1) {
                wl.add(frenchWordOnes[hundreds]);
            }
            // plural "cents" only for an exact multiple of one hundred
            if ((hundreds > 1) && (tens == 0) && (ones == 0)) {
                wl.add(frenchWordOthers[1]);
            } else {
                wl.add(frenchWordOthers[0]);
            }
        }
        if (tens > 0) {
            if (tens == 1) {
                wl.add(frenchWordTeens[ones]);
            } else if (tens < 7) {
                if (ones == 1) {
                    // e.g., "vingt et un"
                    wl.add(frenchWordTens[tens]);
                    wl.add("et");
                    wl.add(frenchWordOnes[ones]);
                } else {
                    StringBuilder sb = new StringBuilder();
                    sb.append(frenchWordTens[tens]);
                    if (ones > 0) {
                        sb.append('-');
                        sb.append(frenchWordOnes[ones]);
                    }
                    wl.add(sb.toString());
                }
            } else if (tens == 7) {
                // seventies are built from sixty plus a teen
                if (ones == 1) {
                    wl.add(frenchWordTens[6]);
                    wl.add("et");
                    wl.add(frenchWordTeens[ones]);
                } else {
                    StringBuilder sb = new StringBuilder();
                    sb.append(frenchWordTens[6]);
                    sb.append('-');
                    sb.append(frenchWordTeens[ones]);
                    wl.add(sb.toString());
                }
            } else if (tens == 8) {
                StringBuilder sb = new StringBuilder();
                sb.append(frenchWordTens[tens]);
                if (ones > 0) {
                    sb.append('-');
                    sb.append(frenchWordOnes[ones]);
                } else {
                    // exactly eighty takes a plural 's'
                    sb.append('s');
                }
                wl.add(sb.toString());
            } else if (tens == 9) {
                // nineties are built from eighty plus a teen
                StringBuilder sb = new StringBuilder();
                sb.append(frenchWordTens[8]);
                sb.append('-');
                sb.append(frenchWordTeens[ones]);
                wl.add(sb.toString());
            }
        } else if (ones > 0) {
            wl.add(frenchWordOnes[ones]);
        }
        return wl;
    }
}
/**
 * Spanish Word Numerals: cardinal word tables indexed by digit value (ones,
 * teens, tweens for 20 through 29, tens, hundreds), scale words (others), and
 * ordinal word tables indexed by number (0 through 10).
 */
// entry for six corrected from the misspelling "seise"
private static String[] spanishWordOnes = { "cero", "uno", "dos", "tres", "cuatro", "cinco", "seis", "siete", "ocho", "nueve" };
private static String[] spanishWordTeens = { "diez", "once", "doce", "trece", "catorce", "quince", "diecis\u00e9is", "diecisiete", "dieciocho", "diecinueve" };
private static String[] spanishWordTweens = { "veinte", "veintiuno", "veintid\u00f3s", "veintitr\u00e9s", "veinticuatro", "veinticinco", "veintis\u00e9is", "veintisiete", "veintiocho", "veintinueve" };
private static String[] spanishWordTens = { "", "diez", "veinte", "treinta", "cuarenta", "cincuenta", "sesenta", "setenta", "ochenta", "noventa" };
private static String[] spanishWordHundreds = { "", "ciento", "doscientos", "trescientos", "cuatrocientos", "quinientos", "seiscientos", "setecientos", "ochocientos", "novecientos" };
private static String[] spanishWordOthers = { "un", "cien", "mil", "mill\u00f3n", "millones" };
// entries for ninth corrected to "noveno"/"novena" (were "novento" and the cardinal "noventa")
private static String[] spanishWordOnesOrdMale = { "ninguno", "primero", "segundo", "tercero", "cuarto", "quinto", "sexto", "s\u00e9ptimo", "octavo", "noveno", "d\u00e9cimo" };
private static String[] spanishWordOnesOrdFemale = { "ninguna", "primera", "segunda", "tercera", "cuarta", "quinta", "sexta", "s\u00e9ptima", "octava", "novena", "d\u00e9cima" };
/**
 * Formats numbers less than one trillion as Spanish cardinal words. If the
 * 'ordinal' feature is specified, then numbers from one through ten are
 * formatted as ordinal words (feminine if the 'female' feature is specified);
 * larger numbers are always formatted as cardinals.
 */
private static class SpanishNumberAsWordFormatter implements SpecialNumberFormatter {
    /** unicode character type selecting the case of the generated words */
    private int caseType = Character.UPPERCASE_LETTER;
    SpanishNumberAsWordFormatter(int caseType) {
        this.caseType = caseType;
    }
    /**
     * Format NUMBER as space separated Spanish words.
     * @return words as array of unicode scalars, or null if NUMBER is one trillion or more
     */
    public Integer[] format(long number, int one, int letterValue, String features, String language, String country) {
        List<String> wl = new ArrayList<String>();
        if (number >= 1000000000000L) {
            return null;
        } else {
            boolean ordinal = hasFeature(features, "ordinal");
            if (number == 0) {
                wl.add(spanishWordOnes[0]);
            } else if (ordinal && (number <= 10)) {
                boolean female = hasFeature(features, "female");
                if (female) {
                    wl.add(spanishWordOnesOrdFemale[(int) number]);
                } else {
                    wl.add(spanishWordOnesOrdMale[(int) number]);
                }
            } else {
                int ones = (int) (number % 1000);
                int thousands = (int) ((number / 1000) % 1000);
                int millions = (int) ((number / 1000000) % 1000);
                int billions = (int) ((number / 1000000000) % 1000);
                // Spanish counts billions as thousands of millions, so the billions and
                // millions groups share one scale word: 2,500,000,000 is
                // "dos mil quinientos millones", not "dos mil millones quinientos millones".
                if (billions > 0) {
                    if (billions > 1) {
                        wl = formatOnesInThousand(wl, billions);
                    }
                    wl.add(spanishWordOthers[2]);
                }
                if (millions > 0) {
                    if (millions == 1) {
                        wl.add(spanishWordOthers[0]);
                    } else {
                        wl = formatOnesInThousand(wl, millions);
                    }
                }
                if ((billions > 0) || (millions > 0)) {
                    // singular only for exactly one million
                    if ((billions == 0) && (millions == 1)) {
                        wl.add(spanishWordOthers[3]);
                    } else {
                        wl.add(spanishWordOthers[4]);
                    }
                }
                if (thousands > 0) {
                    // one thousand is plain "mil", with no leading "un"
                    if (thousands > 1) {
                        wl = formatOnesInThousand(wl, thousands);
                    }
                    wl.add(spanishWordOthers[2]);
                }
                if (ones > 0) {
                    wl = formatOnesInThousand(wl, ones);
                }
            }
            wl = convertWordCase(wl, caseType);
            return UTF32.toUTF32(joinWords(wl, " "), 0, true);
        }
    }
    /**
     * Append cardinal words for NUMBER (less than 1000) to WL.
     * NOTE(review): words ending in "uno" are not shortened before a scale word
     * (e.g. 21,000 yields "veintiuno mil") - confirm whether apocope is wanted.
     */
    private List<String> formatOnesInThousand(List<String> wl, int number) {
        assert number < 1000;
        int ones = number % 10;
        int tens = (number / 10) % 10;
        int hundreds = (number / 100) % 10;
        if (hundreds > 0) {
            // exactly one hundred is "cien", otherwise "ciento", "doscientos", ...
            if ((hundreds == 1) && (tens == 0) && (ones == 0)) {
                wl.add(spanishWordOthers[1]);
            } else {
                wl.add(spanishWordHundreds[hundreds]);
            }
        }
        if (tens > 0) {
            if (tens == 1) {
                wl.add(spanishWordTeens[ones]);
            } else if (tens == 2) {
                wl.add(spanishWordTweens[ones]);
            } else {
                wl.add(spanishWordTens[tens]);
                if (ones > 0) {
                    wl.add("y");
                    wl.add(spanishWordOnes[ones]);
                }
            }
        } else if (ones > 0) {
            wl.add(spanishWordOnes[ones]);
        }
        return wl;
    }
}
/**
* Roman (Latin) Numerals
*/
private static int[] romanMapping = {
100000,
90000,
50000,
40000,
10000,
9000,
5000,
4000,
1000,
900,
500,
400,
100,
90,
50,
40,
10,
9,
8,
7,
6,
5,
4,
3,
2,
1
};
private static String[] romanStandardForms = {
null,
null,
null,
null,
null,
null,
null,
null,
"m",
"cm",
"d",
"cd",
"c",
"xc",
"l",
"xl",
"x",
"ix",
null,
null,
null,
"v",
"iv",
null,
null,
"i"
};
// Same as romanStandardForms for denominations up to 1000, but with symbols for
// the larger denominations taken from the Unicode Number Forms block
// (U+2180 one thousand C D, U+2181 five thousand, U+2182 ten thousand,
// U+2187 fifty thousand, U+2188 one hundred thousand). Index-aligned with
// romanMapping; null entries are skipped by RomanNumeralsFormatter. Used when
// the "large" feature is requested (values up to 199999).
private static String[] romanLargeForms = {
"\u2188",
"\u2182\u2188",
"\u2187",
"\u2182\u2187",
"\u2182",
"\u2180\u2182",
"\u2181",
"\u2180\u2181",
"m",
"cm",
"d",
"cd",
"c",
"xc",
"l",
"xl",
"x",
"ix",
null,
null,
null,
"v",
"iv",
null,
null,
"i"
};
// Symbols taken entirely from the Unicode Number Forms block (U+2160..U+2188),
// index-aligned with romanMapping. Unlike the other two form tables this one has
// no null entries: every denomination, including 8, 7, 6, 3 and 2, has its own
// symbol. Used when the "unicode-number-forms" feature is requested
// (values up to 199999).
private static String[] romanNumberForms = {
"\u2188",
"\u2182\u2188",
"\u2187",
"\u2182\u2187",
"\u2182",
"\u2180\u2182",
"\u2181",
"\u2180\u2181",
"\u216F",
"\u216D\u216F",
"\u216E",
"\u216D\u216E",
"\u216D",
"\u2169\u216D",
"\u216C",
"\u2169\u216C",
"\u2169",
"\u2168",
"\u2167",
"\u2166",
"\u2165",
"\u2164",
"\u2163",
"\u2162",
"\u2161",
"\u2160"
};
/**
 * Formatter producing Roman numerals. The symbol set is chosen from the
 * <code>features</code> string ("unicode-number-forms", "large", or neither),
 * and the case of the result follows the format token ('I' or 'i').
 */
private static class RomanNumeralsFormatter implements SpecialNumberFormatter {
    /**
     * Convert a number to Roman numeral scalars.
     * @param number value to convert
     * @param one first scalar of the format token; only 'I' and 'i' are handled
     * @param letterValue not used by this formatter
     * @param features feature string inspected for the symbol set to use
     * @param language not used by this formatter
     * @param country not used by this formatter
     * @return the numeral as an array of scalars, or null if the number is zero,
     * exceeds the limit of the selected symbol set, or the token is not 'I' or 'i'
     */
    public Integer[] format(long number, int one, int letterValue, String features, String language, String country) {
        List<Integer> scalars = new ArrayList<Integer>();
        if (number == 0) {
            return null;
        }
        // select the symbol table and the largest value it can express
        final String[] symbols;
        final int limit;
        if (hasFeature(features, "unicode-number-forms")) {
            symbols = romanNumberForms;
            limit = 199999;
        } else if (hasFeature(features, "large")) {
            symbols = romanLargeForms;
            limit = 199999;
        } else {
            symbols = romanStandardForms;
            limit = 4999;
        }
        if (number > limit) {
            return null;
        }
        // Greedy decomposition: walk the denominations from largest to smallest,
        // emitting each available symbol as many times as it still fits. The
        // remainder only shrinks, so a denomination that no longer fits never
        // needs to be revisited.
        long remaining = number;
        for (int i = 0; i < romanMapping.length; i++) {
            String symbol = symbols [ i ];
            if (symbol == null) {
                continue;
            }
            int denomination = romanMapping [ i ];
            while (remaining >= denomination) {
                appendScalars(scalars, UTF32.toUTF32(symbol, 0, true));
                remaining -= denomination;
            }
        }
        // the case of the format token decides the case of the output
        if (one == (int) 'I') {
            return toUpperCase(scalars.toArray(new Integer [ scalars.size() ]));
        }
        if (one == (int) 'i') {
            return toLowerCase(scalars.toArray(new Integer [ scalars.size() ]));
        }
        return null;
    }
}
/**
* Isopsephry (Greek) Numerals
*/
private static class IsopsephryNumeralsFormatter implements SpecialNumberFormatter {
// Placeholder: no conversion is implemented. Every call returns null,
// whatever the arguments.
public Integer[] format(long number, int one, int letterValue, String features, String language, String country) {
return null;
}
}
/**
* Gematria (Hebrew) Numerals
*/
// Hebrew letters grouped by decimal position: indices 0-8 are the ones,
// 9-17 the tens and 18-26 the hundreds group (27 entries in total).
// GematriaNumeralsFormatter passes the whole table to formatNumberAsSequence for
// alphabetic numbering; its traditional (gematria) conversion reads only
// indices 0-21 (ALEF through TAV) and never the final-form letters at 22-26.
private static int[] hebrewGematriaAlphabeticMap = {
// ones
0x05D0, // ALEF
0x05D1, // BET
0x05D2, // GIMEL
0x05D3, // DALET
0x05D4, // HE
0x05D5, // VAV
0x05D6, // ZAYIN
0x05D7, // HET
0x05D8, // TET
// tens
0x05D9, // YOD
0x05DB, // KAF
0x05DC, // LAMED
0x05DE, // MEM
0x05E0, // NUN
0x05E1, // SAMEKH
0x05E2, // AYIN
0x05E4, // PE
0x05E6, // TSADHI
// hundreds
0x05E7, // QOF
0x05E8, // RESH
0x05E9, // SHIN
0x05EA, // TAV
0x05DA, // FINAL KAF
0x05DD, // FINAL MEM
0x05DF, // FINAL NUN
0x05E3, // FINAL PE
0x05E5, // FINAL TSADHI
};
/**
 * Formatter for Hebrew letter numbering. It only responds when the first scalar
 * of the format token is ALEF (U+05D0):
 * <ul>
 * <li>with an alphabetic letter value, the number is formatted by
 * <code>formatNumberAsSequence</code> over <code>hebrewGematriaAlphabeticMap</code>;</li>
 * <li>with a traditional letter value, the number is written in gematria
 * notation, which is supported for 1 through 1999 only.</li>
 * </ul>
 */
private class GematriaNumeralsFormatter implements SpecialNumberFormatter {
    /**
     * Convert a number to Hebrew letter scalars.
     * @param number value to convert
     * @param one first scalar of the format token; must be U+05D0
     * @param letterValue alphabetic or traditional letter value
     * @param features passed through to the gematria conversion
     * @param language passed through to the gematria conversion
     * @param country passed through to the gematria conversion
     * @return the formatted scalars, or null if the token, letter value or
     * number is not supported
     */
    public Integer[] format(long number, int one, int letterValue, String features, String language, String country) {
        if (one != 0x05D0) {
            return null;
        }
        if (letterValue == LETTER_VALUE_ALPHABETIC) {
            return formatNumberAsSequence(number, one, hebrewGematriaAlphabeticMap.length, hebrewGematriaAlphabeticMap);
        } else if (letterValue == LETTER_VALUE_TRADITIONAL) {
            if ((number == 0) || (number > 1999)) {
                return null;
            } else {
                return formatAsGematriaNumber(number, features, language, country);
            }
        } else {
            return null;
        }
    }
    /**
     * Write a number in the range 1..1999 in gematria notation.
     * @param number value to convert; asserted to be non-zero and below 2000
     * @param features not used
     * @param language not used
     * @param country not used
     * @return the letters (plus geresh/gershayim marks) as an array of scalars
     */
    private Integer[] formatAsGematriaNumber(long number, String features, String language, String country) {
        List<Integer> sl = new ArrayList<Integer>();
        assert hebrewGematriaAlphabeticMap.length == 27;
        assert hebrewGematriaAlphabeticMap[0] == 0x05D0; // ALEF
        assert hebrewGematriaAlphabeticMap[21] == 0x05EA; // TAV
        assert number != 0;
        assert number < 2000;
        int[] map = hebrewGematriaAlphabeticMap;
        int thousands = (int) ((number / 1000) % 10);
        int hundreds = (int) ((number / 100) % 10);
        int tens = (int) ((number / 10) % 10);
        int ones = (int) ((number / 1) % 10);
        // value of the two low-order decimal digits, used for the 15/16 rule below
        int tensAndOnes = (int) (number % 100);
        if (thousands > 0) {
            // thousands digit is written with the ones letter followed by GERESH (U+05F3)
            sl.add(map [ 0 + (thousands - 1) ]);
            sl.add(0x05F3);
        }
        if (hundreds > 0) {
            if (hundreds < 5) {
                // 100..400: QOF, RESH, SHIN, TAV
                sl.add(map [ 18 + (hundreds - 1) ]);
            } else if (hundreds < 9) {
                // 500..800: TAV (400), GERSHAYIM (U+05F4), then the letter for the remaining 100..400
                sl.add(map [ 18 + (4 - 1) ]);
                sl.add(0x05F4);
                sl.add(map [ 18 + (hundreds - 5) ]);
            } else if (hundreds == 9) {
                // 900: TAV TAV (800), GERSHAYIM, then QOF (100)
                sl.add(map [ 18 + (4 - 1) ]);
                sl.add(map [ 18 + (4 - 1) ]);
                sl.add(0x05F4);
                sl.add(map [ 18 + (hundreds - 9) ]);
            }
            assert hundreds < 10;
        }
        // 15 and 16 are written TET+VAV (9+6) and TET+ZAYIN (9+7) in place of
        // YOD+HE and YOD+VAV. The substitution applies to the tens/ones part
        // of every number (115, 216, 1015, ...), so test the value modulo 100
        // rather than the whole number.
        if (tensAndOnes == 15) {
            sl.add(map [ 9 - 1]);
            sl.add(0x05F4);
            sl.add(map [ 6 - 1]);
        } else if (tensAndOnes == 16) {
            sl.add(map [ 9 - 1 ]);
            sl.add(0x05F4);
            sl.add(map [ 7 - 1 ]);
        } else {
            if (tens > 0) {
                assert tens < 10;
                sl.add(map [ 9 + (tens - 1) ]);
            }
            if (ones > 0) {
                assert ones < 10;
                sl.add(map [ 0 + (ones - 1) ]);
            }
        }
        return sl.toArray(new Integer [ sl.size() ]);
    }
}
/**
* Arabic Numerals
*/
// Arabic letters grouped by decimal position: indices 0-8 are the ones,
// 9-17 the tens, 18-26 the hundreds, and index 27 is the single thousands
// letter (28 entries in total). ArabicNumeralsFormatter indexes this table by
// decimal digit in formatAsAbjadiNumber and also passes it whole to
// formatNumberAsSequence.
private static int[] arabicAbjadiAlphabeticMap = {
// ones
0x0623, // ALEF WITH HAMZA ABOVE
0x0628, // BEH
0x062C, // JEEM
0x062F, // DAL
0x0647, // HEH
0x0648, // WAW
0x0632, // ZAIN
0x062D, // HAH
0x0637, // TAH
// tens
0x0649, // ALEF MAQSURA
0x0643, // KAF
0x0644, // LAM
0x0645, // MEEM
0x0646, // NOON
0x0633, // SEEN
0x0639, // AIN
0x0641, // FEH
0x0635, // SAD
// hundreds
0x0642, // QAF
0x0631, // REH
0x0634, // SHEEN
0x062A, // TEH
0x062B, // THEH
0x062E, // KHAH
0x0630, // THAL
0x0636, // DAD
0x0638, // ZAH
// thousands
0x063A, // GHAIN
};
// The same 28 Arabic letters as arabicAbjadiAlphabeticMap, arranged in a
// different (hijai) order. ArabicNumeralsFormatter passes this table to
// formatNumberAsSequence when the format token is U+0627 and the letter value
// is alphabetic.
private static int[] arabicHijaiAlphabeticMap = {
0x0623, // ALEF WITH HAMZA ABOVE
0x0628, // BEH
0x062A, // TEH
0x062B, // THEH
0x062C, // JEEM
0x062D, // HAH
0x062E, // KHAH
0x062F, // DAL
0x0630, // THAL
0x0631, // REH
0x0632, // ZAIN
0x0633, // SEEN
0x0634, // SHEEN
0x0635, // SAD
0x0636, // DAD
0x0637, // TAH
0x0638, // ZAH
0x0639, // AIN
0x063A, // GHAIN
0x0641, // FEH
0x0642, // QAF
0x0643, // KAF
0x0644, // LAM
0x0645, // MEEM
0x0646, // NOON
0x0647, // HEH
0x0648, // WAW
0x0649, // ALEF MAQSURA
};
/**
 * Formatter for Arabic letter numbering. The behaviour depends on the first
 * scalar of the format token:
 * <ul>
 * <li>U+0627 (ALEF): the number is formatted by <code>formatNumberAsSequence</code>,
 * using the hijai table for an alphabetic letter value and the abjadi table
 * otherwise;</li>
 * <li>U+0623 (ALEF WITH HAMZA ABOVE): the number is written as an abjadi
 * numeral, which is supported for 1 through 1999 only.</li>
 * </ul>
 */
private class ArabicNumeralsFormatter implements SpecialNumberFormatter {
    /**
     * Convert a number to Arabic letter scalars.
     * @param number value to convert
     * @param one first scalar of the format token; U+0627 or U+0623
     * @param letterValue letter value selecting the table for the U+0627 token
     * @param features passed through to the abjadi conversion
     * @param language passed through to the abjadi conversion
     * @param country passed through to the abjadi conversion
     * @return the formatted scalars, or null if the token or number is not supported
     */
    public Integer[] format(long number, int one, int letterValue, String features, String language, String country) {
        if (one == 0x0627) {
            // only an alphabetic letter value selects the hijai ordering; the
            // traditional value and any other value both use the abjadi ordering
            int[] map = (letterValue == LETTER_VALUE_ALPHABETIC)
                ? arabicHijaiAlphabeticMap
                : arabicAbjadiAlphabeticMap;
            return formatNumberAsSequence(number, one, map.length, map);
        } else if (one == 0x0623) {
            if ((number == 0) || (number > 1999)) {
                return null;
            } else {
                return formatAsAbjadiNumber(number, features, language, country);
            }
        } else {
            return null;
        }
    }
    /**
     * Write a number in the range 1..1999 as an abjadi numeral: one letter per
     * non-zero decimal digit, most significant first.
     * @param number value to convert; asserted to be non-zero and below 2000
     * @param features not used
     * @param language not used
     * @param country not used
     * @return the letters as an array of scalars
     */
    private Integer[] formatAsAbjadiNumber(long number, String features, String language, String country) {
        List<Integer> sl = new ArrayList<Integer>();
        assert arabicAbjadiAlphabeticMap.length == 28;
        assert arabicAbjadiAlphabeticMap[0] == 0x0623; // ALEF WITH HAMZA ABOVE
        assert arabicAbjadiAlphabeticMap[27] == 0x063A; // GHAIN
        assert number != 0;
        assert number < 2000;
        int[] map = arabicAbjadiAlphabeticMap;
        int thousands = (int) ((number / 1000) % 10);
        int hundreds = (int) ((number / 100) % 10);
        int tens = (int) ((number / 10) % 10);
        int ones = (int) ((number / 1) % 10);
        if (thousands > 0) {
            // the table holds a single thousands letter, hence the limit of 1999
            assert thousands < 2;
            sl.add(map [ 27 + (thousands - 1) ]);
        }
        if (hundreds > 0) {
            // this guard previously checked 'thousands' (copy-paste slip); the
            // digit being indexed here is 'hundreds'
            assert hundreds < 10;
            sl.add(map [ 18 + (hundreds - 1) ]);
        }
        if (tens > 0) {
            assert tens < 10;
            sl.add(map [ 9 + (tens - 1) ]);
        }
        if (ones > 0) {
            assert ones < 10;
            sl.add(map [ 0 + (ones - 1) ]);
        }
        return sl.toArray(new Integer [ sl.size() ]);
    }
}
/**
* Kana (Japanese) Numerals
*/
// Hiragana letters in gojuon order, A through N (48 entries).
// KanaNumeralsFormatter passes this table to formatNumberAsSequence when the
// format token is U+3042 and the letter value is alphabetic.
private static int[] hiraganaGojuonAlphabeticMap = {
0x3042, // A
0x3044, // I
0x3046, // U
0x3048, // E
0x304A, // O
0x304B, // KA
0x304D, // KI
0x304F, // KU
0x3051, // KE
0x3053, // KO
0x3055, // SA
0x3057, // SI
0x3059, // SU
0x305B, // SE
0x305D, // SO
0x305F, // TA
0x3061, // TI
0x3064, // TU
0x3066, // TE
0x3068, // TO
0x306A, // NA
0x306B, // NI
0x306C, // NU
0x306D, // NE
0x306E, // NO
0x306F, // HA
0x3072, // HI
0x3075, // HU
0x3078, // HE
0x307B, // HO
0x307E, // MA
0x307F, // MI
0x3080, // MU
0x3081, // ME
0x3082, // MO
0x3084, // YA
0x3086, // YU
0x3088, // YO
0x3089, // RA
0x308A, // RI
0x308B, // RU
0x308C, // RE
0x308D, // RO
0x308F, // WA
0x3090, // WI
0x3091, // WE
0x3092, // WO
0x3093, // N
};
// Katakana letters in gojuon order, A through N (48 entries), parallel to
// hiraganaGojuonAlphabeticMap. KanaNumeralsFormatter passes this table to
// formatNumberAsSequence when the format token is U+30A2 and the letter value
// is alphabetic.
private static int[] katakanaGojuonAlphabeticMap = {
0x30A2, // A
0x30A4, // I
0x30A6, // U
0x30A8, // E
0x30AA, // O
0x30AB, // KA
0x30AD, // KI
0x30AF, // KU
0x30B1, // KE
0x30B3, // KO
0x30B5, // SA
0x30B7, // SI
0x30B9, // SU
0x30BB, // SE
0x30BD, // SO
0x30BF, // TA
0x30C1, // TI
0x30C4, // TU
0x30C6, // TE
0x30C8, // TO
0x30CA, // NA
0x30CB, // NI
0x30CC, // NU
0x30CD, // NE
0x30CE, // NO
0x30CF, // HA
0x30D2, // HI
0x30D5, // HU
0x30D8, // HE
0x30DB, // HO
0x30DE, // MA
0x30DF, // MI
0x30E0, // MU
0x30E1, // ME
0x30E2, // MO
0x30E4, // YA
0x30E6, // YU
0x30E8, // YO
0x30E9, // RA
0x30EA, // RI
0x30EB, // RU
0x30EC, // RE
0x30ED, // RO
0x30EF, // WA
0x30F0, // WI
0x30F1, // WE
0x30F2, // WO
0x30F3, // N
};
/**
 * Formatter for Japanese kana letter numbering. Only alphabetic letter values
 * are handled; the format token picks hiragana (U+3042) or katakana (U+30A2).
 */
private class KanaNumeralsFormatter implements SpecialNumberFormatter {
    /**
     * Convert a number to a kana letter sequence.
     * @param number value to convert
     * @param one first scalar of the format token; U+3042 or U+30A2
     * @param letterValue must be the alphabetic letter value
     * @param features not used by this formatter
     * @param language not used by this formatter
     * @param country not used by this formatter
     * @return result of <code>formatNumberAsSequence</code> over the selected
     * table, or null if the token or letter value is not supported
     */
    public Integer[] format(long number, int one, int letterValue, String features, String language, String country) {
        if (letterValue != LETTER_VALUE_ALPHABETIC) {
            return null;
        }
        final int[] gojuon;
        switch (one) {
        case 0x3042:
            gojuon = hiraganaGojuonAlphabeticMap;
            break;
        case 0x30A2:
            gojuon = katakanaGojuonAlphabeticMap;
            break;
        default:
            return null;
        }
        return formatNumberAsSequence(number, one, gojuon.length, gojuon);
    }
}
/**
* Thai Numerals
*/
// Thai letters U+0E01 through U+0E2E in code point order, with U+0E24 and
// U+0E26 deliberately left out (44 entries). ThaiNumeralsFormatter passes this
// table to formatNumberAsSequence when the format token is U+0E01 and the
// letter value is alphabetic.
private static int[] thaiAlphabeticMap = {
0x0E01,
0x0E02,
0x0E03,
0x0E04,
0x0E05,
0x0E06,
0x0E07,
0x0E08,
0x0E09,
0x0E0A,
0x0E0B,
0x0E0C,
0x0E0D,
0x0E0E,
0x0E0F,
0x0E10,
0x0E11,
0x0E12,
0x0E13,
0x0E14,
0x0E15,
0x0E16,
0x0E17,
0x0E18,
0x0E19,
0x0E1A,
0x0E1B,
0x0E1C,
0x0E1D,
0x0E1E,
0x0E1F,
0x0E20,
0x0E21,
0x0E22,
0x0E23,
// 0x0E24, // RU - not used in modern sequence
0x0E25,
// 0x0E26, // LU - not used in modern sequence
0x0E27,
0x0E28,
0x0E29,
0x0E2A,
0x0E2B,
0x0E2C,
0x0E2D,
0x0E2E,
};
/**
 * Formatter for Thai letter numbering. Handles only the format token U+0E01
 * combined with an alphabetic letter value.
 */
private class ThaiNumeralsFormatter implements SpecialNumberFormatter {
    /**
     * Convert a number to a Thai letter sequence.
     * @param number value to convert
     * @param one first scalar of the format token; must be U+0E01
     * @param letterValue must be the alphabetic letter value
     * @param features not used by this formatter
     * @param language not used by this formatter
     * @param country not used by this formatter
     * @return result of <code>formatNumberAsSequence</code> over
     * <code>thaiAlphabeticMap</code>, or null if the token or letter value is
     * not supported
     */
    public Integer[] format(long number, int one, int letterValue, String features, String language, String country) {
        boolean supported = (one == 0x0E01) && (letterValue == LETTER_VALUE_ALPHABETIC);
        if (!supported) {
            return null;
        }
        int[] letters = thaiAlphabeticMap;
        return formatNumberAsSequence(number, one, letters.length, letters);
    }
}
}