// blob: 113c441ce4748f9abee0c57936e68aefae5682d2 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.maven.shared.utils;
import java.util.Locale;
import org.apache.commons.text.StringEscapeUtils;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.ComparisonFailure;
import org.junit.Test;
/**
* Test case for character case changes, pinpointing the situations where character case comparison doesn't
* give an intuitive result, and showing why one should avoid {@link String#toUpperCase()} and
* {@link String#toLowerCase()} (platform-locale dependent, with sometimes unexpected results)
* and <b>prefer {@link String#equalsIgnoreCase(String)} when possible</b>.
*
* @author Hervé Boutemy
* @see <a href="http://sim.ivi.co/2011/07/trap-of-case-insensitive-string.html">Simple Smiles - Xuelei Fan's Blog</a>
*/
public class CaseTest extends Assert {
    private static final Locale LOCALE_TURKISH = new Locale("tr");

    /** Azerbaijani locale: this test expects it to apply the same dotted/dotless i case mapping as Turkish. */
    private static final Locale LOCALE_AZERI = new Locale("az");

    /** common ASCII 'i' */
    private static final char DOTTED_i = '\u0069';

    /** common ASCII 'I' */
    private static final char DOTLESS_I = '\u0049';

    /** turkish dotless i = ı */
    private static final char DOTLESS_i = '\u0131';

    /** turkish dotted I = İ */
    private static final char DOTTED_I = '\u0130';

    /** http://en.wikipedia.org/wiki/Dot_(diacritic) */
    private static final char COMBINING_DOT_ABOVE = '\u0307';

    /** Default locale captured when the class is loaded, restored once every test has run. */
    private static final Locale SAVED_DEFAULT_LOCALE = Locale.getDefault();

    /**
     * Puts back the default locale that was active when this class was loaded, as a class-level safety net
     * for tests that change the JVM-wide default.
     */
    @AfterClass
    public static void restoreDefaultLocale() {
        Locale.setDefault(SAVED_DEFAULT_LOCALE);
    }

    /**
     * test the known case of upper I which doesn't give commonly expected i in Turkish locale, but ı (dotless i).
     * @see <a href="http://mattryall.net/blog/2009/02/the-infamous-turkish-locale-bug">The infamous Turkish locale bug</a>
     */
    @Test
    public void testTurkishI() {
        // check common i and I
        assertEquals("common lowercase i should have a dot", 'i', DOTTED_i);
        assertEquals("common uppercase I should not have a dot", 'I', DOTLESS_I);

        final String iIıİ = "iIıİ";

        // check source encoding doesn't wreak havoc
        assertUnicodeEquals(
                "misc i directly in (UTF-8) source", iIıİ, "" + DOTTED_i + DOTLESS_I + DOTLESS_i + DOTTED_I);

        // check toUpperCase and toLowerCase difference with turkish and english locales
        assertUnicodeEquals(
                "'iIıİ'.toUpperCase('tr')=='İIIİ'",
                "" + DOTTED_I + DOTLESS_I + DOTLESS_I + DOTTED_I,
                iIıİ.toUpperCase(LOCALE_TURKISH));
        assertUnicodeEquals(
                "'iIıİ'.toLowerCase('tr')=='iııi'",
                "" + DOTTED_i + DOTLESS_i + DOTLESS_i + DOTTED_i,
                iIıİ.toLowerCase(LOCALE_TURKISH));
        assertUnicodeEquals(
                "'iIıİ'.toUpperCase('en')=='IIIİ'",
                "" + DOTLESS_I + DOTLESS_I + DOTLESS_I + DOTTED_I,
                iIıİ.toUpperCase(Locale.ENGLISH));

        // on some platforms, the lowercased string ends with an extra COMBINING DOT ABOVE: accept both forms
        String lower = iIıİ.toLowerCase(Locale.ENGLISH);
        assertUnicodeEquals(
                "'iIıİ'.toLowerCase('en')=='iiıi'",
                "" + DOTTED_i + DOTTED_i + DOTLESS_i + DOTTED_i + (lower.length() > 4 ? COMBINING_DOT_ABOVE : ""),
                lower);

        // check equalsIgnoreCase() , which has no locale:
        // a string made of any single one of the four i variants must match the mixed string
        for (int i = 0; i < iIıİ.length(); i++) {
            char currentI = iIıİ.charAt(i);

            StringBuilder sb = new StringBuilder(iIıİ.length());
            for (int j = 0; j < iIıİ.length(); j++) {
                sb.append(currentI);
            }
            String current = sb.toString();

            assertTrue("'" + current + "'.equalsIgnoreCase('" + iIıİ + "')", current.equalsIgnoreCase(iIıİ));
        }
    }

    /**
     * Assert equals, and in case the result isn't as expected, display content unicode-escaped.
     *
     * @param message description of the check, used as the failure message prefix
     * @param expected the expected string
     * @param actual the string actually obtained
     * @throws ComparisonFailure when both strings differ, carrying their Java-escaped forms
     */
    private static void assertUnicodeEquals(String message, String expected, String actual) {
        // null-tolerant comparison, so that a null value is reported as a failure rather than as an NPE
        boolean same = (expected == null) ? (actual == null) : expected.equals(actual);
        if (same) {
            return;
        }

        throw new ComparisonFailure(
                message, StringEscapeUtils.escapeJava(expected), StringEscapeUtils.escapeJava(actual));
    }

    /**
     * Tells whether this test expects the given locale to use the Turkish dotted/dotless i case mapping.
     *
     * @param locale the locale to check
     * @return {@code true} when the locale language is Turkish or Azerbaijani
     */
    private static boolean usesTurkishCasing(Locale locale) {
        String language = locale.getLanguage();
        return LOCALE_TURKISH.getLanguage().equals(language)
                || LOCALE_AZERI.getLanguage().equals(language);
    }

    /**
     * Test case change on all ascii characters with every available locale, to check that turkish i is the only
     * exception on these characters.
     */
    @Test
    public void testAsciiAvailableLocales() {
        final String lower = "abcdefghijklmnopqrstuvwxyz";
        final String upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

        // the loop below changes the JVM-wide default locale: remember it, to put it back whatever happens
        final Locale defaultBeforeTest = Locale.getDefault();
        try {
            for (Locale locale : Locale.getAvailableLocales()) {
                // check that toUpper() == toUpper(default locale) and toLower() = toLower(default locale)
                Locale.setDefault(locale);
                assertEquals(lower.toUpperCase(), lower.toUpperCase(locale));
                assertEquals(upper.toLowerCase(), upper.toLowerCase(locale));

                // check result
                String expectedToUpperCase = upper;
                String expectedToLowerCase = lower;
                if (usesTurkishCasing(locale)) {
                    expectedToUpperCase = upper.replace(DOTLESS_I, DOTTED_I);
                    expectedToLowerCase = lower.replace(DOTTED_i, DOTLESS_i);
                }

                assertEquals(
                        "'" + lower + "'.toUpperCase('" + locale.toString() + "')",
                        expectedToUpperCase,
                        lower.toUpperCase(locale));
                assertEquals(
                        "'" + upper + "'.toLowerCase('" + locale.toString() + "')",
                        expectedToLowerCase,
                        upper.toLowerCase(locale));

                // check that toLowerCase on lower and toUpperCase on upper don't cause harm
                assertEquals(
                        "'" + lower + "'.toLowerCase('" + locale.toString() + "')", lower, lower.toLowerCase(locale));
                assertEquals(
                        "'" + upper + "'.toUpperCase('" + locale.toString() + "')", upper, upper.toUpperCase(locale));

                // check equalsIgnoreCase
                assertTrue("'" + upper + "'.equalsIgnoreCase('" + lower + "')", upper.equalsIgnoreCase(lower));
                assertTrue(
                        "'" + upper + "'.equalsIgnoreCase('" + expectedToLowerCase + "')",
                        upper.equalsIgnoreCase(expectedToLowerCase));
                assertTrue(
                        "'" + expectedToUpperCase + "'.equalsIgnoreCase('" + lower + "')",
                        expectedToUpperCase.equalsIgnoreCase(lower));
            }
        } finally {
            Locale.setDefault(defaultBeforeTest);
        }
    }
}