blob: 232a06dc7372b423d19c6ee380cc35664745ef3c [file] [log] [blame]
package org.apache.maven.shared.utils;
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import java.util.Locale;
import org.apache.commons.lang3.StringEscapeUtils;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.ComparisonFailure;
import org.junit.Test;
/**
* Test case for character case changes, pinpointing the situations where character case comparison doesn't
* give an intuitive result, and showing why one should avoid {@link String#toUpperCase()} and
* {@link String#toLowerCase()} (platform locale dependent, with sometimes unexpected results)
* but <b>prefer {@link String#equalsIgnoreCase(String)} when possible</b>.
*
* @author Hervé Boutemy
* @see <a href="http://sim.ivi.co/2011/07/trap-of-case-insensitive-string.html">Simple Smiles - Xuelei Fan's Blog</a>
*/
public class CaseTest
    extends Assert
{
    private final static Locale LOCALE_TURKISH = new Locale( "tr" );

    /**
     * Azerbaijani: Unicode SpecialCasing.txt lists the dotted/dotless i mappings under both the
     * <code>tr</code> and <code>az</code> language conditions, so it must be treated like Turkish.
     */
    private final static Locale LOCALE_AZERBAIJANI = new Locale( "az" );

    /** common ASCII 'i' */
    private final static char DOTTED_i = '\u0069';

    /** common ASCII 'I' */
    private final static char DOTLESS_I = '\u0049';

    /** turkish dotless i = ı */
    private final static char DOTLESS_i = '\u0131';

    /** turkish dotted I = İ */
    private final static char DOTTED_I = '\u0130';

    /** http://en.wikipedia.org/wiki/Dot_(diacritic) */
    private final static char COMBINING_DOT_ABOVE = '\u0307';

    /** Default locale captured when the class is loaded, restored once all tests have run. */
    private final static Locale SAVED_DEFAULT_LOCALE = Locale.getDefault();

    /**
     * Put back the default locale that was active before the tests of this class changed it.
     */
    @AfterClass
    public static void restoreDefaultLocale()
    {
        Locale.setDefault( SAVED_DEFAULT_LOCALE );
    }

    /**
     * test the known case of upper I which doesn't give commonly expected i in Turkish locale, but ı (dotless i).
     * @see <a href="http://mattryall.net/blog/2009/02/the-infamous-turkish-locale-bug">The infamous Turkish locale bug</a>
     */
    @Test
    public void testTurkishI()
    {
        // check common i and I
        assertEquals( "common lowercase i should have a dot", 'i', DOTTED_i );
        assertEquals( "common uppercase I should not have a dot", 'I', DOTLESS_I );

        final String iIıİ = "iIıİ";

        // check source encoding doesn't wreak havoc
        assertUnicodeEquals( "misc i directly in (UTF-8) source", iIıİ, "" + DOTTED_i + DOTLESS_I + DOTLESS_i
            + DOTTED_I );

        // check toUpperCase and toLowerCase difference with turkish and english locales
        assertUnicodeEquals( "'iIıİ'.toUpperCase('tr')=='İIIİ'", "" + DOTTED_I + DOTLESS_I + DOTLESS_I + DOTTED_I,
                             iIıİ.toUpperCase( LOCALE_TURKISH ) );
        assertUnicodeEquals( "'iIıİ'.toLowerCase('tr')=='iııi'", "" + DOTTED_i + DOTLESS_i + DOTLESS_i + DOTTED_i,
                             iIıİ.toLowerCase( LOCALE_TURKISH ) );
        assertUnicodeEquals( "'iIıİ'.toUpperCase('en')=='IIIİ'", "" + DOTLESS_I + DOTLESS_I + DOTLESS_I + DOTTED_I,
                             iIıİ.toUpperCase( Locale.ENGLISH ) );

        String lower = iIıİ.toLowerCase( Locale.ENGLISH ); // on some platforms, ends with extra COMBINING DOT ABOVE
        assertUnicodeEquals( "'iIıİ'.toLowerCase('en')=='iiıi'", "" + DOTTED_i + DOTTED_i + DOTLESS_i + DOTTED_i
            + ( lower.length() > 4 ? COMBINING_DOT_ABOVE : "" ), lower );

        // check equalsIgnoreCase() , which has no locale:
        // a string made of any single one of the four i variants must match the mixed string
        for ( int i = 0; i < iIıİ.length(); i++ )
        {
            char currentI = iIıİ.charAt( i );

            StringBuilder sb = new StringBuilder( iIıİ.length() );
            for ( int j = 0; j < iIıİ.length(); j++ )
            {
                sb.append( currentI );
            }
            String current = sb.toString();

            assertTrue( "'" + current + "'.equalsIgnoreCase('" + iIıİ + "')", current.equalsIgnoreCase( iIıİ ) );
        }
    }

    /**
     * Assert equals, and in case the result isn't as expected, display content unicode-escaped.
     *
     * @param message the failure message
     * @param expected the expected string
     * @param actual the string to compare against <code>expected</code>
     * @throws ComparisonFailure when the two strings differ; both are reported Java-escaped
     */
    private void assertUnicodeEquals( String message, String expected, String actual )
    {
        // null-safe comparison: a null expected value must produce a failure report, not an NPE
        boolean same = ( expected == null ) ? ( actual == null ) : expected.equals( actual );
        if ( same )
        {
            return;
        }

        throw new ComparisonFailure( message, StringEscapeUtils.escapeJava( expected ),
                                     StringEscapeUtils.escapeJava( actual ) );
    }

    /**
     * Tell whether the language of the given locale is one of those listed above as having the
     * dotted/dotless i case mappings (Turkish and Azerbaijani).
     *
     * @param locale the locale to check
     * @return <code>true</code> when the locale language is <code>tr</code> or <code>az</code>
     */
    private static boolean usesDottedAndDotlessI( Locale locale )
    {
        String language = locale.getLanguage();
        return LOCALE_TURKISH.getLanguage().equals( language )
            || LOCALE_AZERBAIJANI.getLanguage().equals( language );
    }

    /**
     * Test case change on all ascii characters with every available locale, to check that the turkish i
     * (shared by the Turkish and Azerbaijani languages) is the only exception on these characters.
     */
    @Test
    public void testAsciiAvailableLocales()
    {
        final String lower = "abcdefghijklmnopqrstuvwxyz";
        final String upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

        // the loop below changes the JVM default locale: remember it so that it is put back
        // even when an assertion fails half-way through
        final Locale previousDefault = Locale.getDefault();
        try
        {
            for ( Locale locale : Locale.getAvailableLocales() )
            {
                // check that toUpper() == toUpper(default locale) and toLower() = toLower(default locale)
                Locale.setDefault( locale );
                assertEquals( "toUpperCase() with default locale '" + locale.toString() + "'",
                              lower.toUpperCase(), lower.toUpperCase( locale ) );
                assertEquals( "toLowerCase() with default locale '" + locale.toString() + "'",
                              upper.toLowerCase(), upper.toLowerCase( locale ) );

                // check result
                String expectedToUpperCase = upper;
                String expectedToLowerCase = lower;
                if ( usesDottedAndDotlessI( locale ) )
                {
                    expectedToUpperCase = upper.replace( DOTLESS_I, DOTTED_I );
                    expectedToLowerCase = lower.replace( DOTTED_i, DOTLESS_i );
                }

                assertEquals( "'" + lower + "'.toUpperCase('" + locale.toString() + "')", expectedToUpperCase,
                              lower.toUpperCase( locale ) );
                assertEquals( "'" + upper + "'.toLowerCase('" + locale.toString() + "')", expectedToLowerCase,
                              upper.toLowerCase( locale ) );

                // check that toLowerCase on lower and toUpperCase on upper don't cause harm
                assertEquals( "'" + lower + "'.toLowerCase('" + locale.toString() + "')", lower,
                              lower.toLowerCase( locale ) );
                assertEquals( "'" + upper + "'.toUpperCase('" + locale.toString() + "')", upper,
                              upper.toUpperCase( locale ) );

                // check equalsIgnoreCase
                assertTrue( "'" + upper + "'.equalsIgnoreCase('" + lower + "')", upper.equalsIgnoreCase( lower ) );
                assertTrue( "'" + upper + "'.equalsIgnoreCase('" + expectedToLowerCase + "')",
                            upper.equalsIgnoreCase( expectedToLowerCase ) );
                assertTrue( "'" + expectedToUpperCase + "'.equalsIgnoreCase('" + lower + "')",
                            expectedToUpperCase.equalsIgnoreCase( lower ) );
            }
        }
        finally
        {
            Locale.setDefault( previousDefault );
        }
    }
}