src/test/java/org/apache/maven/shared/utils/CaseTest.java - maven-shared-utils - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */
 package org.apache.maven.shared.utils;

 import java.util.Locale;

 import org.apache.commons.text.StringEscapeUtils;
 import org.junit.AfterClass;
 import org.junit.Assert;
 import org.junit.ComparisonFailure;
 import org.junit.Test;

 /**
  * Test case for character case changes, pinpointing the situations in which character case comparison doesn't
  * give an intuitive result, and why one should avoid {@link String#toUpperCase()} and {@link String#toLowerCase()}
  * (platform locale dependent, with sometimes unexpected results)
  * but <b>prefer {@link String#equalsIgnoreCase(String)} when possible</b>.
  *
  * @author Hervé Boutemy
  * @see <a href="http://sim.ivi.co/2011/07/trap-of-case-insensitive-string.html">Simple Smiles - Xuelei Fan's Blog</a>
  */
 public class CaseTest extends Assert {
     private static final Locale LOCALE_TURKISH = new Locale("tr");

     /** second language for which this test expects the same ASCII i/I case mapping as for Turkish */
     private static final Locale LOCALE_AZERI = new Locale("az");

     /** common ASCII 'i' */
     private static final char DOTTED_i = '\u0069';

     /** common ASCII 'I' */
     private static final char DOTLESS_I = '\u0049';

     /** turkish dotless i = ı */
     private static final char DOTLESS_i = '\u0131';

     /** turkish dotted I = İ */
     private static final char DOTTED_I = '\u0130';

     /** http://en.wikipedia.org/wiki/Dot_(diacritic) */
     private static final char COMBINING_DOT_ABOVE = '\u0307';

     /** default locale captured when the class is initialized, restored once all tests have run */
     private static final Locale SAVED_DEFAULT_LOCALE = Locale.getDefault();

     /**
      * Puts back the default locale that was active when this class was loaded.
      */
     @AfterClass
     public static void restoreDefaultLocale() {
         Locale.setDefault(SAVED_DEFAULT_LOCALE);
     }

     /**
      * test the known case of upper I which doesn't give commonly expected i in Turkish locale, but ı (dotless i).
      * @see <a href="http://mattryall.net/blog/2009/02/the-infamous-turkish-locale-bug">The infamous Turkish locale bug</a>
      */
     @Test
     public void testTurkishI() {
         // check common i and I
         assertEquals("common lowercase i should have a dot", 'i', DOTTED_i);
         assertEquals("common uppercase I should not have a dot", 'I', DOTLESS_I);

         final String iIıİ = "iIıİ";

         // check source encoding doesn't wreak havoc
         assertUnicodeEquals(
                 "misc i directly in (UTF-8) source", iIıİ, "" + DOTTED_i + DOTLESS_I + DOTLESS_i + DOTTED_I);

         // check toUpperCase and toLowerCase difference with turkish and english locales
         assertUnicodeEquals(
                 "'iIıİ'.toUpperCase('tr')=='İIIİ'",
                 "" + DOTTED_I + DOTLESS_I + DOTLESS_I + DOTTED_I,
                 iIıİ.toUpperCase(LOCALE_TURKISH));
         assertUnicodeEquals(
                 "'iIıİ'.toLowerCase('tr')=='iııi'",
                 "" + DOTTED_i + DOTLESS_i + DOTLESS_i + DOTTED_i,
                 iIıİ.toLowerCase(LOCALE_TURKISH));
         assertUnicodeEquals(
                 "'iIıİ'.toUpperCase('en')=='IIIİ'",
                 "" + DOTLESS_I + DOTLESS_I + DOTLESS_I + DOTTED_I,
                 iIıİ.toUpperCase(Locale.ENGLISH));

         // on some platforms the lowercased value ends with an extra COMBINING DOT ABOVE: accept both forms
         String lower = iIıİ.toLowerCase(Locale.ENGLISH);
         String expectedLower = "" + DOTTED_i + DOTTED_i + DOTLESS_i + DOTTED_i;
         if (lower.length() > 4) {
             expectedLower += COMBINING_DOT_ABOVE;
         }
         assertUnicodeEquals("'iIıİ'.toLowerCase('en')=='iiıi'", expectedLower, lower);

         // check equalsIgnoreCase() , which has no locale: each variant of i, repeated to the same length,
         // must be considered equal to the mixed string
         for (int i = 0; i < iIıİ.length(); i++) {
             char currentI = iIıİ.charAt(i);

             StringBuilder sb = new StringBuilder(iIıİ.length());
             for (int j = 0; j < iIıİ.length(); j++) {
                 sb.append(currentI);
             }
             String current = sb.toString();

             assertTrue("'" + current + "'.equalsIgnoreCase('" + iIıİ + "')", current.equalsIgnoreCase(iIıİ));
         }
     }

     /**
      * Assert equals, and in case the result isn't as expected, display content unicode-escaped.
      *
      * @param message description reported with the failure
      * @param expected expected value, must not be {@code null}
      * @param actual value produced by the code under test
      * @throws ComparisonFailure when both values differ, carrying their Java-escaped forms
      */
     private static void assertUnicodeEquals(String message, String expected, String actual) {
         if (expected.equals(actual)) {
             return;
         }

         throw new ComparisonFailure(
                 message, StringEscapeUtils.escapeJava(expected), StringEscapeUtils.escapeJava(actual));
     }

     /**
      * Test case change on all ascii characters with every available locale, to check that turkish i is the only
      * exception on these characters.
      */
     @Test
     public void testAsciiAvailableLocales() {
         final String lower = "abcdefghijklmnopqrstuvwxyz";
         final String upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

         // the loop below replaces the JVM-wide default locale: remember it so that it is put back even when an
         // assertion fails half-way, instead of leaking into whatever test method runs next
         final Locale defaultOnEntry = Locale.getDefault();
         try {
             for (Locale locale : Locale.getAvailableLocales()) {
                 // check that toUpper() == toUpper(default locale) and toLower() = toLower(default locale)
                 Locale.setDefault(locale);
                 assertEquals(
                         "'" + lower + "'.toUpperCase() with default locale '" + locale + "'",
                         lower.toUpperCase(),
                         lower.toUpperCase(locale));
                 assertEquals(
                         "'" + upper + "'.toLowerCase() with default locale '" + locale + "'",
                         upper.toLowerCase(),
                         upper.toLowerCase(locale));

                 // check result
                 String expectedToUpperCase = upper;
                 String expectedToLowerCase = lower;
                 String language = locale.getLanguage();
                 if (LOCALE_TURKISH.getLanguage().equals(language) || LOCALE_AZERI.getLanguage().equals(language)) {
                     expectedToUpperCase = upper.replace(DOTLESS_I, DOTTED_I);
                     expectedToLowerCase = lower.replace(DOTTED_i, DOTLESS_i);
                 }

                 assertEquals(
                         "'" + lower + "'.toUpperCase('" + locale.toString() + "')",
                         expectedToUpperCase,
                         lower.toUpperCase(locale));
                 assertEquals(
                         "'" + upper + "'.toLowerCase('" + locale.toString() + "')",
                         expectedToLowerCase,
                         upper.toLowerCase(locale));

                 // check that toLowerCase on lower and toUpperCase on upper don't cause harm
                 assertEquals(
                         "'" + lower + "'.toLowerCase('" + locale.toString() + "')", lower, lower.toLowerCase(locale));
                 assertEquals(
                         "'" + upper + "'.toUpperCase('" + locale.toString() + "')", upper, upper.toUpperCase(locale));

                 // check equalsIgnoreCase
                 assertTrue("'" + upper + "'.equalsIgnoreCase('" + lower + "')", upper.equalsIgnoreCase(lower));
                 assertTrue(
                         "'" + upper + "'.equalsIgnoreCase('" + expectedToLowerCase + "')",
                         upper.equalsIgnoreCase(expectedToLowerCase));
                 assertTrue(
                         "'" + expectedToUpperCase + "'.equalsIgnoreCase('" + lower + "')",
                         expectedToUpperCase.equalsIgnoreCase(lower));
             }
         } finally {
             Locale.setDefault(defaultOnEntry);
         }
     }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/
	package org.apache.maven.shared.utils;

	import java.util.Locale;

	import org.apache.commons.text.StringEscapeUtils;
	import org.junit.AfterClass;
	import org.junit.Assert;
	import org.junit.ComparisonFailure;
	import org.junit.Test;

	/**
	* Test case for character case changes, pinpointing the situations in which character case comparison doesn't
	* give an intuitive result, and why one should avoid {@link String#toUpperCase()} and {@link String#toLowerCase()}
	* (platform locale dependent, with sometimes unexpected results)
	* but <b>prefer {@link String#equalsIgnoreCase(String)} when possible</b>.
	*
	* @author Hervé Boutemy
	* @see <a href="http://sim.ivi.co/2011/07/trap-of-case-insensitive-string.html">Simple Smiles - Xuelei Fan's Blog</a>
	*/
	public class CaseTest extends Assert {
	private static final Locale LOCALE_TURKISH = new Locale("tr");

	/** common ASCII 'i' */
	private static final char DOTTED_i = '\u0069';

	/** common ASCII 'I' */
	private static final char DOTLESS_I = '\u0049';

	/** turkish dotless i = ı */
	private static final char DOTLESS_i = '\u0131';

	/** turkish dotted I = İ */
	private static final char DOTTED_I = '\u0130';

	/** http://en.wikipedia.org/wiki/Dot_(diacritic) */
	private static final char COMBINING_DOT_ABOVE = '\u0307';

	private static final Locale SAVED_DEFAULT_LOCALE = Locale.getDefault();

	@AfterClass
	public static void restoreDefaultLocale() {
	Locale.setDefault(SAVED_DEFAULT_LOCALE);
	}

	/**
	* test the known case of upper I which doesn't give commonly expected i in Turkish locale, but ı (dotless i).
	* @see <a href="http://mattryall.net/blog/2009/02/the-infamous-turkish-locale-bug">The infamous Turkish locale bug</a>
	*/
	@Test
	public void testTurkishI() {
	// check common i and I
	assertEquals("common lowercase i should have a dot", 'i', DOTTED_i);
	assertEquals("common uppercase I should not have a dot", 'I', DOTLESS_I);

	final String iIıİ = "iIıİ";

	// check source encoding doesn't wreck havoc */
	assertUnicodeEquals(
	"misc i directly in (UTF-8) source", iIıİ, "" + DOTTED_i + DOTLESS_I + DOTLESS_i + DOTTED_I);

	// check toUpperCase and toLowerCase difference with turkish and english locales
	assertUnicodeEquals(
	"'iIıİ'.toUpperCase('tr')=='İIIİ'",
	"" + DOTTED_I + DOTLESS_I + DOTLESS_I + DOTTED_I,
	iIıİ.toUpperCase(LOCALE_TURKISH));
	assertUnicodeEquals(
	"'iIıİ'.toLowerCase('tr')=='iııi'",
	"" + DOTTED_i + DOTLESS_i + DOTLESS_i + DOTTED_i,
	iIıİ.toLowerCase(LOCALE_TURKISH));
	assertUnicodeEquals(
	"'iIıİ'.toUpperCase('en')=='IIIİ'",
	"" + DOTLESS_I + DOTLESS_I + DOTLESS_I + DOTTED_I,
	iIıİ.toUpperCase(Locale.ENGLISH));
	String lower = iIıİ.toLowerCase(Locale.ENGLISH); // on some platforms, ends with extra COMBINED DOT ABOVE
	assertUnicodeEquals(
	"'iIıİ'.toLowerCase('en')=='iiıi'",
	"" + DOTTED_i + DOTTED_i + DOTLESS_i + DOTTED_i + (lower.length() > 4 ? COMBINING_DOT_ABOVE : ""),
	lower);

	// check equalsIgnoreCase() , which has no locale
	for (int i = 0; i < iIıİ.length(); i++) {
	char currentI = iIıİ.charAt(i);

	StringBuilder sb = new StringBuilder(iIıİ.length());
	for (int j = 0; j < iIıİ.length(); j++) {
	sb.append(currentI);
	}
	String current = sb.toString();

	assertTrue("'" + current + "'.equalsIgnoreCase('" + iIıİ + "')", current.equalsIgnoreCase(iIıİ));
	}
	}

	/**
	* Assert equals, and in case the result isn't as expected, display content unicode-escaped.
	* @param message
	* @param expected
	* @param actual
	*/
	private void assertUnicodeEquals(String message, String expected, String actual) {
	if (expected.equals(actual)) {
	return;
	}

	throw new ComparisonFailure(
	message, StringEscapeUtils.escapeJava(expected), StringEscapeUtils.escapeJava(actual));
	}

	/**
	* Test case change on all ascii characters with every available locale, to check that turkish i is the only
	* exception on these characters.
	*/
	@Test
	public void testAsciiAvailableLocales() {
	final String lower = "abcdefghijklmnopqrstuvwxyz";
	final String upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

	for (Locale locale : Locale.getAvailableLocales()) {
	// check that toUpper() == toUpper(default locale) and toLower() = toLower(default locale)
	Locale.setDefault(locale);
	assertEquals(lower.toUpperCase(), lower.toUpperCase(locale));
	assertEquals(upper.toLowerCase(), upper.toLowerCase(locale));

	// check result
	String expectedToUpperCase = upper;
	String expectedToLowerCase = lower;
	if (LOCALE_TURKISH.getLanguage().equals(locale.getLanguage())
	\|\| new Locale("az").getLanguage().equals(locale.getLanguage())) {
	expectedToUpperCase = upper.replace(DOTLESS_I, DOTTED_I);
	expectedToLowerCase = lower.replace(DOTTED_i, DOTLESS_i);
	}

	assertEquals(
	"'" + lower + "'.toUpperCase('" + locale.toString() + "')",
	expectedToUpperCase,
	lower.toUpperCase(locale));
	assertEquals(
	"'" + upper + "'.toLowerCase('" + locale.toString() + "')",
	expectedToLowerCase,
	upper.toLowerCase(locale));

	// check that toLowerCase on lower and toUpperCase on upper don't cause harm
	assertEquals("'" + lower + "'.toLowerCase('" + locale.toString() + "')", lower, lower.toLowerCase(locale));
	assertEquals("'" + upper + "'.toUpperCase('" + locale.toString() + "')", upper, upper.toUpperCase(locale));

	// check equalsIgnoreCase
	assertTrue("'" + upper + "'.equalsIgnoreCase('" + lower + "')", upper.equalsIgnoreCase(lower));
	assertTrue(
	"'" + upper + "'.equalsIgnoreCase('" + expectedToLowerCase + "')",
	upper.equalsIgnoreCase(expectedToLowerCase));
	assertTrue(
	"'" + expectedToUpperCase + "'.equalsIgnoreCase('" + lower + "')",
	expectedToUpperCase.equalsIgnoreCase(lower));
	}
	}
	}