/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
| package org.apache.maven.shared.utils; |
| |
| import java.util.Locale; |
| |
| import org.apache.commons.text.StringEscapeUtils; |
| import org.junit.AfterClass; |
| import org.junit.Assert; |
| import org.junit.ComparisonFailure; |
| import org.junit.Test; |
| |
| /** |
| * Test case for character case changes, to precisely point the situations when character case comparison doesn't |
| * give intuitive result, or why one should avoid {@link String#toUpperCase()} and {@link String#toLowerCase()} |
| * (platform locale dependent, with sometimes unexpected results) |
| * but <b>prefer {@link String#equalsIgnoreCase(String)} when possible</b>. |
| * |
| * @author Hervé Boutemy |
| * @see <a href="http://sim.ivi.co/2011/07/trap-of-case-insensitive-string.html">Simple Smiles - Xuelei Fan's Blog</a> |
| */ |
| public class CaseTest extends Assert { |
| private static final Locale LOCALE_TURKISH = new Locale("tr"); |
| |
| /** common ASCII 'i' */ |
| private static final char DOTTED_i = '\u0069'; |
| |
| /** common ASCII 'I' */ |
| private static final char DOTLESS_I = '\u0049'; |
| |
| /** turkish dotless i = ı */ |
| private static final char DOTLESS_i = '\u0131'; |
| |
| /** turkish dotted I = İ */ |
| private static final char DOTTED_I = '\u0130'; |
| |
| /** http://en.wikipedia.org/wiki/Dot_(diacritic) */ |
| private static final char COMBINING_DOT_ABOVE = '\u0307'; |
| |
| private static final Locale SAVED_DEFAULT_LOCALE = Locale.getDefault(); |
| |
| @AfterClass |
| public static void restoreDefaultLocale() { |
| Locale.setDefault(SAVED_DEFAULT_LOCALE); |
| } |
| |
| /** |
| * test the known case of upper I which doesn't give commonly expected i in Turkish locale, but ı (dotless i). |
| * @see <a href="http://mattryall.net/blog/2009/02/the-infamous-turkish-locale-bug">The infamous Turkish locale bug</a> |
| */ |
| @Test |
| public void testTurkishI() { |
| // check common i and I |
| assertEquals("common lowercase i should have a dot", 'i', DOTTED_i); |
| assertEquals("common uppercase I should not have a dot", 'I', DOTLESS_I); |
| |
| final String iIıİ = "iIıİ"; |
| |
| // check source encoding doesn't wreck havoc */ |
| assertUnicodeEquals( |
| "misc i directly in (UTF-8) source", iIıİ, "" + DOTTED_i + DOTLESS_I + DOTLESS_i + DOTTED_I); |
| |
| // check toUpperCase and toLowerCase difference with turkish and english locales |
| assertUnicodeEquals( |
| "'iIıİ'.toUpperCase('tr')=='İIIİ'", |
| "" + DOTTED_I + DOTLESS_I + DOTLESS_I + DOTTED_I, |
| iIıİ.toUpperCase(LOCALE_TURKISH)); |
| assertUnicodeEquals( |
| "'iIıİ'.toLowerCase('tr')=='iııi'", |
| "" + DOTTED_i + DOTLESS_i + DOTLESS_i + DOTTED_i, |
| iIıİ.toLowerCase(LOCALE_TURKISH)); |
| assertUnicodeEquals( |
| "'iIıİ'.toUpperCase('en')=='IIIİ'", |
| "" + DOTLESS_I + DOTLESS_I + DOTLESS_I + DOTTED_I, |
| iIıİ.toUpperCase(Locale.ENGLISH)); |
| String lower = iIıİ.toLowerCase(Locale.ENGLISH); // on some platforms, ends with extra COMBINED DOT ABOVE |
| assertUnicodeEquals( |
| "'iIıİ'.toLowerCase('en')=='iiıi'", |
| "" + DOTTED_i + DOTTED_i + DOTLESS_i + DOTTED_i + (lower.length() > 4 ? COMBINING_DOT_ABOVE : ""), |
| lower); |
| |
| // check equalsIgnoreCase() , which has no locale |
| for (int i = 0; i < iIıİ.length(); i++) { |
| char currentI = iIıİ.charAt(i); |
| |
| StringBuilder sb = new StringBuilder(iIıİ.length()); |
| for (int j = 0; j < iIıİ.length(); j++) { |
| sb.append(currentI); |
| } |
| String current = sb.toString(); |
| |
| assertTrue("'" + current + "'.equalsIgnoreCase('" + iIıİ + "')", current.equalsIgnoreCase(iIıİ)); |
| } |
| } |
| |
| /** |
| * Assert equals, and in case the result isn't as expected, display content unicode-escaped. |
| * @param message |
| * @param expected |
| * @param actual |
| */ |
| private void assertUnicodeEquals(String message, String expected, String actual) { |
| if (expected.equals(actual)) { |
| return; |
| } |
| |
| throw new ComparisonFailure( |
| message, StringEscapeUtils.escapeJava(expected), StringEscapeUtils.escapeJava(actual)); |
| } |
| |
| /** |
| * Test case change on all ascii characters with every available locale, to check that turkish i is the only |
| * exception on these characters. |
| */ |
| @Test |
| public void testAsciiAvailableLocales() { |
| final String lower = "abcdefghijklmnopqrstuvwxyz"; |
| final String upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; |
| |
| for (Locale locale : Locale.getAvailableLocales()) { |
| // check that toUpper() == toUpper(default locale) and toLower() = toLower(default locale) |
| Locale.setDefault(locale); |
| assertEquals(lower.toUpperCase(), lower.toUpperCase(locale)); |
| assertEquals(upper.toLowerCase(), upper.toLowerCase(locale)); |
| |
| // check result |
| String expectedToUpperCase = upper; |
| String expectedToLowerCase = lower; |
| if (LOCALE_TURKISH.getLanguage().equals(locale.getLanguage()) |
| || new Locale("az").getLanguage().equals(locale.getLanguage())) { |
| expectedToUpperCase = upper.replace(DOTLESS_I, DOTTED_I); |
| expectedToLowerCase = lower.replace(DOTTED_i, DOTLESS_i); |
| } |
| |
| assertEquals( |
| "'" + lower + "'.toUpperCase('" + locale.toString() + "')", |
| expectedToUpperCase, |
| lower.toUpperCase(locale)); |
| assertEquals( |
| "'" + upper + "'.toLowerCase('" + locale.toString() + "')", |
| expectedToLowerCase, |
| upper.toLowerCase(locale)); |
| |
| // check that toLowerCase on lower and toUpperCase on upper don't cause harm |
| assertEquals("'" + lower + "'.toLowerCase('" + locale.toString() + "')", lower, lower.toLowerCase(locale)); |
| assertEquals("'" + upper + "'.toUpperCase('" + locale.toString() + "')", upper, upper.toUpperCase(locale)); |
| |
| // check equalsIgnoreCase |
| assertTrue("'" + upper + "'.equalsIgnoreCase('" + lower + "')", upper.equalsIgnoreCase(lower)); |
| assertTrue( |
| "'" + upper + "'.equalsIgnoreCase('" + expectedToLowerCase + "')", |
| upper.equalsIgnoreCase(expectedToLowerCase)); |
| assertTrue( |
| "'" + expectedToUpperCase + "'.equalsIgnoreCase('" + lower + "')", |
| expectedToUpperCase.equalsIgnoreCase(lower)); |
| } |
| } |
| } |