OPENNLP-1268 -- fix StringUtil.toLowerCase() to work on codepoints, not chars (#356)

diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/StringUtil.java b/opennlp-tools/src/main/java/opennlp/tools/util/StringUtil.java
index c3bd7e6..88f0fa6 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/StringUtil.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/StringUtil.java
@@ -66,20 +66,15 @@
 
   /**
    * Converts to lower case independent of the current locale via
-   * {@link Character#toLowerCase(char)} which uses mapping information
+   * {@link Character#toLowerCase(int)} which uses mapping information
    * from the UnicodeData file.
    *
    * @param string
    * @return lower cased String
    */
   public static String toLowerCase(CharSequence string) {
-    char[] lowerCaseChars = new char[string.length()];
-
-    for (int i = 0; i < string.length(); i++) {
-      lowerCaseChars[i] = Character.toLowerCase(string.charAt(i));
-    }
-
-    return new String(lowerCaseChars);
+    int[] cp = string.codePoints().map(Character::toLowerCase).toArray(); // per code point, not char
+    return new String(cp, 0, cp.length); // String(int[], int, int) takes code points
   }
 
   /**
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/StringUtilTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/StringUtilTest.java
index f2cc41f..4aa0b59 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/util/StringUtilTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/StringUtilTest.java
@@ -60,4 +60,12 @@
     StringUtil.isEmpty(null);
   }
 
+  @Test
+  public void testLowercaseBeyondBMP() throws Exception {
+    // 'A', U+10412 DESERET CAPITAL LETTER BEE (a supplementary code point), 'C'
+    String mixedCase = new String(new int[]{0x41, 0x10412, 0x43}, 0, 3);
+    // expected: 'a', U+1043A DESERET SMALL LETTER BEE, 'c'
+    int[] lowered = StringUtil.toLowerCase(mixedCase).codePoints().toArray();
+    Assert.assertArrayEquals(new int[]{0x61, 0x1043A, 0x63}, lowered);
+  }
 }