Bugfix for UnicodeHelper in Bindy component (#3702)
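* Bugfix for UnicodeHelper.indexOf(...): it now matches unit by unit (code point or grapheme), returns -1 instead of throwing IllegalArgumentException when there is no unit-aligned match, and gains an indexOf(String, int) overload.
* The default for counting chars (FixedLengthRecord.countGrapheme) is now code points, as in XML Schema.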
Co-authored-by: Michael Greulich <michael.greulich@interface-ag.de>
diff --git a/components/camel-bindy/src/main/java/org/apache/camel/dataformat/bindy/UnicodeHelper.java b/components/camel-bindy/src/main/java/org/apache/camel/dataformat/bindy/UnicodeHelper.java
index f55e4e2..6a30753 100644
--- a/components/camel-bindy/src/main/java/org/apache/camel/dataformat/bindy/UnicodeHelper.java
+++ b/components/camel-bindy/src/main/java/org/apache/camel/dataformat/bindy/UnicodeHelper.java
@@ -117,25 +117,24 @@
* @see String#indexOf(String)
*/
public int indexOf(final String str) {
+ return indexOf(str, 0);
+ }
+
+ /**
+ * Like {@link String#indexOf(String, int)} but counting in units of this helper's method; negative fromIndex acts as 0.
+ */
+ public int indexOf(final String str, final int fromIndex) {
split();
- final int tempIdx = input.indexOf(str);
- if (tempIdx < 0) {
- return tempIdx;
- }
-
- for (int b = 0; b < splitted.size() - 1; b++) {
- if (tempIdx == splitted.get(b)) {
- for (int e = b + 1; e < splitted.size() - 1; e++) {
- if (tempIdx + str.length() == splitted.get(e)) {
- return b;
- }
- }
- }
+ final int len = new UnicodeHelper(str, method).length();
+ // "<=": a match may end exactly at the last unit (assumes length() == splitted.size() - 1).
+ for (int index = Math.max(fromIndex, 0); index + len <= length(); index++) {
+ if (str.equals(input.substring(splitted.get(index), splitted.get(index + len)))) {
+ return index;
}
}
-
- final String cps = str.codePoints().mapToObj(cp -> String.format("0x%X", cp)).collect(Collectors.joining(","));
- throw new IllegalArgumentException("Given string (" + cps + ") is not a valid sequence of " + this.method + "s.");
+
+ return -1;
}
private void split() {
diff --git a/components/camel-bindy/src/main/java/org/apache/camel/dataformat/bindy/annotation/FixedLengthRecord.java b/components/camel-bindy/src/main/java/org/apache/camel/dataformat/bindy/annotation/FixedLengthRecord.java
index d8d93ae..ebfbe73 100644
--- a/components/camel-bindy/src/main/java/org/apache/camel/dataformat/bindy/annotation/FixedLengthRecord.java
+++ b/components/camel-bindy/src/main/java/org/apache/camel/dataformat/bindy/annotation/FixedLengthRecord.java
@@ -102,5 +102,5 @@
/**
* Indicates how chars are counted
*/
- boolean countGrapheme() default true;
+ boolean countGrapheme() default false; // false (new default): count code points; true: presumably graphemes
}
diff --git a/components/camel-bindy/src/test/java/org/apache/camel/dataformat/bindy/UnicodeHelperTest.java b/components/camel-bindy/src/test/java/org/apache/camel/dataformat/bindy/UnicodeHelperTest.java
index 6765887..b2e7fdb 100644
--- a/components/camel-bindy/src/test/java/org/apache/camel/dataformat/bindy/UnicodeHelperTest.java
+++ b/components/camel-bindy/src/test/java/org/apache/camel/dataformat/bindy/UnicodeHelperTest.java
@@ -167,49 +167,43 @@
public void testIndexOf() {
final UnicodeHelper lh = new UnicodeHelper("a", Method.CODEPOINTS);
Assert.assertEquals(-1, lh.indexOf("b"));
-
+
final UnicodeHelper lh2 = new UnicodeHelper(
- "a" + new String(Character.toChars(0x1f600)) + "a" + UCSTR + "A" + "k\u035fh" + "z",
+ "a" + new String(Character.toChars(0x1f600)) + "a" + UCSTR + "A" + "k\u035fh" + "z"
+ + "a" + new String(Character.toChars(0x1f600)) + "a" + UCSTR + "A" + "k\u035fh" + "z",
Method.CODEPOINTS);
-
+ // The input repeats one sequence twice; per the expected values the second copy starts at code point 13.
Assert.assertEquals(1, lh2.indexOf(new String(Character.toChars(0x1f600))));
+ Assert.assertEquals(14, lh2.indexOf(new String(Character.toChars(0x1f600)), 13));
Assert.assertEquals(3, lh2.indexOf(UCSTR));
-
+ Assert.assertEquals(16, lh2.indexOf(UCSTR, 13));
+
Assert.assertEquals(10, lh2.indexOf("\u035f"));
-
- expectIllegalArgumentException(() -> {
- lh2.indexOf(Character.toString(Character.toChars(0x1f600)[0])); // UTF-16 surrogates are no codepoints.
- });
+ Assert.assertEquals(23, lh2.indexOf("\u035f", 13));
}
-
+
@Test
public void testIndexOf2() {
final UnicodeHelper lh = new UnicodeHelper("a", Method.GRAPHEME);
Assert.assertEquals(-1, lh.indexOf("b"));
-
+
final UnicodeHelper lh2 = new UnicodeHelper(
- "a" + new String(Character.toChars(0x1f600)) + "a" + UCSTR + "A" + "k\u035fh" + "z",
- Method.GRAPHEME);
-
+ "a" + new String(Character.toChars(0x1f600)) + "a" + UCSTR + "A" + "k\u035fh" + "z"
+ + "a" + new String(Character.toChars(0x1f600)) + "a" + UCSTR + "A" + "k\u035fh" + "z",
+ Method.GRAPHEME
+ );
+ // The input repeats one sequence twice; per the expected values the second copy starts at grapheme 8.
Assert.assertEquals(1, lh2.indexOf(new String(Character.toChars(0x1f600))));
-
+ Assert.assertEquals(9, lh2.indexOf(new String(Character.toChars(0x1f600)), 8));
+
Assert.assertEquals(3, lh2.indexOf(UCSTR));
+ Assert.assertEquals(11, lh2.indexOf(UCSTR, 8));
- expectIllegalArgumentException(() -> {
- lh2.indexOf("\u035f"); // Codepoint of dangling combing char is not a "unicode char".
- });
- }
-
- private void expectIllegalArgumentException(final Runnable r) {
- try {
- r.run();
- Assert.assertTrue("We do not expect to reach here -- missing IllegalArgumentException.", false);
-
- } catch (final IllegalArgumentException e) {
- LOG.debug("Caught expected IllegalArgumentException", e);
-
- }
+ final UnicodeHelper lh3 = new UnicodeHelper("mm̂mm̂m", Method.GRAPHEME);
+ Assert.assertEquals(0, lh3.indexOf("m"));
+ Assert.assertEquals(2, lh3.indexOf("m", 1));
+ Assert.assertEquals(3, lh3.indexOf("m̂", 2));
}
private static String cps2String(final int... cps) {