[JOHNZON-403] improved long string performance (#121)
* prototype improved large string parsing
* fix regression, fix build
* properly close JsonReader in test
diff --git a/johnzon-core/src/main/java/org/apache/johnzon/core/JsonStreamParserImpl.java b/johnzon-core/src/main/java/org/apache/johnzon/core/JsonStreamParserImpl.java
index d9b5338..dede892 100644
--- a/johnzon-core/src/main/java/org/apache/johnzon/core/JsonStreamParserImpl.java
+++ b/johnzon-core/src/main/java/org/apache/johnzon/core/JsonStreamParserImpl.java
@@ -28,6 +28,8 @@
import java.io.Reader;
import java.math.BigDecimal;
import java.nio.charset.Charset;
+import java.util.LinkedList;
+import java.util.List;
import java.util.NoSuchElementException;
//This class represents either the Json tokenizer and the Json parser.
@@ -74,6 +76,8 @@
private char[] fallBackCopyBuffer;
private boolean releaseFallBackCopyBufferLength = true;
private int fallBackCopyBufferLength;
+ // when boundaries of fallBackCopyBuffer have been reached
+ private List<Buffer> previousFallBackCopyBuffers;
// location (line, column, offset)
// We try to calculate this efficiently so we do not just increment the values per char read
@@ -115,6 +119,16 @@
}
}
+ private static final class Buffer { // one parked fallback chunk: a char[] plus how much of it is filled
+ private char[] buffer; // backing array; passed to valueProvider.release(...) when parked chunks are released
+ private int length; // number of valid chars, counted from index 0 of buffer
+
+ public Buffer(char[] buffer, int length) {
+ this.buffer = buffer;
+ this.length = length;
+ }
+ }
+
//detect charset according to RFC 4627
public JsonStreamParserImpl(final InputStream inputStream, final int maxStringLength,
final BufferStrategy.BufferProvider<char[]> bufferProvider, final BufferStrategy.BufferProvider<char[]> valueBuffer,
@@ -165,7 +179,7 @@
//append a single char to the value buffer
private void appendToCopyBuffer(final char c) {
if (fallBackCopyBufferLength >= fallBackCopyBuffer.length - 1) {
- doAutoAdjust(1);
+ createNewFallBackCopyBuffer(); // chunk is full: park it and continue in a fresh buffer (throws when autoAdjust is off)
}
fallBackCopyBuffer[fallBackCopyBufferLength++] = c;
}
@@ -180,40 +194,39 @@
}
if (fallBackCopyBufferLength >= fallBackCopyBuffer.length - length) { // not good at runtime but handled
- doAutoAdjust(length);
- } else {
- System.arraycopy(buffer, startOfValueInBuffer, fallBackCopyBuffer, fallBackCopyBufferLength, length);
+ createNewFallBackCopyBuffer();
}
+
+ System.arraycopy(buffer, startOfValueInBuffer, fallBackCopyBuffer, fallBackCopyBufferLength, length);
fallBackCopyBufferLength += length;
}
startOfValueInBuffer = endOfValueInBuffer = -1;
}
- private void doAutoAdjust(final int length) {
+ // Parks the current fallBackCopyBuffer (together with its fill length) in previousFallBackCopyBuffers and starts a
+ // fresh one. Per the change author this is much faster than resizing (recreating + copying) fallBackCopyBuffer.
+ private void createNewFallBackCopyBuffer() {
if (!autoAdjust) {
throw new ArrayIndexOutOfBoundsException("Buffer too small for such a long string");
}
- final char[] newArray = new char[fallBackCopyBuffer.length + Math.max(getBufferExtends(fallBackCopyBuffer.length), length)];
- // TODO: log to adjust size once?
- System.arraycopy(fallBackCopyBuffer, 0, newArray, 0, fallBackCopyBufferLength);
- if (startOfValueInBuffer != -1) {
- System.arraycopy(buffer, startOfValueInBuffer, newArray, fallBackCopyBufferLength, length);
+ if (previousFallBackCopyBuffers == null) { // the list is created lazily, on the first overflow
+ previousFallBackCopyBuffers = new LinkedList<>();
}
- if (releaseFallBackCopyBufferLength) {
- bufferProvider.release(fallBackCopyBuffer);
- releaseFallBackCopyBufferLength = false;
- }
- fallBackCopyBuffer = newArray;
+
+ previousFallBackCopyBuffers.add(new Buffer(fallBackCopyBuffer, fallBackCopyBufferLength)); // appended in fill order
+ fallBackCopyBuffer = valueProvider.newBuffer(); // NOTE(review): capacity is not checked against the caller's pending write - confirm
+ fallBackCopyBufferLength = 0; // the fresh chunk starts empty
}
- /**
- * @param currentLength length of the buffer
- * @return the amount of bytes the current buffer should get extended with
- */
- protected int getBufferExtends(int currentLength) {
- return currentLength / 4;
+ private void releasePreviousFallBackCopyBuffers() { // hands every parked chunk back to valueProvider and drops the list
+ if (previousFallBackCopyBuffers == null) { // nothing has been parked
+ return;
+ }
+
+ previousFallBackCopyBuffers.forEach(it -> valueProvider.release(it.buffer));
+ previousFallBackCopyBuffers = null; // null means "no parked chunks"; createNewFallBackCopyBuffer() re-creates the list
}
@@ -443,6 +456,7 @@
currentIntegralNumber = Integer.MIN_VALUE;
}
+ releasePreviousFallBackCopyBuffers();
if (fallBackCopyBufferLength != 0) {
fallBackCopyBufferLength = 0;
}
@@ -898,6 +912,7 @@
@Override
public String getString() {
if (previousEvent == KEY_NAME || previousEvent == VALUE_STRING || previousEvent == VALUE_NUMBER) {
+ combinePreviousFallbackBuffersToCurrent();
//if there a content in the value buffer read from them, if not use main buffer
return fallBackCopyBufferLength > 0 ? new String(fallBackCopyBuffer, 0, fallBackCopyBufferLength) : new String(buffer,
@@ -907,6 +922,30 @@
}
}
+ // Merges all parked fallback chunks plus the current fallBackCopyBuffer content into one new char[], which then
+ // becomes fallBackCopyBuffer so readers get a single contiguous array. The parked chunks are released while doing so.
+ private void combinePreviousFallbackBuffersToCurrent() {
+ if (previousFallBackCopyBuffers == null) { // no chunk was parked: nothing to merge
+ return;
+ }
+
+ int newSize = previousFallBackCopyBuffers.stream().mapToInt(it -> it.length).sum() + fallBackCopyBufferLength; // total filled chars
+ char[] newBuffer = new char[newSize]; // allocated directly, not obtained from valueProvider
+
+ int index = 0; // next write position in newBuffer
+ for (Buffer buffer : previousFallBackCopyBuffers) { // iterated in the order the chunks were parked
+ System.arraycopy(buffer.buffer, 0, newBuffer, index, buffer.length);
+ index += buffer.length;
+ }
+
+ System.arraycopy(fallBackCopyBuffer, 0, newBuffer, index, fallBackCopyBufferLength); // the current chunk goes last
+ index += fallBackCopyBufferLength;
+
+ releasePreviousFallBackCopyBuffers();
+ fallBackCopyBuffer = newBuffer; // NOTE(review): the replaced fallBackCopyBuffer is not released here - confirm intended
+ fallBackCopyBufferLength = index; // equals newSize at this point
+ }
+
@Override
public boolean isIntegralNumber() {
@@ -929,6 +968,7 @@
} else if (isCurrentNumberIntegral && currentIntegralNumber != Integer.MIN_VALUE) {
return currentIntegralNumber;
} else if (isCurrentNumberIntegral) {
+ combinePreviousFallbackBuffersToCurrent();
//if there a content in the value buffer read from them, if not use main buffer
final Integer retVal = fallBackCopyBufferLength > 0 ? parseIntegerFromChars(fallBackCopyBuffer, 0, fallBackCopyBufferLength)
: parseIntegerFromChars(buffer, startOfValueInBuffer, endOfValueInBuffer);
@@ -949,6 +989,7 @@
} else if (isCurrentNumberIntegral && currentIntegralNumber != Integer.MIN_VALUE) {
return currentIntegralNumber;
} else if (isCurrentNumberIntegral) {
+ combinePreviousFallbackBuffersToCurrent();
//if there a content in the value buffer read from them, if not use main buffer
final Long retVal = fallBackCopyBufferLength > 0 ? parseLongFromChars(fallBackCopyBuffer, 0, fallBackCopyBufferLength)
: parseLongFromChars(buffer, startOfValueInBuffer, endOfValueInBuffer);
@@ -984,6 +1025,8 @@
} else if (isCurrentNumberIntegral && currentIntegralNumber != Integer.MIN_VALUE) {
return new BigDecimal(currentIntegralNumber);
}
+
+ combinePreviousFallbackBuffersToCurrent();
//if there a content in the value buffer read from them, if not use main buffer
return (/*currentBigDecimalNumber = */fallBackCopyBufferLength > 0 ? new BigDecimal(fallBackCopyBuffer, 0,
fallBackCopyBufferLength) : new BigDecimal(buffer, startOfValueInBuffer, (endOfValueInBuffer - startOfValueInBuffer)));
@@ -1004,6 +1047,7 @@
if (releaseFallBackCopyBufferLength) {
valueProvider.release(fallBackCopyBuffer);
}
+ releasePreviousFallBackCopyBuffers();
try {
in.close();
diff --git a/johnzon-core/src/test/java/org/apache/johnzon/core/HugeStringTest.java b/johnzon-core/src/test/java/org/apache/johnzon/core/HugeStringTest.java
new file mode 100644
index 0000000..72ce072
--- /dev/null
+++ b/johnzon-core/src/test/java/org/apache/johnzon/core/HugeStringTest.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.johnzon.core;
+
+import jakarta.json.Json;
+import jakarta.json.JsonReader;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import java.io.StringReader;
+
+@Ignore // manual timing run: it makes no assertions and only prints the elapsed time
+public class HugeStringTest {
+ @Test
+ public void test() {
+ String json = "{\"data\":\"" + "a".repeat(50 * 1024 * 1024 + 1) + "\"}"; // one string value of 50 * 1024 * 1024 + 1 chars
+
+ // Warmup: parse the same document ten times before measuring
+ for (int i = 0; i < 10; i++) {
+ try (JsonReader reader = Json.createReader(new StringReader(json))) {
+ reader.readObject();
+ }
+ }
+
+ long start = System.currentTimeMillis(); // wall-clock start of the single measured parse
+ try (JsonReader reader = Json.createReader(new StringReader(json))) {
+ reader.readObject();
+ }
+ System.err.println("Took " + (System.currentTimeMillis() - start) + "ms");
+ }
+}