ORC-616: In Patched Base encoding, the value of headerThirdByte can exceed the range of a byte
Fixes #501
Signed-off-by: Owen O'Malley <omalley@apache.org>
diff --git a/java/core/src/java/org/apache/orc/impl/RunLengthIntegerWriterV2.java b/java/core/src/java/org/apache/orc/impl/RunLengthIntegerWriterV2.java
index 9107774..cfc1e53 100644
--- a/java/core/src/java/org/apache/orc/impl/RunLengthIntegerWriterV2.java
+++ b/java/core/src/java/org/apache/orc/impl/RunLengthIntegerWriterV2.java
@@ -124,6 +124,7 @@
static final int MAX_SCOPE = 512;
static final int MIN_REPEAT = 3;
+ static final long BASE_VALUE_LIMIT = 1L << 56; // 2^56; PATCHED_BASE is only chosen when min lies strictly inside (-2^56, 2^56)
private static final int MAX_SHORT_REPEAT_LENGTH = 10;
private long prevDelta = 0;
private int fixedRunLength = 0;
@@ -529,7 +530,7 @@
// fallback to DIRECT encoding.
// The decision to use patched base was based on zigzag values, but the
// actual patching is done on base reduced literals.
- if ((brBits100p - brBits95p) != 0) {
+ if ((brBits100p - brBits95p) != 0 && min > -BASE_VALUE_LIMIT && min < BASE_VALUE_LIMIT) { // explicit range check: Math.abs(Long.MIN_VALUE) is negative and would wrongly pass an abs-based test
encoding = EncodingType.PATCHED_BASE;
preparePatchedBlob();
return;
diff --git a/java/tools/src/test/org/apache/orc/impl/TestRLEv2.java b/java/tools/src/test/org/apache/orc/impl/TestRLEv2.java
index 4d5fc03..39b0213 100644
--- a/java/tools/src/test/org/apache/orc/impl/TestRLEv2.java
+++ b/java/tools/src/test/org/apache/orc/impl/TestRLEv2.java
@@ -18,6 +18,7 @@
package org.apache.orc.impl;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertArrayEquals;
import java.io.ByteArrayOutputStream;
import java.io.File;
@@ -36,6 +37,8 @@
import org.apache.orc.CompressionKind;
import org.apache.orc.OrcFile;
import org.apache.orc.PhysicalWriter;
+import org.apache.orc.Reader;
+import org.apache.orc.RecordReader;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;
import org.apache.orc.impl.writer.StreamOptions;
@@ -64,7 +67,7 @@
fs.delete(testFilePath, false);
}
- private void appendInt(VectorizedRowBatch batch, int i) {
+ private void appendInt(VectorizedRowBatch batch, long i) { // takes a long so values outside the int range can be appended
((LongColumnVector) batch.cols[0]).vector[batch.size++] = i;
}
@@ -311,6 +314,42 @@
System.setOut(origOut);
}
+ @Test
+ public void testBaseValueLimit() throws Exception {
+ TypeDescription schema = TypeDescription.createInt();
+ Writer w = OrcFile.createWriter(testFilePath,
+ OrcFile.writerOptions(conf)
+ .compress(CompressionKind.NONE)
+ .setSchema(schema)
+ .rowIndexStride(0)
+ .encodingStrategy(OrcFile.EncodingStrategy.COMPRESSION)
+ .version(OrcFile.Version.V_0_12)
+ );
+
+ VectorizedRowBatch batch = schema.createRowBatch();
+ // the smallest input, -72057594037927936 == -(1L << 56), has a magnitude equal to RunLengthIntegerWriterV2.BASE_VALUE_LIMIT; the values must still round-trip unchanged
+ long[] input = {-9007199254740992l,-8725724278030337l,-1125762467889153l, -1l,-9007199254740992l,
+ -9007199254740992l, -497l,127l,-1l,-72057594037927936l,-4194304l,-9007199254740992l,-4503599593816065l,
+ -4194304l,-8936830510563329l,-9007199254740992l, -1l, -70334384439312l,-4063233l, -6755399441973249l};
+ for(long data: input) {
+ appendInt(batch, data);
+ }
+ w.addRowBatch(batch);
+ w.close();
+
+ try(Reader reader = OrcFile.createReader(testFilePath,
+ OrcFile.readerOptions(conf).filesystem(fs))) {
+ RecordReader rows = reader.rows(); // NOTE(review): rows is never closed in this test; confirm whether it should be
+ batch = reader.getSchema().createRowBatch();
+ long[] output = null;
+ while (rows.nextBatch(batch)) {
+ output = new long[batch.size]; // reassigned on every batch, so only the last batch read is compared below
+ System.arraycopy(((LongColumnVector) batch.cols[0]).vector, 0, output, 0, batch.size);
+ }
+ assertArrayEquals(input, output);
+ }
+ }
+
static class TestOutputCatcher implements PhysicalWriter.OutputReceiver {
int currentBuffer = 0;
List<ByteBuffer> buffers = new ArrayList<ByteBuffer>();