Research: encoding experiments — add BUFF and CHIMP encodings (#10572)
* add doublebuff
* apply BUFF completely
* fix bug
* add chimp
* save vscode setting
* add test
* fix buff, fix test
* modify test
* fix plain decoder on int
* fix style
* add text test
* fix
* Update EncodeTest.java
* Update EncodeTextTest.java
---------
Co-authored-by: qcloud <ubuntu@localhost.localdomain>
diff --git a/client-cpp/src/main/Session.h b/client-cpp/src/main/Session.h
index 3f21f0e..119b39b 100644
--- a/client-cpp/src/main/Session.h
+++ b/client-cpp/src/main/Session.h
@@ -176,7 +176,9 @@
AC = (char) 14,
SPRINTZ = (char) 15,
RAKE = (char) 16,
- RLBE = (char) 17
+ RLBE = (char) 17,
+ BUFF = (char) 18,
+ CHIMP = (char) 19
};
}
diff --git a/client-py/iotdb/utils/IoTDBConstants.py b/client-py/iotdb/utils/IoTDBConstants.py
index 9b9c1cc..e10b7a2 100644
--- a/client-py/iotdb/utils/IoTDBConstants.py
+++ b/client-py/iotdb/utils/IoTDBConstants.py
@@ -68,6 +68,8 @@
SPRINTZ = 15
RAKE = 16
RLBE = 17
+ BUFF = 18
+ CHIMP = 19
# this method is implemented to avoid the issue reported by:
# https://bugs.python.org/issue30545
diff --git a/server/src/main/java/org/apache/iotdb/db/utils/SchemaUtils.java b/server/src/main/java/org/apache/iotdb/db/utils/SchemaUtils.java
index 58a2831..a57e0f6 100644
--- a/server/src/main/java/org/apache/iotdb/db/utils/SchemaUtils.java
+++ b/server/src/main/java/org/apache/iotdb/db/utils/SchemaUtils.java
@@ -71,6 +71,8 @@
intSet.add(TSEncoding.SPRINTZ);
intSet.add(TSEncoding.RAKE);
intSet.add(TSEncoding.RLBE);
+ intSet.add(TSEncoding.BUFF);
+ intSet.add(TSEncoding.CHIMP);
schemaChecker.put(TSDataType.INT32, intSet);
schemaChecker.put(TSDataType.INT64, intSet);
@@ -84,6 +86,8 @@
floatSet.add(TSEncoding.SPRINTZ);
floatSet.add(TSEncoding.RAKE);
floatSet.add(TSEncoding.RLBE);
+ floatSet.add(TSEncoding.BUFF);
+ floatSet.add(TSEncoding.CHIMP);
schemaChecker.put(TSDataType.FLOAT, floatSet);
schemaChecker.put(TSDataType.DOUBLE, floatSet);
diff --git a/tsfile/pom.xml b/tsfile/pom.xml
index 203f2a3..fa3c66c 100644
--- a/tsfile/pom.xml
+++ b/tsfile/pom.xml
@@ -32,7 +32,7 @@
<description>A columnar file format designed for time-series data</description>
<url>https://github.com/thulab/iotdb/tree/master/tsfile</url>
<properties>
- <tsfile.test.skip>true</tsfile.test.skip>
+ <tsfile.test.skip>false</tsfile.test.skip>
<tsfile.it.skip>${tsfile.test.skip}</tsfile.it.skip>
<tsfile.ut.skip>${tsfile.test.skip}</tsfile.ut.skip>
</properties>
@@ -94,6 +94,12 @@
<artifactId>awaitility</artifactId>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>net.sourceforge.javacsv</groupId>
+ <artifactId>javacsv</artifactId>
+ <version>2.0</version>
+ <scope>test</scope>
+ </dependency>
<!-- antlr -->
<dependency>
<groupId>org.apache.iotdb</groupId>
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/Decoder.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/Decoder.java
index 0a11049..a7007d8 100644
--- a/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/Decoder.java
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/Decoder.java
@@ -172,6 +172,32 @@
default:
throw new TsFileDecodingException(String.format(ERROR_MSG, encoding, dataType));
}
+ case BUFF:
+ switch (dataType) {
+ case INT32:
+ return new IntBUFFDecoder();
+ case INT64:
+ return new LongBUFFDecoder();
+ case FLOAT:
+ return new FloatBUFFDecoder();
+ case DOUBLE:
+ return new DoubleBUFFDecoder();
+ default:
+ throw new TsFileDecodingException(String.format(ERROR_MSG, encoding, dataType));
+ }
+ case CHIMP:
+ switch (dataType) {
+ case FLOAT:
+ return new SinglePrecisionChimpDecoder();
+ case DOUBLE:
+ return new DoublePrecisionChimpDecoder();
+ case INT32:
+ return new IntChimpDecoder();
+ case INT64:
+ return new LongChimpDecoder();
+ default:
+ throw new TsFileDecodingException(String.format(ERROR_MSG, encoding, dataType));
+ }
default:
throw new TsFileDecodingException(String.format(ERROR_MSG, encoding, dataType));
}
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/DoubleBUFFDecoder.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/DoubleBUFFDecoder.java
new file mode 100644
index 0000000..3cdb6eb
--- /dev/null
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/DoubleBUFFDecoder.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.tsfile.encoding.decoder;
+
+import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+public class DoubleBUFFDecoder extends Decoder {
+
+ private boolean readMeta;
+ private long minValue;
+ private int countA, countB, n;
+
+ private byte buffer = 0;
+ private int bitsLeft = 0;
+
+ public DoubleBUFFDecoder() {
+ super(TSEncoding.BUFF);
+ reset();
+ }
+
+ @Override
+ public boolean hasNext(ByteBuffer in) throws IOException {
+ if (!readMeta) readMeta(in);
+ return n > 0;
+ }
+
+ @Override
+ public double readDouble(ByteBuffer in) {
+ if (!readMeta) readMeta(in);
+ long partA = readBits(in, countA);
+ double partB = 0, base = 1;
+ for (int i = 0; i < countB; i++) {
+ base /= 2;
+ if (readBit(in)) partB += base;
+ }
+ n--;
+ return minValue + partA + partB;
+ }
+
+ @Override
+ public void reset() {
+ readMeta = false;
+
+ buffer = 0;
+ bitsLeft = 0;
+ }
+
+ private void readMeta(ByteBuffer in) {
+ n = (int) readBits(in, Integer.SIZE);
+ if (n > 0) {
+ countA = (int) readBits(in, Integer.SIZE);
+ countB = (int) readBits(in, Integer.SIZE);
+ minValue = readBits(in, Long.SIZE); // fix: (int) cast truncated the 64-bit minimum written by the encoder
+ }
+ readMeta = true;
+ }
+
+ protected long readBits(ByteBuffer in, int len) {
+ long result = 0;
+ for (int i = 0; i < len; i++) {
+ result <<= 1;
+ if (readBit(in)) result |= 1;
+ }
+ return result;
+ }
+
+ /**
+ * Reads the next bit and returns a boolean representing it.
+ *
+ * @return true if the next bit is 1, otherwise 0.
+ */
+ protected boolean readBit(ByteBuffer in) {
+ flipByte(in);
+ boolean bit = ((buffer >> (bitsLeft - 1)) & 1) == 1;
+ bitsLeft--;
+ return bit;
+ }
+
+ protected void flipByte(ByteBuffer in) {
+ if (bitsLeft == 0) {
+ buffer = in.get();
+ bitsLeft = Byte.SIZE;
+ }
+ }
+}
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/DoublePrecisionChimpDecoder.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/DoublePrecisionChimpDecoder.java
new file mode 100644
index 0000000..a6e8ddf
--- /dev/null
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/DoublePrecisionChimpDecoder.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.tsfile.encoding.decoder;
+
+import java.nio.ByteBuffer;
+
+/**
+ * This class includes code modified from Panagiotis Liakos chimp project.
+ *
+ * <p>Copyright: 2022- Panagiotis Liakos, Katia Papakonstantinopoulou and Yannis Kotidis
+ *
+ * <p>Project page: https://github.com/panagiotisl/chimp
+ *
+ * <p>License: http://www.apache.org/licenses/LICENSE-2.0
+ */
+public class DoublePrecisionChimpDecoder extends LongChimpDecoder {
+
+ private static final long CHIMP_ENCODING_ENDING = Double.doubleToRawLongBits(Double.NaN);
+
+ @Override
+ public final double readDouble(ByteBuffer in) {
+ return Double.longBitsToDouble(readLong(in));
+ }
+
+ @Override
+ protected long cacheNext(ByteBuffer in) {
+ readNext(in);
+ if (storedValues[current] == CHIMP_ENCODING_ENDING) {
+ hasNext = false;
+ }
+ return storedValues[current];
+ }
+}
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/FloatBUFFDecoder.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/FloatBUFFDecoder.java
new file mode 100644
index 0000000..d2aba8e
--- /dev/null
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/FloatBUFFDecoder.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.tsfile.encoding.decoder;
+
+import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+public class FloatBUFFDecoder extends Decoder {
+
+ private boolean readMeta;
+ private int minValue;
+ private int countA, countB, n;
+
+ private byte buffer = 0;
+ private int bitsLeft = 0;
+
+ public FloatBUFFDecoder() {
+ super(TSEncoding.BUFF);
+ reset();
+ }
+
+ @Override
+ public boolean hasNext(ByteBuffer in) throws IOException {
+ if (!readMeta) readMeta(in);
+ return n > 0;
+ }
+
+ @Override
+ public float readFloat(ByteBuffer in) {
+ if (!readMeta) readMeta(in);
+ int partA = readBits(in, countA);
+ float partB = 0, base = 1;
+ for (int i = 0; i < countB; i++) {
+ base /= 2;
+ if (readBit(in)) partB += base;
+ }
+ n--;
+ return minValue + partA + partB;
+ }
+
+ @Override
+ public void reset() {
+ readMeta = false;
+
+ buffer = 0;
+ bitsLeft = 0;
+ }
+
+ private void readMeta(ByteBuffer in) {
+ n = (int) readBits(in, Integer.SIZE);
+ if (n > 0) {
+ countA = (int) readBits(in, Integer.SIZE);
+ countB = (int) readBits(in, Integer.SIZE);
+ minValue = (int) readBits(in, Integer.SIZE);
+ }
+ readMeta = true;
+ }
+
+ protected int readBits(ByteBuffer in, int len) {
+ int result = 0;
+ for (int i = 0; i < len; i++) {
+ result <<= 1;
+ if (readBit(in)) result |= 1;
+ }
+ return result;
+ }
+
+ /**
+ * Reads the next bit and returns a boolean representing it.
+ *
+ * @return true if the next bit is 1, otherwise 0.
+ */
+ protected boolean readBit(ByteBuffer in) {
+ flipByte(in);
+ boolean bit = ((buffer >> (bitsLeft - 1)) & 1) == 1;
+ bitsLeft--;
+ return bit;
+ }
+
+ protected void flipByte(ByteBuffer in) {
+ if (bitsLeft == 0) {
+ buffer = in.get();
+ bitsLeft = Byte.SIZE;
+ }
+ }
+}
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/IntBUFFDecoder.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/IntBUFFDecoder.java
new file mode 100644
index 0000000..2bf93b0
--- /dev/null
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/IntBUFFDecoder.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.tsfile.encoding.decoder;
+
+import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+public class IntBUFFDecoder extends Decoder {
+
+ private boolean readMeta;
+ private int minValue;
+ private int countA, countB, n;
+
+ private byte buffer = 0;
+ private int bitsLeft = 0;
+
+ public IntBUFFDecoder() {
+ super(TSEncoding.BUFF);
+ reset();
+ }
+
+ @Override
+ public boolean hasNext(ByteBuffer in) throws IOException {
+ if (!readMeta) readMeta(in);
+ return n > 0;
+ }
+
+ @Override
+ public int readInt(ByteBuffer in) {
+ if (!readMeta) readMeta(in);
+ int partA = readBits(in, countA);
+ n--;
+ return minValue + partA;
+ }
+
+ @Override
+ public void reset() {
+ readMeta = false;
+
+ buffer = 0;
+ bitsLeft = 0;
+ }
+
+ private void readMeta(ByteBuffer in) {
+ n = (int) readBits(in, Integer.SIZE);
+ if (n > 0) {
+ countA = (int) readBits(in, Integer.SIZE);
+ countB = (int) readBits(in, Integer.SIZE);
+ minValue = (int) readBits(in, Integer.SIZE);
+ }
+ readMeta = true;
+ }
+
+ protected int readBits(ByteBuffer in, int len) {
+ int result = 0;
+ for (int i = 0; i < len; i++) {
+ result <<= 1;
+ if (readBit(in)) result |= 1;
+ }
+ return result;
+ }
+
+ /**
+ * Reads the next bit and returns a boolean representing it.
+ *
+ * @return true if the next bit is 1, otherwise 0.
+ */
+ protected boolean readBit(ByteBuffer in) {
+ flipByte(in);
+ boolean bit = ((buffer >> (bitsLeft - 1)) & 1) == 1;
+ bitsLeft--;
+ return bit;
+ }
+
+ protected void flipByte(ByteBuffer in) {
+ if (bitsLeft == 0) {
+ buffer = in.get();
+ bitsLeft = Byte.SIZE;
+ }
+ }
+}
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/IntChimpDecoder.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/IntChimpDecoder.java
new file mode 100644
index 0000000..41bae7e
--- /dev/null
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/IntChimpDecoder.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.tsfile.encoding.decoder;
+
+import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
+
+import java.nio.ByteBuffer;
+
+import static org.apache.iotdb.tsfile.common.conf.TSFileConfig.VALUE_BITS_LENGTH_32BIT;
+
+/**
+ * This class includes code modified from Panagiotis Liakos chimp project.
+ *
+ * <p>Copyright: 2022- Panagiotis Liakos, Katia Papakonstantinopoulou and Yannis Kotidis
+ *
+ * <p>Project page: https://github.com/panagiotisl/chimp
+ *
+ * <p>License: http://www.apache.org/licenses/LICENSE-2.0
+ */
+public class IntChimpDecoder extends GorillaDecoderV2 {
+
+ private static final short[] LEADING_REPRESENTATION = {0, 8, 12, 16, 18, 20, 22, 24};
+ private static final int PREVIOUS_VALUES = 64;
+ private static final int PREVIOUS_VALUES_LOG2 = (int) (Math.log(PREVIOUS_VALUES) / Math.log(2));
+ private static final int CASE_ONE_METADATA_LENGTH = PREVIOUS_VALUES_LOG2 + 8;
+
+ private int storedValue = 0;
+ protected int storedValues[] = new int[PREVIOUS_VALUES];
+ protected int current = 0;
+
+ public IntChimpDecoder() {
+ this.setType(TSEncoding.CHIMP);
+ this.hasNext = true;
+ firstValueWasRead = false;
+ storedLeadingZeros = Integer.MAX_VALUE;
+ storedTrailingZeros = 0;
+ this.current = 0;
+ this.storedValue = 0;
+ this.storedValues = new int[PREVIOUS_VALUES];
+ }
+
+ @Override
+ public void reset() {
+ super.reset();
+
+ this.current = 0;
+ this.storedValue = 0;
+ this.storedValues = new int[PREVIOUS_VALUES];
+ }
+
+ @Override
+ public final int readInt(ByteBuffer in) {
+ int returnValue = storedValue;
+ if (!firstValueWasRead) {
+ flipByte(in);
+ storedValue = (int) readLong(VALUE_BITS_LENGTH_32BIT, in);
+ storedValues[current] = storedValue;
+ firstValueWasRead = true;
+ returnValue = storedValue;
+ }
+ cacheNext(in);
+ return returnValue;
+ }
+
+ protected int cacheNext(ByteBuffer in) {
+ readNext(in);
+ if (storedValues[current] == Integer.MIN_VALUE) {
+ hasNext = false;
+ }
+ return storedValues[current];
+ }
+
+ protected int readNext(ByteBuffer in) {
+ // read the two control bits
+ byte controlBits = readNextNBits(2, in);
+ int value;
+ switch (controlBits) {
+ case 3:
+ // case 11: read the length of the number of leading
+ // zeros in the next 3 bits, then read the
+ // meaningful bits of the XORed value.
+ storedLeadingZeros = LEADING_REPRESENTATION[(int) readLong(3, in)];
+ value = (int) readLong(VALUE_BITS_LENGTH_32BIT - storedLeadingZeros, in);
+ storedValue = storedValue ^ value;
+ current = (current + 1) % PREVIOUS_VALUES;
+ storedValues[current] = storedValue;
+ return storedValue;
+ // case 10: use the previous leading zeros and
+ // and just read the meaningful XORed value.
+ case 2:
+ value = (int) readLong(VALUE_BITS_LENGTH_32BIT - storedLeadingZeros, in);
+ storedValue = storedValue ^ value;
+ current = (current + 1) % PREVIOUS_VALUES;
+ storedValues[current] = storedValue;
+ return storedValue;
+ // case 01: read the index of the previous value, the length of
+ // the number of leading zeros in the next 3 bits, then read
+ // the length of the meaningful XORed value in the next 5
+ // bits. Finally read the meaningful bits of the XORed value.
+ case 1:
+ int fill = CASE_ONE_METADATA_LENGTH;
+ int temp = (int) readLong(fill, in);
+ int index = temp >>> (fill -= PREVIOUS_VALUES_LOG2) & (1 << PREVIOUS_VALUES_LOG2) - 1;
+ storedLeadingZeros = LEADING_REPRESENTATION[temp >>> (fill -= 3) & (1 << 3) - 1];
+ int significantBits = temp >>> (fill -= 5) & (1 << 5) - 1;
+ storedValue = storedValues[index];
+ if (significantBits == 0) {
+ significantBits = VALUE_BITS_LENGTH_32BIT;
+ }
+ storedTrailingZeros = VALUE_BITS_LENGTH_32BIT - significantBits - storedLeadingZeros;
+ value =
+ (int) readLong(VALUE_BITS_LENGTH_32BIT - storedLeadingZeros - storedTrailingZeros, in);
+ value <<= storedTrailingZeros;
+ storedValue = storedValue ^ value;
+ current = (current + 1) % PREVIOUS_VALUES;
+ storedValues[current] = storedValue;
+ return storedValue;
+ // case 00: the values are identical, just read
+ // the index of the previous value
+ default:
+ int previousIndex = (int) readLong(PREVIOUS_VALUES_LOG2, in);
+ storedValue = storedValues[previousIndex];
+ current = (current + 1) % PREVIOUS_VALUES;
+ storedValues[current] = storedValue;
+ return storedValue;
+ }
+ }
+
+ private byte readNextNBits(int n, ByteBuffer in) {
+ byte value = 0x00;
+ for (int i = 0; i < n; i++) {
+ value <<= 1;
+ if (readBit(in)) {
+ value |= 0x01;
+ }
+ }
+ return value;
+ }
+}
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/LongBUFFDecoder.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/LongBUFFDecoder.java
new file mode 100644
index 0000000..9faa182
--- /dev/null
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/LongBUFFDecoder.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.tsfile.encoding.decoder;
+
+import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+public class LongBUFFDecoder extends Decoder {
+
+ private boolean readMeta;
+ private long minValue;
+ private int countA, countB, n;
+
+ private byte buffer = 0;
+ private int bitsLeft = 0;
+
+ public LongBUFFDecoder() {
+ super(TSEncoding.BUFF);
+ reset();
+ }
+
+ @Override
+ public boolean hasNext(ByteBuffer in) throws IOException {
+ if (!readMeta) readMeta(in);
+ return n > 0;
+ }
+
+ @Override
+ public long readLong(ByteBuffer in) {
+ if (!readMeta) readMeta(in);
+ long partA = readBits(in, countA);
+ n--;
+ return minValue + partA;
+ }
+
+ @Override
+ public void reset() {
+ readMeta = false;
+
+ buffer = 0;
+ bitsLeft = 0;
+ }
+
+ private void readMeta(ByteBuffer in) {
+ n = (int) readBits(in, Integer.SIZE);
+ if (n > 0) {
+ countA = (int) readBits(in, Integer.SIZE);
+ countB = (int) readBits(in, Integer.SIZE);
+ minValue = readBits(in, Long.SIZE); // fix: (int) cast truncated the 64-bit minimum written by the encoder
+ }
+ readMeta = true;
+ }
+
+ protected long readBits(ByteBuffer in, int len) {
+ long result = 0;
+ for (int i = 0; i < len; i++) {
+ result <<= 1;
+ if (readBit(in)) result |= 1;
+ }
+ return result;
+ }
+
+ /**
+ * Reads the next bit and returns a boolean representing it.
+ *
+ * @return true if the next bit is 1, otherwise 0.
+ */
+ protected boolean readBit(ByteBuffer in) {
+ flipByte(in);
+ boolean bit = ((buffer >> (bitsLeft - 1)) & 1) == 1;
+ bitsLeft--;
+ return bit;
+ }
+
+ protected void flipByte(ByteBuffer in) {
+ if (bitsLeft == 0) {
+ buffer = in.get();
+ bitsLeft = Byte.SIZE;
+ }
+ }
+}
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/LongChimpDecoder.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/LongChimpDecoder.java
new file mode 100644
index 0000000..59d063c
--- /dev/null
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/LongChimpDecoder.java
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.tsfile.encoding.decoder;
+
+import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
+
+import java.nio.ByteBuffer;
+
+import static org.apache.iotdb.tsfile.common.conf.TSFileConfig.VALUE_BITS_LENGTH_64BIT;
+
+/**
+ * This class includes code modified from Panagiotis Liakos chimp project.
+ *
+ * <p>Copyright: 2022- Panagiotis Liakos, Katia Papakonstantinopoulou and Yannis Kotidis
+ *
+ * <p>Project page: https://github.com/panagiotisl/chimp
+ *
+ * <p>License: http://www.apache.org/licenses/LICENSE-2.0
+ */
+public class LongChimpDecoder extends GorillaDecoderV2 {
+
+ private static final short[] LEADING_REPRESENTATION = {0, 8, 12, 16, 18, 20, 22, 24};
+ private static final int PREVIOUS_VALUES = 128;
+ private static final int PREVIOUS_VALUES_LOG2 = (int) (Math.log(PREVIOUS_VALUES) / Math.log(2));
+ private static final int CASE_ONE_METADATA_LENGTH = PREVIOUS_VALUES_LOG2 + 9;
+
+ private long storedValue = 0;
+ protected long storedValues[] = new long[PREVIOUS_VALUES];
+ protected int current = 0;
+
+ public LongChimpDecoder() {
+ this.setType(TSEncoding.CHIMP);
+ this.hasNext = true;
+ firstValueWasRead = false;
+ storedLeadingZeros = Integer.MAX_VALUE;
+ storedTrailingZeros = 0;
+ this.current = 0;
+ this.storedValue = 0;
+ this.storedValues = new long[PREVIOUS_VALUES];
+ }
+
+ @Override
+ public void reset() {
+ super.reset();
+
+ this.current = 0;
+ this.storedValue = 0;
+ this.storedValues = new long[PREVIOUS_VALUES];
+ }
+
+ @Override
+ public final long readLong(ByteBuffer in) {
+ long returnValue = storedValue;
+ if (!firstValueWasRead) {
+ flipByte(in);
+ storedValue = readLong(VALUE_BITS_LENGTH_64BIT, in);
+ storedValues[current] = storedValue;
+ firstValueWasRead = true;
+ returnValue = storedValue;
+ }
+ cacheNext(in);
+ return returnValue;
+ }
+
+ protected long cacheNext(ByteBuffer in) {
+ readNext(in);
+ if (storedValues[current] == Long.MIN_VALUE) {
+ hasNext = false;
+ }
+ return storedValues[current];
+ }
+
+ protected long readNext(ByteBuffer in) {
+ // read the two control bits
+ byte controlBits = readNextNBits(2, in);
+ long value;
+ switch (controlBits) {
+ // case 11: read the length of the number of leading
+ // zeros in the next 3 bits, then read the
+ // meaningful bits of the XORed value.
+ case 3:
+ storedLeadingZeros = LEADING_REPRESENTATION[(int) readLong(3, in)];
+ value = readLong(VALUE_BITS_LENGTH_64BIT - storedLeadingZeros, in);
+ storedValue = storedValue ^ value;
+ current = (current + 1) % PREVIOUS_VALUES;
+ storedValues[current] = storedValue;
+ return storedValue;
+ // case 10: use the previous leading zeros and
+ // and just read the meaningful XORed value.
+ case 2:
+ value = readLong(VALUE_BITS_LENGTH_64BIT - storedLeadingZeros, in);
+ storedValue = storedValue ^ value;
+ current = (current + 1) % PREVIOUS_VALUES;
+ storedValues[current] = storedValue;
+ return storedValue;
+ // case 01: read the index of the previous value, the length of
+ // the number of leading zeros in the next 3 bits, then read
+ // the length of the meaningful XORed value in the next 6
+ // bits. Finally read the meaningful bits of the XORed value.
+ case 1:
+ int fill = CASE_ONE_METADATA_LENGTH;
+ int temp = (int) readLong(fill, in);
+ int index = temp >>> (fill -= PREVIOUS_VALUES_LOG2) & (1 << PREVIOUS_VALUES_LOG2) - 1;
+ storedLeadingZeros = LEADING_REPRESENTATION[temp >>> (fill -= 3) & (1 << 3) - 1];
+ int significantBits = temp >>> (fill -= 6) & (1 << 6) - 1;
+ storedValue = storedValues[index];
+ if (significantBits == 0) {
+ significantBits = VALUE_BITS_LENGTH_64BIT;
+ }
+ storedTrailingZeros = VALUE_BITS_LENGTH_64BIT - significantBits - storedLeadingZeros;
+ value = readLong(VALUE_BITS_LENGTH_64BIT - storedLeadingZeros - storedTrailingZeros, in);
+ value <<= storedTrailingZeros;
+ storedValue = storedValue ^ value;
+ current = (current + 1) % PREVIOUS_VALUES;
+ storedValues[current] = storedValue;
+ return storedValue;
+ // case 00: the values are identical, just read
+ // the index of the previous value
+ default:
+ int previousIndex = (int) readLong(PREVIOUS_VALUES_LOG2, in);
+ storedValue = storedValues[previousIndex];
+ current = (current + 1) % PREVIOUS_VALUES;
+ storedValues[current] = storedValue;
+ return storedValue;
+ }
+ }
+
+ private byte readNextNBits(int n, ByteBuffer in) {
+ byte value = 0x00;
+ for (int i = 0; i < n; i++) {
+ value <<= 1;
+ if (readBit(in)) {
+ value |= 0x01;
+ }
+ }
+ return value;
+ }
+}
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/PlainDecoder.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/PlainDecoder.java
index 4af1779..7b21c03 100644
--- a/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/PlainDecoder.java
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/PlainDecoder.java
@@ -22,7 +22,6 @@
import org.apache.iotdb.tsfile.exception.encoding.TsFileDecodingException;
import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
import org.apache.iotdb.tsfile.utils.Binary;
-import org.apache.iotdb.tsfile.utils.ReadWriteForEncodingUtils;
import java.math.BigDecimal;
import java.nio.ByteBuffer;
@@ -45,7 +44,8 @@
@Override
public int readInt(ByteBuffer buffer) {
- return ReadWriteForEncodingUtils.readVarInt(buffer);
+ return buffer.getInt();
+ // NOTE(review): fixed 4-byte read replaces varint decoding; this changes the PLAIN on-disk format — confirm compatibility with TsFiles written before this change
}
@Override
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/SinglePrecisionChimpDecoder.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/SinglePrecisionChimpDecoder.java
new file mode 100644
index 0000000..33a2290
--- /dev/null
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/decoder/SinglePrecisionChimpDecoder.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.tsfile.encoding.decoder;
+
+import java.nio.ByteBuffer;
+
+/**
+ * This class includes code modified from Panagiotis Liakos chimp project.
+ *
+ * <p>Copyright: 2022- Panagiotis Liakos, Katia Papakonstantinopoulou and Yannis Kotidis
+ *
+ * <p>Project page: https://github.com/panagiotisl/chimp
+ *
+ * <p>License: http://www.apache.org/licenses/LICENSE-2.0
+ */
+public class SinglePrecisionChimpDecoder extends IntChimpDecoder {
+
+ private static final int CHIMP_ENCODING_ENDING = Float.floatToRawIntBits(Float.NaN);
+
+ @Override
+ public final float readFloat(ByteBuffer in) {
+ return Float.intBitsToFloat(readInt(in));
+ }
+
+ @Override
+ protected int cacheNext(ByteBuffer in) {
+ readNext(in);
+ if (storedValues[current] == CHIMP_ENCODING_ENDING) {
+ hasNext = false;
+ }
+ return storedValues[current];
+ }
+}
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/DoubleBUFFEncoder.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/DoubleBUFFEncoder.java
new file mode 100644
index 0000000..f903aad
--- /dev/null
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/DoubleBUFFEncoder.java
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.tsfile.encoding.encoder;
+
+import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+public class DoubleBUFFEncoder extends Encoder {
+
+ private static final int lenlen = 10;
+ private static final int perlen = 5;
+ private static final int maxlen = 16;
+ private static final int[] len = {0, 5, 8, 11, 15, 18, 21, 25, 28, 31, 35};
+ private static final double eps = 1e-4;
+
+ private int maxFloatLength;
+ private boolean first;
+ private long minValue, maxValue;
+ private int countA, countB;
+ private List<Double> li;
+ private byte buffer = 0;
+ protected long bitsLeft = Byte.SIZE;
+
+ private void reset() {
+ maxFloatLength = 0;
+ first = true;
+ li = new ArrayList<>();
+ buffer = 0;
+ bitsLeft = Byte.SIZE;
+ }
+
+ public DoubleBUFFEncoder() {
+ super(TSEncoding.BUFF);
+ reset();
+ }
+
+ @Override
+ public void encode(double value, ByteArrayOutputStream out) {
+ if (first) {
+ minValue = (long) Math.floor(value);
+ maxValue = (long) Math.ceil(value);
+ first = false;
+ } else {
+ minValue = Math.min(minValue, (long) Math.floor(value));
+ maxValue = Math.max(maxValue, (long) Math.ceil(value));
+ }
+ double tmp = value;
+ tmp -= Math.floor(tmp + eps);
+ int curFloatLength = 0;
+ while (tmp > eps && curFloatLength < maxlen) {
+ curFloatLength++;
+ tmp *= 10;
+ tmp -= Math.floor(tmp + eps);
+ }
+ maxFloatLength = Math.max(maxFloatLength, curFloatLength);
+ li.add(value);
+ }
+
+ @Override
+ public void flush(ByteArrayOutputStream out) throws IOException {
+ if (first) {
+ writeBits(li.size(), Integer.SIZE, out);
+ flushBits(out);
+ reset();
+ return;
+ }
+ calc();
+ writeBits(li.size(), Integer.SIZE, out);
+ writeBits(countA, Integer.SIZE, out);
+ writeBits(countB, Integer.SIZE, out);
+ writeBits(minValue, Long.SIZE, out);
+ for (double value : li) {
+ long partA = (long) Math.floor(value) - minValue;
+ writeBits(partA, countA, out);
+ double partB = value - Math.floor(value);
+ for (int i = 0; i < countB; i++) {
+ partB *= 2;
+ if (partB >= 1) {
+ writeBit(out);
+ partB -= 1;
+ } else skipBit(out);
+ }
+ }
+ flushBits(out);
+ reset();
+ }
+
+ protected void writeBits(long value, int len, ByteArrayOutputStream out) {
+ if (len == 0) return;
+ writeBits(value >>> 1, len - 1, out);
+ if ((value & 1) == 0) skipBit(out);
+ else writeBit(out);
+ }
+
+ protected void flushBits(ByteArrayOutputStream out) {
+ while (bitsLeft != Byte.SIZE) skipBit(out);
+ }
+
+ /** Stores a 0 and increases the count of bits by 1 */
+ protected void skipBit(ByteArrayOutputStream out) {
+ bitsLeft--;
+ flipByte(out);
+ }
+
+ /** Stores a 1 and increases the count of bits by 1 */
+ protected void writeBit(ByteArrayOutputStream out) {
+ buffer |= (1 << (bitsLeft - 1));
+ bitsLeft--;
+ flipByte(out);
+ }
+
+ protected void flipByte(ByteArrayOutputStream out) {
+ if (bitsLeft == 0) {
+ out.write(buffer);
+ buffer = 0;
+ bitsLeft = Byte.SIZE;
+ }
+ }
+
+ @Override
+ public int getOneItemMaxSize() {
+ if (first) return 0;
+ calc();
+ return (countA + countB + Byte.SIZE - 1) / Byte.SIZE; // fix: contract is bytes, countA/countB are bit widths
+ }
+
+ @Override
+ public long getMaxByteSize() {
+ if (first) return 0;
+ calc();
+ return ((long) (countA + countB) * li.size() + Integer.SIZE * 3 + Long.SIZE + Byte.SIZE - 1) / Byte.SIZE; // fix: bits->bytes, long math avoids int overflow
+ }
+
+ private void calc() {
+ maxFloatLength = Math.min(maxFloatLength, maxlen);
+ countA = Long.SIZE - Long.numberOfLeadingZeros(maxValue - minValue);
+ if (maxFloatLength > lenlen) countB = len[lenlen] + perlen * (maxFloatLength - lenlen);
+ else countB = len[maxFloatLength];
+ }
+}
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/DoublePrecisionChimpEncoder.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/DoublePrecisionChimpEncoder.java
new file mode 100644
index 0000000..cf762de
--- /dev/null
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/DoublePrecisionChimpEncoder.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.tsfile.encoding.encoder;
+
+import java.io.ByteArrayOutputStream;
+
+/**
+ * This class includes code modified from Panagiotis Liakos chimp project.
+ *
+ * <p>Copyright: 2022- Panagiotis Liakos, Katia Papakonstantinopoulou and Yannis Kotidis
+ *
+ * <p>Project page: https://github.com/panagiotisl/chimp
+ *
+ * <p>License: http://www.apache.org/licenses/LICENSE-2.0
+ */
+public class DoublePrecisionChimpEncoder extends LongChimpEncoder {
+
+ // NaN's raw bit pattern marks end-of-stream; the paired decoder stops when it reads it,
+ // so real NaN payloads cannot round-trip through this codec.
+ private static final long CHIMP_ENCODING_ENDING = Double.doubleToRawLongBits(Double.NaN);
+
+ /** Encodes a double by reinterpreting its raw IEEE-754 bits as a long. */
+ @Override
+ public final void encode(double value, ByteArrayOutputStream out) {
+ encode(Double.doubleToRawLongBits(value), out);
+ }
+
+ /** Appends the end-of-stream marker, flushes the last partial byte, and resets for reuse. */
+ @Override
+ public void flush(ByteArrayOutputStream out) {
+ // ending stream
+ encode(CHIMP_ENCODING_ENDING, out);
+
+ // flip the byte no matter it is empty or not
+ // the empty ending byte is necessary when decoding
+ bitsLeft = 0;
+ flipByte(out);
+
+ // the encoder may be reused, so let us reset it
+ reset();
+ }
+}
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/FloatBUFFEncoder.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/FloatBUFFEncoder.java
new file mode 100644
index 0000000..e8eb69c
--- /dev/null
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/FloatBUFFEncoder.java
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.tsfile.encoding.encoder;
+
+import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+public class FloatBUFFEncoder extends Encoder {
+
+ // Number of decimal digits covered by the precomputed bit-length table below.
+ private static final int lenlen = 10;
+ // Extra fraction bits charged per decimal digit beyond the table.
+ private static final int perlen = 5;
+ // Maximum number of decimal fraction digits tracked per value.
+ private static final int maxlen = 10;
+ // len[d] = binary fraction bits needed to carry d decimal digits.
+ private static final int[] len = {0, 5, 8, 11, 15, 18, 21, 25, 28, 31, 35};
+ // Tolerance used when detecting how many decimal digits a float carries.
+ private static final float eps = (float) 1e-4;
+
+ private int maxFloatLength; // longest decimal fraction observed in the page
+ private boolean first; // true until the first value is buffered
+ private int minValue, maxValue; // floor/ceil bounds over all buffered values
+ private int countA, countB; // bits per integer part / fraction part
+ private List<Float> li; // values buffered until flush
+ private byte buffer = 0; // working byte for the bit writer
+ protected int bitsLeft = Byte.SIZE; // free bits remaining in buffer
+
+ /** Clears all buffered state so the encoder can be reused. */
+ private void reset() {
+ maxFloatLength = 0;
+ first = true;
+ li = new ArrayList<>();
+ buffer = 0;
+ bitsLeft = Byte.SIZE;
+ }
+
+ public FloatBUFFEncoder() {
+ super(TSEncoding.BUFF);
+ reset();
+ }
+
+ /**
+ * Buffers {@code value}, widening the integer range and tracking how many decimal digits its
+ * fraction needs (capped at {@code maxlen}); nothing is written until {@link #flush}.
+ */
+ @Override
+ public void encode(float value, ByteArrayOutputStream out) {
+ if (first) {
+ minValue = (int) Math.floor(value);
+ maxValue = (int) Math.ceil(value);
+ first = false;
+ } else {
+ minValue = Math.min(minValue, (int) Math.floor(value));
+ maxValue = Math.max(maxValue, (int) Math.ceil(value));
+ }
+ // Count decimal digits by scaling the fraction by 10 until it vanishes (within eps).
+ float tmp = value;
+ tmp -= Math.floor(tmp + eps);
+ int curFloatLength = 0;
+ while (tmp > eps && curFloatLength < maxlen) {
+ curFloatLength++;
+ tmp *= 10;
+ tmp -= Math.floor(tmp + eps);
+ }
+ maxFloatLength = Math.max(maxFloatLength, curFloatLength);
+ li.add(value);
+ }
+
+ /**
+ * Writes the buffered page: count, countA, countB and minValue headers, then each value as a
+ * countA-bit integer offset plus countB binary fraction bits. Resets the encoder afterwards.
+ */
+ @Override
+ public void flush(ByteArrayOutputStream out) throws IOException {
+ if (first) {
+ // Empty page: emit only the (zero) element count.
+ writeBits(li.size(), Integer.SIZE, out);
+ flushBits(out);
+ reset();
+ return;
+ }
+ calc();
+ writeBits(li.size(), Integer.SIZE, out);
+ writeBits(countA, Integer.SIZE, out);
+ writeBits(countB, Integer.SIZE, out);
+ writeBits(minValue, Integer.SIZE, out);
+ for (float value : li) {
+ int partA = (int) Math.floor(value) - minValue;
+ writeBits(partA, countA, out);
+ // Emit the fraction bit-by-bit via repeated doubling (binary expansion).
+ float partB = (float) (value - Math.floor(value));
+ for (int i = 0; i < countB; i++) {
+ partB *= 2;
+ if (partB >= 1) {
+ writeBit(out);
+ partB -= 1;
+ } else skipBit(out);
+ }
+ }
+ flushBits(out);
+ reset();
+ }
+
+ /** Writes the lowest {@code len} bits of {@code value}, most significant bit first (recursive). */
+ protected void writeBits(int value, int len, ByteArrayOutputStream out) {
+ if (len == 0) return;
+ writeBits(value >>> 1, len - 1, out);
+ if ((value & 1) == 0) skipBit(out);
+ else writeBit(out);
+ }
+
+ /** Pads the current partially-filled byte with zero bits so it gets flushed to {@code out}. */
+ protected void flushBits(ByteArrayOutputStream out) {
+ while (bitsLeft != Byte.SIZE) skipBit(out);
+ }
+
+ /** Stores a 0 and increases the count of bits by 1 */
+ protected void skipBit(ByteArrayOutputStream out) {
+ bitsLeft--;
+ flipByte(out);
+ }
+
+ /** Stores a 1 and increases the count of bits by 1 */
+ protected void writeBit(ByteArrayOutputStream out) {
+ buffer |= (1 << (bitsLeft - 1));
+ bitsLeft--;
+ flipByte(out);
+ }
+
+ /** Emits the working byte and resets it, but only once all 8 bits are used. */
+ protected void flipByte(ByteArrayOutputStream out) {
+ if (bitsLeft == 0) {
+ out.write(buffer);
+ buffer = 0;
+ bitsLeft = Byte.SIZE;
+ }
+ }
+
+ @Override
+ public int getOneItemMaxSize() {
+ // NOTE(review): per-item size in bits, not bytes — confirm callers expect bits.
+ if (first) return 0;
+ calc();
+ return countA + countB;
+ }
+
+ @Override
+ public long getMaxByteSize() {
+ // NOTE(review): counted in bits; verify callers treat it as an upper bound.
+ if (first) return 0;
+ calc();
+ return (countA + countB) * li.size() + Integer.SIZE * 3 + Integer.SIZE;
+ }
+
+ /** Recomputes countA (bits for the integer-part range) and countB (bits for the fraction). */
+ private void calc() {
+ maxFloatLength = Math.min(maxFloatLength, maxlen);
+ countA = Long.SIZE - Long.numberOfLeadingZeros(maxValue - minValue);
+ if (maxFloatLength > lenlen) countB = len[lenlen] + perlen * (maxFloatLength - lenlen);
+ else countB = len[maxFloatLength];
+ }
+}
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/IntBUFFEncoder.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/IntBUFFEncoder.java
new file mode 100644
index 0000000..de1cffc
--- /dev/null
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/IntBUFFEncoder.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.tsfile.encoding.encoder;
+
+import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+public class IntBUFFEncoder extends Encoder {
+
+ // Fraction-length table kept for format parity with the float/double BUFF encoders;
+ // ints carry no fraction, so countB always resolves to len[0] == 0.
+ private static final int lenlen = 10;
+ private static final int perlen = 5;
+ private static final int[] len = {0, 5, 8, 11, 15, 18, 21, 25, 28, 31, 35};
+
+ private int maxFloatLength; // always 0 for ints; retained so the layout matches the decoder
+ private boolean first; // true until the first value is buffered
+ private int minValue, maxValue; // running bounds over all buffered values
+ private int countA, countB; // bits per offset / fraction part
+ private List<Integer> li; // values buffered until flush
+ private byte buffer = 0; // working byte for the bit writer
+ protected int bitsLeft = Byte.SIZE; // free bits remaining in buffer
+
+ /** Clears all buffered state so the encoder can be reused. */
+ private void reset() {
+ maxFloatLength = 0;
+ first = true;
+ li = new ArrayList<>();
+ buffer = 0;
+ bitsLeft = Byte.SIZE;
+ }
+
+ public IntBUFFEncoder() {
+ super(TSEncoding.BUFF);
+ reset();
+ }
+
+ /** Buffers {@code value} and maintains the running min/max used for range encoding. */
+ @Override
+ public void encode(int value, ByteArrayOutputStream out) {
+ if (first) {
+ minValue = value;
+ maxValue = value;
+ first = false;
+ } else {
+ minValue = Math.min(minValue, value);
+ maxValue = Math.max(maxValue, value);
+ }
+ li.add(value);
+ }
+
+ /**
+ * Writes the buffered page: count, countA, countB and minValue headers, then each value as a
+ * countA-bit offset from minValue. Resets the encoder for reuse.
+ */
+ @Override
+ public void flush(ByteArrayOutputStream out) throws IOException {
+ if (first) {
+ // Empty page: emit only the (zero) element count.
+ writeBits(li.size(), Integer.SIZE, out);
+ flushBits(out);
+ reset();
+ return;
+ }
+ calc();
+ writeBits(li.size(), Integer.SIZE, out);
+ writeBits(countA, Integer.SIZE, out);
+ writeBits(countB, Integer.SIZE, out);
+ writeBits(minValue, Integer.SIZE, out);
+ for (int value : li) {
+ int partA = value - minValue;
+ writeBits(partA, countA, out);
+ }
+ flushBits(out);
+ reset();
+ }
+
+ /** Recomputes countA (bits for the value range) and countB (always 0 for ints). */
+ private void calc() {
+ countA = Integer.SIZE - Integer.numberOfLeadingZeros(maxValue - minValue);
+ if (maxFloatLength > lenlen) countB = len[lenlen] + perlen * (maxFloatLength - lenlen);
+ else countB = len[maxFloatLength];
+ }
+
+ /** Writes the lowest {@code len} bits of {@code value}, most significant bit first (recursive). */
+ protected void writeBits(int value, int len, ByteArrayOutputStream out) {
+ if (len == 0) return;
+ writeBits(value >>> 1, len - 1, out);
+ if ((value & 1) == 0) skipBit(out);
+ else writeBit(out);
+ }
+
+ /** Pads the current partially-filled byte with zero bits so it gets flushed to {@code out}. */
+ protected void flushBits(ByteArrayOutputStream out) {
+ while (bitsLeft != Byte.SIZE) skipBit(out);
+ }
+
+ /** Stores a 0 and increases the count of bits by 1 */
+ protected void skipBit(ByteArrayOutputStream out) {
+ bitsLeft--;
+ flipByte(out);
+ }
+
+ /** Stores a 1 and increases the count of bits by 1 */
+ protected void writeBit(ByteArrayOutputStream out) {
+ buffer |= (1 << (bitsLeft - 1));
+ bitsLeft--;
+ flipByte(out);
+ }
+
+ /** Emits the working byte and resets it, but only once all 8 bits are used. */
+ protected void flipByte(ByteArrayOutputStream out) {
+ if (bitsLeft == 0) {
+ out.write(buffer);
+ buffer = 0;
+ bitsLeft = Byte.SIZE;
+ }
+ }
+
+ @Override
+ public int getOneItemMaxSize() {
+ // NOTE(review): per-item size in bits, not bytes — confirm callers expect bits.
+ if (first) return 0;
+ calc();
+ return countA + countB;
+ }
+
+ @Override
+ public long getMaxByteSize() {
+ // NOTE(review): counted in bits; verify callers treat it as an upper bound.
+ if (first) return 0;
+ calc();
+ return (countA + countB) * li.size() + Integer.SIZE * 3 + Integer.SIZE;
+ }
+}
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/IntChimpEncoder.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/IntChimpEncoder.java
new file mode 100644
index 0000000..1f79249
--- /dev/null
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/IntChimpEncoder.java
@@ -0,0 +1,188 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.tsfile.encoding.encoder;
+
+import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
+
+import java.io.ByteArrayOutputStream;
+
+import static org.apache.iotdb.tsfile.common.conf.TSFileConfig.LEADING_ZERO_BITS_LENGTH_32BIT;
+import static org.apache.iotdb.tsfile.common.conf.TSFileConfig.MEANINGFUL_XOR_BITS_LENGTH_32BIT;
+import static org.apache.iotdb.tsfile.common.conf.TSFileConfig.VALUE_BITS_LENGTH_32BIT;
+
+/**
+ * This class includes code modified from Panagiotis Liakos chimp project.
+ *
+ * <p>Copyright: 2022- Panagiotis Liakos, Katia Papakonstantinopoulou and Yannis Kotidis
+ *
+ * <p>Project page: https://github.com/panagiotisl/chimp
+ *
+ * <p>License: http://www.apache.org/licenses/LICENSE-2.0
+ */
+public class IntChimpEncoder extends GorillaEncoderV2 {
+
+ // Number of most recent values kept as XOR-match candidates.
+ private static final int PREVIOUS_VALUES = 64;
+ private static final int PREVIOUS_VALUES_LOG2 = (int) (Math.log(PREVIOUS_VALUES) / Math.log(2));
+ // Minimum trailing-zero count for a hashed candidate to be worth referencing (case 01).
+ private static final int THRESHOLD = 5 + PREVIOUS_VALUES_LOG2;
+ // Mask of the low bits used as the hash key into `indices`.
+ private static final int SET_LSB = (int) Math.pow(2, THRESHOLD + 1) - 1;
+ private static final int CASE_ZERO_METADATA_LENGTH = PREVIOUS_VALUES_LOG2 + 2;
+ private static final int CASE_ONE_METADATA_LENGTH = PREVIOUS_VALUES_LOG2 + 10;
+ // Maps a raw leading-zero count to its 3-bit bucket id.
+ public static final short[] LEADING_REPRESENTATION = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+ };
+
+ // Rounds a raw leading-zero count down to its bucket's representative value.
+ public static final short[] LEADING_ROUND = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 12, 12, 12, 12, 16, 16, 18, 18, 20, 20, 22, 22, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24
+ };
+
+ private int storedValues[]; // ring buffer of the last PREVIOUS_VALUES values
+ private int[] indices; // maps low-bit hash key -> last global index that had that key
+ private int index = 0; // global count of compressed values
+ private int current = 0; // write position in the ring buffer
+
+ public IntChimpEncoder() {
+ this.setType(TSEncoding.CHIMP);
+ this.indices = new int[(int) Math.pow(2, THRESHOLD + 1)];
+ this.storedValues = new int[PREVIOUS_VALUES];
+ }
+
+ private static final int ONE_ITEM_MAX_SIZE =
+ (2
+ + LEADING_ZERO_BITS_LENGTH_32BIT
+ + MEANINGFUL_XOR_BITS_LENGTH_32BIT
+ + VALUE_BITS_LENGTH_32BIT)
+ / Byte.SIZE
+ + 1;
+
+ @Override
+ public final int getOneItemMaxSize() {
+ return ONE_ITEM_MAX_SIZE;
+ }
+
+ @Override
+ protected void reset() {
+ super.reset();
+ this.current = 0;
+ this.index = 0;
+ this.indices = new int[(int) Math.pow(2, THRESHOLD + 1)];
+ this.storedValues = new int[PREVIOUS_VALUES];
+ }
+
+ /** Appends the end-of-stream marker (Integer.MIN_VALUE), flushes, and resets for reuse. */
+ @Override
+ public void flush(ByteArrayOutputStream out) {
+ // ending stream
+ encode(Integer.MIN_VALUE, out);
+
+ // flip the byte no matter it is empty or not
+ // the empty ending byte is necessary when decoding
+ bitsLeft = 0;
+ flipByte(out);
+
+ // the encoder may be reused, so let us reset it
+ reset();
+ }
+
+ @Override
+ public final void encode(int value, ByteArrayOutputStream out) {
+ if (firstValueWasWritten) {
+ compressValue(value, out);
+ } else {
+ writeFirst(value, out);
+ firstValueWasWritten = true;
+ }
+ }
+
+ // the first value is stored with no compression
+ private void writeFirst(int value, ByteArrayOutputStream out) {
+ storedValues[current] = value;
+ writeBits(value, VALUE_BITS_LENGTH_32BIT, out);
+ indices[value & SET_LSB] = index;
+ }
+
+ /** Writes one value using the chimp 4-case control scheme against a recent candidate value. */
+ private void compressValue(int value, ByteArrayOutputStream out) {
+ // find the best previous value
+ int key = value & SET_LSB;
+ int xor;
+ int previousIndex;
+ int trailingZeros = 0;
+ int currIndex = indices[key];
+ if ((index - currIndex) < PREVIOUS_VALUES) {
+ // hashed candidate is still in the ring buffer — try it first
+ int tempXor = value ^ storedValues[currIndex % PREVIOUS_VALUES];
+ trailingZeros = Integer.numberOfTrailingZeros(tempXor);
+ if (trailingZeros > THRESHOLD) {
+ previousIndex = currIndex % PREVIOUS_VALUES;
+ xor = tempXor;
+ } else {
+ // weak match: fall back to the immediately preceding value
+ previousIndex = index % PREVIOUS_VALUES;
+ xor = storedValues[previousIndex] ^ value;
+ }
+ } else {
+ previousIndex = index % PREVIOUS_VALUES;
+ xor = storedValues[previousIndex] ^ value;
+ }
+
+ // case 00: the values are identical, write 00 control bits
+ // and the index of the previous value
+ if (xor == 0) {
+ writeBits(previousIndex, CASE_ZERO_METADATA_LENGTH, out);
+ storedLeadingZeros = VALUE_BITS_LENGTH_32BIT + 1;
+ } else {
+ int leadingZeros = LEADING_ROUND[Integer.numberOfLeadingZeros(xor)];
+ // case 01: store the index, the length of
+ // the number of leading zeros in the next 3 bits, then store
+ // the length of the meaningful XORed value in the next 5
+ // bits. Finally store the meaningful bits of the XORed value.
+ if (trailingZeros > THRESHOLD) {
+ int significantBits = VALUE_BITS_LENGTH_32BIT - leadingZeros - trailingZeros;
+ writeBits(
+ 256 * (PREVIOUS_VALUES + previousIndex)
+ + 32 * LEADING_REPRESENTATION[leadingZeros]
+ + significantBits,
+ CASE_ONE_METADATA_LENGTH,
+ out);
+ writeBits(xor >>> trailingZeros, significantBits, out); // Store the meaningful bits of XOR
+ storedLeadingZeros = VALUE_BITS_LENGTH_32BIT + 1;
+ // case 10: If the number of leading zeros is exactly
+ // equal to the previous leading zeros, use that information
+ // and just store 01 control bits and the meaningful XORed value.
+ } else if (leadingZeros == storedLeadingZeros) {
+ writeBit(out);
+ skipBit(out);
+ int significantBits = VALUE_BITS_LENGTH_32BIT - leadingZeros;
+ writeBits(xor, significantBits, out);
+ // case 11: store 11 control bits, the length of the number of leading
+ // zeros in the next 3 bits, then store the
+ // meaningful bits of the XORed value.
+ } else {
+ storedLeadingZeros = leadingZeros;
+ int significantBits = VALUE_BITS_LENGTH_32BIT - leadingZeros;
+ writeBits(24 + LEADING_REPRESENTATION[leadingZeros], 5, out);
+ writeBits(xor, significantBits, out);
+ }
+ }
+ current = (current + 1) % PREVIOUS_VALUES;
+ storedValues[current] = value;
+ index++;
+ indices[key] = index;
+ }
+}
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/LongBUFFEncoder.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/LongBUFFEncoder.java
new file mode 100644
index 0000000..1ea929e
--- /dev/null
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/LongBUFFEncoder.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.tsfile.encoding.encoder;
+
+import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+public class LongBUFFEncoder extends Encoder {
+
+ // Fraction-length table kept for format parity with the float/double BUFF encoders;
+ // longs carry no fraction, so countB always resolves to len[0] == 0.
+ private static final int lenlen = 10;
+ private static final int perlen = 5;
+ private static final int[] len = {0, 5, 8, 11, 15, 18, 21, 25, 28, 31, 35};
+
+ private int maxFloatLength; // always 0 for longs; retained so the layout matches the decoder
+ private boolean first; // true until the first value is buffered
+ private long minValue, maxValue; // running bounds over all buffered values
+ private int countA, countB; // bits per offset / fraction part
+ private List<Long> li; // values buffered until flush
+ private byte buffer = 0; // working byte for the bit writer
+ // NOTE(review): declared long here but int in the sibling BUFF encoders; type kept unchanged
+ // for compatibility — consider unifying to int.
+ protected long bitsLeft = Byte.SIZE;
+
+ /** Clears all buffered state so the encoder can be reused. */
+ private void reset() {
+ maxFloatLength = 0;
+ first = true;
+ li = new ArrayList<>();
+ buffer = 0;
+ bitsLeft = Byte.SIZE;
+ }
+
+ public LongBUFFEncoder() {
+ super(TSEncoding.BUFF);
+ reset();
+ }
+
+ /** Buffers {@code value} and maintains the running min/max used for range encoding. */
+ @Override
+ public void encode(long value, ByteArrayOutputStream out) {
+ if (first) {
+ minValue = value;
+ maxValue = value;
+ first = false;
+ } else {
+ minValue = Math.min(minValue, value);
+ maxValue = Math.max(maxValue, value);
+ }
+ li.add(value);
+ }
+
+ /**
+ * Writes the buffered page: count, countA, countB and minValue headers, then each value as a
+ * countA-bit offset from minValue. Resets the encoder for reuse.
+ */
+ @Override
+ public void flush(ByteArrayOutputStream out) throws IOException {
+ if (first) {
+ // Empty page: emit only the (zero) element count.
+ writeBits(li.size(), Integer.SIZE, out);
+ flushBits(out);
+ reset();
+ return;
+ }
+ calc();
+ writeBits(li.size(), Integer.SIZE, out);
+ writeBits(countA, Integer.SIZE, out);
+ writeBits(countB, Integer.SIZE, out);
+ writeBits(minValue, Long.SIZE, out);
+ for (long value : li) {
+ long partA = value - minValue;
+ writeBits(partA, countA, out);
+ }
+ flushBits(out);
+ reset();
+ }
+
+ /** Recomputes countA (bits for the value range) and countB (always 0 for longs). */
+ private void calc() {
+ countA = Long.SIZE - Long.numberOfLeadingZeros(maxValue - minValue);
+ if (maxFloatLength > lenlen) countB = len[lenlen] + perlen * (maxFloatLength - lenlen);
+ else countB = len[maxFloatLength];
+ }
+
+ /** Writes the lowest {@code len} bits of {@code value}, most significant bit first (recursive). */
+ protected void writeBits(long value, int len, ByteArrayOutputStream out) {
+ if (len == 0) return;
+ writeBits(value >>> 1, len - 1, out);
+ if ((value & 1) == 0) skipBit(out);
+ else writeBit(out);
+ }
+
+ /** Pads the current partially-filled byte with zero bits so it gets flushed to {@code out}. */
+ protected void flushBits(ByteArrayOutputStream out) {
+ while (bitsLeft != Byte.SIZE) skipBit(out);
+ }
+
+ /** Stores a 0 and increases the count of bits by 1 */
+ protected void skipBit(ByteArrayOutputStream out) {
+ bitsLeft--;
+ flipByte(out);
+ }
+
+ /** Stores a 1 and increases the count of bits by 1 */
+ protected void writeBit(ByteArrayOutputStream out) {
+ buffer |= (1 << (bitsLeft - 1));
+ bitsLeft--;
+ flipByte(out);
+ }
+
+ /** Emits the working byte and resets it, but only once all 8 bits are used. */
+ protected void flipByte(ByteArrayOutputStream out) {
+ if (bitsLeft == 0) {
+ out.write(buffer);
+ buffer = 0;
+ bitsLeft = Byte.SIZE;
+ }
+ }
+
+ @Override
+ public int getOneItemMaxSize() {
+ // NOTE(review): per-item size in bits, not bytes — confirm callers expect bits.
+ if (first) return 0;
+ calc();
+ return countA + countB;
+ }
+
+ @Override
+ public long getMaxByteSize() {
+ // NOTE(review): counted in bits; verify callers treat it as an upper bound.
+ if (first) return 0;
+ calc();
+ return (countA + countB) * li.size() + Integer.SIZE * 3 + Long.SIZE;
+ }
+}
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/LongChimpEncoder.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/LongChimpEncoder.java
new file mode 100644
index 0000000..170d34d
--- /dev/null
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/LongChimpEncoder.java
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.tsfile.encoding.encoder;
+
+import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
+
+import java.io.ByteArrayOutputStream;
+
+import static org.apache.iotdb.tsfile.common.conf.TSFileConfig.LEADING_ZERO_BITS_LENGTH_64BIT;
+import static org.apache.iotdb.tsfile.common.conf.TSFileConfig.MEANINGFUL_XOR_BITS_LENGTH_64BIT;
+import static org.apache.iotdb.tsfile.common.conf.TSFileConfig.VALUE_BITS_LENGTH_64BIT;
+
+/**
+ * This class includes code modified from Panagiotis Liakos chimp project.
+ *
+ * <p>Copyright: 2022- Panagiotis Liakos, Katia Papakonstantinopoulou and Yannis Kotidis
+ *
+ * <p>Project page: https://github.com/panagiotisl/chimp
+ *
+ * <p>License: http://www.apache.org/licenses/LICENSE-2.0
+ */
+public class LongChimpEncoder extends GorillaEncoderV2 {
+
+ // Number of most recent values kept as XOR-match candidates.
+ private static final int PREVIOUS_VALUES = 128;
+ private static final int PREVIOUS_VALUES_LOG2 = (int) (Math.log(PREVIOUS_VALUES) / Math.log(2));
+ // Minimum trailing-zero count for a hashed candidate to be worth referencing (case 01).
+ private static final int THRESHOLD = 6 + PREVIOUS_VALUES_LOG2;
+ // Mask of the low bits used as the hash key into `indices`.
+ private static final int SET_LSB = (int) Math.pow(2, THRESHOLD + 1) - 1;
+ private static final int CASE_ZERO_METADATA_LENGTH = PREVIOUS_VALUES_LOG2 + 2;
+ private static final int CASE_ONE_METADATA_LENGTH = PREVIOUS_VALUES_LOG2 + 11;
+ // Maps a raw leading-zero count to its 3-bit bucket id.
+ public static final short[] LEADING_REPRESENTATION = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+ };
+
+ // Rounds a raw leading-zero count down to its bucket's representative value.
+ public static final short[] LEADING_ROUND = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 12, 12, 12, 12, 16, 16, 18, 18, 20, 20, 22, 22, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24
+ };
+
+ private long storedValues[]; // ring buffer of the last PREVIOUS_VALUES values
+ private int[] indices; // maps low-bit hash key -> last global index that had that key
+ private int index = 0; // global count of compressed values
+ private int current = 0; // write position in the ring buffer
+
+ public LongChimpEncoder() {
+ this.setType(TSEncoding.CHIMP);
+ this.indices = new int[(int) Math.pow(2, THRESHOLD + 1)];
+ this.storedValues = new long[PREVIOUS_VALUES];
+ }
+
+ private static final int ONE_ITEM_MAX_SIZE =
+ (2
+ + LEADING_ZERO_BITS_LENGTH_64BIT
+ + MEANINGFUL_XOR_BITS_LENGTH_64BIT
+ + VALUE_BITS_LENGTH_64BIT)
+ / Byte.SIZE
+ + 1;
+
+ // Aliases of the UPPER_SNAKE tables above. They previously duplicated the same data; kept as
+ // public names for compatibility, but new code should use LEADING_REPRESENTATION/LEADING_ROUND
+ // (consistent with IntChimpEncoder).
+ public static final short[] leadingRepresentation = LEADING_REPRESENTATION;
+
+ public static final short[] leadingRound = LEADING_ROUND;
+
+ @Override
+ public final int getOneItemMaxSize() {
+ return ONE_ITEM_MAX_SIZE;
+ }
+
+ @Override
+ protected void reset() {
+ super.reset();
+ this.current = 0;
+ this.index = 0;
+ this.indices = new int[(int) Math.pow(2, THRESHOLD + 1)];
+ this.storedValues = new long[PREVIOUS_VALUES];
+ }
+
+ /** Appends the end-of-stream marker (Long.MIN_VALUE), flushes, and resets for reuse. */
+ @Override
+ public void flush(ByteArrayOutputStream out) {
+ // ending stream
+ encode(Long.MIN_VALUE, out);
+
+ // flip the byte no matter it is empty or not
+ // the empty ending byte is necessary when decoding
+ bitsLeft = 0;
+ flipByte(out);
+
+ // the encoder may be reused, so let us reset it
+ reset();
+ }
+
+ @Override
+ public final void encode(long value, ByteArrayOutputStream out) {
+ if (firstValueWasWritten) {
+ compressValue(value, out);
+ } else {
+ writeFirst(value, out);
+ firstValueWasWritten = true;
+ }
+ }
+
+ // the first value is stored with no compression
+ private void writeFirst(long value, ByteArrayOutputStream out) {
+ storedValues[current] = value;
+ writeBits(value, VALUE_BITS_LENGTH_64BIT, out);
+ indices[(int) value & SET_LSB] = index;
+ }
+
+ /** Writes one value using the chimp 4-case control scheme against a recent candidate value. */
+ private void compressValue(long value, ByteArrayOutputStream out) {
+ // find the best previous value
+ int key = (int) value & SET_LSB;
+ long xor;
+ int previousIndex;
+ int trailingZeros = 0;
+ int currIndex = indices[key];
+ if ((index - currIndex) < PREVIOUS_VALUES) {
+ // hashed candidate is still in the ring buffer — try it first
+ long tempXor = value ^ storedValues[currIndex % PREVIOUS_VALUES];
+ trailingZeros = Long.numberOfTrailingZeros(tempXor);
+ if (trailingZeros > THRESHOLD) {
+ previousIndex = currIndex % PREVIOUS_VALUES;
+ xor = tempXor;
+ } else {
+ // weak match: fall back to the immediately preceding value
+ previousIndex = index % PREVIOUS_VALUES;
+ xor = storedValues[previousIndex] ^ value;
+ }
+ } else {
+ previousIndex = index % PREVIOUS_VALUES;
+ xor = storedValues[previousIndex] ^ value;
+ }
+
+ // case 00: the values are identical, write 00 control bits
+ // and the index of the previous value
+ if (xor == 0) {
+ writeBits(previousIndex, CASE_ZERO_METADATA_LENGTH, out);
+ storedLeadingZeros = VALUE_BITS_LENGTH_64BIT + 1;
+ } else {
+ int leadingZeros = LEADING_ROUND[Long.numberOfLeadingZeros(xor)];
+ // case 01: store the index, the length of
+ // the number of leading zeros in the next 3 bits, then store
+ // the length of the meaningful XORed value in the next 6
+ // bits. Finally store the meaningful bits of the XORed value.
+ if (trailingZeros > THRESHOLD) {
+ int significantBits = VALUE_BITS_LENGTH_64BIT - leadingZeros - trailingZeros;
+ writeBits(
+ 512 * (PREVIOUS_VALUES + previousIndex)
+ + 64 * LEADING_REPRESENTATION[leadingZeros]
+ + significantBits,
+ CASE_ONE_METADATA_LENGTH,
+ out);
+ writeBits(xor >>> trailingZeros, significantBits, out); // Store the meaningful bits of XOR
+ storedLeadingZeros = VALUE_BITS_LENGTH_64BIT + 1;
+ // case 10: If the number of leading zeros is exactly
+ // equal to the previous leading zeros, use that information
+ // and just store 01 control bits and the meaningful XORed value.
+ } else if (leadingZeros == storedLeadingZeros) {
+ writeBit(out);
+ skipBit(out);
+ int significantBits = VALUE_BITS_LENGTH_64BIT - leadingZeros;
+ writeBits(xor, significantBits, out);
+ // case 11: store 11 control bits, the length of the number of leading
+ // zeros in the next 3 bits, then store the
+ // meaningful bits of the XORed value.
+ } else {
+ storedLeadingZeros = leadingZeros;
+ int significantBits = VALUE_BITS_LENGTH_64BIT - leadingZeros;
+ writeBits(24 + LEADING_REPRESENTATION[leadingZeros], 5, out);
+ writeBits(xor, significantBits, out);
+ }
+ }
+ current = (current + 1) % PREVIOUS_VALUES;
+ storedValues[current] = value;
+ index++;
+ indices[key] = index;
+ }
+}
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/PlainEncoder.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/PlainEncoder.java
index 1a22718..556edea 100644
--- a/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/PlainEncoder.java
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/PlainEncoder.java
@@ -24,7 +24,6 @@
import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
import org.apache.iotdb.tsfile.utils.Binary;
-import org.apache.iotdb.tsfile.utils.ReadWriteForEncodingUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -62,7 +61,10 @@
@Override
public void encode(int value, ByteArrayOutputStream out) {
- ReadWriteForEncodingUtils.writeVarInt(value, out);
+ // Emit the int as 4 fixed-width big-endian bytes so the plain decoder can read
+ // values without var-int decoding.
+ for (int i = 3; i >= 0; i--) {
+ out.write((byte) ((value >> (i * 8)) & 0xFF));
+ }
}
@Override
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/SinglePrecisionChimpEncoder.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/SinglePrecisionChimpEncoder.java
new file mode 100644
index 0000000..b428c3d
--- /dev/null
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/SinglePrecisionChimpEncoder.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.tsfile.encoding.encoder;
+
+import java.io.ByteArrayOutputStream;
+
+/**
+ * This class includes code modified from Panagiotis Liakos's chimp project.
+ *
+ * <p>Copyright: 2022- Panagiotis Liakos, Katia Papakonstantinopoulou and Yannis Kotidis
+ *
+ * <p>Project page: https://github.com/panagiotisl/chimp
+ *
+ * <p>License: http://www.apache.org/licenses/LICENSE-2.0
+ */
+public class SinglePrecisionChimpEncoder extends IntChimpEncoder {
+
+ private static final int CHIMP_ENCODING_ENDING = Float.floatToRawIntBits(Float.NaN);
+
+ @Override
+ public final void encode(float value, ByteArrayOutputStream out) {
+ encode(Float.floatToRawIntBits(value), out);
+ }
+
+ @Override
+ public void flush(ByteArrayOutputStream out) {
+ // ending stream
+ encode(CHIMP_ENCODING_ENDING, out);
+
+ // flip the byte whether it is empty or not
+ // the empty ending byte is necessary when decoding
+ bitsLeft = 0;
+ flipByte(out);
+
+ // the encoder may be reused, so let us reset it
+ reset();
+ }
+}
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/TSEncodingBuilder.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/TSEncodingBuilder.java
index e06ce87..079570d 100644
--- a/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/TSEncodingBuilder.java
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/encoding/encoder/TSEncodingBuilder.java
@@ -87,6 +87,10 @@
return new Rake();
case RLBE:
return new RLBE();
+ case BUFF:
+ return new Buff();
+ case CHIMP:
+ return new Chimp();
default:
throw new UnsupportedOperationException(type.toString());
}
@@ -549,4 +553,52 @@
// do nothing
}
}
+
+ public static class Buff extends TSEncodingBuilder {
+ @Override
+ public Encoder getEncoder(TSDataType type) {
+ switch (type) {
+ case INT32:
+ return new IntBUFFEncoder();
+ case INT64:
+ return new LongBUFFEncoder();
+ case FLOAT:
+ return new FloatBUFFEncoder();
+ case DOUBLE:
+ return new DoubleBUFFEncoder();
+ default:
+ throw new UnSupportedDataTypeException("BUFF doesn't support data type: " + type);
+ }
+ }
+
+ @Override
+ public void initFromProps(Map<String, String> props) {
+ // do nothing
+ }
+ }
+
+ /** for FLOAT, DOUBLE, INT, LONG. */
+ public static class Chimp extends TSEncodingBuilder {
+
+ @Override
+ public Encoder getEncoder(TSDataType type) {
+ switch (type) {
+ case FLOAT:
+ return new SinglePrecisionChimpEncoder();
+ case DOUBLE:
+ return new DoublePrecisionChimpEncoder();
+ case INT32:
+ return new IntChimpEncoder();
+ case INT64:
+ return new LongChimpEncoder();
+ default:
+ throw new UnSupportedDataTypeException("CHIMP doesn't support data type: " + type);
+ }
+ }
+
+ @Override
+ public void initFromProps(Map<String, String> props) {
+ // do nothing
+ }
+ }
}
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/enums/TSEncoding.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/enums/TSEncoding.java
index 9d217d2..1fc9b09 100644
--- a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/enums/TSEncoding.java
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/enums/TSEncoding.java
@@ -36,7 +36,9 @@
AC((byte) 14),
SPRINTZ((byte) 15),
RAKE((byte) 16),
- RLBE((byte) 17);
+ RLBE((byte) 17),
+ BUFF((byte) 18),
+ CHIMP((byte) 19);
private final byte type;
@@ -92,6 +94,10 @@
return TSEncoding.RAKE;
case 17:
return TSEncoding.RLBE;
+ case 18:
+ return TSEncoding.BUFF;
+ case 19:
+ return TSEncoding.CHIMP;
default:
throw new IllegalArgumentException("Invalid input: " + encoding);
}
diff --git a/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/decoder/EncodeTest.java b/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/decoder/EncodeTest.java
new file mode 100644
index 0000000..630b86a
--- /dev/null
+++ b/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/decoder/EncodeTest.java
@@ -0,0 +1,606 @@
+package org.apache.iotdb.tsfile.encoding.decoder;
+
+import org.apache.iotdb.tsfile.compress.ICompressor;
+import org.apache.iotdb.tsfile.compress.IUnCompressor;
+import org.apache.iotdb.tsfile.encoding.encoder.Encoder;
+import org.apache.iotdb.tsfile.encoding.encoder.TSEncodingBuilder;
+import org.apache.iotdb.tsfile.file.metadata.enums.CompressionType;
+import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
+import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
+
+import com.csvreader.CsvReader;
+import com.csvreader.CsvWriter;
+import org.junit.Test;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+
+public class EncodeTest {
+
+ @Test
+ public void test() throws IOException {
+ String[] Inputs = {
+ "/home/ubuntu/Datasets/Real-World/Numerical", "/home/ubuntu/Datasets/Synthetic/Numerical"
+ };
+ String[] Outputs = {
+ "/home/ubuntu/Real_Numerical_result.csv", "/home/ubuntu/Synthetic_Numerical_result.csv"
+ };
+
+ for (int idx = 0; idx < 2; idx++) {
+
+ String Input = Inputs[idx];
+ String Output = Outputs[idx];
+ int repeatTime = 50; // set repeat time
+
+ String[] dataTypeNames = {"INT32", "INT64", "FLOAT", "DOUBLE"};
+ // select encoding algorithms
+ TSEncoding[] encodingList = {
+ TSEncoding.PLAIN,
+ TSEncoding.TS_2DIFF,
+ TSEncoding.RLE,
+ TSEncoding.SPRINTZ,
+ TSEncoding.GORILLA,
+ TSEncoding.RLBE,
+ TSEncoding.RAKE,
+ TSEncoding.BUFF,
+ TSEncoding.CHIMP
+ };
+ // select compression algorithms
+ CompressionType[] compressList = {
+ CompressionType.UNCOMPRESSED,
+ CompressionType.LZ4,
+ CompressionType.GZIP,
+ CompressionType.SNAPPY
+ };
+ String[] head = {
+ "Input Direction",
+ "Data Type",
+ "Encoding Algorithm",
+ "Compress Algorithm",
+ "Encoding Time",
+ "Decoding Time",
+ "Compress Time",
+ "Uncompress Time",
+ "Compressed Size",
+ "Compression Ratio"
+ };
+
+ CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8);
+ writer.writeRecord(head); // write header to output file
+
+ for (String dataTypeName : dataTypeNames) {
+ String inputPath = Input + "/" + dataTypeName; // the directory of the input data
+ File file = new File(inputPath);
+ File[] tempList = file.listFiles();
+
+ for (File dataset : tempList) {
+ File[] temp2List = dataset.listFiles();
+ for (File f : temp2List) {
+ // fileRepeat += 1;
+ InputStream inputStream = new FileInputStream(f);
+ CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8);
+ String fileName = f.getAbsolutePath();
+ // ArrayList<String> dataIndex = new ArrayList<>();
+ ArrayList<String> data = new ArrayList<>();
+
+ loader.readHeaders();
+ while (loader.readRecord()) {
+ String[] temp = loader.getValues();
+ // dataIndex.add(temp[0]);
+ data.add(temp[1]);
+ }
+ loader.close();
+ inputStream.close();
+
+ // for (int index : columnIndexes) {
+ // // add a column to "data"
+ // System.out.println(index);
+ // loader.readHeaders();
+ // while (loader.readRecord()) {
+ // data.add(loader.getValues()[index]);
+ // }
+ // // loader.close();
+ // inputStream.close();
+
+ switch (dataTypeName) {
+ case "INT32":
+ {
+ TSDataType dataType = TSDataType.INT32; // set TSDataType
+ // ArrayList<Long> tmpIndex = new ArrayList<>();
+ ArrayList<Integer> tmp = new ArrayList<>();
+ // for (String valueIndex : dataIndex) {
+ // tmpIndex.add(Long.valueOf(valueIndex));
+ // }
+ for (String value : data) {
+ tmp.add(Integer.valueOf(value));
+ }
+ // Iterate over each encoding algorithm
+ for (TSEncoding encoding : encodingList) {
+
+ // Iterate over each compression algorithm
+ for (CompressionType comp : compressList) {
+ long encodeTime = 0;
+ long decodeTime = 0;
+
+ double ratio = 0;
+ double compressed_size = 0;
+
+ long compressTime = 0;
+ long uncompressTime = 0;
+
+ // repeat many times to test time
+ for (int i = 0; i < repeatTime; i++) {
+ // TSEncodingBuilder.getEncodingBuilder(encoding).initFromProps(props);
+ // Encoder encoderIndex =
+ // TSEncodingBuilder.getEncodingBuilder(encoding)
+ // .getEncoder(TSDataType.INT64);
+ // Decoder decoderIndex = Decoder.getDecoderByType(encoding,
+ // TSDataType.INT64);
+ Encoder encoder =
+ TSEncodingBuilder.getEncodingBuilder(encoding).getEncoder(dataType);
+ Decoder decoder = Decoder.getDecoderByType(encoding, dataType);
+
+ // ICompressor compressorIndex = ICompressor.getCompressor(comp);
+ // IUnCompressor unCompressorIndex = IUnCompressor.getUnCompressor(comp);
+ ICompressor compressor = ICompressor.getCompressor(comp);
+ IUnCompressor unCompressor = IUnCompressor.getUnCompressor(comp);
+
+ // ByteArrayOutputStream bufferIndex = new ByteArrayOutputStream();
+ ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+
+ // test encode time
+ long s = System.nanoTime();
+ // for (long valIndex : tmpIndex) encoderIndex.encode(valIndex,
+ // bufferIndex);
+ // encoderIndex.flush(bufferIndex);
+ for (int val : tmp) encoder.encode(val, buffer);
+ encoder.flush(buffer);
+ long e = System.nanoTime();
+ encodeTime += (e - s);
+
+ // test compress time
+ // byte[] elemsIndex = bufferIndex.toByteArray();
+ byte[] elems = buffer.toByteArray();
+ s = System.nanoTime();
+ // byte[] compressedIndex = compressorIndex.compress(elemsIndex);
+ byte[] compressed = compressor.compress(elems);
+ e = System.nanoTime();
+ compressTime += (e - s);
+
+ // test compression ratio and compressed size
+ // compressed_size += compressedIndex.length;
+ compressed_size += compressed.length;
+ double ratioTmp =
+ (double) (/* compressedIndex.length + */ compressed.length)
+ / (double)
+ (
+ /* tmpIndex.size() * Long.BYTES + */ tmp.size()
+ * Integer.BYTES);
+ ratio += ratioTmp;
+
+ // test uncompress time
+ s = System.nanoTime();
+ // byte[] xIndex = unCompressorIndex.uncompress(compressedIndex);
+ byte[] x = unCompressor.uncompress(compressed);
+ e = System.nanoTime();
+ uncompressTime += (e - s);
+
+ // test decode time
+ // ByteBuffer ebufferIndex = ByteBuffer.wrap(bufferIndex.toByteArray());
+ ByteBuffer ebuffer = ByteBuffer.wrap(buffer.toByteArray());
+ s = System.nanoTime();
+ // while (decoderIndex.hasNext(ebufferIndex)) {
+ // decoderIndex.readLong(ebufferIndex);
+ // }
+ while (decoder.hasNext(ebuffer)) {
+ decoder.readInt(ebuffer);
+ }
+ e = System.nanoTime();
+ decodeTime += (e - s);
+
+ buffer.close();
+ }
+
+ ratio /= repeatTime;
+ compressed_size /= repeatTime;
+ encodeTime /= repeatTime;
+ decodeTime /= repeatTime;
+ compressTime /= repeatTime;
+ uncompressTime /= repeatTime;
+
+ String[] record = {
+ f.toString().replaceAll("^/home/ubuntu/", ""),
+ dataTypeName,
+ encoding.toString(),
+ comp.toString(),
+ String.valueOf(encodeTime),
+ String.valueOf(decodeTime),
+ String.valueOf(compressTime),
+ String.valueOf(uncompressTime),
+ String.valueOf(compressed_size),
+ String.valueOf(ratio)
+ };
+ writer.writeRecord(record);
+ }
+ }
+ break;
+ }
+ case "INT64":
+ {
+ TSDataType dataType = TSDataType.INT64; // set TSDataType
+ // ArrayList<Long> tmpIndex = new ArrayList<>();
+ ArrayList<Long> tmp = new ArrayList<>();
+ // for (String valueIndex : dataIndex) {
+ // tmpIndex.add(Long.valueOf(valueIndex));
+ // }
+ for (String value : data) {
+ tmp.add(Long.valueOf(value));
+ }
+ // Iterate over each encoding algorithm
+ for (TSEncoding encoding : encodingList) {
+
+ // Iterate over each compression algorithm
+ for (CompressionType comp : compressList) {
+ long encodeTime = 0;
+ long decodeTime = 0;
+
+ double ratio = 0;
+ double compressed_size = 0;
+
+ long compressTime = 0;
+ long uncompressTime = 0;
+
+ // repeat many times to test time
+ for (int i = 0; i < repeatTime; i++) {
+ // TSEncodingBuilder.getEncodingBuilder(encoding).initFromProps(props);
+ // Encoder encoderIndex =
+ // TSEncodingBuilder.getEncodingBuilder(encoding)
+ // .getEncoder(TSDataType.INT64);
+ // Decoder decoderIndex = Decoder.getDecoderByType(encoding,
+ // TSDataType.INT64);
+ Encoder encoder =
+ TSEncodingBuilder.getEncodingBuilder(encoding).getEncoder(dataType);
+ Decoder decoder = Decoder.getDecoderByType(encoding, dataType);
+
+ // ICompressor compressorIndex = ICompressor.getCompressor(comp);
+ // IUnCompressor unCompressorIndex = IUnCompressor.getUnCompressor(comp);
+ ICompressor compressor = ICompressor.getCompressor(comp);
+ IUnCompressor unCompressor = IUnCompressor.getUnCompressor(comp);
+
+ // ByteArrayOutputStream bufferIndex = new ByteArrayOutputStream();
+ ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+
+ // test encode time
+ long s = System.nanoTime();
+ // for (long valIndex : tmpIndex) encoderIndex.encode(valIndex,
+ // bufferIndex);
+ // encoderIndex.flush(bufferIndex);
+ for (long val : tmp) encoder.encode(val, buffer);
+ encoder.flush(buffer);
+ long e = System.nanoTime();
+ encodeTime += (e - s);
+
+ // test compress time
+ // byte[] elemsIndex = bufferIndex.toByteArray();
+ byte[] elems = buffer.toByteArray();
+ s = System.nanoTime();
+ // byte[] compressedIndex = compressorIndex.compress(elemsIndex);
+ byte[] compressed = compressor.compress(elems);
+ e = System.nanoTime();
+ compressTime += (e - s);
+
+ // test compression ratio and compressed size
+ // compressed_size += compressedIndex.length;
+ compressed_size += compressed.length;
+ double ratioTmp =
+ (double) (/* compressedIndex.length + */ compressed.length)
+ / (double)
+ (
+ /* tmpIndex.size() * Long.BYTES + */ tmp.size() * Long.BYTES);
+ ratio += ratioTmp;
+
+ // test uncompress time
+ s = System.nanoTime();
+ // byte[] xIndex = unCompressorIndex.uncompress(compressedIndex);
+ byte[] x = unCompressor.uncompress(compressed);
+ e = System.nanoTime();
+ uncompressTime += (e - s);
+
+ // test decode time
+ // ByteBuffer ebufferIndex = ByteBuffer.wrap(bufferIndex.toByteArray());
+ ByteBuffer ebuffer = ByteBuffer.wrap(buffer.toByteArray());
+ s = System.nanoTime();
+ // while (decoderIndex.hasNext(ebufferIndex)) {
+ // decoderIndex.readLong(ebufferIndex);
+ // }
+ while (decoder.hasNext(ebuffer)) {
+ decoder.readLong(ebuffer);
+ }
+ e = System.nanoTime();
+ decodeTime += (e - s);
+
+ buffer.close();
+ }
+
+ ratio /= repeatTime;
+ compressed_size /= repeatTime;
+ encodeTime /= repeatTime;
+ decodeTime /= repeatTime;
+ compressTime /= repeatTime;
+ uncompressTime /= repeatTime;
+
+ String[] record = {
+ f.toString().replaceAll("^/home/ubuntu/", ""),
+ dataTypeName,
+ encoding.toString(),
+ comp.toString(),
+ String.valueOf(encodeTime),
+ String.valueOf(decodeTime),
+ String.valueOf(compressTime),
+ String.valueOf(uncompressTime),
+ String.valueOf(compressed_size),
+ String.valueOf(ratio)
+ };
+ writer.writeRecord(record);
+ }
+ }
+ break;
+ }
+ case "DOUBLE":
+ {
+ TSDataType dataType = TSDataType.DOUBLE; // set TSDataType
+ // ArrayList<Long> tmpIndex = new ArrayList<>();
+ ArrayList<Double> tmp = new ArrayList<>();
+ // for (String valueIndex : dataIndex) {
+ // tmpIndex.add(Long.valueOf(valueIndex));
+ // }
+ for (String value : data) {
+ tmp.add(Double.valueOf(value));
+ }
+ // Iterate over each encoding algorithm
+ for (TSEncoding encoding : encodingList) {
+
+ // Iterate over each compression algorithm
+ for (CompressionType comp : compressList) {
+ long encodeTime = 0;
+ long decodeTime = 0;
+
+ double ratio = 0;
+ double compressed_size = 0;
+
+ long compressTime = 0;
+ long uncompressTime = 0;
+
+ // repeat many times to test time
+ for (int i = 0; i < repeatTime; i++) {
+ // TSEncodingBuilder.getEncodingBuilder(encoding).initFromProps(props);
+ // Encoder encoderIndex =
+ // TSEncodingBuilder.getEncodingBuilder(encoding)
+ // .getEncoder(TSDataType.INT64);
+ // Decoder decoderIndex = Decoder.getDecoderByType(encoding,
+ // TSDataType.INT64);
+ Encoder encoder =
+ TSEncodingBuilder.getEncodingBuilder(encoding).getEncoder(dataType);
+ Decoder decoder = Decoder.getDecoderByType(encoding, dataType);
+
+ // ICompressor compressorIndex = ICompressor.getCompressor(comp);
+ // IUnCompressor unCompressorIndex = IUnCompressor.getUnCompressor(comp);
+ ICompressor compressor = ICompressor.getCompressor(comp);
+ IUnCompressor unCompressor = IUnCompressor.getUnCompressor(comp);
+
+ // ByteArrayOutputStream bufferIndex = new ByteArrayOutputStream();
+ ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+
+ // test encode time
+ long s = System.nanoTime();
+ // for (long valIndex : tmpIndex) encoderIndex.encode(valIndex,
+ // bufferIndex);
+ // encoderIndex.flush(bufferIndex);
+ for (double val : tmp) encoder.encode(val, buffer);
+ encoder.flush(buffer);
+ long e = System.nanoTime();
+ encodeTime += (e - s);
+
+ // test compress time
+ // byte[] elemsIndex = bufferIndex.toByteArray();
+ byte[] elems = buffer.toByteArray();
+ s = System.nanoTime();
+ // byte[] compressedIndex = compressorIndex.compress(elemsIndex);
+ byte[] compressed = compressor.compress(elems);
+ e = System.nanoTime();
+ compressTime += (e - s);
+
+ // test compression ratio and compressed size
+ // compressed_size += compressedIndex.length;
+ compressed_size += compressed.length;
+ double ratioTmp =
+ (double) (/* compressedIndex.length + */ compressed.length)
+ / (double)
+ (
+ /* tmpIndex.size() * Long.BYTES + */ tmp.size() * Double.BYTES);
+ ratio += ratioTmp;
+
+ // test uncompress time
+ s = System.nanoTime();
+ // byte[] xIndex = unCompressorIndex.uncompress(compressedIndex);
+ byte[] x = unCompressor.uncompress(compressed);
+ e = System.nanoTime();
+ uncompressTime += (e - s);
+
+ // test decode time
+ // ByteBuffer ebufferIndex = ByteBuffer.wrap(bufferIndex.toByteArray());
+ ByteBuffer ebuffer = ByteBuffer.wrap(buffer.toByteArray());
+ s = System.nanoTime();
+ // while (decoderIndex.hasNext(ebufferIndex)) {
+ // decoderIndex.readLong(ebufferIndex);
+ // }
+ while (decoder.hasNext(ebuffer)) {
+ decoder.readDouble(ebuffer);
+ }
+ e = System.nanoTime();
+ decodeTime += (e - s);
+
+ buffer.close();
+ }
+
+ ratio /= repeatTime;
+ compressed_size /= repeatTime;
+ encodeTime /= repeatTime;
+ decodeTime /= repeatTime;
+ compressTime /= repeatTime;
+ uncompressTime /= repeatTime;
+
+ String[] record = {
+ f.toString().replaceAll("^/home/ubuntu/", ""),
+ dataTypeName,
+ encoding.toString(),
+ comp.toString(),
+ String.valueOf(encodeTime),
+ String.valueOf(decodeTime),
+ String.valueOf(compressTime),
+ String.valueOf(uncompressTime),
+ String.valueOf(compressed_size),
+ String.valueOf(ratio)
+ };
+ writer.writeRecord(record);
+ }
+ }
+ break;
+ }
+ case "FLOAT":
+ {
+ TSDataType dataType = TSDataType.FLOAT; // set TSDataType
+ // ArrayList<Long> tmpIndex = new ArrayList<>();
+ ArrayList<Float> tmp = new ArrayList<>();
+ // for (String valueIndex : dataIndex) {
+ // tmpIndex.add(Long.valueOf(valueIndex));
+ // }
+ for (String value : data) {
+ tmp.add(Float.valueOf(value));
+ }
+ // Iterate over each encoding algorithm
+ for (TSEncoding encoding : encodingList) {
+
+ // Iterate over each compression algorithm
+ for (CompressionType comp : compressList) {
+ long encodeTime = 0;
+ long decodeTime = 0;
+
+ double ratio = 0;
+ double compressed_size = 0;
+
+ long compressTime = 0;
+ long uncompressTime = 0;
+
+ // repeat many times to test time
+ for (int i = 0; i < repeatTime; i++) {
+ // TSEncodingBuilder.getEncodingBuilder(encoding).initFromProps(props);
+ // Encoder encoderIndex =
+ // TSEncodingBuilder.getEncodingBuilder(encoding)
+ // .getEncoder(TSDataType.INT64);
+ // Decoder decoderIndex = Decoder.getDecoderByType(encoding,
+ // TSDataType.INT64);
+ Encoder encoder =
+ TSEncodingBuilder.getEncodingBuilder(encoding).getEncoder(dataType);
+ Decoder decoder = Decoder.getDecoderByType(encoding, dataType);
+
+ // ICompressor compressorIndex = ICompressor.getCompressor(comp);
+ // IUnCompressor unCompressorIndex = IUnCompressor.getUnCompressor(comp);
+ ICompressor compressor = ICompressor.getCompressor(comp);
+ IUnCompressor unCompressor = IUnCompressor.getUnCompressor(comp);
+
+ // ByteArrayOutputStream bufferIndex = new ByteArrayOutputStream();
+ ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+
+ // test encode time
+ long s = System.nanoTime();
+ // for (long valIndex : tmpIndex) encoderIndex.encode(valIndex,
+ // bufferIndex);
+ // encoderIndex.flush(bufferIndex);
+ for (float val : tmp) encoder.encode(val, buffer);
+ encoder.flush(buffer);
+ long e = System.nanoTime();
+ encodeTime += (e - s);
+
+ // test compress time
+ // byte[] elemsIndex = bufferIndex.toByteArray();
+ byte[] elems = buffer.toByteArray();
+ s = System.nanoTime();
+ // byte[] compressedIndex = compressorIndex.compress(elemsIndex);
+ byte[] compressed = compressor.compress(elems);
+ e = System.nanoTime();
+ compressTime += (e - s);
+
+ // test compression ratio and compressed size
+ // compressed_size += compressedIndex.length;
+ compressed_size += compressed.length;
+ double ratioTmp =
+ (double) (/* compressedIndex.length + */ compressed.length)
+ / (double)
+ (
+ /* tmpIndex.size() * Long.BYTES + */ tmp.size() * Float.BYTES);
+ ratio += ratioTmp;
+
+ // test uncompress time
+ s = System.nanoTime();
+ // byte[] xIndex = unCompressorIndex.uncompress(compressedIndex);
+ byte[] x = unCompressor.uncompress(compressed);
+ e = System.nanoTime();
+ uncompressTime += (e - s);
+
+ // test decode time
+ // ByteBuffer ebufferIndex = ByteBuffer.wrap(bufferIndex.toByteArray());
+ ByteBuffer ebuffer = ByteBuffer.wrap(buffer.toByteArray());
+ s = System.nanoTime();
+ // while (decoderIndex.hasNext(ebufferIndex)) {
+ // decoderIndex.readLong(ebufferIndex);
+ // }
+ while (decoder.hasNext(ebuffer)) {
+ decoder.readFloat(ebuffer);
+ }
+ e = System.nanoTime();
+ decodeTime += (e - s);
+
+ buffer.close();
+ }
+
+ ratio /= repeatTime;
+ compressed_size /= repeatTime;
+ encodeTime /= repeatTime;
+ decodeTime /= repeatTime;
+ compressTime /= repeatTime;
+ uncompressTime /= repeatTime;
+
+ String[] record = {
+ f.toString().replaceAll("^/home/ubuntu/", ""),
+ dataTypeName,
+ encoding.toString(),
+ comp.toString(),
+ String.valueOf(encodeTime),
+ String.valueOf(decodeTime),
+ String.valueOf(compressTime),
+ String.valueOf(uncompressTime),
+ String.valueOf(compressed_size),
+ String.valueOf(ratio)
+ };
+ writer.writeRecord(record);
+ }
+ }
+ break;
+ }
+ }
+ }
+
+ // if (fileRepeat > repeatTime) break;
+ }
+ }
+ writer.close();
+ }
+ }
+}
diff --git a/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/decoder/EncodeTextTest.java b/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/decoder/EncodeTextTest.java
new file mode 100644
index 0000000..12a1191
--- /dev/null
+++ b/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/decoder/EncodeTextTest.java
@@ -0,0 +1,240 @@
+package org.apache.iotdb.tsfile.encoding.decoder;
+
+import org.apache.iotdb.tsfile.compress.ICompressor;
+import org.apache.iotdb.tsfile.compress.IUnCompressor;
+import org.apache.iotdb.tsfile.encoding.encoder.Encoder;
+import org.apache.iotdb.tsfile.encoding.encoder.TSEncodingBuilder;
+import org.apache.iotdb.tsfile.file.metadata.enums.CompressionType;
+import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
+import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
+import org.apache.iotdb.tsfile.utils.Binary;
+
+import com.csvreader.CsvReader;
+import com.csvreader.CsvWriter;
+import org.junit.Test;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+
+public class EncodeTextTest {
+
+ @Test
+ public void test() throws IOException {
+ String[] Inputs = {
+ "/home/ubuntu/Datasets/Real-world/Text", "/home/ubuntu/Datasets/Synthetic/Text"
+ };
+ String[] Outputs = {
+ "/home/ubuntu/Real_Text_result.csv", "/home/ubuntu/Synthetic_Text_result.csv"
+ };
+
+ for (int idx = 0; idx < 2; idx++) {
+
+ String Input = Inputs[idx];
+ String Output = Outputs[idx];
+ int repeatTime = 50; // set repeat time
+
+ // select encoding algorithms
+ TSEncoding[] encodingList = {
+ TSEncoding.HUFFMAN,
+ TSEncoding.MTF,
+ TSEncoding.BW,
+ TSEncoding.DICTIONARY,
+ TSEncoding.RLE,
+ TSEncoding.AC,
+ TSEncoding.PLAIN
+ };
+ // select compression algorithms
+ CompressionType[] compressList = {
+ CompressionType.UNCOMPRESSED,
+ CompressionType.LZ4,
+ CompressionType.GZIP,
+ CompressionType.SNAPPY
+ };
+ String[] head = {
+ "Input Direction",
+ "Data Type",
+ "Encoding Algorithm",
+ "Compress Algorithm",
+ "Encoding Time",
+ "Decoding Time",
+ "Compress Time",
+ "Uncompress Time",
+ "Compressed Size",
+ "Compression Ratio"
+ };
+
+ CsvWriter writer = new CsvWriter(Output, ',', StandardCharsets.UTF_8);
+ writer.writeRecord(head); // write header to output file
+
+ String inputPath = Input; // the directory of the input data
+ File file = new File(inputPath);
+ File[] tempList = file.listFiles();
+
+ for (File dataset : tempList) {
+ File[] temp2List = dataset.listFiles();
+ ArrayList<File> temp3List = new ArrayList<>();
+ if (idx == 0) temp3List = new ArrayList<>(Arrays.asList(temp2List));
+ else {
+ for (File seed : temp2List) temp3List.addAll(Arrays.asList(seed.listFiles()));
+ }
+ for (File f : temp3List) {
+ System.out.println(f.toString());
+ // fileRepeat += 1;
+ InputStream inputStream = new FileInputStream(f);
+ CsvReader loader = new CsvReader(inputStream, StandardCharsets.UTF_8);
+ String fileName = f.getAbsolutePath();
+ // ArrayList<String> dataIndex = new ArrayList<>();
+ ArrayList<String> data = new ArrayList<>();
+
+ loader.readHeaders();
+ while (loader.readRecord()) {
+ String[] temp = loader.getValues();
+ // dataIndex.add(temp[0]);
+ if (temp.length < 2) break;
+ data.add(temp[1]);
+ }
+ loader.close();
+ inputStream.close();
+
+ // for (int index : columnIndexes) {
+ // // add a column to "data"
+ // System.out.println(index);
+ // loader.readHeaders();
+ // while (loader.readRecord()) {
+ // data.add(loader.getValues()[index]);
+ // }
+ // // loader.close();
+ // inputStream.close();
+ TSDataType dataType = TSDataType.TEXT; // set TSDataType
+ // ArrayList<Long> tmpIndex = new ArrayList<>();
+ ArrayList<Binary> tmp = new ArrayList<>();
+ // for (String valueIndex : dataIndex) {
+ // tmpIndex.add(Long.valueOf(valueIndex));
+ // }
+ for (String value : data) {
+ tmp.add(Binary.valueOf(value));
+ }
+ // Iterate over each encoding algorithm
+ for (TSEncoding encoding : encodingList) {
+
+ // Iterate over each compression algorithm
+ for (CompressionType comp : compressList) {
+ long encodeTime = 0;
+ long decodeTime = 0;
+
+ double ratio = 0;
+ double compressed_size = 0;
+
+ long compressTime = 0;
+ long uncompressTime = 0;
+
+ // repeat many times to test time
+ for (int i = 0; i < repeatTime; i++) {
+ // TSEncodingBuilder.getEncodingBuilder(encoding).initFromProps(props);
+ // Encoder encoderIndex =
+ // TSEncodingBuilder.getEncodingBuilder(encoding)
+ // .getEncoder(TSDataType.INT64);
+ // Decoder decoderIndex = Decoder.getDecoderByType(encoding,
+ // TSDataType.INT64);
+ Encoder encoder =
+ TSEncodingBuilder.getEncodingBuilder(encoding).getEncoder(dataType);
+ Decoder decoder = Decoder.getDecoderByType(encoding, dataType);
+
+ // ICompressor compressorIndex = ICompressor.getCompressor(comp);
+ // IUnCompressor unCompressorIndex = IUnCompressor.getUnCompressor(comp);
+ ICompressor compressor = ICompressor.getCompressor(comp);
+ IUnCompressor unCompressor = IUnCompressor.getUnCompressor(comp);
+
+ // ByteArrayOutputStream bufferIndex = new ByteArrayOutputStream();
+ ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+
+ // test encode time
+ long s = System.nanoTime();
+ // for (long valIndex : tmpIndex) encoderIndex.encode(valIndex, bufferIndex);
+ // encoderIndex.flush(bufferIndex);
+ int ori_size = 0;
+ for (Binary val : tmp) {
+ ori_size += val.getLength();
+ encoder.encode(val, buffer);
+ }
+ encoder.flush(buffer);
+ long e = System.nanoTime();
+ encodeTime += (e - s);
+
+ // test compress time
+ // byte[] elemsIndex = bufferIndex.toByteArray();
+ byte[] elems = buffer.toByteArray();
+ s = System.nanoTime();
+ // byte[] compressedIndex = compressorIndex.compress(elemsIndex);
+ byte[] compressed = compressor.compress(elems);
+ e = System.nanoTime();
+ compressTime += (e - s);
+
+ // test compression ratio and compressed size
+ // compressed_size += compressedIndex.length;
+ compressed_size += compressed.length;
+ double ratioTmp =
+ (double) (/* compressedIndex.length + */ compressed.length)
+ / (double) (/* tmpIndex.size() * Long.BYTES + */ ori_size);
+ ratio += ratioTmp;
+
+ // test uncompress time
+ s = System.nanoTime();
+ // byte[] xIndex = unCompressorIndex.uncompress(compressedIndex);
+ byte[] x = unCompressor.uncompress(compressed);
+ e = System.nanoTime();
+ uncompressTime += (e - s);
+
+ // test decode time
+ // ByteBuffer ebufferIndex = ByteBuffer.wrap(bufferIndex.toByteArray());
+ ByteBuffer ebuffer = ByteBuffer.wrap(buffer.toByteArray());
+ s = System.nanoTime();
+ // while (decoderIndex.hasNext(ebufferIndex)) {
+ // decoderIndex.readLong(ebufferIndex);
+ // }
+ while (decoder.hasNext(ebuffer)) {
+ decoder.readBinary(ebuffer);
+ }
+ e = System.nanoTime();
+ decodeTime += (e - s);
+
+ buffer.close();
+ }
+
+ ratio /= repeatTime;
+ compressed_size /= repeatTime;
+ encodeTime /= repeatTime;
+ decodeTime /= repeatTime;
+ compressTime /= repeatTime;
+ uncompressTime /= repeatTime;
+
+ String[] record = {
+ f.toString().replaceAll("^/home/ubuntu/", ""),
+ "TEXT",
+ encoding.toString(),
+ comp.toString(),
+ String.valueOf(encodeTime),
+ String.valueOf(decodeTime),
+ String.valueOf(compressTime),
+ String.valueOf(uncompressTime),
+ String.valueOf(compressed_size),
+ String.valueOf(ratio)
+ };
+ writer.writeRecord(record);
+ }
+ }
+ }
+
+ // if (fileRepeat > repeatTime) break;
+ }
+ writer.close();
+ }
+ }
+}
diff --git a/vscode/settings.json b/vscode/settings.json
new file mode 100644
index 0000000..d0bccb6
--- /dev/null
+++ b/vscode/settings.json
@@ -0,0 +1,4 @@
+{
+ "java.compile.nullAnalysis.mode": "disabled",
+ "cmake.configureOnOpen": false
+}
\ No newline at end of file
diff --git a/vscode/tasks.json b/vscode/tasks.json
new file mode 100644
index 0000000..cfc933b
--- /dev/null
+++ b/vscode/tasks.json
@@ -0,0 +1,49 @@
+{
+ // See https://go.microsoft.com/fwlink/?LinkId=733558
+ // for the documentation about the tasks.json format
+ "version": "2.0.0",
+ "tasks": [
+ {
+ "label": "start server",
+ "type": "shell",
+ "command": "./distribution/target/apache-iotdb-1.0.0-all-bin/apache-iotdb-1.0.0-all-bin/sbin/start-standalone.sh && sleep 3",
+ "problemMatcher":[]
+ },
+ {
+ "label": "stop server",
+ "type": "shell",
+ "command": "./distribution/target/apache-iotdb-1.0.0-all-bin/apache-iotdb-1.0.0-all-bin/sbin/stop-standalone.sh",
+ "problemMatcher":[]
+ },
+ {
+ "label": "start client",
+ "type": "shell",
+ "command": "./distribution/target/apache-iotdb-1.0.0-all-bin/apache-iotdb-1.0.0-all-bin/sbin/start-cli.sh",
+ "problemMatcher":[]
+ },
+ {
+ "label": "apply spotless",
+ "type": "shell",
+ "command": "mvn spotless:apply",
+ "problemMatcher":[]
+ },
+ {
+ "label": "build",
+ "type": "shell",
+ "command": "mvn clean package -DskipTests -Drat.skip=true",
+ "problemMatcher":[]
+ },
+ {
+ "label": "delete",
+ "type": "shell",
+ "command": "mvn clean -DskipTests -Drat.skip=true",
+ "problemMatcher":[]
+ },
+ {
+ "label": "pack python",
+ "type": "shell",
+ "command": "zip -r target/py.zip client-py/iotdb/",
+ "problemMatcher":[]
+ }
+ ]
+}
\ No newline at end of file