blob: 03c01415b65a4b0e50b6a88f838ca65c6fb22411 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iotdb.tsfile.encoding.encoder;
import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
import java.io.ByteArrayOutputStream;
import static org.apache.iotdb.tsfile.common.conf.TSFileConfig.LEADING_ZERO_BITS_LENGTH_32BIT;
import static org.apache.iotdb.tsfile.common.conf.TSFileConfig.MEANINGFUL_XOR_BITS_LENGTH_32BIT;
import static org.apache.iotdb.tsfile.common.conf.TSFileConfig.VALUE_BITS_LENGTH_32BIT;
/**
* This class includes code modified from Panagiotis Liakos chimp project.
*
* <p>Copyright: 2022- Panagiotis Liakos, Katia Papakonstantinopoulou and Yannis Kotidis
*
* <p>Project page: https://github.com/panagiotisl/chimp
*
* <p>License: http://www.apache.org/licenses/LICENSE-2.0
*/
public class IntChimpEncoder extends GorillaEncoderV2 {
private static final int PREVIOUS_VALUES = 64;
private static final int PREVIOUS_VALUES_LOG2 = (int) (Math.log(PREVIOUS_VALUES) / Math.log(2));
private static final int THRESHOLD = 5 + PREVIOUS_VALUES_LOG2;
private static final int SET_LSB = (int) Math.pow(2, THRESHOLD + 1d) - 1;
private static final int CASE_ZERO_METADATA_LENGTH = PREVIOUS_VALUES_LOG2 + 2;
private static final int CASE_ONE_METADATA_LENGTH = PREVIOUS_VALUES_LOG2 + 10;
protected static final short[] LEADING_REPRESENTATION = {
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
};
protected static final short[] LEADING_ROUND = {
0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 12, 12, 12, 12, 16, 16, 18, 18, 20, 20, 22, 22, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24
};
private int[] storedValues;
private int[] indices;
private int index = 0;
private int current = 0;
public IntChimpEncoder() {
this.setType(TSEncoding.CHIMP);
this.indices = new int[(int) Math.pow(2, THRESHOLD + 1d)];
this.storedValues = new int[PREVIOUS_VALUES];
}
private static final int ONE_ITEM_MAX_SIZE =
(2
+ LEADING_ZERO_BITS_LENGTH_32BIT
+ MEANINGFUL_XOR_BITS_LENGTH_32BIT
+ VALUE_BITS_LENGTH_32BIT)
/ Byte.SIZE
+ 1;
@Override
public final int getOneItemMaxSize() {
return ONE_ITEM_MAX_SIZE;
}
@Override
protected void reset() {
super.reset();
this.current = 0;
this.index = 0;
this.indices = new int[(int) Math.pow(2, THRESHOLD + 1d)];
this.storedValues = new int[PREVIOUS_VALUES];
}
@Override
public void flush(ByteArrayOutputStream out) {
// ending stream
encode(Integer.MIN_VALUE, out);
// flip the byte no matter it is empty or not
// the empty ending byte is necessary when decoding
bitsLeft = 0;
flipByte(out);
// the encoder may be reused, so let us reset it
reset();
}
@Override
public final void encode(int value, ByteArrayOutputStream out) {
if (firstValueWasWritten) {
compressValue(value, out);
} else {
writeFirst(value, out);
firstValueWasWritten = true;
}
}
// the first value is stored with no compression
private void writeFirst(int value, ByteArrayOutputStream out) {
storedValues[current] = value;
writeBits(value, VALUE_BITS_LENGTH_32BIT, out);
indices[value & SET_LSB] = index;
}
private void compressValue(int value, ByteArrayOutputStream out) {
// find the best previous value
int key = value & SET_LSB;
int xor;
int previousIndex;
int trailingZeros = 0;
int currIndex = indices[key];
if ((index - currIndex) < PREVIOUS_VALUES) {
int tempXor = value ^ storedValues[currIndex % PREVIOUS_VALUES];
trailingZeros = Integer.numberOfTrailingZeros(tempXor);
if (trailingZeros > THRESHOLD) {
previousIndex = currIndex % PREVIOUS_VALUES;
xor = tempXor;
} else {
previousIndex = index % PREVIOUS_VALUES;
xor = storedValues[previousIndex] ^ value;
}
} else {
previousIndex = index % PREVIOUS_VALUES;
xor = storedValues[previousIndex] ^ value;
}
// case 00: the values are identical, write 00 control bits
// and the index of the previous value
if (xor == 0) {
writeBits(previousIndex, CASE_ZERO_METADATA_LENGTH, out);
storedLeadingZeros = VALUE_BITS_LENGTH_32BIT + 1;
} else {
int leadingZeros = LEADING_ROUND[Integer.numberOfLeadingZeros(xor)];
// case 01: store the index, the length of
// the number of leading zeros in the next 3 bits, then store
// the length of the meaningful XORed value in the next 5
// bits. Finally store the meaningful bits of the XORed value.
if (trailingZeros > THRESHOLD) {
int significantBits = VALUE_BITS_LENGTH_32BIT - leadingZeros - trailingZeros;
writeBits(
256L * (PREVIOUS_VALUES + previousIndex)
+ 32L * LEADING_REPRESENTATION[leadingZeros]
+ significantBits,
CASE_ONE_METADATA_LENGTH,
out);
writeBits(xor >>> trailingZeros, significantBits, out); // Store the meaningful bits of XOR
storedLeadingZeros = VALUE_BITS_LENGTH_32BIT + 1;
// case 10: If the number of leading zeros is exactly
// equal to the previous leading zeros, use that information
// and just store 01 control bits and the meaningful XORed value.
} else if (leadingZeros == storedLeadingZeros) {
writeBit(out);
skipBit(out);
int significantBits = VALUE_BITS_LENGTH_32BIT - leadingZeros;
writeBits(xor, significantBits, out);
// case 11: store 11 control bits, the length of the number of leading
// zeros in the next 3 bits, then store the
// meaningful bits of the XORed value.
} else {
storedLeadingZeros = leadingZeros;
int significantBits = VALUE_BITS_LENGTH_32BIT - leadingZeros;
writeBits(24L + LEADING_REPRESENTATION[leadingZeros], 5, out);
writeBits(xor, significantBits, out);
}
}
current = (current + 1) % PREVIOUS_VALUES;
storedValues[current] = value;
index++;
indices[key] = index;
}
}