// blob: 9a2badb6cce32206db9d9c90090f2c8cd5058f14 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iotdb.tsfile.encoding.encoder;
import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
import org.apache.iotdb.tsfile.utils.BytesUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.BitSet;
/**
 * RegularDataEncoder is an encoder for compressing data of type integer and long. We adopt the
 * hypothesis that the difference between consecutive data points is always the same, which means
 * the data is regular.
 *
 * <p>To encode regular data, we first create an array as a block to store the data loaded into
 * the encoder. Once the block reaches the block size, we calculate the delta between each pair of
 * adjacent data points in the block in order to check whether any points are missing from the
 * data. If there are, we create a bitmap for this block to denote the positions of the missing
 * points. Next, we store the data info (the data size, the minimum delta value and the first data
 * point of this block) and the bitmap with its info into the result byte array output stream.
 */
public abstract class RegularDataEncoder extends Encoder {
/** Block size used by the no-argument constructors of the nested encoders. */
protected static final int BLOCK_DEFAULT_SIZE = 128;
/** Logger used by {@link #flush(ByteArrayOutputStream)} to report a failed flush. */
private static final Logger LOGGER = LoggerFactory.getLogger(RegularDataEncoder.class);
/** Stream the current block is written to; assigned at the start of flushBlockBuffer. */
protected ByteArrayOutputStream out;
/** Number of values a block holds; the block is flushed as soon as this many are buffered. */
protected int blockSize;
/** Set by checkMissingPoint when the deltas inside the block are not all equal. */
protected boolean isMissingPoint;
/**
 * Number of values buffered in the open block, or -1 when no block is open. When missing points
 * are detected, checkMissingPoint overwrites it with the expanded block size before the header is
 * written.
 */
protected int writeIndex = -1;
/** Count of values really buffered, saved by checkMissingPoint before writeIndex is replaced. */
protected int dataTotal;
/**
 * Creates an encoder registered under {@link TSEncoding#REGULAR}.
 *
 * @param size how many values are packed into one block.
 */
protected RegularDataEncoder(int size) {
  super(TSEncoding.REGULAR);
  this.blockSize = size;
}
/**
 * Writes the type-specific part of the block header to {@link #out}. Called by
 * writeHeaderToBytes right after the point count has been written.
 *
 * @throws IOException if writing to the stream fails
 */
protected abstract void writeHeader() throws IOException;
/**
 * Restores the per-block state of the concrete encoder. Called by flushBlockBuffer after a block
 * has been written.
 */
protected abstract void reset();
/**
 * Inspects the buffered block before anything is written. flushBlockBuffer reads
 * {@link #isMissingPoint} and {@link #writeIndex} afterwards, so implementations are expected to
 * leave both in the state that should be serialized.
 *
 * @param out the stream the block is about to be written to
 * @throws IOException if the implementation writes to the stream and that fails
 */
protected abstract void checkMissingPoint(ByteArrayOutputStream out) throws IOException;
/**
 * Writes the bitmap describing missing points. Only called by flushBlockBuffer when
 * {@link #isMissingPoint} is true.
 *
 * @param out the stream to write the bitmap to
 * @throws IOException if writing to the stream fails
 */
protected abstract void writeBitmap(ByteArrayOutputStream out) throws IOException;
/**
 * Writes the block header to {@link #out}: first the current {@code writeIndex} converted with
 * {@code BytesUtils.intToBytes}, then whatever the concrete encoder adds in writeHeader.
 *
 * @throws IOException if writing to the stream fails
 */
protected void writeHeaderToBytes() throws IOException {
  final byte[] pointCount = BytesUtils.intToBytes(writeIndex);
  out.write(pointCount);
  writeHeader();
}
/**
 * Serializes the open block into {@code out} and returns the encoder to the "no open block"
 * state. Does nothing when no block is open ({@code writeIndex == -1}).
 *
 * <p>Output order: missing-point flag, bitmap (only when the flag is set), header.
 *
 * @param out the stream that receives the encoded block
 * @throws IOException if writing to the stream fails
 */
protected void flushBlockBuffer(ByteArrayOutputStream out) throws IOException {
  final boolean blockIsOpen = writeIndex != -1;
  if (!blockIsOpen) {
    return;
  }
  this.out = out;

  // Let the concrete encoder analyse the block; this may set isMissingPoint and change writeIndex.
  checkMissingPoint(out);

  // Flag first, then the bitmap that the flag announces.
  final byte[] missingFlag = BytesUtils.boolToBytes(isMissingPoint);
  out.write(missingFlag);
  if (isMissingPoint) {
    writeBitmap(out);
  }

  writeHeaderToBytes();

  // Clear per-block state and mark the buffer as closed.
  reset();
  writeIndex = -1;
}
/**
 * Flushes every buffered value that has not been encoded yet into {@code out}. An
 * {@link IOException} raised while writing is logged and not rethrown.
 *
 * @param out the stream that receives the encoded block
 */
@Override
public void flush(ByteArrayOutputStream out) {
  try {
    this.flushBlockBuffer(out);
  } catch (IOException ioe) {
    LOGGER.error("flush data to stream failed!", ioe);
  }
}
/**
 * Regular-data encoder for {@code int} values.
 *
 * <p>Values are collected in a block buffer. When the block is flushed, only the point count, the
 * smallest delta, the first value and, if the deltas are not all equal, a bitmap of present and
 * missing positions are written.
 */
public static class IntRegularEncoder extends RegularDataEncoder {
  /** Block buffer; allocated lazily and reused for subsequent blocks of the same size. */
  private int[] data;
  /** First value of the block, written in the header. */
  private int firstValue;
  /** Scratch value used while scanning deltas in checkMissingPoint. */
  private int previousValue;
  /** Smallest delta between consecutive values of the block, written in the header. */
  private int minDeltaBase;
  /** Number of slots (present + missing) between the first and last value of the block. */
  private int newBlockSize;
  /** One bit per slot of the expanded block: set = value present, clear = value missing. */
  private BitSet bitmap;

  /** Creates an encoder that uses {@code BLOCK_DEFAULT_SIZE} values per block. */
  public IntRegularEncoder() {
    this(BLOCK_DEFAULT_SIZE);
  }

  /**
   * Creates an encoder with the given block size.
   *
   * @param size how many values are packed into one block.
   */
  public IntRegularEncoder(int size) {
    super(size);
    reset();
  }

  /** Restores the per-block header fields and the missing-point flag to their initial values. */
  @Override
  protected void reset() {
    minDeltaBase = Integer.MAX_VALUE;
    isMissingPoint = false;
    firstValue = 0;
    previousValue = 0;
  }

  /** Writes {@code minDeltaBase} followed by {@code firstValue} to the current output stream. */
  @Override
  protected void writeHeader() throws IOException {
    out.write(BytesUtils.intToBytes(minDeltaBase));
    out.write(BytesUtils.intToBytes(firstValue));
  }

  /**
   * Buffers {@code value}; flushes the block into {@code out} once it is full.
   *
   * @param value value to encode
   * @param out the stream that receives full blocks
   * @throws RuntimeException wrapping the {@link IOException} if writing a full block fails
   */
  @Override
  public void encode(int value, ByteArrayOutputStream out) {
    try {
      encodeValue(value, out);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  /** Returns 4, the size in bytes of one {@code int} item. */
  @Override
  public int getOneItemMaxSize() {
    return 4;
  }

  /**
   * Estimates the number of bytes the buffered block may occupy once flushed, computed from the
   * current {@code writeIndex}.
   */
  @Override
  public long getMaxByteSize() {
    // The meaning of 20 is:
    // identifier(4)+bitmapLength(4)+index(4)+minDeltaBase(4)+firstValue(4)
    return (long) 20 + (writeIndex * 2 / 8) + (writeIndex * 4);
  }

  /**
   * Finds the smallest delta between consecutive buffered values and detects whether the deltas
   * differ. If they do, the block is treated as regular data with missing points:
   * {@code dataTotal} keeps the number of buffered values and {@code writeIndex} is replaced by
   * the number of slots between the first and last value at a step of {@code minDeltaBase}.
   *
   * <p>Limitation: when the deltas differ and the smallest delta is 0 (for example a repeated
   * value), the slot computation divides by zero and throws {@link ArithmeticException}.
   *
   * @param out unused by this implementation
   */
  @Override
  protected void checkMissingPoint(ByteArrayOutputStream out) throws IOException {
    // A delta only exists when at least two values are buffered.
    if (writeIndex > 1) {
      previousValue = data[0];
      minDeltaBase = data[1] - data[0];
      for (int i = 1; i < writeIndex; i++) {
        int delta = data[i] - previousValue;
        // Any delta that differs from the running minimum means the block is not perfectly regular.
        if (delta != minDeltaBase) {
          isMissingPoint = true;
        }
        if (delta < minDeltaBase) {
          minDeltaBase = delta;
        }
        previousValue = data[i];
      }
    }
    firstValue = data[0];
    if (isMissingPoint) {
      // Remember the real count before writeIndex is repurposed as the expanded block size.
      dataTotal = writeIndex;
      newBlockSize = ((data[writeIndex - 1] - data[0]) / minDeltaBase) + 1;
      writeIndex = newBlockSize;
    }
  }

  /** Builds the missing-point bitmap and writes its byte length followed by its bytes. */
  @Override
  protected void writeBitmap(ByteArrayOutputStream out) throws IOException {
    data2Diff(data);
    byte[] bsArr = bitmap.toByteArray();
    out.write(BytesUtils.intToBytes(bsArr.length));
    out.write(bsArr);
  }

  /**
   * Appends an {@code int} value to the block buffer and flushes the block when it is full.
   *
   * @param value value to encode
   * @param out the ByteArrayOutputStream a full block is encoded into
   * @throws IOException if writing a full block to {@code out} fails
   */
  public void encodeValue(int value, ByteArrayOutputStream out) throws IOException {
    if (writeIndex == -1) {
      // Only slots below writeIndex are ever read, so the previous block's array can be reused.
      if (data == null || data.length != blockSize) {
        data = new int[blockSize];
      }
      writeIndex = 0;
    }
    data[writeIndex++] = value;
    if (writeIndex == blockSize) {
      // Call flushBlockBuffer directly so a write failure reaches the caller instead of being
      // swallowed by flush().
      flushBlockBuffer(out);
    }
  }

  /**
   * Builds {@code bitmap}: {@code newBlockSize} bits, all set, with the bits of missing slots
   * cleared.
   *
   * @param missingPointData the buffered values; the first {@code dataTotal} entries are read
   */
  private void data2Diff(int[] missingPointData) {
    bitmap = new BitSet(newBlockSize);
    bitmap.flip(0, newBlockSize);
    // Number of missing slots already recorded; shifts array indices to slot indices.
    int offset = 0;
    for (int i = 1; i < dataTotal; i++) {
      int delta = missingPointData[i] - missingPointData[i - 1];
      if (delta != minDeltaBase) {
        int missingPointNum = (delta / minDeltaBase) - 1;
        for (int j = 0; j < missingPointNum; j++) {
          bitmap.set(i + (offset++), false);
        }
      }
    }
  }
}
/**
 * Regular-data encoder for {@code long} values.
 *
 * <p>Values are collected in a block buffer. When the block is flushed, only the point count, the
 * smallest delta, the first value and, if the deltas are not all equal, a bitmap of present and
 * missing positions are written.
 */
public static class LongRegularEncoder extends RegularDataEncoder {
  /** Block buffer; allocated lazily and reused for subsequent blocks of the same size. */
  private long[] data;
  /** First value of the block, written in the header. */
  private long firstValue;
  /** Scratch value used while scanning deltas in checkMissingPoint. */
  private long previousValue;
  /** Smallest delta between consecutive values of the block, written in the header. */
  private long minDeltaBase;
  /** Number of slots (present + missing) between the first and last value of the block. */
  private int newBlockSize;
  /** One bit per slot of the expanded block: set = value present, clear = value missing. */
  private BitSet bitmap;

  /** Creates an encoder that uses {@code BLOCK_DEFAULT_SIZE} values per block. */
  public LongRegularEncoder() {
    this(BLOCK_DEFAULT_SIZE);
  }

  /**
   * Creates an encoder with the given block size.
   *
   * @param size how many values are packed into one block.
   */
  public LongRegularEncoder(int size) {
    super(size);
    reset();
  }

  /** Restores the per-block header fields and the missing-point flag to their initial values. */
  @Override
  protected void reset() {
    minDeltaBase = Long.MAX_VALUE;
    isMissingPoint = false;
    firstValue = 0L;
    previousValue = 0L;
  }

  /** Writes {@code minDeltaBase} followed by {@code firstValue} to the current output stream. */
  @Override
  protected void writeHeader() throws IOException {
    out.write(BytesUtils.longToBytes(minDeltaBase));
    out.write(BytesUtils.longToBytes(firstValue));
  }

  /**
   * Buffers {@code value}; flushes the block into {@code out} once it is full.
   *
   * @param value value to encode
   * @param out the stream that receives full blocks
   * @throws RuntimeException wrapping the {@link IOException} if writing a full block fails
   */
  @Override
  public void encode(long value, ByteArrayOutputStream out) {
    try {
      encodeValue(value, out);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  /** Returns 8, the size in bytes of one {@code long} item. */
  @Override
  public int getOneItemMaxSize() {
    return 8;
  }

  /**
   * Estimates the number of bytes the buffered block may occupy once flushed, computed from the
   * current {@code writeIndex}.
   */
  @Override
  public long getMaxByteSize() {
    // The meaning of 28 is:
    // identifier(4)+bitmapLength(4)+index(4)+minDeltaBase(8)+firstValue(8)
    return (long) 28 + (writeIndex * 2 / 8) + (writeIndex * 8);
  }

  /**
   * Finds the smallest delta between consecutive buffered values and detects whether the deltas
   * differ. If they do, the block is treated as regular data with missing points:
   * {@code dataTotal} keeps the number of buffered values and {@code writeIndex} is replaced by
   * the number of slots between the first and last value at a step of {@code minDeltaBase}
   * (narrowed to {@code int}).
   *
   * <p>Limitation: when the deltas differ and the smallest delta is 0 (for example a repeated
   * value), the slot computation divides by zero and throws {@link ArithmeticException}.
   *
   * @param out unused by this implementation
   */
  @Override
  protected void checkMissingPoint(ByteArrayOutputStream out) throws IOException {
    // A delta only exists when at least two values are buffered.
    if (writeIndex > 1) {
      previousValue = data[0];
      minDeltaBase = data[1] - data[0];
      for (int i = 1; i < writeIndex; i++) {
        long delta = data[i] - previousValue;
        // Any delta that differs from the running minimum means the block is not perfectly regular.
        if (delta != minDeltaBase) {
          isMissingPoint = true;
        }
        if (delta < minDeltaBase) {
          minDeltaBase = delta;
        }
        previousValue = data[i];
      }
    }
    firstValue = data[0];
    if (isMissingPoint) {
      // Remember the real count before writeIndex is repurposed as the expanded block size.
      dataTotal = writeIndex;
      newBlockSize = (int) (((data[writeIndex - 1] - data[0]) / minDeltaBase) + 1);
      writeIndex = newBlockSize;
    }
  }

  /** Builds the missing-point bitmap and writes its byte length followed by its bytes. */
  @Override
  protected void writeBitmap(ByteArrayOutputStream out) throws IOException {
    data2Diff(data);
    byte[] bsArr = bitmap.toByteArray();
    out.write(BytesUtils.intToBytes(bsArr.length));
    out.write(bsArr);
  }

  /**
   * Appends a {@code long} value to the block buffer and flushes the block when it is full.
   *
   * @param value value to encode
   * @param out the ByteArrayOutputStream a full block is encoded into
   * @throws IOException if writing a full block to {@code out} fails
   */
  public void encodeValue(long value, ByteArrayOutputStream out) throws IOException {
    if (writeIndex == -1) {
      // Only slots below writeIndex are ever read, so the previous block's array can be reused.
      if (data == null || data.length != blockSize) {
        data = new long[blockSize];
      }
      writeIndex = 0;
    }
    data[writeIndex++] = value;
    if (writeIndex == blockSize) {
      // Call flushBlockBuffer directly so a write failure reaches the caller instead of being
      // swallowed by flush().
      flushBlockBuffer(out);
    }
  }

  /**
   * Builds {@code bitmap}: {@code newBlockSize} bits, all set, with the bits of missing slots
   * cleared.
   *
   * @param missingPointData the buffered values; the first {@code dataTotal} entries are read
   */
  private void data2Diff(long[] missingPointData) {
    bitmap = new BitSet(newBlockSize);
    bitmap.flip(0, newBlockSize);
    // Number of missing slots already recorded; shifts array indices to slot indices.
    int offset = 0;
    for (int i = 1; i < dataTotal; i++) {
      long delta = missingPointData[i] - missingPointData[i - 1];
      if (delta != minDeltaBase) {
        int missingPointNum = (int) (delta / minDeltaBase) - 1;
        for (int j = 0; j < missingPointNum; j++) {
          bitmap.set(i + (offset++), false);
        }
      }
    }
  }
}
}