blob: cf1fa56606b02beb093bee9d9ffb3f658c7837cd [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iotdb.tsfile.compress;
import org.apache.iotdb.tsfile.exception.compress.CompressionTypeNotSupportedException;
import org.apache.iotdb.tsfile.exception.compress.GZIPCompressOverflowException;
import org.apache.iotdb.tsfile.file.metadata.enums.CompressionType;
import com.github.luben.zstd.Zstd;
import net.jpountz.lz4.LZ4Compressor;
import net.jpountz.lz4.LZ4Factory;
import org.tukaani.xz.LZMA2Options;
import org.tukaani.xz.XZInputStream;
import org.tukaani.xz.XZOutputStream;
import org.xerial.snappy.Snappy;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import static org.apache.iotdb.tsfile.file.metadata.enums.CompressionType.GZIP;
import static org.apache.iotdb.tsfile.file.metadata.enums.CompressionType.LZ4;
import static org.apache.iotdb.tsfile.file.metadata.enums.CompressionType.LZMA2;
import static org.apache.iotdb.tsfile.file.metadata.enums.CompressionType.SNAPPY;
import static org.apache.iotdb.tsfile.file.metadata.enums.CompressionType.ZSTD;
/** compress data according to type in schema. */
public interface ICompressor extends Serializable {
static ICompressor getCompressor(String name) {
return getCompressor(CompressionType.valueOf(name));
}
/**
* get Compressor according to CompressionType.
*
* @param name CompressionType
* @return the Compressor of specified CompressionType
*/
static ICompressor getCompressor(CompressionType name) {
if (name == null) {
throw new CompressionTypeNotSupportedException("NULL");
}
switch (name) {
case UNCOMPRESSED:
return new NoCompressor();
case SNAPPY:
return new SnappyCompressor();
case LZ4:
return new IOTDBLZ4Compressor();
case GZIP:
return new GZIPCompressor();
case ZSTD:
return new ZstdCompressor();
case LZMA2:
return new LZMA2Compressor();
default:
throw new CompressionTypeNotSupportedException(name.toString());
}
}
byte[] compress(byte[] data) throws IOException;
/**
* abstract method of compress. this method has an important overhead due to the fact that it
* needs to allocate a byte array to compress into, and then needs to resize this buffer to the
* actual compressed length.
*
* @return byte array of compressed data.
*/
byte[] compress(byte[] data, int offset, int length) throws IOException;
/**
* abstract method of compress.
*
* @return byte length of compressed data.
*/
int compress(byte[] data, int offset, int length, byte[] compressed) throws IOException;
/**
* If the data is large, this function is better than byte[].
*
* @param data MUST be DirectByteBuffer for Snappy.
* @param compressed MUST be DirectByteBuffer for Snappy.
* @return byte length of compressed data.
*/
int compress(ByteBuffer data, ByteBuffer compressed) throws IOException;
/**
* Get the maximum byte size needed for compressing data of the given byte size. For GZIP, this
* method is insecure and may cause {@code GZIPCompressOverflowException}
*
* @param uncompressedDataSize byte size of the data to compress
* @return maximum byte size of the compressed data
*/
int getMaxBytesForCompression(int uncompressedDataSize);
CompressionType getType();
/** NoCompressor will do nothing for data and return the input data directly. */
class NoCompressor implements ICompressor {
@Override
public byte[] compress(byte[] data) {
return data;
}
@Override
public byte[] compress(byte[] data, int offset, int length) throws IOException {
throw new IOException("No Compressor does not support compression function");
}
@Override
public int compress(byte[] data, int offset, int length, byte[] compressed) throws IOException {
throw new IOException("No Compressor does not support compression function");
}
@Override
public int compress(ByteBuffer data, ByteBuffer compressed) throws IOException {
throw new IOException("No Compressor does not support compression function");
}
@Override
public int getMaxBytesForCompression(int uncompressedDataSize) {
return uncompressedDataSize;
}
@Override
public CompressionType getType() {
return CompressionType.UNCOMPRESSED;
}
}
class SnappyCompressor implements ICompressor {
@Override
public byte[] compress(byte[] data) throws IOException {
if (data == null) {
return new byte[0];
}
return Snappy.compress(data);
}
@Override
public byte[] compress(byte[] data, int offset, int length) throws IOException {
byte[] maxCompressed = new byte[getMaxBytesForCompression(length)];
int compressedSize = Snappy.compress(data, offset, length, maxCompressed, 0);
byte[] compressed = null;
if (compressedSize < maxCompressed.length) {
compressed = new byte[compressedSize];
System.arraycopy(maxCompressed, 0, compressed, 0, compressedSize);
} else {
compressed = maxCompressed;
}
return compressed;
}
@Override
public int compress(byte[] data, int offset, int length, byte[] compressed) throws IOException {
return Snappy.compress(data, offset, length, compressed, 0);
}
@Override
public int compress(ByteBuffer data, ByteBuffer compressed) throws IOException {
return Snappy.compress(data, compressed);
}
@Override
public int getMaxBytesForCompression(int uncompressedDataSize) {
return Snappy.maxCompressedLength(uncompressedDataSize);
}
@Override
public CompressionType getType() {
return SNAPPY;
}
}
class IOTDBLZ4Compressor implements ICompressor {
/**
* This instance should be cached to avoid performance problem. See:
* https://github.com/lz4/lz4-java/issues/152 and https://github.com/apache/spark/pull/24905
*/
private static final LZ4Factory factory = LZ4Factory.fastestInstance();
private static final LZ4Compressor compressor = factory.fastCompressor();
public static LZ4Factory getFactory() {
return factory;
}
public IOTDBLZ4Compressor() {
super();
}
@Override
public byte[] compress(byte[] data) {
if (data == null) {
return new byte[0];
}
return compressor.compress(data);
}
@Override
public byte[] compress(byte[] data, int offset, int length) throws IOException {
return compressor.compress(data, offset, length);
}
@Override
public int compress(byte[] data, int offset, int length, byte[] compressed) {
return compressor.compress(data, offset, length, compressed, 0);
}
@Override
public int compress(ByteBuffer data, ByteBuffer compressed) {
int startPosition = compressed.position();
compressor.compress(data, compressed);
return compressed.position() - startPosition;
}
@Override
public int getMaxBytesForCompression(int uncompressedDataSize) {
return compressor.maxCompressedLength(uncompressedDataSize);
}
@Override
public CompressionType getType() {
return LZ4;
}
}
class GZIPCompress {
public static byte[] compress(byte[] data) throws IOException {
ByteArrayOutputStream out = new ByteArrayOutputStream();
GZIPOutputStream gzip = new GZIPOutputStream(out);
gzip.write(data);
gzip.close();
return out.toByteArray();
}
public static byte[] uncompress(byte[] data) throws IOException {
ByteArrayOutputStream out = new ByteArrayOutputStream();
ByteArrayInputStream in = new ByteArrayInputStream(data);
GZIPInputStream ungzip = new GZIPInputStream(in);
byte[] buffer = new byte[256];
int n;
while ((n = ungzip.read(buffer)) > 0) {
out.write(buffer, 0, n);
}
in.close();
return out.toByteArray();
}
}
class GZIPCompressor implements ICompressor {
@Override
public byte[] compress(byte[] data) throws IOException {
if (null == data) {
return new byte[0];
}
return GZIPCompress.compress(data);
}
@Override
public byte[] compress(byte[] data, int offset, int length) throws IOException {
byte[] dataBefore = new byte[length];
System.arraycopy(data, offset, dataBefore, 0, length);
return GZIPCompress.compress(dataBefore);
}
/** @exception GZIPCompressOverflowException if compressed byte array is too small. */
@Override
public int compress(byte[] data, int offset, int length, byte[] compressed) throws IOException {
byte[] dataBefore = new byte[length];
System.arraycopy(data, offset, dataBefore, 0, length);
byte[] res = GZIPCompress.compress(dataBefore);
if (res.length > compressed.length) {
throw new GZIPCompressOverflowException();
}
System.arraycopy(res, 0, compressed, 0, res.length);
return res.length;
}
/** @exception GZIPCompressOverflowException if compressed ByteBuffer is too small. */
@Override
public int compress(ByteBuffer data, ByteBuffer compressed) throws IOException {
int length = data.remaining();
byte[] dataBefore = new byte[length];
data.get(dataBefore, 0, length);
byte[] res = GZIPCompress.compress(dataBefore);
if (res.length > compressed.capacity()) {
throw new GZIPCompressOverflowException();
}
compressed.put(res);
return res.length;
}
@Override
public int getMaxBytesForCompression(int uncompressedDataSize) {
// hard to estimate
return Math.max(40 + uncompressedDataSize / 2, uncompressedDataSize);
}
@Override
public CompressionType getType() {
return GZIP;
}
}
class ZstdCompressor implements ICompressor {
private int compressionLevel;
public ZstdCompressor() {
super();
compressionLevel = Zstd.maxCompressionLevel();
}
@Override
public byte[] compress(byte[] data) throws IOException {
return Zstd.compress(data, compressionLevel);
}
@Override
public byte[] compress(byte[] data, int offset, int length) throws IOException {
if (data == null) {
return new byte[0];
}
byte[] compressedData = new byte[getMaxBytesForCompression(length)];
int compressedSize = compress(data, offset, length, compressedData);
byte[] result = new byte[compressedSize];
System.arraycopy(compressedData, 0, result, 0, compressedSize);
return result;
}
@Override
public int compress(byte[] data, int offset, int length, byte[] compressed) throws IOException {
return (int)
Zstd.compressByteArray(
compressed, 0, compressed.length, data, offset, length, compressionLevel);
}
/**
* @param data MUST be DirectByteBuffer for Zstd.
* @param compressed MUST be DirectByteBuffer for Zstd.
* @return byte length of compressed data.
*/
@Override
public int compress(ByteBuffer data, ByteBuffer compressed) throws IOException {
return Zstd.compress(compressed, data, compressionLevel);
}
@Override
public int getMaxBytesForCompression(int uncompressedDataSize) {
return (int) Zstd.compressBound(uncompressedDataSize);
}
@Override
public CompressionType getType() {
return ZSTD;
}
}
class LZMA2Compress {
public static byte[] compress(byte[] data) throws IOException {
LZMA2Options options = new LZMA2Options();
ByteArrayOutputStream out = new ByteArrayOutputStream();
XZOutputStream lzma2 = new XZOutputStream(out, options);
lzma2.write(data);
lzma2.close();
byte[] r = out.toByteArray();
return r;
}
public static byte[] uncompress(byte[] data) throws IOException {
ByteArrayOutputStream out = new ByteArrayOutputStream();
ByteArrayInputStream in = new ByteArrayInputStream(data);
XZInputStream unlzma2 = new XZInputStream(in);
byte[] buffer = new byte[256];
int n;
while ((n = unlzma2.read(buffer)) > 0) {
out.write(buffer, 0, n);
}
in.close();
byte[] r = out.toByteArray();
return r;
}
}
class LZMA2Compressor implements ICompressor {
@Override
public byte[] compress(byte[] data) throws IOException {
if (null == data) {
return new byte[0];
}
byte[] r = LZMA2Compress.compress(data);
return r;
}
@Override
public byte[] compress(byte[] data, int offset, int length) throws IOException {
byte[] dataBefore = new byte[length];
System.arraycopy(data, offset, dataBefore, 0, length);
byte[] r = LZMA2Compress.compress(dataBefore);
return r;
}
@Override
public int compress(byte[] data, int offset, int length, byte[] compressed) throws IOException {
byte[] dataBefore = new byte[length];
System.arraycopy(data, offset, dataBefore, 0, length);
byte[] res = LZMA2Compress.compress(dataBefore);
System.arraycopy(res, 0, compressed, 0, res.length);
return res.length;
}
@Override
public int compress(ByteBuffer data, ByteBuffer compressed) throws IOException {
int length = data.remaining();
byte[] dataBefore = new byte[length];
data.get(dataBefore, 0, length);
byte[] res = LZMA2Compress.compress(dataBefore);
compressed.put(res);
return res.length;
}
@Override
public int getMaxBytesForCompression(int uncompressedDataSize) {
// hard to estimate
return 100 + uncompressedDataSize;
}
@Override
public CompressionType getType() {
return LZMA2;
}
}
}