blob: 9136283157753f4accea8a5d8d7e3170eab14bb9 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.curator.framework.imps;
import com.google.common.annotations.VisibleForTesting;
import org.apache.curator.framework.api.CompressionProvider;
import java.io.EOFException;
import java.io.IOException;
import java.nio.BufferUnderflowException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Arrays;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.zip.*;
/**
 * {@link CompressionProvider} that reads and writes GZIP-framed DEFLATE data using pooled
 * {@link Deflater} and {@link Inflater} instances instead of {@code GZIPOutputStream} /
 * {@code GZIPInputStream}.
 */
public class GzipCompressionProvider implements CompressionProvider
{
    // This class re-implements java.util.zip.GZIPInputStream and GZIPOutputStream functionality to avoid
    // creating many finalizable Deflater and Inflater objects on heap (see
    // https://issues.apache.org/jira/browse/CURATOR-487). Even when Curator's minimum supported Java version becomes
    // no less than Java 12, where finalize() methods are removed in Deflater and Inflater classes and instead they
    // are phantom-referenced via Cleaner, it still makes sense to avoid GZIPInputStream and GZIPOutputStream because
    // phantom references are also not entirely free for GC algorithms, and also to allocate less garbage and make
    // fewer unnecessary data copies.

    /** Upper bound used for every buffer this class allocates or grows. */
    private static final int MAX_SAFE_JAVA_BYTE_ARRAY_SIZE = Integer.MAX_VALUE - 128;

    /** GZIP header magic number. */
    private static final int GZIP_MAGIC = 0x8b1f;

    // The value of the OS bit has changed in JDK 16;
    // see https://bugs.openjdk.org/browse/JDK-8244706 for details.
    private static final byte OS_BIT;

    static
    {
        final String version = System.getProperty("java.specification.version");
        if ( version.contains(".") )
        {
            // before or equal to 1.8
            OS_BIT = 0;
        }
        else
        {
            OS_BIT = (Float.parseFloat(version) >= 16 ? (byte) -1 : 0);
        }
    }

    /** See {@code java.util.zip.GZIPOutputStream.writeHeader()} */
    private static final byte[] GZIP_HEADER = new byte[] {
        (byte) GZIP_MAGIC, // Magic number (byte 0)
        (byte) (GZIP_MAGIC >> 8), // Magic number (byte 1)
        Deflater.DEFLATED, // Compression method (CM)
        0, // Flags (FLG)
        0, // Modification time MTIME (byte 0)
        0, // Modification time MTIME (byte 1)
        0, // Modification time MTIME (byte 2)
        0, // Modification time MTIME (byte 3)
        0, // Extra flags (XFLG)
        OS_BIT // Operating system (OS)
    };

    /** GZip flags, {@link #GZIP_HEADER}'s 4th byte */
    private static final int FHCRC = 1 << 1;
    private static final int FEXTRA = 1 << 2;
    private static final int FNAME = 1 << 3;
    private static final int FCOMMENT = 1 << 4;

    private static final int GZIP_HEADER_SIZE = GZIP_HEADER.length;

    /** 32-bit CRC and uncompressed data size */
    private static final int GZIP_TRAILER_SIZE = Integer.BYTES + Integer.BYTES;

    /** DEFLATE doesn't produce shorter compressed data */
    private static final int MIN_COMPRESSED_DATA_SIZE = 2;

    /**
     * Since Deflaters and Inflaters are acquired and returned to the pools in try-finally blocks that are free of
     * blocking calls themselves, it's not expected that the number of objects in the pools could exceed the number of
     * hardware threads on the machine much. Therefore it's accepted to have simple "ever-growing" (in fact, no) pools
     * of strongly-referenced objects.
     */
    private static final ConcurrentLinkedQueue<Deflater> DEFLATER_POOL = new ConcurrentLinkedQueue<>();
    private static final ConcurrentLinkedQueue<Inflater> INFLATER_POOL = new ConcurrentLinkedQueue<>();

    /** The value verified in GzipCompressionProviderTest.testEmpty() */
    private static final byte[] COMPRESSED_EMPTY_BYTES = new byte[] {
        31, -117, 8, 0, 0, 0, 0, 0, 0, OS_BIT, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };

    /**
     * Takes a {@link Deflater} from the pool, creating a new one when the pool is empty.
     */
    private static Deflater acquireDeflater()
    {
        Deflater deflater = DEFLATER_POOL.poll();
        if ( deflater == null )
        {
            // Using the same settings as in GZIPOutputStream constructor
            deflater = new Deflater(Deflater.DEFAULT_COMPRESSION, true);
        }
        return deflater;
    }

    /**
     * Takes an {@link Inflater} from the pool, creating a new one when the pool is empty.
     */
    private static Inflater acquireInflater()
    {
        Inflater inflater = INFLATER_POOL.poll();
        if ( inflater == null )
        {
            // Using the same nowrap setting as GZIPInputStream constructor
            inflater = new Inflater(true);
        }
        return inflater;
    }

    /**
     * Compresses {@code data} into a GZIP-framed byte array.
     *
     * @param path not used by this implementation
     * @param data the bytes to compress; an empty array yields a copy of a precomputed constant
     * @return a newly allocated array holding the GZIP header, the DEFLATE stream and the GZIP trailer
     */
    @Override
    public byte[] compress(String path, byte[] data)
    {
        if ( data.length == 0 )
        {
            // clone() because clients could update the array
            return COMPRESSED_EMPTY_BYTES.clone();
        }
        return doCompress(data);
    }

    /**
     * Compresses {@code data} without the empty-input shortcut taken by {@link #compress(String, byte[])}.
     *
     * @param data the bytes to compress
     * @return a newly allocated, exactly sized array: GZIP header + DEFLATE stream + CRC32/size trailer
     * @throws OutOfMemoryError if the output does not fit into an array of
     *         {@code MAX_SAFE_JAVA_BYTE_ARRAY_SIZE} bytes
     */
    @VisibleForTesting
    static byte[] doCompress(byte[] data)
    {
        // The header bytes are copied in and the rest of the (estimated) buffer is zero-filled
        byte[] result = Arrays.copyOf(GZIP_HEADER, conservativeGZippedSizeEstimate(data.length));
        Deflater deflater = acquireDeflater();
        try
        {
            deflater.setInput(data);
            deflater.finish();
            int offset = GZIP_HEADER_SIZE;
            while ( true )
            {
                // Always keep room for the trailer behind the compressed data
                int available = result.length - GZIP_TRAILER_SIZE - offset;
                int numCompressedBytes = deflater.deflate(result, offset, available);
                offset += numCompressedBytes;
                if ( deflater.finished() )
                {
                    break;
                }
                // Same guard as in decompress(): once the buffer has reached its maximum size and a further
                // deflate() call made no progress, there is no way to fit the output into a single array.
                if ( result.length == MAX_SAFE_JAVA_BYTE_ARRAY_SIZE && numCompressedBytes == 0 )
                {
                    throw new OutOfMemoryError("Unable to compress that much data into a single byte[] array");
                }
                // Grow by 50%, computed in long arithmetic and capped so the new length can neither overflow
                // int nor exceed the maximum array size
                int newResultLength =
                    (int) Math.min((long) result.length + (result.length / 2), MAX_SAFE_JAVA_BYTE_ARRAY_SIZE);
                if ( result.length != newResultLength )
                {
                    result = Arrays.copyOf(result, newResultLength);
                }
            }

            // Write GZip trailer
            CRC32 crc = new CRC32();
            crc.update(data, 0, data.length);
            writeLittleEndianInt(result, offset, (int) crc.getValue());
            writeLittleEndianInt(result, offset + 4, data.length);

            int endOffset = offset + GZIP_TRAILER_SIZE;
            if ( result.length != endOffset )
            {
                result = Arrays.copyOf(result, endOffset);
            }
            return result;
        }
        finally
        {
            deflater.reset();
            DEFLATER_POOL.add(deflater);
        }
    }

    /**
     * Returns the initial output buffer size used by {@link #doCompress(byte[])} for input of the given size:
     * header + estimated compressed payload + trailer.
     */
    private static int conservativeGZippedSizeEstimate(int dataSize)
    {
        int conservativeCompressedDataSizeEstimate;
        if ( dataSize < 512 )
        {
            // Assuming DEFLATE doesn't compress small data well
            conservativeCompressedDataSizeEstimate = Math.max(dataSize, MIN_COMPRESSED_DATA_SIZE);
        }
        else
        {
            // Assuming pretty bad 2:1 compression ratio
            conservativeCompressedDataSizeEstimate = Math.max(512, dataSize / 2);
        }
        return GZIP_HEADER_SIZE + conservativeCompressedDataSizeEstimate + GZIP_TRAILER_SIZE;
    }

    /**
     * Stores {@code v} into {@code b[offset .. offset + 3]}, least significant byte first.
     */
    private static void writeLittleEndianInt(byte[] b, int offset, int v)
    {
        b[offset] = (byte) v;
        b[offset + 1] = (byte) (v >> 8);
        b[offset + 2] = (byte) (v >> 16);
        b[offset + 3] = (byte) (v >> 24);
    }

    /**
     * Decompresses a single GZIP member.
     *
     * @param path not used by this implementation
     * @param gzippedDataBytes the complete GZIP data: header, DEFLATE stream and trailer
     * @return a newly allocated array with the uncompressed bytes
     * @throws EOFException if the input ends before the header is complete or is too short to hold a
     *         header, a minimal DEFLATE stream and a trailer
     * @throws ZipException if the header, the DEFLATE stream or the trailer is invalid, or if bytes remain
     *         between the end of the DEFLATE stream and the trailer
     * @throws IOException declared by the interface; the exceptions above are its subtypes
     * @throws OutOfMemoryError if the uncompressed data does not fit into an array of
     *         {@code MAX_SAFE_JAVA_BYTE_ARRAY_SIZE} bytes
     */
    @Override
    public byte[] decompress(String path, byte[] gzippedDataBytes) throws IOException
    {
        if ( Arrays.equals(gzippedDataBytes, COMPRESSED_EMPTY_BYTES) )
        {
            // Allocating a new array instead of creating a static constant because clients may somehow depend on the
            // identity of the returned arrays
            return new byte[0];
        }
        ByteBuffer gzippedData = ByteBuffer.wrap(gzippedDataBytes);
        gzippedData.order(ByteOrder.LITTLE_ENDIAN);
        int headerSize = readGzipHeader(gzippedData);
        if ( gzippedDataBytes.length < headerSize + MIN_COMPRESSED_DATA_SIZE + GZIP_TRAILER_SIZE )
        {
            throw new EOFException("Too short GZipped data");
        }
        int compressedDataSize = gzippedDataBytes.length - headerSize - GZIP_TRAILER_SIZE;
        // Assuming 3:1 compression ratio. Intentionally a more generous estimation than in
        // conservativeGZippedSizeEstimate() to reduce the probability of result array reallocation.
        int initialResultLength = (int) Math.min(compressedDataSize * 3L, MAX_SAFE_JAVA_BYTE_ARRAY_SIZE);
        byte[] result = new byte[initialResultLength];
        Inflater inflater = acquireInflater();
        try
        {
            inflater.setInput(gzippedDataBytes, headerSize, compressedDataSize);
            CRC32 crc = new CRC32();
            int offset = 0;
            while ( true )
            {
                int numDecompressedBytes;
                try
                {
                    numDecompressedBytes = inflater.inflate(result, offset, result.length - offset);
                }
                catch ( DataFormatException e )
                {
                    String s = e.getMessage();
                    // ZipException has no cause-accepting constructor, so attach the cause explicitly
                    ZipException zipException = new ZipException(s != null ? s : "Invalid ZLIB data format");
                    zipException.initCause(e);
                    throw zipException;
                }
                crc.update(result, offset, numDecompressedBytes);
                offset += numDecompressedBytes;
                if ( inflater.finished() || inflater.needsDictionary() )
                {
                    break;
                }
                // Just calling inflater.needsInput() doesn't work as expected, apparently it doesn't uphold its own
                // contract and could have needsInput() == true if numDecompressedBytes != 0 and that just means that
                // there is not enough space in the result array
                else if ( numDecompressedBytes == 0 && inflater.needsInput() )
                {
                    throw new ZipException("Corrupt GZipped data");
                }
                // Inflater's contract doesn't say whether it's able to be finished() without returning 0 from inflate()
                // call, so the additional `numDecompressedBytes == 0` condition ensures that we did another cycle and
                // definitely need to inflate some more bytes.
                if ( result.length == MAX_SAFE_JAVA_BYTE_ARRAY_SIZE && numDecompressedBytes == 0 )
                {
                    throw new OutOfMemoryError("Unable to uncompress that much data into a single byte[] array");
                }
                int newResultLength =
                    (int) Math.min((long) result.length + (result.length / 2), MAX_SAFE_JAVA_BYTE_ARRAY_SIZE);
                if ( result.length != newResultLength )
                {
                    result = Arrays.copyOf(result, newResultLength);
                }
            }
            if ( inflater.getRemaining() != 0 )
            {
                throw new ZipException("Expected just one GZip block, without garbage in the end");
            }

            // Trailer: little-endian CRC32 followed by little-endian uncompressed size
            int checksum = gzippedData.getInt(gzippedDataBytes.length - GZIP_TRAILER_SIZE);
            int numUncompressedBytes = gzippedData.getInt(gzippedDataBytes.length - Integer.BYTES);
            if ( checksum != (int) crc.getValue() || numUncompressedBytes != offset )
            {
                throw new ZipException("Corrupt GZIP trailer");
            }

            if ( result.length != offset )
            {
                result = Arrays.copyOf(result, offset);
            }
            return result;
        }
        finally
        {
            inflater.reset();
            INFLATER_POOL.add(inflater);
        }
    }

    /**
     * Parses the GZIP header at the buffer's current position and returns the header size.
     *
     * @throws EOFException if the buffer ends before the header is complete
     * @throws ZipException if the header is not a valid GZIP header
     */
    private static int readGzipHeader(ByteBuffer gzippedData) throws IOException
    {
        try
        {
            return doReadHeader(gzippedData);
        }
        catch ( BufferUnderflowException e )
        {
            EOFException eof = new EOFException();
            eof.initCause(e);
            throw eof;
        }
    }

    /**
     * Does the actual header parsing for {@link #readGzipHeader(ByteBuffer)}. The buffer must be array-backed
     * and in little-endian order. Reads past the end of the buffer surface as
     * {@link BufferUnderflowException} (relative gets) or {@link EOFException} (skips).
     */
    private static int doReadHeader(ByteBuffer gzippedData) throws IOException
    {
        if ( gzippedData.getChar() != GZIP_MAGIC )
        {
            throw new ZipException("Not in GZip format");
        }
        if ( gzippedData.get() != Deflater.DEFLATED )
        {
            throw new ZipException("Unsupported compression method");
        }
        int flags = gzippedData.get();
        // Skip MTIME, XFL, and OS fields
        skip(gzippedData, Integer.BYTES + Byte.BYTES + Byte.BYTES);
        if ( (flags & FEXTRA) != 0 )
        {
            // The extra field is prefixed with its own 16-bit length
            int extraBytes = gzippedData.getChar();
            skip(gzippedData, extraBytes);
        }
        if ( (flags & FNAME) != 0 )
        {
            skipZeroTerminatedString(gzippedData);
        }
        if ( (flags & FCOMMENT) != 0 )
        {
            skipZeroTerminatedString(gzippedData);
        }
        if ( (flags & FHCRC) != 0 )
        {
            // The stored value is compared against the low 16 bits of the CRC32 of all header bytes read so far
            CRC32 crc = new CRC32();
            crc.update(gzippedData.array(), gzippedData.arrayOffset(), gzippedData.position());
            if ( gzippedData.getChar() != (char) crc.getValue() )
            {
                throw new ZipException("Corrupt GZIP header");
            }
        }
        return gzippedData.position();
    }

    /**
     * Advances the buffer position by {@code skipBytes}.
     *
     * @throws EOFException if the new position is rejected by the buffer
     */
    private static void skip(ByteBuffer gzippedData, int skipBytes) throws IOException
    {
        try
        {
            gzippedData.position(gzippedData.position() + skipBytes);
        }
        catch ( IllegalArgumentException e )
        {
            EOFException eof = new EOFException();
            eof.initCause(e);
            throw eof;
        }
    }

    /**
     * Advances the buffer position past the next zero byte. A missing terminator makes the relative
     * {@code get()} throw {@link BufferUnderflowException}.
     */
    private static void skipZeroTerminatedString(ByteBuffer gzippedData)
    {
        while ( gzippedData.get() != 0 )
        {
            // loop
        }
    }
}