blob: b9218bd3b15bd3118b204dbe79745f9a8736d8ae [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.impala.catalog;
import org.apache.impala.fb.FbCompression;
import org.apache.impala.thrift.THdfsCompression;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
/**
* Support for recognizing compression suffixes on data files.
* Compression of a file is recognized in mapreduce by looking for suffixes of
* supported codecs.
* For now Impala supports LZO, GZIP, SNAPPY, BZIP2 and some additional formats if plugins
* are available. Even if a plugin is available, we need to add the file suffixes here so
* that we can resolve the compression type from the file name. LZO can use the specific
* HIVE input class.
*/
public enum HdfsCompression {
NONE,
DEFLATE,
GZIP,
BZIP2,
SNAPPY,
LZO,
LZO_INDEX, //Lzo index file.
LZ4,
ZSTD;
/* Map from a suffix to a compression type */
public static final ImmutableMap<String, HdfsCompression> SUFFIX_MAP =
ImmutableMap.<String, HdfsCompression>builder().
put("deflate", DEFLATE).
put("gz", GZIP).
put("bz2", BZIP2).
put("snappy", SNAPPY).
put("lzo", LZO).
put("index", LZO_INDEX).
put("lz4", LZ4).
put("zst", ZSTD).
build();
/* Given a file name return its compression type, if any. */
public static HdfsCompression fromFileName(String fileName) {
int index = fileName.lastIndexOf(".");
if (index == -1) {
return NONE;
}
String suffix = fileName.substring(index + 1);
HdfsCompression compression = SUFFIX_MAP.get(suffix.toLowerCase());
return compression == null ? NONE : compression;
}
public THdfsCompression toThrift() {
switch (this) {
case NONE: return THdfsCompression.NONE;
case DEFLATE: return THdfsCompression.DEFLATE;
case GZIP: return THdfsCompression.GZIP;
case BZIP2: return THdfsCompression.BZIP2;
case SNAPPY: return THdfsCompression.SNAPPY_BLOCKED;
case LZO: return THdfsCompression.LZO;
case LZ4: return THdfsCompression.LZ4;
case ZSTD: return THdfsCompression.ZSTD;
default: throw new IllegalStateException("Unexpected codec: " + this);
}
}
public byte toFb() {
switch (this) {
case NONE: return FbCompression.NONE;
case DEFLATE: return FbCompression.DEFLATE;
case GZIP: return FbCompression.GZIP;
case BZIP2: return FbCompression.BZIP2;
case SNAPPY: return FbCompression.SNAPPY;
case LZO: return FbCompression.LZO;
case LZ4: return FbCompression.LZ4;
case ZSTD: return FbCompression.ZSTD;
default: throw new IllegalStateException("Unexpected codec: " + this);
}
}
/* Returns a compression type based on (Hive's) intput format. Special case for LZO. */
public static HdfsCompression fromHdfsInputFormatClass(String inputFormatClass) {
Preconditions.checkNotNull(inputFormatClass);
if (inputFormatClass.equals(HdfsFileFormat.LZO_TEXT.inputFormat())) {
return LZO;
}
return NONE;
}
}