// blob: d5796188434bdb564289324d220b4c49fd3783b2 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.io;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.util.ReflectionUtils;
/**
* Provides a mapping from codec names to concrete implementation class names.
*/
public final class CodecMap {

  // Supported codec map values
  // Note: do not add more values here, since codecs are discovered using the
  // standard Hadoop mechanism (io.compression.codecs). See
  // CompressionCodecFactory.
  public static final String NONE = "none";
  public static final String DEFLATE = "deflate";
  public static final String LZO = "lzo";
  public static final String LZOP = "lzop";

  /** Suffix removed from a codec's simple class name to form its alias. */
  private static final String CODEC_SUFFIX = "Codec";

  /**
   * Maps codec short names to implementation class names. The "none" entry
   * maps to {@code null}. Filled once by the static initializer; this class
   * never modifies it afterwards.
   */
  private static final Map<String, String> CODEC_NAMES =
      new TreeMap<String, String>();

  static {
    // Register the names of codecs we know about.
    CODEC_NAMES.put(NONE, null);
    CODEC_NAMES.put(DEFLATE, "org.apache.hadoop.io.compress.DefaultCodec");
    CODEC_NAMES.put(LZO, "com.hadoop.compression.lzo.LzoCodec");
    CODEC_NAMES.put(LZOP, "com.hadoop.compression.lzo.LzopCodec");

    // Add more from Hadoop CompressionCodecFactory. A discovered codec whose
    // short name equals one registered above replaces that entry.
    for (Class<? extends CompressionCodec> cls
        : CompressionCodecFactory.getCodecClasses(new Configuration())) {
      // Store the binary name (getName) because that is the form class
      // loading expects; getCanonicalName() differs for nested classes and
      // is null for anonymous/local ones.
      CODEC_NAMES.put(shortName(cls), cls.getName());
    }
  }

  private CodecMap() {
  }

  /**
   * Given a codec name, return the name of the concrete class
   * that implements it (or 'null' in the case of the "none" codec).
   * The lookup is case sensitive; registered names are lower case.
   *
   * @param codecName the short name of the codec
   * @return the implementation class name, or {@code null} for "none"
   * @throws org.apache.sqoop.io.UnsupportedCodecException if a codec cannot
   *     be found with the supplied name (including a {@code null} name).
   */
  public static String getCodecClassName(String codecName)
      throws org.apache.sqoop.io.UnsupportedCodecException {
    // TreeMap rejects null keys with a NullPointerException, so a null name
    // is reported as unsupported before touching the map.
    if (codecName == null || !CODEC_NAMES.containsKey(codecName)) {
      throw new org.apache.sqoop.io.UnsupportedCodecException(codecName);
    }
    return CODEC_NAMES.get(codecName);
  }

  /**
   * Given a codec name, instantiate the concrete implementation
   * class that implements it.
   *
   * @param codecName the codec's class name or alias
   * @param conf configuration used to discover, load and configure the codec
   * @return the codec instance, or {@code null} for the "none" codec
   * @throws org.apache.sqoop.io.UnsupportedCodecException if a codec cannot
   *     be found with the supplied name, or its class cannot be loaded.
   */
  public static CompressionCodec getCodec(String codecName,
      Configuration conf) throws org.apache.sqoop.io.UnsupportedCodecException {
    // Try standard Hadoop mechanism first
    CompressionCodec codec = getCodecByName(codecName, conf);
    if (codec != null) {
      return codec;
    }

    // Fall back to Sqoop mechanism
    String codecClassName = getCodecClassName(codecName);
    if (null == codecClassName) {
      return null;
    }
    try {
      // asSubclass performs the type check up front (ClassCastException if
      // the class is not a CompressionCodec) instead of an unchecked cast.
      Class<? extends CompressionCodec> codecClass =
          conf.getClassByName(codecClassName)
              .asSubclass(CompressionCodec.class);
      return ReflectionUtils.newInstance(codecClass, conf);
    } catch (ClassNotFoundException cnfe) {
      throw new org.apache.sqoop.io.UnsupportedCodecException(
          "Cannot find codec class "
          + codecClassName + " for codec " + codecName);
    }
  }

  /**
   * Return the set of available codec names.
   *
   * @return a read-only view of the registered codec short names
   */
  public static Set<String> getCodecNames() {
    // Read-only view so callers cannot remove entries from the registry.
    return Collections.unmodifiableSet(CODEC_NAMES.keySet());
  }

  /**
   * Find the relevant compression codec for the codec's canonical class name
   * or by codec alias.
   * <p>
   * Codec aliases are case insensitive.
   * <p>
   * The codec alias is the short class name (without the package name).
   * If the short class name ends with 'Codec', then there are two aliases for
   * the codec, the complete short class name and the short class name without
   * the 'Codec' ending. For example for the 'GzipCodec' codec class name the
   * aliases are 'gzip' and 'gzipcodec'.
   * <p>
   * Note: When HADOOP-7323 is available this method can be replaced with a call
   * to CompressionCodecFactory.
   * @param codecName the canonical class name of the codec or the codec alias
   * @param conf configuration used to discover and configure the codec
   * @return the codec object or null if none matching the name were found
   */
  private static CompressionCodec getCodecByName(String codecName,
      Configuration conf) {
    Class<? extends CompressionCodec> cls = findCodecClass(codecName, conf);
    if (cls == null) {
      return null;
    }
    return ReflectionUtils.newInstance(cls, conf);
  }

  /**
   * Gets the short name for a specified codec. See {@link
   * #getCodecByName(String, Configuration)} for details. The name returned
   * here is the shortest possible one that means a {@code Codec} part is
   * removed as well.
   *
   * @param codecName name of the codec to return the short name for
   * @param conf job configuration object used to get the registered
   *     compression codecs
   *
   * @return the short name of the codec
   *
   * @throws org.apache.sqoop.io.UnsupportedCodecException
   *     if no short name could be found
   */
  public static String getCodecShortNameByName(String codecName,
      Configuration conf) throws org.apache.sqoop.io.UnsupportedCodecException {
    // Null check first: TreeMap.containsKey(null) would throw.
    if (codecName != null && CODEC_NAMES.containsKey(codecName)) {
      return codecName;
    }
    // Only the class is needed to derive the short name, so the codec is
    // looked up without being instantiated.
    Class<? extends CompressionCodec> codecClass =
        findCodecClass(codecName, conf);
    if (codecClass != null) {
      return shortName(codecClass);
    }
    throw new org.apache.sqoop.io.UnsupportedCodecException(
        "Cannot find codec class " + codecName + " for codec " + codecName);
  }

  /**
   * Returns the first codec class known to the given configuration whose
   * class name or alias matches {@code codecName}, or {@code null} if there
   * is no match.
   */
  private static Class<? extends CompressionCodec> findCodecClass(
      String codecName, Configuration conf) {
    List<Class<? extends CompressionCodec>> codecs =
        CompressionCodecFactory.getCodecClasses(conf);
    for (Class<? extends CompressionCodec> cls : codecs) {
      if (codecMatches(cls, codecName)) {
        return cls;
      }
    }
    return null;
  }

  /**
   * Tells whether {@code codecName} designates {@code cls}: either its exact
   * class name (case sensitive), or its simple name with or without the
   * 'Codec' suffix (case insensitive).
   */
  private static boolean codecMatches(Class<? extends CompressionCodec> cls,
      String codecName) {
    String simpleName = cls.getSimpleName();
    return cls.getName().equals(codecName)
        || simpleName.equalsIgnoreCase(codecName)
        || stripCodecSuffix(simpleName).equalsIgnoreCase(codecName);
  }

  /**
   * Returns the lower-case alias of a codec class: its simple name without
   * a trailing 'Codec'.
   */
  private static String shortName(Class<? extends CompressionCodec> cls) {
    // Locale.ROOT keeps the alias independent of the JVM default locale.
    return stripCodecSuffix(cls.getSimpleName()).toLowerCase(Locale.ROOT);
  }

  /** Removes a trailing 'Codec' from the given simple class name, if any. */
  private static String stripCodecSuffix(String simpleName) {
    if (simpleName.endsWith(CODEC_SUFFIX)) {
      return simpleName.substring(0,
          simpleName.length() - CODEC_SUFFIX.length());
    }
    return simpleName;
  }
}