// blob: 49a33a6e18c7edd002c2ff032b8726be79c2ee18 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.hive;
import java.sql.Types;
import org.apache.avro.Schema;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.sqoop.SqoopOptions;
import org.apache.sqoop.config.ConfigurationConstants;
import static org.apache.avro.LogicalTypes.Decimal;
/**
 * Defines conversion between SQL types and Hive types.
 *
 * <p>Two sources are supported: JDBC type codes ({@link Types}) and Avro
 * schemas. This is a static utility class and cannot be instantiated.
 */
public final class HiveTypes {

  private static final String HIVE_TYPE_TINYINT = "TINYINT";
  private static final String HIVE_TYPE_INT = "INT";
  private static final String HIVE_TYPE_BIGINT = "BIGINT";
  private static final String HIVE_TYPE_FLOAT = "FLOAT";
  private static final String HIVE_TYPE_DOUBLE = "DOUBLE";
  private static final String HIVE_TYPE_STRING = "STRING";
  private static final String HIVE_TYPE_BOOLEAN = "BOOLEAN";
  private static final String HIVE_TYPE_BINARY = "BINARY";
  private static final String HIVE_TYPE_DECIMAL = "DECIMAL";

  public static final Log LOG = LogFactory.getLog(HiveTypes.class.getName());

  private HiveTypes() { }

  /**
   * Given JDBC SQL types coming from another database, what is the best
   * mapping to a Hive-specific type?
   *
   * @param sqlType a type code as defined in {@link Types}
   * @return the name of the Hive type to use, or {@code null} when no
   *     mapping is defined for {@code sqlType}
   */
  public static String toHiveType(int sqlType) {
    switch (sqlType) {
      case Types.INTEGER:
      case Types.SMALLINT:
        return HIVE_TYPE_INT;
      case Types.VARCHAR:
      case Types.CHAR:
      case Types.LONGVARCHAR:
      case Types.NVARCHAR:
      case Types.NCHAR:
      case Types.LONGNVARCHAR:
      case Types.DATE:
      case Types.TIME:
      case Types.TIMESTAMP:
      case Types.CLOB:
        return HIVE_TYPE_STRING;
      case Types.NUMERIC:
      case Types.DECIMAL:
      case Types.FLOAT:
      case Types.DOUBLE:
      case Types.REAL:
        return HIVE_TYPE_DOUBLE;
      case Types.BIT:
      case Types.BOOLEAN:
        return HIVE_TYPE_BOOLEAN;
      case Types.TINYINT:
        return HIVE_TYPE_TINYINT;
      case Types.BIGINT:
        return HIVE_TYPE_BIGINT;
      default:
        // TODO(aaron): Support BINARY, VARBINARY, LONGVARBINARY, DISTINCT,
        // BLOB, ARRAY, STRUCT, REF, JAVA_OBJECT.
        return null;
    }
  }

  /**
   * Maps an Avro schema to the name of a Hive type.
   *
   * <p>For a UNION schema the first branch whose type is not NULL is mapped
   * and the remaining branches are ignored. A union without any non-NULL
   * branch falls through to the switch below and is rejected like any other
   * unmapped type.
   *
   * @param schema the Avro schema to map
   * @param options Sqoop options; only consulted for BYTES schemas, to decide
   *     between DECIMAL and BINARY
   * @return the Hive type definition for the schema
   * @throws RuntimeException if no Hive mapping is defined for the Avro type
   */
  public static String toHiveType(Schema schema, SqoopOptions options) {
    if (schema.getType() == Schema.Type.UNION) {
      for (Schema subSchema : schema.getTypes()) {
        if (subSchema.getType() != Schema.Type.NULL) {
          return toHiveType(subSchema, options);
        }
      }
    }

    Schema.Type avroType = schema.getType();
    switch (avroType) {
      case BOOLEAN:
        return HIVE_TYPE_BOOLEAN;
      case INT:
        return HIVE_TYPE_INT;
      case LONG:
        return HIVE_TYPE_BIGINT;
      case FLOAT:
        return HIVE_TYPE_FLOAT;
      case DOUBLE:
        return HIVE_TYPE_DOUBLE;
      case STRING:
      case ENUM:
        return HIVE_TYPE_STRING;
      case BYTES:
        return mapToDecimalOrBinary(schema, options);
      case FIXED:
        return HIVE_TYPE_BINARY;
      default:
        throw new RuntimeException(String.format("There is no Hive type mapping defined for the Avro type of: %s ", avroType.getName()));
    }
  }

  /**
   * Chooses the Hive type for an Avro BYTES schema.
   *
   * <p>Returns a {@code DECIMAL (precision, scale)} definition when the
   * {@link ConfigurationConstants#PROP_ENABLE_PARQUET_LOGICAL_TYPE_DECIMAL}
   * flag is set in the configuration (it defaults to {@code false}) and the
   * schema carries a decimal logical type; returns BINARY otherwise.
   * Precision and scale are capped at {@link HiveDecimal#MAX_PRECISION} and
   * {@link HiveDecimal#MAX_SCALE}, and a warning is logged for each value
   * that had to be reduced.
   *
   * @param schema the BYTES schema being mapped
   * @param options Sqoop options whose configuration holds the flag
   * @return the Hive type definition
   */
  private static String mapToDecimalOrBinary(Schema schema, SqoopOptions options) {
    boolean logicalTypesEnabled = options.getConf().getBoolean(ConfigurationConstants.PROP_ENABLE_PARQUET_LOGICAL_TYPE_DECIMAL, false);
    if (logicalTypesEnabled && schema.getLogicalType() instanceof Decimal) {
      Decimal decimal = (Decimal) schema.getLogicalType();

      // trimming precision and scale to Hive's maximum values.
      int precision = Math.min(HiveDecimal.MAX_PRECISION, decimal.getPrecision());
      if (precision < decimal.getPrecision()) {
        LOG.warn("Warning! Precision in the Hive table definition will be smaller than the actual precision of the column on storage! Hive may not be able to read data from this column.");
      }
      int scale = Math.min(HiveDecimal.MAX_SCALE, decimal.getScale());
      if (scale < decimal.getScale()) {
        LOG.warn("Warning! Scale in the Hive table definition will be smaller than the actual scale of the column on storage! Hive may not be able to read data from this column.");
      }

      // Plain concatenation instead of String.format("%d"): the formatter
      // localizes digits for the default locale, which would put non-ASCII
      // digits into the type definition on some JVM locales.
      return HIVE_TYPE_DECIMAL + " (" + precision + ", " + scale + ")";
    } else {
      return HIVE_TYPE_BINARY;
    }
  }

  /**
   * Tells whether a SQL type is mapped to a more generic Hive type.
   *
   * @param sqlType a type code as defined in {@link Types}
   * @return true if a sql type can't be translated to a precise match
   *     in Hive, and we have to cast it to something more generic. This is
   *     the case for DATE, TIME, TIMESTAMP, DECIMAL and NUMERIC.
   */
  public static boolean isHiveTypeImprovised(int sqlType) {
    return sqlType == Types.DATE || sqlType == Types.TIME
        || sqlType == Types.TIMESTAMP
        || sqlType == Types.DECIMAL
        || sqlType == Types.NUMERIC;
  }
}