blob: b0e78b2e61fcc4305f70532aaacd611355c8ee5a [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.carbondata.converter;
import java.io.Serializable;
import java.math.BigDecimal;
import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator;
import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryKeyGeneratorFactory;
import org.apache.carbondata.core.metadata.datatype.DataType;
import org.apache.carbondata.core.metadata.encoder.Encoding;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure;
import org.apache.carbondata.core.util.DataTypeConverter;
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
import org.apache.spark.sql.catalyst.util.ArrayBasedMapData;
import org.apache.spark.sql.catalyst.util.GenericArrayData;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.DecimalType;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.unsafe.types.UTF8String;
/**
* Convert java data type to spark data type
*/
public final class SparkDataTypeConverterImpl implements DataTypeConverter, Serializable {
private static final long serialVersionUID = -4379212832935070583L;
@Override
public Object convertFromStringToDecimal(Object data) {
BigDecimal javaDecVal = new BigDecimal(data.toString());
return org.apache.spark.sql.types.Decimal.apply(javaDecVal);
}
@Override
public Object convertFromBigDecimalToDecimal(Object data) {
if (null == data) {
return null;
}
return org.apache.spark.sql.types.Decimal.apply((BigDecimal) data);
}
@Override
public Object convertFromDecimalToBigDecimal(Object data) {
return ((org.apache.spark.sql.types.Decimal) data).toJavaBigDecimal();
}
@Override
public byte[] convertFromStringToByte(Object data) {
if (null == data) {
return null;
}
return UTF8String.fromString((String) data).getBytes();
}
@Override
public Object convertFromByteToUTF8String(byte[] data) {
if (null == data) {
return null;
}
return UTF8String.fromBytes(data);
}
@Override
public byte[] convertFromByteToUTF8Bytes(byte[] data) {
return UTF8String.fromBytes(data).getBytes();
}
@Override
public Object convertFromStringToUTF8String(Object data) {
if (null == data) {
return null;
}
return UTF8String.fromString((String) data);
}
@Override
public Object wrapWithGenericArrayData(Object data) {
return new GenericArrayData(data);
}
@Override
public Object wrapWithGenericRow(Object[] fields) {
return new GenericInternalRow(fields);
}
@Override
public Object wrapWithArrayBasedMapData(Object[] keyArray, Object[] valueArray) {
return new ArrayBasedMapData(new GenericArrayData(keyArray), new GenericArrayData(valueArray));
}
@Override
public Object[] unwrapGenericRowToObject(Object data) {
GenericInternalRow row = (GenericInternalRow) data;
return row.values();
}
public static org.apache.spark.sql.types.DataType convertCarbonToSparkDataType(
DataType carbonDataType) {
if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.STRING
|| carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.VARCHAR) {
return DataTypes.StringType;
} else if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.SHORT) {
return DataTypes.ShortType;
} else if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.INT) {
return DataTypes.IntegerType;
} else if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.LONG) {
return DataTypes.LongType;
} else if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.DOUBLE) {
return DataTypes.DoubleType;
} else if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.BOOLEAN) {
return DataTypes.BooleanType;
} else if (org.apache.carbondata.core.metadata.datatype.DataTypes.isDecimal(carbonDataType)) {
return DataTypes.createDecimalType();
} else if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.TIMESTAMP) {
return DataTypes.TimestampType;
} else if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.DATE) {
return DataTypes.DateType;
} else if (carbonDataType == org.apache.carbondata.core.metadata.datatype.DataTypes.BINARY) {
return DataTypes.BinaryType;
} else {
return null;
}
}
/**
* convert from CarbonColumn array to Spark's StructField array
*/
@Override
public Object[] convertCarbonSchemaToSparkSchema(CarbonColumn[] carbonColumns) {
StructField[] fields = new StructField[carbonColumns.length];
for (int i = 0; i < carbonColumns.length; i++) {
CarbonColumn carbonColumn = carbonColumns[i];
if (carbonColumn.isDimension()) {
if (carbonColumn.hasEncoding(Encoding.DIRECT_DICTIONARY)) {
DirectDictionaryGenerator generator = DirectDictionaryKeyGeneratorFactory
.getDirectDictionaryGenerator(carbonColumn.getDataType());
fields[i] = new StructField(carbonColumn.getColName(),
convertCarbonToSparkDataType(generator.getReturnType()), true, null);
} else if (!carbonColumn.hasEncoding(Encoding.DICTIONARY)) {
fields[i] = new StructField(carbonColumn.getColName(),
convertCarbonToSparkDataType(carbonColumn.getDataType()), true, null);
} else if (carbonColumn.isComplex()) {
fields[i] = new StructField(carbonColumn.getColName(),
convertCarbonToSparkDataType(carbonColumn.getDataType()), true, null);
} else {
fields[i] = new StructField(carbonColumn.getColName(), convertCarbonToSparkDataType(
org.apache.carbondata.core.metadata.datatype.DataTypes.INT), true, null);
}
} else if (carbonColumn.isMeasure()) {
DataType dataType = carbonColumn.getDataType();
if (dataType == org.apache.carbondata.core.metadata.datatype.DataTypes.BOOLEAN
|| dataType == org.apache.carbondata.core.metadata.datatype.DataTypes.SHORT
|| dataType == org.apache.carbondata.core.metadata.datatype.DataTypes.INT
|| dataType == org.apache.carbondata.core.metadata.datatype.DataTypes.LONG
|| dataType == org.apache.carbondata.core.metadata.datatype.DataTypes.BINARY
|| dataType == org.apache.carbondata.core.metadata.datatype.DataTypes.VARCHAR) {
fields[i] =
new StructField(carbonColumn.getColName(), convertCarbonToSparkDataType(dataType),
true, null);
} else if (org.apache.carbondata.core.metadata.datatype.DataTypes.isDecimal(dataType)) {
CarbonMeasure measure = (CarbonMeasure) carbonColumn;
fields[i] = new StructField(carbonColumn.getColName(),
new DecimalType(measure.getPrecision(), measure.getScale()), true, null);
} else {
fields[i] = new StructField(carbonColumn.getColName(), convertCarbonToSparkDataType(
org.apache.carbondata.core.metadata.datatype.DataTypes.DOUBLE), true, null);
}
}
}
return fields;
}
}