blob: 734021e93688973b5d8e90fb753be1216357f1b8 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.impl.logicalLayer.schema;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.FrontendException;
/**
*
* A utility class that simplifies schema creation, especially for bag and
* tuple schemas. Currently it only supports the creation of simple schemas;
* nested tuples and bags are not supported.
*
*/
public class SchemaUtil {

    /** Name given to the tuple when the caller does not supply one. */
    private static final String DEFAULT_TUPLE_NAME = "t";

    /** Name given to the bag when the caller does not supply one. */
    private static final String DEFAULT_BAG_NAME = "b";

    /** Type codes that the factory methods of this class accept for a field. */
    private static final Set<Byte> SUPPORTED_TYPE_SET = new HashSet<Byte>();

    static {
        SUPPORTED_TYPE_SET.add(DataType.INTEGER);
        SUPPORTED_TYPE_SET.add(DataType.LONG);
        SUPPORTED_TYPE_SET.add(DataType.CHARARRAY);
        SUPPORTED_TYPE_SET.add(DataType.BOOLEAN);
        SUPPORTED_TYPE_SET.add(DataType.BYTE);
        SUPPORTED_TYPE_SET.add(DataType.BYTEARRAY);
        SUPPORTED_TYPE_SET.add(DataType.DOUBLE);
        SUPPORTED_TYPE_SET.add(DataType.FLOAT);
        SUPPORTED_TYPE_SET.add(DataType.DATETIME);
        SUPPORTED_TYPE_SET.add(DataType.MAP);
        SUPPORTED_TYPE_SET.add(DataType.BIGINTEGER);
        SUPPORTED_TYPE_SET.add(DataType.BIGDECIMAL);
    }

    /**
     * Create a new tuple schema from the tuple name and two lists: the names
     * of the fields and the types of the fields.
     *
     * @param tupleName name of the tuple field wrapping the created fields
     * @param fieldNames names of the fields, one per entry of dataTypes
     * @param dataTypes type codes of the fields, one per entry of fieldNames
     * @return tuple schema
     * @throws FrontendException if a list is null, if the lists differ in
     *             size, or if a type is null or not supported
     */
    public static Schema newTupleSchema(String tupleName,
            List<String> fieldNames, List<Byte> dataTypes)
            throws FrontendException {
        checkParameters(fieldNames, dataTypes);

        int fieldCount = fieldNames.size();
        List<Schema.FieldSchema> fields =
                new ArrayList<Schema.FieldSchema>(fieldCount);
        for (int i = 0; i < fieldCount; ++i) {
            // checkParameters has rejected null type entries, so the
            // explicit unboxing below cannot fail.
            byte type = dataTypes.get(i).byteValue();
            fields.add(new Schema.FieldSchema(fieldNames.get(i), type));
        }

        Schema tupleSchema = new Schema(fields);
        Schema.FieldSchema tupleField = new Schema.FieldSchema(tupleName,
                tupleSchema);
        return new Schema(tupleField);
    }

    /**
     * Create a new tuple schema from the tuple name and two arrays: the names
     * of the fields and the types of the fields.
     *
     * @param tupleName name of the tuple field wrapping the created fields
     * @param fieldNames names of the fields, one per entry of dataTypes
     * @param dataTypes type codes of the fields, one per entry of fieldNames
     * @return tuple schema
     * @throws FrontendException if an array is null, if the arrays differ in
     *             length, or if a type is null or not supported
     */
    public static Schema newTupleSchema(String tupleName, String[] fieldNames,
            Byte[] dataTypes) throws FrontendException {
        return newTupleSchema(tupleName, asListOrNull(fieldNames),
                asListOrNull(dataTypes));
    }

    /**
     * Create a new tuple schema from two lists: the names of the fields and
     * the types of the fields. The tuple name defaults to t.
     *
     * @param fieldNames names of the fields, one per entry of dataTypes
     * @param dataTypes type codes of the fields, one per entry of fieldNames
     * @return tuple schema
     * @throws FrontendException if a list is null, if the lists differ in
     *             size, or if a type is null or not supported
     */
    public static Schema newTupleSchema(List<String> fieldNames,
            List<Byte> dataTypes) throws FrontendException {
        return newTupleSchema(DEFAULT_TUPLE_NAME, fieldNames, dataTypes);
    }

    /**
     * Create a new tuple schema from one list: the types of the fields. The
     * field names default to f0,f1,f2..., and the tuple name defaults to t.
     *
     * @param dataTypes type codes of the fields
     * @return tuple schema
     * @throws FrontendException if the list is null or if a type is null or
     *             not supported
     */
    public static Schema newTupleSchema(List<Byte> dataTypes)
            throws FrontendException {
        requireDataTypes(dataTypes);
        return newTupleSchema(DEFAULT_TUPLE_NAME, newNames(dataTypes.size()),
                dataTypes);
    }

    /**
     * Create a new tuple schema from two arrays: the names of the fields and
     * the types of the fields. The tuple name defaults to t.
     *
     * @param names names of the fields, one per entry of dataTypes
     * @param dataTypes type codes of the fields, one per entry of names
     * @return tuple schema
     * @throws FrontendException if an array is null, if the arrays differ in
     *             length, or if a type is null or not supported
     */
    public static Schema newTupleSchema(String[] names, Byte[] dataTypes)
            throws FrontendException {
        return newTupleSchema(DEFAULT_TUPLE_NAME, asListOrNull(names),
                asListOrNull(dataTypes));
    }

    /**
     * Create a new tuple schema from one array: the types of the fields. The
     * field names default to f0,f1,f2..., and the tuple name defaults to t.
     *
     * @param dataTypes type codes of the fields
     * @return tuple schema
     * @throws FrontendException if the array is null or if a type is null or
     *             not supported
     */
    public static Schema newTupleSchema(Byte[] dataTypes)
            throws FrontendException {
        return newTupleSchema(asListOrNull(dataTypes));
    }

    /**
     * Build the default field names f0, f1, ..., f(size-1).
     *
     * @param size number of names to generate
     * @return a new list holding the generated names
     */
    private static List<String> newNames(int size) {
        List<String> names = new ArrayList<String>(size);
        for (int i = 0; i < size; ++i) {
            names.add("f" + i);
        }
        return names;
    }

    /**
     * Create a bag schema from the bag name, the tuple name and two lists:
     * the names of the fields and the types of the fields.
     *
     * @param bagName name of the bag field
     * @param tupleName name of the tuple field inside the bag
     * @param fieldNames names of the fields, one per entry of dataTypes
     * @param dataTypes type codes of the fields, one per entry of fieldNames
     * @return bag schema
     * @throws FrontendException if a list is null, if the lists differ in
     *             size, or if a type is null or not supported
     */
    public static Schema newBagSchema(String bagName, String tupleName,
            List<String> fieldNames, List<Byte> dataTypes)
            throws FrontendException {
        // newTupleSchema validates the parameters, so no separate check is
        // needed here.
        Schema tupleSchema = newTupleSchema(tupleName, fieldNames, dataTypes);
        Schema.FieldSchema bagField = new Schema.FieldSchema(bagName,
                tupleSchema, DataType.BAG);
        return new Schema(bagField);
    }

    /**
     * Create a bag schema from the bag name, the tuple name and two arrays:
     * the names of the fields and the types of the fields.
     *
     * @param bagName name of the bag field
     * @param tupleName name of the tuple field inside the bag
     * @param fieldNames names of the fields, one per entry of dataTypes
     * @param dataTypes type codes of the fields, one per entry of fieldNames
     * @return bag schema
     * @throws FrontendException if an array is null, if the arrays differ in
     *             length, or if a type is null or not supported
     */
    public static Schema newBagSchema(String bagName, String tupleName,
            String[] fieldNames, Byte[] dataTypes) throws FrontendException {
        return newBagSchema(bagName, tupleName, asListOrNull(fieldNames),
                asListOrNull(dataTypes));
    }

    /**
     * Create a bag schema from two lists: the names of the fields and the
     * types of the fields. The bag name defaults to b and the tuple name
     * defaults to t.
     *
     * @param names names of the fields, one per entry of dataTypes
     * @param dataTypes type codes of the fields, one per entry of names
     * @return bag schema
     * @throws FrontendException if a list is null, if the lists differ in
     *             size, or if a type is null or not supported
     */
    public static Schema newBagSchema(List<String> names, List<Byte> dataTypes)
            throws FrontendException {
        return newBagSchema(DEFAULT_BAG_NAME, DEFAULT_TUPLE_NAME, names,
                dataTypes);
    }

    /**
     * Create a bag schema from one list: the types of the fields. The field
     * names default to f0,f1,f2..., the tuple name defaults to t and the bag
     * name defaults to b.
     *
     * @param dataTypes type codes of the fields
     * @return bag schema
     * @throws FrontendException if the list is null or if a type is null or
     *             not supported
     */
    public static Schema newBagSchema(List<Byte> dataTypes)
            throws FrontendException {
        requireDataTypes(dataTypes);
        return newBagSchema(newNames(dataTypes.size()), dataTypes);
    }

    /**
     * Create a bag schema from two arrays: the names of the fields and the
     * types of the fields. The tuple name defaults to t and the bag name
     * defaults to b.
     *
     * @param names names of the fields, one per entry of dataTypes
     * @param dataTypes type codes of the fields, one per entry of names
     * @return bag schema
     * @throws FrontendException if an array is null, if the arrays differ in
     *             length, or if a type is null or not supported
     */
    public static Schema newBagSchema(String[] names, Byte[] dataTypes)
            throws FrontendException {
        return newBagSchema(asListOrNull(names), asListOrNull(dataTypes));
    }

    /**
     * Create a bag schema from one array: the types of the fields. The field
     * names default to f0,f1,f2..., the tuple name defaults to t and the bag
     * name defaults to b.
     *
     * @param dataTypes type codes of the fields
     * @return bag schema
     * @throws FrontendException if the array is null or if a type is null or
     *             not supported
     */
    public static Schema newBagSchema(Byte[] dataTypes)
            throws FrontendException {
        return newBagSchema(asListOrNull(dataTypes));
    }

    /**
     * View an array as a list, mapping a null array to a null list so that
     * the list-based validation reports it as a FrontendException instead of
     * Arrays.asList failing with a NullPointerException.
     *
     * @param array array to wrap, may be null
     * @return a list view of the array, or null if the array is null
     */
    private static <T> List<T> asListOrNull(T[] array) {
        return array == null ? null : Arrays.asList(array);
    }

    /**
     * Reject a missing collection of data types.
     *
     * @param dataTypes type codes to check for presence
     * @throws FrontendException if dataTypes is null
     */
    private static void requireDataTypes(List<Byte> dataTypes)
            throws FrontendException {
        if (dataTypes == null) {
            throw new FrontendException("The dataTypes must not be null");
        }
    }

    /**
     * Verify that every type code is present and is one of the supported
     * types.
     *
     * @param dataTypes type codes to validate
     * @throws FrontendException if dataTypes is null, or contains a null or
     *             unsupported entry
     */
    private static void checkDataTypes(List<Byte> dataTypes)
            throws FrontendException {
        requireDataTypes(dataTypes);
        for (Byte type : dataTypes) {
            if (type == null) {
                throw new FrontendException(
                        "The dataTypes must not contain a null type");
            }
            if (!SUPPORTED_TYPE_SET.contains(type)) {
                // Unbox explicitly: passing the boxed Byte would let overload
                // resolution prefer any Object-accepting findTypeName
                // overload over the byte one, and the message would then not
                // name the rejected type code.
                byte typeCode = type.byteValue();
                throw new FrontendException(
                        "Currently pig do not support this kind of type using Schema:"
                                + DataType.findTypeName(typeCode)
                                + ". You can write schema by yourself.");
            }
        }
    }

    /**
     * Verify that the field names and the type codes are both present, that
     * every type is supported, and that there is exactly one type per name.
     *
     * @param names names of the fields
     * @param dataTypes type codes of the fields
     * @throws FrontendException if either list is null, if a type is null or
     *             not supported, or if the lists differ in size
     */
    private static void checkParameters(List<String> names, List<Byte> dataTypes)
            throws FrontendException {
        if (names == null) {
            throw new FrontendException("The names must not be null");
        }
        checkDataTypes(dataTypes);
        if (names.size() != dataTypes.size()) {
            throw new FrontendException(
                    "The number of names (" + names.size() + ") is not equal to the number of dataTypes (" + dataTypes.size() + ")");
        }
    }
}