| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.pig; |
| |
| import java.io.IOException; |
| import java.io.Serializable; |
| import java.util.Arrays; |
| import java.util.List; |
| |
| import org.apache.commons.logging.Log; |
| import org.apache.commons.logging.LogFactory; |
| |
| import org.apache.pig.classification.InterfaceAudience; |
| import org.apache.pig.classification.InterfaceStability; |
| import org.apache.pig.data.DataType; |
| import org.apache.pig.impl.logicalLayer.FrontendException; |
| import org.apache.pig.impl.logicalLayer.schema.Schema; |
| import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; |
| import org.apache.pig.newplan.logical.relational.LogicalSchema; |
| import org.apache.pig.newplan.logical.relational.LogicalSchema.LogicalFieldSchema; |
| import org.codehaus.jackson.annotate.JsonPropertyOrder; |
| |
| /** |
| * A represenation of a schema used to communicate with load and store functions. This is |
| * separate from {@link Schema}, which is an internal Pig representation of a schema. |
| * @since Pig 0.7 |
| */ |
| @InterfaceAudience.Public |
| @InterfaceStability.Stable |
| @JsonPropertyOrder({ "fields", "version", "sortKeys", "sortKeyOrders" }) |
| public class ResourceSchema implements Serializable { |
| |
| private static final long serialVersionUID = 1L; |
| |
| private static Log log = LogFactory.getLog(ResourceSchema.class); |
| |
| /* Array Getters intentionally return mutable arrays instead of copies, |
| * to simplify updates without unnecessary copying. |
| * Setters make a copy of the arrays in order to prevent an array |
| * from being shared by two objects, with modifications in one |
| * accidentally changing the other. |
| */ |
| |
| // initializing arrays to empty so we don't have to worry about NPEs |
| // setters won't set to null |
| private ResourceFieldSchema[] fields = new ResourceFieldSchema[0]; |
| |
| public enum Order { ASCENDING, DESCENDING } |
| private int[] sortKeys = new int[0]; // each entry is an offset into the fields array. |
| private Order[] sortKeyOrders = new Order[0]; |
| |
| private int version = 0; |
| |
| @JsonPropertyOrder({ "name", "type", "description", "schema" }) |
| public static class ResourceFieldSchema implements Serializable { |
| private static final long serialVersionUID = 1L; |
| private String name; |
| |
| // values are constants from DataType |
| private byte type; |
| |
| private String description; |
| |
| // nested tuples and bags will have their own schema |
| private ResourceSchema schema; |
| |
| /** |
| * Construct an empty field schema. |
| */ |
| public ResourceFieldSchema() { |
| |
| } |
| |
| /** |
| * Construct using a {@link org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema} as the template. |
| * @param fieldSchema fieldSchema to copy from |
| */ |
| public ResourceFieldSchema(FieldSchema fieldSchema) { |
| type = fieldSchema.type; |
| name = fieldSchema.alias; |
| description = "autogenerated from Pig Field Schema"; |
| Schema inner = fieldSchema.schema; |
| |
| // allow partial schema |
| if ((type == DataType.BAG || type == DataType.TUPLE || type == DataType.MAP) |
| && inner != null) { |
| schema = new ResourceSchema(inner); |
| } else { |
| schema = null; |
| } |
| } |
| |
| /** |
| * Construct using a {@link org.apache.pig.newplan.logical.relational.LogicalSchema.LogicalFieldSchema} as the template. |
| * @param fieldSchema fieldSchema to copy from |
| */ |
| public ResourceFieldSchema(LogicalFieldSchema fieldSchema) { |
| type = fieldSchema.type; |
| name = fieldSchema.alias; |
| description = "autogenerated from Pig Field Schema"; |
| LogicalSchema inner = fieldSchema.schema; |
| |
| // allow partial schema |
| if (DataType.isSchemaType(type) && inner != null) { |
| schema = new ResourceSchema(inner); |
| } else { |
| schema = null; |
| } |
| } |
| |
| /** |
| * Get the name of this field. |
| * @return name |
| */ |
| public String getName() { |
| return name; |
| } |
| |
| /** |
| * Set the name of this filed. |
| * @param name new name |
| * @return this |
| */ |
| public ResourceFieldSchema setName(String name) { |
| this.name = name; |
| return this; |
| } |
| |
| /** |
| * Get the type of this field. |
| * @return type, as a {@link DataType} static final byte |
| */ |
| public byte getType() { |
| return type; |
| } |
| |
| /** |
| * Set the type of this field |
| * @param type new type |
| * @return this |
| */ |
| public ResourceFieldSchema setType(byte type) { |
| this.type = type; |
| return this; |
| } |
| |
| /** |
| * Get a free form text description of this field. |
| * @return description |
| */ |
| public String getDescription() { |
| return description; |
| } |
| |
| /** |
| * Set the description |
| * @param description new description |
| * @return this |
| */ |
| public ResourceFieldSchema setDescription(String description) { |
| this.description = description; |
| return this; |
| } |
| |
| /** |
| * Get the schema for this field. Type tuple/bag/map may have a schema. |
| * @return schema |
| */ |
| public ResourceSchema getSchema() { |
| return schema; |
| } |
| |
| /** |
| * Set the schema for this field. Type tuple/bag/map may have a schema. |
| * @param schema new schema |
| * @return this |
| */ |
| public ResourceFieldSchema setSchema(ResourceSchema schema) throws |
| IOException { |
| validateSchema(schema); |
| this.schema = schema; |
| return this; |
| } |
| |
| private void validateSchema(ResourceSchema schema) throws IOException { |
| if(type == DataType.BAG && schema != null) { |
| ResourceFieldSchema[] subFields = schema.getFields(); |
| if (subFields.length == 1) { |
| if (subFields[0].type != DataType.TUPLE) { |
| throwInvalidSchemaException(); |
| } |
| } else { |
| throwInvalidSchemaException(); |
| } |
| } |
| } |
| |
| public static void throwInvalidSchemaException() throws FrontendException { |
| int errCode = 2218; |
| throw new FrontendException("Invalid resource schema: " + |
| "bag schema must have tuple as its field", errCode, PigException.BUG); |
| } |
| |
| @Override |
| public String toString() { |
| return getDescription(true); |
| } |
| |
| public String calcCastString() { |
| return getDescription(false); |
| } |
| |
| private String getDescription(boolean printAlias) { |
| StringBuilder sb = new StringBuilder(); |
| if (printAlias && this.name != null) |
| sb.append(this.name).append(":"); |
| if (DataType.isAtomic(this.type)) { |
| sb.append(DataType.findTypeName(this.type)); |
| } else { |
| //if (this.schema!=null) |
| stringifyResourceSchema(sb, this.schema, this.type, printAlias); |
| } |
| return sb.toString(); |
| } |
| } |
| |
| |
| /** |
| * Construct an empty ResourceSchema. |
| */ |
| public ResourceSchema() { |
| |
| } |
| |
| /** |
| * Construct a ResourceSchema from a {@link Schema} |
| * @param pigSchema Schema to use |
| */ |
| public ResourceSchema(Schema pigSchema) { |
| List<FieldSchema> pigSchemaFields = pigSchema.getFields(); |
| fields = new ResourceFieldSchema[pigSchemaFields.size()]; |
| for (int i=0; i<fields.length; i++) { |
| fields[i] = new ResourceFieldSchema(pigSchemaFields.get(i)); |
| } |
| } |
| |
| /** |
| * Construct a ResourceSchema from a {@link LogicalSchema} |
| * @param pigSchema Schema to use |
| */ |
| public ResourceSchema(LogicalSchema pigSchema) { |
| List<LogicalFieldSchema> pigSchemaFields = pigSchema.getFields(); |
| fields = new ResourceFieldSchema[pigSchemaFields.size()]; |
| for (int i=0; i<fields.length; i++) { |
| fields[i] = new ResourceFieldSchema(pigSchemaFields.get(i)); |
| } |
| } |
| |
| /** |
| * Only for use by Pig internal code. |
| * Construct a ResourceSchema from a {@link Schema} |
| * @param pigSchema Schema to use |
| * @param sortInfo information on how data is sorted |
| */ |
| @InterfaceAudience.Private |
| public ResourceSchema(Schema pigSchema, SortInfo sortInfo) { |
| this(pigSchema); |
| if (sortInfo!=null && sortInfo.getSortColInfoList().size()!=0) { |
| sortKeys = new int[sortInfo.getSortColInfoList().size()]; |
| sortKeyOrders = new Order[sortInfo.getSortColInfoList().size()]; |
| for (int i=0;i<sortInfo.getSortColInfoList().size();i++) { |
| SortColInfo colInfo = sortInfo.getSortColInfoList().get(i); |
| int index = colInfo.getColIndex(); |
| Order order; |
| org.apache.pig.SortColInfo.Order origOrder = colInfo.getSortOrder(); |
| if (origOrder==org.apache.pig.SortColInfo.Order.ASCENDING) { |
| order = Order.ASCENDING; |
| } else { |
| order = Order.DESCENDING; |
| } |
| sortKeys[i] = index; |
| sortKeyOrders[i] = order; |
| } |
| } |
| } |
| |
| /** |
| * Only for use by Pig internal code. |
| * Construct a ResourceSchema from a {@link LogicalSchema} |
| * @param pigSchema LogicalSchema to use |
| * @param sortInfo information on how data is sorted |
| */ |
| @InterfaceAudience.Private |
| public ResourceSchema(LogicalSchema pigSchema, SortInfo sortInfo) { |
| this(pigSchema); |
| if (sortInfo!=null && sortInfo.getSortColInfoList().size()!=0) { |
| sortKeys = new int[sortInfo.getSortColInfoList().size()]; |
| sortKeyOrders = new Order[sortInfo.getSortColInfoList().size()]; |
| for (int i=0;i<sortInfo.getSortColInfoList().size();i++) { |
| SortColInfo colInfo = sortInfo.getSortColInfoList().get(i); |
| int index = colInfo.getColIndex(); |
| Order order; |
| org.apache.pig.SortColInfo.Order origOrder = colInfo.getSortOrder(); |
| if (origOrder==org.apache.pig.SortColInfo.Order.ASCENDING) { |
| order = Order.ASCENDING; |
| } else { |
| order = Order.DESCENDING; |
| } |
| sortKeys[i] = index; |
| sortKeyOrders[i] = order; |
| } |
| } |
| } |
| |
| /** |
| * Get the version of this schema. |
| * @return version |
| */ |
| public int getVersion() { |
| return version; |
| } |
| |
| public ResourceSchema setVersion(int version) { |
| this.version = version; |
| return this; |
| } |
| |
| /** |
| * Get field schema for each field |
| * @return array of field schemas. |
| */ |
| public ResourceFieldSchema[] getFields() { |
| return fields; |
| } |
| |
| /** |
| * Get all field names. |
| * @return array of field names |
| */ |
| public String[] fieldNames() { |
| String[] names = new String[fields.length]; |
| for (int i=0; i<fields.length; i++) { |
| names[i] = fields[i].getName(); |
| } |
| return names; |
| } |
| |
| /** |
| * Set all the fields. If fields are not currently null the new fields will be silently |
| * ignored. |
| * @param fields to use as fields in this schema |
| * @return this |
| */ |
| public ResourceSchema setFields(ResourceFieldSchema[] fields) { |
| if (fields != null) |
| this.fields = Arrays.copyOf(fields, fields.length); |
| return this; |
| } |
| |
| /** |
| * Get the sort keys for this data. |
| * @return array of ints. Each integer in the array represents the field number. So if the |
| * schema of the data is (a, b, c, d) and the data is sorted on c, b, the returned sort keys |
| * will be [2, 1]. Field numbers are zero based. If the data is not sorted a zero length |
| * array will be returned. |
| */ |
| public int[] getSortKeys() { |
| return sortKeys; |
| } |
| |
| /** |
| * Set the sort keys for htis data. If sort keys are not currently null the new sort keys |
| * will be silently ignored. |
| * @param sortKeys Each integer in the array represents the field number. So if the |
| * schema of the data is (a, b, c, d) and the data is sorted on c, b, the sort keys |
| * should be [2, 1]. Field numbers are zero based. |
| * @return this |
| */ |
| public ResourceSchema setSortKeys(int[] sortKeys) { |
| if (sortKeys != null) |
| this.sortKeys = Arrays.copyOf(sortKeys, sortKeys.length); |
| return this; |
| } |
| |
| /** |
| * Get order for sort keys. |
| * @return array of Order. This array will be the same length as the int[] array returned by |
| * {@link #getSortKeys}. |
| */ |
| public Order[] getSortKeyOrders() { |
| return sortKeyOrders; |
| } |
| |
| /** |
| * Set the order for each sort key. If order is not currently null, new order will be |
| * silently ignored. |
| * @param sortKeyOrders array of Order. Should be the same length as int[] passed to |
| * {@link #setSortKeys}. |
| * @return this |
| */ |
| public ResourceSchema setSortKeyOrders(Order[] sortKeyOrders) { |
| if (sortKeyOrders != null) |
| this.sortKeyOrders = Arrays.copyOf(sortKeyOrders, sortKeyOrders.length); |
| return this; |
| } |
| |
| /** |
| * Test whether two ResourceSchemas are the same. Two schemas are said to be the same if they |
| * are both null or |
| * have the same number of fields and for each field the name, type are the same. For fields |
| * that have may have schemas (i.e. tuples) both schemas be equal. Field |
| * descriptions are not used in testing equality. |
| * @return true if equal according to the above definition, otherwise false. |
| */ |
| public static boolean equals(ResourceSchema rs1, ResourceSchema rs2) { |
| if (rs1 == null) { |
| return rs2 == null ? true : false; |
| } |
| |
| if (rs2 == null) { |
| return false; |
| } |
| |
| if (rs1.getVersion() != rs2.getVersion() |
| || !Arrays.equals(rs1.getSortKeys(), rs2.getSortKeys()) |
| || !Arrays.equals(rs1.getSortKeyOrders(), rs2.getSortKeyOrders())) { |
| return false; |
| } |
| |
| ResourceFieldSchema[] rfs1 = rs1.getFields(); |
| ResourceFieldSchema[] rfs2 = rs1.getFields(); |
| |
| if (rfs1.length != rfs2.length) return false; |
| |
| for (int i=0; i<rfs1.length; i++) { |
| if (rfs1[i].getName()==null && rfs2[i].getName()!=null || |
| rfs1[i].getName()!=null && rfs2[i].getName()==null) |
| return false; |
| if (rfs1[i].getName()==null && rfs2[i].getName()==null) { |
| if (rfs1[i].getType() == rfs2[i].getType()) |
| return true; |
| else |
| return false; |
| } |
| if (!rfs1[i].getName().equals(rfs2[i].getName()) |
| || rfs1[i].getType() != rfs2[i].getType()) { |
| return false; |
| } |
| if (!equals(rfs1[i].getSchema(), rfs2[i].getSchema())) { |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| @Override |
| public String toString() { |
| StringBuilder sb = new StringBuilder(); |
| stringifyResourceSchema(sb, this, DataType.UNKNOWN, true) ; |
| return sb.toString(); |
| } |
| |
| private static void stringifyResourceSchema(StringBuilder sb, |
| ResourceSchema rs, byte type, boolean printAlias) { |
| |
| if (type == DataType.BAG) { |
| sb.append("{"); |
| } else if (type == DataType.TUPLE) { |
| sb.append("("); |
| } else if (type == DataType.MAP) { |
| sb.append("["); |
| } |
| |
| if (rs != null) { |
| for (int i=0; i<rs.getFields().length; i++) { |
| sb.append(rs.getFields()[i].getDescription(printAlias)); |
| if (i < rs.getFields().length - 1) { |
| sb.append(","); |
| } |
| } |
| } |
| |
| if (type == DataType.BAG) { |
| sb.append("}"); |
| } else if (type == DataType.TUPLE) { |
| sb.append(")"); |
| } else if (type == DataType.MAP) { |
| sb.append("]"); |
| } |
| } |
| } |