/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| |
| package org.apache.orc.impl; |
| |
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.orc.TypeDescription;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
| |
| /** |
| * Take the file types and the (optional) configuration column names/types and see if there |
| * has been schema evolution. |
| */ |
| public class SchemaEvolution { |
| private final Map<Integer, TypeDescription> readerToFile; |
| private final boolean[] included; |
| private final TypeDescription readerSchema; |
| private static final Logger LOG = LoggerFactory.getLogger(SchemaEvolution.class); |
| |
| public SchemaEvolution(TypeDescription readerSchema, boolean[] included) { |
| this.included = included; |
| readerToFile = null; |
| this.readerSchema = readerSchema; |
| } |
| |
| public SchemaEvolution(TypeDescription fileSchema, |
| TypeDescription readerSchema, |
| boolean[] included) throws IOException { |
| readerToFile = new HashMap<>(readerSchema.getMaximumId() + 1); |
| this.included = included; |
| if (checkAcidSchema(fileSchema)) { |
| this.readerSchema = createEventSchema(readerSchema); |
| } else { |
| this.readerSchema = readerSchema; |
| } |
| buildMapping(fileSchema, this.readerSchema); |
| } |
| |
| public TypeDescription getReaderSchema() { |
| return readerSchema; |
| } |
| |
| public TypeDescription getFileType(TypeDescription readerType) { |
| TypeDescription result; |
| if (readerToFile == null) { |
| if (included == null || included[readerType.getId()]) { |
| result = readerType; |
| } else { |
| result = null; |
| } |
| } else { |
| result = readerToFile.get(readerType.getId()); |
| } |
| return result; |
| } |
| |
| void buildMapping(TypeDescription fileType, |
| TypeDescription readerType) throws IOException { |
| // if the column isn't included, don't map it |
| if (included != null && !included[readerType.getId()]) { |
| return; |
| } |
| boolean isOk = true; |
| // check the easy case first |
| if (fileType.getCategory() == readerType.getCategory()) { |
| switch (readerType.getCategory()) { |
| case BOOLEAN: |
| case BYTE: |
| case SHORT: |
| case INT: |
| case LONG: |
| case DOUBLE: |
| case FLOAT: |
| case STRING: |
| case TIMESTAMP: |
| case BINARY: |
| case DATE: |
| // these are always a match |
| break; |
| case CHAR: |
| case VARCHAR: |
| // We do conversion when same CHAR/VARCHAR type but different maxLength. |
| break; |
| case DECIMAL: |
| // We do conversion when same DECIMAL type but different precision/scale. |
| break; |
| case UNION: |
| case MAP: |
| case LIST: { |
| // these must be an exact match |
| List<TypeDescription> fileChildren = fileType.getChildren(); |
| List<TypeDescription> readerChildren = readerType.getChildren(); |
| if (fileChildren.size() == readerChildren.size()) { |
| for(int i=0; i < fileChildren.size(); ++i) { |
| buildMapping(fileChildren.get(i), readerChildren.get(i)); |
| } |
| } else { |
| isOk = false; |
| } |
| break; |
| } |
| case STRUCT: { |
| // allow either side to have fewer fields than the other |
| List<TypeDescription> fileChildren = fileType.getChildren(); |
| List<TypeDescription> readerChildren = readerType.getChildren(); |
| int jointSize = Math.min(fileChildren.size(), readerChildren.size()); |
| for(int i=0; i < jointSize; ++i) { |
| buildMapping(fileChildren.get(i), readerChildren.get(i)); |
| } |
| break; |
| } |
| default: |
| throw new IllegalArgumentException("Unknown type " + readerType); |
| } |
| } else { |
| /* |
| * Check for the few cases where will not convert.... |
| */ |
| |
| isOk = ConvertTreeReaderFactory.canConvert(fileType, readerType); |
| } |
| if (isOk) { |
| readerToFile.put(readerType.getId(), fileType); |
| } else { |
| throw new IOException( |
| String.format( |
| "ORC does not support type conversion from file type %s (%d) to reader type %s (%d)", |
| fileType.toString(), fileType.getId(), |
| readerType.toString(), readerType.getId())); |
| } |
| } |
| |
| private static boolean checkAcidSchema(TypeDescription type) { |
| if (type.getCategory().equals(TypeDescription.Category.STRUCT)) { |
| List<String> rootFields = type.getFieldNames(); |
| if (acidEventFieldNames.equals(rootFields)) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| /** |
| * @param typeDescr |
| * @return ORC types for the ACID event based on the row's type description |
| */ |
| public static TypeDescription createEventSchema(TypeDescription typeDescr) { |
| TypeDescription result = TypeDescription.createStruct() |
| .addField("operation", TypeDescription.createInt()) |
| .addField("originalTransaction", TypeDescription.createLong()) |
| .addField("bucket", TypeDescription.createInt()) |
| .addField("rowId", TypeDescription.createLong()) |
| .addField("currentTransaction", TypeDescription.createLong()) |
| .addField("row", typeDescr.clone()); |
| return result; |
| } |
| |
| public static final List<String> acidEventFieldNames= new ArrayList<String>(); |
| static { |
| acidEventFieldNames.add("operation"); |
| acidEventFieldNames.add("originalTransaction"); |
| acidEventFieldNames.add("bucket"); |
| acidEventFieldNames.add("rowId"); |
| acidEventFieldNames.add("currentTransaction"); |
| acidEventFieldNames.add("row"); |
| } |
| } |