| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.orc.impl; |
| |
| import org.apache.orc.TypeDescription; |
| |
| import java.util.ArrayList; |
| import java.util.List; |
| import java.util.regex.Pattern; |
| |
| public class ParserUtils { |
| |
| static TypeDescription.Category parseCategory(ParserUtils.StringPosition source) { |
| StringBuilder word = new StringBuilder(); |
| boolean hadSpace = true; |
| while (source.position < source.length) { |
| char ch = source.value.charAt(source.position); |
| if (Character.isLetter(ch)) { |
| word.append(Character.toLowerCase(ch)); |
| hadSpace = false; |
| } else if (ch == ' ') { |
| if (!hadSpace) { |
| hadSpace = true; |
| word.append(ch); |
| } |
| } else { |
| break; |
| } |
| source.position += 1; |
| } |
| String catString = word.toString(); |
| // if there were trailing spaces, remove them. |
| if (hadSpace) { |
| catString = catString.trim(); |
| } |
| if (!catString.isEmpty()) { |
| for (TypeDescription.Category cat : TypeDescription.Category.values()) { |
| if (cat.getName().equals(catString)) { |
| return cat; |
| } |
| } |
| } |
| throw new IllegalArgumentException("Can't parse category at " + source); |
| } |
| |
| static int parseInt(ParserUtils.StringPosition source) { |
| int start = source.position; |
| int result = 0; |
| while (source.position < source.length) { |
| char ch = source.value.charAt(source.position); |
| if (!Character.isDigit(ch)) { |
| break; |
| } |
| result = result * 10 + (ch - '0'); |
| source.position += 1; |
| } |
| if (source.position == start) { |
| throw new IllegalArgumentException("Missing integer at " + source); |
| } |
| return result; |
| } |
| |
| static String parseName(ParserUtils.StringPosition source) { |
| if (source.position == source.length) { |
| throw new IllegalArgumentException("Missing name at " + source); |
| } |
| final int start = source.position; |
| if (source.value.charAt(source.position) == '`') { |
| source.position += 1; |
| StringBuilder buffer = new StringBuilder(); |
| boolean closed = false; |
| while (source.position < source.length) { |
| char ch = source.value.charAt(source.position); |
| source.position += 1; |
| if (ch == '`') { |
| if (source.position < source.length && |
| source.value.charAt(source.position) == '`') { |
| source.position += 1; |
| buffer.append('`'); |
| } else { |
| closed = true; |
| break; |
| } |
| } else { |
| buffer.append(ch); |
| } |
| } |
| if (!closed) { |
| source.position = start; |
| throw new IllegalArgumentException("Unmatched quote at " + source); |
| } else if (buffer.length() == 0) { |
| throw new IllegalArgumentException("Empty quoted field name at " + source); |
| } |
| return buffer.toString(); |
| } else { |
| while (source.position < source.length) { |
| char ch = source.value.charAt(source.position); |
| if (!Character.isLetterOrDigit(ch) && ch != '_') { |
| break; |
| } |
| source.position += 1; |
| } |
| if (source.position == start) { |
| throw new IllegalArgumentException("Missing name at " + source); |
| } |
| return source.value.substring(start, source.position); |
| } |
| } |
| |
| static void requireChar(ParserUtils.StringPosition source, char required) { |
| if (source.position >= source.length || |
| source.value.charAt(source.position) != required) { |
| throw new IllegalArgumentException("Missing required char '" + |
| required + "' at " + source); |
| } |
| source.position += 1; |
| } |
| |
| private static boolean consumeChar(ParserUtils.StringPosition source, |
| char ch) { |
| boolean result = source.position < source.length && |
| source.value.charAt(source.position) == ch; |
| if (result) { |
| source.position += 1; |
| } |
| return result; |
| } |
| |
| private static void parseUnion(TypeDescription type, |
| ParserUtils.StringPosition source) { |
| requireChar(source, '<'); |
| do { |
| type.addUnionChild(parseType(source)); |
| } while (consumeChar(source, ',')); |
| requireChar(source, '>'); |
| } |
| |
| private static void parseStruct(TypeDescription type, |
| ParserUtils.StringPosition source) { |
| requireChar(source, '<'); |
| boolean needComma = false; |
| while (!consumeChar(source, '>')) { |
| if (needComma) { |
| requireChar(source, ','); |
| } else { |
| needComma = true; |
| } |
| String fieldName = parseName(source); |
| requireChar(source, ':'); |
| type.addField(fieldName, parseType(source)); |
| } |
| } |
| |
| public static TypeDescription parseType(ParserUtils.StringPosition source) { |
| TypeDescription result = new TypeDescription(parseCategory(source)); |
| switch (result.getCategory()) { |
| case BINARY: |
| case BOOLEAN: |
| case BYTE: |
| case DATE: |
| case DOUBLE: |
| case FLOAT: |
| case INT: |
| case LONG: |
| case SHORT: |
| case STRING: |
| case TIMESTAMP: |
| case TIMESTAMP_INSTANT: |
| break; |
| case CHAR: |
| case VARCHAR: |
| requireChar(source, '('); |
| result.withMaxLength(parseInt(source)); |
| requireChar(source, ')'); |
| break; |
| case DECIMAL: { |
| requireChar(source, '('); |
| int precision = parseInt(source); |
| requireChar(source, ','); |
| result.withScale(parseInt(source)); |
| result.withPrecision(precision); |
| requireChar(source, ')'); |
| break; |
| } |
| case LIST: { |
| requireChar(source, '<'); |
| TypeDescription child = parseType(source); |
| result.addChild(child); |
| requireChar(source, '>'); |
| break; |
| } |
| case MAP: { |
| requireChar(source, '<'); |
| TypeDescription keyType = parseType(source); |
| result.addChild(keyType); |
| requireChar(source, ','); |
| TypeDescription valueType = parseType(source); |
| result.addChild(valueType); |
| requireChar(source, '>'); |
| break; |
| } |
| case UNION: |
| parseUnion(result, source); |
| break; |
| case STRUCT: |
| parseStruct(result, source); |
| break; |
| default: |
| throw new IllegalArgumentException("Unknown type " + |
| result.getCategory() + " at " + source); |
| } |
| return result; |
| } |
| |
| /** |
| * Split a compound name into parts separated by '.'. |
| * @param source the string to parse into simple names |
| * @return a list of simple names from the source |
| */ |
| private static List<String> splitName(ParserUtils.StringPosition source) { |
| List<String> result = new ArrayList<>(); |
| do { |
| result.add(parseName(source)); |
| } while (consumeChar(source, '.')); |
| return result; |
| } |
| |
| |
| private static final Pattern INTEGER_PATTERN = Pattern.compile("^[0-9]+$"); |
| |
| public static TypeDescription findSubtype(TypeDescription schema, |
| ParserUtils.StringPosition source) { |
| List<String> names = ParserUtils.splitName(source); |
| if (names.size() == 1 && INTEGER_PATTERN.matcher(names.get(0)).matches()) { |
| return schema.findSubtype(Integer.parseInt(names.get(0))); |
| } |
| TypeDescription current = SchemaEvolution.checkAcidSchema(schema) |
| ? SchemaEvolution.getBaseRow(schema) : schema; |
| while (names.size() > 0) { |
| String first = names.remove(0); |
| switch (current.getCategory()) { |
| case STRUCT: { |
| int posn = current.getFieldNames().indexOf(first); |
| if (posn == -1) { |
| throw new IllegalArgumentException("Field " + first + |
| " not found in " + current.toString()); |
| } |
| current = current.getChildren().get(posn); |
| break; |
| } |
| case LIST: |
| if (first.equals("_elem")) { |
| current = current.getChildren().get(0); |
| } else { |
| throw new IllegalArgumentException("Field " + first + |
| "not found in " + current.toString()); |
| } |
| break; |
| case MAP: |
| if (first.equals("_key")) { |
| current = current.getChildren().get(0); |
| } else if (first.equals("_value")) { |
| current = current.getChildren().get(1); |
| } else { |
| throw new IllegalArgumentException("Field " + first + |
| "not found in " + current.toString()); |
| } |
| break; |
| case UNION: { |
| try { |
| int posn = Integer.parseInt(first); |
| if (posn < 0 || posn >= current.getChildren().size()) { |
| throw new NumberFormatException("off end of union"); |
| } |
| current = current.getChildren().get(posn); |
| } catch (NumberFormatException e) { |
| throw new IllegalArgumentException("Field " + first + |
| "not found in " + current.toString(), e); |
| } |
| break; |
| } |
| default: |
| throw new IllegalArgumentException("Field " + first + |
| "not found in " + current.toString()); |
| } |
| } |
| return current; |
| } |
| |
| public static List<TypeDescription> findSubtypeList(TypeDescription schema, |
| StringPosition source) { |
| List<TypeDescription> result = new ArrayList<>(); |
| if (source.hasCharactersLeft()) { |
| do { |
| result.add(findSubtype(schema, source)); |
| } while (consumeChar(source, ',')); |
| } |
| return result; |
| } |
| |
| public static class StringPosition { |
| final String value; |
| int position; |
| final int length; |
| |
| public StringPosition(String value) { |
| this.value = value == null ? "" : value; |
| position = 0; |
| length = this.value.length(); |
| } |
| |
| @Override |
| public String toString() { |
| return '\'' + value.substring(0, position) + '^' + |
| value.substring(position) + '\''; |
| } |
| |
| public String fromPosition(int start) { |
| return value.substring(start, this.position); |
| } |
| |
| public boolean hasCharactersLeft() { |
| return position != length; |
| } |
| } |
| |
| /** |
| * Annotate the given schema with the encryption information. |
| * |
| * Format of the string is a key-list. |
| * <ul> |
| * <li>key-list = key (';' key-list)?</li> |
| * <li>key = key-name ':' field-list</li> |
| * <li>field-list = field-name ( ',' field-list )?</li> |
| * <li>field-name = number | field-part ('.' field-name)?</li> |
| * <li>field-part = quoted string | simple name</li> |
| * </ul> |
| * |
| * @param source the string to parse |
| * @param schema the top level schema |
| * @throws IllegalArgumentException if there are conflicting keys for a field |
| */ |
| public static void parseKeys(StringPosition source, TypeDescription schema) { |
| if (source.hasCharactersLeft()) { |
| do { |
| String keyName = parseName(source); |
| requireChar(source, ':'); |
| for (TypeDescription field : findSubtypeList(schema, source)) { |
| String prev = field.getAttributeValue(TypeDescription.ENCRYPT_ATTRIBUTE); |
| if (prev != null && !prev.equals(keyName)) { |
| throw new IllegalArgumentException("Conflicting encryption keys " + |
| keyName + " and " + prev); |
| } |
| field.setAttribute(TypeDescription.ENCRYPT_ATTRIBUTE, keyName); |
| } |
| } while (consumeChar(source, ';')); |
| } |
| } |
| |
| /** |
| * Annotate the given schema with the masking information. |
| * |
| * Format of the string is a mask-list. |
| * <ul> |
| * <li>mask-list = mask (';' mask-list)?</li> |
| * <li>mask = mask-name (',' parameter)* ':' field-list</li> |
| * <li>field-list = field-name ( ',' field-list )?</li> |
| * <li>field-name = number | field-part ('.' field-name)?</li> |
| * <li>field-part = quoted string | simple name</li> |
| * </ul> |
| * |
| * @param source the string to parse |
| * @param schema the top level schema |
| * @throws IllegalArgumentException if there are conflicting masks for a field |
| */ |
| public static void parseMasks(StringPosition source, TypeDescription schema) { |
| if (source.hasCharactersLeft()) { |
| do { |
| // parse the mask and parameters, but only get the underlying string |
| int start = source.position; |
| parseName(source); |
| while (consumeChar(source, ',')) { |
| parseName(source); |
| } |
| String maskString = source.fromPosition(start); |
| requireChar(source, ':'); |
| for (TypeDescription field : findSubtypeList(schema, source)) { |
| String prev = field.getAttributeValue(TypeDescription.MASK_ATTRIBUTE); |
| if (prev != null && !prev.equals(maskString)) { |
| throw new IllegalArgumentException("Conflicting encryption masks " + |
| maskString + " and " + prev); |
| } |
| field.setAttribute(TypeDescription.MASK_ATTRIBUTE, maskString); |
| } |
| } while (consumeChar(source, ';')); |
| } |
| } |
| |
| public static MaskDescriptionImpl buildMaskDescription(String value) { |
| StringPosition source = new StringPosition(value); |
| String maskName = parseName(source); |
| List<String> params = new ArrayList<>(); |
| while (consumeChar(source, ',')) { |
| params.add(parseName(source)); |
| } |
| return new MaskDescriptionImpl(maskName, |
| params.toArray(new String[params.size()])); |
| } |
| } |