| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.drill.exec.store.easy.json.loader; |
| |
| import org.apache.drill.common.types.TypeProtos.DataMode; |
| import org.apache.drill.common.types.TypeProtos.MinorType; |
| import org.apache.drill.exec.record.metadata.ColumnMetadata; |
| import org.apache.drill.exec.record.metadata.MetadataUtils; |
| import org.apache.drill.exec.record.metadata.TupleMetadata; |
| import org.apache.drill.exec.store.easy.json.parser.JsonStructureParser; |
| import org.apache.drill.exec.store.easy.json.parser.TokenIterator; |
| import org.apache.drill.exec.store.easy.json.parser.ValueDef; |
| import org.apache.drill.exec.store.easy.json.parser.ValueDefFactory; |
| import org.apache.drill.exec.vector.accessor.ObjectWriter; |
| import org.apache.drill.exec.vector.accessor.ScalarWriter; |
| import org.apache.drill.exec.vector.accessor.TupleWriter; |
| import com.google.common.base.Preconditions; |
| |
| /** |
| * Describes a new field within an object. Allows the listener to control |
| * how to handle the field: as unprojected, parsed as a typed field, as |
| * text, as JSON, or as a custom parser. |
| */ |
| public class FieldDefn { |
| |
| private final TupleParser tupleParser; |
| private final String key; |
| private final TokenIterator tokenizer; |
| private ValueDef valueDef; |
| private ColumnMetadata providedColumn; |
| |
| public FieldDefn(TupleParser tupleParser, final String key, TokenIterator tokenizer) { |
| this(tupleParser, key, tokenizer, false); |
| } |
| |
| public FieldDefn(TupleParser tupleParser, final String key, |
| TokenIterator tokenizer, boolean isArray) { |
| this.tupleParser = tupleParser; |
| this.key = key; |
| this.tokenizer = tokenizer; |
| if (isArray) { |
| valueDef = ValueDefFactory.lookAhead(tokenizer); |
| valueDef = new ValueDef(valueDef.type(), valueDef.dimensions() + 1); |
| } |
| } |
| |
| /** |
| * Returns the field name. |
| */ |
| public String key() { return key; } |
| |
| public TupleParser tupleParser() { return tupleParser; } |
| |
| /** |
| * Token stream which allows a custom parser to look ahead |
| * as needed. The caller must "unget" all tokens to leave the |
| * tokenizer at the present location. Note that the underlying |
| * Jackson parser will return text for the last token consumed, |
| * even if tokens are unwound using the token iterator, so do not |
| * look ahead past the first field name or value; on look ahead |
| * over "static" tokens such as object and array start characters. |
| */ |
| public TokenIterator tokenizer() { return tokenizer; } |
| |
| /** |
| * Returns the parent parser which is needed to construct standard |
| * parsers. |
| */ |
| public JsonStructureParser parser() { return tupleParser.structParser(); } |
| |
| /** |
| * Looks ahead to guess the field type based on JSON tokens. |
| * While this is helpful, it really only works if the JSON |
| * is structured like a list of tuples, if the initial value is not {@code null}, |
| * and if initial arrays are not empty. The structure parser cannot see |
| * into the future beyond the first field value; the value listener for each |
| * field must handle "type-deferral" if needed to handle missing or null |
| * values. That is, type-consistency is a semantic task handled by the listener, |
| * not a syntax task handled by the parser. |
| */ |
| public ValueDef lookahead() { |
| Preconditions.checkState(tokenizer != null); |
| if (valueDef == null) { |
| valueDef = ValueDefFactory.lookAhead(tokenizer); |
| } |
| return valueDef; |
| } |
| |
| public TupleWriter writer() { return tupleParser.writer(); } |
| |
| public ColumnMetadata providedColumn() { |
| if (providedColumn == null) { |
| TupleMetadata tupleSchema = tupleParser.providedSchema(); |
| providedColumn = tupleSchema == null ? null : tupleSchema.metadata(key); |
| } |
| return providedColumn; |
| } |
| |
| public ColumnMetadata schemaFor(MinorType type, boolean isArray) { |
| return schemaFor(type, isArray, false); |
| } |
| |
| public ColumnMetadata schemaFor(MinorType type, boolean isArray, boolean forUnknownSchema) { |
| return MetadataUtils.newScalar(key, type, mode(isArray), forUnknownSchema); |
| } |
| |
| public DataMode mode(boolean isArray) { |
| return isArray ? DataMode.REPEATED : DataMode.OPTIONAL; |
| } |
| |
| public ScalarWriter scalarWriterFor(MinorType type, boolean isArray) { |
| return scalarWriterFor(schemaFor(type, isArray)); |
| } |
| |
| public ScalarWriter scalarWriterFor(ColumnMetadata colSchema) { |
| ObjectWriter writer = fieldWriterFor(colSchema); |
| return colSchema.isArray() ? writer.array().scalar() : writer.scalar(); |
| } |
| |
| public ObjectWriter fieldWriterFor(ColumnMetadata colSchema) { |
| final int index = writer().addColumn(colSchema); |
| return writer().column(index); |
| } |
| } |