blob: 0c232baeca3d71a07d230e7c56914d14b7e4a216 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.store.easy.json.loader;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.record.metadata.ColumnMetadata;
import org.apache.drill.exec.record.metadata.MetadataUtils;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.exec.store.easy.json.parser.ElementParser;
import org.apache.drill.exec.store.easy.json.parser.FieldParserFactory;
import org.apache.drill.exec.store.easy.json.parser.ValueParser;
import org.apache.drill.exec.store.easy.json.values.VarCharListener;
import org.apache.drill.exec.vector.accessor.ObjectWriter;
import com.google.common.base.Preconditions;
/**
 * Field factory that builds the parser for a JSON field from a provided
 * schema. When a provided column exists for a field, the schema takes
 * precedence over the JSON syntax: the schema is expected to accurately
 * describe what will occur for that field in the JSON input. Fields
 * without a provided column are passed to the child factory.
 */
public class ProvidedFieldFactory extends BaseFieldFactory {

  public ProvidedFieldFactory(JsonLoaderImpl loader, FieldFactory child) {
    super(loader, child);
  }

  /**
   * Build a column and its listener based on a provided schema, if one
   * exists for the field; otherwise delegate to the child factory.
   * The user is responsible for ensuring that the provided schema
   * accurately reflects the structure of the JSON being parsed.
   *
   * @param fieldDefn definition of the field being parsed
   * @return the parser for the field
   */
  @Override
  public ElementParser fieldParser(FieldDefn fieldDefn) {
    if (fieldDefn.providedColumn() != null) {
      return parserFor(fieldDefn);
    }
    // No provided column: let the next factory in the chain decide.
    return child.fieldParser(fieldDefn);
  }

  /**
   * Select a parser according to the structure type of the field's
   * provided column. Structure types other than primitive, tuple,
   * variant and multi-dimensional array are reported through
   * {@code loader().unsupportedType()}.
   *
   * @param fieldDefn definition of a field that has a provided column
   * @return the parser matching the provided column's structure
   */
  public ElementParser parserFor(FieldDefn fieldDefn) {
    ColumnMetadata schema = fieldDefn.providedColumn();
    switch (schema.structureType()) {
      case PRIMITIVE:
        return primitiveParserFor(fieldDefn);
      case TUPLE:
        return objectParserForSchema(fieldDefn);
      case VARIANT:
        return variantParserForSchema(fieldDefn);
      case MULTI_ARRAY:
        return multiDimArrayParserForSchema(fieldDefn);
      default:
        throw loader().unsupportedType(schema);
    }
  }

  /**
   * Parser for a primitive column. VARCHAR columns take a separate path
   * because they may carry a JSON mode property; every other type uses
   * the basic path.
   */
  private ElementParser primitiveParserFor(FieldDefn fieldDefn) {
    ColumnMetadata schema = fieldDefn.providedColumn();
    if (schema.type() != MinorType.VARCHAR) {
      return basicParserFor(fieldDefn);
    }
    return stringParserFor(fieldDefn);
  }

  /**
   * Parser for a scalar or scalar array column. Creation of the scalar
   * parser is delegated downward: it will be done by the extended
   * types factory, if present, else by the inferred field factory.
   */
  private ElementParser basicParserFor(FieldDefn fieldDefn) {
    // Work on a copy of the provided column.
    ColumnMetadata schemaCopy = fieldDefn.providedColumn().copy();
    ValueParser valueParser = child.scalarParserFor(fieldDefn, schemaCopy);
    if (!schemaCopy.isArray()) {
      return valueParser;
    }
    return scalarArrayParserFor(valueParser);
  }

  /**
   * Parser for a VARCHAR column. The column's {@code JsonLoader.JSON_MODE}
   * property, when set to the text or literal mode, selects a dedicated
   * parser from the parser factory. An absent or unrecognized mode falls
   * back to the basic parser.
   */
  private ElementParser stringParserFor(FieldDefn fieldDefn) {
    String jsonMode = fieldDefn.providedColumn().property(JsonLoader.JSON_MODE);
    if (jsonMode == null) {
      return basicParserFor(fieldDefn);
    }
    FieldParserFactory factory = parserFactory();
    if (JsonLoader.JSON_TEXT_MODE.equals(jsonMode)) {
      return factory.textValueParser(varCharListenerFor(fieldDefn));
    }
    if (JsonLoader.JSON_LITERAL_MODE.equals(jsonMode)) {
      return factory.jsonTextParser(varCharListenerFor(fieldDefn));
    }
    // Unrecognized mode: treat the column as an ordinary VARCHAR.
    return basicParserFor(fieldDefn);
  }

  /**
   * Create a VARCHAR listener bound to a scalar writer that is built
   * from a copy of the field's provided column.
   */
  private VarCharListener varCharListenerFor(FieldDefn fieldDefn) {
    ColumnMetadata schemaCopy = fieldDefn.providedColumn().copy();
    return new VarCharListener(loader, fieldDefn.scalarWriterFor(schemaCopy));
  }

  /**
   * Parser for a tuple (map) column or an array of them. The provided
   * map schema is propagated into the object listener as a provided
   * tuple schema.
   */
  private ElementParser objectParserForSchema(FieldDefn fieldDefn) {
    ColumnMetadata mapCol = fieldDefn.providedColumn();

    // The column itself is created empty; its members travel
    // separately as the provided tuple schema.
    ColumnMetadata emptyCol = mapCol.cloneEmpty();
    TupleMetadata memberSchema = mapCol.tupleSchema();
    if (!mapCol.isArray()) {
      return objectParserFor(fieldDefn, emptyCol, memberSchema);
    }
    return objectArrayParserFor(fieldDefn, emptyCol, memberSchema);
  }

  /**
   * Create a repeated list column and its multiple levels of inner structure
   * from a provided schema. Repeated lists can nest to any number of levels to
   * provide any number of dimensions. In general, if an array is
   * <i>n</i>-dimensional, then there are <i>n</i>-1 repeated lists with some
   * array type as the innermost dimension.
   */
  private ElementParser multiDimArrayParserForSchema(FieldDefn fieldDefn) {

    // Walk down the stack of repeated lists, counting one "outer"
    // dimension per level, until reaching the innermost array (the
    // "list" which is "repeated"). The count starts at one to account
    // for that inner array.
    int dimCount = 1;
    ColumnMetadata innerSchema = fieldDefn.providedColumn();
    for (; MetadataUtils.isRepeatedList(innerSchema); dimCount++) {
      innerSchema = innerSchema.childSchema();
      Preconditions.checkArgument(innerSchema != null);
    }

    ColumnMetadata listSchema = repeatedListSchemaFor(fieldDefn.key(), dimCount,
        innerSchema.cloneEmpty());
    ObjectWriter writer = fieldDefn.fieldWriterFor(listSchema);

    // The innermost element type decides which array parser is used.
    switch (innerSchema.structureType()) {
      case PRIMITIVE:
        return multiDimScalarArrayFor(writer, dimCount);
      case TUPLE:
        return multiDimObjectArrayFor(writer,
            dimCount, innerSchema.tupleSchema());
      case VARIANT:
        return multiDimVariantArrayParserFor(writer, dimCount);
      default:
        throw loader().unsupportedType(fieldDefn.providedColumn());
    }
  }

  /**
   * Parser for a variant column or an array of variants. A variant can
   * contain multiple types. The schema does not declare the types;
   * rather they are discovered by the reader. That is, there is no
   * VARIANT&lt;INT, DOUBLE&gt;, there is just VARIANT.
   */
  private ElementParser variantParserForSchema(FieldDefn fieldDefn) {
    ColumnMetadata emptyCol = fieldDefn.providedColumn().cloneEmpty();
    ObjectWriter writer = fieldDefn.fieldWriterFor(emptyCol);
    if (!emptyCol.isArray()) {
      return variantParserFor(writer.variant());
    }
    return variantArrayParserFor(writer.array());
  }
}