blob: b850370429ac4e14c050d30aaa9bd01f1c624d5d [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.store.easy.json.loader;
import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.record.metadata.ColumnMetadata;
import org.apache.drill.exec.record.metadata.MetadataUtils;
import org.apache.drill.exec.store.easy.json.parser.ElementParser;
import org.apache.drill.exec.store.easy.json.parser.ValueDef;
import org.apache.drill.exec.store.easy.json.parser.ValueDef.JsonType;
import org.apache.drill.exec.store.easy.json.values.VarCharListener;
import org.apache.drill.exec.store.easy.json.parser.ValueParser;
import org.apache.drill.exec.vector.accessor.ScalarWriter;
import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Create Drill field listeners based on the observed look-ahead
 * tokens in JSON.
 * <p>
 * The look-ahead {@link ValueDef} of each field drives the choice:
 * <ul>
 * <li>If the JSON type is unknown, a placeholder parser is created, or a
 * type is forced (see {@link #forceNullResolution(FieldDefn)} and
 * {@link #forceArrayResolution(FieldDefn)}).</li>
 * <li>Otherwise the field is resolved to an object, scalar, array, or
 * multi-dimensional array parser according to the array dimensions and
 * whether the type is an object.</li>
 * </ul>
 */
public class InferredFieldFactory extends BaseFieldFactory {

  protected static final Logger logger = LoggerFactory.getLogger(InferredFieldFactory.class);

  /**
   * @param loader the JSON loader passed through to the base factory
   */
  public InferredFieldFactory(JsonLoaderImpl loader) {
    super(loader);
  }

  /**
   * Build a column and its listener based on a look-ahead hint.
   *
   * @param fieldDefn definition of the field, including its look-ahead value
   * @return a parser for the field: a placeholder or forced-type parser when
   * the look-ahead type is unknown, else a parser matched to the type
   */
  @Override
  public ElementParser fieldParser(FieldDefn fieldDefn) {
    ValueDef valueDef = fieldDefn.lookahead();
    if (valueDef.type().isUnknown()) {
      return parserForUnknown(fieldDefn);
    } else {
      return resolveField(fieldDefn);
    }
  }

  /**
   * Create a parser when the look-ahead gives no usable type information.
   * Four cases are distinguished, in this order:
   * <ol>
   * <li>not an array: {@code null} appeared before any other value, so a
   * {@code NullFieldParser} placeholder is returned;</li>
   * <li>more than one array dimension (such as {@code [[]]}): a type must be
   * guessed right away;</li>
   * <li>one dimension with a {@code NULL} element type ({@code [null]}):
   * array resolution is forced;</li>
   * <li>otherwise ({@code []}): an {@code EmptyArrayFieldParser} placeholder
   * is returned.</li>
   * </ol>
   */
  private ElementParser parserForUnknown(FieldDefn fieldDefn) {
    ValueDef valueDef = fieldDefn.lookahead();
    if (!valueDef.isArray()) {

      // For the case null appears before other values.
      return new NullFieldParser(fieldDefn.tupleParser(), fieldDefn.key());
    } else if (valueDef.dimensions() > 1) {

      // An unknown nested array: [[]], etc. Must guess a type.
      return forceRepeatedListResolution(fieldDefn);
    } else if (valueDef.type() == JsonType.NULL) {

      // For the case of [null], must force resolution
      return forceArrayResolution(fieldDefn);
    } else {

      // For the case [] appears before other values.
      return new EmptyArrayFieldParser(fieldDefn.tupleParser(), fieldDefn.key());
    }
  }

  /**
   * Force a type for a multi-dimensional array of unknown element type.
   * The inner (array) schema is the one chosen by
   * {@link #schemaForUnknown(FieldDefn, boolean)}; it is wrapped in a
   * repeated-list schema with the look-ahead's number of dimensions.
   */
  private ElementParser forceRepeatedListResolution(FieldDefn fieldDefn) {
    ColumnMetadata innerSchema = schemaForUnknown(fieldDefn, true);
    int dims = fieldDefn.lookahead().dimensions();
    ColumnMetadata fieldSchema = repeatedListSchemaFor(fieldDefn.key(), dims, innerSchema);
    return buildOuterArrays(
        fieldDefn.fieldWriterFor(fieldSchema), dims,
        innerWriter -> scalarArrayParserFor(
            unknownParserFor(innerWriter.array().scalar())));
  }

  /**
   * Force a non-array type for a field whose type is still unknown.
   * Logs a warning, then builds the parser selected by the
   * {@code unknownsAsJson} option.
   *
   * @param fieldDefn definition of the unresolved field
   * @return a value parser for the forced scalar column
   */
  @Override
  public ElementParser forceNullResolution(FieldDefn fieldDefn) {
    logger.warn("Ambiguous type! JSON field {}" +
        " contains all nulls. Assuming JSON text.", fieldDefn.key());
    return forceResolution(fieldDefn, false);
  }

  /**
   * Force an array type for a field whose element type is still unknown.
   * Logs a warning, then wraps the forced scalar parser in a scalar array
   * parser. Note that this class also calls this method for a look-ahead of
   * {@code [null]}, not only for empty arrays.
   *
   * @param fieldDefn definition of the unresolved field
   * @return an array parser for the forced scalar array column
   */
  @Override
  public ElementParser forceArrayResolution(FieldDefn fieldDefn) {
    logger.warn("Ambiguous type! JSON field {}" +
        " contains all empty arrays. Assuming array of JSON text.", fieldDefn.key());
    return scalarArrayParserFor(forceResolution(fieldDefn, true));
  }

  /**
   * Create the column for an unknown field and return the value parser
   * that writes to it.
   *
   * @param isArray whether the column is created as an array
   */
  private ValueParser forceResolution(FieldDefn fieldDefn, boolean isArray) {
    return unknownParserFor(fieldDefn.scalarWriterFor(schemaForUnknown(fieldDefn, isArray)));
  }

  /**
   * Choose the schema for a field of unknown type: {@code VARCHAR} when the
   * {@code unknownsAsJson} option is set, else the type given by the
   * {@code nullType} option.
   */
  private ColumnMetadata schemaForUnknown(FieldDefn fieldDefn, boolean isArray) {
    return loader.options().unknownsAsJson
        ? fieldDefn.schemaFor(MinorType.VARCHAR, isArray, true)
        : fieldDefn.schemaFor(loader.options().nullType, isArray, true);
  }

  /**
   * Choose the value parser for a field of unknown type: a JSON-text parser
   * feeding a {@link VarCharListener} when the {@code unknownsAsJson} option
   * is set, else a simple value parser with the listener matching the writer.
   */
  private ValueParser unknownParserFor(ScalarWriter writer) {
    return loader.options().unknownsAsJson
        ? parserFactory().jsonTextParser(new VarCharListener(loader, writer))
        : parserFactory().simpleValueParser(scalarListenerFor(writer));
  }

  /**
   * Create the parser for a field whose look-ahead type is known. The choice
   * depends on the number of array dimensions (0, 1, or 2+) and on whether
   * the type is an object or a scalar.
   *
   * @throws IllegalArgumentException if the look-ahead type is unknown;
   * such fields must go through {@link #parserForUnknown(FieldDefn)}
   */
  private ElementParser resolveField(FieldDefn fieldDefn) {
    ValueDef valueDef = fieldDefn.lookahead();
    Preconditions.checkArgument(!valueDef.type().isUnknown(),
        "JSON field %s has an unknown type and cannot be resolved here",
        fieldDefn.key());
    if (!valueDef.isArray()) {
      if (valueDef.type().isObject()) {
        return objectParserFor(fieldDefn);
      } else {
        return scalarParserFor(fieldDefn, false);
      }
    } else if (valueDef.dimensions() == 1) {
      if (valueDef.type().isObject()) {
        return objectArrayParserFor(fieldDefn);
      } else {
        return scalarArrayParserFor(scalarParserFor(fieldDefn, true));
      }
    } else { // 2+ dimensions
      if (valueDef.type().isObject()) {
        return multiDimObjectArrayParserFor(fieldDefn);
      } else {
        return multiDimScalarArrayParserFor(fieldDefn);
      }
    }
  }

  /**
   * Create a scalar value parser for the field. With the {@code allTextMode}
   * option set, the column is {@code VARCHAR} and values are parsed as text;
   * otherwise the column type is inferred from the look-ahead JSON type.
   *
   * @param fieldDefn definition of the field
   * @param isArray whether the column is created as an array
   * @return the value parser for the column
   */
  public ValueParser scalarParserFor(FieldDefn fieldDefn, boolean isArray) {
    if (loader.options().allTextMode) {
      return parserFactory().textValueParser(
          new VarCharListener(loader, fieldDefn.scalarWriterFor(MinorType.VARCHAR, isArray)));
    } else {
      return scalarParserFor(fieldDefn, fieldDefn.schemaFor(scalarTypeFor(fieldDefn), isArray));
    }
  }

  /**
   * Create a multi- (2+) dimensional scalar array from a JSON value description.
   */
  private ElementParser multiDimScalarArrayParserFor(FieldDefn fieldDefn) {
    ColumnMetadata innerSchema = fieldDefn.schemaFor(scalarTypeFor(fieldDefn), true);
    int dims = fieldDefn.lookahead().dimensions();
    ColumnMetadata fieldSchema = repeatedListSchemaFor(fieldDefn.key(), dims, innerSchema);
    return multiDimScalarArrayFor(
        fieldDefn.fieldWriterFor(fieldSchema), dims);
  }

  /**
   * Create a map array column and its associated object array listener
   * for the given key.
   *
   * @param fieldDefn definition of the field
   * @return the parser for the array of objects
   */
  public ElementParser objectArrayParserFor(FieldDefn fieldDefn) {
    return objectArrayParserFor(fieldDefn, MetadataUtils.newMapArray(fieldDefn.key()), null);
  }

  /**
   * Create a RepeatedList which contains (empty) Map objects. That is,
   * create a multi-dimensional array of maps. The inner schema is a new map
   * array named after the field key. The last argument passed on is
   * {@code null} (presumably "no provided schema"; confirm against
   * {@code multiDimObjectArrayFor}).
   */
  private ElementParser multiDimObjectArrayParserFor(FieldDefn fieldDefn) {
    ColumnMetadata innerSchema = MetadataUtils.newMapArray(fieldDefn.key());
    int dims = fieldDefn.lookahead().dimensions();
    ColumnMetadata fieldSchema = repeatedListSchemaFor(fieldDefn.key(), dims, innerSchema);
    return multiDimObjectArrayFor(fieldDefn.fieldWriterFor(fieldSchema), dims, null);
  }

  /**
   * Create a RepeatedList which contains Unions. (Actually, this is an
   * array of List objects internally.) The variant is variable, it makes no
   * sense to specify a schema for the variant. Also, omitting the schema
   * saves a large amount of complexity that will likely never be needed.
   * <p>
   * Not called from within this class, hence the suppressed warning.
   */
  @SuppressWarnings("unused")
  private ElementParser repeatedListOfVariantListenerFor(FieldDefn fieldDefn) {
    ColumnMetadata innerSchema = MetadataUtils.newVariant(fieldDefn.key(), DataMode.REPEATED);
    int dims = fieldDefn.lookahead().dimensions();
    ColumnMetadata fieldSchema = repeatedListSchemaFor(fieldDefn.key(), dims, innerSchema);
    return multiDimVariantArrayParserFor(fieldDefn.fieldWriterFor(fieldSchema), dims);
  }

  /**
   * Convert the JSON type, obtained by looking ahead one token, to a Drill
   * scalar type. Report an error if the JSON type does not map to a Drill
   * type (which can occur in a context where we expect a scalar, but got
   * an object or array.)
   * <p>
   * The mapping is done by {@link #mappedScalarType(JsonType)} rather than
   * {@link #drillTypeFor(JsonType)}: the latter throws for an unmapped type,
   * which would make the error report below unreachable.
   */
  private MinorType scalarTypeFor(FieldDefn fieldDefn) {
    JsonType jsonType = fieldDefn.lookahead().type();
    MinorType colType = mappedScalarType(jsonType);
    if (colType == null) {
      throw loader().unsupportedJsonTypeException(fieldDefn.key(), jsonType);
    }
    return colType;
  }

  /**
   * Map a JSON scalar type to the Drill type used to store it.
   *
   * @param type the JSON type to convert
   * @return {@code VARCHAR} for any type when the {@code allTextMode} option
   * is set; otherwise {@code BIT} for {@code BOOLEAN}, {@code FLOAT8} for
   * {@code FLOAT}, {@code VARCHAR} for {@code STRING}, and for
   * {@code INTEGER} either {@code FLOAT8} (option
   * {@code readNumbersAsDouble} set) or {@code BIGINT}
   * @throws IllegalStateException if the type has no scalar mapping
   */
  public MinorType drillTypeFor(JsonType type) {
    MinorType drillType = mappedScalarType(type);
    if (drillType == null) {
      throw new IllegalStateException(type.name());
    }
    return drillType;
  }

  /**
   * Shared JSON-to-Drill scalar type mapping.
   *
   * @return the Drill type, or {@code null} if the JSON type has no scalar
   * mapping, leaving each caller to report the problem in its own way
   */
  private MinorType mappedScalarType(JsonType type) {
    if (loader().options().allTextMode) {
      return MinorType.VARCHAR;
    }
    switch (type) {
      case BOOLEAN:
        return MinorType.BIT;
      case FLOAT:
        return MinorType.FLOAT8;
      case INTEGER:
        if (loader().options().readNumbersAsDouble) {
          return MinorType.FLOAT8;
        } else {
          return MinorType.BIGINT;
        }
      case STRING:
        return MinorType.VARCHAR;
      default:
        return null;
    }
  }
}