blob: a3f428d3cd0267bd5c299050095341a8c2bf2d1b [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.vector.complex.fn;
import org.apache.drill.exec.record.metadata.ColumnMetadata;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.exec.vector.complex.impl.ComplexCopier;
import org.apache.drill.common.expression.PathSegment;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.exec.vector.complex.writer.BaseWriter;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collection;
import java.util.List;
public class JsonReaderUtils {
public static void ensureAtLeastOneField(BaseWriter.ComplexWriter writer,
Collection<SchemaPath> columns,
boolean allTextMode,
List<BaseWriter.ListWriter> emptyArrayWriters) {
List<BaseWriter.MapWriter> writerList = new ArrayList<>();
List<PathSegment> fieldPathList = new ArrayList<>();
BitSet emptyWriters = new BitSet(columns.size());
int fieldIndex = 0;
// first pass: collect which fields are empty
for (SchemaPath schemaPath : columns) {
PathSegment fieldPath = schemaPath.getRootSegment();
BaseWriter.MapWriter fieldWriter = writer.rootAsMap();
while (fieldPath.getChild() != null && !fieldPath.getChild().isArray()) {
fieldWriter = fieldWriter.map(fieldPath.getNameSegment().getPath());
fieldPath = fieldPath.getChild();
}
writerList.add(fieldWriter);
fieldPathList.add(fieldPath);
if (fieldWriter.isEmptyMap()) {
emptyWriters.set(fieldIndex, true);
}
if (fieldIndex == 0 && !allTextMode) {
// when allTextMode is false, there is not much benefit to producing all
// the empty fields; just produce 1 field. The reason is that the type of the
// fields is unknown, so if we produce multiple Integer fields by default, a
// subsequent batch that contains non-integer fields will error out in any case.
// Whereas, with allTextMode true, we are sure that all fields are going to be
// treated as varchar, so it makes sense to produce all the fields, and in fact
// is necessary in order to avoid schema change exceptions by downstream operators.
break;
}
fieldIndex++;
}
// second pass: create default typed vectors corresponding to empty fields
// Note: this is not easily do-able in 1 pass because the same fieldWriter
// may be shared by multiple fields whereas we want to keep track of all fields
// independently, so we rely on the emptyWriters.
for (int j = 0; j < fieldPathList.size(); j++) {
BaseWriter.MapWriter fieldWriter = writerList.get(j);
PathSegment fieldPath = fieldPathList.get(j);
if (emptyWriters.get(j)) {
if (allTextMode) {
fieldWriter.varChar(fieldPath.getNameSegment().getPath());
} else {
fieldWriter.integer(fieldPath.getNameSegment().getPath());
}
}
}
for (BaseWriter.ListWriter field : emptyArrayWriters) {
// checks that array has not been initialized
if (field.getValueCapacity() == 0) {
if (allTextMode) {
field.varChar();
} else {
field.integer();
}
}
}
}
/**
* Creates writers which correspond to the specified schema for specified root writer.
*
* @param writer parent writer for writers to create
* @param columns collection of columns for which writers should be created
* @param schema table schema
* @param allTextMode whether all primitive writers should be of varchar type
*/
public static void writeColumnsUsingSchema(BaseWriter.ComplexWriter writer,
Collection<SchemaPath> columns, TupleMetadata schema, boolean allTextMode) {
BaseWriter.MapWriter mapWriter = writer.rootAsMap();
for (SchemaPath column : columns) {
if (column.isDynamicStar()) {
writeSchemaColumns(schema, mapWriter, allTextMode);
} else {
ColumnMetadata columnMetadata = schema.metadata(column.getRootSegmentPath());
writeColumnToMapWriter(mapWriter, column.getRootSegment(), columnMetadata, allTextMode);
}
}
}
/**
* Creates writer for column which corresponds to specified {@code PathSegment column} with type taken from
* the specified {@code ColumnMetadata columnMetadata}.
* For the case when specified {@code PathSegment column} is map, writers only for its child segments will be created.
* For the case when specified {@code PathSegment column} is array segment, all child writers will be created.
*
* @param writer parent writer for writers to create
* @param column column for which writers should be created
* @param columnMetadata column metadata
* @param allTextMode whether all primitive writers should be of varchar type
*/
private static void writeColumnToMapWriter(BaseWriter.MapWriter writer,
PathSegment column, ColumnMetadata columnMetadata, boolean allTextMode) {
PathSegment child = column.getChild();
if (child != null && child.isNamed()) {
String name = column.getNameSegment().getPath();
ColumnMetadata childMetadata = columnMetadata.tupleSchema().metadata(name);
writeColumnToMapWriter(writer.map(name), child, childMetadata, allTextMode);
} else {
writeSingleOrArrayColumn(columnMetadata, writer, allTextMode);
}
}
/**
* Creates writers for specified {@code ColumnMetadata columnMetadata}.
* For the case when column is array, creates list writer and required child writers.
*
* @param columnMetadata column metadata
* @param writer parent writer for writers to create
* @param allTextMode whether all primitive writers should be of varchar type
*/
private static void writeSingleOrArrayColumn(ColumnMetadata columnMetadata,
BaseWriter.MapWriter writer, boolean allTextMode) {
if (columnMetadata.isArray()) {
writeArrayColumn(columnMetadata, writer.list(columnMetadata.name()), allTextMode);
} else {
writeColumn(columnMetadata, writer, allTextMode);
}
}
/**
* Creates writers for all columns taken from {@code TupleMetadata schema}.
*
* @param schema table or map schema
* @param fieldWriter parent writer for writers to create
* @param allTextMode whether all primitive writers should be of varchar type
*/
private static void writeSchemaColumns(TupleMetadata schema, BaseWriter.MapWriter fieldWriter,
boolean allTextMode) {
for (ColumnMetadata columnMetadata : schema) {
writeSingleOrArrayColumn(columnMetadata, fieldWriter, allTextMode);
}
}
/**
* Creates writers for specified {@code ColumnMetadata columnMetadata} considering its children.
* For the case of {@code TUPLE} or {@code MULTI_ARRAY}, all child writers will be created recursively.
*
* @param columnMetadata column metadata
* @param fieldWriter parent writer for writers to create
* @param allTextMode whether all primitive writers should be of varchar type
*/
private static void writeColumn(ColumnMetadata columnMetadata,
BaseWriter.MapWriter fieldWriter, boolean allTextMode) {
switch (columnMetadata.structureType()) {
case DICT:
writeSchemaColumns(
columnMetadata.tupleSchema(), fieldWriter.dict(columnMetadata.name()), allTextMode);
break;
case TUPLE:
writeSchemaColumns(
columnMetadata.tupleSchema(), fieldWriter.map(columnMetadata.name()), allTextMode);
break;
case MULTI_ARRAY:
writeArrayColumn(columnMetadata.childSchema(), fieldWriter.list(columnMetadata.name()), allTextMode);
break;
case PRIMITIVE:
if (allTextMode) {
fieldWriter.varChar(columnMetadata.name());
} else {
ComplexCopier.getMapWriterForType(
columnMetadata.majorType(), fieldWriter, columnMetadata.name());
}
break;
default:
throw new UnsupportedOperationException(
String.format("Unsupported type [%s] for column [%s].", columnMetadata.majorType(), columnMetadata.name()));
}
}
/**
* Writes column which corresponds to specified {@code ColumnMetadata columnMetadata}
* into specified {@code BaseWriter.ListWriter fieldWriter}.
*
* @param columnMetadata column metadata
* @param fieldWriter parent writer for writers to create
* @param allTextMode whether all primitive writers should be of varchar type
*/
private static void writeArrayColumn(ColumnMetadata columnMetadata,
BaseWriter.ListWriter fieldWriter, boolean allTextMode) {
switch (columnMetadata.structureType()) {
case DICT:
writeSchemaColumns(columnMetadata.tupleSchema(), fieldWriter.dict(), allTextMode);
break;
case TUPLE:
writeSchemaColumns(columnMetadata.tupleSchema(), fieldWriter.map(), allTextMode);
break;
case MULTI_ARRAY:
writeArrayColumn(columnMetadata.childSchema(), fieldWriter.list(), allTextMode);
break;
case PRIMITIVE:
if (allTextMode) {
fieldWriter.varChar();
} else {
ComplexCopier.getListWriterForType(columnMetadata.majorType(), fieldWriter);
}
break;
default:
throw new UnsupportedOperationException(
String.format("Unsupported type [%s] for column [%s].", columnMetadata.majorType(), columnMetadata.name()));
}
}
}