exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReaderUtils.java - drill - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.drill.exec.vector.complex.fn;

 import org.apache.drill.exec.record.metadata.ColumnMetadata;
 import org.apache.drill.exec.record.metadata.TupleMetadata;
 import org.apache.drill.exec.vector.complex.impl.ComplexCopier;
 import org.apache.drill.common.expression.PathSegment;
 import org.apache.drill.common.expression.SchemaPath;
 import org.apache.drill.exec.vector.complex.writer.BaseWriter;

 import java.util.ArrayList;
 import java.util.BitSet;
 import java.util.Collection;
 import java.util.List;

 /**
  * Static helpers that pre-create vector writers for the JSON reader, either with
  * default types (when nothing was read for a column) or with the types described
  * by a provided {@code TupleMetadata} schema.
  */
 public class JsonReaderUtils {

   /**
    * Creates default-typed fields for requested columns whose parent map writer is still empty,
    * and default-typed element writers for list writers which were never initialized.
    * The default type is varchar when {@code allTextMode} is set and integer otherwise.
    * When {@code allTextMode} is not set, only the first requested column is considered.
    *
    * @param writer            root writer whose map is used as the starting point for every column
    * @param columns           requested columns
    * @param allTextMode       whether default fields should be of varchar type instead of integer
    * @param emptyArrayWriters list writers to initialize if their value capacity is still zero
    */
   public static void ensureAtLeastOneField(BaseWriter.ComplexWriter writer,
       Collection<SchemaPath> columns,
       boolean allTextMode,
       List<BaseWriter.ListWriter> emptyArrayWriters) {

     List<BaseWriter.MapWriter> writerList = new ArrayList<>();
     List<PathSegment> fieldPathList = new ArrayList<>();
     BitSet emptyWriters = new BitSet(columns.size());
     int fieldIndex = 0;

     // first pass: collect which fields are empty
     for (SchemaPath schemaPath : columns) {
       PathSegment fieldPath = schemaPath.getRootSegment();
       BaseWriter.MapWriter fieldWriter = writer.rootAsMap();
       // descend through the named segments; stop at the leaf or right before an array segment
       while (fieldPath.getChild() != null && !fieldPath.getChild().isArray()) {
         fieldWriter = fieldWriter.map(fieldPath.getNameSegment().getPath());
         fieldPath = fieldPath.getChild();
       }
       writerList.add(fieldWriter);
       fieldPathList.add(fieldPath);
       if (fieldWriter.isEmptyMap()) {
         emptyWriters.set(fieldIndex, true);
       }
       if (fieldIndex == 0 && !allTextMode) {
         // when allTextMode is false, there is not much benefit to producing all
         // the empty fields; just produce 1 field. The reason is that the type of the
         // fields is unknown, so if we produce multiple Integer fields by default, a
         // subsequent batch that contains non-integer fields will error out in any case.
         // Whereas, with allTextMode true, we are sure that all fields are going to be
         // treated as varchar, so it makes sense to produce all the fields, and in fact
         // is necessary in order to avoid schema change exceptions by downstream operators.
         break;
       }
       fieldIndex++;
     }

     // second pass: create default typed vectors corresponding to empty fields
     // Note: this is not easily doable in one pass because the same fieldWriter
     // may be shared by multiple fields whereas we want to keep track of all fields
     // independently, so we rely on the emptyWriters.
     for (int j = 0; j < fieldPathList.size(); j++) {
       if (emptyWriters.get(j)) {
         BaseWriter.MapWriter fieldWriter = writerList.get(j);
         String fieldName = fieldPathList.get(j).getNameSegment().getPath();
         if (allTextMode) {
           fieldWriter.varChar(fieldName);
         } else {
           fieldWriter.integer(fieldName);
         }
       }
     }

     for (BaseWriter.ListWriter field : emptyArrayWriters) {
       // checks that array has not been initialized
       if (field.getValueCapacity() == 0) {
         if (allTextMode) {
           field.varChar();
         } else {
           field.integer();
         }
       }
     }
   }

   /**
    * Creates writers which correspond to the specified schema for specified root writer.
    * A dynamic star column creates writers for every column of the schema; any other column
    * is resolved by its root segment name in the schema.
    *
    * @param writer      parent writer for writers to create
    * @param columns     collection of columns for which writers should be created
    * @param schema      table schema
    * @param allTextMode whether all primitive writers should be of varchar type
    */
   public static void writeColumnsUsingSchema(BaseWriter.ComplexWriter writer,
       Collection<SchemaPath> columns, TupleMetadata schema, boolean allTextMode) {
     BaseWriter.MapWriter mapWriter = writer.rootAsMap();
     for (SchemaPath column : columns) {
       if (column.isDynamicStar()) {
         writeSchemaColumns(schema, mapWriter, allTextMode);
       } else {
         ColumnMetadata columnMetadata = schema.metadata(column.getRootSegmentPath());
         writeColumnToMapWriter(mapWriter, column.getRootSegment(), columnMetadata, allTextMode);
       }
     }
   }

   /**
    * Creates writer for column which corresponds to specified {@code PathSegment column} with type taken from
    * the specified {@code ColumnMetadata columnMetadata}.
    * For the case when specified {@code PathSegment column} is map, writers only for its child segments will be created.
    * For the case when specified {@code PathSegment column} is array segment, all child writers will be created.
    *
    * @param writer         parent writer for writers to create
    * @param column         column for which writers should be created
    * @param columnMetadata metadata of the column named by {@code column}
    * @param allTextMode    whether all primitive writers should be of varchar type
    */
   private static void writeColumnToMapWriter(BaseWriter.MapWriter writer,
       PathSegment column, ColumnMetadata columnMetadata, boolean allTextMode) {
     PathSegment child = column.getChild();
     if (child != null && child.isNamed()) {
       String name = column.getNameSegment().getPath();
       // The nested column's metadata is stored in this column's tuple schema under the
       // child's name; the map writer to descend into is the one named after this column.
       String childName = child.getNameSegment().getPath();
       ColumnMetadata childMetadata = columnMetadata.tupleSchema().metadata(childName);
       writeColumnToMapWriter(writer.map(name), child, childMetadata, allTextMode);
     } else {
       writeSingleOrArrayColumn(columnMetadata, writer, allTextMode);
     }
   }

   /**
    * Creates writers for specified {@code ColumnMetadata columnMetadata}.
    * For the case when column is array, creates list writer and required child writers.
    *
    * @param columnMetadata column metadata
    * @param writer         parent writer for writers to create
    * @param allTextMode    whether all primitive writers should be of varchar type
    */
   private static void writeSingleOrArrayColumn(ColumnMetadata columnMetadata,
       BaseWriter.MapWriter writer, boolean allTextMode) {
     if (columnMetadata.isArray()) {
       writeArrayColumn(columnMetadata, writer.list(columnMetadata.name()), allTextMode);
     } else {
       writeColumn(columnMetadata, writer, allTextMode);
     }
   }

   /**
    * Creates writers for all columns taken from {@code TupleMetadata schema}.
    *
    * @param schema      table or map schema
    * @param fieldWriter parent writer for writers to create
    * @param allTextMode whether all primitive writers should be of varchar type
    */
   private static void writeSchemaColumns(TupleMetadata schema, BaseWriter.MapWriter fieldWriter,
       boolean allTextMode) {
     for (ColumnMetadata columnMetadata : schema) {
       writeSingleOrArrayColumn(columnMetadata, fieldWriter, allTextMode);
     }
   }

   /**
    * Creates writers for specified {@code ColumnMetadata columnMetadata} considering its children.
    * For the case of {@code DICT}, {@code TUPLE} or {@code MULTI_ARRAY}, all child writers will be
    * created recursively.
    *
    * @param columnMetadata column metadata
    * @param fieldWriter    parent writer for writers to create
    * @param allTextMode    whether all primitive writers should be of varchar type
    * @throws UnsupportedOperationException if the structure type of the column is not handled
    */
   private static void writeColumn(ColumnMetadata columnMetadata,
       BaseWriter.MapWriter fieldWriter, boolean allTextMode) {
     switch (columnMetadata.structureType()) {
       case DICT:
         writeSchemaColumns(
             columnMetadata.tupleSchema(), fieldWriter.dict(columnMetadata.name()), allTextMode);
         break;
       case TUPLE:
         writeSchemaColumns(
             columnMetadata.tupleSchema(), fieldWriter.map(columnMetadata.name()), allTextMode);
         break;
       case MULTI_ARRAY:
         writeArrayColumn(columnMetadata.childSchema(), fieldWriter.list(columnMetadata.name()), allTextMode);
         break;
       case PRIMITIVE:
         if (allTextMode) {
           fieldWriter.varChar(columnMetadata.name());
         } else {
           ComplexCopier.getMapWriterForType(
               columnMetadata.majorType(), fieldWriter, columnMetadata.name());
         }
         break;
       default:
         throw new UnsupportedOperationException(
             String.format("Unsupported type [%s] for column [%s].", columnMetadata.majorType(), columnMetadata.name()));
     }
   }

   /**
    * Writes column which corresponds to specified {@code ColumnMetadata columnMetadata}
    * into specified {@code BaseWriter.ListWriter fieldWriter}.
    *
    * @param columnMetadata column metadata
    * @param fieldWriter    parent writer for writers to create
    * @param allTextMode    whether all primitive writers should be of varchar type
    * @throws UnsupportedOperationException if the structure type of the column is not handled
    */
   private static void writeArrayColumn(ColumnMetadata columnMetadata,
       BaseWriter.ListWriter fieldWriter, boolean allTextMode) {
     switch (columnMetadata.structureType()) {
       case DICT:
         writeSchemaColumns(columnMetadata.tupleSchema(), fieldWriter.dict(), allTextMode);
         break;
       case TUPLE:
         writeSchemaColumns(columnMetadata.tupleSchema(), fieldWriter.map(), allTextMode);
         break;
       case MULTI_ARRAY:
         writeArrayColumn(columnMetadata.childSchema(), fieldWriter.list(), allTextMode);
         break;
       case PRIMITIVE:
         if (allTextMode) {
           fieldWriter.varChar();
         } else {
           ComplexCopier.getListWriterForType(columnMetadata.majorType(), fieldWriter);
         }
         break;
       default:
         throw new UnsupportedOperationException(
             String.format("Unsupported type [%s] for column [%s].", columnMetadata.majorType(), columnMetadata.name()));
     }
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.drill.exec.vector.complex.fn;

	import org.apache.drill.exec.record.metadata.ColumnMetadata;
	import org.apache.drill.exec.record.metadata.TupleMetadata;
	import org.apache.drill.exec.vector.complex.impl.ComplexCopier;
	import org.apache.drill.common.expression.PathSegment;
	import org.apache.drill.common.expression.SchemaPath;
	import org.apache.drill.exec.vector.complex.writer.BaseWriter;

	import java.util.ArrayList;
	import java.util.BitSet;
	import java.util.Collection;
	import java.util.List;

	/**
	 * Static helpers that pre-create vector writers for the JSON reader, either with
	 * default types (when nothing was read for a column) or with the types described
	 * by a provided {@code TupleMetadata} schema.
	 */
	public class JsonReaderUtils {

	  /**
	   * Creates default-typed fields for requested columns whose parent map writer is still empty,
	   * and default-typed element writers for list writers which were never initialized.
	   * The default type is varchar when {@code allTextMode} is set and integer otherwise.
	   * When {@code allTextMode} is not set, only the first requested column is considered.
	   *
	   * @param writer            root writer whose map is used as the starting point for every column
	   * @param columns           requested columns
	   * @param allTextMode       whether default fields should be of varchar type instead of integer
	   * @param emptyArrayWriters list writers to initialize if their value capacity is still zero
	   */
	  public static void ensureAtLeastOneField(BaseWriter.ComplexWriter writer,
	      Collection<SchemaPath> columns,
	      boolean allTextMode,
	      List<BaseWriter.ListWriter> emptyArrayWriters) {

	    List<BaseWriter.MapWriter> writerList = new ArrayList<>();
	    List<PathSegment> fieldPathList = new ArrayList<>();
	    BitSet emptyWriters = new BitSet(columns.size());
	    int fieldIndex = 0;

	    // first pass: collect which fields are empty
	    for (SchemaPath schemaPath : columns) {
	      PathSegment fieldPath = schemaPath.getRootSegment();
	      BaseWriter.MapWriter fieldWriter = writer.rootAsMap();
	      // descend through the named segments; stop at the leaf or right before an array segment
	      while (fieldPath.getChild() != null && !fieldPath.getChild().isArray()) {
	        fieldWriter = fieldWriter.map(fieldPath.getNameSegment().getPath());
	        fieldPath = fieldPath.getChild();
	      }
	      writerList.add(fieldWriter);
	      fieldPathList.add(fieldPath);
	      if (fieldWriter.isEmptyMap()) {
	        emptyWriters.set(fieldIndex, true);
	      }
	      if (fieldIndex == 0 && !allTextMode) {
	        // when allTextMode is false, there is not much benefit to producing all
	        // the empty fields; just produce 1 field. The reason is that the type of the
	        // fields is unknown, so if we produce multiple Integer fields by default, a
	        // subsequent batch that contains non-integer fields will error out in any case.
	        // Whereas, with allTextMode true, we are sure that all fields are going to be
	        // treated as varchar, so it makes sense to produce all the fields, and in fact
	        // is necessary in order to avoid schema change exceptions by downstream operators.
	        break;
	      }
	      fieldIndex++;
	    }

	    // second pass: create default typed vectors corresponding to empty fields
	    // Note: this is not easily doable in one pass because the same fieldWriter
	    // may be shared by multiple fields whereas we want to keep track of all fields
	    // independently, so we rely on the emptyWriters.
	    for (int j = 0; j < fieldPathList.size(); j++) {
	      if (emptyWriters.get(j)) {
	        BaseWriter.MapWriter fieldWriter = writerList.get(j);
	        String fieldName = fieldPathList.get(j).getNameSegment().getPath();
	        if (allTextMode) {
	          fieldWriter.varChar(fieldName);
	        } else {
	          fieldWriter.integer(fieldName);
	        }
	      }
	    }

	    for (BaseWriter.ListWriter field : emptyArrayWriters) {
	      // checks that array has not been initialized
	      if (field.getValueCapacity() == 0) {
	        if (allTextMode) {
	          field.varChar();
	        } else {
	          field.integer();
	        }
	      }
	    }
	  }

	  /**
	   * Creates writers which correspond to the specified schema for specified root writer.
	   * A dynamic star column creates writers for every column of the schema; any other column
	   * is resolved by its root segment name in the schema.
	   *
	   * @param writer      parent writer for writers to create
	   * @param columns     collection of columns for which writers should be created
	   * @param schema      table schema
	   * @param allTextMode whether all primitive writers should be of varchar type
	   */
	  public static void writeColumnsUsingSchema(BaseWriter.ComplexWriter writer,
	      Collection<SchemaPath> columns, TupleMetadata schema, boolean allTextMode) {
	    BaseWriter.MapWriter mapWriter = writer.rootAsMap();
	    for (SchemaPath column : columns) {
	      if (column.isDynamicStar()) {
	        writeSchemaColumns(schema, mapWriter, allTextMode);
	      } else {
	        ColumnMetadata columnMetadata = schema.metadata(column.getRootSegmentPath());
	        writeColumnToMapWriter(mapWriter, column.getRootSegment(), columnMetadata, allTextMode);
	      }
	    }
	  }

	  /**
	   * Creates writer for column which corresponds to specified {@code PathSegment column} with type taken from
	   * the specified {@code ColumnMetadata columnMetadata}.
	   * For the case when specified {@code PathSegment column} is map, writers only for its child segments will be created.
	   * For the case when specified {@code PathSegment column} is array segment, all child writers will be created.
	   *
	   * @param writer         parent writer for writers to create
	   * @param column         column for which writers should be created
	   * @param columnMetadata metadata of the column named by {@code column}
	   * @param allTextMode    whether all primitive writers should be of varchar type
	   */
	  private static void writeColumnToMapWriter(BaseWriter.MapWriter writer,
	      PathSegment column, ColumnMetadata columnMetadata, boolean allTextMode) {
	    PathSegment child = column.getChild();
	    if (child != null && child.isNamed()) {
	      String name = column.getNameSegment().getPath();
	      // The nested column's metadata is stored in this column's tuple schema under the
	      // child's name; the map writer to descend into is the one named after this column.
	      String childName = child.getNameSegment().getPath();
	      ColumnMetadata childMetadata = columnMetadata.tupleSchema().metadata(childName);
	      writeColumnToMapWriter(writer.map(name), child, childMetadata, allTextMode);
	    } else {
	      writeSingleOrArrayColumn(columnMetadata, writer, allTextMode);
	    }
	  }

	  /**
	   * Creates writers for specified {@code ColumnMetadata columnMetadata}.
	   * For the case when column is array, creates list writer and required child writers.
	   *
	   * @param columnMetadata column metadata
	   * @param writer         parent writer for writers to create
	   * @param allTextMode    whether all primitive writers should be of varchar type
	   */
	  private static void writeSingleOrArrayColumn(ColumnMetadata columnMetadata,
	      BaseWriter.MapWriter writer, boolean allTextMode) {
	    if (columnMetadata.isArray()) {
	      writeArrayColumn(columnMetadata, writer.list(columnMetadata.name()), allTextMode);
	    } else {
	      writeColumn(columnMetadata, writer, allTextMode);
	    }
	  }

	  /**
	   * Creates writers for all columns taken from {@code TupleMetadata schema}.
	   *
	   * @param schema      table or map schema
	   * @param fieldWriter parent writer for writers to create
	   * @param allTextMode whether all primitive writers should be of varchar type
	   */
	  private static void writeSchemaColumns(TupleMetadata schema, BaseWriter.MapWriter fieldWriter,
	      boolean allTextMode) {
	    for (ColumnMetadata columnMetadata : schema) {
	      writeSingleOrArrayColumn(columnMetadata, fieldWriter, allTextMode);
	    }
	  }

	  /**
	   * Creates writers for specified {@code ColumnMetadata columnMetadata} considering its children.
	   * For the case of {@code DICT}, {@code TUPLE} or {@code MULTI_ARRAY}, all child writers will be
	   * created recursively.
	   *
	   * @param columnMetadata column metadata
	   * @param fieldWriter    parent writer for writers to create
	   * @param allTextMode    whether all primitive writers should be of varchar type
	   * @throws UnsupportedOperationException if the structure type of the column is not handled
	   */
	  private static void writeColumn(ColumnMetadata columnMetadata,
	      BaseWriter.MapWriter fieldWriter, boolean allTextMode) {
	    switch (columnMetadata.structureType()) {
	      case DICT:
	        writeSchemaColumns(
	            columnMetadata.tupleSchema(), fieldWriter.dict(columnMetadata.name()), allTextMode);
	        break;
	      case TUPLE:
	        writeSchemaColumns(
	            columnMetadata.tupleSchema(), fieldWriter.map(columnMetadata.name()), allTextMode);
	        break;
	      case MULTI_ARRAY:
	        writeArrayColumn(columnMetadata.childSchema(), fieldWriter.list(columnMetadata.name()), allTextMode);
	        break;
	      case PRIMITIVE:
	        if (allTextMode) {
	          fieldWriter.varChar(columnMetadata.name());
	        } else {
	          ComplexCopier.getMapWriterForType(
	              columnMetadata.majorType(), fieldWriter, columnMetadata.name());
	        }
	        break;
	      default:
	        throw new UnsupportedOperationException(
	            String.format("Unsupported type [%s] for column [%s].", columnMetadata.majorType(), columnMetadata.name()));
	    }
	  }

	  /**
	   * Writes column which corresponds to specified {@code ColumnMetadata columnMetadata}
	   * into specified {@code BaseWriter.ListWriter fieldWriter}.
	   *
	   * @param columnMetadata column metadata
	   * @param fieldWriter    parent writer for writers to create
	   * @param allTextMode    whether all primitive writers should be of varchar type
	   * @throws UnsupportedOperationException if the structure type of the column is not handled
	   */
	  private static void writeArrayColumn(ColumnMetadata columnMetadata,
	      BaseWriter.ListWriter fieldWriter, boolean allTextMode) {
	    switch (columnMetadata.structureType()) {
	      case DICT:
	        writeSchemaColumns(columnMetadata.tupleSchema(), fieldWriter.dict(), allTextMode);
	        break;
	      case TUPLE:
	        writeSchemaColumns(columnMetadata.tupleSchema(), fieldWriter.map(), allTextMode);
	        break;
	      case MULTI_ARRAY:
	        writeArrayColumn(columnMetadata.childSchema(), fieldWriter.list(), allTextMode);
	        break;
	      case PRIMITIVE:
	        if (allTextMode) {
	          fieldWriter.varChar();
	        } else {
	          ComplexCopier.getListWriterForType(columnMetadata.majorType(), fieldWriter);
	        }
	        break;
	      default:
	        throw new UnsupportedOperationException(
	            String.format("Unsupported type [%s] for column [%s].", columnMetadata.majorType(), columnMetadata.name()));
	    }
	  }
	}