blob: 14e61fa14321ddf39c50797b9ac1fd5197b72744 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.physical.resultSet.impl;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.physical.resultSet.ResultVectorCache;
import org.apache.drill.exec.physical.resultSet.impl.SingleVectorState.IsSetVectorState;
import org.apache.drill.exec.physical.resultSet.impl.SingleVectorState.OffsetVectorState;
import org.apache.drill.exec.physical.resultSet.impl.UnionState.UnionVectorState;
import org.apache.drill.exec.record.metadata.ColumnMetadata;
import org.apache.drill.exec.record.metadata.VariantMetadata;
import org.apache.drill.exec.record.metadata.VariantSchema;
import org.apache.drill.exec.vector.accessor.ObjectWriter;
import org.apache.drill.exec.vector.accessor.VariantWriter;
import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter;
import org.apache.drill.exec.vector.accessor.writer.ListWriterImpl;
import org.apache.drill.exec.vector.accessor.writer.SimpleListShim;
import org.apache.drill.exec.vector.accessor.writer.UnionVectorShim;
import org.apache.drill.exec.vector.accessor.writer.UnionWriterImpl;
import org.apache.drill.exec.vector.accessor.writer.WriterEvents;
import org.apache.drill.exec.vector.complex.ListVector;
import org.apache.drill.exec.vector.complex.UnionVector;
import org.apache.drill.exec.vector.complex.impl.UnionWriter;
/**
* Represents the contents of a list vector. A list vector is an odd creature.
* It starts as a list of nothing, evolves to be a nullable array of a single
* type, then becomes a nullable array of nullable unions holding nullable
* types.
* <p>
* At the writer level, the list consists of two parts: an array writer and
* a union writer. The union writer is needed because, unless the client tells
* us otherwise, we must be prepared for the list to become a union.
* <p>
* Holds the column states for the "columns" that make up the type members
* of the union, and implements the writer callbacks to add members to
* a list (disguised as a union), creating the actual union with the
* number of member types becomes two or more.
* <p>
* This class is similar to the {@link UnionState}, except that this
* version must handle the list transitions from no members to single
* member to union, and so this class is a bit more complex than the
* simple union case.
* <p>
* This implementation is based on a desired invariant: that once a client
* obtains a writer for the list, that writer never becomes invalid. This
* means we must carefully consider the list lifecycle. The list is
* represented as an array writer. When the list has
* no members, there would be no child for the array writer, a call to
* <tt>listArray.entry()</tt> would have to return null, which would be
* awkward and unlike any other writer use case. Once the list has a single
* type, the call to <tt>listArray.entry()</tt> might return a writer for
* that type. But, once the list becomes a repeated union, then
* <tt>listArray.entry()</tt> would have to return a union writer. This is
* the kind of muddy semantics we wish to avoid.
* <p>
* Instead, we model the list as a repeated union at all times. When the
* list has no type, then the list is a repeated union with no members.
* Once the list has a member, we have a repeated union of one member type.
* Finally, when adding another type, we have a repeated union of two
* types. The key is, in all cases, <tt>listArray.entry()</tt> returns
* a {@link UnionWriter}, so the client gets a consistent view.
* <p>
* Since the list itself changes form (no type, single type, then
* union), we hide that lifecycle internal to the writer and to this
* list state. The result is that the client need not care about the
* odd list lifecycle. But, on the flip side, this class, and the union
* writer, must go out of their way to hide these details.
* <p>
* At the writer level, the union writer uses "shims" to map from the union
* view to the actual list representation (no type, single type or union.)
* <p>
* At this level, this class must handle those cases as well, creating the
* union (by promoting the list) when needed. The result is a bit complex
* (for the code here), but simple for the client.
*/
public class ListState extends ContainerState
implements VariantWriter.VariantWriterListener {
/**
* Wrapper around the list vector (and its optional contained union).
* Manages the state of the "overhead" vectors such as the bits and
* offset vectors for the list, and (via the union vector state) the
* types vector for the union. The union vector state starts of as
* a dummy state (before the list has been "promoted" to a union)
* then becomes populated with the union state once the list is
* promoted.
*/
protected static class ListVectorState implements VectorState {
private final ColumnMetadata schema;
private final ListVector vector;
private final VectorState bitsVectorState;
private final VectorState offsetVectorState;
private VectorState memberVectorState;
public ListVectorState(UnionWriterImpl writer, ListVector vector) {
this.schema = writer.schema();
this.vector = vector;
bitsVectorState = new IsSetVectorState(writer, vector.getBitsVector());
offsetVectorState = new OffsetVectorState(writer, vector.getOffsetVector(), writer.elementPosition());
memberVectorState = new NullVectorState();
}
public ListVectorState(ListWriterImpl writer, WriterEvents elementWriter, ListVector vector) {
this.schema = writer.schema();
this.vector = vector;
bitsVectorState = new IsSetVectorState(writer, vector.getBitsVector());
offsetVectorState = new OffsetVectorState(writer, vector.getOffsetVector(), elementWriter);
memberVectorState = new NullVectorState();
}
private void replaceMember(VectorState memberState) {
memberVectorState = memberState;
}
private VectorState memberVectorState() { return memberVectorState; }
@Override
public int allocate(int cardinality) {
return bitsVectorState.allocate(cardinality) +
offsetVectorState.allocate(cardinality + 1) +
memberVectorState.allocate(childCardinality(cardinality));
}
@Override
public void rollover(int cardinality) {
bitsVectorState.rollover(cardinality);
offsetVectorState.rollover(cardinality);
memberVectorState.rollover(childCardinality(cardinality));
}
private int childCardinality(int cardinality) {
return cardinality * schema.expectedElementCount();
}
@Override
public void harvestWithLookAhead() {
bitsVectorState.harvestWithLookAhead();
offsetVectorState.harvestWithLookAhead();
memberVectorState.harvestWithLookAhead();
}
@Override
public void startBatchWithLookAhead() {
bitsVectorState.startBatchWithLookAhead();
offsetVectorState.startBatchWithLookAhead();
memberVectorState.startBatchWithLookAhead();
}
@Override
public void close() {
bitsVectorState.close();
offsetVectorState.close();
memberVectorState.close();
}
@SuppressWarnings("unchecked")
@Override
public ListVector vector() {
return vector;
}
@Override
public boolean isProjected() {
return true;
}
@Override
public void dump(HierarchicalFormatter format) {
// TODO Auto-generated method stub
}
}
/**
* Map of types to member columns, used to track the set of child
* column states for this list. This map mimics the actual set of
* vectors in the union (or list, before a list becomes a union),
* and matches the set of child writers in the union writer.
*/
private final Map<MinorType, ColumnState> columns = new HashMap<>();
public ListState(LoaderInternals loader, ResultVectorCache vectorCache) {
super(loader, vectorCache);
}
public VariantMetadata variantSchema() {
return parentColumn.schema().variantSchema();
}
public ListWriterImpl listWriter() {
return (ListWriterImpl) parentColumn.writer.array();
}
private UnionWriterImpl unionWriter() {
return (UnionWriterImpl) listWriter().variant();
}
private ListVector listVector() {
return parentColumn.vector();
}
private UnionVector unionVector() {
return (UnionVector) listVectorState().memberVectorState().vector();
}
private ListVectorState listVectorState() {
return (ListVectorState) parentColumn.vectorState();
}
private boolean isSingleType() {
return variantSchema().isSimple();
}
@Override
public ObjectWriter addType(MinorType type) {
return addMember(VariantSchema.memberMetadata(type));
}
@Override
public ObjectWriter addMember(ColumnMetadata member) {
if (variantSchema().hasType(member.type())) {
throw new IllegalArgumentException("Duplicate type: " + member.type().toString());
}
if (isSingleType()) {
throw new IllegalStateException("List is defined to contains a single type.");
}
return addColumn(member).writer();
}
/**
* Add a new column representing a type within the list. This is where the list
* strangeness occurs. The list starts with no type, then evolves to have a single
* type (held by the list vector). Upon the second type, the list vector is modified
* to hold a union vector, which then holds the existing type and the new type.
* After that, the third and later types simply are added to the union.
* Very, very ugly, but it is how the list vector works until we improve it...
* <p>
* We must make three parallel changes:
* <ul>
* <li>Modify the list vector structure.</li>
* <li>Modify the union writer structure. (If a list type can evolve, then the
* writer structure is an array of unions. But, since the union itself does
* not exist in the 0 and 1 type cases, we use "shims" to model these odd
* cases.</li>
* <li>Modify the vector state for the list. If the list is "promoted" to a
* union, then add the union to the list vector's state for management in
* vector events.</li>
* </ul>
*/
@Override
protected void addColumn(ColumnState colState) {
final MinorType type = colState.schema().type();
assert ! columns.containsKey(type);
// Add the new column (type) to metadata.
final int prevColCount = columns.size();
columns.put(type, colState);
if (prevColCount == 0) {
addFirstType(colState);
} else if (prevColCount == 1) {
addSecondType(colState);
} else {
// Already have a union; just add another type.
unionVector().addType(colState.vector());
}
}
private void addFirstType(ColumnState colState) {
// Going from no types to one type.
// Add the member to the list as its data vector.
listVector().setChildVector(colState.vector());
// Don't add the type to the vector state; we manage it as part
// of the collection of member columns.
// Note that we may have written 1 or more nulls to the array
// before we make this 0-to-1 type transition. If the new type is
// a scalar, it is nullable. Automatic back-fill will fill prior values
// with null, so there is nothing to do here.
//
// Note, however, that if this first type is a map, there is no way
// to mark that map as null; we loose the nullability state. However,
// if we later promote the single-type map to a union, we can't
// recover the null states and so the previously-null values won't
// be null. We could fix this, for a single batch, by keeping track
// of the null positions. But, there is no way to mark later maps
// as null. Another choice would be to force a transition directly
// to a union if the first type is a map. None of this has ever worked
// and so is left as an exercise for later once we work out what we
// actually want to support.
// Create the single type shim.
unionWriter().bindShim(new SimpleListShim());
}
/**
* Perform the delicate dance of promoting a list vector from a single type to
* a union, while leaving the writer client blissfully ignorant that the underlying
* vector representation just did a radical change. Key tasks:
* <ul>
* <li>Create the new column (type member) requested by the client.</li>
* <li>The List vector currently has a single type. Promote the list to
* a union, adding the existing type (column) as the first union member.</li>
* <li>Initialize the union's type vector with either the type of the existing
* column, or null, depending on the setting of the is-set bits in the existing
* column vector.</li>
* <li>Since we've written values into the union's type vector, mark the
* last-write position in the union vector's type vector writer to reflect
* these writes. (Otherwise, the writer will helpfully zero-fill the previous
* positions as part of it's back-fill handling.</li>
* <li>Replace the single-type shim in the union vector with a full union
* shim.</li>
* <li>Move the existing column writer or the member column across from the
* single-writer shim to the new union shim.</li>
* <li>Augment the list vector's vector state to include a vector state for
* the newly created union vector.</li>
* </ul>
* <p>
* Here, yet again, an editorial comment might be useful. List vectors are
* very strange and not at all well designed for high-speed writing. They are
* too complex; too much can go wrong and there are too many states to handle.
* Not only that, variant types don't play well with a relational model like
* SQL. This code works, but the overall list concept really needs rethinking.
*
* @param colState the column state for the newly added type column; the
* one causing the list to change from single-type to a union
*/
private void addSecondType(ColumnState colState) {
final UnionWriterImpl unionWriter = unionWriter();
final ListVector listVector = listVector();
// Going from one type to a union
// Convert the list from single type to a union,
// moving across the previous type vector.
final int typeFillCount = unionWriter.elementPosition().writeIndex();
final UnionVector unionVector = listVector.convertToUnion(
innerCardinality(), typeFillCount);
unionVector.addType(colState.vector());
// Replace the single-type shim with a union shim, copying
// across the existing writer.
final SimpleListShim oldShim = (SimpleListShim) unionWriter.shim();
final UnionVectorShim newShim = new UnionVectorShim(unionVector);
unionWriter.bindShim(newShim);
newShim.addMemberWriter(oldShim.memberWriter());
newShim.initTypeIndex(typeFillCount);
// The union vector will be managed within the list vector state.
// (Do this last because the union vector state expects the union
// writer to be operating in "union mode".
listVectorState().replaceMember(new UnionVectorState(unionVector, unionWriter));
}
/**
* Set the one and only type when building a single-type list.
*
* @param memberState the column state for the list elements
*/
public void setSubColumn(ColumnState memberState) {
assert columns.isEmpty();
columns.put(memberState.schema().type(), memberState);
}
@Override
protected Collection<ColumnState> columnStates() {
return columns.values();
}
@Override
public int innerCardinality() {
return parentColumn.innerCardinality();
}
@Override
protected boolean isVersioned() { return false; }
}