blob: 4149850be979a76076b0ba8063a900f38ba54f8b [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.physical.impl.protocol;
import java.util.ArrayList;
import java.util.List;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.vector.ValueVector;
/**
* Tracks changes to schemas via "snapshots" over time. That is, given
* a schema, tracks if a new schema is the same as the current one. For
* example, each batch output from a series of readers might be compared,
* as they are returned, to detect schema changes from one batch to the
* next. This class does not track vector-by-vector changes as a schema
* is built, but rather periodic "snapshots" at times determined by the
* operator.
* <p>
* If an operator is guaranteed to emit a consistent schema, then no
* checks need be done, and this tracker will report no schema change.
* On the other hand, a scanner might check schema more often. At least
* once per reader, and more often if a reader is "late-schema": if the
* reader can change schema batch-by-batch.
* <p>
* Drill defines "schema change" in a very specific way. Not only must
* the set of columns be the same, and have the same types, it must also
* be the case that the <b>vectors</b> that hold the columns be identical.
* Generated code contains references to specific vector objects; passing
* along different vectors requires new code to be generated and is treated
* as a schema change.
* <p>
* Drill has no concept of "same schema, different vectors." A change in
* vector is just as serious as a change in schema. Hence, operators
* try to use the same vectors for their entire lives. That is the change
* tracked here.
* <p>
* Schema versions start at 1. A schema version of 0 means that no
* output batch was ever presented.
*/
// TODO: Does not handle SV4 situations
public class SchemaTracker {
private int schemaVersion;
private BatchSchema currentSchema;
private List<ValueVector> currentVectors = new ArrayList<>();
public void trackSchema(VectorContainer newBatch) {
if (schemaVersion == 0 || ! isSameSchema(newBatch)) {
schemaVersion++;
captureSchema(newBatch);
}
}
private boolean isSameSchema(VectorContainer newBatch) {
if (currentVectors.size() != newBatch.getNumberOfColumns()) {
return false;
}
// Compare vectors by identity: not just same type,
// must be same instance.
for (int i = 0; i < currentVectors.size(); i++) {
if (currentVectors.get(i) != newBatch.getValueVector(i).getValueVector()) {
return false;
}
}
return true;
}
private void captureSchema(VectorContainer newBatch) {
currentVectors.clear();
for (VectorWrapper<?> vw : newBatch) {
currentVectors.add(vw.getValueVector());
}
currentSchema = newBatch.getSchema();
}
public int schemaVersion() { return schemaVersion; }
public BatchSchema schema() { return currentSchema; }
}