blob: a30bda94a1b98d47480e1e4c2e82eec158a388af [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.physical.impl.validate;
import static org.apache.drill.test.rowSet.RowSetUtilities.intArray;
import static org.apache.drill.test.rowSet.RowSetUtilities.strArray;
import static org.junit.Assert.assertTrue;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.drill.categories.RowSetTests;
import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet;
import org.apache.drill.exec.record.VectorAccessible;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.exec.vector.RepeatedVarCharVector;
import org.apache.drill.exec.vector.UInt4Vector;
import org.apache.drill.exec.vector.ValueVector;
import org.apache.drill.exec.vector.VarCharVector;
import org.apache.drill.test.SubOperatorTest;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@Category(RowSetTests.class)
public class TestBatchValidator extends SubOperatorTest {
/**
 * {@link BatchValidator.ErrorReporter} that keeps every reported message in
 * an in-memory list so a test can inspect what the validator complained about.
 * Warnings are recorded exactly like errors.
 */
public static class CapturingReporter implements BatchValidator.ErrorReporter {

  /** Messages recorded so far, in the order they were reported. */
  public List<String> errors = new ArrayList<>();

  /**
   * Records a message formatted as {@code "<name> (<vector class>): <msg>"}.
   *
   * @param name name passed by the validator for the offending vector
   * @param vector vector the message refers to; only its simple class name is used
   * @param msg description of the problem
   */
  @Override
  public void error(String name, ValueVector vector, String msg) {
    String vectorType = vector.getClass().getSimpleName();
    error(name + " (" + vectorType + "): " + msg);
  }

  /** Treats a warning the same way as an error: the message is captured. */
  @Override
  public void warn(String name, ValueVector vector, String msg) {
    error(name, vector, msg);
  }

  /** Appends the already formatted message to {@link #errors}. */
  @Override
  public void error(String msg) {
    errors.add(msg);
  }

  /** Returns the number of messages captured so far. */
  @Override
  public int errorCount() {
    return errors.size();
  }
}
/**
 * Verifies that a well-formed batch with a required and a nullable INT
 * column (including one null value) passes validation.
 */
@Test
public void testValidFixed() {
  TupleMetadata schema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .addNullable("b", MinorType.INT)
      .buildSchema();

  SingleRowSet batch = fixture.rowSetBuilder(schema)
      .addRow(10, 100)
      .addRow(20, 120)
      .addRow(30, null)
      .addRow(40, 140)
      .build();

  try {
    // Actually run the validator; without this check the test passes
    // regardless of what the validator does.
    assertTrue(BatchValidator.validate(batch.vectorAccessible()));
  } finally {
    // Always clear the batch, even when the assertion above fails.
    batch.clear();
  }
}
/**
 * Verifies that a well-formed batch with a required and a nullable VARCHAR
 * column (including one null value) passes validation.
 */
@Test
public void testValidVariable() {
  TupleMetadata schema = new SchemaBuilder()
      .add("a", MinorType.VARCHAR)
      .addNullable("b", MinorType.VARCHAR)
      .buildSchema();

  SingleRowSet batch = fixture.rowSetBuilder(schema)
      .addRow("col1.1", "col1.2")
      .addRow("col2.1", "col2.2")
      .addRow("col3.1", null)
      .addRow("col4.1", "col4.2")
      .build();

  try {
    // Actually run the validator; without this check the test passes
    // regardless of what the validator does.
    assertTrue(BatchValidator.validate(batch.vectorAccessible()));
  } finally {
    // Always clear the batch, even when the assertion above fails.
    batch.clear();
  }
}
/**
 * Verifies that a well-formed batch with repeated INT and repeated VARCHAR
 * columns (empty, multi-element and single-element arrays) passes validation.
 */
@Test
public void testValidRepeated() {
  TupleMetadata schema = new SchemaBuilder()
      .add("a", MinorType.INT, DataMode.REPEATED)
      .add("b", MinorType.VARCHAR, DataMode.REPEATED)
      .buildSchema();

  SingleRowSet batch = fixture.rowSetBuilder(schema)
      .addRow(intArray(), strArray())
      .addRow(intArray(1, 2, 3), strArray("fred", "barney", "wilma"))
      .addRow(intArray(4), strArray("dino"))
      .build();

  try {
    assertTrue(BatchValidator.validate(batch.vectorAccessible()));
  } finally {
    // Always clear the batch, even when the assertion above fails.
    batch.clear();
  }
}
/**
 * Overwrites the offset at index 3 of a three-row VARCHAR column with 0 and
 * expects the validator to report an error matching {@code BAD_OFFSETS}.
 */
@Test
public void testVariableMissingLast() {
  TupleMetadata schema = new SchemaBuilder()
      .add("a", MinorType.VARCHAR)
      .buildSchema();

  SingleRowSet batch = fixture.rowSetBuilder(schema)
      .addRow("x")
      .addRow("y")
      .addRow("z")
      .build();

  try {
    // Here we are evil: stomp on the last offset to simulate corruption.
    // Don't do this in real code!
    VectorAccessible va = batch.vectorAccessible();
    ValueVector v = va.iterator().next().getValueVector();
    VarCharVector vc = (VarCharVector) v;
    UInt4Vector ov = vc.getOffsetVector();

    // Sanity check: the offset must be non-zero before we zero it,
    // otherwise the write below would not corrupt anything.
    assertTrue("Offset 3 should be positive before corruption",
        ov.getAccessor().get(3) > 0);
    ov.getMutator().set(3, 0);

    // Validator should catch the error.
    checkForError(batch, BAD_OFFSETS);
  } finally {
    // Always clear the batch, even when an assertion above fails.
    batch.clear();
  }
}
/**
 * Runs the validator over the batch with a {@link CapturingReporter} and
 * asserts that at least one error was reported and that the first reported
 * error contains a match for the given regular expression.
 *
 * @param batch batch to validate; its row count is passed to the validator
 * @param expectedError regular expression searched for (via
 *     {@link Matcher#find()}) in the first captured error message
 */
private static void checkForError(SingleRowSet batch, String expectedError) {
  CapturingReporter reporter = new CapturingReporter();
  new BatchValidator(reporter).validateBatch(batch.vectorAccessible(), batch.rowCount());

  assertTrue(
      "Expected the validator to report an error matching \"" + expectedError
          + "\", but no errors were reported",
      reporter.errorCount() > 0);

  // Only the first reported error is compared against the pattern.
  String firstError = reporter.errors.get(0);
  Matcher m = Pattern.compile(expectedError).matcher(firstError);
  assertTrue(
      "First reported error \"" + firstError
          + "\" does not match expected pattern \"" + expectedError + "\"",
      m.find());
}
/**
 * Overwrites offset 0 of a VARCHAR column with 1 and expects the validator
 * to report that offset 0 must be 0.
 */
@Test
public void testVariableCorruptFirst() {
  TupleMetadata schema = new SchemaBuilder()
      .add("a", MinorType.VARCHAR)
      .buildSchema();

  SingleRowSet batch = fixture.rowSetBuilder(schema)
      .addRow("x")
      .addRow("y")
      .addRow("z")
      .build();

  try {
    zapOffset(batch, 0, 1);

    // Validator should catch the error.
    checkForError(batch, "Offset \\(0\\) must be 0");
  } finally {
    // Always clear the batch, even when an assertion above fails.
    batch.clear();
  }
}
/**
 * Overwrites a single entry in the offset vector of the batch's first
 * column, which must be a {@link VarCharVector}, to simulate corruption.
 *
 * @param batch batch whose first column is tampered with
 * @param index position within the offset vector to overwrite
 * @param bogusValue value written at that position
 */
public void zapOffset(SingleRowSet batch, int index, int bogusValue) {
  // Deliberately evil: clobber an offset to simulate a corrupt vector.
  // Never do this outside of a test!
  ValueVector firstColumn = batch.vectorAccessible().iterator().next().getValueVector();
  UInt4Vector offsets = ((VarCharVector) firstColumn).getOffsetVector();
  offsets.getMutator().set(index, bogusValue);
}
/**
 * Overwrites offset 2 of a VARCHAR column holding three two-character
 * values with 1 and expects an error matching {@code BAD_OFFSETS}.
 */
@Test
public void testVariableCorruptMiddleLow() {
  TupleMetadata schema = new SchemaBuilder()
      .add("a", MinorType.VARCHAR)
      .buildSchema();

  SingleRowSet batch = fixture.rowSetBuilder(schema)
      .addRow("xx")
      .addRow("yy")
      .addRow("zz")
      .build();

  try {
    zapOffset(batch, 2, 1);

    // Validator should catch the error.
    checkForError(batch, BAD_OFFSETS);
  } finally {
    // Always clear the batch, even when an assertion above fails.
    batch.clear();
  }
}
/**
 * Overwrites offset 1 of a VARCHAR column holding three two-character
 * values with 10 and expects an "Invalid offset" error.
 */
@Test
public void testVariableCorruptMiddleHigh() {
  TupleMetadata schema = new SchemaBuilder()
      .add("a", MinorType.VARCHAR)
      .buildSchema();

  SingleRowSet batch = fixture.rowSetBuilder(schema)
      .addRow("xx")
      .addRow("yy")
      .addRow("zz")
      .build();

  try {
    zapOffset(batch, 1, 10);

    // Validator should catch the error.
    checkForError(batch, "Invalid offset");
  } finally {
    // Always clear the batch, even when an assertion above fails.
    batch.clear();
  }
}
/**
 * Overwrites offset 3 of a VARCHAR column holding three two-character
 * values with 100,000 and expects an "Invalid offset" error.
 */
@Test
public void testVariableCorruptLastOutOfRange() {
  TupleMetadata schema = new SchemaBuilder()
      .add("a", MinorType.VARCHAR)
      .buildSchema();

  SingleRowSet batch = fixture.rowSetBuilder(schema)
      .addRow("xx")
      .addRow("yy")
      .addRow("zz")
      .build();

  try {
    zapOffset(batch, 3, 100_000);

    // Validator should catch the error.
    checkForError(batch, "Invalid offset");
  } finally {
    // Always clear the batch, even when an assertion above fails.
    batch.clear();
  }
}
/**
 * Regular expression for the error message the tests expect when a corrupted
 * offset is lower than the validator allows; it matches messages of the form
 * "Offset vector ... contained N, expected &gt;= M".
 */
private static final String BAD_OFFSETS = "Offset vector .* contained \\d+, expected >= \\d+";
/**
 * Overwrites entry 3 of the offset vector of a repeated VARCHAR column with 1
 * and expects the validator to report an error matching {@code BAD_OFFSETS}.
 */
@Test
public void testRepeatedBadArrayOffset() {
  TupleMetadata schema = new SchemaBuilder()
      .add("a", MinorType.VARCHAR, DataMode.REPEATED)
      .buildSchema();

  SingleRowSet batch = fixture.rowSetBuilder(schema)
      .addRow((Object) strArray())
      .addRow((Object) strArray("fred", "barney", "wilma"))
      .addRow((Object) strArray("dino"))
      .build();

  try {
    // Corrupt the repeated vector's own offset vector (not the offsets
    // of the inner VARCHAR data vector).
    VectorAccessible va = batch.vectorAccessible();
    ValueVector v = va.iterator().next().getValueVector();
    RepeatedVarCharVector vc = (RepeatedVarCharVector) v;
    UInt4Vector ov = vc.getOffsetVector();
    ov.getMutator().set(3, 1);

    checkForError(batch, BAD_OFFSETS);
  } finally {
    // Always clear the batch, even when an assertion above fails.
    batch.clear();
  }
}
/**
 * Overwrites entry 4 of the offset vector of the inner VARCHAR data vector of
 * a repeated VARCHAR column with 100,000 and expects an "Invalid offset" error.
 */
@Test
public void testRepeatedBadValueOffset() {
  TupleMetadata schema = new SchemaBuilder()
      .add("a", MinorType.VARCHAR, DataMode.REPEATED)
      .buildSchema();

  SingleRowSet batch = fixture.rowSetBuilder(schema)
      .addRow((Object) strArray())
      .addRow((Object) strArray("fred", "barney", "wilma"))
      .addRow((Object) strArray("dino"))
      .build();

  try {
    // Corrupt the offsets of the inner data vector rather than the
    // repeated vector's own offset vector.
    VectorAccessible va = batch.vectorAccessible();
    ValueVector v = va.iterator().next().getValueVector();
    RepeatedVarCharVector rvc = (RepeatedVarCharVector) v;
    VarCharVector vc = rvc.getDataVector();
    UInt4Vector ov = vc.getOffsetVector();
    ov.getMutator().set(4, 100_000);

    checkForError(batch, "Invalid offset");
  } finally {
    // Always clear the batch, even when an assertion above fails.
    batch.clear();
  }
}
}