blob: 4057f3365ac165c610dcc64a3cba040830ee322f [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.physical.rowSet;
import static org.apache.drill.test.rowSet.RowSetUtilities.mapArray;
import static org.apache.drill.test.rowSet.RowSetUtilities.mapValue;
import static org.apache.drill.test.rowSet.RowSetUtilities.strArray;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertTrue;
import java.util.Iterator;
import org.apache.drill.categories.RowSetTests;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.exec.vector.ValueVector;
import org.apache.drill.exec.vector.accessor.ArrayReader;
import org.apache.drill.exec.vector.accessor.ArrayWriter;
import org.apache.drill.exec.vector.accessor.ScalarReader;
import org.apache.drill.exec.vector.accessor.ScalarWriter;
import org.apache.drill.exec.vector.accessor.TupleReader;
import org.apache.drill.exec.vector.accessor.TupleWriter;
import org.apache.drill.exec.vector.complex.MapVector;
import org.apache.drill.exec.vector.complex.RepeatedMapVector;
import org.apache.drill.test.ClientFixture;
import org.apache.drill.test.ClusterFixture;
import org.apache.drill.test.SubOperatorTest;
import org.apache.drill.test.rowSet.RowSetComparison;
import org.apache.drill.test.rowSet.RowSetUtilities;
import org.junit.Test;
import org.junit.experimental.categories.Category;
/**
 * Exercises map (tuple) columns through the row set reader and
 * writer abstractions.
 * <p>
 * These cases mirror, in simplified form, those in
 * TestResultSetLoaderMaps: because the RowSet mechanism works
 * against a fixed schema, the scenarios here are much simpler.
 */
@Category(RowSetTests.class)
public class TestMapAccessors extends SubOperatorTest {

  @Test
  public void testBasics() {
    final TupleMetadata schema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .addMap("m")
          .add("c", MinorType.INT)
          .add("d", MinorType.VARCHAR)
          .resumeSchema()
        .add("e", MinorType.VARCHAR)
        .buildSchema();

    final RowSetBuilder rsBuilder = fixture.rowSetBuilder(schema);
    final RowSetWriter writer = rsBuilder.writer();

    // The writer must reflect the declared shape: three top-level
    // columns, the middle one a map with two members.
    final TupleMetadata writerSchema = writer.tupleSchema();
    assertEquals(3, writerSchema.size());
    assertTrue(writerSchema.metadata(1).isMap());
    assertEquals(2, writerSchema.metadata("m").tupleSchema().size());
    assertEquals(2, writerSchema.column("m").getChildren().size());

    // Fill the first row column-by-column, the way client code would.
    final ScalarWriter colA = writer.scalar("a");
    final TupleWriter mapWriter = writer.tuple("m");
    final ScalarWriter colC = mapWriter.scalar("c");
    final ScalarWriter colD = mapWriter.scalar("d");
    final ScalarWriter colE = writer.scalar("e");
    colA.setInt(10);
    colC.setInt(110);
    colD.setString("fred");
    colE.setString("pebbles");
    writer.save();

    // Second row via the test-time shorthand.
    writer.addRow(20, mapValue(210, "barney"), "bam-bam");

    final RowSet result = rsBuilder.build();
    assertEquals(2, result.rowCount());

    // Peek under the covers at the vector layout: a map column is
    // backed by a MapVector holding one entry per row.
    final VectorContainer container = result.container();
    assertEquals(3, container.getNumberOfColumns());
    final ValueVector vector = container.getValueVector(1).getValueVector();
    assertTrue(vector instanceof MapVector);
    final MapVector mapVector = (MapVector) vector;
    assertEquals(2, mapVector.getAccessor().getValueCount());

    // Read the values back through the reader interfaces directly,
    // rather than through the comparison utilities built on them.
    final RowSetReader reader = result.reader();
    final ScalarReader aCol = reader.scalar("a");
    final TupleReader mapReader = reader.tuple("m");
    final ScalarReader cCol = mapReader.scalar("c");
    final ScalarReader dCol = mapReader.scalar("d");
    final ScalarReader eCol = reader.scalar("e");

    reader.next();
    assertEquals(10, aCol.getInt());
    assertEquals(110, cCol.getInt());
    assertEquals("fred", dCol.getString());
    assertEquals("pebbles", eCol.getString());

    reader.next();
    assertEquals(20, aCol.getInt());
    assertEquals(210, cCol.getInt());
    assertEquals("barney", dCol.getString());
    assertEquals("bam-bam", eCol.getString());

    // Cross-check wholesale with the row-set comparison helper.
    final SingleRowSet expected = fixture.rowSetBuilder(schema)
        .addRow(10, mapValue(110, "fred"), "pebbles")
        .addRow(20, mapValue(210, "barney"), "bam-bam")
        .build();
    new RowSetComparison(expected).verify(result);

    // The row set must be able to rebuild its internal structure
    // from a bare vector container.
    final RowSet rebuilt = fixture.wrap(result.container());
    RowSetUtilities.verify(expected, rebuilt);
  }

  /**
   * Builds a map nested inside another map. Required, variable-width
   * members are used since they need the most bookkeeping and so are
   * the most likely to expose positioning errors.
   */
  @Test
  public void testNestedMapsRequired() {
    final TupleMetadata schema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .addMap("m1")
          .add("b", MinorType.VARCHAR)
          .addMap("m2")
            .add("c", MinorType.VARCHAR)
            .resumeMap()
          .add("d", MinorType.VARCHAR)
          .resumeSchema()
        .buildSchema();

    final RowSetBuilder rsBuilder = fixture.rowSetBuilder(schema);
    final RowSetWriter writer = rsBuilder.writer();
    writer.addRow(10, mapValue("b1", mapValue("c1"), "d1"));
    writer.addRow(20, mapValue("b2", mapValue("c2"), "d2"));

    // Spot-check values directly through the readers.
    final RowSet result = rsBuilder.build();
    final RowSetReader reader = result.reader();
    final TupleReader outerMap = reader.tuple("m1");
    final TupleReader innerMap = outerMap.tuple("m2");

    reader.next();
    assertEquals(10, reader.scalar("a").getInt());
    assertEquals("b1", outerMap.scalar("b").getString());
    assertEquals("c1", innerMap.scalar("c").getString());
    assertEquals("d1", outerMap.scalar("d").getString());

    reader.next();
    assertEquals(20, reader.scalar("a").getInt());
    assertEquals("b2", outerMap.scalar("b").getString());
    assertEquals("c2", innerMap.scalar("c").getString());
    assertEquals("d2", outerMap.scalar("d").getString());

    // Then compare wholesale against an identical expected set.
    final RowSet expected = fixture.rowSetBuilder(schema)
        .addRow(10, mapValue("b1", mapValue("c1"), "d1"))
        .addRow(20, mapValue("b2", mapValue("c2"), "d2"))
        .build();
    new RowSetComparison(expected).verify(result);

    // The row set must be able to rebuild its internal structure
    // from a bare vector container.
    final RowSet rebuilt = fixture.wrap(result.container());
    RowSetUtilities.verify(expected, rebuilt);
  }

  @Test
  public void testBasicRepeatedMap() {
    final TupleMetadata schema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .addMapArray("m")
          .add("c", MinorType.INT)
          .add("d", MinorType.VARCHAR)
          .resumeSchema()
        .buildSchema();

    final RowSetBuilder rsBuilder = fixture.rowSetBuilder(schema);
    final RowSetWriter writer = rsBuilder.writer();

    // The schema seen by the writer should describe a two-member
    // map array.
    final TupleMetadata writerSchema = writer.tupleSchema();
    assertEquals(2, writerSchema.size());
    assertTrue(writerSchema.metadata(1).isArray());
    assertTrue(writerSchema.metadata(1).isMap());
    assertEquals(2, writerSchema.metadata("m").tupleSchema().size());
    assertEquals(2, writerSchema.column("m").getChildren().size());

    // Schema objects must be shared, not copied, across the various
    // paths that reach them.
    final TupleWriter mapWriter = writer.array("m").tuple();
    assertSame(writerSchema.metadata("m").tupleSchema(), mapWriter.schema().tupleSchema());
    assertSame(mapWriter.tupleSchema(), mapWriter.schema().tupleSchema());
    assertSame(mapWriter.tupleSchema().metadata(0), mapWriter.scalar(0).schema());
    assertSame(mapWriter.tupleSchema().metadata(1), mapWriter.scalar(1).schema());

    // Rows with two, zero and three map entries respectively.
    writer
        .addRow(10, mapArray(
            mapValue(110, "d1.1"),
            mapValue(120, "d2.2")))
        .addRow(20, mapArray())
        .addRow(30, mapArray(
            mapValue(310, "d3.1"),
            mapValue(320, "d3.2"),
            mapValue(330, "d3.3")));

    // Check the materialized field of the repeated-map vector.
    final RowSet actual = rsBuilder.build();
    final RepeatedMapVector mapVector = (RepeatedMapVector) actual.container().getValueVector(1).getValueVector();
    final MaterializedField mapField = mapVector.getField();
    assertEquals(2, mapField.getChildren().size());
    final Iterator<MaterializedField> children = mapField.getChildren().iterator();
    assertTrue(mapWriter.scalar(0).schema().schema().isEquivalent(children.next()));
    assertTrue(mapWriter.scalar(1).schema().schema().isEquivalent(children.next()));

    final SingleRowSet expected = fixture.rowSetBuilder(schema)
        .addRow(10, mapArray(
            mapValue(110, "d1.1"),
            mapValue(120, "d2.2")))
        .addRow(20, mapArray())
        .addRow(30, mapArray(
            mapValue(310, "d3.1"),
            mapValue(320, "d3.2"),
            mapValue(330, "d3.3")))
        .build();
    new RowSetComparison(expected).verify(actual);

    // The row set must be able to rebuild its internal structure
    // from a bare vector container.
    final RowSet rebuilt = fixture.wrap(actual.container());
    RowSetUtilities.verify(expected, rebuilt);
  }

  @Test
  public void testNestedArray() {
    final TupleMetadata schema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .addMapArray("m")
          .add("c", MinorType.INT)
          .addArray("d", MinorType.VARCHAR)
          .resumeSchema()
        .buildSchema();

    final RowSetBuilder rsBuilder = fixture.rowSetBuilder(schema);
    final RowSetWriter writer = rsBuilder.writer();

    // Arrays nested within arrays. (The Varchar itself is a byte
    // array, so three array levels in total.)
    writer
        .addRow(10, mapArray(
            mapValue(110, strArray("d1.1.1", "d1.1.2")),
            mapValue(120, strArray("d1.2.1", "d1.2.2"))))
        .addRow(20, mapArray())
        .addRow(30, mapArray(
            mapValue(310, strArray("d3.1.1", "d3.2.2")),
            mapValue(320, strArray()),
            mapValue(330, strArray("d3.3.1", "d1.2.2"))));

    // Verify against an identically-built expected set.
    final RowSet actual = rsBuilder.build();
    final SingleRowSet expected = fixture.rowSetBuilder(schema)
        .addRow(10, mapArray(
            mapValue(110, strArray("d1.1.1", "d1.1.2")),
            mapValue(120, strArray("d1.2.1", "d1.2.2"))))
        .addRow(20, mapArray())
        .addRow(30, mapArray(
            mapValue(310, strArray("d3.1.1", "d3.2.2")),
            mapValue(320, strArray()),
            mapValue(330, strArray("d3.3.1", "d1.2.2"))))
        .build();
    new RowSetComparison(expected).verify(actual);

    // The row set must be able to rebuild its internal structure
    // from a bare vector container.
    final RowSet rebuilt = fixture.wrap(actual.container());
    RowSetUtilities.verify(expected, rebuilt);
  }

  /**
   * Writes and reads back a map array nested inside another map
   * array (a doubly-nested array of maps).
   */
  @Test
  public void testDoubleNestedArray() {
    final TupleMetadata schema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .addMapArray("m1")
          .add("b", MinorType.INT)
          .addMapArray("m2")
            .add("c", MinorType.INT)
            .addArray("d", MinorType.VARCHAR)
            .resumeMap()
          .resumeSchema()
        .buildSchema();

    final RowSetBuilder rsBuilder = fixture.rowSetBuilder(schema);
    final RowSetWriter writer = rsBuilder.writer();

    final ScalarWriter colA = writer.scalar("a");
    final ArrayWriter outerArrayWriter = writer.array("m1");
    final TupleWriter outerMapWriter = outerArrayWriter.tuple();
    final ScalarWriter colB = outerMapWriter.scalar("b");
    final ArrayWriter innerArrayWriter = outerMapWriter.array("m2");
    final TupleWriter innerMapWriter = innerArrayWriter.tuple();
    final ScalarWriter colC = innerMapWriter.scalar("c");
    final ScalarWriter colD = innerMapWriter.array("d").scalar();

    // 5 rows x 4 outer entries x 3 inner entries x 2 strings, with
    // each value derived from the loop indexes so every cell differs.
    for (int i = 0; i < 5; i++) {
      colA.setInt(i);
      for (int j = 0; j < 4; j++) {
        final int outerKey = i + 10 + j;
        colB.setInt(outerKey);
        for (int k = 0; k < 3; k++) {
          final int innerKey = outerKey * 10 + k;
          colC.setInt(innerKey);
          for (int l = 0; l < 2; l++) {
            colD.setString("d-" + (innerKey * 10 + l));
          }
          innerArrayWriter.save();
        }
        outerArrayWriter.save();
      }
      writer.save();
    }

    final RowSet results = rsBuilder.build();
    final RowSetReader reader = results.reader();

    final ScalarReader aCol = reader.scalar("a");
    final ArrayReader outerArrayReader = reader.array("m1");
    final TupleReader outerMapReader = outerArrayReader.tuple();
    final ScalarReader bCol = outerMapReader.scalar("b");
    final ArrayReader innerArrayReader = outerMapReader.array("m2");
    final TupleReader innerMapReader = innerArrayReader.tuple();
    final ScalarReader cCol = innerMapReader.scalar("c");
    final ArrayReader dArrayReader = innerMapReader.array("d");
    final ScalarReader dCol = dArrayReader.scalar();

    // Walk the same index space and expect the same derived values.
    for (int i = 0; i < 5; i++) {
      assertTrue(reader.next());
      assertEquals(i, aCol.getInt());
      for (int j = 0; j < 4; j++) {
        assertTrue(outerArrayReader.next());
        final int outerKey = i + 10 + j;
        assertEquals(outerKey, bCol.getInt());
        for (int k = 0; k < 3; k++) {
          assertTrue(innerArrayReader.next());
          final int innerKey = outerKey * 10 + k;
          assertEquals(innerKey, cCol.getInt());
          for (int l = 0; l < 2; l++) {
            assertTrue(dArrayReader.next());
            assertEquals("d-" + (innerKey * 10 + l), dCol.getString());
          }
        }
      }
    }
    results.clear();
  }

  /**
   * Verifies that schema inference handles repeated map vectors.
   * <p>
   * Some operators, when creating a map array, add the $offset$
   * vector to the children of the map's MaterializedField; the
   * RowSet utilities do not. Both forms must compare as equal.
   */
  @Test
  public void testDrill6809() throws Exception {
    try (ClusterFixture cluster = ClusterFixture.standardCluster(dirTestWatcher);
         ClientFixture client = cluster.clientFixture()) {

      // Input: {"a" : [ {"c": 1} ], "b" : 2.1}
      String sql = "select * from `cp`.`jsoninput/repeatedmap_sort_bug.json`";
      RowSet actual = client.queryBuilder().sql(sql).rowSet();

      TupleMetadata schema = new SchemaBuilder()
          .addMapArray("a")
            .addNullable("c", MinorType.BIGINT)
            .resumeSchema()
          .addNullable("b", MinorType.FLOAT8)
          .buildSchema();
      RowSet expected = fixture.rowSetBuilder(schema)
          .addRow(mapArray(mapValue(1L)), 2.1D)
          .build();
      RowSetUtilities.verify(expected, actual);
    }
  }
}