/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.store.avro;

import org.apache.avro.LogicalType;
import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.test.BaseTest;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
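
/**
 * Unit tests for {@code AvroSchemaUtil}: extracting the non-null branch from
 * nullable (union) Avro schemas, and converting Avro schemas into Drill's
 * {@link TupleMetadata} column metadata.
 */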
public class AvroSchemaUtilTest extends BaseTest {

  @Rule
  public ExpectedException thrown = ExpectedException.none();

  @Test
  public void testExtractSchemaFromNullableNotUnion() {
    Schema schema = SchemaBuilder.builder().stringType();
    thrown.expect(UserException.class);
    thrown.expectMessage("VALIDATION ERROR");
    AvroSchemaUtil.extractSchemaFromNullable(schema, "s");
  }

  @Test
  public void testExtractSchemaFromNullableComplexUnion() {
    Schema schema = SchemaBuilder.unionOf()
        .doubleType().and()
        .longType().and()
        .nullType()
        .endUnion();
    thrown.expect(UserException.class);
    thrown.expectMessage("UNSUPPORTED_OPERATION ERROR");
    AvroSchemaUtil.extractSchemaFromNullable(schema, "u");
  }

  @Test
  public void testExtractSchemaFromNullable() {
    Schema schema = SchemaBuilder.builder().nullable().stringType();
    Schema actual = AvroSchemaUtil.extractSchemaFromNullable(schema, "s");
    assertEquals(SchemaBuilder.builder().stringType(), actual);
  }
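
  /**
   * Avro primitives should map one-to-one onto Drill minor types
   * (string to VARCHAR, bytes to VARBINARY, boolean to BIT, int to INT,
   * long to BIGINT, float to FLOAT4, double to FLOAT8), with optional
   * Avro fields becoming nullable Drill columns.
   */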
  @Test
  public void testConvertSimpleTypes() {
    Schema schema = SchemaBuilder.record("rec")
        .fields()
        .requiredString("col_string")
        .requiredBytes("col_bytes")
        .requiredBoolean("col_boolean")
        .requiredInt("col_int")
        .requiredLong("col_long")
        .requiredFloat("col_float")
        .requiredDouble("col_double")
        .optionalString("col_opt_string")
        .optionalBytes("col_opt_bytes")
        .optionalBoolean("col_opt_boolean")
        .optionalInt("col_opt_int")
        .optionalLong("col_opt_long")
        .optionalFloat("col_opt_float")
        .optionalDouble("col_opt_double")
        .endRecord();

    TupleMetadata tupleMetadata = new org.apache.drill.exec.record.metadata.SchemaBuilder()
        .add("col_string", TypeProtos.MinorType.VARCHAR)
        .add("col_bytes", TypeProtos.MinorType.VARBINARY)
        .add("col_boolean", TypeProtos.MinorType.BIT)
        .add("col_int", TypeProtos.MinorType.INT)
        .add("col_long", TypeProtos.MinorType.BIGINT)
        .add("col_float", TypeProtos.MinorType.FLOAT4)
        .add("col_double", TypeProtos.MinorType.FLOAT8)
        .addNullable("col_opt_string", TypeProtos.MinorType.VARCHAR)
        .addNullable("col_opt_bytes", TypeProtos.MinorType.VARBINARY)
        .addNullable("col_opt_boolean", TypeProtos.MinorType.BIT)
        .addNullable("col_opt_int", TypeProtos.MinorType.INT)
        .addNullable("col_opt_long", TypeProtos.MinorType.BIGINT)
        .addNullable("col_opt_float", TypeProtos.MinorType.FLOAT4)
        .addNullable("col_opt_double", TypeProtos.MinorType.FLOAT8)
        .buildSchema();

    assertTrue(tupleMetadata.isEquivalent(AvroSchemaUtil.convert(schema)));
  }
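
  /**
   * Both bytes-backed and fixed-backed Avro decimal logical types should
   * convert to VARDECIMAL, preserving precision and scale.
   */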
  @Test
  public void testConvertDecimal() {
    Schema decBytes = LogicalTypes.decimal(10, 2)
        .addToSchema(SchemaBuilder.builder().bytesType());
    Schema decFixed = LogicalTypes.decimal(5, 2)
        .addToSchema(SchemaBuilder.builder().fixed("dec_fixed").size(5));

    Schema schema = SchemaBuilder.record("rec")
        .fields()
        .name("col_dec_bytes").type(decBytes).noDefault()
        .name("col_dec_fixed").type(decFixed).noDefault()
        .endRecord();

    TupleMetadata tupleMetadata = new org.apache.drill.exec.record.metadata.SchemaBuilder()
        .add("col_dec_bytes", TypeProtos.MinorType.VARDECIMAL, 10, 2)
        .add("col_dec_fixed", TypeProtos.MinorType.VARDECIMAL, 5, 2)
        .buildSchema();

    assertTrue(tupleMetadata.isEquivalent(AvroSchemaUtil.convert(schema)));
  }
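
  /**
   * Millis and micros flavors of Avro timestamp and time logical types both
   * map to Drill TIMESTAMP and TIME respectively, date maps to DATE, and the
   * 12-byte fixed "duration" logical type maps to INTERVAL.
   */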
  @Test
  public void testConvertDateTime() {
    Schema timestampMillis = LogicalTypes.timestampMillis().addToSchema(SchemaBuilder.builder().longType());
    Schema timestampMicros = LogicalTypes.timestampMicros().addToSchema(SchemaBuilder.builder().longType());
    Schema date = LogicalTypes.date().addToSchema(SchemaBuilder.builder().intType());
    Schema timeMillis = LogicalTypes.timeMillis().addToSchema(SchemaBuilder.builder().intType());
    Schema timeMicros = LogicalTypes.timeMicros().addToSchema(SchemaBuilder.builder().longType());
    Schema duration = new LogicalType("duration")
        .addToSchema(SchemaBuilder.builder().fixed("duration_fixed").size(12));

    Schema schema = SchemaBuilder.record("rec")
        .fields()
        .name("col_timestamp_millis").type(timestampMillis).noDefault()
        .name("col_timestamp_micros").type(timestampMicros).noDefault()
        .name("col_date").type(date).noDefault()
        .name("col_time_millis").type(timeMillis).noDefault()
        .name("col_time_micros").type(timeMicros).noDefault()
        .name("col_duration").type(duration).noDefault()
        .endRecord();

    TupleMetadata tupleMetadata = new org.apache.drill.exec.record.metadata.SchemaBuilder()
        .add("col_timestamp_millis", TypeProtos.MinorType.TIMESTAMP)
        .add("col_timestamp_micros", TypeProtos.MinorType.TIMESTAMP)
        .add("col_date", TypeProtos.MinorType.DATE)
        .add("col_time_millis", TypeProtos.MinorType.TIME)
        .add("col_time_micros", TypeProtos.MinorType.TIME)
        .add("col_duration", TypeProtos.MinorType.INTERVAL)
        .buildSchema();

    assertTrue(tupleMetadata.isEquivalent(AvroSchemaUtil.convert(schema)));
  }
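
  /**
   * Avro's standalone null type has no direct Drill counterpart and is
   * represented as a nullable VARCHAR column.
   */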
  @Test
  public void testConvertNullType() {
    Schema schema = SchemaBuilder.record("rec")
        .fields()
        .name("col_null").type().nullType().noDefault()
        .endRecord();

    TupleMetadata tupleMetadata = new org.apache.drill.exec.record.metadata.SchemaBuilder()
        .addNullable("col_null", TypeProtos.MinorType.VARCHAR)
        .buildSchema();

    assertTrue(tupleMetadata.isEquivalent(AvroSchemaUtil.convert(schema)));
  }

  @Test
  public void testConvertEnum() {
    Schema schema = SchemaBuilder.record("rec")
        .fields()
        .name("col_enum").type().enumeration("letters").symbols("A", "B", "C").noDefault()
        .endRecord();

    TupleMetadata tupleMetadata = new org.apache.drill.exec.record.metadata.SchemaBuilder()
        .add("col_enum", TypeProtos.MinorType.VARCHAR)
        .buildSchema();

    assertTrue(tupleMetadata.isEquivalent(AvroSchemaUtil.convert(schema)));
  }

  @Test
  public void testConvertFixed() {
    Schema schema = SchemaBuilder.record("rec")
        .fields()
        .name("col_fixed").type().fixed("md5").size(16).noDefault()
        .endRecord();

    TupleMetadata tupleMetadata = new org.apache.drill.exec.record.metadata.SchemaBuilder()
        .add("col_fixed", TypeProtos.MinorType.VARBINARY)
        .buildSchema();

    assertTrue(tupleMetadata.isEquivalent(AvroSchemaUtil.convert(schema)));
  }
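
  /**
   * Avro arrays should convert to repeated Drill columns, including
   * multi-dimensional arrays, arrays of records (repeated maps) and
   * arrays of Avro maps (repeated dicts).
   */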
  @Test
  public void testConvertArray() {
    Schema schema = SchemaBuilder.record("rec")
        .fields()
        .name("col_array").type().array().items().booleanType().noDefault()
        .name("col_opt_array").type().array().items().nullable().longType().noDefault()
        .name("col_nested_array").type().array().items()
          .array().items()
          .stringType()
          .noDefault()
        .name("col_array_map").type().array().items()
          .record("arr_rec")
          .fields()
          .optionalString("s")
          .requiredLong("i")
          .endRecord()
          .noDefault()
        .name("col_array_dict").type().array().items()
          .map().values().intType().noDefault()
        .endRecord();

    TupleMetadata tupleMetadata = new org.apache.drill.exec.record.metadata.SchemaBuilder()
        .addArray("col_array", TypeProtos.MinorType.BIT)
        .addArray("col_opt_array", TypeProtos.MinorType.BIGINT)
        .addArray("col_nested_array", TypeProtos.MinorType.VARCHAR, 2)
        .addMapArray("col_array_map")
          .addNullable("s", TypeProtos.MinorType.VARCHAR)
          .add("i", TypeProtos.MinorType.BIGINT)
          .resumeSchema()
        .addDictArray("col_array_dict", TypeProtos.MinorType.VARCHAR)
          .value(TypeProtos.MinorType.INT)
          .resumeSchema()
        .buildSchema();

    assertTrue(tupleMetadata.isEquivalent(AvroSchemaUtil.convert(schema)));
  }
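
  /**
   * Avro records should convert to Drill (tuple) maps, including records
   * nested inside records alongside dict and array members.
   */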
  @Test
  public void testConvertMap() {
    Schema schema = SchemaBuilder.record("rec")
        .fields()
        .name("col_map_simple").type().record("map_simple_rec")
          .fields()
          .optionalInt("i")
          .requiredString("s")
          .endRecord()
          .noDefault()
        .name("col_map_complex").type().record("map_complex_rec")
          .fields()
          .name("col_nested_map").type().record("map_nested_rec")
            .fields()
            .optionalBoolean("nest_b")
            .requiredDouble("nest_d")
            .endRecord()
            .noDefault()
          .name("col_nested_dict").type().map().values().stringType().noDefault()
          .name("col_nested_array").type().array().items().booleanType().noDefault()
          .endRecord()
          .noDefault()
        .endRecord();

    TupleMetadata tupleMetadata = new org.apache.drill.exec.record.metadata.SchemaBuilder()
        .addMap("col_map_simple")
          .addNullable("i", TypeProtos.MinorType.INT)
          .add("s", TypeProtos.MinorType.VARCHAR)
          .resumeSchema()
        .addMap("col_map_complex")
          .addMap("col_nested_map")
            .addNullable("nest_b", TypeProtos.MinorType.BIT)
            .add("nest_d", TypeProtos.MinorType.FLOAT8)
            .resumeMap()
          .addDict("col_nested_dict", TypeProtos.MinorType.VARCHAR)
            .value(TypeProtos.MinorType.VARCHAR)
            .resumeMap()
          .addArray("col_nested_array", TypeProtos.MinorType.BIT)
          .resumeSchema()
        .buildSchema();

    assertTrue(tupleMetadata.isEquivalent(AvroSchemaUtil.convert(schema)));
  }
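
  /**
   * Avro map types should convert to Drill DICT columns with VARCHAR keys,
   * covering scalar, nullable, array, map and record value types.
   */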
  @Test
  public void testConvertDict() {
    Schema schema = SchemaBuilder.record("rec")
        .fields()
        .name("col_dict").type().map().values().stringType().noDefault()
        .name("col_opt_dict").type().map().values().nullable().intType().noDefault()
        .name("col_dict_val_array").type().map().values().array().items().stringType().noDefault()
        .name("col_dict_val_dict").type().map().values().map().values().intType().noDefault()
        .name("col_dict_val_map").type().map().values()
          .record("dict_val")
          .fields()
          .optionalInt("i")
          .requiredString("s")
          .endRecord().noDefault()
        .endRecord();

    TupleMetadata tupleMetadata = new org.apache.drill.exec.record.metadata.SchemaBuilder()
        .addDict("col_dict", TypeProtos.MinorType.VARCHAR)
          .value(TypeProtos.MinorType.VARCHAR)
          .resumeSchema()
        .addDict("col_opt_dict", TypeProtos.MinorType.VARCHAR)
          .nullableValue(TypeProtos.MinorType.INT)
          .resumeSchema()
        .addDict("col_dict_val_array", TypeProtos.MinorType.VARCHAR)
          .repeatedValue(TypeProtos.MinorType.VARCHAR)
          .resumeSchema()
        .addDict("col_dict_val_dict", TypeProtos.MinorType.VARCHAR)
          .dictValue()
            .key(TypeProtos.MinorType.VARCHAR)
            .value(TypeProtos.MinorType.INT)
            .resumeDict()
          .resumeSchema()
        .addDict("col_dict_val_map", TypeProtos.MinorType.VARCHAR)
          .mapValue()
            .addNullable("i", TypeProtos.MinorType.INT)
            .add("s", TypeProtos.MinorType.VARCHAR)
            .resumeDict()
          .resumeSchema()
        .buildSchema();

    assertTrue(tupleMetadata.isEquivalent(AvroSchemaUtil.convert(schema)));
  }
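
  /**
   * Unions other than the two-branch [type, null] form are not supported
   * and should be rejected with an unsupported-operation error.
   */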
  @Test
  public void testConvertComplexUnion() {
    Schema schema = SchemaBuilder.record("rec")
        .fields()
        .optionalString("s")
        .name("u").type().unionOf()
          .stringType().and().longType().and().nullType().endUnion().noDefault()
        .endRecord();

    thrown.expect(UserException.class);
    thrown.expectMessage("UNSUPPORTED_OPERATION ERROR");
    AvroSchemaUtil.convert(schema);
  }
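
  /**
   * Recursive named types must not send the converter into infinite
   * recursion: per the expected schema below, a reference back to a type
   * that is already being expanded is materialized as an empty map.
   */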
  @Test
  public void testConvertWithNamedTypes() {
    Schema schema = SchemaBuilder.record("MixedList")
        .fields()
        .name("rec_l").type().record("LongList")
          .fields()
          .requiredLong("l")
          .name("list_l_1").type("LongList").noDefault()
          .name("list_l_2").type("LongList").noDefault()
          .endRecord()
          .noDefault()
        .name("rec_s").type().record("StringList")
          .fields()
          .requiredString("s")
          .name("list_s_1").type("StringList").noDefault()
          .name("list_s_2").type("StringList").noDefault()
          .endRecord()
          .noDefault()
        .name("rec_m").type().record("rec_m")
          .fields()
          .name("list_l").type("LongList").noDefault()
          .name("list_s").type("StringList").noDefault()
          .name("a").type().array().items().type("MixedList").noDefault()
          .endRecord()
          .noDefault()
        .name("mixed").type("MixedList").noDefault()
        .endRecord();

    TupleMetadata tupleMetadata = new org.apache.drill.exec.record.metadata.SchemaBuilder()
        .addMap("rec_l")
          .add("l", TypeProtos.MinorType.BIGINT)
          .addMap("list_l_1")
            .resumeMap()
          .addMap("list_l_2")
            .resumeMap()
          .resumeSchema()
        .addMap("rec_s")
          .add("s", TypeProtos.MinorType.VARCHAR)
          .addMap("list_s_1")
            .resumeMap()
          .addMap("list_s_2")
            .resumeMap()
          .resumeSchema()
        .addMap("rec_m")
          .addMap("list_l")
            .add("l", TypeProtos.MinorType.BIGINT)
            .addMap("list_l_1")
              .resumeMap()
            .addMap("list_l_2")
              .resumeMap()
            .resumeMap()
          .addMap("list_s")
            .add("s", TypeProtos.MinorType.VARCHAR)
            .addMap("list_s_1")
              .resumeMap()
            .addMap("list_s_2")
              .resumeMap()
            .resumeMap()
          .addMapArray("a")
            .resumeMap()
          .resumeSchema()
        .addMap("mixed")
          .resumeSchema()
        .buildSchema();

    assertTrue(tupleMetadata.isEquivalent(AvroSchemaUtil.convert(schema)));
  }
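
  /**
   * Two records may share the simple name "rec" as long as their namespaces
   * differ; namespaces do not leak into the converted column names.
   */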
  @Test
  public void testConvertWithNamespaces() {
    Schema schema = SchemaBuilder.record("rec").namespace("n1")
        .fields()
        .requiredString("s")
        .name("m").type().record("rec").namespace("n2")
          .fields()
          .requiredLong("l")
          .endRecord()
          .noDefault()
        .endRecord();

    TupleMetadata tupleMetadata = new org.apache.drill.exec.record.metadata.SchemaBuilder()
        .add("s", TypeProtos.MinorType.VARCHAR)
        .addMap("m")
          .add("l", TypeProtos.MinorType.BIGINT)
          .resumeSchema()
        .buildSchema();

    assertTrue(tupleMetadata.isEquivalent(AvroSchemaUtil.convert(schema)));
  }
}