blob: 5a7b20922d103b22a03421549603b27af73373c5 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.parquet.thrift;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;
import org.apache.parquet.thrift.projection.StrictFieldProjectionFilter;
import org.apache.parquet.thrift.projection.ThriftProjectionException;
import org.apache.parquet.thrift.projection.deprecated.DeprecatedFieldProjectionFilter;
import org.apache.parquet.thrift.struct.ThriftField;
import org.apache.parquet.thrift.struct.ThriftType;
import org.apache.parquet.thrift.struct.ThriftType.StructType;
import org.apache.parquet.thrift.test.compat.MapStructV2;
import org.apache.parquet.thrift.test.compat.SetStructV2;
import org.apache.parquet.thrift.test.TestLogicalType;
import org.apache.thrift.TBase;
import org.junit.Test;
import com.twitter.data.proto.tutorial.thrift.AddressBook;
import com.twitter.data.proto.tutorial.thrift.Person;
import com.twitter.elephantbird.thrift.test.TestStructInMap;
import java.util.Arrays;
import static org.apache.parquet.schema.MessageTypeParser.parseMessageType;
import static org.apache.parquet.thrift.struct.ThriftField.Requirement.REQUIRED;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
public class TestThriftSchemaConverter {
@Test
public void testToMessageType() throws Exception {
String expected =
"message ParquetSchema {\n" +
" optional group persons (LIST) = 1 {\n" +
" repeated group persons_tuple {\n" +
" required group name = 1 {\n" +
" optional binary first_name (UTF8) = 1;\n" +
" optional binary last_name (UTF8) = 2;\n" +
" }\n" +
" optional int32 id = 2;\n" +
" optional binary email (UTF8) = 3;\n" +
" optional group phones (LIST) = 4 {\n" +
" repeated group phones_tuple {\n" +
" optional binary number (UTF8) = 1;\n" +
" optional binary type (ENUM) = 2;\n" +
" }\n" +
" }\n" +
" }\n" +
" }\n" +
"}";
ThriftSchemaConverter schemaConverter = new ThriftSchemaConverter();
final MessageType converted = schemaConverter.convert(AddressBook.class);
assertEquals(MessageTypeParser.parseMessageType(expected), converted);
}
@Test
public void testToProjectedThriftType() {
shouldGetProjectedSchema("name/first_name", "name.first_name", "message ParquetSchema {" +
" required group name = 1 {" +
" optional binary first_name (UTF8) = 1;" +
" }}", Person.class);
shouldGetProjectedSchema("name/first_name;name/last_name", "name.first_name;name.last_name" ,"message ParquetSchema {" +
" required group name = 1 {" +
" optional binary first_name (UTF8) = 1;" +
" optional binary last_name (UTF8) = 2;" +
" }}", Person.class);
shouldGetProjectedSchema("name/{first,last}_name;", "name.{first,last}_name;", "message ParquetSchema {" +
" required group name = 1 {" +
" optional binary first_name (UTF8) = 1;" +
" optional binary last_name (UTF8) = 2;" +
" }}", Person.class);
shouldGetProjectedSchema("name/*", "name" ,"message ParquetSchema {" +
" required group name = 1 {" +
" optional binary first_name (UTF8) = 1;" +
" optional binary last_name (UTF8) = 2;" +
" }" +
"}", Person.class);
shouldGetProjectedSchema("*/*_name", "*.*_name" ,"message ParquetSchema {" +
" required group name = 1 {" +
" optional binary first_name (UTF8) = 1;" +
" optional binary last_name (UTF8) = 2;" +
" }" +
"}", Person.class);
shouldGetProjectedSchema("name/first_*", "name.first_*","message ParquetSchema {" +
" required group name = 1 {" +
" optional binary first_name (UTF8) = 1;" +
" }" +
"}", Person.class);
shouldGetProjectedSchema("*/*", "*.*", "message ParquetSchema {" +
" required group name = 1 {" +
" optional binary first_name (UTF8) = 1;" +
" optional binary last_name (UTF8) = 2;" +
"} " +
" optional group phones (LIST) = 4 {" +
" repeated group phones_tuple {" +
" optional binary number (UTF8) = 1;" +
" optional binary type (ENUM) = 2;" +
" }" +
"}}", Person.class);
}
/* Original message type, before projection
message TestStructInMap {
optional binary name(UTF8);
optional group names(MAP) {
repeated group map(MAP_KEY_VALUE) {
required binary key(UTF8);
optional group value {
optional group name {
optional binary first_name(UTF8);
optional binary last_name(UTF8);
}
optional group phones(MAP) {
repeated group map(MAP_KEY_VALUE) {
required binary key(ENUM);
optional binary value(UTF8);
}
}
}
}
}
}
*/
@Test
public void testProjectMapThriftType() {
//project nested map
shouldGetProjectedSchema("name;names/key*;names/value/**", "name;names.key*;names.value", "message ParquetSchema {\n" +
" optional binary name (UTF8) = 1;\n" +
" optional group names (MAP) = 2 {\n" +
" repeated group map (MAP_KEY_VALUE) {\n" +
" required binary key (UTF8);\n" +
" optional group value {\n" +
" optional group name = 1 {\n" +
" optional binary first_name (UTF8) = 1;\n" +
" optional binary last_name (UTF8) = 2;\n" +
" }\n" +
" optional group phones (MAP) = 2 {\n" +
" repeated group map (MAP_KEY_VALUE) {\n" +
" required binary key (ENUM);\n" +
" optional binary value (UTF8);\n" +
" }\n" +
" }\n" +
" }\n" +
" }\n" +
" }\n" +
"}", TestStructInMap.class);
//project only one level of nested map
shouldGetProjectedSchema("name;names/key;names/value/name/*", "name;names.key;names.value.name","message ParquetSchema {\n" +
" optional binary name (UTF8) = 1;\n" +
" optional group names (MAP) = 2 {\n" +
" repeated group map (MAP_KEY_VALUE) {\n" +
" required binary key (UTF8);\n" +
" optional group value {\n" +
" optional group name = 1 {\n" +
" optional binary first_name (UTF8) = 1;\n" +
" optional binary last_name (UTF8) = 2;\n" +
" }\n" +
" }\n" +
" }\n" +
" }\n" +
"}", TestStructInMap.class);
}
@Test
public void testProjectOnlyKeyInMap() {
shouldGetProjectedSchema("name;names/key", "name;names.key", "message ParquetSchema {\n" +
" optional binary name (UTF8) = 1;\n" +
" optional group names (MAP) = 2 {\n" +
" repeated group map (MAP_KEY_VALUE) {\n" +
" required binary key (UTF8);\n" +
" optional group value {\n" +
" optional group name = 1 {\n" +
" optional binary first_name (UTF8) = 1;\n" +
" }\n" +
" }" +
" }\n" +
" }\n" +
"}",TestStructInMap.class);
}
private void shouldThrowWhenProjectionFilterMatchesNothing(String filters, String unmatchedFilter, Class<? extends TBase<?, ?>> thriftClass) {
try {
getDeprecatedFilteredSchema(filters, thriftClass);
fail("should throw projection exception when filter matches nothing");
} catch (ThriftProjectionException e) {
assertEquals("The following projection patterns did not match any columns in this schema:\n"
+ unmatchedFilter + "\n", e.getMessage());
}
}
private void shouldThrowWhenNoColumnsAreSelected(String filters, Class<? extends TBase<?, ?>> thriftClass) {
try {
getDeprecatedFilteredSchema(filters, thriftClass);
fail("should throw projection exception when no columns are selected");
} catch (ThriftProjectionException e) {
assertEquals("No columns have been selected", e.getMessage());
}
}
@Test
public void testThrowWhenNoColumnsAreSelected() {
shouldThrowWhenNoColumnsAreSelected("non_existing", TestStructInMap.class);
}
@Test
public void testThrowWhenProjectionFilterMatchesNothing() {
shouldThrowWhenProjectionFilterMatchesNothing("name;non_existing", "non_existing", TestStructInMap.class);
shouldThrowWhenProjectionFilterMatchesNothing("**;non_existing", "non_existing", TestStructInMap.class);
shouldThrowWhenProjectionFilterMatchesNothing("**;names/non_existing", "names/non_existing", TestStructInMap.class);
shouldThrowWhenProjectionFilterMatchesNothing("**;names/non_existing;non_existing", "names/non_existing\nnon_existing", TestStructInMap.class);
}
@Test
public void testProjectOnlyValueInMap() {
try {
getDeprecatedFilteredSchema("name;names/value/**", TestStructInMap.class);
fail("this should throw");
} catch (ThriftProjectionException e) {
assertEquals("Cannot select only the values of a map, you must keep the keys as well: names", e.getMessage());
}
try {
getStrictFilteredSchema("name;names.value", TestStructInMap.class);
fail("this should throw");
} catch (ThriftProjectionException e) {
assertEquals("Cannot select only the values of a map, you must keep the keys as well: names", e.getMessage());
}
}
private void doTestPartialKeyProjection(String deprecated, String strict) {
try {
getDeprecatedFilteredSchema(deprecated, MapStructV2.class);
fail("this should throw");
} catch (ThriftProjectionException e) {
assertEquals("Cannot select only a subset of the fields in a map key, for path map1", e.getMessage());
}
try {
getStrictFilteredSchema(strict, MapStructV2.class);
fail("this should throw");
} catch (ThriftProjectionException e) {
assertEquals("Cannot select only a subset of the fields in a map key, for path map1", e.getMessage());
}
}
@Test
public void testPartialKeyProjection() {
doTestPartialKeyProjection("map1/key/age", "map1.key.age");
doTestPartialKeyProjection("map1/key/age;map1/value/**", "map1.{key.age,value}");
}
@Test
public void testSetPartialProjection() {
try {
getDeprecatedFilteredSchema("set1/age", SetStructV2.class);
fail("this should throw");
} catch (ThriftProjectionException e) {
assertEquals("Cannot select only a subset of the fields in a set, for path set1", e.getMessage());
}
try {
getStrictFilteredSchema("set1.age", SetStructV2.class);
fail("this should throw");
} catch (ThriftProjectionException e) {
assertEquals("Cannot select only a subset of the fields in a set, for path set1", e.getMessage());
}
}
@Test
public void testConvertStructCreatedViaDeprecatedConstructor() {
String expected = "message ParquetSchema {\n" +
" required binary a (UTF8) = 1;\n" +
" required binary b (UTF8) = 2;\n" +
"}\n";
ThriftSchemaConverter converter = new ThriftSchemaConverter();
StructType structType = new StructType(
Arrays.asList(
new ThriftField("a", (short) 1, REQUIRED, new ThriftType.StringType()),
new ThriftField("b", (short) 2, REQUIRED, new ThriftType.StringType())
)
);
final MessageType converted = converter.convert(structType);
assertEquals(MessageTypeParser.parseMessageType(expected), converted);
}
public static void shouldGetProjectedSchema(String deprecatedFilterDesc, String strictFilterDesc, String expectedSchemaStr, Class<? extends TBase<?,?>> thriftClass) {
MessageType depRequestedSchema = getDeprecatedFilteredSchema(deprecatedFilterDesc, thriftClass);
MessageType strictRequestedSchema = getStrictFilteredSchema(strictFilterDesc, thriftClass);
MessageType expectedSchema = parseMessageType(expectedSchemaStr);
assertEquals(expectedSchema, depRequestedSchema);
assertEquals(expectedSchema, strictRequestedSchema);
}
private static MessageType getDeprecatedFilteredSchema(String filterDesc, Class<? extends TBase<?,?>> thriftClass) {
DeprecatedFieldProjectionFilter fieldProjectionFilter = new DeprecatedFieldProjectionFilter(filterDesc);
return new ThriftSchemaConverter(fieldProjectionFilter).convert(thriftClass);
}
private static MessageType getStrictFilteredSchema(String semicolonDelimitedString, Class<? extends TBase<?,?>> thriftClass) {
StrictFieldProjectionFilter fieldProjectionFilter = StrictFieldProjectionFilter.fromSemicolonDelimitedString(semicolonDelimitedString);
return new ThriftSchemaConverter(fieldProjectionFilter).convert(thriftClass);
}
@Test
public void testToThriftType() throws Exception {
final StructType converted = ThriftSchemaConverter.toStructType(AddressBook.class);
final String json = converted.toJSON();
final ThriftType fromJSON = StructType.fromJSON(json);
assertEquals(json, fromJSON.toJSON());
}
@Test
public void testLogicalTypeConvertion() throws Exception {
String expected =
"message ParquetSchema {\n" +
" required int32 test_i16 (INTEGER(16,true)) = 1;" +
"}";
ThriftSchemaConverter schemaConverter = new ThriftSchemaConverter();
final MessageType converted = schemaConverter.convert(TestLogicalType.class);
assertEquals(MessageTypeParser.parseMessageType(expected), converted);
}
}