// blob: fa200ab424abbf669359f3749927c512cfc98a3a [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.parquet.parser;
import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS;
import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.NANOS;
import static org.apache.parquet.schema.LogicalTypeAnnotation.intType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType;
import static org.junit.Assert.assertEquals;
import static org.apache.parquet.schema.MessageTypeParser.parseMessageType;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
import static org.apache.parquet.schema.Type.Repetition.OPTIONAL;
import static org.apache.parquet.schema.Type.Repetition.REPEATED;
import static org.apache.parquet.schema.Type.Repetition.REQUIRED;
import static org.apache.parquet.schema.OriginalType.*;
import static org.apache.parquet.schema.Types.buildMessage;
import org.junit.Test;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.MessageTypeParser;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Types;
import org.apache.parquet.schema.Types.MessageTypeBuilder;
/**
 * Tests for {@link MessageTypeParser#parseMessageType(String)}.
 *
 * <p>Each test supplies a schema in its text form together with the
 * {@link MessageType} it is expected to produce, and checks two things:
 * that the text parses to the expected type, and that the string form of
 * the parsed type parses back to an equal type.
 */
public class TestParquetParser {

  /**
   * Asserts that a textual schema parses to the expected type and survives a
   * round trip through {@code toString()}.
   *
   * @param schema the schema text handed to the parser
   * @param expected the type the parser is expected to produce
   */
  private static void assertParseRoundTrip(String schema, MessageType expected) {
    MessageType parsed = parseMessageType(schema);
    assertEquals(expected, parsed);
    // Re-parse the printed form so the printer and the parser are checked against each other.
    MessageType reparsed = parseMessageType(parsed.toString());
    assertEquals(expected, reparsed);
  }

  /**
   * Parses the nested "Document" schema and a compact single-line schema with
   * two sibling groups, comparing both against hand-constructed types.
   */
  @Test
  public void testPaperExample() {
    String documentSchema =
        "message Document {\n" +
        " required int64 DocId;\n" +
        " optional group Links {\n" +
        " repeated int64 Backward;\n" +
        " repeated int64 Forward; }\n" +
        " repeated group Name {\n" +
        " repeated group Language {\n" +
        " required binary Code;\n" +
        " required binary Country; }\n" +
        " optional binary Url; }}";
    MessageType document =
        new MessageType("Document",
            new PrimitiveType(REQUIRED, INT64, "DocId"),
            new GroupType(OPTIONAL, "Links",
                new PrimitiveType(REPEATED, INT64, "Backward"),
                new PrimitiveType(REPEATED, INT64, "Forward")),
            new GroupType(REPEATED, "Name",
                new GroupType(REPEATED, "Language",
                    new PrimitiveType(REQUIRED, BINARY, "Code"),
                    new PrimitiveType(REQUIRED, BINARY, "Country")),
                new PrimitiveType(OPTIONAL, BINARY, "Url")));
    assertParseRoundTrip(documentSchema, document);

    String siblingGroupsSchema =
        "message m { required group a {required binary b;} required group c { required int64 d; }}";
    MessageType siblingGroups =
        new MessageType("m",
            new GroupType(REQUIRED, "a",
                new PrimitiveType(REQUIRED, BINARY, "b")),
            new GroupType(REQUIRED, "c",
                new PrimitiveType(REQUIRED, INT64, "d")));
    assertParseRoundTrip(siblingGroupsSchema, siblingGroups);
  }

  /**
   * Builds a message with one required field per {@link PrimitiveTypeName}
   * value, in text form and with the builder in lockstep, and checks that
   * the two agree.
   */
  @Test
  public void testEachPrimitiveType() {
    MessageTypeBuilder builder = buildMessage();
    StringBuilder schema = new StringBuilder();
    schema.append("message EachType {\n");
    for (PrimitiveTypeName type : PrimitiveTypeName.values()) {
      // add a schema entry, e.g., " required int32 int32_;\n"
      if (type == FIXED_LEN_BYTE_ARRAY) {
        // Fixed-length arrays are the one primitive that needs an explicit length.
        schema.append(" required fixed_len_byte_array(3) fixed_;");
        builder.required(FIXED_LEN_BYTE_ARRAY).length(3).named("fixed_");
      } else {
        schema.append(" required ").append(type)
            .append(" ").append(type).append("_;\n");
        builder.required(type).named(type.toString() + "_");
      }
    }
    schema.append("}\n");

    assertParseRoundTrip(schema.toString(), builder.named("EachType"));
  }

  /** Checks that the {@code STRING} annotation on a binary field maps to {@code stringType()}. */
  @Test
  public void testSTRINGAnnotation() {
    String message =
        "message StringMessage {\n" +
        " required binary string (STRING);\n" +
        "}\n";
    MessageType expected = buildMessage()
        .required(BINARY).as(stringType()).named("string")
        .named("StringMessage");

    assertParseRoundTrip(message, expected);
  }

  /** Checks that the {@code UTF8} annotation on a binary field maps to {@link OriginalType#UTF8}. */
  @Test
  public void testUTF8Annotation() {
    String message =
        "message StringMessage {\n" +
        " required binary string (UTF8);\n" +
        "}\n";
    MessageType expected = buildMessage()
        .required(BINARY).as(UTF8).named("string")
        .named("StringMessage");

    assertParseRoundTrip(message, expected);
  }

  /**
   * Checks that field ids are parsed, with and without an annotation and with
   * each placement of whitespace around the {@code =} sign.
   */
  @Test
  public void testIDs() {
    String message =
        "message Message {\n" +
        " required binary string (UTF8) = 6;\n" +
        " required int32 i=1;\n" +
        " required binary s2= 3;\n" +
        " required binary s3 =4;\n" +
        "}\n";
    MessageType expected = buildMessage()
        .required(BINARY).as(UTF8).id(6).named("string")
        .required(INT32).id(1).named("i")
        .required(BINARY).id(3).named("s2")
        .required(BINARY).id(4).named("s3")
        .named("Message");

    assertParseRoundTrip(message, expected);
  }

  /** Checks {@code MAP} and {@code MAP_KEY_VALUE} annotations placed on nested groups. */
  @Test
  public void testMAPAnnotations() {
    // this is primarily to test group annotations
    String message =
        "message Message {\n" +
        " optional group aMap (MAP) {\n" +
        " repeated group map (MAP_KEY_VALUE) {\n" +
        " required binary key (UTF8);\n" +
        " required int32 value;\n" +
        " }\n" +
        " }\n" +
        "}\n";
    MessageType expected = buildMessage()
        .optionalGroup().as(MAP)
            .repeatedGroup().as(MAP_KEY_VALUE)
                .required(BINARY).as(UTF8).named("key")
                .required(INT32).named("value")
                .named("map")
            .named("aMap")
        .named("Message");

    assertParseRoundTrip(message, expected);
  }

  /** Checks the {@code LIST} annotation placed on a group. */
  @Test
  public void testLISTAnnotation() {
    // this is primarily to test group annotations
    String message =
        "message Message {\n" +
        " required group aList (LIST) {\n" +
        " repeated binary string (UTF8);\n" +
        " }\n" +
        "}\n";
    MessageType expected = buildMessage()
        .requiredGroup().as(LIST)
            .repeated(BINARY).as(UTF8).named("string")
            .named("aList")
        .named("Message");

    assertParseRoundTrip(message, expected);
  }

  /** Checks {@code DECIMAL(9,2)} on a 4-byte fixed-length field: precision 9, scale 2. */
  @Test
  public void testDecimalFixedAnnotation() {
    String message =
        "message DecimalMessage {\n" +
        " required FIXED_LEN_BYTE_ARRAY(4) aDecimal (DECIMAL(9,2));\n" +
        "}\n";
    MessageType expected = buildMessage()
        .required(FIXED_LEN_BYTE_ARRAY).length(4)
            .as(DECIMAL).precision(9).scale(2)
            .named("aDecimal")
        .named("DecimalMessage");

    assertParseRoundTrip(message, expected);
  }

  /** Checks {@code DECIMAL(9,2)} on a binary field: precision 9, scale 2. */
  @Test
  public void testDecimalBinaryAnnotation() {
    String message =
        "message DecimalMessage {\n" +
        " required binary aDecimal (DECIMAL(9,2));\n" +
        "}\n";
    MessageType expected = buildMessage()
        .required(BINARY).as(DECIMAL).precision(9).scale(2)
            .named("aDecimal")
        .named("DecimalMessage");

    assertParseRoundTrip(message, expected);
  }

  /**
   * Checks the date/time annotations: the {@link OriginalType} names
   * ({@code DATE}, {@code TIME_MILLIS}, {@code TIMESTAMP_MILLIS},
   * {@code INTERVAL}) and the parameterized {@code TIME(unit,flag)} and
   * {@code TIMESTAMP(unit,flag)} forms.
   */
  @Test
  public void testTimeAnnotations() {
    String message = "message TimeMessage {" +
        " required int32 date (DATE);" +
        " required int32 time (TIME_MILLIS);" +
        " required int64 timestamp (TIMESTAMP_MILLIS);" +
        " required FIXED_LEN_BYTE_ARRAY(12) interval (INTERVAL);" +
        " required int32 newTime (TIME(MILLIS,true));" +
        " required int64 nanoTime (TIME(NANOS,true));" +
        " required int64 newTimestamp (TIMESTAMP(MILLIS,false));" +
        " required int64 nanoTimestamp (TIMESTAMP(NANOS,false));" +
        "}\n";
    MessageType expected = buildMessage()
        .required(INT32).as(DATE).named("date")
        .required(INT32).as(TIME_MILLIS).named("time")
        .required(INT64).as(TIMESTAMP_MILLIS).named("timestamp")
        .required(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("interval")
        // The text form puts the unit first; the factory methods take the boolean first.
        .required(INT32).as(timeType(true, MILLIS)).named("newTime")
        .required(INT64).as(timeType(true, NANOS)).named("nanoTime")
        .required(INT64).as(timestampType(false, MILLIS)).named("newTimestamp")
        .required(INT64).as(timestampType(false, NANOS)).named("nanoTimestamp")
        .named("TimeMessage");

    assertParseRoundTrip(message, expected);
  }

  /** Checks the {@code INT_*} and {@code UINT_*} annotations of {@link OriginalType}. */
  @Test
  public void testIntAnnotations() {
    String message = "message IntMessage {" +
        " required int32 i8 (INT_8);" +
        " required int32 i16 (INT_16);" +
        " required int32 i32 (INT_32);" +
        " required int64 i64 (INT_64);" +
        " required int32 u8 (UINT_8);" +
        " required int32 u16 (UINT_16);" +
        " required int32 u32 (UINT_32);" +
        " required int64 u64 (UINT_64);" +
        "}\n";
    MessageType expected = buildMessage()
        .required(INT32).as(INT_8).named("i8")
        .required(INT32).as(INT_16).named("i16")
        .required(INT32).as(INT_32).named("i32")
        .required(INT64).as(INT_64).named("i64")
        .required(INT32).as(UINT_8).named("u8")
        .required(INT32).as(UINT_16).named("u16")
        .required(INT32).as(UINT_32).named("u32")
        .required(INT64).as(UINT_64).named("u64")
        .named("IntMessage");

    assertParseRoundTrip(message, expected);
  }

  /**
   * Checks the parameterized {@code INTEGER(bits,flag)} annotation for widths
   * 8, 16, 32 and 64 with the flag both {@code true} and {@code false}.
   */
  @Test
  public void testIntegerAnnotations() {
    String message = "message IntMessage {" +
        " required int32 i8 (INTEGER(8,true));" +
        " required int32 i16 (INTEGER(16,true));" +
        " required int32 i32 (INTEGER(32,true));" +
        " required int64 i64 (INTEGER(64,true));" +
        " required int32 u8 (INTEGER(8,false));" +
        " required int32 u16 (INTEGER(16,false));" +
        " required int32 u32 (INTEGER(32,false));" +
        " required int64 u64 (INTEGER(64,false));" +
        "}\n";
    MessageType expected = buildMessage()
        .required(INT32).as(intType(8, true)).named("i8")
        .required(INT32).as(intType(16, true)).named("i16")
        .required(INT32).as(intType(32, true)).named("i32")
        .required(INT64).as(intType(64, true)).named("i64")
        .required(INT32).as(intType(8, false)).named("u8")
        .required(INT32).as(intType(16, false)).named("u16")
        .required(INT32).as(intType(32, false)).named("u32")
        .required(INT64).as(intType(64, false)).named("u64")
        .named("IntMessage");

    assertParseRoundTrip(message, expected);
  }

  /** Checks the {@code JSON} and {@code BSON} annotations on binary fields. */
  @Test
  public void testEmbeddedAnnotations() {
    String message = "message EmbeddedMessage {" +
        " required binary json (JSON);" +
        " required binary bson (BSON);" +
        "}\n";
    MessageType expected = buildMessage()
        .required(BINARY).as(JSON).named("json")
        .required(BINARY).as(BSON).named("bson")
        .named("EmbeddedMessage");

    assertParseRoundTrip(message, expected);
  }
}