blob: ea8fcd40e451c2379726611c6255f4e90b6403c0 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.parquet.schema;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Arrays.asList;
import static java.util.concurrent.TimeUnit.HOURS;
import static java.util.concurrent.TimeUnit.MICROSECONDS;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static java.util.concurrent.TimeUnit.MINUTES;
import static java.util.concurrent.TimeUnit.NANOSECONDS;
import static java.util.concurrent.TimeUnit.SECONDS;
import static org.apache.parquet.schema.PrimitiveStringifier.DATE_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.DEFAULT_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.INTERVAL_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MICROS_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MICROS_UTC_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MILLIS_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MILLIS_UTC_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_NANOS_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_NANOS_UTC_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIME_NANOS_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIME_NANOS_UTC_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIME_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIME_UTC_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.UNSIGNED_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.UTF8_STRINGIFIER;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.util.Calendar;
import java.util.HashSet;
import java.util.Set;
import java.util.TimeZone;
import java.util.concurrent.TimeUnit;
import org.apache.parquet.TestUtils;
import org.apache.parquet.io.api.Binary;
import org.junit.Test;
public class TestPrimitiveStringifier {
private static final TimeZone UTC = TimeZone.getTimeZone("UTC");
@Test
public void testDefaultStringifier() {
PrimitiveStringifier stringifier = DEFAULT_STRINGIFIER;
assertEquals("true", stringifier.stringify(true));
assertEquals("false", stringifier.stringify(false));
assertEquals("0.0", stringifier.stringify(0.0));
assertEquals("123456.7891234567", stringifier.stringify(123456.7891234567));
assertEquals("-98765.43219876543", stringifier.stringify(-98765.43219876543));
assertEquals("0.0", stringifier.stringify(0.0f));
assertEquals("987.6543", stringifier.stringify(987.6543f));
assertEquals("-123.4567", stringifier.stringify(-123.4567f));
assertEquals("0", stringifier.stringify(0));
assertEquals("1234567890", stringifier.stringify(1234567890));
assertEquals("-987654321", stringifier.stringify(-987654321));
assertEquals("0", stringifier.stringify(0l));
assertEquals("1234567890123456789", stringifier.stringify(1234567890123456789l));
assertEquals("-987654321987654321", stringifier.stringify(-987654321987654321l));
assertEquals("null", stringifier.stringify(null));
assertEquals("0x", stringifier.stringify(Binary.EMPTY));
assertEquals("0x0123456789ABCDEF", stringifier.stringify(toBinary(0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF)));
}
@Test
public void testUnsignedStringifier() {
PrimitiveStringifier stringifier = UNSIGNED_STRINGIFIER;
assertEquals("0", stringifier.stringify(0));
assertEquals("2147483647", stringifier.stringify(2147483647));
assertEquals("4294967295", stringifier.stringify(0xFFFFFFFF));
assertEquals("0", stringifier.stringify(0l));
assertEquals("9223372036854775807", stringifier.stringify(9223372036854775807l));
assertEquals("18446744073709551615", stringifier.stringify(0xFFFFFFFFFFFFFFFFl));
checkThrowingUnsupportedException(stringifier, Integer.TYPE, Long.TYPE);
}
@Test
public void testUTF8Stringifier() {
PrimitiveStringifier stringifier = UTF8_STRINGIFIER;
assertEquals("null", stringifier.stringify(null));
assertEquals("", stringifier.stringify(Binary.EMPTY));
assertEquals("This is a UTF-8 test", stringifier.stringify(Binary.fromString("This is a UTF-8 test")));
assertEquals("これはUTF-8のテストです",
stringifier.stringify(Binary.fromConstantByteArray("これはUTF-8のテストです".getBytes(UTF_8))));
checkThrowingUnsupportedException(stringifier, Binary.class);
}
@Test
public void testIntervalStringifier() {
PrimitiveStringifier stringifier = INTERVAL_STRINGIFIER;
assertEquals("null", stringifier.stringify(null));
assertEquals("<INVALID>", stringifier.stringify(Binary.EMPTY));
assertEquals("<INVALID>",
stringifier.stringify(Binary.fromConstantByteArray(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 })));
assertEquals("<INVALID>",
stringifier.stringify(Binary.fromReusedByteArray(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 })));
ByteBuffer buffer = ByteBuffer.allocate(12);
assertEquals("interval(0 months, 0 days, 0 millis)",
stringifier.stringify(Binary.fromConstantByteBuffer(buffer)));
buffer.putInt(0x03000000);
buffer.putInt(0x06000000);
buffer.putInt(0x09000000);
buffer.flip();
assertEquals("interval(3 months, 6 days, 9 millis)",
stringifier.stringify(Binary.fromConstantByteBuffer(buffer)));
buffer.clear();
buffer.putInt(0xFFFFFFFF);
buffer.putInt(0xFEFFFFFF);
buffer.putInt(0xFDFFFFFF);
buffer.flip();
assertEquals("interval(4294967295 months, 4294967294 days, 4294967293 millis)",
stringifier.stringify(Binary.fromReusedByteBuffer(buffer)));
checkThrowingUnsupportedException(stringifier, Binary.class);
}
@Test
public void testDateStringifier() {
PrimitiveStringifier stringifier = DATE_STRINGIFIER;
assertEquals("1970-01-01", stringifier.stringify(0));
Calendar cal = Calendar.getInstance(UTC);
cal.clear();
cal.set(2017, Calendar.DECEMBER, 14);
assertEquals("2017-12-14", stringifier.stringify((int) MILLISECONDS.toDays(cal.getTimeInMillis())));
cal.clear();
cal.set(1583, Calendar.AUGUST, 3);
assertEquals("1583-08-03", stringifier.stringify((int) MILLISECONDS.toDays(cal.getTimeInMillis())));
checkThrowingUnsupportedException(stringifier, Integer.TYPE);
}
@Test
public void testTimestampMillisStringifier() {
for (PrimitiveStringifier stringifier : asList(TIMESTAMP_MILLIS_STRINGIFIER, TIMESTAMP_MILLIS_UTC_STRINGIFIER)) {
String timezoneAmendment = (stringifier == TIMESTAMP_MILLIS_STRINGIFIER ? "" : "+0000");
assertEquals(withZoneString("1970-01-01T00:00:00.000", timezoneAmendment), stringifier.stringify(0l));
Calendar cal = Calendar.getInstance(UTC);
cal.clear();
cal.set(2017, Calendar.DECEMBER, 15, 10, 9, 54);
cal.set(Calendar.MILLISECOND, 120);
assertEquals(withZoneString("2017-12-15T10:09:54.120", timezoneAmendment), stringifier.stringify(cal.getTimeInMillis()));
cal.clear();
cal.set(1948, Calendar.NOVEMBER, 23, 20, 19, 1);
cal.set(Calendar.MILLISECOND, 9);
assertEquals(withZoneString("1948-11-23T20:19:01.009", timezoneAmendment), stringifier.stringify(cal.getTimeInMillis()));
checkThrowingUnsupportedException(stringifier, Long.TYPE);
}
}
@Test
public void testTimestampMicrosStringifier() {
for (PrimitiveStringifier stringifier : asList(TIMESTAMP_MICROS_STRINGIFIER, TIMESTAMP_MICROS_UTC_STRINGIFIER)) {
String timezoneAmendment = (stringifier == TIMESTAMP_MICROS_STRINGIFIER ? "" : "+0000");
assertEquals(withZoneString("1970-01-01T00:00:00.000000", timezoneAmendment), stringifier.stringify(0l));
Calendar cal = Calendar.getInstance(UTC);
cal.clear();
cal.set(2053, Calendar.JULY, 10, 22, 13, 24);
cal.set(Calendar.MILLISECOND, 84);
long micros = cal.getTimeInMillis() * 1000 + 900;
assertEquals(withZoneString("2053-07-10T22:13:24.084900", timezoneAmendment), stringifier.stringify(micros));
cal.clear();
cal.set(1848, Calendar.MARCH, 15, 9, 23, 59);
cal.set(Calendar.MILLISECOND, 765);
micros = cal.getTimeInMillis() * 1000 - 1;
assertEquals(withZoneString("1848-03-15T09:23:59.764999", timezoneAmendment), stringifier.stringify(micros));
checkThrowingUnsupportedException(stringifier, Long.TYPE);
}
}
@Test
public void testTimestampNanosStringifier() {
for (PrimitiveStringifier stringifier : asList(TIMESTAMP_NANOS_STRINGIFIER, TIMESTAMP_NANOS_UTC_STRINGIFIER)) {
String timezoneAmendment = (stringifier == TIMESTAMP_NANOS_STRINGIFIER ? "" : "+0000");
assertEquals(withZoneString("1970-01-01T00:00:00.000000000", timezoneAmendment), stringifier.stringify(0l));
Calendar cal = Calendar.getInstance(UTC);
cal.clear();
cal.set(2053, Calendar.JULY, 10, 22, 13, 24);
cal.set(Calendar.MILLISECOND, 84);
long nanos = cal.getTimeInMillis() * 1_000_000 + 536;
assertEquals(withZoneString("2053-07-10T22:13:24.084000536", timezoneAmendment), stringifier.stringify(nanos));
cal.clear();
cal.set(1848, Calendar.MARCH, 15, 9, 23, 59);
cal.set(Calendar.MILLISECOND, 765);
nanos = cal.getTimeInMillis() * 1_000_000 - 1;
assertEquals(withZoneString("1848-03-15T09:23:59.764999999", timezoneAmendment), stringifier.stringify(nanos));
checkThrowingUnsupportedException(stringifier, Long.TYPE);
}
}
@Test
public void testTimeStringifier() {
for (PrimitiveStringifier stringifier : asList(TIME_STRINGIFIER, TIME_UTC_STRINGIFIER)) {
String timezoneAmendment = (stringifier == TIME_STRINGIFIER ? "" : "+0000");
assertEquals(withZoneString("00:00:00.000", timezoneAmendment), stringifier.stringify(0));
assertEquals(withZoneString("00:00:00.000000", timezoneAmendment), stringifier.stringify(0l));
assertEquals(withZoneString("12:34:56.789", timezoneAmendment), stringifier.stringify((int) convert(MILLISECONDS, 12, 34, 56, 789)));
assertEquals(withZoneString("12:34:56.789012", timezoneAmendment), stringifier.stringify(convert(MICROSECONDS, 12, 34, 56, 789012)));
assertEquals(withZoneString("-12:34:56.789", timezoneAmendment), stringifier.stringify((int) convert(MILLISECONDS, -12, -34, -56, -789)));
assertEquals(withZoneString("-12:34:56.789012", timezoneAmendment), stringifier.stringify(convert(MICROSECONDS, -12, -34, -56, -789012)));
assertEquals(withZoneString("123:12:34.567", timezoneAmendment), stringifier.stringify((int) convert(MILLISECONDS, 123, 12, 34, 567)));
assertEquals(withZoneString("12345:12:34.056789", timezoneAmendment), stringifier.stringify(convert(MICROSECONDS, 12345, 12, 34, 56789)));
assertEquals(withZoneString("-123:12:34.567", timezoneAmendment), stringifier.stringify((int) convert(MILLISECONDS, -123, -12, -34, -567)));
assertEquals(withZoneString("-12345:12:34.056789", timezoneAmendment), stringifier.stringify(convert(MICROSECONDS, -12345, -12, -34, -56789)));
checkThrowingUnsupportedException(stringifier, Integer.TYPE, Long.TYPE);
}
}
@Test
public void testTimeNanoStringifier() {
for (PrimitiveStringifier stringifier : asList(TIME_NANOS_STRINGIFIER, TIME_NANOS_UTC_STRINGIFIER)) {
String timezoneAmendment = (stringifier == TIME_NANOS_STRINGIFIER ? "" : "+0000");
assertEquals(withZoneString("00:00:00.000000000", timezoneAmendment), stringifier.stringify(0l));
assertEquals(withZoneString("12:34:56.789012987", timezoneAmendment), stringifier.stringify(convert(NANOSECONDS, 12, 34, 56, 789012987)));
assertEquals(withZoneString("-12:34:56.000789012", timezoneAmendment), stringifier.stringify(convert(NANOSECONDS, -12, -34, -56, -789012)));
assertEquals(withZoneString("12345:12:34.000056789", timezoneAmendment), stringifier.stringify(convert(NANOSECONDS, 12345, 12, 34, 56789)));
assertEquals(withZoneString("-12345:12:34.000056789", timezoneAmendment), stringifier.stringify(convert(NANOSECONDS, -12345, -12, -34, -56789)));
checkThrowingUnsupportedException(stringifier, Integer.TYPE, Long.TYPE);
}
}
private String withZoneString(String expected, String zoneString) {
return expected + zoneString;
}
private long convert(TimeUnit unit, long hours, long minutes, long seconds, long rest) {
return unit.convert(hours, HOURS) + unit.convert(minutes, MINUTES) + unit.convert(seconds, SECONDS) + rest;
}
@Test
public void testDecimalStringifier() {
PrimitiveStringifier stringifier = PrimitiveStringifier.createDecimalStringifier(4);
assertEquals("0.0000", stringifier.stringify(0));
assertEquals("123456.7890", stringifier.stringify(1234567890));
assertEquals("-98765.4321", stringifier.stringify(-987654321));
assertEquals("0.0000", stringifier.stringify(0l));
assertEquals("123456789012345.6789", stringifier.stringify(1234567890123456789l));
assertEquals("-98765432109876.5432", stringifier.stringify(-987654321098765432l));
assertEquals("null", stringifier.stringify(null));
assertEquals("<INVALID>", stringifier.stringify(Binary.EMPTY));
assertEquals("0.0000", stringifier.stringify(Binary.fromReusedByteArray(new byte[] { 0 })));
assertEquals("9876543210987654321098765432109876543210987654.3210", stringifier.stringify(Binary
.fromConstantByteArray(new BigInteger("98765432109876543210987654321098765432109876543210").toByteArray())));
assertEquals("-1234567890123456789012345678901234567890123456.7890", stringifier.stringify(Binary
.fromConstantByteArray(new BigInteger("-12345678901234567890123456789012345678901234567890").toByteArray())));
checkThrowingUnsupportedException(stringifier, Integer.TYPE, Long.TYPE, Binary.class);
}
@Test
public void testUUIDStringifier() {
PrimitiveStringifier stringifier = PrimitiveStringifier.UUID_STRINGIFIER;
assertEquals("00112233-4455-6677-8899-aabbccddeeff", stringifier.stringify(
toBinary(0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff)));
assertEquals("00000000-0000-0000-0000-000000000000", stringifier.stringify(
toBinary(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00)));
assertEquals("ffffffff-ffff-ffff-ffff-ffffffffffff", stringifier.stringify(
toBinary(0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff)));
assertEquals("0eb1497c-19b6-42bc-b028-b4b612bed141", stringifier.stringify(
toBinary(0x0e, 0xb1, 0x49, 0x7c, 0x19, 0xb6, 0x42, 0xbc, 0xb0, 0x28, 0xb4, 0xb6, 0x12, 0xbe, 0xd1, 0x41)));
// Check that the stringifier does not care about the length, it always takes the first 16 bytes
assertEquals("87a09cca-3b1e-4a0a-9c77-591924c3b57b", stringifier.stringify(
toBinary(0x87, 0xa0, 0x9c, 0xca, 0x3b, 0x1e, 0x4a, 0x0a, 0x9c, 0x77, 0x59, 0x19, 0x24, 0xc3, 0xb5, 0x7b, 0x00,
0x00, 0x00)));
// As there is no validation implemented, if the 16 bytes is not available, the array will be over-indexed
TestUtils.assertThrows("Expected exception for over-indexing", ArrayIndexOutOfBoundsException.class,
() -> stringifier.stringify(
toBinary(0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee)));
checkThrowingUnsupportedException(stringifier, Binary.class);
}
private Binary toBinary(int...bytes) {
byte[] array = new byte[bytes.length];
for (int i = 0; i < array.length; ++i) {
array[i] = (byte) bytes[i];
}
return Binary.fromConstantByteArray(array);
}
private void checkThrowingUnsupportedException(PrimitiveStringifier stringifier, Class<?>... excludes) {
Set<Class<?>> set = new HashSet<>(asList(excludes));
if (!set.contains(Integer.TYPE)) {
try {
stringifier.stringify(0);
fail("An UnsupportedOperationException should have been thrown");
} catch (UnsupportedOperationException e) {
}
}
if (!set.contains(Long.TYPE)) {
try {
stringifier.stringify(0l);
fail("An UnsupportedOperationException should have been thrown");
} catch (UnsupportedOperationException e) {
}
}
if (!set.contains(Float.TYPE)) {
try {
stringifier.stringify(0.0f);
fail("An UnsupportedOperationException should have been thrown");
} catch (UnsupportedOperationException e) {
}
}
if (!set.contains(Double.TYPE)) {
try {
stringifier.stringify(0.0);
fail("An UnsupportedOperationException should have been thrown");
} catch (UnsupportedOperationException e) {
}
}
if (!set.contains(Boolean.TYPE)) {
try {
stringifier.stringify(false);
fail("An UnsupportedOperationException should have been thrown");
} catch (UnsupportedOperationException e) {
}
}
if (!set.contains(Binary.class)) {
try {
stringifier.stringify(Binary.EMPTY);
fail("An UnsupportedOperationException should have been thrown");
} catch (UnsupportedOperationException e) {
}
}
}
}