/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.parquet.hadoop;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroup;
import org.apache.parquet.hadoop.example.GroupReadSupport;
import org.junit.Test;
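
/**
 * Interoperability tests for the BYTE_STREAM_SPLIT encoding: reads the reference files fetched by
 * {@link InterOpTester} at the pinned changeset below and verifies that the encoded columns decode
 * to the expected values.
 */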
public class TestInterOpReadByteStreamSplit {
  private static final String FLOATS_FILE = "byte_stream_split.zstd.parquet";
  private static final String EXTENDED_FILE = "byte_stream_split_extended.gzip.parquet";
  private static final String CHANGESET = "74278bc";

  private final InterOpTester interop = new InterOpTester();
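
  /**
   * Reads byte_stream_split.zstd.parquet and checks that the BYTE_STREAM_SPLIT encoded
   * "f32" and "f64" columns decode to the expected values for all 300 rows.
   */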
  @Test
  public void testReadFloats() throws IOException {
    Path floatsFile = interop.GetInterOpFile(FLOATS_FILE, CHANGESET);
    final int expectRows = 300;

    try (ParquetReader<Group> reader =
        ParquetReader.builder(new GroupReadSupport(), floatsFile).build()) {
      for (int i = 0; i < expectRows; ++i) {
        Group group = reader.read();
        assertNotNull(group);
        float fval = group.getFloat("f32", 0);
        double dval = group.getDouble("f64", 0);
        // Values are drawn from a normal distribution, so their magnitude should stay below 4.0
        assertTrue(Math.abs(fval) < 4.0);
        assertTrue(Math.abs(dval) < 4.0);
        // Spot-check the first two and last two rows against known values
        switch (i) {
          case 0:
            assertEquals(1.7640524f, fval, 0.0);
            assertEquals(-1.3065268517353166, dval, 0.0);
            break;
          case 1:
            assertEquals(0.4001572f, fval, 0.0);
            assertEquals(1.658130679618188, dval, 0.0);
            break;
          case expectRows - 2:
            assertEquals(-0.39944902f, fval, 0.0);
            assertEquals(-0.9301565025243212, dval, 0.0);
            break;
          case expectRows - 1:
            assertEquals(0.37005588f, fval, 0.0);
            assertEquals(-0.17858909208732915, dval, 0.0);
            break;
        }
      }
      // The reader must be exhausted after expectRows records
      assertNull(reader.read());
    }
  }
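
  /**
   * Reads expectRows records from the given file and asserts, row by row, that the two named
   * columns hold identical values. Callers pass a PLAIN encoded column and its
   * BYTE_STREAM_SPLIT encoded counterpart.
   */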
  private void compareColumnValues(Path path, int expectRows, String leftCol, String rightCol) throws IOException {
    try (ParquetReader<Group> reader =
        ParquetReader.builder(new GroupReadSupport(), path).build()) {
      for (int i = 0; i < expectRows; ++i) {
        SimpleGroup group = (SimpleGroup) reader.read();
        assertNotNull(group);
        Object left = group.getObject(leftCol, 0);
        Object right = group.getObject(rightCol, 0);
        assertEquals(left, right);
      }
      assertNull(reader.read());
    }
  }
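
  /**
   * Reads byte_stream_split_extended.gzip.parquet, which pairs a PLAIN encoded column with a
   * BYTE_STREAM_SPLIT encoded column for each supported type (float, double, int32, int64,
   * float16, fixed-length byte arrays and decimals), and checks that each pair decodes
   * to identical values.
   */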
  @Test
  public void testReadAllSupportedTypes() throws IOException {
    Path extendedFile = interop.GetInterOpFile(EXTENDED_FILE, CHANGESET);
    final int expectRows = 200;

    compareColumnValues(extendedFile, expectRows, "float_plain", "float_byte_stream_split");
    compareColumnValues(extendedFile, expectRows, "double_plain", "double_byte_stream_split");
    compareColumnValues(extendedFile, expectRows, "int32_plain", "int32_byte_stream_split");
    compareColumnValues(extendedFile, expectRows, "int64_plain", "int64_byte_stream_split");
    compareColumnValues(extendedFile, expectRows, "float16_plain", "float16_byte_stream_split");
    compareColumnValues(extendedFile, expectRows, "flba5_plain", "flba5_byte_stream_split");
    compareColumnValues(extendedFile, expectRows, "decimal_plain", "decimal_byte_stream_split");
  }
}