// blob: c28af94d76b16574cdf854026d0e9b5834217a8e [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.orc.impl;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestName;
/**
 * Tests for {@link SchemaEvolution}.
 *
 * <p>The tests cover three things:
 * <ul>
 *   <li>whether a file schema / reader schema pair is reported as needing a
 *       conversion ({@code hasConversion()});</li>
 *   <li>which column ids are reported as safe by
 *       {@code isPPDSafeConversion(int)} (PPD presumably stands for predicate
 *       push-down - confirm against {@link SchemaEvolution});</li>
 *   <li>an end-to-end read of a float column through a double reader schema.</li>
 * </ul>
 *
 * <p>In the struct schemas used here, column id 0 is the id the tests use for
 * the enclosing struct and ids 1..n for its fields {@code f1..fn}.
 */
public class TestSchemaEvolution {

  /** Exposes the running test method's name so each test gets its own file. */
  @Rule
  public TestName testCaseName = new TestName();

  Configuration conf;
  Path testFilePath;
  FileSystem fs;

  /** Scratch directory; overridable through the {@code test.tmp.dir} system property. */
  Path workDir = new Path(System.getProperty("test.tmp.dir",
      "target" + File.separator + "test" + File.separator + "tmp"));

  /**
   * Creates a fresh configuration and local file system, derives the per-test
   * ORC file path from the test method name and deletes any file left there.
   */
  @Before
  public void setup() throws Exception {
    conf = new Configuration();
    fs = FileSystem.getLocal(conf);
    testFilePath = new Path(workDir, "TestOrcFile." +
        testCaseName.getMethodName() + ".orc");
    fs.delete(testFilePath, false);
  }

  // ---------------------------------------------------------------------
  // Schema builders and assertion helpers
  // ---------------------------------------------------------------------

  /**
   * Builds {@code struct<f1:f1Type, f2:string, f3:decimal(f3Precision,10)>}.
   *
   * @param f1Type type of the first field
   * @param f3Precision precision of the decimal field (its scale is always 10)
   */
  private static TypeDescription flatStruct(TypeDescription f1Type, int f3Precision) {
    return TypeDescription.createStruct()
        .addField("f1", f1Type)
        .addField("f2", TypeDescription.createString())
        .addField("f3",
            TypeDescription.createDecimal().withPrecision(f3Precision).withScale(10));
  }

  /**
   * Builds the nested schema
   * {@code struct<f1:uniontype<tinyint,decimal(20,10)>,
   * f2:struct<f3:date,f4:double,f5:f5Type>, f6:char(f6Length)>}.
   *
   * @param f5Type type of the innermost field {@code f5}
   * @param f6Length maximum length of the char field {@code f6}
   */
  private static TypeDescription nestedStruct(TypeDescription f5Type, int f6Length) {
    return TypeDescription.createStruct()
        .addField("f1", TypeDescription.createUnion()
            .addUnionChild(TypeDescription.createByte())
            .addUnionChild(TypeDescription.createDecimal()
                .withPrecision(20).withScale(10)))
        .addField("f2", TypeDescription.createStruct()
            .addField("f3", TypeDescription.createDate())
            .addField("f4", TypeDescription.createDouble())
            .addField("f5", f5Type))
        .addField("f6", TypeDescription.createChar().withMaxLength(f6Length));
  }

  /** Builds {@code struct<f1:f1Type>}. */
  private static TypeDescription singleColumn(TypeDescription f1Type) {
    return TypeDescription.createStruct().addField("f1", f1Type);
  }

  /**
   * Asserts the result of {@code isPPDSafeConversion} for column ids
   * {@code 0 .. expected.length - 1}, in order.
   *
   * @param evolution the schema evolution under test
   * @param expected expected result per column id, starting at id 0
   */
  private static void assertPpdSafety(SchemaEvolution evolution, boolean... expected) {
    for (int columnId = 0; columnId < expected.length; ++columnId) {
      assertEquals("isPPDSafeConversion(" + columnId + ")",
          Boolean.valueOf(expected[columnId]),
          Boolean.valueOf(evolution.isPPDSafeConversion(columnId)));
    }
  }

  /**
   * Reads {@code fileSchema} (a one-field struct) through
   * {@code struct<f1:readerF1Type>} and asserts that a conversion is reported,
   * that column id 0 is not PPD safe, and that column id 1 matches
   * {@code f1PpdSafe}.
   *
   * @param fileSchema schema the file was written with
   * @param readerF1Type type requested for {@code f1} on read
   * @param f1PpdSafe expected {@code isPPDSafeConversion(1)}
   */
  private static void assertSingleColumnConversion(TypeDescription fileSchema,
                                                   TypeDescription readerF1Type,
                                                   boolean f1PpdSafe) throws IOException {
    TypeDescription readerSchema = singleColumn(readerF1Type);
    SchemaEvolution evolution = new SchemaEvolution(fileSchema, readerSchema, null);
    // Included in every message so a failure names the exact type pair.
    String pair = fileSchema + " read as " + readerSchema;
    assertTrue(pair + ": hasConversion()", evolution.hasConversion());
    assertFalse(pair + ": isPPDSafeConversion(0)", evolution.isPPDSafeConversion(0));
    assertEquals(pair + ": isPPDSafeConversion(1)",
        Boolean.valueOf(f1PpdSafe),
        Boolean.valueOf(evolution.isPPDSafeConversion(1)));
  }

  // ---------------------------------------------------------------------
  // hasConversion()
  // ---------------------------------------------------------------------

  /**
   * Flat struct: identical schemas need no conversion; changing the type of
   * {@code f1} or the decimal precision of {@code f3} does.
   */
  @Test
  public void testDataTypeConversion1() throws IOException {
    TypeDescription fileStruct1 = flatStruct(TypeDescription.createInt(), 38);

    SchemaEvolution same1 = new SchemaEvolution(fileStruct1, null);
    assertFalse(same1.hasConversion());

    // identical reader schema
    TypeDescription readerStruct1 = flatStruct(TypeDescription.createInt(), 38);
    SchemaEvolution both1 = new SchemaEvolution(fileStruct1, readerStruct1, null);
    assertFalse(both1.hasConversion());

    // f1: int -> long
    TypeDescription readerStruct1diff = flatStruct(TypeDescription.createLong(), 38);
    SchemaEvolution both1diff = new SchemaEvolution(fileStruct1, readerStruct1diff, null);
    assertTrue(both1diff.hasConversion());

    // f3: decimal(38,10) -> decimal(12,10)
    TypeDescription readerStruct1diffPrecision =
        flatStruct(TypeDescription.createInt(), 12);
    SchemaEvolution both1diffPrecision =
        new SchemaEvolution(fileStruct1, readerStruct1diffPrecision, null);
    assertTrue(both1diffPrecision.hasConversion());
  }

  /**
   * Nested struct with a union: identical schemas need no conversion; changing
   * the nested field {@code f5} or the char length of {@code f6} does.
   */
  @Test
  public void testDataTypeConversion2() throws IOException {
    TypeDescription fileStruct2 = nestedStruct(TypeDescription.createBoolean(), 100);

    SchemaEvolution same2 = new SchemaEvolution(fileStruct2, null);
    assertFalse(same2.hasConversion());

    // identical reader schema
    TypeDescription readerStruct2 = nestedStruct(TypeDescription.createBoolean(), 100);
    SchemaEvolution both2 = new SchemaEvolution(fileStruct2, readerStruct2, null);
    assertFalse(both2.hasConversion());

    // f2.f5: boolean -> tinyint
    TypeDescription readerStruct2diff = nestedStruct(TypeDescription.createByte(), 100);
    SchemaEvolution both2diff = new SchemaEvolution(fileStruct2, readerStruct2diff, null);
    assertTrue(both2diff.hasConversion());

    // f6: char(100) -> char(80)
    TypeDescription readerStruct2diffChar =
        nestedStruct(TypeDescription.createBoolean(), 80);
    SchemaEvolution both2diffChar =
        new SchemaEvolution(fileStruct2, readerStruct2diffChar, null);
    assertTrue(both2diffChar.hasConversion());
  }

  // ---------------------------------------------------------------------
  // End-to-end read
  // ---------------------------------------------------------------------

  /**
   * Writes a single float value and reads it back through a double reader
   * schema, expecting the decimal value to survive the conversion.
   */
  @Test
  public void testFloatToDoubleEvolution() throws Exception {
    // testFilePath was already derived from this method's name by setup().
    TypeDescription schema = TypeDescription.createFloat();
    Writer writer = OrcFile.createWriter(testFilePath,
        OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
            .bufferSize(10000));
    try {
      VectorizedRowBatch batch = new VectorizedRowBatch(1, 1024);
      DoubleColumnVector dcv = new DoubleColumnVector(1024);
      batch.cols[0] = dcv;
      batch.reset();
      batch.size = 1;
      dcv.vector[0] = 74.72f;
      writer.addRowBatch(batch);
    } finally {
      // Release the writer even when adding the batch fails.
      writer.close();
    }

    Reader reader = OrcFile.createReader(testFilePath,
        OrcFile.readerOptions(conf).filesystem(fs));
    TypeDescription schemaOnRead = TypeDescription.createDouble();
    RecordReader rows = reader.rows(new Reader.Options().schema(schemaOnRead));
    try {
      VectorizedRowBatch readBatch = schemaOnRead.createRowBatch();
      assertTrue("expected one batch", rows.nextBatch(readBatch));
      assertEquals(1, readBatch.size);
      // NOTE(review): a plain (double) cast of 74.72f is about 74.7200012, which
      // is outside this delta; the assertion therefore relies on the reader's
      // float -> double conversion keeping the decimal value - confirm.
      assertEquals(74.72, ((DoubleColumnVector) readBatch.cols[0]).vector[0],
          0.00000000001);
    } finally {
      // Close the record reader even when an assertion above fails.
      rows.close();
    }
  }

  // ---------------------------------------------------------------------
  // isPPDSafeConversion()
  // ---------------------------------------------------------------------

  /**
   * PPD safety on a flat struct: no evolution, a widened integer column, a
   * narrowed decimal with an include mask, and an added column.
   */
  @Test
  public void testSafePpdEvaluation() throws IOException {
    TypeDescription fileStruct1 = flatStruct(TypeDescription.createInt(), 38);

    SchemaEvolution same1 = new SchemaEvolution(fileStruct1, null);
    assertTrue(same1.isPPDSafeConversion(0));
    assertFalse(same1.hasConversion());

    // identical reader schema
    TypeDescription readerStruct1 = flatStruct(TypeDescription.createInt(), 38);
    SchemaEvolution both1 = new SchemaEvolution(fileStruct1, readerStruct1, null);
    assertFalse(both1.hasConversion());
    assertPpdSafety(both1, true, true, true, true);

    // int -> long
    TypeDescription readerStruct1diff = flatStruct(TypeDescription.createLong(), 38);
    SchemaEvolution both1diff = new SchemaEvolution(fileStruct1, readerStruct1diff, null);
    assertTrue(both1diff.hasConversion());
    assertPpdSafety(both1diff, false, true, true, true);

    // decimal(38,10) -> decimal(12, 10); columns 1 and 2 are not included
    TypeDescription readerStruct1diffPrecision =
        flatStruct(TypeDescription.createInt(), 12);
    SchemaEvolution both1diffPrecision = new SchemaEvolution(fileStruct1,
        readerStruct1diffPrecision, new boolean[] {true, false, false, true});
    assertTrue(both1diffPrecision.hasConversion());
    assertPpdSafety(both1diffPrecision, false, false, false, false);

    // add columns
    readerStruct1 = flatStruct(TypeDescription.createInt(), 38)
        .addField("f4", TypeDescription.createBoolean());
    both1 = new SchemaEvolution(fileStruct1, readerStruct1, null);
    assertTrue(both1.hasConversion());
    assertPpdSafety(both1, false, true, true, true, false);
  }

  /**
   * PPD safety between integer types: widening (byte -> short -> int -> long)
   * is expected to be safe, narrowing and non-integer targets are not.
   */
  @Test
  public void testSafePpdEvaluationForInts() throws IOException {
    // file column: byte
    TypeDescription fileSchema = singleColumn(TypeDescription.createByte());
    SchemaEvolution schemaEvolution = new SchemaEvolution(fileSchema, null);
    assertFalse(schemaEvolution.hasConversion());

    assertSingleColumnConversion(fileSchema, TypeDescription.createShort(), true);
    assertSingleColumnConversion(fileSchema, TypeDescription.createInt(), true);
    assertSingleColumnConversion(fileSchema, TypeDescription.createLong(), true);

    // file column: short
    fileSchema = singleColumn(TypeDescription.createShort());
    schemaEvolution = new SchemaEvolution(fileSchema, null);
    assertFalse(schemaEvolution.hasConversion());

    assertSingleColumnConversion(fileSchema, TypeDescription.createByte(), false);
    assertSingleColumnConversion(fileSchema, TypeDescription.createInt(), true);
    assertSingleColumnConversion(fileSchema, TypeDescription.createLong(), true);

    // file column: int
    fileSchema = singleColumn(TypeDescription.createInt());
    schemaEvolution = new SchemaEvolution(fileSchema, null);
    assertFalse(schemaEvolution.hasConversion());

    assertSingleColumnConversion(fileSchema, TypeDescription.createByte(), false);
    assertSingleColumnConversion(fileSchema, TypeDescription.createShort(), false);
    assertSingleColumnConversion(fileSchema, TypeDescription.createLong(), true);

    // file column: long
    fileSchema = singleColumn(TypeDescription.createLong());
    schemaEvolution = new SchemaEvolution(fileSchema, null);
    assertTrue(schemaEvolution.isPPDSafeConversion(0));
    assertFalse(schemaEvolution.hasConversion());

    assertSingleColumnConversion(fileSchema, TypeDescription.createByte(), false);
    assertSingleColumnConversion(fileSchema, TypeDescription.createShort(), false);
    assertSingleColumnConversion(fileSchema, TypeDescription.createInt(), false);

    // targets outside the integer family
    assertSingleColumnConversion(fileSchema, TypeDescription.createString(), false);
    assertSingleColumnConversion(fileSchema, TypeDescription.createFloat(), false);
    assertSingleColumnConversion(fileSchema, TypeDescription.createTimestamp(), false);
  }

  /**
   * PPD safety between string types: only string <-> varchar is expected to be
   * safe; anything involving char, and non-string targets, are not.
   */
  @Test
  public void testSafePpdEvaluationForStrings() throws IOException {
    // file column: string
    TypeDescription fileSchema = singleColumn(TypeDescription.createString());
    SchemaEvolution schemaEvolution = new SchemaEvolution(fileSchema, null);
    assertTrue(schemaEvolution.isPPDSafeConversion(0));
    assertFalse(schemaEvolution.hasConversion());

    assertSingleColumnConversion(fileSchema, TypeDescription.createChar(), false);
    assertSingleColumnConversion(fileSchema, TypeDescription.createVarchar(), true);

    // file column: char
    fileSchema = singleColumn(TypeDescription.createChar());
    schemaEvolution = new SchemaEvolution(fileSchema, null);
    assertTrue(schemaEvolution.isPPDSafeConversion(0));
    assertFalse(schemaEvolution.hasConversion());

    assertSingleColumnConversion(fileSchema, TypeDescription.createString(), false);
    assertSingleColumnConversion(fileSchema, TypeDescription.createVarchar(), false);

    // file column: varchar
    fileSchema = singleColumn(TypeDescription.createVarchar());
    schemaEvolution = new SchemaEvolution(fileSchema, null);
    assertTrue(schemaEvolution.isPPDSafeConversion(0));
    assertFalse(schemaEvolution.hasConversion());

    assertSingleColumnConversion(fileSchema, TypeDescription.createString(), true);
    assertSingleColumnConversion(fileSchema, TypeDescription.createChar(), false);

    // targets outside the string family
    assertSingleColumnConversion(fileSchema, TypeDescription.createDecimal(), false);
    assertSingleColumnConversion(fileSchema, TypeDescription.createDate(), false);
    assertSingleColumnConversion(fileSchema, TypeDescription.createInt(), false);
  }
}