ORC-380: Add isOnlyImplicitConversion boolean function to SchemaEvolution
Fixes #286
diff --git a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
index 571c0d9..8128308 100644
--- a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
+++ b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
@@ -46,7 +46,8 @@
private final boolean[] fileIncluded;
private final TypeDescription fileSchema;
private final TypeDescription readerSchema;
- private boolean hasConversion = false;
+ private boolean hasConversion;
+ private boolean isOnlyImplicitConversion;
private final boolean isAcid;
private final boolean isSchemaEvolutionCaseAware;
@@ -75,6 +76,7 @@
Arrays.copyOf(includedCols, includedCols.length);
this.fileIncluded = new boolean[fileSchema.getMaximumId() + 1];
this.hasConversion = false;
+ this.isOnlyImplicitConversion = true;
this.fileSchema = fileSchema;
isAcid = checkAcidSchema(fileSchema);
this.readerColumnOffset = isAcid ? acidEventFieldNames.size() : 0;
@@ -186,6 +188,17 @@
return hasConversion;
}
+ /**
+ * When there Schema Evolution data type conversion i.e. hasConversion() returns true,
+ * is the conversion only the implicit kind?
+ *
+ * (see aaa).
+ * @return
+ */
+ public boolean isOnlyImplicitConversion() {
+ return isOnlyImplicitConversion;
+ }
+
public TypeDescription getFileSchema() {
return fileSchema;
}
@@ -220,6 +233,59 @@
}
/**
+ * Determine if there is implicit conversion from a file to reader type.
+ *
+ * Implicit conversions are:
+ * Small to larger integer (e.g. INT to LONG)
+ * FLOAT to DOUBLE
+ * Some String Family conversions.
+ *
+ * NOTE: This check is independent of the PPD conversion checks.
+ * @return
+ */
+ private boolean typesAreImplicitConversion(final TypeDescription fileType,
+ final TypeDescription readerType) {
+ switch (fileType.getCategory()) {
+ case BYTE:
+ if (readerType.getCategory().equals(TypeDescription.Category.SHORT) ||
+ readerType.getCategory().equals(TypeDescription.Category.INT) ||
+ readerType.getCategory().equals(TypeDescription.Category.LONG)) {
+ return true;
+ }
+ break;
+ case SHORT:
+ if (readerType.getCategory().equals(TypeDescription.Category.INT) ||
+ readerType.getCategory().equals(TypeDescription.Category.LONG)) {
+ return true;
+ }
+ break;
+ case INT:
+ if (readerType.getCategory().equals(TypeDescription.Category.LONG)) {
+ return true;
+ }
+ break;
+ case FLOAT:
+ if (readerType.getCategory().equals(TypeDescription.Category.DOUBLE)) {
+ return true;
+ }
+ break;
+ case CHAR:
+ case VARCHAR:
+ if (readerType.getCategory().equals(TypeDescription.Category.STRING)) {
+ return true;
+ }
+ if (readerType.getCategory().equals(TypeDescription.Category.CHAR) ||
+ readerType.getCategory().equals(TypeDescription.Category.VARCHAR)) {
+ return (fileType.getMaxLength() <= readerType.getMaxLength());
+ }
+ break;
+ default:
+ break;
+ }
+ return false;
+ }
+
+ /**
* Check if column is safe for ppd evaluation
* @param colId reader column id
* @return true if the specified column is safe for ppd evaluation else false
@@ -382,6 +448,9 @@
// maxLength.
if (fileType.getMaxLength() != readerType.getMaxLength()) {
hasConversion = true;
+ if (!typesAreImplicitConversion(fileType, readerType)) {
+ isOnlyImplicitConversion = false;
+ }
}
break;
case DECIMAL:
@@ -390,6 +459,7 @@
if (fileType.getPrecision() != readerType.getPrecision() ||
fileType.getScale() != readerType.getScale()) {
hasConversion = true;
+ isOnlyImplicitConversion = false;
}
break;
case UNION:
@@ -413,6 +483,8 @@
List<TypeDescription> fileChildren = fileType.getChildren();
if (fileChildren.size() != readerChildren.size()) {
hasConversion = true;
+ // UNDONE: Does LLAP detect fewer columns and NULL them out????
+ isOnlyImplicitConversion = false;
}
if (positionalLevels == 0) {
@@ -461,6 +533,9 @@
isOk = ConvertTreeReaderFactory.canConvert(fileType, readerType);
hasConversion = true;
+ if (!typesAreImplicitConversion(fileType, readerType)) {
+ isOnlyImplicitConversion = false;
+ }
}
if (isOk) {
readerFileTypes[readerType.getId()] = fileType;
diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
index e452e57..d203415 100644
--- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
+++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
@@ -97,6 +97,7 @@
.addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
SchemaEvolution both1diff = new SchemaEvolution(fileStruct1, readerStruct1diff, options);
assertTrue(both1diff.hasConversion());
+ assertTrue(both1diff.isOnlyImplicitConversion());
TypeDescription readerStruct1diffPrecision = TypeDescription.createStruct()
.addField("f1", TypeDescription.createInt())
.addField("f2", TypeDescription.createString())
@@ -104,6 +105,7 @@
SchemaEvolution both1diffPrecision = new SchemaEvolution(fileStruct1,
readerStruct1diffPrecision, options);
assertTrue(both1diffPrecision.hasConversion());
+ assertFalse(both1diffPrecision.isOnlyImplicitConversion());
}
@Test
@@ -144,6 +146,7 @@
.addField("f6", TypeDescription.createChar().withMaxLength(100));
SchemaEvolution both2diff = new SchemaEvolution(fileStruct2, readerStruct2diff, options);
assertTrue(both2diff.hasConversion());
+ assertFalse(both2diff.isOnlyImplicitConversion());
TypeDescription readerStruct2diffChar = TypeDescription.createStruct()
.addField("f1", TypeDescription.createUnion()
.addUnionChild(TypeDescription.createByte())
@@ -156,6 +159,175 @@
.addField("f6", TypeDescription.createChar().withMaxLength(80));
SchemaEvolution both2diffChar = new SchemaEvolution(fileStruct2, readerStruct2diffChar, options);
assertTrue(both2diffChar.hasConversion());
+ assertFalse(both2diffChar.isOnlyImplicitConversion());
+ }
+
+ @Test
+ public void testIntegerImplicitConversion() throws IOException {
+ TypeDescription fileStructByte = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createByte())
+ .addField("f2", TypeDescription.createString());
+ SchemaEvolution sameByte = new SchemaEvolution(fileStructByte, null, options);
+ assertFalse(sameByte.hasConversion());
+ TypeDescription readerStructByte = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createByte())
+ .addField("f2", TypeDescription.createString());
+ SchemaEvolution bothByte = new SchemaEvolution(fileStructByte, readerStructByte, options);
+ assertFalse(bothByte.hasConversion());
+ TypeDescription readerStructByte1diff = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createShort())
+ .addField("f2", TypeDescription.createString());
+ SchemaEvolution bothByte1diff = new SchemaEvolution(fileStructByte, readerStructByte1diff, options);
+ assertTrue(bothByte1diff.hasConversion());
+ assertTrue(bothByte1diff.isOnlyImplicitConversion());
+ TypeDescription readerStructByte2diff = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createInt())
+ .addField("f2", TypeDescription.createString());
+ SchemaEvolution bothByte2diff = new SchemaEvolution(fileStructByte, readerStructByte2diff, options);
+ assertTrue(bothByte2diff.hasConversion());
+ assertTrue(bothByte2diff.isOnlyImplicitConversion());
+ TypeDescription readerStruct3diff = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createLong())
+ .addField("f2", TypeDescription.createString());
+ SchemaEvolution bothByte3diff = new SchemaEvolution(fileStructByte, readerStruct3diff, options);
+ assertTrue(bothByte3diff.hasConversion());
+ assertTrue(bothByte3diff.isOnlyImplicitConversion());
+
+ TypeDescription fileStructShort = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createShort())
+ .addField("f2", TypeDescription.createString());
+ SchemaEvolution sameShort = new SchemaEvolution(fileStructShort, null, options);
+ assertFalse(sameShort.hasConversion());
+ TypeDescription readerStructShort = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createShort())
+ .addField("f2", TypeDescription.createString());
+ SchemaEvolution bothShort = new SchemaEvolution(fileStructShort, readerStructShort, options);
+ assertFalse(bothShort.hasConversion());
+ TypeDescription readerStructShort1diff = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createInt())
+ .addField("f2", TypeDescription.createString());
+ SchemaEvolution bothShort1diff = new SchemaEvolution(fileStructShort, readerStructShort1diff, options);
+ assertTrue(bothShort1diff.hasConversion());
+ assertTrue(bothShort1diff.isOnlyImplicitConversion());
+ TypeDescription readerStructShort2diff = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createLong())
+ .addField("f2", TypeDescription.createString());
+ SchemaEvolution bothShort2diff = new SchemaEvolution(fileStructShort, readerStructShort2diff, options);
+ assertTrue(bothShort2diff.hasConversion());
+ assertTrue(bothShort2diff.isOnlyImplicitConversion());
+
+ TypeDescription fileStructInt = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createInt())
+ .addField("f2", TypeDescription.createString());
+ SchemaEvolution sameInt = new SchemaEvolution(fileStructInt, null, options);
+ assertFalse(sameInt.hasConversion());
+ TypeDescription readerStructInt = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createInt())
+ .addField("f2", TypeDescription.createString());
+ SchemaEvolution bothInt = new SchemaEvolution(fileStructInt, readerStructInt, options);
+ assertFalse(bothInt.hasConversion());
+ TypeDescription readerStructInt1diff = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createLong())
+ .addField("f2", TypeDescription.createString());
+ SchemaEvolution bothInt1diff = new SchemaEvolution(fileStructInt, readerStructInt1diff, options);
+ assertTrue(bothInt1diff.hasConversion());
+ assertTrue(bothInt1diff.isOnlyImplicitConversion());
+ }
+
+ @Test
+ public void testFloatImplicitConversion() throws IOException {
+ TypeDescription fileStructFloat = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createFloat())
+ .addField("f2", TypeDescription.createString());
+ SchemaEvolution sameFloat = new SchemaEvolution(fileStructFloat, null, options);
+ assertFalse(sameFloat.hasConversion());
+ TypeDescription readerStructFloat = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createFloat())
+ .addField("f2", TypeDescription.createString());
+ SchemaEvolution bothFloat = new SchemaEvolution(fileStructFloat, readerStructFloat, options);
+ assertFalse(bothFloat.hasConversion());
+ TypeDescription readerStructFloat1diff = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createDouble())
+ .addField("f2", TypeDescription.createString());
+ SchemaEvolution bothFloat1diff = new SchemaEvolution(fileStructFloat, readerStructFloat1diff, options);
+ assertTrue(bothFloat1diff.hasConversion());
+ assertTrue(bothFloat1diff.isOnlyImplicitConversion());
+ }
+
+ @Test
+ public void testCharImplicitConversion() throws IOException {
+ TypeDescription fileStructChar = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createChar().withMaxLength(15))
+ .addField("f2", TypeDescription.createString());
+ SchemaEvolution sameChar = new SchemaEvolution(fileStructChar, null, options);
+ assertFalse(sameChar.hasConversion());
+ TypeDescription readerStructChar = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createChar().withMaxLength(15))
+ .addField("f2", TypeDescription.createString());
+ SchemaEvolution bothChar = new SchemaEvolution(fileStructChar, readerStructChar, options);
+ assertFalse(bothChar.hasConversion());
+ TypeDescription readerStructChar1diff = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createString())
+ .addField("f2", TypeDescription.createString());
+ SchemaEvolution bothChar1diff = new SchemaEvolution(fileStructChar, readerStructChar1diff, options);
+ assertTrue(bothChar1diff.hasConversion());
+ assertTrue(bothChar1diff.isOnlyImplicitConversion());
+ TypeDescription readerStructChar2diff = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createChar().withMaxLength(14))
+ .addField("f2", TypeDescription.createString());
+ SchemaEvolution bothChar2diff = new SchemaEvolution(fileStructChar, readerStructChar2diff, options);
+ assertTrue(bothChar2diff.hasConversion());
+ assertFalse(bothChar2diff.isOnlyImplicitConversion());
+ TypeDescription readerStructChar3diff = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createVarchar().withMaxLength(15))
+ .addField("f2", TypeDescription.createString());
+ SchemaEvolution bothChar3diff = new SchemaEvolution(fileStructChar, readerStructChar3diff, options);
+ assertTrue(bothChar3diff.hasConversion());
+ assertTrue(bothChar3diff.isOnlyImplicitConversion());
+ TypeDescription readerStructChar4diff = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createVarchar().withMaxLength(14))
+ .addField("f2", TypeDescription.createString());
+ SchemaEvolution bothChar4diff = new SchemaEvolution(fileStructChar, readerStructChar4diff, options);
+ assertTrue(bothChar4diff.hasConversion());
+ assertFalse(bothChar4diff.isOnlyImplicitConversion());
+ }
+
+ @Test
+ public void testVarcharImplicitConversion() throws IOException {
+ TypeDescription fileStructVarchar = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createVarchar().withMaxLength(15))
+ .addField("f2", TypeDescription.createString());
+ SchemaEvolution sameVarchar = new SchemaEvolution(fileStructVarchar, null, options);
+ assertFalse(sameVarchar.hasConversion());
+ TypeDescription readerStructVarchar = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createVarchar().withMaxLength(15))
+ .addField("f2", TypeDescription.createString());
+ SchemaEvolution bothVarchar = new SchemaEvolution(fileStructVarchar, readerStructVarchar, options);
+ assertFalse(bothVarchar.hasConversion());
+ TypeDescription readerStructVarchar1diff = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createString())
+ .addField("f2", TypeDescription.createString());
+ SchemaEvolution bothVarchar1diff = new SchemaEvolution(fileStructVarchar, readerStructVarchar1diff, options);
+ assertTrue(bothVarchar1diff.hasConversion());
+ assertTrue(bothVarchar1diff.isOnlyImplicitConversion());
+ TypeDescription readerStructVarchar2diff = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createVarchar().withMaxLength(14))
+ .addField("f2", TypeDescription.createString());
+ SchemaEvolution bothVarchar2diff = new SchemaEvolution(fileStructVarchar, readerStructVarchar2diff, options);
+ assertTrue(bothVarchar2diff.hasConversion());
+ assertFalse(bothVarchar2diff.isOnlyImplicitConversion());
+ TypeDescription readerStructVarchar3diff = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createChar().withMaxLength(15))
+ .addField("f2", TypeDescription.createString());
+ SchemaEvolution bothVarchar3diff = new SchemaEvolution(fileStructVarchar, readerStructVarchar3diff, options);
+ assertTrue(bothVarchar3diff.hasConversion());
+ assertTrue(bothVarchar3diff.isOnlyImplicitConversion());
+ TypeDescription readerStructVarchar4diff = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createChar().withMaxLength(14))
+ .addField("f2", TypeDescription.createString());
+ SchemaEvolution bothVarchar4diff = new SchemaEvolution(fileStructVarchar, readerStructVarchar4diff, options);
+ assertTrue(bothVarchar4diff.hasConversion());
+ assertFalse(bothVarchar4diff.isOnlyImplicitConversion());
}
@Test