ORC-626: Reading Struct Column Having Multiple Fields With Same Name Causes java.io.EOFException
Fixes #505
Signed-off-by: Owen O'Malley <omalley@apache.org>
diff --git a/java/core/src/java/org/apache/orc/OrcConf.java b/java/core/src/java/org/apache/orc/OrcConf.java
index bf83431..310364e 100644
--- a/java/core/src/java/org/apache/orc/OrcConf.java
+++ b/java/core/src/java/org/apache/orc/OrcConf.java
@@ -147,6 +147,10 @@
"Require schema evolution to match the top level columns using position\n" +
"rather than column names. This provides backwards compatibility with\n" +
"Hive 2.1."),
+ FORCE_POSITIONAL_EVOLUTION_LEVEL("orc.force.positional.evolution.level",
+ "orc.force.positional.evolution.level", 1, // 1 = level previously hard-coded for non-ACID
+ "Require schema evolution to match the defined no. of level columns using position\n" +
+ "rather than column names. This provides backwards compatibility with Hive 2.1."),
ROWS_BETWEEN_CHECKS("orc.rows.between.memory.checks", "orc.rows.between.memory.checks", 5000,
"How often should MemoryManager check the memory sizes? Measured in rows\n" +
"added to all of the writers. Valid range is [1,10000] and is primarily meant for" +
diff --git a/java/core/src/java/org/apache/orc/Reader.java b/java/core/src/java/org/apache/orc/Reader.java
index 0aae622..4aa1cff 100644
--- a/java/core/src/java/org/apache/orc/Reader.java
+++ b/java/core/src/java/org/apache/orc/Reader.java
@@ -184,6 +184,7 @@
private boolean[] include;
private long offset = 0;
private long length = Long.MAX_VALUE;
+ private int positionalEvolutionLevel;
private SearchArgument sarg = null;
private String[] columnNames = null;
private Boolean useZeroCopy = null;
@@ -207,6 +208,7 @@
skipCorruptRecords = OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf);
tolerateMissingSchema = OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf);
forcePositionalEvolution = OrcConf.FORCE_POSITIONAL_EVOLUTION.getBoolean(conf);
+ positionalEvolutionLevel = OrcConf.FORCE_POSITIONAL_EVOLUTION_LEVEL.getInt(conf);
isSchemaEvolutionCaseAware =
OrcConf.IS_SCHEMA_EVOLUTION_CASE_SENSITIVE.getBoolean(conf);
}
@@ -330,6 +332,18 @@
}
/**
+ * Set the number of schema levels that are matched by position rather than
+ * by column name when positional evolution applies (ACID schemas always use 2).
+ * @param value the number of levels to match positionally
+ * @return this
+ */
+ public Options positionalEvolutionLevel(int value) {
+ this.positionalEvolutionLevel = value;
+ return this;
+ }
+
+
+ /**
* Set boolean flag to determine if the comparision of field names in schema
* evolution is case sensitive
* @param value the flag for schema evolution is case sensitive or not.
@@ -404,6 +418,10 @@
return forcePositionalEvolution;
}
+ public int getPositionalEvolutionLevel() {
+ return positionalEvolutionLevel; // no. of schema levels matched by position, not by name
+ }
+
public boolean getIsSchemaEvolutionCaseAware() {
return isSchemaEvolutionCaseAware;
}
diff --git a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
index 1e86f26..dac5437 100644
--- a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
+++ b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
@@ -106,7 +106,7 @@
new TypeDescription[this.readerSchema.getMaximumId() + 1];
int positionalLevels = 0;
if (options.getForcePositionalEvolution()) {
- positionalLevels = isAcid ? 2 : 1;
+ positionalLevels = isAcid ? 2 : options.getPositionalEvolutionLevel();
} else if (!hasColumnNames(isAcid? getBaseRow(fileSchema) : fileSchema)) {
if (!this.fileSchema.equals(this.readerSchema)) {
if (!allowMissingMetadata) {
@@ -119,7 +119,7 @@
+ " caused by a writer earlier than HIVE-4243. The reader will"
+ " reconcile schemas based on index. File type: " +
this.fileSchema + ", reader type: " + this.readerSchema);
- positionalLevels = isAcid ? 2 : 1;
+ positionalLevels = isAcid ? 2 : options.getPositionalEvolutionLevel();
}
}
}
diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
index be7b616..f5fe82f 100644
--- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
+++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
@@ -1723,6 +1723,19 @@
assertEquals(null, evo.getFileType(4));
}
+ @Test
+ public void testPositionalEvolutionLevel() throws IOException {
+ options.forcePositionalEvolution(true); // the level below is only read on the positional path
+ options.positionalEvolutionLevel(2); // two positional levels: b's fields share the name y
+ TypeDescription file = TypeDescription.fromString("struct<a:int,b:struct<y:int,y:int>>");
+ TypeDescription read = TypeDescription.fromString("struct<a:int,b:struct<y:int,y:int>>");
+ SchemaEvolution evo = new SchemaEvolution(file, read, options);
+ assertEquals(1, evo.getFileType(1).getId()); // each reader column id maps to the same file id
+ assertEquals(2, evo.getFileType(2).getId());
+ assertEquals(3, evo.getFileType(3).getId());
+ assertEquals(4, evo.getFileType(4).getId());
+ }
+
// These are helper methods that pull some of the common code into one
// place.