ORC-626: Reading Struct Column Having Multiple Fields With Same Name Causes java.io.EOFException

Fixes #505

Signed-off-by: Owen O'Malley <omalley@apache.org>
diff --git a/java/core/src/java/org/apache/orc/OrcConf.java b/java/core/src/java/org/apache/orc/OrcConf.java
index bf83431..310364e 100644
--- a/java/core/src/java/org/apache/orc/OrcConf.java
+++ b/java/core/src/java/org/apache/orc/OrcConf.java
@@ -147,6 +147,10 @@
       "Require schema evolution to match the top level columns using position\n" +
       "rather than column names. This provides backwards compatibility with\n" +
       "Hive 2.1."),
+  FORCE_POSITIONAL_EVOLUTION_LEVEL("orc.force.positional.evolution.level",
+      "orc.force.positional.evolution.level", 1,
+      "Require schema evolution to match the defined number of column levels using position\n" +
+          "rather than column names. This provides backwards compatibility with Hive 2.1."),
   ROWS_BETWEEN_CHECKS("orc.rows.between.memory.checks", "orc.rows.between.memory.checks", 5000,
     "How often should MemoryManager check the memory sizes? Measured in rows\n" +
       "added to all of the writers.  Valid range is [1,10000] and is primarily meant for" +
diff --git a/java/core/src/java/org/apache/orc/Reader.java b/java/core/src/java/org/apache/orc/Reader.java
index 0aae622..4aa1cff 100644
--- a/java/core/src/java/org/apache/orc/Reader.java
+++ b/java/core/src/java/org/apache/orc/Reader.java
@@ -184,6 +184,7 @@
     private boolean[] include;
     private long offset = 0;
     private long length = Long.MAX_VALUE;
+    private int positionalEvolutionLevel = 1; // same as the OrcConf default; 0 would disable positional matching
     private SearchArgument sarg = null;
     private String[] columnNames = null;
     private Boolean useZeroCopy = null;
@@ -207,6 +208,7 @@
       skipCorruptRecords = OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf);
       tolerateMissingSchema = OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf);
       forcePositionalEvolution = OrcConf.FORCE_POSITIONAL_EVOLUTION.getBoolean(conf);
+      positionalEvolutionLevel = OrcConf.FORCE_POSITIONAL_EVOLUTION_LEVEL.getInt(conf);
       isSchemaEvolutionCaseAware =
           OrcConf.IS_SCHEMA_EVOLUTION_CASE_SENSITIVE.getBoolean(conf);
     }
@@ -330,6 +332,18 @@
     }
 
     /**
+     * Set the number of schema levels that are matched by position instead of
+     * by column name. SchemaEvolution uses two levels for ACID schemas regardless.
+     * @param value the number of levels to match positionally
+     * @return this
+     */
+    public Options positionalEvolutionLevel(int value) {
+      this.positionalEvolutionLevel = value;
+      return this;
+    }
+
+
+    /**
      * Set boolean flag to determine if the comparision of field names in schema
      * evolution is case sensitive
      * @param value the flag for schema evolution is case sensitive or not.
@@ -404,6 +418,10 @@
       return forcePositionalEvolution;
     }
 
+    public int getPositionalEvolutionLevel() {
+      return positionalEvolutionLevel;
+    }
+
     public boolean getIsSchemaEvolutionCaseAware() {
       return isSchemaEvolutionCaseAware;
     }
diff --git a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
index 1e86f26..dac5437 100644
--- a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
+++ b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
@@ -106,7 +106,7 @@
         new TypeDescription[this.readerSchema.getMaximumId() + 1];
       int positionalLevels = 0;
       if (options.getForcePositionalEvolution()) {
-        positionalLevels = isAcid ? 2 : 1;
+        positionalLevels = isAcid ? 2 : options.getPositionalEvolutionLevel();
       } else if (!hasColumnNames(isAcid? getBaseRow(fileSchema) : fileSchema)) {
         if (!this.fileSchema.equals(this.readerSchema)) {
           if (!allowMissingMetadata) {
@@ -119,7 +119,7 @@
                 + " caused by a writer earlier than HIVE-4243. The reader will"
                 + " reconcile schemas based on index. File type: " +
                 this.fileSchema + ", reader type: " + this.readerSchema);
-            positionalLevels = isAcid ? 2 : 1;
+            positionalLevels = isAcid ? 2 : options.getPositionalEvolutionLevel();
           }
         }
       }
diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
index be7b616..f5fe82f 100644
--- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
+++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
@@ -1723,6 +1723,19 @@
     assertEquals(null, evo.getFileType(4));
   }
 
+  @Test
+  public void testPositionalEvolutionLevel() throws IOException {
+    // Sibling fields named "y" are ambiguous by name; match two levels by position.
+    options.forcePositionalEvolution(true);
+    options.positionalEvolutionLevel(2);
+    TypeDescription fileSchema = TypeDescription.fromString("struct<a:int,b:struct<y:int,y:int>>");
+    TypeDescription readerSchema = TypeDescription.fromString("struct<a:int,b:struct<y:int,y:int>>");
+    SchemaEvolution evolution = new SchemaEvolution(fileSchema, readerSchema, options);
+    for (int id = 1; id <= 4; ++id) {
+      assertEquals(id, evolution.getFileType(id).getId());
+    }
+  }
+
   // These are helper methods that pull some of the common code into one
   // place.