DRILL-8417: Allow Excel Reader to Ignore Formula Errors (#2783)

diff --git a/contrib/format-excel/README.md b/contrib/format-excel/README.md
index 207203c..3d63a2e 100644
--- a/contrib/format-excel/README.md
+++ b/contrib/format-excel/README.md
@@ -1,10 +1,10 @@
 # Excel Format Plugin
-This plugin enables Drill to read Microsoft Excel files. This format is best used with Excel files that do not have extensive formatting, however it will work with formatted files, by allowing you to define a region within the file where the data is. 
+This plugin enables Drill to read Microsoft Excel files. This format is best used with Excel files that do not have extensive formatting, however it will work with formatted files, by allowing you to define a region within the file where the data is.
 
-The plugin will automatically evaluate cells which contain formulae. 
+The plugin will automatically evaluate cells which contain formulae.
 
-## Plugin Configuration 
-This plugin has several configuration variables which must be set in order to read Excel files effectively. Since Excel files often contain other elements besides data, you can use the configuration variables to define a region within your spreadsheet in which Drill should extract data. This is potentially useful if your spreadsheet contains a lot of formatting or other complications. 
+## Plugin Configuration
+This plugin has several configuration variables which must be set in order to read Excel files effectively. Since Excel files often contain other elements besides data, you can use the configuration variables to define a region within your spreadsheet in which Drill should extract data. This is potentially useful if your spreadsheet contains a lot of formatting or other complications.
 
 ### Configuration Options:
 The most basic configuration is simply to add the following to a file based storage plugin:
@@ -24,8 +24,10 @@
 * `firstColumn`: In order to define a region within a spreadsheet, this is the left-most column index. This is indexed from one. If set to `0` Drill will start at the left most
  column.
 * `lastColumn`: To define a region within a spreadsheet, this is the right-most column index. This is indexed from one. If set to `0` Drill will read all available columns. This
- is not inclusive, so if you ask for columns 2-5 you will get columns 2,3 and 4. 
+ is not inclusive, so if you ask for columns 2-5 you will get columns 2,3 and 4.
 * `allTextMode`: When set to `true`, Drill will not attempt to infer column data types and will read everything as `VARCHAR`. Defaults to `false`;
+* `ignoreErrors`:  Defaults to `true`.  When set to `true` Drill will return `null` for any
+  formulas or any values that are unparseable.
 * `maxArraySize`: Overrides the default [POI config](https://poi.apache.org/components/configuration.html) for `setByteArrayMaxOverride(int maxOverride)`. May be needed with large Excel files.
 * `thresholdBytesForTempFiles`: Overrides the default [POI config](https://poi.apache.org/components/configuration.html) for `ZipInputStreamZipEntrySource.setThresholdBytesForTempFiles(int thresholdBytes)`. May be needed with large Excel files.
 * `useTempFilePackageParts`: Overrides the default [POI config](https://poi.apache.org/components/configuration.html) for `ZipPackage.setUseTempFilePackageParts(boolean tempFilePackageParts)`. May be needed with large Excel files.
@@ -35,24 +37,24 @@
  execute the query as follows:
 
 ```
-SELECT <fields> 
+SELECT <fields>
 FROM dfs.`somefile.xlsx`
 ```
 To query a different sheet other than the default, use the `table()` function as shown below:
 ```
-SELECT <fields> 
+SELECT <fields>
 FROM table( dfs.`test_data.xlsx` (type => 'excel', sheetName => 'secondSheet'))
 ```
 To join data together from different sheets in one file, use the query below:
 ```
-SELECT <fields> 
+SELECT <fields>
 FROM table( dfs.`test_data.xlsx` (type => 'excel', sheetName => 'secondSheet')) AS t1
-INNER JOIN table( dfs.`test_data.xlsx` (type => 'excel', sheetName => 'thirdSheet')) AS t2 
+INNER JOIN table( dfs.`test_data.xlsx` (type => 'excel', sheetName => 'thirdSheet')) AS t2
 ON t1.id = t2.id
 ```
 
 ### Implicit Columns
-Drill includes several columns of file metadata in the query results. These fields are **not** included in star queries and thus must be explicitly listed in a query. 
+Drill includes several columns of file metadata in the query results. These fields are **not** included in star queries and thus must be explicitly listed in a query.
 
 The fields are:
 
@@ -75,10 +77,10 @@
 
 ### Known Limitations:
 At present, Drill requires that all columns be of the same data type. If they are not, Drill will throw an exception upon trying to read a column of mixed data type. If you are
- trying to query data with heterogenoeus columns, it will be necessary to set `allTextMode` to `true`. 
+ trying to query data with heterogenoeus columns, it will be necessary to set `allTextMode` to `true`.
 
 An additional limitation is that Drill infers the column data type from the first row of data. If a column is `null` in the first row, Drill will default to a datatype of
- `VARCHAR`. However if in fact the column is `NUMERIC` this will cause errors. 
- 
+ `VARCHAR`. However if in fact the column is `NUMERIC` this will cause errors.
+
  Drill ignores blank rows.
 
diff --git a/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelBatchReader.java b/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelBatchReader.java
index baa99b7..326e19f 100644
--- a/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelBatchReader.java
+++ b/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelBatchReader.java
@@ -175,6 +175,7 @@
     final int firstColumn;
     final int lastColumn;
     final boolean allTextMode;
+    final boolean ignoreErrors;
     final String sheetName;
     final int maxArraySize;
     final int thresholdBytesForTempFiles;
@@ -187,6 +188,7 @@
       firstColumn = plugin.getConfig().getFirstColumn();
       lastColumn = plugin.getConfig().getLastColumn();
       allTextMode = plugin.getConfig().getAllTextMode();
+      ignoreErrors = plugin.getConfig().getIgnoreErrors();
       sheetName = plugin.getConfig().getSheetName();
       maxArraySize = plugin.getConfig().getMaxArraySize();
       thresholdBytesForTempFiles = plugin.getConfig().getThresholdBytesForTempFiles();
@@ -420,7 +422,7 @@
 
   /**
    * Helper function to get the selected sheet from the configuration
-   * @return Sheet The selected sheet
+   * @return {@link Sheet} The selected sheet
    */
   private Sheet getSheet() {
     int sheetIndex = 0;
@@ -520,7 +522,20 @@
 
       populateColumnArray(cell, colPosition);
       if (colWriterIndex < cellWriterArray.size()) {
-        cellWriterArray.get(colWriterIndex).load(cell);
+        CellWriter writer = cellWriterArray.get(colWriterIndex);
+        try {
+          writer.load(cell);
+        } catch (NumberFormatException e) {
+          if (readerConfig.ignoreErrors) {
+            logger.warn("Error writing cell: {} {}", cell.getStringCellValue(), e.getMessage());
+            writer.writeNull();
+          } else {
+            throw UserException.dataReadError()
+                .message("Error writing data: " + e.getMessage())
+                .addContext(errorContext)
+                .build(logger);
+          }
+        }
       }
 
       colPosition++;
@@ -785,6 +800,10 @@
     }
 
     public void load(Cell cell) {}
+
+    public void writeNull() {
+      columnWriter.setNull();
+    }
   }
 
   public class StringCellWriter extends ExcelBatchReader.CellWriter {
diff --git a/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelFormatConfig.java b/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelFormatConfig.java
index a78f026..61221ce 100644
--- a/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelFormatConfig.java
+++ b/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelFormatConfig.java
@@ -51,6 +51,7 @@
   private final int firstColumn;
   private final int lastColumn;
   private final boolean allTextMode;
+  private final boolean ignoreErrors;
   private final String sheetName;
   private final int maxArraySize;
   private final int thresholdBytesForTempFiles;
@@ -65,6 +66,7 @@
       @JsonProperty("firstColumn") Integer firstColumn,
       @JsonProperty("lastColumn") Integer lastColumn,
       @JsonProperty("allTextMode") Boolean allTextMode,
+      @JsonProperty("ignoreErrors") Boolean ignoreErrors,
       @JsonProperty("sheetName") String sheetName,
       @JsonProperty("maxArraySize") Integer maxArraySize,
       @JsonProperty("thresholdBytesForTempFiles") Integer thresholdBytesForTempFiles,
@@ -79,6 +81,7 @@
     this.firstColumn = firstColumn == null ? 0 : firstColumn;
     this.lastColumn = lastColumn == null ? 0 : lastColumn;
     this.allTextMode = allTextMode == null ? false : allTextMode;
+    this.ignoreErrors = ignoreErrors == null ? true : ignoreErrors;
     this.sheetName = sheetName == null ? "" : sheetName;
     this.maxArraySize = maxArraySize == null ? -1 : maxArraySize;
     this.thresholdBytesForTempFiles = thresholdBytesForTempFiles == null ? -1 : thresholdBytesForTempFiles;
@@ -110,6 +113,9 @@
   public boolean getAllTextMode() {
     return allTextMode;
   }
+  public boolean getIgnoreErrors() {
+    return ignoreErrors;
+  }
 
   public String getSheetName() {
     return sheetName;
@@ -151,7 +157,7 @@
   @Override
   public int hashCode() {
     return Objects.hash(extensions, headerRow, lastRow,
-        firstColumn, lastColumn, allTextMode, sheetName);
+        firstColumn, lastColumn, allTextMode, ignoreErrors, sheetName);
   }
 
   @Override
@@ -169,6 +175,7 @@
       && Objects.equals(firstColumn, other.firstColumn)
       && Objects.equals(lastColumn, other.lastColumn)
       && Objects.equals(allTextMode, other.allTextMode)
+      && Objects.equals(ignoreErrors, other.ignoreErrors)
       && Objects.equals(sheetName, other.sheetName)
       && Objects.equals(maxArraySize, other.maxArraySize)
       && Objects.equals(thresholdBytesForTempFiles, other.thresholdBytesForTempFiles)
@@ -185,6 +192,7 @@
         .field("firstColumn", firstColumn)
         .field("lastColumn", lastColumn)
         .field("allTextMode", allTextMode)
+        .field("ignoreErrors", ignoreErrors)
         .field("maxArraySize", maxArraySize)
         .field("thresholdBytesForTempFiles", thresholdBytesForTempFiles)
         .field("useTempFilePackageParts", useTempFilePackageParts)
diff --git a/contrib/format-excel/src/test/java/org/apache/drill/exec/store/excel/TestExcelFormat.java b/contrib/format-excel/src/test/java/org/apache/drill/exec/store/excel/TestExcelFormat.java
index 960fcbe..769cce6 100644
--- a/contrib/format-excel/src/test/java/org/apache/drill/exec/store/excel/TestExcelFormat.java
+++ b/contrib/format-excel/src/test/java/org/apache/drill/exec/store/excel/TestExcelFormat.java
@@ -20,6 +20,7 @@
 
 import org.apache.drill.categories.RowSetTest;
 import org.apache.drill.common.exceptions.DrillRuntimeException;
+import org.apache.drill.common.exceptions.UserException;
 import org.apache.drill.common.types.TypeProtos;
 import org.apache.drill.common.types.TypeProtos.MinorType;
 import org.apache.drill.exec.physical.rowSet.RowSet;
@@ -330,6 +331,43 @@
   }
 
   @Test
+  public void testErrorOnFormulaQuery() throws RpcException {
+    String sql = "SELECT * FROM  table(cp.`excel/text-formula.xlsx` (type => 'excel', sheetName " +
+        "=> 'Sheet with Errors', ignoreErrors => True))";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+        .addNullable("field1", MinorType.FLOAT8)
+        .addNullable("field2", MinorType.FLOAT8)
+        .addNullable("result", MinorType.FLOAT8)
+        .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+        .addRow(1,2,0.5)
+        .addRow(2,3,0.6666667)
+        .addRow(3,4,0.75)
+        .addRow(4,0, null)
+        .addRow(5,6,0.8333333)
+        .build();
+
+    new RowSetComparison(expected).verifyAndClearAll(results);
+  }
+
+  @Test
+  public void testErrorOnFormulaQueryWithoutIgnoreErrors() throws RpcException {
+    String sql = "SELECT * FROM  table(cp.`excel/text-formula.xlsx` (type => 'excel', " +
+        "ignoreErrors => False, sheetName => 'Sheet with Errors'))";
+    try {
+      client.queryBuilder().sql(sql).rowSet();
+      fail();
+    } catch (UserException e) {
+      assertTrue(e.getMessage().contains("DATA_READ ERROR: Error writing data: For input string: " +
+          "\"#DIV/0!\""));
+    }
+  }
+
+
+  @Test
   public void testExplicitNonDefaultSheetQuery() throws RpcException {
     String sql = "SELECT event_date, ip_address, user_agent FROM  table(cp.`excel/test_data.xlsx` (type => 'excel', sheetName => 'secondSheet'))";
 
diff --git a/contrib/format-excel/src/test/resources/excel/text-formula.xlsx b/contrib/format-excel/src/test/resources/excel/text-formula.xlsx
index 9cce25e..1e9309a 100644
--- a/contrib/format-excel/src/test/resources/excel/text-formula.xlsx
+++ b/contrib/format-excel/src/test/resources/excel/text-formula.xlsx
Binary files differ