Fix CSV loading with a header row and skipped header rows not being parsed correctly. (#10398)
diff --git a/extensions-core/lookups-cached-global/src/main/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespace.java b/extensions-core/lookups-cached-global/src/main/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespace.java
index 60b04d3..f454d91 100644
--- a/extensions-core/lookups-cached-global/src/main/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespace.java
+++ b/extensions-core/lookups-cached-global/src/main/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespace.java
@@ -216,6 +216,10 @@
public Map<String, String> parseToMap(String input)
{
final Map<String, Object> inner = delegate.parseToMap(input);
+ if (null == inner) {
+ // Skip null or missing values, treat them as if there were no row at all.
+ return ImmutableMap.of();
+ }
final String k = Preconditions.checkNotNull(
inner.get(key),
"Key column [%s] missing data in line [%s]",
@@ -296,9 +300,10 @@
this.valueColumn,
Arrays.toString(columns.toArray())
);
-
+ CSVParser csvParser = new CSVParser(null, columns, hasHeaderRow, skipHeaderRows);
+ csvParser.startFileFromBeginning();
this.parser = new DelegateParser(
- new CSVParser(null, columns, hasHeaderRow, skipHeaderRows),
+ csvParser,
this.keyColumn,
this.valueColumn
);
@@ -401,6 +406,7 @@
hasHeaderRow,
skipHeaderRows
);
+ delegate.startFileFromBeginning();
Preconditions.checkArgument(
!(Strings.isNullOrEmpty(keyColumn) ^ Strings.isNullOrEmpty(valueColumn)),
"Must specify both `keyColumn` and `valueColumn` or neither `keyColumn` nor `valueColumn`"
diff --git a/extensions-core/lookups-cached-global/src/test/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespaceTest.java b/extensions-core/lookups-cached-global/src/test/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespaceTest.java
index d5ac42a..dc50126 100644
--- a/extensions-core/lookups-cached-global/src/test/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespaceTest.java
+++ b/extensions-core/lookups-cached-global/src/test/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespaceTest.java
@@ -96,7 +96,25 @@
);
Assert.assertEquals(ImmutableMap.of("B", "C"), parser.getParser().parseToMap("A,B,C"));
}
-
+ @Test
+ public void testCSVWithHeader()
+ {
+ UriExtractionNamespace.CSVFlatDataParser parser = new UriExtractionNamespace.CSVFlatDataParser(
+ ImmutableList.of("col1", "col2", "col3"),
+ "col2",
+ "col3",
+ true,
+ 1
+ );
+ // The parser returns an empty map because the first row is a skipped row (skipHeaderRows = 1).
+ Assert.assertEquals(ImmutableMap.of(), parser.getParser().parseToMap("row to skip "));
+ // The header row must also be skipped.
+ Assert.assertEquals(ImmutableMap.of(), parser.getParser().parseToMap("col1,col2,col3"));
+ // Verify that the header row was parsed into the field names.
+ Assert.assertEquals(ImmutableList.of("col1", "col2", "col3"), parser.getParser().getFieldNames());
+ // The third row is parsed as data.
+ Assert.assertEquals(ImmutableMap.of("val2", "val3"), parser.getParser().parseToMap("val1,val2,val3"));
+ }
@Test(expected = IllegalArgumentException.class)
public void testBadCSV()
{
@@ -146,6 +164,26 @@
);
Assert.assertEquals(ImmutableMap.of("B", "C"), parser.getParser().parseToMap("A\\u0001B\\u0001C"));
}
+ @Test
+ public void testWithHeaderAndListDelimiterTSV()
+ {
+ UriExtractionNamespace.TSVFlatDataParser parser = new UriExtractionNamespace.TSVFlatDataParser(
+ ImmutableList.of("col1", "col2", "col3"),
+ "\\u0001",
+ "\\u0002", "col2",
+ "col3",
+ true,
+ 1
+ );
+ // skipping one row
+ Assert.assertEquals(ImmutableMap.of(), parser.getParser().parseToMap("Skipping some rows"));
+ // skip the header as well
+ Assert.assertEquals(ImmutableMap.of(), parser.getParser().parseToMap("col1\\u0001col2\\u0001col3"));
+ // Verify that the header row was parsed into the field names.
+ Assert.assertEquals(ImmutableList.of("col1", "col2", "col3"), parser.getParser().getFieldNames());
+ // test if the data row is parsed correctly
+ Assert.assertEquals(ImmutableMap.of("B", "C"), parser.getParser().parseToMap("A\\u0001B\\u0001C"));
+ }
@Test(expected = IllegalArgumentException.class)
public void testBadTSV()