DRILL-8411: GoogleSheets Reader Will Not Read More than 1K Rows (#2774)
diff --git a/common/src/main/java/org/apache/drill/common/Typifier.java b/common/src/main/java/org/apache/drill/common/Typifier.java
index 1c4b570..5a416fd 100644
--- a/common/src/main/java/org/apache/drill/common/Typifier.java
+++ b/common/src/main/java/org/apache/drill/common/Typifier.java
@@ -272,7 +272,7 @@
* @param date Input date string
* @return LocalDateTime representation of the input String.
*/
- private static LocalDateTime stringAsDateTime(String date) {
+ public static LocalDateTime stringAsDateTime(String date) {
for (DateTimeFormatter format : formats) {
try {
return LocalDateTime.parse(date, format);
@@ -289,7 +289,7 @@
* @param date Input date string
* @return LocalDateTime representation of the input String.
*/
- private static LocalDate stringAsDate(String date) {
+ public static LocalDate stringAsDate(String date) {
for (DateTimeFormatter format : dateFormats) {
try {
return LocalDate.parse(date, format);
diff --git a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/GoogleSheetsBatchReader.java b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/GoogleSheetsBatchReader.java
index db518fd..90eebe8 100644
--- a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/GoogleSheetsBatchReader.java
+++ b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/GoogleSheetsBatchReader.java
@@ -62,7 +62,7 @@
// The default batch size is 1k rows. It appears that Google sets the maximum batch size at 1000
// rows. There is conflicting information about this online, but during testing, ranges with more than
// 1000 rows would throw invalid request errors.
- private static final int BATCH_SIZE = 1000;
+ protected static final int BATCH_SIZE = 1000;
private static final String SHEET_COLUMN_NAME = "_sheets";
private static final String TITLE_COLUMN_NAME = "_title";
@@ -225,12 +225,7 @@
@Override
public boolean next() {
logger.debug("Processing batch.");
- while (!rowWriter.isFull()) {
- if (!processRow()) {
- return false;
- }
- }
- return true;
+ return processRow();
}
private boolean processRow() {
@@ -240,12 +235,16 @@
// Get next range
String range = rangeBuilder.next();
if (range == null) {
+ rangeBuilder.lastBatch();
return false;
}
data = GoogleSheetsUtils.getDataFromRange(service, sheetID, range);
} else {
List<String> batches = rangeBuilder.nextBatch();
- if (!batches.isEmpty()) {
+ if (batches == null) {
+ rangeBuilder.lastBatch();
+ return false;
+ } else if (!batches.isEmpty()) {
data = GoogleSheetsUtils.getBatchData(service, sheetID, batches);
} else {
data = Collections.emptyList();
@@ -293,12 +292,14 @@
rowWriter.save();
}
- // If the results contained less than the batch size, stop iterating.
- if (rowWriter.rowCount() < BATCH_SIZE) {
+ // If there is another batch, return true
+ if (rowWriter.rowCount() + BATCH_SIZE < rangeBuilder.getRowCount()) {
+ return true;
+ } else {
+ // If the results contained less than the batch size, stop iterating.
rangeBuilder.lastBatch();
return false;
}
- return true;
}
private void projectMetadata() {
diff --git a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/columns/GoogleSheetsColumnWriter.java b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/columns/GoogleSheetsColumnWriter.java
index 471106c..37aa8cd 100644
--- a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/columns/GoogleSheetsColumnWriter.java
+++ b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/columns/GoogleSheetsColumnWriter.java
@@ -19,6 +19,7 @@
package org.apache.drill.exec.store.googlesheets.columns;
import org.apache.commons.lang3.StringUtils;
+import org.apache.drill.common.Typifier;
import org.apache.drill.exec.physical.resultSet.RowSetLoader;
import org.apache.drill.exec.vector.accessor.ScalarWriter;
import org.slf4j.Logger;
@@ -27,6 +28,7 @@
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalTime;
+import java.time.ZoneOffset;
public abstract class GoogleSheetsColumnWriter {
protected static final Logger logger = LoggerFactory.getLogger(GoogleSheetsColumnWriter.class);
@@ -106,7 +108,7 @@
if (StringUtils.isNotEmpty(stringValue)) {
LocalDate finalValue;
try {
- finalValue = LocalDate.parse(stringValue);
+ finalValue = Typifier.stringAsDate(stringValue);
} catch (NumberFormatException e) {
finalValue = null;
}
@@ -211,7 +213,7 @@
if (StringUtils.isNotEmpty(stringValue)) {
Instant finalValue;
try {
- finalValue = Instant.parse(stringValue);
+ finalValue = Typifier.stringAsDateTime(stringValue).toInstant(ZoneOffset.UTC);
} catch (NumberFormatException e) {
finalValue = null;
}
diff --git a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/utils/GoogleSheetsRangeBuilder.java b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/utils/GoogleSheetsRangeBuilder.java
index f5e8417..eb03193 100644
--- a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/utils/GoogleSheetsRangeBuilder.java
+++ b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/utils/GoogleSheetsRangeBuilder.java
@@ -146,7 +146,7 @@
if (!hasMore) {
return null;
} else if (getStartIndex() > getEndIndex()) {
- hasMore = false;
+ lastBatch();
return null;
}
@@ -197,6 +197,9 @@
private List<String> buildBatchList() {
if (isStarQuery) {
return null;
+ } else if (getStartIndex() > getEndIndex()) {
+ hasMore = false;
+ return null;
}
List<String> batchList = new ArrayList<>();
@@ -238,6 +241,10 @@
return buildBatchList();
}
+ public int getRowCount() {
+ return rowCount;
+ }
+
@Override
public String toString() {
return new PlanStringBuilder(this)
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestAggregateFunctions.java b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestAggregateFunctions.java
index edf6190..804aea3 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestAggregateFunctions.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestAggregateFunctions.java
@@ -1275,7 +1275,7 @@
@Test
public void testAggregateWithPivot() throws Exception {
String query = "SELECT * FROM (\n" +
- "SELECT education_level, salary, marital_status, extract(year from age(birth_date)) age\n" +
+ "SELECT education_level, salary, marital_status, extract(year from age('2023-02-23', birth_date)) age\n" +
"FROM cp.`employee.json`)\n" +
"PIVOT (avg(salary) avg_salary, avg(age) avg_age FOR marital_status IN ('M' married, 'S' single))";
testBuilder()