DRILL-8414: Index Paginator Not Working When Provided URL (#2779)
diff --git a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpBatchReader.java b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpBatchReader.java
index 579643e..e6661ad 100644
--- a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpBatchReader.java
+++ b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpBatchReader.java
@@ -357,31 +357,58 @@
// At this point we know that there are more pages to come. Send the data to the paginator and
// use that to generate the next page.
- if (StringUtils.isNotEmpty(indexPaginator.getIndexParam())) {
+ if (StringUtils.isNotEmpty(indexPaginator.getIndexParam()) && paginationFields.containsKey(indexPaginator.getIndexParam())) {
indexPaginator.setIndexValue(paginationFields.get(indexPaginator.getIndexParam()).toString());
}
- if (StringUtils.isNotEmpty(indexPaginator.getNextPageParam())) {
- indexPaginator.setNextPageValue(paginationFields.get(indexPaginator.getNextPageParam()).toString());
+ if (StringUtils.isNotEmpty(indexPaginator.getNextPageParam()) && paginationFields.containsKey(indexPaginator.getNextPageParam())) {
+ Object nextPageValue = paginationFields.get(indexPaginator.getNextPageParam());
+ if (nextPageValue != null) {
+ indexPaginator.setNextPageValue(nextPageValue.toString());
+ } else {
+ // If the next URL is null, notify the paginator and stop paginating
+ paginator.notifyPartialPage();
+ indexPaginator.setNextPageValue(null);
+ }
}
} else {
// This covers the case of keyset/index pagination where there isn't a boolean parameter to indicate whether there are more results.
// In this case, we will interpret the absence of the pagination field, receiving the same value, or a null value as the marker to stop pagination.
- if ( (!paginationFields.containsKey(indexPaginator.getIndexParam())) ||
- paginationFields.get(indexPaginator.getIndexParam()) == null
- ) {
- // End pagination
- paginator.notifyPartialPage();
- } else {
- // Otherwise, check to see if the field is present but empty, or contains the value from the last page.
- // This will prevent runaway pagination calls.
- String indexParameter = paginationFields.get(indexPaginator.getIndexParam()).toString();
- // Empty value or the last value is the same as the current one.
- if (StringUtils.isEmpty(indexParameter) || (StringUtils.equals(indexParameter, indexPaginator.getLastIndexValue()))) {
+
+ // This represents the case when the index field is being used.
+ if (StringUtils.isNotEmpty(indexPaginator.getIndexParam())) {
+ if ((!paginationFields.containsKey(indexPaginator.getIndexParam())) || paginationFields.get(indexPaginator.getIndexParam()) == null) {
+ // End pagination
paginator.notifyPartialPage();
} else {
- // Whew! We made it... get the next page.
- indexPaginator.setIndexValue(indexParameter);
+ // Otherwise, check to see if the field is present but empty, or contains the value from the last page.
+ // This will prevent runaway pagination calls.
+ String indexParameter = paginationFields.get(indexPaginator.getIndexParam()).toString();
+ // Empty value or the last value is the same as the current one.
+ if (StringUtils.isEmpty(indexParameter) || (StringUtils.equals(indexParameter, indexPaginator.getLastIndexValue()))) {
+ paginator.notifyPartialPage();
+ } else {
+ // Whew! We made it... get the next page.
+ indexPaginator.setIndexValue(indexParameter);
+ }
+ }
+ } else {
+ // Case when the next page field is used.
+ if ((!paginationFields.containsKey(indexPaginator.getNextPageParam()))
+ || paginationFields.get(indexPaginator.getNextPageParam()) == null
+ || paginationFields.get(indexPaginator.getNextPageParam()) == "true") {
+ // End pagination
+ paginator.notifyPartialPage();
+ } else {
+ // Get the next page
+ String nextURL = paginationFields.get(indexPaginator.getNextPageParam()).toString();
+ // Empty value or the last value is the same as the current one.
+ if (StringUtils.isEmpty(nextURL) || (StringUtils.equals(nextURL, indexPaginator.getLastNextPageValue()))) {
+ paginator.notifyPartialPage();
+ } else {
+ // Whew! We made it... get the next page.
+ indexPaginator.setNextPageValue(nextURL);
+ }
}
}
}
diff --git a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/IndexPaginator.java b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/IndexPaginator.java
index 67bc89c..75fcba3 100644
--- a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/IndexPaginator.java
+++ b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/paginator/IndexPaginator.java
@@ -39,6 +39,7 @@
private String indexValue;
private String lastIndexValue;
+ private String lastNextPageValue;
private Boolean hasMoreValue;
private String nextPageValue;
private int pageCount;
@@ -48,13 +49,14 @@
this.hasMoreParam = hasMoreParam;
this.indexParam = indexParam;
this.nextPageParam = nextPageParam;
+ this.lastNextPageValue = "";
+ this.lastIndexValue = "";
pageCount = 0;
}
@Override
public boolean hasNext() {
return !partialPageReceived;
-
}
public String getIndexParam() {
@@ -81,7 +83,7 @@
}
public void setNextPageValue(String nextPageValue) {
- this.lastIndexValue = this.nextPageValue;
+ this.lastNextPageValue = this.nextPageValue;
this.nextPageValue = nextPageValue;
}
@@ -89,6 +91,10 @@
return lastIndexValue;
}
+ public String getLastNextPageValue() {
+ return lastNextPageValue;
+ }
+
public boolean isFirstPage() {
return pageCount < 1;
}
@@ -116,6 +122,36 @@
}
builder.removeAllEncodedQueryParameters(indexParam);
builder.addQueryParameter(indexParam, indexValue);
+ } else if (StringUtils.isNotEmpty(nextPageValue)) {
+ // Case when we're using the next page URL. We have two cases here:
+ // 1. The nextPage contains the full URL of the next page.
+ // 2. The next page contains the path but lacks the base URL.
+ if (StringUtils.startsWith(nextPageValue, "http://") ||
+ StringUtils.startsWith(nextPageValue, "https://")) {
+ pageCount++;
+ return nextPageValue;
+ } else {
+ // If the next page just contains the path, we have to reconstruct a URL from the incoming path.
+ int segmentIndex = 0;
+
+ // Remove leading slash in path to avoid double slashes in URL
+ if (nextPageValue.startsWith("/")) {
+ nextPageValue = nextPageValue.substring(1);
+ }
+
+ // Now remove the path segments and replace with the updated ones from the URL.
+ for (String segment : builder.build().pathSegments()) {
+ if (nextPageValue.contains(segment)) {
+ for (int i = builder.build().pathSegments().size() - 1; i >= segmentIndex; i--) {
+ builder.removePathSegment(i);
+ }
+ break;
+ }
+ segmentIndex++;
+ }
+ builder.addPathSegments(nextPageValue);
+ }
+
}
pageCount++;
return builder.build().url().toString();
diff --git a/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestPagination.java b/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestPagination.java
index ed30d8d..afa5fe3 100644
--- a/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestPagination.java
+++ b/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestPagination.java
@@ -65,6 +65,14 @@
private static String TEST_JSON_INDEX_PAGE2;
private static String TEST_JSON_INDEX_PAGE3;
private static String TEST_JSON_INDEX_PAGE4;
+ private static String TEST_JSON_INDEX_PAGE5;
+ private static String TEST_JSON_INDEX_PAGE6;
+ private static String TEST_JSON_INDEX_PAGE7;
+ private static String TEST_JSON_INDEX_PAGE8;
+ private static String TEST_JSON_INDEX_PAGE9;
+ private static String TEST_JSON_INDEX_PAGE10;
+ private static String TEST_JSON_INDEX_PAGE11;
+ private static String TEST_JSON_INDEX_PAGE12;
private static String TEST_JSON_NESTED_INDEX;
private static String TEST_JSON_NESTED_INDEX2;
private static String TEST_XML_PAGE1;
@@ -89,6 +97,14 @@
TEST_JSON_INDEX_PAGE3 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/index_response3.json"), Charsets.UTF_8).read();
TEST_JSON_INDEX_PAGE4 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/index_response4.json"), Charsets.UTF_8).read();
+ TEST_JSON_INDEX_PAGE5 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/index_response5.json"), Charsets.UTF_8).read();
+ TEST_JSON_INDEX_PAGE6 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/index_response6.json"), Charsets.UTF_8).read();
+ TEST_JSON_INDEX_PAGE7 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/index_response7.json"), Charsets.UTF_8).read();
+ TEST_JSON_INDEX_PAGE8 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/index_response8.json"), Charsets.UTF_8).read();
+ TEST_JSON_INDEX_PAGE9 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/index_response9.json"), Charsets.UTF_8).read();
+ TEST_JSON_INDEX_PAGE10 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/index_response10.json"), Charsets.UTF_8).read();
+ TEST_JSON_INDEX_PAGE11 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/index_response11.json"), Charsets.UTF_8).read();
+ TEST_JSON_INDEX_PAGE12 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/index_response12.json"), Charsets.UTF_8).read();
TEST_JSON_NESTED_INDEX = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/nested_pagination_fields.json"), Charsets.UTF_8).read();
TEST_JSON_NESTED_INDEX2 = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/nested_pagination_fields2.json"), Charsets.UTF_8).read();
@@ -171,6 +187,37 @@
.inputType("json")
.build();
+ HttpPaginatorConfig nextPagePaginator = HttpPaginatorConfig.builder()
+ .nextPageParam("nextPage")
+ .hasMoreParam("has-more")
+ .method("index")
+ .build();
+
+ HttpApiConfig mockJsonConfigWithNextPagePaginator = HttpApiConfig.builder()
+ .url("http://localhost:8092/json")
+ .method("get")
+ .headers(headers)
+ .requireTail(false)
+ .dataPath("companies")
+ .paginator(nextPagePaginator)
+ .inputType("json")
+ .build();
+
+ HttpPaginatorConfig nextPagePaginatorNoHasMore = HttpPaginatorConfig.builder()
+ .nextPageParam("nextPage")
+ .method("index")
+ .build();
+
+ HttpApiConfig mockJsonConfigWithNextPagePaginatorNoHasMore = HttpApiConfig.builder()
+ .url("http://localhost:8092/json")
+ .method("get")
+ .headers(headers)
+ .requireTail(false)
+ .dataPath("companies")
+ .paginator(nextPagePaginatorNoHasMore)
+ .inputType("json")
+ .build();
+
HttpPaginatorConfig nestedIndexPaginator = HttpPaginatorConfig.builder()
.indexParam("after")
.method("index")
@@ -274,6 +321,8 @@
configs.put("csv_paginator", mockCsvConfigWithPaginator);
configs.put("json_index", mockJsonConfigWithKeyset);
configs.put("json_index_datapath", mockJsonConfigWithKeysetAndDataPath);
+ configs.put("next_page", mockJsonConfigWithNextPagePaginator);
+ configs.put("next_page2", mockJsonConfigWithNextPagePaginatorNoHasMore);
configs.put("nested_keyset", mockJsonConfigWitNestedKeyset);
configs.put("nested_keyset_and_datapath", mockJsonConfigWitNestedKeysetAndDataPath);
configs.put("json_paginator", mockJsonConfigWithPaginator);
@@ -447,6 +496,75 @@
assertEquals(4, count);
}
}
+
+ @Test
+ public void nextPagePaginationWithURLAndHasMore() throws Exception {
+ String sql = "SELECT * FROM `local`.`next_page`";
+ try (MockWebServer server = startServer()) {
+
+ server.enqueue(new MockResponse().setResponseCode(200).setBody(TEST_JSON_INDEX_PAGE5));
+ server.enqueue(new MockResponse().setResponseCode(200).setBody(TEST_JSON_INDEX_PAGE6));
+ server.enqueue(new MockResponse().setResponseCode(200).setBody(TEST_JSON_INDEX_PAGE7));
+
+ List<QueryDataBatch> results = client.queryBuilder()
+ .sql(sql)
+ .results();
+
+ int count = 0;
+ for(QueryDataBatch b : results){
+ count += b.getHeader().getRowCount();
+ b.release();
+ }
+ assertEquals(3, results.size());
+ assertEquals(6, count);
+ }
+ }
+
+ @Test
+ public void nextPagePaginationWithURLAndNoHasMore() throws Exception {
+ String sql = "SELECT * FROM `local`.`next_page2`";
+ try (MockWebServer server = startServer()) {
+
+ server.enqueue(new MockResponse().setResponseCode(200).setBody(TEST_JSON_INDEX_PAGE8));
+ server.enqueue(new MockResponse().setResponseCode(200).setBody(TEST_JSON_INDEX_PAGE9));
+
+ List<QueryDataBatch> results = client.queryBuilder()
+ .sql(sql)
+ .results();
+
+ int count = 0;
+ for(QueryDataBatch b : results){
+ count += b.getHeader().getRowCount();
+ b.release();
+ }
+ assertEquals(2, results.size());
+ assertEquals(4, count);
+ }
+ }
+
+ @Test
+ public void nextPagePaginationWithPathAndNoHasMore() throws Exception {
+ String sql = "SELECT * FROM `local`.`next_page2`";
+ try (MockWebServer server = startServer()) {
+
+ server.enqueue(new MockResponse().setResponseCode(200).setBody(TEST_JSON_INDEX_PAGE10));
+ server.enqueue(new MockResponse().setResponseCode(200).setBody(TEST_JSON_INDEX_PAGE11));
+ server.enqueue(new MockResponse().setResponseCode(200).setBody(TEST_JSON_INDEX_PAGE12));
+
+ List<QueryDataBatch> results = client.queryBuilder()
+ .sql(sql)
+ .results();
+
+ int count = 0;
+ for(QueryDataBatch b : results){
+ count += b.getHeader().getRowCount();
+ b.release();
+ }
+ assertEquals(3, results.size());
+ assertEquals(6, count);
+ }
+ }
+
@Test
public void jsonQueryWithoutHasMore() throws Exception {
String sql = "SELECT * FROM `local`.`nested_keyset` LIMIT 4";
diff --git a/contrib/storage-http/src/test/resources/data/index_response10.json b/contrib/storage-http/src/test/resources/data/index_response10.json
new file mode 100644
index 0000000..9f6d854
--- /dev/null
+++ b/contrib/storage-http/src/test/resources/data/index_response10.json
@@ -0,0 +1,14 @@
+{
+ "offset": 115279791,
+ "nextPage": "/json/services/data/v37.0/query/01gp000000AJ51VAAT-800",
+ "companies": [
+ {
+ "portalId": 62515,
+ "companyId": 115200636
+ },
+ {
+ "portalId": 62515,
+ "companyId": 115279791
+ }
+ ]
+}
diff --git a/contrib/storage-http/src/test/resources/data/index_response11.json b/contrib/storage-http/src/test/resources/data/index_response11.json
new file mode 100644
index 0000000..80436f6
--- /dev/null
+++ b/contrib/storage-http/src/test/resources/data/index_response11.json
@@ -0,0 +1,14 @@
+{
+ "offset": 115279791,
+ "nextPage": "/json/services/data/v37.0/query/01gp000000AJ51VAAT-1600",
+ "companies": [
+ {
+ "portalId": 62515,
+ "companyId": 115200636
+ },
+ {
+ "portalId": 62515,
+ "companyId": 115279791
+ }
+ ]
+}
diff --git a/contrib/storage-http/src/test/resources/data/index_response12.json b/contrib/storage-http/src/test/resources/data/index_response12.json
new file mode 100644
index 0000000..d13e412
--- /dev/null
+++ b/contrib/storage-http/src/test/resources/data/index_response12.json
@@ -0,0 +1,14 @@
+{
+ "offset": 115279791,
+ "nextPage": null,
+ "companies": [
+ {
+ "portalId": 62515,
+ "companyId": 115200636
+ },
+ {
+ "portalId": 62515,
+ "companyId": 115279791
+ }
+ ]
+}
diff --git a/contrib/storage-http/src/test/resources/data/index_response5.json b/contrib/storage-http/src/test/resources/data/index_response5.json
new file mode 100644
index 0000000..f99eeef
--- /dev/null
+++ b/contrib/storage-http/src/test/resources/data/index_response5.json
@@ -0,0 +1,15 @@
+{
+ "has-more": true,
+ "offset": 115279791,
+ "nextPage": "http://localhost:8092/json?page=2",
+ "companies": [
+ {
+ "portalId": 62515,
+ "companyId": 115200636
+ },
+ {
+ "portalId": 62515,
+ "companyId": 115279791
+ }
+ ]
+}
diff --git a/contrib/storage-http/src/test/resources/data/index_response6.json b/contrib/storage-http/src/test/resources/data/index_response6.json
new file mode 100644
index 0000000..72e21df
--- /dev/null
+++ b/contrib/storage-http/src/test/resources/data/index_response6.json
@@ -0,0 +1,15 @@
+{
+ "has-more": true,
+ "offset": 115279791,
+ "nextPage": "http://localhost:8092/json?page=3",
+ "companies": [
+ {
+ "portalId": 625154,
+ "companyId": 1152006364
+ },
+ {
+ "portalId": 625415,
+ "companyId": 1152379791
+ }
+ ]
+}
\ No newline at end of file
diff --git a/contrib/storage-http/src/test/resources/data/index_response7.json b/contrib/storage-http/src/test/resources/data/index_response7.json
new file mode 100644
index 0000000..419a854
--- /dev/null
+++ b/contrib/storage-http/src/test/resources/data/index_response7.json
@@ -0,0 +1,15 @@
+{
+ "has-more": false,
+ "offset": 115279791,
+ "nextPage": null,
+ "companies": [
+ {
+ "portalId": 625155,
+ "companyId": 1152005636
+ },
+ {
+ "portalId": 625315,
+ "companyId": 1152479791
+ }
+ ]
+}
diff --git a/contrib/storage-http/src/test/resources/data/index_response8.json b/contrib/storage-http/src/test/resources/data/index_response8.json
new file mode 100644
index 0000000..6722737
--- /dev/null
+++ b/contrib/storage-http/src/test/resources/data/index_response8.json
@@ -0,0 +1,14 @@
+{
+ "offset": 115279791,
+ "nextPage": "http://localhost:8092/json?page=2",
+ "companies": [
+ {
+ "portalId": 62515,
+ "companyId": 115200636
+ },
+ {
+ "portalId": 62515,
+ "companyId": 115279791
+ }
+ ]
+}
diff --git a/contrib/storage-http/src/test/resources/data/index_response9.json b/contrib/storage-http/src/test/resources/data/index_response9.json
new file mode 100644
index 0000000..d13e412
--- /dev/null
+++ b/contrib/storage-http/src/test/resources/data/index_response9.json
@@ -0,0 +1,14 @@
+{
+ "offset": 115279791,
+ "nextPage": null,
+ "companies": [
+ {
+ "portalId": 62515,
+ "companyId": 115200636
+ },
+ {
+ "portalId": 62515,
+ "companyId": 115279791
+ }
+ ]
+}