SOLR-17018: add QueryLimits support to Learning To Rank rescoring (#2348)
Co-authored-by: Christine Poerschke <cpoerschke@apache.org>
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index e4bf541..c48284e 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -152,6 +152,9 @@
* SOLR-17198: AffinityPlacementFactory can fail if Shard leadership changes occur while it is collecting metrics.
(Paul McArthur)
+
+* SOLR-17018: Add QueryLimits support to Learning To Rank rescoring.
+ (Alessandro Benedetti)
* SOLR-14892: Queries with shards.info and shards.tolerant can yield multiple null keys in place of shard names
(Mathieu Marie, David Smiley)
diff --git a/solr/core/src/java/org/apache/solr/search/IncompleteRerankingException.java b/solr/core/src/java/org/apache/solr/search/IncompleteRerankingException.java
new file mode 100644
index 0000000..c3f7190
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/IncompleteRerankingException.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search;
+
+public class IncompleteRerankingException extends RuntimeException {
+
+ public IncompleteRerankingException() {
+ super();
+ }
+}
diff --git a/solr/core/src/java/org/apache/solr/search/ReRankCollector.java b/solr/core/src/java/org/apache/solr/search/ReRankCollector.java
index 17f206d..bf4c19b 100644
--- a/solr/core/src/java/org/apache/solr/search/ReRankCollector.java
+++ b/solr/core/src/java/org/apache/solr/search/ReRankCollector.java
@@ -128,22 +128,26 @@
}
ScoreDoc[] mainScoreDocs = mainDocs.scoreDocs;
- ScoreDoc[] mainScoreDocsClone =
- (reRankScaler != null && reRankScaler.scaleScores())
- ? deepCloneAndZeroOut(mainScoreDocs)
- : null;
+ boolean zeroOutScores = reRankScaler != null && reRankScaler.scaleScores();
+ ScoreDoc[] mainScoreDocsClone = deepClone(mainScoreDocs, zeroOutScores);
ScoreDoc[] reRankScoreDocs = new ScoreDoc[Math.min(mainScoreDocs.length, reRankDocs)];
System.arraycopy(mainScoreDocs, 0, reRankScoreDocs, 0, reRankScoreDocs.length);
mainDocs.scoreDocs = reRankScoreDocs;
// If we're scaling scores use the replace rescorer because we just want the re-rank score.
- TopDocs rescoredDocs =
- reRankScaler != null && reRankScaler.scaleScores()
- ? reRankScaler
- .getReplaceRescorer()
- .rescore(searcher, mainDocs, mainDocs.scoreDocs.length)
- : reRankQueryRescorer.rescore(searcher, mainDocs, mainDocs.scoreDocs.length);
+ TopDocs rescoredDocs;
+ try {
+ rescoredDocs =
+ zeroOutScores // previously zero-ed out scores are to be replaced
+ ? reRankScaler
+ .getReplaceRescorer()
+ .rescore(searcher, mainDocs, mainDocs.scoreDocs.length)
+ : reRankQueryRescorer.rescore(searcher, mainDocs, mainDocs.scoreDocs.length);
+ } catch (IncompleteRerankingException ex) {
+ mainDocs.scoreDocs = mainScoreDocsClone;
+ rescoredDocs = mainDocs;
+ }
// Lower howMany to return if we've collected fewer documents.
howMany = Math.min(howMany, mainScoreDocs.length);
@@ -208,13 +212,15 @@
}
}
- private ScoreDoc[] deepCloneAndZeroOut(ScoreDoc[] scoreDocs) {
+ private ScoreDoc[] deepClone(ScoreDoc[] scoreDocs, boolean zeroOut) {
ScoreDoc[] scoreDocs1 = new ScoreDoc[scoreDocs.length];
for (int i = 0; i < scoreDocs.length; i++) {
ScoreDoc scoreDoc = scoreDocs[i];
if (scoreDoc != null) {
scoreDocs1[i] = new ScoreDoc(scoreDoc.doc, scoreDoc.score);
- scoreDoc.score = 0f;
+ if (zeroOut) {
+ scoreDoc.score = 0f;
+ }
}
}
return scoreDocs1;
diff --git a/solr/modules/ltr/src/java/org/apache/solr/ltr/LTRRescorer.java b/solr/modules/ltr/src/java/org/apache/solr/ltr/LTRRescorer.java
index 19ac717..a6b4534 100644
--- a/solr/modules/ltr/src/java/org/apache/solr/ltr/LTRRescorer.java
+++ b/solr/modules/ltr/src/java/org/apache/solr/ltr/LTRRescorer.java
@@ -31,6 +31,8 @@
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.search.Weight;
import org.apache.solr.ltr.interleaving.OriginalRankingLTRScoringQuery;
+import org.apache.solr.search.IncompleteRerankingException;
+import org.apache.solr.search.QueryLimits;
import org.apache.solr.search.SolrIndexSearcher;
/**
@@ -234,6 +236,13 @@
scorer.getDocInfo().setOriginalDocScore(hit.score);
hit.score = scorer.score();
+ if (QueryLimits.getCurrentLimits()
+ .maybeExitWithPartialResults(
+ "Learning To Rank rescoring -"
+ + " The full reranking didn't complete."
+ + " If partial results are tolerated the reranking got reverted and all documents preserved their original score and ranking.")) {
+ throw new IncompleteRerankingException();
+ }
if (hitUpto < topN) {
reranked[hitUpto] = hit;
// if the heap is not full, maybe I want to log the features for this
diff --git a/solr/modules/ltr/src/test-files/featureExamples/features-slow.json b/solr/modules/ltr/src/test-files/featureExamples/features-slow.json
new file mode 100644
index 0000000..a60c47d
--- /dev/null
+++ b/solr/modules/ltr/src/test-files/featureExamples/features-slow.json
@@ -0,0 +1,7 @@
+[
+ {
+ "name" : "slow",
+ "class" : "org.apache.solr.ltr.feature.SolrFeature",
+ "params" : { "q" : "{!func}sleep(1000,999)" }
+ }
+]
diff --git a/solr/modules/ltr/src/test-files/modelExamples/linear-slow-model.json b/solr/modules/ltr/src/test-files/modelExamples/linear-slow-model.json
new file mode 100644
index 0000000..824b9c4
--- /dev/null
+++ b/solr/modules/ltr/src/test-files/modelExamples/linear-slow-model.json
@@ -0,0 +1,14 @@
+{
+ "class": "org.apache.solr.ltr.model.LinearModel",
+ "name": "slowModel",
+ "features": [
+ {
+ "name": "slow"
+ }
+ ],
+ "params": {
+ "weights": {
+ "slow": 1
+ }
+ }
+}
diff --git a/solr/modules/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java b/solr/modules/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java
index c2c47c2..a8924b2 100644
--- a/solr/modules/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java
+++ b/solr/modules/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java
@@ -29,6 +29,9 @@
loadFeatures("features-linear.json");
loadModels("linear-model.json");
+
+ loadFeatures("features-slow.json");
+ loadModels("linear-slow-model.json"); // just a linear model with one feature
}
@AfterClass
@@ -137,4 +140,93 @@
query.add("rq", "{!ltr reRankDocs=3 model=6029760550880411648}");
assertJQ("/query" + query.toQueryString(), "/response/numFound/==0");
}
+
+ @Test
+ public void ltr_expensiveFeatureRescoring_shouldTimeOutAndReturnPartialResults()
+ throws Exception {
+ /* One SolrFeature is defined: {!func}sleep(1000,999)
+ * It simulates a slow feature extraction, sleeping for 1000ms and returning 999 as a score when finished
+ * */
+
+ final String solrQuery = "_query_:{!edismax qf='id' v='8^=10 9^=5 7^=3 6^=1'}";
+ final SolrQuery query = new SolrQuery();
+ query.setQuery(solrQuery);
+ query.setFields("id", "score");
+ query.setRows(4);
+ query.setTimeAllowed(300);
+ query.add("fv", "true");
+ query.add("rq", "{!ltr model=slowModel reRankDocs=3}");
+
+ assertJQ(
+ "/query" + query.toQueryString(),
+ "/response/numFound/==4",
+ "/responseHeader/partialResults/==true",
+ "/responseHeader/partialResultsDetails/=='Limits exceeded! (Learning To Rank rescoring - "
+ + "The full reranking didn\\'t complete. "
+ + "If partial results are tolerated the reranking got reverted and "
+ + "all documents preserved their original score and ranking.)"
+ + ": Query limits: [TimeAllowedLimit:LIMIT EXCEEDED]'",
+ "/response/docs/[0]/id=='8'",
+ "/response/docs/[0]/score==10.0",
+ "/response/docs/[1]/id=='9'",
+ "/response/docs/[1]/score==5.0",
+ "/response/docs/[2]/id=='7'",
+ "/response/docs/[2]/score==3.0",
+ "/response/docs/[3]/id=='6'",
+ "/response/docs/[3]/score==1.0");
+ }
+
+ @Test
+ public void ltr_expensiveFeatureRescoringAndPartialResultsNotTolerated_shouldRaiseException()
+ throws Exception {
+ /* One SolrFeature is defined: {!func}sleep(1000,999)
+ * It simulates a slow feature extraction, sleeping for 1000ms and returning 999 as a score when finished
+ * */
+ final String solrQuery = "_query_:{!edismax qf='id' v='8^=10 9^=5 7^=3 6^=1'}";
+ final SolrQuery query = new SolrQuery();
+ query.setQuery(solrQuery);
+ query.setFields("id", "score");
+ query.setRows(4);
+ query.setTimeAllowed(300);
+ query.add("partialResults", "false");
+ query.add("fv", "true");
+ query.add("rq", "{!ltr model=slowModel reRankDocs=3}");
+
+ assertJQ(
+ "/query" + query.toQueryString(),
+ "/error/msg=='org.apache.solr.search.QueryLimitsExceededException: Limits exceeded! (Learning To Rank rescoring - "
+ + "The full reranking didn\\'t complete. "
+ + "If partial results are tolerated the reranking got reverted and all documents preserved their original score and ranking.)"
+ + ": Query limits: [TimeAllowedLimit:LIMIT EXCEEDED]'");
+ }
+
+ @Test
+ public void ltr_expensiveFeatureRescoringWithinTimeAllowed_shouldReturnRerankedResults()
+ throws Exception {
+ /* One SolrFeature is defined: {!func}sleep(1000,999)
+ * It simulates a slow feature extraction, sleeping for 1000ms and returning 999 as a score when finished
+ * */
+
+ final String solrQuery = "_query_:{!edismax qf='id' v='8^=10 9^=5 7^=3 6^=1'}";
+ final SolrQuery query = new SolrQuery();
+ query.setQuery(solrQuery);
+ query.setFields("id", "score");
+ query.setRows(4);
+ query.setTimeAllowed(5000);
+ query.add("fv", "true");
+ query.add("rq", "{!ltr model=slowModel reRankDocs=3}");
+
+ assertJQ(
+ "/query" + query.toQueryString(),
+ "/response/numFound/==4",
+ "/response/docs/[0]/id=='7'",
+ "/response/docs/[0]/score==999.0",
+ "/response/docs/[1]/id=='8'",
+ "/response/docs/[1]/score==999.0",
+ "/response/docs/[2]/id=='9'",
+ "/response/docs/[2]/score==999.0",
+ "/response/docs/[3]/id=='6'",
+ // original score for the 4th document due to reRankDocs=3 limit
+ "/response/docs/[3]/score==1.0");
+ }
}
diff --git a/solr/solr-ref-guide/modules/query-guide/pages/learning-to-rank.adoc b/solr/solr-ref-guide/modules/query-guide/pages/learning-to-rank.adoc
index 411b6df..ef4c519 100644
--- a/solr/solr-ref-guide/modules/query-guide/pages/learning-to-rank.adoc
+++ b/solr/solr-ref-guide/modules/query-guide/pages/learning-to-rank.adoc
@@ -499,6 +499,17 @@
}}
----
+=== Running a Rerank Query and Query Limits
+
+Apache Solr allows you to define Query Limits to interrupt particularly expensive queries (xref:query-guide:common-query-parameters.adoc#timeallowed-parameter[Time Allowed], xref:query-guide:common-query-parameters.adoc#cpuallowed-parameter[Cpu Allowed]).
+
+If a query limit is exceeded while reranking, the rescoring is aborted and fully reverted.
+
+The original ranked list is returned and the response is marked with the responseHeader 'partialResults'.
+The details of which limit was exceeded are returned in the responseHeader 'partialResultsDetails'.
+
+See xref:query-guide:common-query-parameters.adoc#partialresults-parameter[Partial Results Parameter] for more details on how to handle partial results.
+
=== Running a Rerank Query Interleaving Two Models
To rerank the results of a query, interleaving two models (myModelA, myModelB) add the `rq` parameter to your search, passing two models in input, for example: