Stop exploring HNSW graph if scores are not getting better. (#12770)
I noticed while testing lower dimensionality and quantization, we would explore the HNSW graph way too much. I was stuck figuring out why until I noticed the searcher checks for distance equality (not just if the distance is better) when exploring neighbors-of-neighbors. This seems like a bad heuristic, but to double check I looked at what nmslib does. This pointed me back to this commit: nmslib/nmslib#106
Seems like this performance hitch was discovered awhile ago :).
This commit adjusts HNSW to only explore the graph layer if the distance is actually better.
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 12b058f..8fbde7a 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -288,6 +288,8 @@
* GITHUB#12736: Fix NullPointerException when Monitor.getQuery cannot find the requested queryId (Davis Cook)
+* GITHUB#12770: Stop exploring HNSW graph if scores are not getting better. (Ben Trent)
+
Build
---------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
index c18c16c..0135fc5 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
@@ -174,8 +174,7 @@
}
float friendSimilarity = scorer.score(friendOrd);
visitedCount++;
- if (friendSimilarity > currentScore
- || (friendSimilarity == currentScore && friendOrd < currentEp)) {
+ if (friendSimilarity > currentScore) {
currentScore = friendSimilarity;
currentEp = friendOrd;
foundBetter = true;
@@ -243,7 +242,7 @@
}
float friendSimilarity = scorer.score(friendOrd);
results.incVisitedCount(1);
- if (friendSimilarity >= minAcceptedSimilarity) {
+ if (friendSimilarity > minAcceptedSimilarity) {
candidates.add(friendOrd, friendSimilarity);
if (acceptOrds == null || acceptOrds.get(friendOrd)) {
if (results.collect(friendOrd, friendSimilarity)) {
diff --git a/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java b/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java
index 0353125..d45902d 100644
--- a/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java
+++ b/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java
@@ -281,12 +281,12 @@
DocIdSetIterator it = scorer.iterator();
assertEquals(3, it.cost());
- assertEquals(1, it.nextDoc());
- assertEquals(1 / 6f, scorer.score(), 0);
- assertEquals(3, it.advance(3));
+ assertEquals(2, it.nextDoc());
assertEquals(1 / 2f, scorer.score(), 0);
+ assertEquals(4, it.advance(4));
+ assertEquals(1 / 6f, scorer.score(), 0);
- assertEquals(NO_MORE_DOCS, it.advance(4));
+ assertEquals(NO_MORE_DOCS, it.advance(5));
expectThrows(ArrayIndexOutOfBoundsException.class, scorer::score);
}
}
@@ -384,7 +384,7 @@
assertEquals(0, matched.getDetails().length);
assertEquals("within top 3 docs", matched.getDescription());
- Explanation nomatch = searcher.explain(query, 4);
+ Explanation nomatch = searcher.explain(query, 1);
assertFalse(nomatch.isMatch());
assertEquals(0f, nomatch.getValue());
assertEquals(0, matched.getDetails().length);