| diff --git a/lucene/queries/src/java/org/apache/lucene/queries/ScoreDocRank.java b/lucene/queries/src/java/org/apache/lucene/queries/ScoreDocRank.java |
| new file mode 100644 |
| index 0000000..a711271 |
| --- /dev/null |
| +++ b/lucene/queries/src/java/org/apache/lucene/queries/ScoreDocRank.java |
| @@ -0,0 +1,129 @@ |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| +package org.apache.lucene.queries; |
| + |
| +import java.util.HashMap; |
| +import java.util.List; |
| +import java.util.ArrayList; |
| + |
| +import org.apache.lucene.search.ScoreDoc; |
| +import org.apache.lucene.search.TopDocs; |
| + |
| +/** |
| + * Holds one query hit together with the index of the {@link TopDocs} that produced it |
| + * and the rank of the hit within that TopDocs. |
| + * <p> |
| + * The {@link #joinDocsWithRanks} method can be used when one is interested |
| + * in the ranks of some of the documents produced by queries, |
| + * for example for creating statistics like precision and recall. |
| + */ |
| +public class ScoreDocRank { |
| + |
| +  /** The score of this document for the query. */ |
| +  public float score; |
| + |
| +  /** A hit document's number. |
| +   * @see org.apache.lucene.search.IndexSearcher#doc(int) */ |
| +  public int doc; |
| + |
| +  /** The index (starting from 0) of the producer, see {@link #joinDocsWithRanks}. */ |
| +  public int index; |
| + |
| +  /** The rank (starting from 0) of the document in the result from the producer. */ |
| +  public int rank; |
| + |
| +  /** Constructs a ScoreDocRank from a doc number, its score, a producer index and a rank. */ |
| +  public ScoreDocRank(int doc, float score, int index, int rank) { |
| +    this.doc = doc; |
| +    this.score = score; |
| +    this.index = index; |
| +    this.rank = rank; |
| +  } |
| + |
| +  /** Constructs a ScoreDocRank from the doc and score of the given ScoreDoc. */ |
| +  public ScoreDocRank(ScoreDoc sd, int index, int rank) { |
| +    this(sd.doc, sd.score, index, rank); |
| +  } |
| + |
| +  /** Returns a description of all four fields, as a convenience for debugging. */ |
| +  @Override |
| +  public String toString() { |
| +    return "ScoreDocRank(doc=" + doc |
| +        + " score=" + score |
| +        + " index=" + index |
| +        + " rank=" + rank |
| +        + ")"; |
| +  } |
| + |
| +  /** Join the given TopDocs's by document |
| +   * keeping only the hits for documents occurring in at least two TopDocs's. |
| +   * <br> |
| +   * This implementation works best when the last TopDocs is the largest one. |
| +   * <br> |
| +   * For each {@link ScoreDocRank} result hit provide: |
| +   * <ul> |
| +   * <li>the index (starting from 0) of the TopDocs as given here, |
| +   * <li>the rank in the TopDocs that the hit originated from. |
| +   * </ul> |
| +   * ScoreDocRanks with equal docs are adjacent in the result. |
| +   * <p> |
| +   * Only the hits actually present in {@link TopDocs#scoreDocs} are joined; |
| +   * {@link TopDocs#totalHits} may be larger when a search was limited to its top hits. |
| +   * <p> |
| +   * Please note that joining TopDocs's by document only makes sense when they |
| +   * were produced from the same {@link org.apache.lucene.index.IndexReader}. |
| +   * @param topDocss the results to join, in the order that determines the producer index |
| +   * @return the joined hits, an empty array when no document occurs at least twice |
| +   */ |
| +  public static ScoreDocRank[] joinDocsWithRanks(TopDocs... topDocss) { |
| +    HashMap<Integer, List<ScoreDocRank>> sdrsByDoc = new HashMap<>(); |
| +    int resultSize = 0; |
| +    for (int tdi = 0; tdi < topDocss.length; tdi++) { |
| +      ScoreDoc[] scoreDocs = topDocss[tdi].scoreDocs; |
| +      // Use the number of hits present: totalHits can exceed scoreDocs.length. |
| +      int numHits = (scoreDocs == null) ? 0 : scoreDocs.length; |
| +      for (int rank = 0; rank < numHits; rank++) { |
| +        ScoreDoc scoreDoc = scoreDocs[rank]; |
| +        List<ScoreDocRank> lst = sdrsByDoc.get(scoreDoc.doc); |
| +        if (lst == null) { // first doc occurrence |
| +          if (tdi + 1 == topDocss.length) { // only in last TopDocs, this cannot be joined. |
| +            continue; |
| +          } |
| +          lst = new ArrayList<>(topDocss.length - tdi); |
| +          lst.add(new ScoreDocRank(scoreDoc, tdi, rank)); |
| +          sdrsByDoc.put(scoreDoc.doc, lst); |
| +        } else { // later occurrence of the same doc |
| +          lst.add(new ScoreDocRank(scoreDoc, tdi, rank)); |
| +          resultSize += (lst.size() == 2) ? 2 : 1; // the second hit also brings in the first |
| +        } |
| +      } |
| +    } |
| + |
| +    ScoreDocRank[] res = new ScoreDocRank[resultSize]; |
| +    int r = 0; |
| +    for (List<ScoreDocRank> lsdr: sdrsByDoc.values()) { |
| +      if (lsdr.size() >= 2) { |
| +        for (ScoreDocRank sdr: lsdr) { |
| +          res[r++] = sdr; |
| +        } |
| +      } |
| +    } |
| +    assert r == res.length; |
| + |
| +    return res; |
| +  } |
| +} |
| diff --git a/lucene/queries/src/test/org/apache/lucene/queries/TestScoreDocRank.java b/lucene/queries/src/test/org/apache/lucene/queries/TestScoreDocRank.java |
| new file mode 100644 |
| index 0000000..b8f5825 |
| --- /dev/null |
| +++ b/lucene/queries/src/test/org/apache/lucene/queries/TestScoreDocRank.java |
| @@ -0,0 +1,106 @@ |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| +package org.apache.lucene.queries; |
| + |
| +import java.util.Comparator; |
| + |
| +import org.apache.lucene.util.ArrayUtil; |
| + |
| +import org.apache.lucene.index.Term; |
| +import org.apache.lucene.search.BaseExplanationTestCase; |
| +import org.apache.lucene.search.TermQuery; |
| +import org.apache.lucene.search.TopDocs; |
| + |
| +/** Tests {@link ScoreDocRank#joinDocsWithRanks} with the inherited searcher and documents. */ |
| +public class TestScoreDocRank extends BaseExplanationTestCase { |
| +  /** Returns the hits for a single term, asking for at most docFields.length hits. */ |
| +  TopDocs topDocsFromTerm(Term t) throws Exception { |
| +    return searcher.search(new TermQuery(t), docFields.length); |
| +  } |
| + |
| +  /** Without a document occurring in at least two TopDocs the result is empty. */ |
| +  public void testEmpty() throws Exception { |
| +    TopDocs tdtxt = topDocsFromTerm(new Term(ALTFIELD, "xx"));// matches docs 2 and 3 |
| +    TopDocs td0 = topDocsFromTerm(new Term(KEY, "0")); |
| +    ScoreDocRank[] res = ScoreDocRank.joinDocsWithRanks(); // no input at all |
| +    assertEquals("result length", 0, res.length); |
| + |
| +    res = ScoreDocRank.joinDocsWithRanks(tdtxt); // single TopDocs, no docs occur twice |
| +    assertEquals("result length", 0, res.length); |
| + |
| +    res = ScoreDocRank.joinDocsWithRanks(tdtxt, td0); // two TopDocs, matching different docs. |
| +    assertEquals("result length", 0, res.length); |
| +  } |
| + |
| +  /** Docs 2 and 3 each occur in their key TopDocs and in the term TopDocs: four joined hits. */ |
| +  public void testOneTermTwoDocsRankBoth() throws Exception { |
| +    TopDocs tdtxt = topDocsFromTerm(new Term(ALTFIELD, "xx")); // matches docs 2 and 3 |
| +    TopDocs td2 = topDocsFromTerm(new Term(KEY, "2")); |
| +    TopDocs td3 = topDocsFromTerm(new Term(KEY, "3")); |
| +    // reverse order td3, td2 for testing, tdtxt with largest result at end |
| +    ScoreDocRank[] res = ScoreDocRank.joinDocsWithRanks(td3, td2, tdtxt); |
| +    assertEquals("result length", 4, res.length); |
| + |
| +    // sort by doc for presence tests, stable sort keeps input order |
| +    ArrayUtil.timSort(res, new Comparator<ScoreDocRank>() { |
| +      @Override |
| +      public int compare(ScoreDocRank a, ScoreDocRank b) { |
| +        return Integer.compare(a.doc, b.doc); // no subtraction, that can overflow |
| +      } |
| +    }); |
| + |
| +    assertEquals(2, res[0].doc); |
| +    assertEquals(1, res[0].index); // td2 has index 1 |
| +    assertEquals(0, res[0].rank); |
| + |
| +    assertEquals(2, res[1].doc); |
| +    assertEquals(2, res[1].index); // tdtxt has index 2 |
| +    assertEquals(0, res[1].rank); |
| + |
| +    assertEquals(3, res[2].doc); |
| +    assertEquals(0, res[2].index); // td3 has index 0 |
| +    assertEquals(0, res[2].rank); |
| + |
| +    assertEquals(3, res[3].doc); |
| +    assertEquals(2, res[3].index); // tdtxt has index 2 |
| +    assertEquals(1, res[3].rank); |
| +  } |
| + |
| +  /** Only doc 3 occurs in both TopDocs, so only its two hits are joined. */ |
| +  public void testOneTermTwoDocsRankOne() throws Exception { |
| +    TopDocs tdtxt = topDocsFromTerm(new Term(ALTFIELD, "xx")); // matches docs 2 and 3 |
| +    TopDocs td3 = topDocsFromTerm(new Term(KEY, "3")); |
| +    ScoreDocRank[] res = ScoreDocRank.joinDocsWithRanks(td3, tdtxt); |
| +    assertEquals("result length", 2, res.length); |
| + |
| +    assertEquals(3, res[0].doc); |
| +    assertEquals(0, res[0].index); // td3 has index 0 |
| +    assertEquals(0, res[0].rank); |
| + |
| +    assertEquals(3, res[1].doc); |
| +    assertEquals(1, res[1].index); // tdtxt has index 1 |
| +    assertEquals(1, res[1].rank); |
| +  } |
| + |
| +  /** A doc present in both TopDocs contributes one hit per TopDocs to the result. */ |
| +  public void testKeepAllHits() throws Exception { |
| +    TopDocs tdtxt = topDocsFromTerm(new Term(ALTFIELD, "w5")); // matches doc 0 |
| +    TopDocs td0 = topDocsFromTerm(new Term(KEY, "0")); |
| +    ScoreDocRank[] res = ScoreDocRank.joinDocsWithRanks(tdtxt, td0); |
| +    assertEquals("result length", 2, res.length); |
| +  } |
| +} |