blob: fc03dfdd84a59b13514a7b5eb0752e8232a12ad8 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.grouping;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.BytesRef;
public class BlockGroupingTest extends AbstractGroupingTestCase {
public void testSimple() throws IOException {
Shard shard = new Shard();
indexRandomDocs(shard.writer);
IndexSearcher searcher = shard.getIndexSearcher();
Query blockEndQuery = new TermQuery(new Term("blockEnd", "true"));
GroupingSearch grouper = new GroupingSearch(blockEndQuery);
grouper.setGroupDocsLimit(10);
Query topLevel = new TermQuery(new Term("text", "grandmother"));
TopGroups<?> tg = grouper.search(searcher, topLevel, 0, 5);
// We're sorting by score, so the score of the top group should be the same as the
// score of the top document from the same query with no grouping
TopDocs topDoc = searcher.search(topLevel, 1);
assertEquals(topDoc.scoreDocs[0].score, tg.groups[0].scoreDocs[0].score, 0);
assertEquals(topDoc.scoreDocs[0].doc, tg.groups[0].scoreDocs[0].doc);
for (int i = 0; i < tg.groups.length; i++) {
String bookName = searcher.doc(tg.groups[i].scoreDocs[0].doc).get("book");
// The contents of each group should be equal to the results of a search for
// that group alone
Query filtered = new BooleanQuery.Builder()
.add(topLevel, BooleanClause.Occur.MUST)
.add(new TermQuery(new Term("book", bookName)), BooleanClause.Occur.FILTER)
.build();
TopDocs td = searcher.search(filtered, 10);
assertScoreDocsEquals(td.scoreDocs, tg.groups[i].scoreDocs);
}
shard.close();
}
public void testTopLevelSort() throws IOException {
Shard shard = new Shard();
indexRandomDocs(shard.writer);
IndexSearcher searcher = shard.getIndexSearcher();
Sort sort = new Sort(new SortField("length", SortField.Type.LONG));
Query blockEndQuery = new TermQuery(new Term("blockEnd", "true"));
GroupingSearch grouper = new GroupingSearch(blockEndQuery);
grouper.setGroupDocsLimit(10);
grouper.setGroupSort(sort); // groups returned sorted by length, chapters within group sorted by relevancy
Query topLevel = new TermQuery(new Term("text", "grandmother"));
TopGroups<?> tg = grouper.search(searcher, topLevel, 0, 5);
// The sort value of the top doc in the top group should be the same as the sort value
// of the top result from the same search done with no grouping
TopDocs topDoc = searcher.search(topLevel, 1, sort);
assertEquals(((FieldDoc)topDoc.scoreDocs[0]).fields[0], tg.groups[0].groupSortValues[0]);
for (int i = 0; i < tg.groups.length; i++) {
String bookName = searcher.doc(tg.groups[i].scoreDocs[0].doc).get("book");
// The contents of each group should be equal to the results of a search for
// that group alone, sorted by score
Query filtered = new BooleanQuery.Builder()
.add(topLevel, BooleanClause.Occur.MUST)
.add(new TermQuery(new Term("book", bookName)), BooleanClause.Occur.FILTER)
.build();
TopDocs td = searcher.search(filtered, 10);
assertScoreDocsEquals(td.scoreDocs, tg.groups[i].scoreDocs);
if (i > 1) {
assertSortsBefore(tg.groups[i - 1], tg.groups[i]);
}
}
shard.close();
}
public void testWithinGroupSort() throws IOException {
Shard shard = new Shard();
indexRandomDocs(shard.writer);
IndexSearcher searcher = shard.getIndexSearcher();
Sort sort = new Sort(new SortField("length", SortField.Type.LONG));
Query blockEndQuery = new TermQuery(new Term("blockEnd", "true"));
GroupingSearch grouper = new GroupingSearch(blockEndQuery);
grouper.setGroupDocsLimit(10);
grouper.setSortWithinGroup(sort); // groups returned sorted by relevancy, chapters within group sorted by length
Query topLevel = new TermQuery(new Term("text", "grandmother"));
TopGroups<?> tg = grouper.search(searcher, topLevel, 0, 5);
// We're sorting by score, so the score of the top group should be the same as the
// score of the top document from the same query with no grouping
TopDocs topDoc = searcher.search(topLevel, 1);
assertEquals(topDoc.scoreDocs[0].score, (float)tg.groups[0].groupSortValues[0], 0);
for (int i = 0; i < tg.groups.length; i++) {
String bookName = searcher.doc(tg.groups[i].scoreDocs[0].doc).get("book");
// The contents of each group should be equal to the results of a search for
// that group alone, sorted by length
Query filtered = new BooleanQuery.Builder()
.add(topLevel, BooleanClause.Occur.MUST)
.add(new TermQuery(new Term("book", bookName)), BooleanClause.Occur.FILTER)
.build();
TopDocs td = searcher.search(filtered, 10, sort);
assertFieldDocsEquals(td.scoreDocs, tg.groups[i].scoreDocs);
// We're sorting by score, so the group sort value for each group should be a float,
// and the value for the previous group should be higher or equal to the value for this one
if (i > 0) {
float prevScore = (float) tg.groups[i - 1].groupSortValues[0];
float thisScore = (float) tg.groups[i].groupSortValues[0];
assertTrue(prevScore >= thisScore);
}
}
shard.close();
}
private static void indexRandomDocs(RandomIndexWriter writer) throws IOException {
int bookCount = atLeast(20);
for (int i = 0; i < bookCount; i++) {
writer.addDocuments(createRandomBlock(i));
}
}
private static List<Document> createRandomBlock(int book) {
List<Document> block = new ArrayList<>();
String bookName = "book" + book;
int chapterCount = atLeast(10);
for (int j = 0; j < chapterCount; j++) {
Document doc = new Document();
String chapterName = "chapter" + j;
String chapterText = randomText();
doc.add(new TextField("book", bookName, Field.Store.YES));
doc.add(new TextField("chapter", chapterName, Field.Store.YES));
doc.add(new TextField("text", chapterText, Field.Store.NO));
doc.add(new NumericDocValuesField("length", chapterText.length()));
doc.add(new SortedDocValuesField("book", new BytesRef(bookName)));
if (j == chapterCount - 1) {
doc.add(new TextField("blockEnd", "true", Field.Store.NO));
}
block.add(doc);
}
return block;
}
private static final String[] TEXT = new String[]{
"It was the day my grandmother exploded",
"It was the best of times, it was the worst of times",
"It was a bright cold morning in April",
"It is a truth universally acknowledged",
"I have just returned from a visit to my landlord",
"I've been here and I've been there"
};
private static String randomText() {
StringBuilder sb = new StringBuilder(TEXT[random().nextInt(TEXT.length)]);
int sentences = random().nextInt(20);
for (int i = 0; i < sentences; i++) {
sb.append(" ").append(TEXT[random().nextInt(TEXT.length)]);
}
return sb.toString();
}
private void assertSortsBefore(GroupDocs<?> first, GroupDocs<?> second) {
Object[] groupSortValues = second.groupSortValues;
Object[] prevSortValues = first.groupSortValues;
assertTrue(((Long)prevSortValues[0]).compareTo((Long)groupSortValues[0]) <= 0);
}
protected static void assertFieldDocsEquals(ScoreDoc[] expected, ScoreDoc[] actual) {
assertEquals(expected.length, actual.length);
for (int i = 0; i < expected.length; i++) {
assertEquals(expected[i].doc, actual[i].doc);
FieldDoc e = (FieldDoc) expected[i];
FieldDoc a = (FieldDoc) actual[i];
assertArrayEquals(e.fields, a.fields);
}
}
}