lucene/grouping/src/test/org/apache/lucene/search/grouping/BlockGroupingTest.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.lucene.search.grouping;

 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;

 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedDocValuesField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.util.BytesRef;

 /**
  * Tests {@link GroupingSearch} when it is constructed from a "block end" query, over an index
  * in which every book is added as one block of chapter documents and the last chapter of each
  * block carries the {@code blockEnd:true} marker term.
  */
 public class BlockGroupingTest extends AbstractGroupingTestCase {

   /** Number of documents requested per group, and per per-book comparison search. */
   private static final int GROUP_DOCS_LIMIT = 10;

   /**
    * Groups sorted by score, documents within each group sorted by score: the best group must
    * start with the overall best hit, and each group must match a plain search restricted to
    * that group's book.
    */
   public void testSimple() throws IOException {
     Shard shard = new Shard();
     try {
       indexRandomDocs(shard.writer);
       IndexSearcher searcher = shard.getIndexSearcher();

       GroupingSearch grouper = newBlockGrouper();

       Query topLevel = new TermQuery(new Term("text", "grandmother"));
       TopGroups<?> tg = grouper.search(searcher, topLevel, 0, 5);

       // We're sorting by score, so the score of the top group should be the same as the
       // score of the top document from the same query with no grouping
       TopDocs topDoc = searcher.search(topLevel, 1);
       assertEquals(topDoc.scoreDocs[0].score, tg.groups[0].scoreDocs[0].score, 0);
       assertEquals(topDoc.scoreDocs[0].doc, tg.groups[0].scoreDocs[0].doc);

       for (int i = 0; i < tg.groups.length; i++) {
         String bookName = searcher.doc(tg.groups[i].scoreDocs[0].doc).get("book");
         // The contents of each group should be equal to the results of a search for
         // that group alone
         TopDocs td = searcher.search(restrictToBook(topLevel, bookName), GROUP_DOCS_LIMIT);
         assertScoreDocsEquals(td.scoreDocs, tg.groups[i].scoreDocs);
       }
     } finally {
       // Always close the shard, including when an assertion above throws.
       shard.close();
     }
   }

   /**
    * Groups sorted by the {@code length} field, documents within each group sorted by score:
    * the first group's sort value must match an ungrouped sorted search, each group must match
    * a plain search restricted to its book, and consecutive groups must be in sort order.
    */
   public void testTopLevelSort() throws IOException {
     Shard shard = new Shard();
     try {
       indexRandomDocs(shard.writer);
       IndexSearcher searcher = shard.getIndexSearcher();

       Sort sort = new Sort(new SortField("length", SortField.Type.LONG));

       GroupingSearch grouper = newBlockGrouper();
       // groups returned sorted by length, chapters within group sorted by relevancy
       grouper.setGroupSort(sort);

       Query topLevel = new TermQuery(new Term("text", "grandmother"));
       TopGroups<?> tg = grouper.search(searcher, topLevel, 0, 5);

       // The sort value of the top doc in the top group should be the same as the sort value
       // of the top result from the same search done with no grouping
       TopDocs topDoc = searcher.search(topLevel, 1, sort);
       assertEquals(((FieldDoc) topDoc.scoreDocs[0]).fields[0], tg.groups[0].groupSortValues[0]);

       for (int i = 0; i < tg.groups.length; i++) {
         String bookName = searcher.doc(tg.groups[i].scoreDocs[0].doc).get("book");
         // The contents of each group should be equal to the results of a search for
         // that group alone, sorted by score
         TopDocs td = searcher.search(restrictToBook(topLevel, bookName), GROUP_DOCS_LIMIT);
         assertScoreDocsEquals(td.scoreDocs, tg.groups[i].scoreDocs);
         // Compare every adjacent pair, starting with groups 0 and 1 (a guard of i > 1 would
         // leave the first pair unchecked).
         if (i > 0) {
           assertSortsBefore(tg.groups[i - 1], tg.groups[i]);
         }
       }
     } finally {
       // Always close the shard, including when an assertion above throws.
       shard.close();
     }
   }

   /**
    * Groups sorted by score, documents within each group sorted by the {@code length} field:
    * the first group's sort value must equal the best ungrouped score, each group must match a
    * sorted search restricted to its book, and group scores must be non-increasing.
    */
   public void testWithinGroupSort() throws IOException {
     Shard shard = new Shard();
     try {
       indexRandomDocs(shard.writer);
       IndexSearcher searcher = shard.getIndexSearcher();

       Sort sort = new Sort(new SortField("length", SortField.Type.LONG));

       GroupingSearch grouper = newBlockGrouper();
       // groups returned sorted by relevancy, chapters within group sorted by length
       grouper.setSortWithinGroup(sort);

       Query topLevel = new TermQuery(new Term("text", "grandmother"));
       TopGroups<?> tg = grouper.search(searcher, topLevel, 0, 5);

       // We're sorting by score, so the score of the top group should be the same as the
       // score of the top document from the same query with no grouping
       TopDocs topDoc = searcher.search(topLevel, 1);
       assertEquals(topDoc.scoreDocs[0].score, (float) tg.groups[0].groupSortValues[0], 0);

       for (int i = 0; i < tg.groups.length; i++) {
         String bookName = searcher.doc(tg.groups[i].scoreDocs[0].doc).get("book");
         // The contents of each group should be equal to the results of a search for
         // that group alone, sorted by length
         TopDocs td = searcher.search(restrictToBook(topLevel, bookName), GROUP_DOCS_LIMIT, sort);
         assertFieldDocsEquals(td.scoreDocs, tg.groups[i].scoreDocs);
         // We're sorting by score, so the group sort value for each group should be a float,
         // and the value for the previous group should be higher or equal to the value for
         // this one
         if (i > 0) {
           float prevScore = (float) tg.groups[i - 1].groupSortValues[0];
           float thisScore = (float) tg.groups[i].groupSortValues[0];
           assertTrue("group " + (i - 1) + " score " + prevScore
               + " should be >= group " + i + " score " + thisScore, prevScore >= thisScore);
         }
       }
     } finally {
       // Always close the shard, including when an assertion above throws.
       shard.close();
     }
   }

   /**
    * Creates a {@link GroupingSearch} over the {@code blockEnd:true} marker query, limited to
    * {@link #GROUP_DOCS_LIMIT} documents per group.
    */
   private static GroupingSearch newBlockGrouper() {
     Query blockEndQuery = new TermQuery(new Term("blockEnd", "true"));
     GroupingSearch grouper = new GroupingSearch(blockEndQuery);
     grouper.setGroupDocsLimit(GROUP_DOCS_LIMIT);
     return grouper;
   }

   /**
    * Builds a query that requires {@code topLevel} to match and filters on the {@code book}
    * term {@code bookName}.
    */
   private static Query restrictToBook(Query topLevel, String bookName) {
     return new BooleanQuery.Builder()
         .add(topLevel, BooleanClause.Occur.MUST)
         .add(new TermQuery(new Term("book", bookName)), BooleanClause.Occur.FILTER)
         .build();
   }

   /** Adds {@code atLeast(20)} books to the writer, one {@code addDocuments} call per book. */
   private static void indexRandomDocs(RandomIndexWriter writer) throws IOException {
     int bookCount = atLeast(20);
     for (int i = 0; i < bookCount; i++) {
       writer.addDocuments(createRandomBlock(i));
     }
   }

   /**
    * Builds the chapter documents for book number {@code book}. Every chapter carries the book
    * name (as a stored text field and as sorted doc values), a chapter name, random text and
    * that text's length as numeric doc values; only the last chapter is tagged
    * {@code blockEnd:true}.
    */
   private static List<Document> createRandomBlock(int book) {
     List<Document> block = new ArrayList<>();
     String bookName = "book" + book;
     int chapterCount = atLeast(10);
     for (int j = 0; j < chapterCount; j++) {
       Document doc = new Document();
       String chapterName = "chapter" + j;
       String chapterText = randomText();
       doc.add(new TextField("book", bookName, Field.Store.YES));
       doc.add(new TextField("chapter", chapterName, Field.Store.YES));
       doc.add(new TextField("text", chapterText, Field.Store.NO));
       doc.add(new NumericDocValuesField("length", chapterText.length()));
       doc.add(new SortedDocValuesField("book", new BytesRef(bookName)));
       if (j == chapterCount - 1) {
         // Marker matched by the block-end query handed to GroupingSearch.
         doc.add(new TextField("blockEnd", "true", Field.Store.NO));
       }
       block.add(doc);
     }
     return block;
   }

   /** Sentences that {@link #randomText()} picks from; only the first contains "grandmother". */
   private static final String[] TEXT = new String[]{
       "It was the day my grandmother exploded",
       "It was the best of times, it was the worst of times",
       "It was a bright cold morning in April",
       "It is a truth universally acknowledged",
       "I have just returned from a visit to my landlord",
       "I've been here and I've been there"
   };

   /**
    * Returns one randomly chosen {@link #TEXT} sentence followed by {@code random().nextInt(20)}
    * further randomly chosen sentences, separated by single spaces.
    */
   private static String randomText() {
     StringBuilder sb = new StringBuilder(TEXT[random().nextInt(TEXT.length)]);
     int sentences = random().nextInt(20);
     for (int i = 0; i < sentences; i++) {
       sb.append(" ").append(TEXT[random().nextInt(TEXT.length)]);
     }
     return sb.toString();
   }

   /**
    * Asserts that the first group sort value of {@code first} is less than or equal to that of
    * {@code second}. Both values are cast to {@link Long}.
    */
   private static void assertSortsBefore(GroupDocs<?> first, GroupDocs<?> second) {
     Long firstValue = (Long) first.groupSortValues[0];
     Long secondValue = (Long) second.groupSortValues[0];
     assertTrue("group sort value " + firstValue + " should be <= " + secondValue,
         firstValue.compareTo(secondValue) <= 0);
   }

   /**
    * Asserts that two arrays of hits have the same length and, position by position, the same
    * doc id and the same sort field values. Every element is cast to {@link FieldDoc}.
    */
   protected static void assertFieldDocsEquals(ScoreDoc[] expected, ScoreDoc[] actual) {
     assertEquals(expected.length, actual.length);
     for (int i = 0; i < expected.length; i++) {
       assertEquals(expected[i].doc, actual[i].doc);
       FieldDoc e = (FieldDoc) expected[i];
       FieldDoc a = (FieldDoc) actual[i];
       assertArrayEquals(e.fields, a.fields);
     }
   }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.lucene.search.grouping;

	import java.io.IOException;
	import java.util.ArrayList;
	import java.util.List;

	import org.apache.lucene.document.Document;
	import org.apache.lucene.document.Field;
	import org.apache.lucene.document.NumericDocValuesField;
	import org.apache.lucene.document.SortedDocValuesField;
	import org.apache.lucene.document.TextField;
	import org.apache.lucene.index.RandomIndexWriter;
	import org.apache.lucene.index.Term;
	import org.apache.lucene.search.BooleanClause;
	import org.apache.lucene.search.BooleanQuery;
	import org.apache.lucene.search.FieldDoc;
	import org.apache.lucene.search.IndexSearcher;
	import org.apache.lucene.search.Query;
	import org.apache.lucene.search.ScoreDoc;
	import org.apache.lucene.search.Sort;
	import org.apache.lucene.search.SortField;
	import org.apache.lucene.search.TermQuery;
	import org.apache.lucene.search.TopDocs;
	import org.apache.lucene.util.BytesRef;

	/**
	 * Tests {@link GroupingSearch} when it is constructed from a "block end" query, over an index
	 * in which every book is added as one block of chapter documents and the last chapter of each
	 * block carries the {@code blockEnd:true} marker term.
	 */
	public class BlockGroupingTest extends AbstractGroupingTestCase {

	  /** Number of documents requested per group, and per per-book comparison search. */
	  private static final int GROUP_DOCS_LIMIT = 10;

	  /**
	   * Groups sorted by score, documents within each group sorted by score: the best group must
	   * start with the overall best hit, and each group must match a plain search restricted to
	   * that group's book.
	   */
	  public void testSimple() throws IOException {
	    Shard shard = new Shard();
	    try {
	      indexRandomDocs(shard.writer);
	      IndexSearcher searcher = shard.getIndexSearcher();

	      GroupingSearch grouper = newBlockGrouper();

	      Query topLevel = new TermQuery(new Term("text", "grandmother"));
	      TopGroups<?> tg = grouper.search(searcher, topLevel, 0, 5);

	      // We're sorting by score, so the score of the top group should be the same as the
	      // score of the top document from the same query with no grouping
	      TopDocs topDoc = searcher.search(topLevel, 1);
	      assertEquals(topDoc.scoreDocs[0].score, tg.groups[0].scoreDocs[0].score, 0);
	      assertEquals(topDoc.scoreDocs[0].doc, tg.groups[0].scoreDocs[0].doc);

	      for (int i = 0; i < tg.groups.length; i++) {
	        String bookName = searcher.doc(tg.groups[i].scoreDocs[0].doc).get("book");
	        // The contents of each group should be equal to the results of a search for
	        // that group alone
	        TopDocs td = searcher.search(restrictToBook(topLevel, bookName), GROUP_DOCS_LIMIT);
	        assertScoreDocsEquals(td.scoreDocs, tg.groups[i].scoreDocs);
	      }
	    } finally {
	      // Always close the shard, including when an assertion above throws.
	      shard.close();
	    }
	  }

	  /**
	   * Groups sorted by the {@code length} field, documents within each group sorted by score:
	   * the first group's sort value must match an ungrouped sorted search, each group must match
	   * a plain search restricted to its book, and consecutive groups must be in sort order.
	   */
	  public void testTopLevelSort() throws IOException {
	    Shard shard = new Shard();
	    try {
	      indexRandomDocs(shard.writer);
	      IndexSearcher searcher = shard.getIndexSearcher();

	      Sort sort = new Sort(new SortField("length", SortField.Type.LONG));

	      GroupingSearch grouper = newBlockGrouper();
	      // groups returned sorted by length, chapters within group sorted by relevancy
	      grouper.setGroupSort(sort);

	      Query topLevel = new TermQuery(new Term("text", "grandmother"));
	      TopGroups<?> tg = grouper.search(searcher, topLevel, 0, 5);

	      // The sort value of the top doc in the top group should be the same as the sort value
	      // of the top result from the same search done with no grouping
	      TopDocs topDoc = searcher.search(topLevel, 1, sort);
	      assertEquals(((FieldDoc) topDoc.scoreDocs[0]).fields[0], tg.groups[0].groupSortValues[0]);

	      for (int i = 0; i < tg.groups.length; i++) {
	        String bookName = searcher.doc(tg.groups[i].scoreDocs[0].doc).get("book");
	        // The contents of each group should be equal to the results of a search for
	        // that group alone, sorted by score
	        TopDocs td = searcher.search(restrictToBook(topLevel, bookName), GROUP_DOCS_LIMIT);
	        assertScoreDocsEquals(td.scoreDocs, tg.groups[i].scoreDocs);
	        // Compare every adjacent pair, starting with groups 0 and 1 (a guard of i > 1 would
	        // leave the first pair unchecked).
	        if (i > 0) {
	          assertSortsBefore(tg.groups[i - 1], tg.groups[i]);
	        }
	      }
	    } finally {
	      // Always close the shard, including when an assertion above throws.
	      shard.close();
	    }
	  }

	  /**
	   * Groups sorted by score, documents within each group sorted by the {@code length} field:
	   * the first group's sort value must equal the best ungrouped score, each group must match a
	   * sorted search restricted to its book, and group scores must be non-increasing.
	   */
	  public void testWithinGroupSort() throws IOException {
	    Shard shard = new Shard();
	    try {
	      indexRandomDocs(shard.writer);
	      IndexSearcher searcher = shard.getIndexSearcher();

	      Sort sort = new Sort(new SortField("length", SortField.Type.LONG));

	      GroupingSearch grouper = newBlockGrouper();
	      // groups returned sorted by relevancy, chapters within group sorted by length
	      grouper.setSortWithinGroup(sort);

	      Query topLevel = new TermQuery(new Term("text", "grandmother"));
	      TopGroups<?> tg = grouper.search(searcher, topLevel, 0, 5);

	      // We're sorting by score, so the score of the top group should be the same as the
	      // score of the top document from the same query with no grouping
	      TopDocs topDoc = searcher.search(topLevel, 1);
	      assertEquals(topDoc.scoreDocs[0].score, (float) tg.groups[0].groupSortValues[0], 0);

	      for (int i = 0; i < tg.groups.length; i++) {
	        String bookName = searcher.doc(tg.groups[i].scoreDocs[0].doc).get("book");
	        // The contents of each group should be equal to the results of a search for
	        // that group alone, sorted by length
	        TopDocs td = searcher.search(restrictToBook(topLevel, bookName), GROUP_DOCS_LIMIT, sort);
	        assertFieldDocsEquals(td.scoreDocs, tg.groups[i].scoreDocs);
	        // We're sorting by score, so the group sort value for each group should be a float,
	        // and the value for the previous group should be higher or equal to the value for
	        // this one
	        if (i > 0) {
	          float prevScore = (float) tg.groups[i - 1].groupSortValues[0];
	          float thisScore = (float) tg.groups[i].groupSortValues[0];
	          assertTrue("group " + (i - 1) + " score " + prevScore
	              + " should be >= group " + i + " score " + thisScore, prevScore >= thisScore);
	        }
	      }
	    } finally {
	      // Always close the shard, including when an assertion above throws.
	      shard.close();
	    }
	  }

	  /**
	   * Creates a {@link GroupingSearch} over the {@code blockEnd:true} marker query, limited to
	   * {@link #GROUP_DOCS_LIMIT} documents per group.
	   */
	  private static GroupingSearch newBlockGrouper() {
	    Query blockEndQuery = new TermQuery(new Term("blockEnd", "true"));
	    GroupingSearch grouper = new GroupingSearch(blockEndQuery);
	    grouper.setGroupDocsLimit(GROUP_DOCS_LIMIT);
	    return grouper;
	  }

	  /**
	   * Builds a query that requires {@code topLevel} to match and filters on the {@code book}
	   * term {@code bookName}.
	   */
	  private static Query restrictToBook(Query topLevel, String bookName) {
	    return new BooleanQuery.Builder()
	        .add(topLevel, BooleanClause.Occur.MUST)
	        .add(new TermQuery(new Term("book", bookName)), BooleanClause.Occur.FILTER)
	        .build();
	  }

	  /** Adds {@code atLeast(20)} books to the writer, one {@code addDocuments} call per book. */
	  private static void indexRandomDocs(RandomIndexWriter writer) throws IOException {
	    int bookCount = atLeast(20);
	    for (int i = 0; i < bookCount; i++) {
	      writer.addDocuments(createRandomBlock(i));
	    }
	  }

	  /**
	   * Builds the chapter documents for book number {@code book}. Every chapter carries the book
	   * name (as a stored text field and as sorted doc values), a chapter name, random text and
	   * that text's length as numeric doc values; only the last chapter is tagged
	   * {@code blockEnd:true}.
	   */
	  private static List<Document> createRandomBlock(int book) {
	    List<Document> block = new ArrayList<>();
	    String bookName = "book" + book;
	    int chapterCount = atLeast(10);
	    for (int j = 0; j < chapterCount; j++) {
	      Document doc = new Document();
	      String chapterName = "chapter" + j;
	      String chapterText = randomText();
	      doc.add(new TextField("book", bookName, Field.Store.YES));
	      doc.add(new TextField("chapter", chapterName, Field.Store.YES));
	      doc.add(new TextField("text", chapterText, Field.Store.NO));
	      doc.add(new NumericDocValuesField("length", chapterText.length()));
	      doc.add(new SortedDocValuesField("book", new BytesRef(bookName)));
	      if (j == chapterCount - 1) {
	        // Marker matched by the block-end query handed to GroupingSearch.
	        doc.add(new TextField("blockEnd", "true", Field.Store.NO));
	      }
	      block.add(doc);
	    }
	    return block;
	  }

	  /** Sentences that {@link #randomText()} picks from; only the first contains "grandmother". */
	  private static final String[] TEXT = new String[]{
	      "It was the day my grandmother exploded",
	      "It was the best of times, it was the worst of times",
	      "It was a bright cold morning in April",
	      "It is a truth universally acknowledged",
	      "I have just returned from a visit to my landlord",
	      "I've been here and I've been there"
	  };

	  /**
	   * Returns one randomly chosen {@link #TEXT} sentence followed by {@code random().nextInt(20)}
	   * further randomly chosen sentences, separated by single spaces.
	   */
	  private static String randomText() {
	    StringBuilder sb = new StringBuilder(TEXT[random().nextInt(TEXT.length)]);
	    int sentences = random().nextInt(20);
	    for (int i = 0; i < sentences; i++) {
	      sb.append(" ").append(TEXT[random().nextInt(TEXT.length)]);
	    }
	    return sb.toString();
	  }

	  /**
	   * Asserts that the first group sort value of {@code first} is less than or equal to that of
	   * {@code second}. Both values are cast to {@link Long}.
	   */
	  private static void assertSortsBefore(GroupDocs<?> first, GroupDocs<?> second) {
	    Long firstValue = (Long) first.groupSortValues[0];
	    Long secondValue = (Long) second.groupSortValues[0];
	    assertTrue("group sort value " + firstValue + " should be <= " + secondValue,
	        firstValue.compareTo(secondValue) <= 0);
	  }

	  /**
	   * Asserts that two arrays of hits have the same length and, position by position, the same
	   * doc id and the same sort field values. Every element is cast to {@link FieldDoc}.
	   */
	  protected static void assertFieldDocsEquals(ScoreDoc[] expected, ScoreDoc[] actual) {
	    assertEquals(expected.length, actual.length);
	    for (int i = 0; i < expected.length; i++) {
	      assertEquals(expected[i].doc, actual[i].doc);
	      FieldDoc e = (FieldDoc) expected[i];
	      FieldDoc a = (FieldDoc) actual[i];
	      assertArrayEquals(e.fields, a.fields);
	    }
	  }

	}