/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| package org.apache.lucene.search.grouping; |
| |
| import org.apache.lucene.analysis.MockAnalyzer; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.document.FieldType; |
| import org.apache.lucene.document.SortedDocValuesField; |
| import org.apache.lucene.document.StringField; |
| import org.apache.lucene.document.TextField; |
| import org.apache.lucene.index.RandomIndexWriter; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.queries.function.ValueSource; |
| import org.apache.lucene.queries.function.valuesource.BytesRefFieldSource; |
| import org.apache.lucene.search.IndexSearcher; |
| import org.apache.lucene.search.Query; |
| import org.apache.lucene.search.Sort; |
| import org.apache.lucene.search.TermQuery; |
| import org.apache.lucene.search.similarities.BM25Similarity; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.LuceneTestCase; |
| import org.apache.lucene.util.mutable.MutableValueStr; |
| |
| import java.util.ArrayList; |
| import java.util.HashMap; |
| import java.util.List; |
| |
| public class GroupingSearchTest extends LuceneTestCase { |
| |
| // Tests some very basic usages... |
| public void testBasic() throws Exception { |
| |
| final String groupField = "author"; |
| |
| FieldType customType = new FieldType(); |
| customType.setStored(true); |
| |
| Directory dir = newDirectory(); |
| RandomIndexWriter w = new RandomIndexWriter( |
| random(), |
| dir, |
| newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy())); |
| boolean canUseIDV = true; |
| List<Document> documents = new ArrayList<>(); |
| // 0 |
| Document doc = new Document(); |
| addGroupField(doc, groupField, "author1", canUseIDV); |
| doc.add(new TextField("content", "random text", Field.Store.YES)); |
| doc.add(new Field("id", "1", customType)); |
| documents.add(doc); |
| |
| // 1 |
| doc = new Document(); |
| addGroupField(doc, groupField, "author1", canUseIDV); |
| doc.add(new TextField("content", "some more random text", Field.Store.YES)); |
| doc.add(new Field("id", "2", customType)); |
| documents.add(doc); |
| |
| // 2 |
| doc = new Document(); |
| addGroupField(doc, groupField, "author1", canUseIDV); |
| doc.add(new TextField("content", "some more random textual data", Field.Store.YES)); |
| doc.add(new Field("id", "3", customType)); |
| doc.add(new StringField("groupend", "x", Field.Store.NO)); |
| documents.add(doc); |
| w.addDocuments(documents); |
| documents.clear(); |
| |
| // 3 |
| doc = new Document(); |
| addGroupField(doc, groupField, "author2", canUseIDV); |
| doc.add(new TextField("content", "some random text", Field.Store.YES)); |
| doc.add(new Field("id", "4", customType)); |
| doc.add(new StringField("groupend", "x", Field.Store.NO)); |
| w.addDocument(doc); |
| |
| // 4 |
| doc = new Document(); |
| addGroupField(doc, groupField, "author3", canUseIDV); |
| doc.add(new TextField("content", "some more random text", Field.Store.YES)); |
| doc.add(new Field("id", "5", customType)); |
| documents.add(doc); |
| |
| // 5 |
| doc = new Document(); |
| addGroupField(doc, groupField, "author3", canUseIDV); |
| doc.add(new TextField("content", "random", Field.Store.YES)); |
| doc.add(new Field("id", "6", customType)); |
| doc.add(new StringField("groupend", "x", Field.Store.NO)); |
| documents.add(doc); |
| w.addDocuments(documents); |
| documents.clear(); |
| |
| // 6 -- no author field |
| doc = new Document(); |
| doc.add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES)); |
| doc.add(new Field("id", "6", customType)); |
| doc.add(new StringField("groupend", "x", Field.Store.NO)); |
| |
| w.addDocument(doc); |
| |
| IndexSearcher indexSearcher = newSearcher(w.getReader()); |
| indexSearcher.setSimilarity(new BM25Similarity()); |
| w.close(); |
| |
| Sort groupSort = Sort.RELEVANCE; |
| GroupingSearch groupingSearch = createRandomGroupingSearch(groupField, groupSort, 5, canUseIDV); |
| |
| TopGroups<?> groups = groupingSearch.search(indexSearcher, new TermQuery(new Term("content", "random")), 0, 10); |
| |
| assertEquals(7, groups.totalHitCount); |
| assertEquals(7, groups.totalGroupedHitCount); |
| assertEquals(4, groups.groups.length); |
| |
| // relevance order: 5, 0, 3, 4, 1, 2, 6 |
| |
| // the later a document is added the higher this docId |
| // value |
| GroupDocs<?> group = groups.groups[0]; |
| compareGroupValue("author3", group); |
| assertEquals(2, group.scoreDocs.length); |
| assertEquals(5, group.scoreDocs[0].doc); |
| assertEquals(4, group.scoreDocs[1].doc); |
| assertTrue(group.scoreDocs[0].score >= group.scoreDocs[1].score); |
| |
| group = groups.groups[1]; |
| compareGroupValue("author1", group); |
| assertEquals(3, group.scoreDocs.length); |
| assertEquals(0, group.scoreDocs[0].doc); |
| assertEquals(1, group.scoreDocs[1].doc); |
| assertEquals(2, group.scoreDocs[2].doc); |
| assertTrue(group.scoreDocs[0].score >= group.scoreDocs[1].score); |
| assertTrue(group.scoreDocs[1].score >= group.scoreDocs[2].score); |
| |
| group = groups.groups[2]; |
| compareGroupValue("author2", group); |
| assertEquals(1, group.scoreDocs.length); |
| assertEquals(3, group.scoreDocs[0].doc); |
| |
| group = groups.groups[3]; |
| compareGroupValue(null, group); |
| assertEquals(1, group.scoreDocs.length); |
| assertEquals(6, group.scoreDocs[0].doc); |
| |
| Query lastDocInBlock = new TermQuery(new Term("groupend", "x")); |
| groupingSearch = new GroupingSearch(lastDocInBlock); |
| groups = groupingSearch.search(indexSearcher, new TermQuery(new Term("content", "random")), 0, 10); |
| |
| assertEquals(7, groups.totalHitCount); |
| assertEquals(7, groups.totalGroupedHitCount); |
| assertEquals(4, groups.totalGroupCount.longValue()); |
| assertEquals(4, groups.groups.length); |
| |
| indexSearcher.getIndexReader().close(); |
| dir.close(); |
| } |
| |
| private void addGroupField(Document doc, String groupField, String value, boolean canUseIDV) { |
| doc.add(new TextField(groupField, value, Field.Store.YES)); |
| if (canUseIDV) { |
| doc.add(new SortedDocValuesField(groupField, new BytesRef(value))); |
| } |
| } |
| |
| private void compareGroupValue(String expected, GroupDocs<?> group) { |
| if (expected == null) { |
| if (group.groupValue == null) { |
| return; |
| } else if (group.groupValue.getClass().isAssignableFrom(MutableValueStr.class)) { |
| return; |
| } else if (((BytesRef) group.groupValue).length == 0) { |
| return; |
| } |
| fail(); |
| } |
| |
| if (group.groupValue.getClass().isAssignableFrom(BytesRef.class)) { |
| assertEquals(new BytesRef(expected), group.groupValue); |
| } else if (group.groupValue.getClass().isAssignableFrom(MutableValueStr.class)) { |
| MutableValueStr v = new MutableValueStr(); |
| v.value.copyChars(expected); |
| assertEquals(v, group.groupValue); |
| } else { |
| fail(); |
| } |
| } |
| |
| private GroupingSearch createRandomGroupingSearch(String groupField, Sort groupSort, int docsInGroup, boolean canUseIDV) { |
| GroupingSearch groupingSearch; |
| if (random().nextBoolean()) { |
| ValueSource vs = new BytesRefFieldSource(groupField); |
| groupingSearch = new GroupingSearch(vs, new HashMap<>()); |
| } else { |
| groupingSearch = new GroupingSearch(groupField); |
| } |
| |
| groupingSearch.setGroupSort(groupSort); |
| groupingSearch.setGroupDocsLimit(docsInGroup); |
| |
| if (random().nextBoolean()) { |
| groupingSearch.setCachingInMB(4.0, true); |
| } |
| |
| return groupingSearch; |
| } |
| |
| public void testSetAllGroups() throws Exception { |
| Directory dir = newDirectory(); |
| RandomIndexWriter w = new RandomIndexWriter( |
| random(), |
| dir, |
| newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy())); |
| Document doc = new Document(); |
| doc.add(newField("group", "foo", StringField.TYPE_NOT_STORED)); |
| doc.add(new SortedDocValuesField("group", new BytesRef("foo"))); |
| w.addDocument(doc); |
| |
| IndexSearcher indexSearcher = newSearcher(w.getReader()); |
| w.close(); |
| |
| GroupingSearch gs = new GroupingSearch("group"); |
| gs.setAllGroups(true); |
| TopGroups<?> groups = gs.search(indexSearcher, new TermQuery(new Term("group", "foo")), 0, 10); |
| assertEquals(1, groups.totalHitCount); |
| //assertEquals(1, groups.totalGroupCount.intValue()); |
| assertEquals(1, groups.totalGroupedHitCount); |
| assertEquals(1, gs.getAllMatchingGroups().size()); |
| indexSearcher.getIndexReader().close(); |
| dir.close(); |
| } |
| } |