| package org.apache.lucene.search.join; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import org.apache.lucene.analysis.MockAnalyzer; |
| import org.apache.lucene.document.*; |
| import org.apache.lucene.index.*; |
| import org.apache.lucene.search.*; |
| import org.apache.lucene.search.BooleanClause.Occur; |
| import org.apache.lucene.search.grouping.GroupDocs; |
| import org.apache.lucene.search.grouping.TopGroups; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.util.*; |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Collections; |
| import java.util.List; |
| import java.util.Locale; |
| |
| public class TestBlockJoin extends LuceneTestCase { |
| |
| // One resume... |
| private Document makeResume(String name, String country) { |
| Document resume = new Document(); |
| resume.add(newStringField("docType", "resume", Field.Store.NO)); |
| resume.add(newStringField("name", name, Field.Store.YES)); |
| resume.add(newStringField("country", country, Field.Store.NO)); |
| return resume; |
| } |
| |
| // ... has multiple jobs |
| private Document makeJob(String skill, int year) { |
| Document job = new Document(); |
| job.add(newStringField("skill", skill, Field.Store.YES)); |
| job.add(new IntField("year", year, Field.Store.NO)); |
| job.add(new StoredField("year", year)); |
| return job; |
| } |
| |
| // ... has multiple qualifications |
| private Document makeQualification(String qualification, int year) { |
| Document job = new Document(); |
| job.add(newStringField("qualification", qualification, Field.Store.YES)); |
| job.add(new IntField("year", year, Field.Store.NO)); |
| return job; |
| } |
| |
  /**
   * Indexes two parent/child blocks plus many single-child blocks whose
   * segments contain no "docType" value at all, then verifies the block
   * join still runs cleanly across segments where the parents filter
   * matches nothing, and returns the expected parent (Lisa).
   */
  public void testEmptyChildFilter() throws Exception {
    final Directory dir = newDirectory();
    final IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    config.setMergePolicy(NoMergePolicy.INSTANCE);
    // we don't want to merge - since we rely on certain segment setup
    final IndexWriter w = new IndexWriter(dir, config);

    final List<Document> docs = new ArrayList<>();

    // Each block is child docs first, parent resume last (block-join contract):
    docs.add(makeJob("java", 2007));
    docs.add(makeJob("python", 2010));
    docs.add(makeResume("Lisa", "United Kingdom"));
    w.addDocuments(docs);

    docs.clear();
    docs.add(makeJob("ruby", 2005));
    docs.add(makeJob("java", 2006));
    docs.add(makeResume("Frank", "United States"));
    w.addDocuments(docs);
    w.commit();
    int num = atLeast(10); // produce a segment that doesn't have a value in the docType field
    for (int i = 0; i < num; i++) {
      docs.clear();
      docs.add(makeJob("java", 2007));
      w.addDocuments(docs);
    }

    IndexReader r = DirectoryReader.open(w, random().nextBoolean());
    w.shutdown();
    // Sanity: the commit + later adds must have produced multiple segments:
    assertTrue(r.leaves().size() > 1);
    IndexSearcher s = new IndexSearcher(r);
    // Parents are the docs carrying docType=resume:
    Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));

    // Child criteria: a java job in [2006, 2011]:
    BooleanQuery childQuery = new BooleanQuery();
    childQuery.add(new BooleanClause(new TermQuery(new Term("skill", "java")), Occur.MUST));
    childQuery.add(new BooleanClause(NumericRangeQuery.newIntRange("year", 2006, 2011, true, true), Occur.MUST));

    ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);

    BooleanQuery fullQuery = new BooleanQuery();
    fullQuery.add(new BooleanClause(childJoinQuery, Occur.MUST));
    fullQuery.add(new BooleanClause(new MatchAllDocsQuery(), Occur.MUST));
    ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true);
    s.search(fullQuery, c);
    TopGroups<Integer> results = c.getTopGroups(childJoinQuery, null, 0, 10, 0, true);
    // A real score must have been computed despite the "empty" segments:
    assertFalse(Float.isNaN(results.maxScore));
    assertEquals(1, results.totalGroupedHitCount);
    assertEquals(1, results.groups.length);
    final GroupDocs<Integer> group = results.groups[0];
    StoredDocument childDoc = s.doc(group.scoreDocs[0].doc);
    assertEquals("java", childDoc.get("skill"));
    // groupValue holds the parent docID for this group:
    assertNotNull(group.groupValue);
    StoredDocument parentDoc = s.doc(group.groupValue);
    assertEquals("Lisa", parentDoc.get("name"));

    r.close();
    dir.close();
  }
| |
| |
  /**
   * End-to-end smoke test of both join directions: child-to-parent via
   * ToParentBlockJoinQuery/ToParentBlockJoinCollector, then parent-to-child
   * via ToChildBlockJoinQuery, over two resume blocks (Lisa/UK, Frank/US).
   */
  public void testSimple() throws Exception {

    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);

    final List<Document> docs = new ArrayList<>();

    // Block 1: Lisa's jobs, parent resume added last:
    docs.add(makeJob("java", 2007));
    docs.add(makeJob("python", 2010));
    docs.add(makeResume("Lisa", "United Kingdom"));
    w.addDocuments(docs);

    // Block 2: Frank's jobs:
    docs.clear();
    docs.add(makeJob("ruby", 2005));
    docs.add(makeJob("java", 2006));
    docs.add(makeResume("Frank", "United States"));
    w.addDocuments(docs);

    IndexReader r = w.getReader();
    w.shutdown();
    IndexSearcher s = newSearcher(r);

    // Create a filter that defines "parent" documents in the index - in this case resumes
    Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));

    // Define child document criteria (finds an example of relevant work experience)
    BooleanQuery childQuery = new BooleanQuery();
    childQuery.add(new BooleanClause(new TermQuery(new Term("skill", "java")), Occur.MUST));
    childQuery.add(new BooleanClause(NumericRangeQuery.newIntRange("year", 2006, 2011, true, true), Occur.MUST));

    // Define parent document criteria (find a resident in the UK)
    Query parentQuery = new TermQuery(new Term("country", "United Kingdom"));

    // Wrap the child document query to 'join' any matches
    // up to corresponding parent:
    ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);

    // Combine the parent and nested child queries into a single query for a candidate
    BooleanQuery fullQuery = new BooleanQuery();
    fullQuery.add(new BooleanClause(parentQuery, Occur.MUST));
    fullQuery.add(new BooleanClause(childJoinQuery, Occur.MUST));

    ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true);

    s.search(fullQuery, c);

    TopGroups<Integer> results = c.getTopGroups(childJoinQuery, null, 0, 10, 0, true);
    assertFalse(Float.isNaN(results.maxScore));

    // Only Lisa is both in the UK and has a matching java job:
    //assertEquals(1, results.totalHitCount);
    assertEquals(1, results.totalGroupedHitCount);
    assertEquals(1, results.groups.length);

    final GroupDocs<Integer> group = results.groups[0];
    assertEquals(1, group.totalHits);
    assertFalse(Float.isNaN(group.score));

    StoredDocument childDoc = s.doc(group.scoreDocs[0].doc);
    //System.out.println("  doc=" + group.scoreDocs[0].doc);
    assertEquals("java", childDoc.get("skill"));
    assertNotNull(group.groupValue);
    StoredDocument parentDoc = s.doc(group.groupValue);
    assertEquals("Lisa", parentDoc.get("name"));


    //System.out.println("TEST: now test up");

    // Now join "up" (map parent hits to child docs) instead...:
    ToChildBlockJoinQuery parentJoinQuery = new ToChildBlockJoinQuery(parentQuery, parentsFilter, random().nextBoolean());
    BooleanQuery fullChildQuery = new BooleanQuery();
    fullChildQuery.add(new BooleanClause(parentJoinQuery, Occur.MUST));
    fullChildQuery.add(new BooleanClause(childQuery, Occur.MUST));

    //System.out.println("FULL: " + fullChildQuery);
    TopDocs hits = s.search(fullChildQuery, 10);
    assertEquals(1, hits.totalHits);
    childDoc = s.doc(hits.scoreDocs[0].doc);
    //System.out.println("CHILD = " + childDoc + " docID=" + hits.scoreDocs[0].doc);
    assertEquals("java", childDoc.get("skill"));
    // Reads the StoredField copy of the year added by makeJob:
    assertEquals(2007, childDoc.getField("year").numericValue());
    assertEquals("Lisa", getParentDoc(r, parentsFilter, hits.scoreDocs[0].doc).get("name"));

    // Test with filter on child docs:
    assertEquals(0, s.search(fullChildQuery,
                             new QueryWrapperFilter(new TermQuery(new Term("skill", "foosball"))),
                             1).totalHits);

    r.close();
    dir.close();
  }
| |
  /**
   * Regression test for a bug hit when the child query needs two rewrite
   * iterations before reaching its final form: the block join must still
   * group each matching child under its own parent.
   */
  public void testBugCausedByRewritingTwice() throws IOException {
    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);

    final List<Document> docs = new ArrayList<>();

    // Ten blocks; every block contains exactly one job in year 2007
    // (the ruby jobs use years 0..9, which never collide with 2007):
    for (int i=0;i<10;i++) {
      docs.clear();
      docs.add(makeJob("ruby", i));
      docs.add(makeJob("java", 2007));
      docs.add(makeResume("Frank", "United States"));
      w.addDocuments(docs);
    }

    IndexReader r = w.getReader();
    w.shutdown();
    IndexSearcher s = newSearcher(r);

    MultiTermQuery qc = NumericRangeQuery.newIntRange("year", 2007, 2007, true, true);
    // Hacky: this causes the query to need 2 rewrite
    // iterations:
    qc.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);

    Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));

    // Sanity-check that each rewrite step actually yields a different
    // query (compared via hashCode):
    int h1 = qc.hashCode();
    Query qw1 = qc.rewrite(r);
    int h2 = qw1.hashCode();
    Query qw2 = qw1.rewrite(r);
    int h3 = qw2.hashCode();

    assertTrue(h1 != h2);
    assertTrue(h2 != h3);
    assertTrue(h3 != h1);

    ToParentBlockJoinQuery qp = new ToParentBlockJoinQuery(qc, parentsFilter, ScoreMode.Max);
    ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 10, true, true);

    s.search(qp, c);
    TopGroups<Integer> groups = c.getTopGroups(qp, Sort.INDEXORDER, 0, 10, 0, true);
    // Every parent group must contain exactly its single 2007 child:
    for (GroupDocs<Integer> group : groups.groups) {
      assertEquals(1, group.totalHits);
    }

    r.close();
    dir.close();
  }
| |
| protected QueryWrapperFilter skill(String skill) { |
| return new QueryWrapperFilter(new TermQuery(new Term("skill", skill))); |
| } |
| |
  /**
   * Exercises both join directions combined with searcher-level filters:
   * filters that pass all parents, filters matching nothing, country
   * filters selecting one parent, and skill filters on the child side.
   * Also interleaves random "skillless" resumes (parents with no child
   * block) and shuffles child order within each block.
   */
  public void testSimpleFilter() throws Exception {

    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);

    final List<Document> docs = new ArrayList<>();
    docs.add(makeJob("java", 2007));
    docs.add(makeJob("python", 2010));
    // Child order within a block should not matter:
    Collections.shuffle(docs, random());
    docs.add(makeResume("Lisa", "United Kingdom"));

    final List<Document> docs2 = new ArrayList<>();
    docs2.add(makeJob("ruby", 2005));
    docs2.add(makeJob("java", 2006));
    Collections.shuffle(docs2, random());
    docs2.add(makeResume("Frank", "United States"));

    // Randomly add the two blocks in either order, with possible
    // child-less resumes before/between/after:
    addSkillless(w);
    boolean turn = random().nextBoolean();
    w.addDocuments(turn ? docs:docs2);

    addSkillless(w);

    w.addDocuments(!turn ? docs:docs2);

    addSkillless(w);

    IndexReader r = w.getReader();
    w.shutdown();
    IndexSearcher s = newSearcher(r);

    // Create a filter that defines "parent" documents in the index - in this case resumes
    Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));

    // Define child document criteria (finds an example of relevant work experience)
    BooleanQuery childQuery = new BooleanQuery();
    childQuery.add(new BooleanClause(new TermQuery(new Term("skill", "java")), Occur.MUST));
    childQuery.add(new BooleanClause(NumericRangeQuery.newIntRange("year", 2006, 2011, true, true), Occur.MUST));

    // Define parent document criteria (find a resident in the UK)
    Query parentQuery = new TermQuery(new Term("country", "United Kingdom"));

    // Wrap the child document query to 'join' any matches
    // up to corresponding parent:
    ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);

    assertEquals("no filter - both passed", 2, s.search(childJoinQuery, 10).totalHits);

    assertEquals("dummy filter passes everyone ", 2, s.search(childJoinQuery, parentsFilter, 10).totalHits);
    assertEquals("dummy filter passes everyone ", 2, s.search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))), 10).totalHits);

    // not found test
    assertEquals("noone live there", 0, s.search(childJoinQuery, new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("country", "Oz")))), 1).totalHits);
    assertEquals("noone live there", 0, s.search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("country", "Oz"))), 1).totalHits);

    // apply the UK filter by the searcher
    TopDocs ukOnly = s.search(childJoinQuery, new QueryWrapperFilter(parentQuery), 1);
    assertEquals("has filter - single passed", 1, ukOnly.totalHits);
    assertEquals( "Lisa", r.document(ukOnly.scoreDocs[0].doc).get("name"));

    // looking for US candidates
    TopDocs usThen = s.search(childJoinQuery , new QueryWrapperFilter(new TermQuery(new Term("country", "United States"))), 1);
    assertEquals("has filter - single passed", 1, usThen.totalHits);
    assertEquals("Frank", r.document(usThen.scoreDocs[0].doc).get("name"));

    // Now join downward (parent query -> child hits), filtered by skill:
    TermQuery us = new TermQuery(new Term("country", "United States"));
    assertEquals("@ US we have java and ruby", 2,
        s.search(new ToChildBlockJoinQuery(us,
                          parentsFilter, random().nextBoolean()), 10).totalHits );

    assertEquals("java skills in US", 1, s.search(new ToChildBlockJoinQuery(us, parentsFilter, random().nextBoolean()),
                          skill("java"), 10).totalHits );

    BooleanQuery rubyPython = new BooleanQuery();
    rubyPython.add(new TermQuery(new Term("skill", "ruby")), Occur.SHOULD);
    rubyPython.add(new TermQuery(new Term("skill", "python")), Occur.SHOULD);
    assertEquals("ruby skills in US", 1, s.search(new ToChildBlockJoinQuery(us, parentsFilter, random().nextBoolean()),
                          new QueryWrapperFilter(rubyPython), 10).totalHits );

    r.close();
    dir.close();
  }
| |
| private void addSkillless(final RandomIndexWriter w) throws IOException { |
| if (random().nextBoolean()) { |
| w.addDocument(makeResume("Skillless", random().nextBoolean() ? "United Kingdom":"United States")); |
| } |
| } |
| |
  /**
   * Maps a child docID back to its parent's stored document. Within the
   * child's segment the parent is the next set bit at/after the child in
   * the parents filter's bit set (parents are indexed last in each block).
   */
  private StoredDocument getParentDoc(IndexReader reader, Filter parents, int childDocID) throws IOException {
    final List<AtomicReaderContext> leaves = reader.leaves();
    // Locate the segment (leaf) that contains the child doc:
    final int subIndex = ReaderUtil.subIndex(childDocID, leaves);
    final AtomicReaderContext leaf = leaves.get(subIndex);
    // NOTE(review): assumes the filter yields a FixedBitSet (true for
    // FixedBitSetCachingWrapperFilter); the cast fails for other filters.
    final FixedBitSet bits = (FixedBitSet) parents.getDocIdSet(leaf, null);
    // Translate to a segment-local docID before scanning the bit set:
    return leaf.reader().document(bits.nextSetBit(childDocID - leaf.docBase));
  }
| |
| public void testBoostBug() throws Exception { |
| final Directory dir = newDirectory(); |
| final RandomIndexWriter w = new RandomIndexWriter(random(), dir); |
| IndexReader r = w.getReader(); |
| w.shutdown(); |
| IndexSearcher s = newSearcher(r); |
| |
| ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(new MatchAllDocsQuery(), new QueryWrapperFilter(new MatchAllDocsQuery()), ScoreMode.Avg); |
| QueryUtils.check(random(), q, s); |
| s.search(q, 10); |
| BooleanQuery bq = new BooleanQuery(); |
| bq.setBoost(2f); // we boost the BQ |
| bq.add(q, BooleanClause.Occur.MUST); |
| s.search(bq, 10); |
| r.close(); |
| dir.close(); |
| } |
| |
  /**
   * Verifies ScoreMode.Total parent scoring before and after child doc
   * deletions. The assertions are consistent with each matching child
   * contributing 1.0, so the parent's Total score equals the number of
   * live children in the year range: Frank's block (parent docID 6) has
   * 3 matching jobs, Lisa's (docID 2) has 2; deleting the java jobs drops
   * each score by one. NoMergePolicy keeps docIDs stable across deletes.
   */
  public void testNestedDocScoringWithDeletes() throws Exception {
    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(
        random(),
        dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT,
            new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));

    // Cannot assert this since we use NoMergePolicy:
    w.setDoRandomForceMergeAssert(false);

    // Lisa's block: docs 0-1 children, doc 2 parent:
    List<Document> docs = new ArrayList<>();
    docs.add(makeJob("java", 2007));
    docs.add(makeJob("python", 2010));
    docs.add(makeResume("Lisa", "United Kingdom"));
    w.addDocuments(docs);

    // Frank's block: docs 3-5 children, doc 6 parent:
    docs.clear();
    docs.add(makeJob("c", 1999));
    docs.add(makeJob("ruby", 2005));
    docs.add(makeJob("java", 2006));
    docs.add(makeResume("Frank", "United States"));
    w.addDocuments(docs);

    w.commit();
    IndexSearcher s = newSearcher(DirectoryReader.open(dir));

    ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(
        NumericRangeQuery.newIntRange("year", 1990, 2010, true, true),
        new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume")))),
        ScoreMode.Total
    );

    TopDocs topDocs = s.search(q, 10);
    assertEquals(2, topDocs.totalHits);
    assertEquals(6, topDocs.scoreDocs[0].doc);
    assertEquals(3.0f, topDocs.scoreDocs[0].score, 0.0f);
    assertEquals(2, topDocs.scoreDocs[1].doc);
    assertEquals(2.0f, topDocs.scoreDocs[1].score, 0.0f);

    // Delete one matching child from each block and re-open:
    s.getIndexReader().close();
    w.deleteDocuments(new Term("skill", "java"));
    w.shutdown();
    s = newSearcher(DirectoryReader.open(dir));

    // Same parents win, each with one fewer live child in its total:
    topDocs = s.search(q, 10);
    assertEquals(2, topDocs.totalHits);
    assertEquals(6, topDocs.scoreDocs[0].doc);
    assertEquals(2.0f, topDocs.scoreDocs[0].score, 0.0f);
    assertEquals(2, topDocs.scoreDocs[1].doc);
    assertEquals(1.0f, topDocs.scoreDocs[1].score, 0.0f);

    s.getIndexReader().close();
    dir.close();
  }
| |
| private String[][] getRandomFields(int maxUniqueValues) { |
| |
| final String[][] fields = new String[TestUtil.nextInt(random(), 2, 4)][]; |
| for(int fieldID=0;fieldID<fields.length;fieldID++) { |
| final int valueCount; |
| if (fieldID == 0) { |
| valueCount = 2; |
| } else { |
| valueCount = TestUtil.nextInt(random(), 1, maxUniqueValues); |
| } |
| |
| final String[] values = fields[fieldID] = new String[valueCount]; |
| for(int i=0;i<valueCount;i++) { |
| values[i] = TestUtil.randomRealisticUnicodeString(random()); |
| //values[i] = _TestUtil.randomSimpleString(random); |
| } |
| } |
| |
| return fields; |
| } |
| |
| private Term randomParentTerm(String[] values) { |
| return new Term("parent0", values[random().nextInt(values.length)]); |
| } |
| |
| private Term randomChildTerm(String[] values) { |
| return new Term("child0", values[random().nextInt(values.length)]); |
| } |
| |
| private Sort getRandomSort(String prefix, int numFields) { |
| final List<SortField> sortFields = new ArrayList<>(); |
| // TODO: sometimes sort by score; problem is scores are |
| // not comparable across the two indices |
| // sortFields.add(SortField.FIELD_SCORE); |
| if (random().nextBoolean()) { |
| sortFields.add(new SortField(prefix + random().nextInt(numFields), SortField.Type.STRING, random().nextBoolean())); |
| } else if (random().nextBoolean()) { |
| sortFields.add(new SortField(prefix + random().nextInt(numFields), SortField.Type.STRING, random().nextBoolean())); |
| sortFields.add(new SortField(prefix + random().nextInt(numFields), SortField.Type.STRING, random().nextBoolean())); |
| } |
| // Break ties: |
| sortFields.add(new SortField(prefix + "ID", SortField.Type.INT)); |
| return new Sort(sortFields.toArray(new SortField[sortFields.size()])); |
| } |
| |
| public void testRandom() throws Exception { |
| // We build two indices at once: one normalized (which |
| // ToParentBlockJoinQuery/Collector, |
| // ToChildBlockJoinQuery can query) and the other w/ |
| // the same docs, just fully denormalized: |
| final Directory dir = newDirectory(); |
| final Directory joinDir = newDirectory(); |
| |
| final int numParentDocs = TestUtil.nextInt(random(), 100 * RANDOM_MULTIPLIER, 300 * RANDOM_MULTIPLIER); |
| //final int numParentDocs = 30; |
| |
| // Values for parent fields: |
| final String[][] parentFields = getRandomFields(numParentDocs/2); |
| // Values for child fields: |
| final String[][] childFields = getRandomFields(numParentDocs); |
| |
| final boolean doDeletes = random().nextBoolean(); |
| final List<Integer> toDelete = new ArrayList<>(); |
| |
| // TODO: parallel star join, nested join cases too! |
| final RandomIndexWriter w = new RandomIndexWriter(random(), dir); |
| final RandomIndexWriter joinW = new RandomIndexWriter(random(), joinDir); |
| for(int parentDocID=0;parentDocID<numParentDocs;parentDocID++) { |
| Document parentDoc = new Document(); |
| Document parentJoinDoc = new Document(); |
| Field id = new IntField("parentID", parentDocID, Field.Store.YES); |
| parentDoc.add(id); |
| parentJoinDoc.add(id); |
| parentJoinDoc.add(newStringField("isParent", "x", Field.Store.NO)); |
| for(int field=0;field<parentFields.length;field++) { |
| if (random().nextDouble() < 0.9) { |
| Field f = newStringField("parent" + field, parentFields[field][random().nextInt(parentFields[field].length)], Field.Store.NO); |
| parentDoc.add(f); |
| parentJoinDoc.add(f); |
| } |
| } |
| |
| if (doDeletes) { |
| parentDoc.add(new IntField("blockID", parentDocID, Field.Store.NO)); |
| parentJoinDoc.add(new IntField("blockID", parentDocID, Field.Store.NO)); |
| } |
| |
| final List<Document> joinDocs = new ArrayList<>(); |
| |
| if (VERBOSE) { |
| StringBuilder sb = new StringBuilder(); |
| sb.append("parentID=").append(parentDoc.get("parentID")); |
| for(int fieldID=0;fieldID<parentFields.length;fieldID++) { |
| String s = parentDoc.get("parent" + fieldID); |
| if (s != null) { |
| sb.append(" parent" + fieldID + "=" + s); |
| } |
| } |
| System.out.println(" " + sb.toString()); |
| } |
| |
| final int numChildDocs = TestUtil.nextInt(random(), 1, 20); |
| for(int childDocID=0;childDocID<numChildDocs;childDocID++) { |
| // Denormalize: copy all parent fields into child doc: |
| Document childDoc = TestUtil.cloneDocument(parentDoc); |
| Document joinChildDoc = new Document(); |
| joinDocs.add(joinChildDoc); |
| |
| Field childID = new IntField("childID", childDocID, Field.Store.YES); |
| childDoc.add(childID); |
| joinChildDoc.add(childID); |
| |
| for(int childFieldID=0;childFieldID<childFields.length;childFieldID++) { |
| if (random().nextDouble() < 0.9) { |
| Field f = newStringField("child" + childFieldID, childFields[childFieldID][random().nextInt(childFields[childFieldID].length)], Field.Store.NO); |
| childDoc.add(f); |
| joinChildDoc.add(f); |
| } |
| } |
| |
| if (VERBOSE) { |
| StringBuilder sb = new StringBuilder(); |
| sb.append("childID=").append(joinChildDoc.get("childID")); |
| for(int fieldID=0;fieldID<childFields.length;fieldID++) { |
| String s = joinChildDoc.get("child" + fieldID); |
| if (s != null) { |
| sb.append(" child" + fieldID + "=" + s); |
| } |
| } |
| System.out.println(" " + sb.toString()); |
| } |
| |
| if (doDeletes) { |
| joinChildDoc.add(new IntField("blockID", parentDocID, Field.Store.NO)); |
| } |
| |
| w.addDocument(childDoc); |
| } |
| |
| // Parent last: |
| joinDocs.add(parentJoinDoc); |
| joinW.addDocuments(joinDocs); |
| |
| if (doDeletes && random().nextInt(30) == 7) { |
| toDelete.add(parentDocID); |
| } |
| } |
| |
| for(int deleteID : toDelete) { |
| if (VERBOSE) { |
| System.out.println("DELETE parentID=" + deleteID); |
| } |
| BytesRef term = new BytesRef(); |
| NumericUtils.intToPrefixCodedBytes(deleteID, 0, term); |
| w.deleteDocuments(new Term("blockID", term)); |
| joinW.deleteDocuments(new Term("blockID", term)); |
| } |
| |
| final IndexReader r = w.getReader(); |
| w.shutdown(); |
| final IndexReader joinR = joinW.getReader(); |
| joinW.shutdown(); |
| |
| if (VERBOSE) { |
| System.out.println("TEST: reader=" + r); |
| System.out.println("TEST: joinReader=" + joinR); |
| |
| for(int docIDX=0;docIDX<joinR.maxDoc();docIDX++) { |
| System.out.println(" docID=" + docIDX + " doc=" + joinR.document(docIDX)); |
| } |
| } |
| |
| final IndexSearcher s = newSearcher(r); |
| |
| final IndexSearcher joinS = new IndexSearcher(joinR); |
| |
| final Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "x")))); |
| |
| final int iters = 200*RANDOM_MULTIPLIER; |
| |
| for(int iter=0;iter<iters;iter++) { |
| if (VERBOSE) { |
| System.out.println("TEST: iter=" + (1+iter) + " of " + iters); |
| } |
| |
| final Query childQuery; |
| if (random().nextInt(3) == 2) { |
| final int childFieldID = random().nextInt(childFields.length); |
| childQuery = new TermQuery(new Term("child" + childFieldID, |
| childFields[childFieldID][random().nextInt(childFields[childFieldID].length)])); |
| } else if (random().nextInt(3) == 2) { |
| BooleanQuery bq = new BooleanQuery(); |
| childQuery = bq; |
| final int numClauses = TestUtil.nextInt(random(), 2, 4); |
| boolean didMust = false; |
| for(int clauseIDX=0;clauseIDX<numClauses;clauseIDX++) { |
| Query clause; |
| BooleanClause.Occur occur; |
| if (!didMust && random().nextBoolean()) { |
| occur = random().nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT; |
| clause = new TermQuery(randomChildTerm(childFields[0])); |
| didMust = true; |
| } else { |
| occur = BooleanClause.Occur.SHOULD; |
| final int childFieldID = TestUtil.nextInt(random(), 1, childFields.length - 1); |
| clause = new TermQuery(new Term("child" + childFieldID, |
| childFields[childFieldID][random().nextInt(childFields[childFieldID].length)])); |
| } |
| bq.add(clause, occur); |
| } |
| } else { |
| BooleanQuery bq = new BooleanQuery(); |
| childQuery = bq; |
| |
| bq.add(new TermQuery(randomChildTerm(childFields[0])), |
| BooleanClause.Occur.MUST); |
| final int childFieldID = TestUtil.nextInt(random(), 1, childFields.length - 1); |
| bq.add(new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][random().nextInt(childFields[childFieldID].length)])), |
| random().nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT); |
| } |
| |
| final int x = random().nextInt(4); |
| final ScoreMode agg; |
| if (x == 0) { |
| agg = ScoreMode.None; |
| } else if (x == 1) { |
| agg = ScoreMode.Max; |
| } else if (x == 2) { |
| agg = ScoreMode.Total; |
| } else { |
| agg = ScoreMode.Avg; |
| } |
| |
| final ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, agg); |
| |
| // To run against the block-join index: |
| final Query parentJoinQuery; |
| |
| // Same query as parentJoinQuery, but to run against |
| // the fully denormalized index (so we can compare |
| // results): |
| final Query parentQuery; |
| |
| if (random().nextBoolean()) { |
| parentQuery = childQuery; |
| parentJoinQuery = childJoinQuery; |
| } else { |
| // AND parent field w/ child field |
| final BooleanQuery bq = new BooleanQuery(); |
| parentJoinQuery = bq; |
| final Term parentTerm = randomParentTerm(parentFields[0]); |
| if (random().nextBoolean()) { |
| bq.add(childJoinQuery, BooleanClause.Occur.MUST); |
| bq.add(new TermQuery(parentTerm), |
| BooleanClause.Occur.MUST); |
| } else { |
| bq.add(new TermQuery(parentTerm), |
| BooleanClause.Occur.MUST); |
| bq.add(childJoinQuery, BooleanClause.Occur.MUST); |
| } |
| |
| final BooleanQuery bq2 = new BooleanQuery(); |
| parentQuery = bq2; |
| if (random().nextBoolean()) { |
| bq2.add(childQuery, BooleanClause.Occur.MUST); |
| bq2.add(new TermQuery(parentTerm), |
| BooleanClause.Occur.MUST); |
| } else { |
| bq2.add(new TermQuery(parentTerm), |
| BooleanClause.Occur.MUST); |
| bq2.add(childQuery, BooleanClause.Occur.MUST); |
| } |
| } |
| |
| final Sort parentSort = getRandomSort("parent", parentFields.length); |
| final Sort childSort = getRandomSort("child", childFields.length); |
| |
| if (VERBOSE) { |
| System.out.println("\nTEST: query=" + parentQuery + " joinQuery=" + parentJoinQuery + " parentSort=" + parentSort + " childSort=" + childSort); |
| } |
| |
| // Merge both sorts: |
| final List<SortField> sortFields = new ArrayList<>(Arrays.asList(parentSort.getSort())); |
| sortFields.addAll(Arrays.asList(childSort.getSort())); |
| final Sort parentAndChildSort = new Sort(sortFields.toArray(new SortField[sortFields.size()])); |
| |
| final TopDocs results = s.search(parentQuery, null, r.numDocs(), |
| parentAndChildSort); |
| |
| if (VERBOSE) { |
| System.out.println("\nTEST: normal index gets " + results.totalHits + " hits"); |
| final ScoreDoc[] hits = results.scoreDocs; |
| for(int hitIDX=0;hitIDX<hits.length;hitIDX++) { |
| final StoredDocument doc = s.doc(hits[hitIDX].doc); |
| //System.out.println(" score=" + hits[hitIDX].score + " parentID=" + doc.get("parentID") + " childID=" + doc.get("childID") + " (docID=" + hits[hitIDX].doc + ")"); |
| System.out.println(" parentID=" + doc.get("parentID") + " childID=" + doc.get("childID") + " (docID=" + hits[hitIDX].doc + ")"); |
| FieldDoc fd = (FieldDoc) hits[hitIDX]; |
| if (fd.fields != null) { |
| System.out.print(" "); |
| for(Object o : fd.fields) { |
| if (o instanceof BytesRef) { |
| System.out.print(((BytesRef) o).utf8ToString() + " "); |
| } else { |
| System.out.print(o + " "); |
| } |
| } |
| System.out.println(); |
| } |
| } |
| } |
| |
| final boolean trackScores; |
| final boolean trackMaxScore; |
| if (agg == ScoreMode.None) { |
| trackScores = false; |
| trackMaxScore = false; |
| } else { |
| trackScores = random().nextBoolean(); |
| trackMaxScore = random().nextBoolean(); |
| } |
| final ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(parentSort, 10, trackScores, trackMaxScore); |
| |
| joinS.search(parentJoinQuery, c); |
| |
| final int hitsPerGroup = TestUtil.nextInt(random(), 1, 20); |
| //final int hitsPerGroup = 100; |
| final TopGroups<Integer> joinResults = c.getTopGroups(childJoinQuery, childSort, 0, hitsPerGroup, 0, true); |
| |
| if (VERBOSE) { |
| System.out.println("\nTEST: block join index gets " + (joinResults == null ? 0 : joinResults.groups.length) + " groups; hitsPerGroup=" + hitsPerGroup); |
| if (joinResults != null) { |
| final GroupDocs<Integer>[] groups = joinResults.groups; |
| for(int groupIDX=0;groupIDX<groups.length;groupIDX++) { |
| final GroupDocs<Integer> group = groups[groupIDX]; |
| if (group.groupSortValues != null) { |
| System.out.print(" "); |
| for(Object o : group.groupSortValues) { |
| if (o instanceof BytesRef) { |
| System.out.print(((BytesRef) o).utf8ToString() + " "); |
| } else { |
| System.out.print(o + " "); |
| } |
| } |
| System.out.println(); |
| } |
| |
| assertNotNull(group.groupValue); |
| final StoredDocument parentDoc = joinS.doc(group.groupValue); |
| System.out.println(" group parentID=" + parentDoc.get("parentID") + " (docID=" + group.groupValue + ")"); |
| for(int hitIDX=0;hitIDX<group.scoreDocs.length;hitIDX++) { |
| final StoredDocument doc = joinS.doc(group.scoreDocs[hitIDX].doc); |
| //System.out.println(" score=" + group.scoreDocs[hitIDX].score + " childID=" + doc.get("childID") + " (docID=" + group.scoreDocs[hitIDX].doc + ")"); |
| System.out.println(" childID=" + doc.get("childID") + " child0=" + doc.get("child0") + " (docID=" + group.scoreDocs[hitIDX].doc + ")"); |
| } |
| } |
| } |
| } |
| |
| if (results.totalHits == 0) { |
| assertNull(joinResults); |
| } else { |
| compareHits(r, joinR, results, joinResults); |
| TopDocs b = joinS.search(childJoinQuery, 10); |
| for (ScoreDoc hit : b.scoreDocs) { |
| Explanation explanation = joinS.explain(childJoinQuery, hit.doc); |
| StoredDocument document = joinS.doc(hit.doc - 1); |
| int childId = Integer.parseInt(document.get("childID")); |
| assertTrue(explanation.isMatch()); |
| assertEquals(hit.score, explanation.getValue(), 0.0f); |
| assertEquals(String.format(Locale.ROOT, "Score based on child doc range from %d to %d", hit.doc - 1 - childId, hit.doc - 1), explanation.getDescription()); |
| } |
| } |
| |
| // Test joining in the opposite direction (parent to |
| // child): |
| |
| // Get random query against parent documents: |
| final Query parentQuery2; |
| if (random().nextInt(3) == 2) { |
| final int fieldID = random().nextInt(parentFields.length); |
| parentQuery2 = new TermQuery(new Term("parent" + fieldID, |
| parentFields[fieldID][random().nextInt(parentFields[fieldID].length)])); |
| } else if (random().nextInt(3) == 2) { |
| BooleanQuery bq = new BooleanQuery(); |
| parentQuery2 = bq; |
| final int numClauses = TestUtil.nextInt(random(), 2, 4); |
| boolean didMust = false; |
| for(int clauseIDX=0;clauseIDX<numClauses;clauseIDX++) { |
| Query clause; |
| BooleanClause.Occur occur; |
| if (!didMust && random().nextBoolean()) { |
| occur = random().nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT; |
| clause = new TermQuery(randomParentTerm(parentFields[0])); |
| didMust = true; |
| } else { |
| occur = BooleanClause.Occur.SHOULD; |
| final int fieldID = TestUtil.nextInt(random(), 1, parentFields.length - 1); |
| clause = new TermQuery(new Term("parent" + fieldID, |
| parentFields[fieldID][random().nextInt(parentFields[fieldID].length)])); |
| } |
| bq.add(clause, occur); |
| } |
| } else { |
| BooleanQuery bq = new BooleanQuery(); |
| parentQuery2 = bq; |
| |
| bq.add(new TermQuery(randomParentTerm(parentFields[0])), |
| BooleanClause.Occur.MUST); |
| final int fieldID = TestUtil.nextInt(random(), 1, parentFields.length - 1); |
| bq.add(new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][random().nextInt(parentFields[fieldID].length)])), |
| random().nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT); |
| } |
| |
| if (VERBOSE) { |
| System.out.println("\nTEST: top down: parentQuery2=" + parentQuery2); |
| } |
| |
| // Maps parent query to child docs: |
| final ToChildBlockJoinQuery parentJoinQuery2 = new ToChildBlockJoinQuery(parentQuery2, parentsFilter, random().nextBoolean()); |
| |
| // To run against the block-join index: |
| final Query childJoinQuery2; |
| |
| // Same query as parentJoinQuery, but to run against |
| // the fully denormalized index (so we can compare |
| // results): |
| final Query childQuery2; |
| |
| // apply a filter to children |
| final Filter childFilter2, childJoinFilter2; |
| |
| if (random().nextBoolean()) { |
| childQuery2 = parentQuery2; |
| childJoinQuery2 = parentJoinQuery2; |
| childFilter2 = null; |
| childJoinFilter2 = null; |
| } else { |
| final Term childTerm = randomChildTerm(childFields[0]); |
| if (random().nextBoolean()) { // filtered case |
| childJoinQuery2 = parentJoinQuery2; |
| final Filter f = new QueryWrapperFilter(new TermQuery(childTerm)); |
| childJoinFilter2 = random().nextBoolean() |
| ? new FixedBitSetCachingWrapperFilter(f): f; |
| } else { |
| childJoinFilter2 = null; |
| // AND child field w/ parent query: |
| final BooleanQuery bq = new BooleanQuery(); |
| childJoinQuery2 = bq; |
| if (random().nextBoolean()) { |
| bq.add(parentJoinQuery2, BooleanClause.Occur.MUST); |
| bq.add(new TermQuery(childTerm), |
| BooleanClause.Occur.MUST); |
| } else { |
| bq.add(new TermQuery(childTerm), |
| BooleanClause.Occur.MUST); |
| bq.add(parentJoinQuery2, BooleanClause.Occur.MUST); |
| } |
| } |
| |
| if (random().nextBoolean()) { // filtered case |
| childQuery2 = parentQuery2; |
| final Filter f = new QueryWrapperFilter(new TermQuery(childTerm)); |
| childFilter2 = random().nextBoolean() |
| ? new FixedBitSetCachingWrapperFilter(f): f; |
| } else { |
| childFilter2 = null; |
| final BooleanQuery bq2 = new BooleanQuery(); |
| childQuery2 = bq2; |
| if (random().nextBoolean()) { |
| bq2.add(parentQuery2, BooleanClause.Occur.MUST); |
| bq2.add(new TermQuery(childTerm), |
| BooleanClause.Occur.MUST); |
| } else { |
| bq2.add(new TermQuery(childTerm), |
| BooleanClause.Occur.MUST); |
| bq2.add(parentQuery2, BooleanClause.Occur.MUST); |
| } |
| } |
| } |
| |
| final Sort childSort2 = getRandomSort("child", childFields.length); |
| |
| // Search denormalized index: |
| if (VERBOSE) { |
| System.out.println("TEST: run top down query=" + childQuery2 + |
| " filter=" + childFilter2 + |
| " sort=" + childSort2); |
| } |
| final TopDocs results2 = s.search(childQuery2, childFilter2, r.numDocs(), |
| childSort2); |
| if (VERBOSE) { |
| System.out.println(" " + results2.totalHits + " totalHits:"); |
| for(ScoreDoc sd : results2.scoreDocs) { |
| final StoredDocument doc = s.doc(sd.doc); |
| System.out.println(" childID=" + doc.get("childID") + " parentID=" + doc.get("parentID") + " docID=" + sd.doc); |
| } |
| } |
| |
| // Search join index: |
| if (VERBOSE) { |
| System.out.println("TEST: run top down join query=" + childJoinQuery2 + |
| " filter=" + childJoinFilter2 + " sort=" + childSort2); |
| } |
| TopDocs joinResults2 = joinS.search(childJoinQuery2, childJoinFilter2, joinR.numDocs(), childSort2); |
| if (VERBOSE) { |
| System.out.println(" " + joinResults2.totalHits + " totalHits:"); |
| for(ScoreDoc sd : joinResults2.scoreDocs) { |
| final StoredDocument doc = joinS.doc(sd.doc); |
| final StoredDocument parentDoc = getParentDoc(joinR, parentsFilter, sd.doc); |
| System.out.println(" childID=" + doc.get("childID") + " parentID=" + parentDoc.get("parentID") + " docID=" + sd.doc); |
| } |
| } |
| |
| compareChildHits(r, joinR, results2, joinResults2); |
| } |
| |
| r.close(); |
| joinR.close(); |
| dir.close(); |
| joinDir.close(); |
| } |
| |
| private void compareChildHits(IndexReader r, IndexReader joinR, TopDocs results, TopDocs joinResults) throws Exception { |
| assertEquals(results.totalHits, joinResults.totalHits); |
| assertEquals(results.scoreDocs.length, joinResults.scoreDocs.length); |
| for(int hitCount=0;hitCount<results.scoreDocs.length;hitCount++) { |
| ScoreDoc hit = results.scoreDocs[hitCount]; |
| ScoreDoc joinHit = joinResults.scoreDocs[hitCount]; |
| StoredDocument doc1 = r.document(hit.doc); |
| StoredDocument doc2 = joinR.document(joinHit.doc); |
| assertEquals("hit " + hitCount + " differs", |
| doc1.get("childID"), doc2.get("childID")); |
| // don't compare scores -- they are expected to differ |
| |
| |
| assertTrue(hit instanceof FieldDoc); |
| assertTrue(joinHit instanceof FieldDoc); |
| |
| FieldDoc hit0 = (FieldDoc) hit; |
| FieldDoc joinHit0 = (FieldDoc) joinHit; |
| assertArrayEquals(hit0.fields, joinHit0.fields); |
| } |
| } |
| |
  /**
   * Verifies that the grouped block-join results (joinResults) are consistent
   * with the flat hits from the fully denormalized index (results): within
   * each group, every hit must carry the group's parentID and the childIDs
   * must match hit-for-hit, in order. The flat results are walked in lockstep
   * with the grouped ones.
   */
  private void compareHits(IndexReader r, IndexReader joinR, TopDocs results, TopGroups<Integer> joinResults) throws Exception {
    // results is 'complete'; joinResults is a subset
    int resultUpto = 0;
    int joinGroupUpto = 0;

    final ScoreDoc[] hits = results.scoreDocs;
    final GroupDocs<Integer>[] groupDocs = joinResults.groups;

    while(joinGroupUpto < groupDocs.length) {
      final GroupDocs<Integer> group = groupDocs[joinGroupUpto++];
      final ScoreDoc[] groupHits = group.scoreDocs;
      // groupValue is the parent's docID in the join index:
      assertNotNull(group.groupValue);
      final StoredDocument parentDoc = joinR.document(group.groupValue);
      final String parentID = parentDoc.get("parentID");
      //System.out.println("GROUP groupDoc=" + group.groupDoc + " parent=" + parentDoc);
      assertNotNull(parentID);
      assertTrue(groupHits.length > 0);
      for(int hitIDX=0;hitIDX<groupHits.length;hitIDX++) {
        // The flat results list the same children in the same order, so
        // advance through them in lockstep with the group's hits:
        final StoredDocument nonJoinHit = r.document(hits[resultUpto++].doc);
        final StoredDocument joinHit = joinR.document(groupHits[hitIDX].doc);
        assertEquals(parentID,
                     nonJoinHit.get("parentID"));
        assertEquals(joinHit.get("childID"),
                     nonJoinHit.get("childID"));
      }

      if (joinGroupUpto < groupDocs.length) {
        // Advance non-join hit to the next parentID:
        // (a group may have been truncated to fewer hits than the flat
        // results hold for this parent, so skip the leftovers)
        //System.out.println("  next joingroupUpto=" + joinGroupUpto + " gd.length=" + groupDocs.length + " parentID=" + parentID);
        while(true) {
          assertTrue(resultUpto < hits.length);
          if (!parentID.equals(r.document(hits[resultUpto].doc).get("parentID"))) {
            break;
          }
          resultUpto++;
        }
      }
    }
  }
| |
| public void testMultiChildTypes() throws Exception { |
| |
| final Directory dir = newDirectory(); |
| final RandomIndexWriter w = new RandomIndexWriter(random(), dir); |
| |
| final List<Document> docs = new ArrayList<>(); |
| |
| docs.add(makeJob("java", 2007)); |
| docs.add(makeJob("python", 2010)); |
| docs.add(makeQualification("maths", 1999)); |
| docs.add(makeResume("Lisa", "United Kingdom")); |
| w.addDocuments(docs); |
| |
| IndexReader r = w.getReader(); |
| w.shutdown(); |
| IndexSearcher s = newSearcher(r); |
| |
| // Create a filter that defines "parent" documents in the index - in this case resumes |
| Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume")))); |
| |
| // Define child document criteria (finds an example of relevant work experience) |
| BooleanQuery childJobQuery = new BooleanQuery(); |
| childJobQuery.add(new BooleanClause(new TermQuery(new Term("skill", "java")), Occur.MUST)); |
| childJobQuery.add(new BooleanClause(NumericRangeQuery.newIntRange("year", 2006, 2011, true, true), Occur.MUST)); |
| |
| BooleanQuery childQualificationQuery = new BooleanQuery(); |
| childQualificationQuery.add(new BooleanClause(new TermQuery(new Term("qualification", "maths")), Occur.MUST)); |
| childQualificationQuery.add(new BooleanClause(NumericRangeQuery.newIntRange("year", 1980, 2000, true, true), Occur.MUST)); |
| |
| |
| // Define parent document criteria (find a resident in the UK) |
| Query parentQuery = new TermQuery(new Term("country", "United Kingdom")); |
| |
| // Wrap the child document query to 'join' any matches |
| // up to corresponding parent: |
| ToParentBlockJoinQuery childJobJoinQuery = new ToParentBlockJoinQuery(childJobQuery, parentsFilter, ScoreMode.Avg); |
| ToParentBlockJoinQuery childQualificationJoinQuery = new ToParentBlockJoinQuery(childQualificationQuery, parentsFilter, ScoreMode.Avg); |
| |
| // Combine the parent and nested child queries into a single query for a candidate |
| BooleanQuery fullQuery = new BooleanQuery(); |
| fullQuery.add(new BooleanClause(parentQuery, Occur.MUST)); |
| fullQuery.add(new BooleanClause(childJobJoinQuery, Occur.MUST)); |
| fullQuery.add(new BooleanClause(childQualificationJoinQuery, Occur.MUST)); |
| |
| // Collects all job and qualification child docs for |
| // each resume hit in the top N (sorted by score): |
| ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 10, true, false); |
| |
| s.search(fullQuery, c); |
| |
| // Examine "Job" children |
| TopGroups<Integer> jobResults = c.getTopGroups(childJobJoinQuery, null, 0, 10, 0, true); |
| |
| //assertEquals(1, results.totalHitCount); |
| assertEquals(1, jobResults.totalGroupedHitCount); |
| assertEquals(1, jobResults.groups.length); |
| |
| final GroupDocs<Integer> group = jobResults.groups[0]; |
| assertEquals(1, group.totalHits); |
| |
| StoredDocument childJobDoc = s.doc(group.scoreDocs[0].doc); |
| //System.out.println(" doc=" + group.scoreDocs[0].doc); |
| assertEquals("java", childJobDoc.get("skill")); |
| assertNotNull(group.groupValue); |
| StoredDocument parentDoc = s.doc(group.groupValue); |
| assertEquals("Lisa", parentDoc.get("name")); |
| |
| // Now Examine qualification children |
| TopGroups<Integer> qualificationResults = c.getTopGroups(childQualificationJoinQuery, null, 0, 10, 0, true); |
| |
| assertEquals(1, qualificationResults.totalGroupedHitCount); |
| assertEquals(1, qualificationResults.groups.length); |
| |
| final GroupDocs<Integer> qGroup = qualificationResults.groups[0]; |
| assertEquals(1, qGroup.totalHits); |
| |
| StoredDocument childQualificationDoc = s.doc(qGroup.scoreDocs[0].doc); |
| assertEquals("maths", childQualificationDoc.get("qualification")); |
| assertNotNull(qGroup.groupValue); |
| parentDoc = s.doc(qGroup.groupValue); |
| assertEquals("Lisa", parentDoc.get("name")); |
| |
| r.close(); |
| dir.close(); |
| } |
| |
| public void testAdvanceSingleParentSingleChild() throws Exception { |
| Directory dir = newDirectory(); |
| RandomIndexWriter w = new RandomIndexWriter(random(), dir); |
| Document childDoc = new Document(); |
| childDoc.add(newStringField("child", "1", Field.Store.NO)); |
| Document parentDoc = new Document(); |
| parentDoc.add(newStringField("parent", "1", Field.Store.NO)); |
| w.addDocuments(Arrays.asList(childDoc, parentDoc)); |
| IndexReader r = w.getReader(); |
| w.shutdown(); |
| IndexSearcher s = newSearcher(r); |
| Query tq = new TermQuery(new Term("child", "1")); |
| Filter parentFilter = new FixedBitSetCachingWrapperFilter( |
| new QueryWrapperFilter( |
| new TermQuery(new Term("parent", "1")))); |
| |
| ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(tq, parentFilter, ScoreMode.Avg); |
| Weight weight = s.createNormalizedWeight(q); |
| DocIdSetIterator disi = weight.scorer(s.getIndexReader().leaves().get(0), null); |
| assertEquals(1, disi.advance(1)); |
| r.close(); |
| dir.close(); |
| } |
| |
| public void testAdvanceSingleParentNoChild() throws Exception { |
| Directory dir = newDirectory(); |
| RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMergePolicy(new LogDocMergePolicy())); |
| Document parentDoc = new Document(); |
| parentDoc.add(newStringField("parent", "1", Field.Store.NO)); |
| parentDoc.add(newStringField("isparent", "yes", Field.Store.NO)); |
| w.addDocuments(Arrays.asList(parentDoc)); |
| |
| // Add another doc so scorer is not null |
| parentDoc = new Document(); |
| parentDoc.add(newStringField("parent", "2", Field.Store.NO)); |
| parentDoc.add(newStringField("isparent", "yes", Field.Store.NO)); |
| Document childDoc = new Document(); |
| childDoc.add(newStringField("child", "2", Field.Store.NO)); |
| w.addDocuments(Arrays.asList(childDoc, parentDoc)); |
| |
| // Need single seg: |
| w.forceMerge(1); |
| IndexReader r = w.getReader(); |
| w.shutdown(); |
| IndexSearcher s = newSearcher(r); |
| Query tq = new TermQuery(new Term("child", "2")); |
| Filter parentFilter = new FixedBitSetCachingWrapperFilter( |
| new QueryWrapperFilter( |
| new TermQuery(new Term("isparent", "yes")))); |
| |
| ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(tq, parentFilter, ScoreMode.Avg); |
| Weight weight = s.createNormalizedWeight(q); |
| DocIdSetIterator disi = weight.scorer(s.getIndexReader().leaves().get(0), null); |
| assertEquals(2, disi.advance(0)); |
| r.close(); |
| dir.close(); |
| } |
| |
| public void testGetTopGroups() throws Exception { |
| |
| final Directory dir = newDirectory(); |
| final RandomIndexWriter w = new RandomIndexWriter(random(), dir); |
| |
| final List<Document> docs = new ArrayList<>(); |
| docs.add(makeJob("ruby", 2005)); |
| docs.add(makeJob("java", 2006)); |
| docs.add(makeJob("java", 2010)); |
| docs.add(makeJob("java", 2012)); |
| Collections.shuffle(docs, random()); |
| docs.add(makeResume("Frank", "United States")); |
| |
| addSkillless(w); |
| w.addDocuments(docs); |
| addSkillless(w); |
| |
| IndexReader r = w.getReader(); |
| w.shutdown(); |
| IndexSearcher s = new IndexSearcher(r); |
| |
| // Create a filter that defines "parent" documents in the index - in this case resumes |
| Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume")))); |
| |
| // Define child document criteria (finds an example of relevant work experience) |
| BooleanQuery childQuery = new BooleanQuery(); |
| childQuery.add(new BooleanClause(new TermQuery(new Term("skill", "java")), Occur.MUST)); |
| childQuery.add(new BooleanClause(NumericRangeQuery.newIntRange("year", 2006, 2011, true, true), Occur.MUST)); |
| |
| // Wrap the child document query to 'join' any matches |
| // up to corresponding parent: |
| ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg); |
| |
| ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 2, true, true); |
| s.search(childJoinQuery, c); |
| |
| //Get all child documents within groups |
| @SuppressWarnings({"unchecked","rawtypes"}) |
| TopGroups<Integer>[] getTopGroupsResults = new TopGroups[2]; |
| getTopGroupsResults[0] = c.getTopGroups(childJoinQuery, null, 0, 10, 0, true); |
| getTopGroupsResults[1] = c.getTopGroupsWithAllChildDocs(childJoinQuery, null, 0, 0, true); |
| |
| for (TopGroups<Integer> results : getTopGroupsResults) { |
| assertFalse(Float.isNaN(results.maxScore)); |
| assertEquals(2, results.totalGroupedHitCount); |
| assertEquals(1, results.groups.length); |
| |
| final GroupDocs<Integer> group = results.groups[0]; |
| assertEquals(2, group.totalHits); |
| assertFalse(Float.isNaN(group.score)); |
| assertNotNull(group.groupValue); |
| StoredDocument parentDoc = s.doc(group.groupValue); |
| assertEquals("Frank", parentDoc.get("name")); |
| |
| assertEquals(2, group.scoreDocs.length); //all matched child documents collected |
| |
| for (ScoreDoc scoreDoc : group.scoreDocs) { |
| StoredDocument childDoc = s.doc(scoreDoc.doc); |
| assertEquals("java", childDoc.get("skill")); |
| int year = Integer.parseInt(childDoc.get("year")); |
| assertTrue(year >= 2006 && year <= 2011); |
| } |
| } |
| |
| //Get part of child documents |
| TopGroups<Integer> boundedResults = c.getTopGroups(childJoinQuery, null, 0, 1, 0, true); |
| assertFalse(Float.isNaN(boundedResults.maxScore)); |
| assertEquals(2, boundedResults.totalGroupedHitCount); |
| assertEquals(1, boundedResults.groups.length); |
| |
| final GroupDocs<Integer> group = boundedResults.groups[0]; |
| assertEquals(2, group.totalHits); |
| assertFalse(Float.isNaN(group.score)); |
| assertNotNull(group.groupValue); |
| StoredDocument parentDoc = s.doc(group.groupValue); |
| assertEquals("Frank", parentDoc.get("name")); |
| |
| assertEquals(1, group.scoreDocs.length); //not all matched child documents collected |
| |
| for (ScoreDoc scoreDoc : group.scoreDocs) { |
| StoredDocument childDoc = s.doc(scoreDoc.doc); |
| assertEquals("java", childDoc.get("skill")); |
| int year = Integer.parseInt(childDoc.get("year")); |
| assertTrue(year >= 2006 && year <= 2011); |
| } |
| |
| r.close(); |
| dir.close(); |
| } |
| |
| // LUCENE-4968 |
| public void testSometimesParentOnlyMatches() throws Exception { |
| Directory d = newDirectory(); |
| RandomIndexWriter w = new RandomIndexWriter(random(), d); |
| Document parent = new Document(); |
| parent.add(new StoredField("parentID", "0")); |
| parent.add(newTextField("parentText", "text", Field.Store.NO)); |
| parent.add(newStringField("isParent", "yes", Field.Store.NO)); |
| |
| List<Document> docs = new ArrayList<>(); |
| |
| Document child = new Document(); |
| docs.add(child); |
| child.add(new StoredField("childID", "0")); |
| child.add(newTextField("childText", "text", Field.Store.NO)); |
| |
| // parent last: |
| docs.add(parent); |
| w.addDocuments(docs); |
| |
| docs.clear(); |
| |
| parent = new Document(); |
| parent.add(newTextField("parentText", "text", Field.Store.NO)); |
| parent.add(newStringField("isParent", "yes", Field.Store.NO)); |
| parent.add(new StoredField("parentID", "1")); |
| |
| // parent last: |
| docs.add(parent); |
| w.addDocuments(docs); |
| |
| IndexReader r = w.getReader(); |
| w.shutdown(); |
| |
| Query childQuery = new TermQuery(new Term("childText", "text")); |
| Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes")))); |
| ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg); |
| BooleanQuery parentQuery = new BooleanQuery(); |
| parentQuery.add(childJoinQuery, Occur.SHOULD); |
| parentQuery.add(new TermQuery(new Term("parentText", "text")), Occur.SHOULD); |
| |
| ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type.STRING)), |
| 10, true, true); |
| newSearcher(r).search(parentQuery, c); |
| TopGroups<Integer> groups = c.getTopGroups(childJoinQuery, null, 0, 10, 0, false); |
| |
| // Two parents: |
| assertEquals(2, groups.totalGroupCount.intValue()); |
| |
| // One child docs: |
| assertEquals(1, groups.totalGroupedHitCount); |
| |
| GroupDocs<Integer> group = groups.groups[0]; |
| StoredDocument doc = r.document(group.groupValue.intValue()); |
| assertEquals("0", doc.get("parentID")); |
| |
| group = groups.groups[1]; |
| doc = r.document(group.groupValue.intValue()); |
| assertEquals("1", doc.get("parentID")); |
| |
| r.close(); |
| d.close(); |
| } |
| |
| // LUCENE-4968 |
| public void testChildQueryNeverMatches() throws Exception { |
| Directory d = newDirectory(); |
| RandomIndexWriter w = new RandomIndexWriter(random(), d); |
| Document parent = new Document(); |
| parent.add(new StoredField("parentID", "0")); |
| parent.add(newTextField("parentText", "text", Field.Store.NO)); |
| parent.add(newStringField("isParent", "yes", Field.Store.NO)); |
| |
| List<Document> docs = new ArrayList<>(); |
| |
| Document child = new Document(); |
| docs.add(child); |
| child.add(new StoredField("childID", "0")); |
| child.add(newTextField("childText", "text", Field.Store.NO)); |
| |
| // parent last: |
| docs.add(parent); |
| w.addDocuments(docs); |
| |
| docs.clear(); |
| |
| parent = new Document(); |
| parent.add(newTextField("parentText", "text", Field.Store.NO)); |
| parent.add(newStringField("isParent", "yes", Field.Store.NO)); |
| parent.add(new StoredField("parentID", "1")); |
| |
| // parent last: |
| docs.add(parent); |
| w.addDocuments(docs); |
| |
| IndexReader r = w.getReader(); |
| w.shutdown(); |
| |
| // never matches: |
| Query childQuery = new TermQuery(new Term("childText", "bogus")); |
| Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes")))); |
| ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg); |
| BooleanQuery parentQuery = new BooleanQuery(); |
| parentQuery.add(childJoinQuery, Occur.SHOULD); |
| parentQuery.add(new TermQuery(new Term("parentText", "text")), Occur.SHOULD); |
| |
| ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type.STRING)), |
| 10, true, true); |
| newSearcher(r).search(parentQuery, c); |
| TopGroups<Integer> groups = c.getTopGroups(childJoinQuery, null, 0, 10, 0, false); |
| |
| // Two parents: |
| assertEquals(2, groups.totalGroupCount.intValue()); |
| |
| // One child docs: |
| assertEquals(0, groups.totalGroupedHitCount); |
| |
| GroupDocs<Integer> group = groups.groups[0]; |
| StoredDocument doc = r.document(group.groupValue.intValue()); |
| assertEquals("0", doc.get("parentID")); |
| |
| group = groups.groups[1]; |
| doc = r.document(group.groupValue.intValue()); |
| assertEquals("1", doc.get("parentID")); |
| |
| r.close(); |
| d.close(); |
| } |
| |
| // LUCENE-4968 |
| public void testChildQueryMatchesParent() throws Exception { |
| Directory d = newDirectory(); |
| RandomIndexWriter w = new RandomIndexWriter(random(), d); |
| Document parent = new Document(); |
| parent.add(new StoredField("parentID", "0")); |
| parent.add(newTextField("parentText", "text", Field.Store.NO)); |
| parent.add(newStringField("isParent", "yes", Field.Store.NO)); |
| |
| List<Document> docs = new ArrayList<>(); |
| |
| Document child = new Document(); |
| docs.add(child); |
| child.add(new StoredField("childID", "0")); |
| child.add(newTextField("childText", "text", Field.Store.NO)); |
| |
| // parent last: |
| docs.add(parent); |
| w.addDocuments(docs); |
| |
| docs.clear(); |
| |
| parent = new Document(); |
| parent.add(newTextField("parentText", "text", Field.Store.NO)); |
| parent.add(newStringField("isParent", "yes", Field.Store.NO)); |
| parent.add(new StoredField("parentID", "1")); |
| |
| // parent last: |
| docs.add(parent); |
| w.addDocuments(docs); |
| |
| IndexReader r = w.getReader(); |
| w.shutdown(); |
| |
| // illegally matches parent: |
| Query childQuery = new TermQuery(new Term("parentText", "text")); |
| Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes")))); |
| ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg); |
| BooleanQuery parentQuery = new BooleanQuery(); |
| parentQuery.add(childJoinQuery, Occur.SHOULD); |
| parentQuery.add(new TermQuery(new Term("parentText", "text")), Occur.SHOULD); |
| |
| ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type.STRING)), |
| 10, true, true); |
| |
| try { |
| newSearcher(r).search(parentQuery, c); |
| fail("should have hit exception"); |
| } catch (IllegalStateException ise) { |
| // expected |
| } |
| |
| r.close(); |
| d.close(); |
| } |
| |
| public void testAdvanceSingleDeletedParentNoChild() throws Exception { |
| |
| final Directory dir = newDirectory(); |
| final RandomIndexWriter w = new RandomIndexWriter(random(), dir); |
| |
| // First doc with 1 children |
| Document parentDoc = new Document(); |
| parentDoc.add(newStringField("parent", "1", Field.Store.NO)); |
| parentDoc.add(newStringField("isparent", "yes", Field.Store.NO)); |
| Document childDoc = new Document(); |
| childDoc.add(newStringField("child", "1", Field.Store.NO)); |
| w.addDocuments(Arrays.asList(childDoc, parentDoc)); |
| |
| parentDoc = new Document(); |
| parentDoc.add(newStringField("parent", "2", Field.Store.NO)); |
| parentDoc.add(newStringField("isparent", "yes", Field.Store.NO)); |
| w.addDocuments(Arrays.asList(parentDoc)); |
| |
| w.deleteDocuments(new Term("parent", "2")); |
| |
| parentDoc = new Document(); |
| parentDoc.add(newStringField("parent", "2", Field.Store.NO)); |
| parentDoc.add(newStringField("isparent", "yes", Field.Store.NO)); |
| childDoc = new Document(); |
| childDoc.add(newStringField("child", "2", Field.Store.NO)); |
| w.addDocuments(Arrays.asList(childDoc, parentDoc)); |
| |
| IndexReader r = w.getReader(); |
| w.shutdown(); |
| IndexSearcher s = newSearcher(r); |
| |
| // Create a filter that defines "parent" documents in the index - in this case resumes |
| Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isparent", "yes")))); |
| |
| Query parentQuery = new TermQuery(new Term("parent", "2")); |
| |
| ToChildBlockJoinQuery parentJoinQuery = new ToChildBlockJoinQuery(parentQuery, parentsFilter, random().nextBoolean()); |
| TopDocs topdocs = s.search(parentJoinQuery, 3); |
| assertEquals(1, topdocs.totalHits); |
| |
| r.close(); |
| dir.close(); |
| } |
| } |