blob: 0d8deaa6d35d8a2533f13e16dda69c036804fc18 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.join;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.MultiBits;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.similarities.BasicStats;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.SimilarityBase;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
public class TestBlockJoin extends LuceneTestCase {
// One resume...
/**
 * Builds a parent document describing one resume.
 *
 * <p>The document is tagged with {@code docType=resume}; only the {@code name} field is stored,
 * {@code docType} and {@code country} are added with {@code Store.NO}.
 *
 * @param name value of the stored {@code name} field
 * @param country value of the {@code country} field
 * @return the newly created resume document
 */
private Document makeResume(String name, String country) {
  final Document doc = new Document();
  // Marker field; the tests in this class select parent documents through it.
  doc.add(newStringField("docType", "resume", Store.NO));
  doc.add(newStringField("name", name, Store.YES));
  doc.add(newStringField("country", country, Store.NO));
  return doc;
}
// ... has multiple jobs
/**
 * Builds a child document describing one job.
 *
 * <p>The {@code year} is added twice under the same field name: once as an {@link IntPoint} and
 * once as a {@link StoredField}.
 *
 * @param skill value of the stored {@code skill} field
 * @param year value of the {@code year} field
 * @return the newly created job document
 */
private Document makeJob(String skill, int year) {
  final Document doc = new Document();
  doc.add(newStringField("skill", skill, Store.YES));
  doc.add(new IntPoint("year", year));
  doc.add(new StoredField("year", year));
  return doc;
}
// ... has multiple qualifications
/**
 * Builds a child document describing one qualification.
 *
 * <p>Unlike {@code makeJob}, the {@code year} is only added as an {@link IntPoint}; no stored
 * copy is added.
 *
 * @param qualification value of the stored {@code qualification} field
 * @param year value of the {@code year} point field
 * @return the newly created qualification document
 */
private Document makeQualification(String qualification, int year) {
  final Document doc = new Document();
  doc.add(newStringField("qualification", qualification, Store.YES));
  doc.add(new IntPoint("year", year));
  return doc;
}
/**
 * Checks that the weights created for {@link ToParentBlockJoinQuery} and
 * {@link ToChildBlockJoinQuery} report the terms of the query they wrap.
 *
 * <p>A single {@link TermQuery} is wrapped (directly by the to-parent query, and indirectly by
 * the to-child query), so exactly one term is expected from each weight. The index is empty;
 * only the weights are inspected.
 *
 * <p>The directory and the reader are managed with try-with-resources so that both are closed
 * even when one of the assertions fails.
 */
public void testExtractTerms() throws Exception {
  TermQuery termQuery = new TermQuery(new Term("field", "value"));
  QueryBitSetProducer bitSetProducer = new QueryBitSetProducer(new MatchNoDocsQuery());
  ToParentBlockJoinQuery toParentBlockJoinQuery = new ToParentBlockJoinQuery(termQuery, bitSetProducer, ScoreMode.None);
  ToChildBlockJoinQuery toChildBlockJoinQuery = new ToChildBlockJoinQuery(toParentBlockJoinQuery, bitSetProducer);

  try (Directory directory = newDirectory()) {
    // Open and immediately close a writer so the directory holds an (empty) index.
    final IndexWriter w = new IndexWriter(directory, new IndexWriterConfig(new MockAnalyzer(random())));
    w.close();

    try (IndexReader indexReader = DirectoryReader.open(directory)) {
      IndexSearcher indexSearcher = new IndexSearcher(indexReader);

      // Fully qualified: the unqualified name ScoreMode is used above for the join score mode.
      Weight weight = toParentBlockJoinQuery.createWeight(indexSearcher, org.apache.lucene.search.ScoreMode.COMPLETE_NO_SCORES, 1f);
      Set<Term> terms = new HashSet<>();
      weight.extractTerms(terms);
      assertEquals(1, terms.size());

      weight = toChildBlockJoinQuery.createWeight(indexSearcher, org.apache.lucene.search.ScoreMode.COMPLETE_NO_SCORES, 1f);
      terms = new HashSet<>();
      weight.extractTerms(terms);
      assertEquals(1, terms.size());
    }
  }
}
/**
 * Indexes two resume blocks with a plain {@link IndexWriter} (merging disabled), joins matching
 * jobs up to their resumes, and then looks up the matching children of each parent hit.
 *
 * <p>The child criteria (skill "java" with a year in the 2006..2011 range query) are expected to
 * match one job in each block, so both resumes must be returned and each must report exactly one
 * matching "java" child.
 */
public void testEmptyChildFilter() throws Exception {
  final Directory directory = newDirectory();
  final IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  // No merging: the test relies on a certain segment setup.
  iwc.setMergePolicy(NoMergePolicy.INSTANCE);
  final IndexWriter writer = new IndexWriter(directory, iwc);

  // Every block is added with its child jobs first and the parent resume last.
  writer.addDocuments(Arrays.asList(
      makeJob("java", 2007),
      makeJob("python", 2010),
      makeResume("Lisa", "United Kingdom")));
  writer.addDocuments(Arrays.asList(
      makeJob("ruby", 2005),
      makeJob("java", 2006),
      makeResume("Frank", "United States")));
  writer.commit();

  final IndexReader reader = DirectoryReader.open(writer);
  writer.close();
  final IndexSearcher searcher = newSearcher(reader);

  final BitSetProducer resumes = new QueryBitSetProducer(new TermQuery(new Term("docType", "resume")));
  CheckJoinIndex.check(reader, resumes);

  final Query matchingJob = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("skill", "java")), Occur.MUST)
      .add(IntPoint.newRangeQuery("year", 2006, 2011), Occur.MUST)
      .build();
  final ToParentBlockJoinQuery jobToResume = new ToParentBlockJoinQuery(matchingJob, resumes, ScoreMode.Avg);

  final Query combined = new BooleanQuery.Builder()
      .add(jobToResume, Occur.MUST)
      .add(new MatchAllDocsQuery(), Occur.MUST)
      .build();

  final TopDocs parentHits = searcher.search(combined, 2);
  assertEquals(2, parentHits.totalHits.value);
  final String firstName = searcher.doc(parentHits.scoreDocs[0].doc).get("name");
  final String secondName = searcher.doc(parentHits.scoreDocs[1].doc).get("name");
  assertEquals(asSet("Lisa", "Frank"), asSet(firstName, secondName));

  // Both parent hits (exactly two, as verified above) must expose one matching "java" child.
  for (ScoreDoc parentHit : parentHits.scoreDocs) {
    final ParentChildrenBlockJoinQuery childrenOfHit =
        new ParentChildrenBlockJoinQuery(resumes, matchingJob, parentHit.doc);
    final TopDocs childHits = searcher.search(childrenOfHit, 1);
    assertEquals(1, childHits.totalHits.value);
    assertEquals("java", searcher.doc(childHits.scoreDocs[0].doc).get("skill"));
  }

  reader.close();
  directory.close();
}
// You must use ToParentBlockJoinSearcher if you want to do BQ SHOULD queries:
/**
 * Combines a parent-level query and a child-to-parent join as two SHOULD clauses of one
 * {@link BooleanQuery} and checks that both resumes are returned, each with exactly one matching
 * "java" child.
 */
public void testBQShouldJoinedChild() throws Exception {
  final Directory directory = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  // Every block is added with its child jobs first and the parent resume last.
  writer.addDocuments(Arrays.asList(
      makeJob("java", 2007),
      makeJob("python", 2010),
      makeResume("Lisa", "United Kingdom")));
  writer.addDocuments(Arrays.asList(
      makeJob("ruby", 2005),
      makeJob("java", 2006),
      makeResume("Frank", "United States")));

  final IndexReader reader = writer.getReader();
  writer.close();
  final IndexSearcher searcher = newSearcher(reader, false);

  // Parent documents of the index are the resumes.
  final BitSetProducer resumes = new QueryBitSetProducer(new TermQuery(new Term("docType", "resume")));
  CheckJoinIndex.check(reader, resumes);

  // Child criteria: an example of relevant work experience.
  final Query matchingJob = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("skill", "java")), Occur.MUST)
      .add(IntPoint.newRangeQuery("year", 2006, 2011), Occur.MUST)
      .build();

  // Parent criteria: a resident of the UK.
  final Query livesInUk = new TermQuery(new Term("country", "United Kingdom"));

  // Lift the child matches up to the corresponding parent.
  final ToParentBlockJoinQuery jobToResume = new ToParentBlockJoinQuery(matchingJob, resumes, ScoreMode.Avg);

  // Either clause alone is enough for a resume to match.
  final Query candidates = new BooleanQuery.Builder()
      .add(livesInUk, Occur.SHOULD)
      .add(jobToResume, Occur.SHOULD)
      .build();

  final TopDocs parentHits = searcher.search(candidates, 2);
  assertEquals(2, parentHits.totalHits.value);
  final String firstName = searcher.doc(parentHits.scoreDocs[0].doc).get("name");
  final String secondName = searcher.doc(parentHits.scoreDocs[1].doc).get("name");
  assertEquals(asSet("Lisa", "Frank"), asSet(firstName, secondName));

  // Both parent hits (exactly two, as verified above) must expose one matching "java" child.
  for (ScoreDoc parentHit : parentHits.scoreDocs) {
    final ParentChildrenBlockJoinQuery childrenOfHit =
        new ParentChildrenBlockJoinQuery(resumes, matchingJob, parentHit.doc);
    final TopDocs childHits = searcher.search(childrenOfHit, 1);
    assertEquals(1, childHits.totalHits.value);
    assertEquals("java", searcher.doc(childHits.scoreDocs[0].doc).get("skill"));
  }

  reader.close();
  directory.close();
}
/**
 * Exercises the basic block join in both directions on a two-resume index.
 *
 * <ol>
 *   <li>Child to parent: a UK resident with a matching "java" job must resolve to "Lisa", and her
 *       single matching child must be the "java" job.
 *   <li>Parent to child: the UK parent query joined down to children and intersected with the
 *       child criteria must yield the 2007 "java" job whose parent is "Lisa".
 *   <li>Adding a FILTER clause on a skill that is never indexed must leave no hits.
 * </ol>
 */
public void testSimple() throws Exception {
  final Directory directory = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  // Every block is added with its child jobs first and the parent resume last.
  writer.addDocuments(Arrays.asList(
      makeJob("java", 2007),
      makeJob("python", 2010),
      makeResume("Lisa", "United Kingdom")));
  writer.addDocuments(Arrays.asList(
      makeJob("ruby", 2005),
      makeJob("java", 2006),
      makeResume("Frank", "United States")));

  final IndexReader reader = writer.getReader();
  writer.close();
  final IndexSearcher searcher = newSearcher(reader, false);

  // Parent documents of the index are the resumes.
  final BitSetProducer resumes = new QueryBitSetProducer(new TermQuery(new Term("docType", "resume")));
  CheckJoinIndex.check(reader, resumes);

  // Child criteria: an example of relevant work experience.
  final Query matchingJob = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("skill", "java")), Occur.MUST)
      .add(IntPoint.newRangeQuery("year", 2006, 2011), Occur.MUST)
      .build();

  // Parent criteria: a resident of the UK.
  final Query livesInUk = new TermQuery(new Term("country", "United Kingdom"));

  // Lift the child matches up to the corresponding parent.
  final ToParentBlockJoinQuery jobToResume = new ToParentBlockJoinQuery(matchingJob, resumes, ScoreMode.Avg);

  // A candidate must satisfy the parent criteria and own a matching child.
  final Query candidates = new BooleanQuery.Builder()
      .add(livesInUk, Occur.MUST)
      .add(jobToResume, Occur.MUST)
      .build();

  CheckHits.checkHitCollector(random(), candidates, "country", searcher, new int[] {2});

  final TopDocs parentHits = searcher.search(candidates, 1);
  assertEquals(1, parentHits.totalHits.value);
  final int parentDocID = parentHits.scoreDocs[0].doc;
  assertEquals("Lisa", searcher.doc(parentDocID).get("name"));

  final ParentChildrenBlockJoinQuery childrenOfHit =
      new ParentChildrenBlockJoinQuery(resumes, matchingJob, parentDocID);
  final TopDocs matchingChildren = searcher.search(childrenOfHit, 1);
  assertEquals(1, matchingChildren.totalHits.value);
  assertEquals("java", searcher.doc(matchingChildren.scoreDocs[0].doc).get("skill"));

  // Now join in the other direction: map parent hits to their child docs.
  final ToChildBlockJoinQuery resumeToJobs = new ToChildBlockJoinQuery(livesInUk, resumes);
  final BooleanQuery.Builder childSide = new BooleanQuery.Builder()
      .add(resumeToJobs, Occur.MUST)
      .add(matchingJob, Occur.MUST);

  final TopDocs childHits = searcher.search(childSide.build(), 10);
  assertEquals(1, childHits.totalHits.value);
  final int childDocID = childHits.scoreDocs[0].doc;
  final Document childDoc = searcher.doc(childDocID);
  assertEquals("java", childDoc.get("skill"));
  assertEquals(2007, childDoc.getField("year").numericValue());
  assertEquals("Lisa", getParentDoc(reader, resumes, childDocID).get("name"));

  // Same child-side query, additionally filtered on a skill nobody has.
  childSide.add(new TermQuery(new Term("skill", "foosball")), Occur.FILTER);
  assertEquals(0, searcher.count(childSide.build()));

  reader.close();
  directory.close();
}
/**
 * Creates a query matching documents whose {@code skill} field holds the given value.
 *
 * @param skill the skill term to look for
 * @return a {@link TermQuery} on the {@code skill} field
 */
protected Query skill(String skill) {
  final Term skillTerm = new Term("skill", skill);
  return new TermQuery(skillTerm);
}
/**
 * Checks block joins combined with FILTER clauses, in both join directions.
 *
 * <p>Two resume blocks are indexed in random order, with the children of each block shuffled and
 * with childless "Skillless" resumes randomly interleaved between the blocks. The test then
 * verifies hit counts for the child-to-parent join under several parent-level filters, and for
 * the parent-to-child join under several child-level filters.
 */
public void testSimpleFilter() throws Exception {
  final Directory directory = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  // Children are shuffled first; the parent resume is appended afterwards so it stays last.
  final List<Document> lisaBlock = new ArrayList<>();
  lisaBlock.add(makeJob("java", 2007));
  lisaBlock.add(makeJob("python", 2010));
  Collections.shuffle(lisaBlock, random());
  lisaBlock.add(makeResume("Lisa", "United Kingdom"));

  final List<Document> frankBlock = new ArrayList<>();
  frankBlock.add(makeJob("ruby", 2005));
  frankBlock.add(makeJob("java", 2006));
  Collections.shuffle(frankBlock, random());
  frankBlock.add(makeResume("Frank", "United States"));

  // Index the two blocks in random order, possibly surrounded by childless resumes.
  addSkillless(writer);
  final boolean lisaFirst = random().nextBoolean();
  writer.addDocuments(lisaFirst ? lisaBlock : frankBlock);
  addSkillless(writer);
  writer.addDocuments(lisaFirst ? frankBlock : lisaBlock);
  addSkillless(writer);

  final IndexReader reader = writer.getReader();
  writer.close();
  final IndexSearcher searcher = newSearcher(reader, false);

  // Parent documents of the index are the resumes.
  final BitSetProducer resumes = new QueryBitSetProducer(new TermQuery(new Term("docType", "resume")));
  CheckJoinIndex.check(reader, resumes);

  // Child criteria: an example of relevant work experience.
  final Query matchingJob = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("skill", "java")), Occur.MUST)
      .add(IntPoint.newRangeQuery("year", 2006, 2011), Occur.MUST)
      .build();

  // Parent criteria: a resident of the UK.
  final Query livesInUk = new TermQuery(new Term("country", "United Kingdom"));

  // Lift the child matches up to the corresponding parent.
  final ToParentBlockJoinQuery jobToResume = new ToParentBlockJoinQuery(matchingJob, resumes, ScoreMode.Avg);

  assertEquals("no filter - both passed", 2, searcher.count(jobToResume));

  // A filter that accepts every resume must not change the result.
  Query filtered = new BooleanQuery.Builder()
      .add(jobToResume, Occur.MUST)
      .add(new TermQuery(new Term("docType", "resume")), Occur.FILTER)
      .build();
  assertEquals("dummy filter passes everyone ", 2, searcher.count(filtered));

  // The same check is deliberately run a second time with a freshly built, equal query.
  filtered = new BooleanQuery.Builder()
      .add(jobToResume, Occur.MUST)
      .add(new TermQuery(new Term("docType", "resume")), Occur.FILTER)
      .build();
  assertEquals("dummy filter passes everyone ", 2, searcher.count(filtered));

  // A filter on a country that is never indexed must remove all hits.
  filtered = new BooleanQuery.Builder()
      .add(jobToResume, Occur.MUST)
      .add(new TermQuery(new Term("country", "Oz")), Occur.FILTER)
      .build();
  assertEquals("noone live there", 0, searcher.count(filtered));

  // Restrict to UK residents.
  filtered = new BooleanQuery.Builder()
      .add(jobToResume, Occur.MUST)
      .add(livesInUk, Occur.FILTER)
      .build();
  final TopDocs ukHits = searcher.search(filtered, 1);
  assertEquals("has filter - single passed", 1, ukHits.totalHits.value);
  assertEquals("Lisa", reader.document(ukHits.scoreDocs[0].doc).get("name"));

  // Restrict to US residents.
  filtered = new BooleanQuery.Builder()
      .add(jobToResume, Occur.MUST)
      .add(new TermQuery(new Term("country", "United States")), Occur.FILTER)
      .build();
  final TopDocs usHits = searcher.search(filtered, 1);
  assertEquals("has filter - single passed", 1, usHits.totalHits.value);
  assertEquals("Frank", reader.document(usHits.scoreDocs[0].doc).get("name"));

  // Opposite direction: from US resumes down to their jobs.
  final TermQuery livesInUs = new TermQuery(new Term("country", "United States"));
  assertEquals("@ US we have java and ruby", 2,
      searcher.count(new ToChildBlockJoinQuery(livesInUs, resumes)));

  filtered = new BooleanQuery.Builder()
      .add(new ToChildBlockJoinQuery(livesInUs, resumes), Occur.MUST)
      .add(skill("java"), Occur.FILTER)
      .build();
  assertEquals("java skills in US", 1, searcher.count(filtered));

  final Query rubyOrPython = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("skill", "ruby")), Occur.SHOULD)
      .add(new TermQuery(new Term("skill", "python")), Occur.SHOULD)
      .build();
  filtered = new BooleanQuery.Builder()
      .add(new ToChildBlockJoinQuery(livesInUs, resumes), Occur.MUST)
      .add(rubyOrPython, Occur.FILTER)
      .build();
  assertEquals("ruby skills in US", 1, searcher.count(filtered));

  reader.close();
  directory.close();
}
/**
 * Randomly (on a coin flip) adds a resume named "Skillless" as a single document, i.e. a parent
 * without any child jobs. Its country is picked at random between "United Kingdom" and
 * "United States".
 *
 * @param w the writer that receives the document
 * @throws IOException if the writer fails to add the document
 */
private void addSkillless(final RandomIndexWriter w) throws IOException {
  if (!random().nextBoolean()) {
    return;
  }
  final String country = random().nextBoolean() ? "United Kingdom" : "United States";
  w.addDocument(makeResume("Skillless", country));
}
/**
 * Loads the parent document of a child, given the child's top-level doc ID.
 *
 * <p>The child ID is resolved to its leaf, translated to a leaf-local ID, and the parent is taken
 * to be the next set bit in that leaf's parent bit set at or after the child.
 *
 * @param reader top-level reader the child doc ID refers to
 * @param parents producer of the per-leaf parent bit sets
 * @param childDocID top-level doc ID of the child
 * @return the stored fields of the parent document
 * @throws IOException if the bit set or the document cannot be read
 */
private Document getParentDoc(IndexReader reader, BitSetProducer parents, int childDocID) throws IOException {
  final List<LeafReaderContext> leaves = reader.leaves();
  final LeafReaderContext leaf = leaves.get(ReaderUtil.subIndex(childDocID, leaves));
  final BitSet parentBits = parents.getBitSet(leaf);
  // Bit sets are per leaf, so work with the leaf-local doc ID.
  final int localChildID = childDocID - leaf.docBase;
  final int localParentID = parentBits.nextSetBit(localChildID);
  return leaf.reader().document(localParentID);
}
/**
 * Runs a {@link ToParentBlockJoinQuery} whose child query matches nothing against an index that
 * received no documents: once on its own and once as the MUST clause of a boosted
 * {@link BooleanQuery}. The test makes no assertion on the results; it passes when the checks and
 * searches complete without throwing.
 */
public void testBoostBug() throws Exception {
  final Directory directory = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
  final IndexReader reader = writer.getReader();
  writer.close();
  final IndexSearcher searcher = newSearcher(reader);

  final BitSetProducer everyDocIsParent = new QueryBitSetProducer(new MatchAllDocsQuery());
  final ToParentBlockJoinQuery join =
      new ToParentBlockJoinQuery(new MatchNoDocsQuery(), everyDocIsParent, ScoreMode.Avg);
  QueryUtils.check(random(), join, searcher);
  searcher.search(join, 10);

  // Same join, nested in a BooleanQuery and boosted.
  final BooleanQuery nested = new BooleanQuery.Builder().add(join, Occur.MUST).build();
  searcher.search(new BoostQuery(nested, 2f), 10);

  reader.close();
  directory.close();
}
/**
 * Generates random value pools for a random number of fields.
 *
 * <p>The number of fields is drawn with {@code TestUtil.nextInt(random(), 2, 4)}. The first field
 * always gets exactly two values; every other field gets a count drawn with
 * {@code TestUtil.nextInt(random(), 1, maxUniqueValues)}. Values are random realistic unicode
 * strings.
 *
 * @param maxUniqueValues upper bound passed to the random draw of each non-first field's size
 * @return one array of values per field, indexed by field ID
 */
private String[][] getRandomFields(int maxUniqueValues) {
  final int numFields = TestUtil.nextInt(random(), 2, 4);
  final String[][] fields = new String[numFields][];
  for (int fieldID = 0; fieldID < numFields; fieldID++) {
    final int valueCount = fieldID == 0 ? 2 : TestUtil.nextInt(random(), 1, maxUniqueValues);
    final String[] values = new String[valueCount];
    for (int valueIdx = 0; valueIdx < valueCount; valueIdx++) {
      values[valueIdx] = TestUtil.randomRealisticUnicodeString(random());
    }
    fields[fieldID] = values;
  }
  return fields;
}
/**
 * Picks a random entry of {@code values} and wraps it in a term on the {@code parent0} field.
 *
 * @param values candidate values; must not be empty
 * @return a term on {@code parent0} holding the picked value
 */
private Term randomParentTerm(String[] values) {
  final String picked = values[random().nextInt(values.length)];
  return new Term("parent0", picked);
}
/**
 * Picks a random entry of {@code values} and wraps it in a term on the {@code child0} field.
 *
 * @param values candidate values; must not be empty
 * @return a term on {@code child0} holding the picked value
 */
private Term randomChildTerm(String[] values) {
  final String picked = values[random().nextInt(values.length)];
  return new Term("child0", picked);
}
/**
 * Builds a random sort over string fields named {@code prefix + <n>}, always terminated by an
 * INT sort on {@code prefix + "ID"} that breaks ties.
 *
 * <p>Zero, one or two random string sort fields precede the tie-breaker; each uses a random field
 * number below {@code numFields} and a random reverse flag. Sorting by score is intentionally not
 * included: scores are not comparable across the two indices the caller compares.
 *
 * @param prefix field name prefix, e.g. {@code "parent"} or {@code "child"}
 * @param numFields exclusive upper bound for the random field number
 * @return the randomly assembled sort
 */
private Sort getRandomSort(String prefix, int numFields) {
  // First coin flip selects a single field; otherwise a second flip selects two fields or none.
  final int randomFieldCount;
  if (random().nextBoolean()) {
    randomFieldCount = 1;
  } else {
    randomFieldCount = random().nextBoolean() ? 2 : 0;
  }

  final List<SortField> sortFields = new ArrayList<>();
  for (int i = 0; i < randomFieldCount; i++) {
    final String fieldName = prefix + random().nextInt(numFields);
    final boolean reverse = random().nextBoolean();
    sortFields.add(new SortField(fieldName, SortField.Type.STRING, reverse));
  }

  // Break ties:
  sortFields.add(new SortField(prefix + "ID", SortField.Type.INT));
  return new Sort(sortFields.toArray(new SortField[0]));
}
public void testRandom() throws Exception {
// We build two indices at once: one normalized (which
// ToParentBlockJoinQuery/Collector,
// ToChildBlockJoinQuery can query) and the other w/
// the same docs, just fully denormalized:
final Directory dir = newDirectory();
final Directory joinDir = newDirectory();
final int maxNumChildrenPerParent = 20;
final int numParentDocs = TestUtil.nextInt(random(), 10* RANDOM_MULTIPLIER, 30 * RANDOM_MULTIPLIER);
//final int numParentDocs = 30;
// Values for parent fields:
final String[][] parentFields = getRandomFields(numParentDocs/2);
// Values for child fields:
final String[][] childFields = getRandomFields(numParentDocs);
final boolean doDeletes = random().nextBoolean();
final List<Integer> toDelete = new ArrayList<>();
// TODO: parallel star join, nested join cases too!
final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
final RandomIndexWriter joinW = new RandomIndexWriter(random(), joinDir);
for(int parentDocID=0;parentDocID<numParentDocs;parentDocID++) {
Document parentDoc = new Document();
Document parentJoinDoc = new Document();
Field id = new StoredField("parentID", parentDocID);
parentDoc.add(id);
parentJoinDoc.add(id);
parentJoinDoc.add(newStringField("isParent", "x", Field.Store.NO));
id = new NumericDocValuesField("parentID", parentDocID);
parentDoc.add(id);
parentJoinDoc.add(id);
parentJoinDoc.add(newStringField("isParent", "x", Field.Store.NO));
for(int field=0;field<parentFields.length;field++) {
if (random().nextDouble() < 0.9) {
String s = parentFields[field][random().nextInt(parentFields[field].length)];
Field f = newStringField("parent" + field, s, Field.Store.NO);
parentDoc.add(f);
parentJoinDoc.add(f);
f = new SortedDocValuesField("parent" + field, new BytesRef(s));
parentDoc.add(f);
parentJoinDoc.add(f);
}
}
if (doDeletes) {
parentDoc.add(new IntPoint("blockID", parentDocID));
parentJoinDoc.add(new IntPoint("blockID", parentDocID));
}
final List<Document> joinDocs = new ArrayList<>();
if (VERBOSE) {
StringBuilder sb = new StringBuilder();
sb.append("parentID=").append(parentDoc.get("parentID"));
for(int fieldID=0;fieldID<parentFields.length;fieldID++) {
String s = parentDoc.get("parent" + fieldID);
if (s != null) {
sb.append(" parent" + fieldID + "=" + s);
}
}
System.out.println(" " + sb.toString());
}
final int numChildDocs = TestUtil.nextInt(random(), 1, maxNumChildrenPerParent);
for(int childDocID=0;childDocID<numChildDocs;childDocID++) {
// Denormalize: copy all parent fields into child doc:
Document childDoc = TestUtil.cloneDocument(parentDoc);
Document joinChildDoc = new Document();
joinDocs.add(joinChildDoc);
Field childID = new StoredField("childID", childDocID);
childDoc.add(childID);
joinChildDoc.add(childID);
childID = new NumericDocValuesField("childID", childDocID);
childDoc.add(childID);
joinChildDoc.add(childID);
for(int childFieldID=0;childFieldID<childFields.length;childFieldID++) {
if (random().nextDouble() < 0.9) {
String s = childFields[childFieldID][random().nextInt(childFields[childFieldID].length)];
Field f = newStringField("child" + childFieldID, s, Field.Store.NO);
childDoc.add(f);
joinChildDoc.add(f);
f = new SortedDocValuesField("child" + childFieldID, new BytesRef(s));
childDoc.add(f);
joinChildDoc.add(f);
}
}
if (VERBOSE) {
StringBuilder sb = new StringBuilder();
sb.append("childID=").append(joinChildDoc.get("childID"));
for(int fieldID=0;fieldID<childFields.length;fieldID++) {
String s = joinChildDoc.get("child" + fieldID);
if (s != null) {
sb.append(" child" + fieldID + "=" + s);
}
}
System.out.println(" " + sb.toString());
}
if (doDeletes) {
joinChildDoc.add(new IntPoint("blockID", parentDocID));
}
w.addDocument(childDoc);
}
// Parent last:
joinDocs.add(parentJoinDoc);
joinW.addDocuments(joinDocs);
if (doDeletes && random().nextInt(30) == 7) {
toDelete.add(parentDocID);
}
}
if (!toDelete.isEmpty()) {
Query query = IntPoint.newSetQuery("blockID", toDelete);
w.deleteDocuments(query);
joinW.deleteDocuments(query);
}
final IndexReader r = w.getReader();
w.close();
final IndexReader joinR = joinW.getReader();
joinW.close();
if (VERBOSE) {
System.out.println("TEST: reader=" + r);
System.out.println("TEST: joinReader=" + joinR);
Bits liveDocs = MultiBits.getLiveDocs(joinR);
for(int docIDX=0;docIDX<joinR.maxDoc();docIDX++) {
System.out.println(" docID=" + docIDX + " doc=" + joinR.document(docIDX) + " deleted?=" + (liveDocs != null && liveDocs.get(docIDX) == false));
}
PostingsEnum parents = MultiTerms.getTermPostingsEnum(joinR, "isParent", new BytesRef("x"), (int) PostingsEnum.FREQS);
System.out.println("parent docIDs:");
while (parents.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
System.out.println(" " + parents.docID());
}
}
final IndexSearcher s = newSearcher(r, false);
final IndexSearcher joinS = newSearcher(joinR);
final BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("isParent", "x")));
CheckJoinIndex.check(joinS.getIndexReader(), parentsFilter);
final int iters = 200 * RANDOM_MULTIPLIER;
for(int iter=0;iter<iters;iter++) {
if (VERBOSE) {
System.out.println("TEST: iter=" + (1+iter) + " of " + iters);
}
Query childQuery;
if (random().nextInt(3) == 2) {
final int childFieldID = random().nextInt(childFields.length);
childQuery = new TermQuery(new Term("child" + childFieldID,
childFields[childFieldID][random().nextInt(childFields[childFieldID].length)]));
} else if (random().nextInt(3) == 2) {
BooleanQuery.Builder bq = new BooleanQuery.Builder();
final int numClauses = TestUtil.nextInt(random(), 2, 4);
boolean didMust = false;
for(int clauseIDX=0;clauseIDX<numClauses;clauseIDX++) {
Query clause;
BooleanClause.Occur occur;
if (!didMust && random().nextBoolean()) {
occur = random().nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
clause = new TermQuery(randomChildTerm(childFields[0]));
didMust = true;
} else {
occur = BooleanClause.Occur.SHOULD;
final int childFieldID = TestUtil.nextInt(random(), 1, childFields.length - 1);
clause = new TermQuery(new Term("child" + childFieldID,
childFields[childFieldID][random().nextInt(childFields[childFieldID].length)]));
}
bq.add(clause, occur);
}
childQuery = bq.build();
} else {
BooleanQuery.Builder bq = new BooleanQuery.Builder();
bq.add(new TermQuery(randomChildTerm(childFields[0])),
BooleanClause.Occur.MUST);
final int childFieldID = TestUtil.nextInt(random(), 1, childFields.length - 1);
bq.add(new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][random().nextInt(childFields[childFieldID].length)])),
random().nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
childQuery = bq.build();
}
if (random().nextBoolean()) {
childQuery = new RandomApproximationQuery(childQuery, random());
}
final ScoreMode agg = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
final ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, agg);
// To run against the block-join index:
final Query parentJoinQuery;
// Same query as parentJoinQuery, but to run against
// the fully denormalized index (so we can compare
// results):
final Query parentQuery;
if (random().nextBoolean()) {
parentQuery = childQuery;
parentJoinQuery = childJoinQuery;
} else {
// AND parent field w/ child field
final BooleanQuery.Builder bq = new BooleanQuery.Builder();
final Term parentTerm = randomParentTerm(parentFields[0]);
if (random().nextBoolean()) {
bq.add(childJoinQuery, BooleanClause.Occur.MUST);
bq.add(new TermQuery(parentTerm),
BooleanClause.Occur.MUST);
} else {
bq.add(new TermQuery(parentTerm),
BooleanClause.Occur.MUST);
bq.add(childJoinQuery, BooleanClause.Occur.MUST);
}
final BooleanQuery.Builder bq2 = new BooleanQuery.Builder();
if (random().nextBoolean()) {
bq2.add(childQuery, BooleanClause.Occur.MUST);
bq2.add(new TermQuery(parentTerm),
BooleanClause.Occur.MUST);
} else {
bq2.add(new TermQuery(parentTerm),
BooleanClause.Occur.MUST);
bq2.add(childQuery, BooleanClause.Occur.MUST);
}
parentJoinQuery = bq.build();
parentQuery = bq2.build();
}
final Sort parentSort = getRandomSort("parent", parentFields.length);
final Sort childSort = getRandomSort("child", childFields.length);
if (VERBOSE) {
System.out.println("\nTEST: query=" + parentQuery + " joinQuery=" + parentJoinQuery + " parentSort=" + parentSort + " childSort=" + childSort);
}
// Merge both sorts:
final List<SortField> sortFields = new ArrayList<>(Arrays.asList(parentSort.getSort()));
sortFields.addAll(Arrays.asList(childSort.getSort()));
final Sort parentAndChildSort = new Sort(sortFields.toArray(new SortField[sortFields.size()]));
final TopDocs results = s.search(parentQuery, r.numDocs(),
parentAndChildSort);
if (VERBOSE) {
System.out.println("\nTEST: normal index gets " + results.totalHits.value + " hits; sort=" + parentAndChildSort);
final ScoreDoc[] hits = results.scoreDocs;
for(int hitIDX=0;hitIDX<hits.length;hitIDX++) {
final Document doc = s.doc(hits[hitIDX].doc);
//System.out.println(" score=" + hits[hitIDX].score + " parentID=" + doc.get("parentID") + " childID=" + doc.get("childID") + " (docID=" + hits[hitIDX].doc + ")");
System.out.println(" parentID=" + doc.get("parentID") + " childID=" + doc.get("childID") + " (docID=" + hits[hitIDX].doc + ")");
FieldDoc fd = (FieldDoc) hits[hitIDX];
if (fd.fields != null) {
System.out.print(" " + fd.fields.length + " sort values: ");
for(Object o : fd.fields) {
if (o instanceof BytesRef) {
System.out.print(((BytesRef) o).utf8ToString() + " ");
} else {
System.out.print(o + " ");
}
}
System.out.println();
}
}
}
TopDocs joinedResults = joinS.search(parentJoinQuery, numParentDocs);
SortedMap<Integer, TopDocs> joinResults = new TreeMap<>();
for (ScoreDoc parentHit : joinedResults.scoreDocs) {
ParentChildrenBlockJoinQuery childrenQuery =
new ParentChildrenBlockJoinQuery(parentsFilter, childQuery, parentHit.doc);
TopDocs childTopDocs = joinS.search(childrenQuery, maxNumChildrenPerParent, childSort);
final Document parentDoc = joinS.doc(parentHit.doc);
joinResults.put(Integer.valueOf(parentDoc.get("parentID")), childTopDocs);
}
final int hitsPerGroup = TestUtil.nextInt(random(), 1, 20);
//final int hitsPerGroup = 100;
if (VERBOSE) {
System.out.println("\nTEST: block join index gets " + (joinResults == null ? 0 : joinResults.size()) + " groups; hitsPerGroup=" + hitsPerGroup);
if (joinResults != null) {
for (Map.Entry<Integer, TopDocs> entry : joinResults.entrySet()) {
System.out.println(" group parentID=" + entry.getKey() + " (docID=" + entry.getKey() + ")");
for(ScoreDoc childHit : entry.getValue().scoreDocs) {
final Document doc = joinS.doc(childHit.doc);
// System.out.println(" score=" + childHit.score + " childID=" + doc.get("childID") + " (docID=" + childHit.doc + ")");
System.out.println(" childID=" + doc.get("childID") + " child0=" + doc.get("child0") + " (docID=" + childHit.doc + ")");
}
}
}
}
if (results.totalHits.value == 0) {
assertEquals(0, joinResults.size());
} else {
compareHits(r, joinR, results, joinResults);
TopDocs b = joinS.search(childJoinQuery, 10);
for (ScoreDoc hit : b.scoreDocs) {
Explanation explanation = joinS.explain(childJoinQuery, hit.doc);
Document document = joinS.doc(hit.doc - 1);
int childId = Integer.parseInt(document.get("childID"));
//System.out.println(" hit docID=" + hit.doc + " childId=" + childId + " parentId=" + document.get("parentID"));
assertTrue(explanation.isMatch());
assertEquals(hit.score, explanation.getValue().doubleValue(), 0.0f);
Matcher m = Pattern.compile("Score based on ([0-9]+) child docs in range from ([0-9]+) to ([0-9]+), best match:").matcher(explanation.getDescription());
assertTrue("Block Join description not matches", m.matches());
assertTrue("Matched children not positive", Integer.parseInt(m.group(1)) > 0);
assertEquals("Wrong child range start", hit.doc - 1 - childId, Integer.parseInt(m.group(2)));
assertEquals("Wrong child range end", hit.doc - 1, Integer.parseInt(m.group(3)));
Explanation childWeightExplanation = explanation.getDetails()[0];
if ("sum of:".equals(childWeightExplanation.getDescription())) {
childWeightExplanation = childWeightExplanation.getDetails()[0];
}
if (agg == ScoreMode.None) {
assertTrue("Wrong child weight description", childWeightExplanation.getDescription().startsWith("ConstantScore("));
} else {
assertTrue("Wrong child weight description", childWeightExplanation.getDescription().startsWith("weight(child"));
}
}
}
// Test joining in the opposite direction (parent to
// child):
// Get random query against parent documents:
final Query parentQuery2;
if (random().nextInt(3) == 2) {
final int fieldID = random().nextInt(parentFields.length);
parentQuery2 = new TermQuery(new Term("parent" + fieldID,
parentFields[fieldID][random().nextInt(parentFields[fieldID].length)]));
} else if (random().nextInt(3) == 2) {
BooleanQuery.Builder bq = new BooleanQuery.Builder();
final int numClauses = TestUtil.nextInt(random(), 2, 4);
boolean didMust = false;
for(int clauseIDX=0;clauseIDX<numClauses;clauseIDX++) {
Query clause;
BooleanClause.Occur occur;
if (!didMust && random().nextBoolean()) {
occur = random().nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
clause = new TermQuery(randomParentTerm(parentFields[0]));
didMust = true;
} else {
occur = BooleanClause.Occur.SHOULD;
final int fieldID = TestUtil.nextInt(random(), 1, parentFields.length - 1);
clause = new TermQuery(new Term("parent" + fieldID,
parentFields[fieldID][random().nextInt(parentFields[fieldID].length)]));
}
bq.add(clause, occur);
}
parentQuery2 = bq.build();
} else {
BooleanQuery.Builder bq = new BooleanQuery.Builder();
bq.add(new TermQuery(randomParentTerm(parentFields[0])),
BooleanClause.Occur.MUST);
final int fieldID = TestUtil.nextInt(random(), 1, parentFields.length - 1);
bq.add(new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][random().nextInt(parentFields[fieldID].length)])),
random().nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
parentQuery2 = bq.build();
}
if (VERBOSE) {
System.out.println("\nTEST: top down: parentQuery2=" + parentQuery2);
}
// Maps parent query to child docs:
final ToChildBlockJoinQuery parentJoinQuery2 = new ToChildBlockJoinQuery(parentQuery2, parentsFilter);
// To run against the block-join index:
Query childJoinQuery2;
// Same query as parentJoinQuery, but to run against
// the fully denormalized index (so we can compare
// results):
Query childQuery2;
if (random().nextBoolean()) {
childQuery2 = parentQuery2;
childJoinQuery2 = parentJoinQuery2;
} else {
final Term childTerm = randomChildTerm(childFields[0]);
if (random().nextBoolean()) { // filtered case
childJoinQuery2 = parentJoinQuery2;
childJoinQuery2 = new BooleanQuery.Builder()
.add(childJoinQuery2, Occur.MUST)
.add(new TermQuery(childTerm), Occur.FILTER)
.build();
} else {
// AND child field w/ parent query:
final BooleanQuery.Builder bq = new BooleanQuery.Builder();
if (random().nextBoolean()) {
bq.add(parentJoinQuery2, BooleanClause.Occur.MUST);
bq.add(new TermQuery(childTerm),
BooleanClause.Occur.MUST);
} else {
bq.add(new TermQuery(childTerm),
BooleanClause.Occur.MUST);
bq.add(parentJoinQuery2, BooleanClause.Occur.MUST);
}
childJoinQuery2 = bq.build();
}
if (random().nextBoolean()) { // filtered case
childQuery2 = parentQuery2;
childQuery2 = new BooleanQuery.Builder()
.add(childQuery2, Occur.MUST)
.add(new TermQuery(childTerm), Occur.FILTER)
.build();
} else {
final BooleanQuery.Builder bq2 = new BooleanQuery.Builder();
if (random().nextBoolean()) {
bq2.add(parentQuery2, BooleanClause.Occur.MUST);
bq2.add(new TermQuery(childTerm),
BooleanClause.Occur.MUST);
} else {
bq2.add(new TermQuery(childTerm),
BooleanClause.Occur.MUST);
bq2.add(parentQuery2, BooleanClause.Occur.MUST);
}
childQuery2 = bq2.build();
}
}
final Sort childSort2 = getRandomSort("child", childFields.length);
// Search denormalized index:
if (VERBOSE) {
System.out.println("TEST: run top down query=" + childQuery2 + " sort=" + childSort2);
}
final TopDocs results2 = s.search(childQuery2, r.numDocs(),
childSort2);
if (VERBOSE) {
System.out.println(" " + results2.totalHits.value + " totalHits:");
for(ScoreDoc sd : results2.scoreDocs) {
final Document doc = s.doc(sd.doc);
System.out.println(" childID=" + doc.get("childID") + " parentID=" + doc.get("parentID") + " docID=" + sd.doc);
}
}
// Search join index:
if (VERBOSE) {
System.out.println("TEST: run top down join query=" + childJoinQuery2 + " sort=" + childSort2);
}
TopDocs joinResults2 = joinS.search(childJoinQuery2, joinR.numDocs(), childSort2);
if (VERBOSE) {
System.out.println(" " + joinResults2.totalHits.value + " totalHits:");
for(ScoreDoc sd : joinResults2.scoreDocs) {
final Document doc = joinS.doc(sd.doc);
final Document parentDoc = getParentDoc(joinR, parentsFilter, sd.doc);
System.out.println(" childID=" + doc.get("childID") + " parentID=" + parentDoc.get("parentID") + " docID=" + sd.doc);
}
}
compareChildHits(r, joinR, results2, joinResults2);
}
r.close();
joinR.close();
dir.close();
joinDir.close();
}
/**
 * Checks that a child-level search against the denormalized index and the same search
 * against the block-join index returned equivalent hits, position by position.
 *
 * @param r reader over the denormalized (control) index
 * @param joinR reader over the block-join index
 * @param results hits from the control index
 * @param joinResults hits from the block-join index
 */
private void compareChildHits(IndexReader r, IndexReader joinR, TopDocs results, TopDocs joinResults) throws Exception {
  // Both searches must agree on the total hit count and on how many hits were collected.
  assertEquals(results.totalHits.value, joinResults.totalHits.value);
  final ScoreDoc[] controlDocs = results.scoreDocs;
  final ScoreDoc[] joinDocs = joinResults.scoreDocs;
  assertEquals(controlDocs.length, joinDocs.length);

  int slot = 0;
  for (ScoreDoc controlHit : controlDocs) {
    final ScoreDoc joinHit = joinDocs[slot];
    final Document controlDoc = r.document(controlHit.doc);
    final Document joinDoc = joinR.document(joinHit.doc);
    // The stored childID identifies the same logical child in both indexes.
    assertEquals("hit " + slot + " differs", controlDoc.get("childID"), joinDoc.get("childID"));

    // Scores are deliberately not compared -- they are expected to differ.
    // Only the sort values have to line up.
    assertTrue(controlHit instanceof FieldDoc);
    assertTrue(joinHit instanceof FieldDoc);
    assertArrayEquals(((FieldDoc) controlHit).fields, ((FieldDoc) joinHit).fields);
    slot++;
  }
}
/**
 * Walks the control hits (which carry both a stored {@code parentID} and {@code childID})
 * and checks that, for every parent, the block-join index returned the same children in
 * the same order.
 *
 * <p>Control hits belonging to the same parent are expected to be contiguous: a change of
 * {@code parentID} marks the start of the next group.
 *
 * @param r reader over the denormalized (control) index
 * @param joinR reader over the block-join index
 * @param controlHits hits from the control index
 * @param joinResults per-parent child hits from the block-join index, keyed by parentID
 */
private void compareHits(IndexReader r, IndexReader joinR, TopDocs controlHits, Map<Integer, TopDocs> joinResults) throws Exception {
  int currentParentID = -1;
  int childHitSlot = 0;
  // Start with an empty placeholder group so the "previous group fully consumed" check
  // passes trivially when the first real group begins.
  TopDocs childHits = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]);
  for (ScoreDoc controlHit : controlHits.scoreDocs) {
    Document controlDoc = r.document(controlHit.doc);
    int parentID = Integer.parseInt(controlDoc.get("parentID"));
    if (parentID != currentParentID) {
      // The group we just left must have been consumed completely.
      assertEquals("wrong number of child hits for parentID=" + currentParentID,
          childHitSlot, childHits.scoreDocs.length);
      currentParentID = parentID;
      childHitSlot = 0;
      childHits = joinResults.get(parentID);
      // Fail with a readable message instead of a NullPointerException below.
      assertNotNull("no join results for parentID=" + parentID, childHits);
    }
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException below.
    assertTrue("join results have too few child hits for parentID=" + parentID,
        childHitSlot < childHits.scoreDocs.length);
    String controlChildID = controlDoc.get("childID");
    Document childDoc = joinR.document(childHits.scoreDocs[childHitSlot++].doc);
    String childID = childDoc.get("childID");
    assertEquals(controlChildID, childID);
  }
  // The loop only verifies a group when the next one starts, so the final group
  // has to be verified here.
  assertEquals("wrong number of child hits for parentID=" + currentParentID,
      childHitSlot, childHits.scoreDocs.length);
}
/**
 * Indexes one block with two kinds of children (jobs and a qualification) under a single
 * resume, then checks that a parent query combined with two independent to-parent joins
 * finds that resume, and that the matching children can be fetched per child query.
 */
public void testMultiChildTypes() throws Exception {
  final Directory dir = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(random(), dir);

  // A single block: three children followed by their parent (the resume) in last position.
  final List<Document> block = new ArrayList<>();
  block.add(makeJob("java", 2007));
  block.add(makeJob("python", 2010));
  block.add(makeQualification("maths", 1999));
  block.add(makeResume("Lisa", "United Kingdom"));
  writer.addDocuments(block);

  final IndexReader reader = writer.getReader();
  writer.close();
  final IndexSearcher searcher = newSearcher(reader, false);

  // Parent documents are identified by docType=resume.
  final BitSetProducer resumes = new QueryBitSetProducer(new TermQuery(new Term("docType", "resume")));
  CheckJoinIndex.check(searcher.getIndexReader(), resumes);

  // Child criteria #1: an example of relevant work experience.
  final Query jobCriteria = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("skill", "java")), Occur.MUST)
      .add(IntPoint.newRangeQuery("year", 2006, 2011), Occur.MUST)
      .build();

  // Child criteria #2: a relevant qualification.
  final Query qualificationCriteria = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("qualification", "maths")), Occur.MUST)
      .add(IntPoint.newRangeQuery("year", 1980, 2000), Occur.MUST)
      .build();

  // Parent criteria: a resident of the UK.
  final Query ukResident = new TermQuery(new Term("country", "United Kingdom"));

  // Lift each child query to the parent level.
  final ToParentBlockJoinQuery jobJoin = new ToParentBlockJoinQuery(jobCriteria, resumes, ScoreMode.Avg);
  final ToParentBlockJoinQuery qualificationJoin =
      new ToParentBlockJoinQuery(qualificationCriteria, resumes, ScoreMode.Avg);

  // A candidate must satisfy the parent criteria and both joined child criteria.
  final Query candidateQuery = new BooleanQuery.Builder()
      .add(ukResident, Occur.MUST)
      .add(jobJoin, Occur.MUST)
      .add(qualificationJoin, Occur.MUST)
      .build();

  final TopDocs candidates = searcher.search(candidateQuery, 10);
  assertEquals(1, candidates.totalHits.value);
  final int resumeDocId = candidates.scoreDocs[0].doc;
  assertEquals("Lisa", searcher.doc(resumeDocId).get("name"));

  // Fetch the children of the matched resume that satisfy the job criteria.
  TopDocs children = searcher.search(new ParentChildrenBlockJoinQuery(resumes, jobCriteria, resumeDocId), 1);
  assertEquals(1, children.totalHits.value);
  assertEquals("java", searcher.doc(children.scoreDocs[0].doc).get("skill"));

  // And the children that satisfy the qualification criteria.
  children = searcher.search(new ParentChildrenBlockJoinQuery(resumes, qualificationCriteria, resumeDocId), 1);
  assertEquals(1, children.totalHits.value);
  assertEquals("maths", searcher.doc(children.scoreDocs[0].doc).get("qualification"));

  reader.close();
  dir.close();
}
/**
 * Indexes a single block of one child followed by one parent and checks that advancing the
 * to-parent join scorer to docID 1 lands on docID 1.
 */
public void testAdvanceSingleParentSingleChild() throws Exception {
  final Directory directory = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  // Block layout: child first, parent last.
  final Document kid = new Document();
  kid.add(newStringField("child", "1", Field.Store.NO));
  final Document owner = new Document();
  owner.add(newStringField("parent", "1", Field.Store.NO));
  writer.addDocuments(Arrays.asList(kid, owner));

  final IndexReader reader = writer.getReader();
  writer.close();
  final IndexSearcher searcher = newSearcher(reader);

  final Query childTerm = new TermQuery(new Term("child", "1"));
  final BitSetProducer parents = new QueryBitSetProducer(new TermQuery(new Term("parent", "1")));
  CheckJoinIndex.check(searcher.getIndexReader(), parents);

  final ToParentBlockJoinQuery join = new ToParentBlockJoinQuery(childTerm, parents, ScoreMode.Avg);
  final Query rewritten = searcher.rewrite(join);
  final Weight joinWeight = searcher.createWeight(rewritten, org.apache.lucene.search.ScoreMode.COMPLETE, 1);
  final Scorer joinScorer = joinWeight.scorer(searcher.getIndexReader().leaves().get(0));
  assertEquals(1, joinScorer.iterator().advance(1));

  reader.close();
  directory.close();
}
/**
 * Indexes a childless parent followed by a block of one child plus its parent, merges to a
 * single segment, and checks that advancing the to-parent join scorer from docID 0 returns
 * docID 2.
 */
public void testAdvanceSingleParentNoChild() throws Exception {
  final Directory directory = newDirectory();
  final IndexWriterConfig config =
      newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(new LogDocMergePolicy());
  final RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

  // First block: a parent with no children at all.
  final Document loneParent = new Document();
  loneParent.add(newStringField("parent", "1", Field.Store.NO));
  loneParent.add(newStringField("isparent", "yes", Field.Store.NO));
  writer.addDocuments(Collections.singletonList(loneParent));

  // Second block: one child and its parent, so that the scorer is not null.
  final Document secondParent = new Document();
  secondParent.add(newStringField("parent", "2", Field.Store.NO));
  secondParent.add(newStringField("isparent", "yes", Field.Store.NO));
  final Document kid = new Document();
  kid.add(newStringField("child", "2", Field.Store.NO));
  writer.addDocuments(Arrays.asList(kid, secondParent));

  // The scorer is taken from leaf 0 only, so everything must live in one segment.
  writer.forceMerge(1);

  final IndexReader reader = writer.getReader();
  writer.close();
  final IndexSearcher searcher = newSearcher(reader);

  final Query childTerm = new TermQuery(new Term("child", "2"));
  final BitSetProducer parents = new QueryBitSetProducer(new TermQuery(new Term("isparent", "yes")));
  CheckJoinIndex.check(searcher.getIndexReader(), parents);

  final ToParentBlockJoinQuery join = new ToParentBlockJoinQuery(childTerm, parents, ScoreMode.Avg);
  final Query rewritten = searcher.rewrite(join);
  final Weight joinWeight = searcher.createWeight(rewritten, org.apache.lucene.search.ScoreMode.COMPLETE, 1);
  final Scorer joinScorer = joinWeight.scorer(searcher.getIndexReader().leaves().get(0));
  // Presumably docID 2 is the second parent (docs kept in insertion order) -- the only
  // parent that has a matching child.
  assertEquals(2, joinScorer.iterator().advance(0));

  reader.close();
  directory.close();
}
// LUCENE-4968
/**
 * Checks that a to-parent join over a child query that cannot match anything yields a
 * {@code null} scorer, both when the child field exists but the term does not and when the
 * child field does not exist at all.
 */
public void testChildQueryNeverMatches() throws Exception {
  final Directory directory = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  // First block: one child followed by its parent.
  final Document firstParent = new Document();
  firstParent.add(new StoredField("parentID", "0"));
  firstParent.add(new SortedDocValuesField("parentID", new BytesRef("0")));
  firstParent.add(newTextField("parentText", "text", Field.Store.NO));
  firstParent.add(newStringField("isParent", "yes", Field.Store.NO));

  final Document onlyChild = new Document();
  onlyChild.add(new StoredField("childID", "0"));
  onlyChild.add(newTextField("childText", "text", Field.Store.NO));

  // parent last:
  writer.addDocuments(Arrays.asList(onlyChild, firstParent));

  // Second block: a parent without any children.
  final Document secondParent = new Document();
  secondParent.add(newTextField("parentText", "text", Field.Store.NO));
  secondParent.add(newStringField("isParent", "yes", Field.Store.NO));
  secondParent.add(new StoredField("parentID", "1"));
  secondParent.add(new SortedDocValuesField("parentID", new BytesRef("1")));
  writer.addDocuments(Collections.singletonList(secondParent));

  final IndexReader reader = writer.getReader();
  writer.close();
  final IndexSearcher searcher = newSearcher(reader);

  final BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("isParent", "yes")));
  CheckJoinIndex.check(reader, parentsFilter);

  final Query[] hopelessChildQueries = {
    // the field is indexed, but never with this term:
    new TermQuery(new Term("childText", "bogus")),
    // the field is not indexed at all:
    new TermQuery(new Term("bogus", "bogus"))
  };
  for (Query hopeless : hopelessChildQueries) {
    final ToParentBlockJoinQuery join = new ToParentBlockJoinQuery(hopeless, parentsFilter, ScoreMode.Avg);
    final org.apache.lucene.search.ScoreMode searchScoreMode =
        RandomPicks.randomFrom(random(), org.apache.lucene.search.ScoreMode.values());
    final Weight joinWeight = searcher.createWeight(searcher.rewrite(join), searchScoreMode, 1);
    final Scorer joinScorer = joinWeight.scorer(searcher.getIndexReader().leaves().get(0));
    assertNull(joinScorer);
  }

  reader.close();
  directory.close();
}
/**
 * Indexes a parent with one child, then a childless parent that is immediately deleted,
 * then a re-added parent (same "parent" value) with one child, and checks that a to-child
 * join on that parent value returns exactly one child hit.
 */
public void testAdvanceSingleDeletedParentNoChild() throws Exception {
  final Directory directory = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  // Block 1: parent "1" with a single child.
  final Document firstParent = new Document();
  firstParent.add(newStringField("parent", "1", Field.Store.NO));
  firstParent.add(newStringField("isparent", "yes", Field.Store.NO));
  final Document firstChild = new Document();
  firstChild.add(newStringField("child", "1", Field.Store.NO));
  writer.addDocuments(Arrays.asList(firstChild, firstParent));

  // Block 2: childless parent "2", deleted right after being added.
  final Document doomedParent = new Document();
  doomedParent.add(newStringField("parent", "2", Field.Store.NO));
  doomedParent.add(newStringField("isparent", "yes", Field.Store.NO));
  writer.addDocuments(Collections.singletonList(doomedParent));
  writer.deleteDocuments(new Term("parent", "2"));

  // Block 3: parent "2" again, this time with a single child.
  final Document revivedParent = new Document();
  revivedParent.add(newStringField("parent", "2", Field.Store.NO));
  revivedParent.add(newStringField("isparent", "yes", Field.Store.NO));
  final Document secondChild = new Document();
  secondChild.add(newStringField("child", "2", Field.Store.NO));
  writer.addDocuments(Arrays.asList(secondChild, revivedParent));

  final IndexReader reader = writer.getReader();
  writer.close();
  final IndexSearcher searcher = newSearcher(reader);

  // Parent documents are the ones flagged with isparent=yes.
  final BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("isparent", "yes")));
  CheckJoinIndex.check(reader, parentsFilter);

  final Query parentTerm = new TermQuery(new Term("parent", "2"));
  final ToChildBlockJoinQuery toChildren = new ToChildBlockJoinQuery(parentTerm, parentsFilter);
  final TopDocs childHits = searcher.search(toChildren, 3);
  assertEquals(1, childHits.totalHits.value);

  reader.close();
  directory.close();
}
/**
 * Builds random blocks of 0-2 children per parent and checks that intersecting a to-child
 * join with a child term query gives the same hit count whether the term query is used
 * directly or wrapped in a {@code RandomApproximationQuery}.
 */
public void testIntersectionWithRandomApproximation() throws IOException {
  final Directory directory = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  final int blockCount = atLeast(100);
  for (int block = 0; block < blockCount; ++block) {
    final List<Document> blockDocs = new ArrayList<>();
    final int childCount = random().nextInt(3);
    for (int c = 0; c < childCount; ++c) {
      final Document kid = new Document();
      final String childValue = random().nextBoolean() ? "bar" : "baz";
      kid.add(new StringField("foo_child", childValue, Store.NO));
      blockDocs.add(kid);
    }
    // The parent closes the block.
    final Document owner = new Document();
    owner.add(new StringField("parent", "true", Store.NO));
    final String parentValue = random().nextBoolean() ? "bar" : "baz";
    owner.add(new StringField("foo_parent", parentValue, Store.NO));
    blockDocs.add(owner);
    writer.addDocuments(blockDocs);
  }

  final IndexReader reader = writer.getReader();
  final IndexSearcher searcher = newSearcher(reader);
  searcher.setQueryCache(null); // to have real advance() calls

  final BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("parent", "true")));
  final Query toChild = new ToChildBlockJoinQuery(new TermQuery(new Term("foo_parent", "bar")), parentsFilter);
  final Query childQuery = new TermQuery(new Term("foo_child", "baz"));

  // Reference: plain conjunction of the join and the child term.
  final BooleanQuery.Builder plainBuilder = new BooleanQuery.Builder();
  plainBuilder.add(toChild, Occur.MUST);
  plainBuilder.add(childQuery, Occur.MUST);
  final BooleanQuery plain = plainBuilder.build();

  // Same conjunction, with the child term behind a random approximation.
  final BooleanQuery.Builder approximatedBuilder = new BooleanQuery.Builder();
  approximatedBuilder.add(toChild, Occur.MUST);
  approximatedBuilder.add(new RandomApproximationQuery(childQuery, random()), Occur.MUST);
  final BooleanQuery approximated = approximatedBuilder.build();

  final int plainCount = searcher.count(plain);
  final int approximatedCount = searcher.count(approximated);
  assertEquals(plainCount, approximatedCount);

  searcher.getIndexReader().close();
  writer.close();
  directory.close();
}
//LUCENE-6588
// delete documents to simulate FilteredQuery applying a filter as acceptDocs
/**
 * Indexes two resume blocks with two jobs each, deletes every "java" job (the first child
 * of each block), and checks that a to-child join over both parents returns the two
 * remaining children, each with a non-zero score.
 */
public void testParentScoringBug() throws Exception {
  final Directory dir = newDirectory();
  final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  final List<Document> docs = new ArrayList<>();
  docs.add(makeJob("java", 2007));
  docs.add(makeJob("python", 2010));
  docs.add(makeResume("Lisa", "United Kingdom"));
  w.addDocuments(docs);
  docs.clear();
  docs.add(makeJob("java", 2006));
  docs.add(makeJob("ruby", 2005));
  docs.add(makeResume("Frank", "United States"));
  w.addDocuments(docs);
  w.deleteDocuments(new Term("skill", "java")); // delete the first child of every parent
  IndexReader r = w.getReader();
  w.close();
  IndexSearcher s = newSearcher(r, false);
  // Create a filter that defines "parent" documents in the index - in this case resumes
  BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("docType", "resume")));
  // Matches both parents ("United Kingdom" and "United States").
  Query parentQuery = new PrefixQuery(new Term("country", "United"));
  ToChildBlockJoinQuery toChildQuery = new ToChildBlockJoinQuery(parentQuery, parentsFilter);
  TopDocs hits = s.search(toChildQuery, 10);
  // Expected value goes first so that a failure message reads correctly.
  assertEquals(2, hits.scoreDocs.length);
  for (int i = 0; i < hits.scoreDocs.length; i++) {
    if (hits.scoreDocs[i].score == 0.0) {
      fail("Failed to calculate score for hit #"+i);
    }
  }
  r.close();
  dir.close();
}
/**
 * Indexes two resume blocks with two jobs each, deletes every "java" job (the first child
 * of each block), and checks that for each remaining child hit of a to-child join the
 * value reported by {@code explain} equals the hit's score.
 */
public void testToChildBlockJoinQueryExplain() throws Exception {
  final Directory dir = newDirectory();
  final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  final List<Document> docs = new ArrayList<>();
  docs.add(makeJob("java", 2007));
  docs.add(makeJob("python", 2010));
  docs.add(makeResume("Lisa", "United Kingdom"));
  w.addDocuments(docs);
  docs.clear();
  docs.add(makeJob("java", 2006));
  docs.add(makeJob("ruby", 2005));
  docs.add(makeResume("Frank", "United States"));
  w.addDocuments(docs);
  w.deleteDocuments(new Term("skill", "java")); // delete the first child of every parent
  IndexReader r = w.getReader();
  w.close();
  IndexSearcher s = newSearcher(r, false);
  // Create a filter that defines "parent" documents in the index - in this case resumes
  BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("docType", "resume")));
  // Matches both parents ("United Kingdom" and "United States").
  Query parentQuery = new PrefixQuery(new Term("country", "United"));
  ToChildBlockJoinQuery toChildQuery = new ToChildBlockJoinQuery(parentQuery, parentsFilter);
  TopDocs hits = s.search(toChildQuery, 10);
  // Expected value goes first so that a failure message reads correctly.
  assertEquals(2, hits.scoreDocs.length);
  for (int i = 0; i < hits.scoreDocs.length; i++) {
    // The explanation must reproduce the search score exactly (zero tolerance).
    assertEquals(hits.scoreDocs[i].score, s.explain(toChildQuery, hits.scoreDocs[i].doc).getValue().doubleValue(), 0f);
  }
  r.close();
  dir.close();
}
/**
 * Degenerate layout where the very first document is a parent without children: checks
 * that an initial {@code advance(0)} on a to-child join scorer returns the same document
 * as an initial {@code nextDoc()} on a second scorer from the same weight.
 */
public void testToChildInitialAdvanceParentButNoKids() throws Exception {
  final Directory directory = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  // degenerate case: first doc has no children
  writer.addDocument(makeResume("first", "nokids"));
  // followed by a regular block: one job and its resume
  writer.addDocuments(Arrays.asList(makeJob("job", 42), makeResume("second", "haskid")));

  // Both scorers are taken from leaf 0, so keep everything in a single segment.
  writer.forceMerge(1);

  final IndexReader reader = writer.getReader();
  final IndexSearcher searcher = newSearcher(reader, false);
  writer.close();

  // The same term query defines what a parent is and selects the parents to join from.
  final BitSetProducer parentFilter = new QueryBitSetProducer(new TermQuery(new Term("docType", "resume")));
  final Query allResumes = new TermQuery(new Term("docType", "resume"));
  final ToChildBlockJoinQuery toChildren = new ToChildBlockJoinQuery(allResumes, parentFilter);

  final org.apache.lucene.search.ScoreMode searchScoreMode =
      RandomPicks.randomFrom(random(), org.apache.lucene.search.ScoreMode.values());
  final Weight joinWeight = searcher.createWeight(searcher.rewrite(toChildren), searchScoreMode, 1);
  final Scorer viaAdvance = joinWeight.scorer(searcher.getIndexReader().leaves().get(0));
  final Scorer viaNextDoc = joinWeight.scorer(searcher.getIndexReader().leaves().get(0));

  final int firstKid = viaNextDoc.iterator().nextDoc();
  assertTrue("firstKid not found", firstKid != DocIdSetIterator.NO_MORE_DOCS);
  assertEquals(firstKid, viaAdvance.iterator().advance(0));

  reader.close();
  directory.close();
}
/**
 * Builds a random three-level hierarchy (resume -> jobs[] -> qualifications[]) and checks
 * that intersecting two to-child joins defined at different parent levels only returns
 * qualifications whose name encodes both the queried job value and the queried resume value.
 */
public void testMultiChildQueriesOfDiffParentLevels() throws Exception {
  final Directory directory = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  // randomly generate resume->jobs[]->qualifications[]
  final int resumeCount = atLeast(100);
  for (int resumeIdx = 0; resumeIdx < resumeCount; resumeIdx++) {
    final List<Document> block = new ArrayList<>();
    final int resumeValue = TestUtil.nextInt(random(), 1, 10);
    final int jobCount = atLeast(10);
    for (int jobIdx = 0; jobIdx < jobCount; jobIdx++) {
      // neg so no overlap with q (both used for "year")
      final int jobValue = TestUtil.nextInt(random(), -10, -1);
      final int qualificationCount = atLeast(10);
      for (int qualIdx = 0; qualIdx < qualificationCount; qualIdx++) {
        // The qualification name records the resume and job values it sits under.
        final String qualificationName = "q" + qualIdx + "_rv" + resumeValue + "_jv" + jobValue;
        block.add(makeQualification(qualificationName, qualIdx));
      }
      // Each job closes its own run of qualifications ...
      block.add(makeJob("j" + jobIdx, jobValue));
    }
    // ... and the resume closes the whole block.
    block.add(makeResume("r" + resumeIdx, "rv" + resumeValue));
    writer.addDocuments(block);
  }

  final IndexReader reader = writer.getReader();
  final IndexSearcher searcher = newSearcher(reader, false);
  writer.close();

  final BitSetProducer resumeFilter = new QueryBitSetProducer(new TermQuery(new Term("docType", "resume")));
  // anything with a skill is a job
  final BitSetProducer jobFilter = new QueryBitSetProducer(new PrefixQuery(new Term("skill", "")));

  final int iterations = atLeast(1);
  for (int iter = 0; iter < iterations; iter++) {
    final int qjv = TestUtil.nextInt(random(), -10, -1);
    final int qrv = TestUtil.nextInt(random(), 1, 10);

    final Query resumeQuery =
        new ToChildBlockJoinQuery(new TermQuery(new Term("country", "rv" + qrv)), resumeFilter);
    final Query jobQuery =
        new ToChildBlockJoinQuery(IntPoint.newRangeQuery("year", qjv, qjv), jobFilter);

    final Query bothLevels = new BooleanQuery.Builder()
        .add(jobQuery, Occur.MUST)
        .add(resumeQuery, Occur.MUST)
        .build();

    // NOTE: totally possible that we'll get no matches
    final TopDocs hits = searcher.search(bothLevels, 100);
    for (ScoreDoc hit : hits.scoreDocs) {
      // since we're looking for children of jobs, all results must be qualifications
      final String qualification = reader.document(hit.doc).get("qualification");
      assertNotNull(hit.doc + " has no qualification", qualification);
      assertTrue(qualification + " MUST contain jv" + qjv, qualification.contains("jv" + qjv));
      assertTrue(qualification + " MUST contain rv" + qrv, qualification.contains("rv" + qrv));
    }
  }

  reader.close();
  directory.close();
}
/**
 * Verifies the parent score produced by every join {@code ScoreMode}. The similarity used
 * here scores a document by its raw term frequency, and the block's children contain
 * "bar" twice, once, and not at all, so the expected aggregates are easy to state.
 */
public void testScoreMode() throws IOException {
  // Similarity whose score is simply the term frequency.
  final Similarity freqOnly = new SimilarityBase() {
    @Override
    protected double score(BasicStats stats, double freq, double docLen) {
      return freq;
    }

    @Override
    public String toString() {
      return "TestSim";
    }
  };

  final Directory directory = newDirectory();
  final RandomIndexWriter writer =
      new RandomIndexWriter(random(), directory, newIndexWriterConfig().setSimilarity(freqOnly));
  // One block of four documents: three children, then the parent in last position.
  writer.addDocuments(Arrays.asList(
      Collections.singleton(newTextField("foo", "bar bar", Store.NO)),
      Collections.singleton(newTextField("foo", "bar", Store.NO)),
      Collections.emptyList(),
      Collections.singleton(newStringField("type", new BytesRef("parent"), Store.NO))));
  final DirectoryReader reader = writer.getReader();
  writer.close();

  final IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(freqOnly);
  final BitSetProducer parents = new QueryBitSetProducer(new TermQuery(new Term("type", "parent")));

  for (ScoreMode mode : ScoreMode.values()) {
    final Query join = new ToParentBlockJoinQuery(new TermQuery(new Term("foo", "bar")), parents, mode);
    final TopDocs hits = searcher.search(join, 10);
    assertEquals(1, hits.totalHits.value);
    // The parent is the fourth document of the block.
    assertEquals(3, hits.scoreDocs[0].doc);

    // Child scores for "bar" are 2 and 1.
    final float expectedScore;
    if (mode == ScoreMode.Avg) {
      expectedScore = 1.5f;
    } else if (mode == ScoreMode.Max) {
      expectedScore = 2f;
    } else if (mode == ScoreMode.Min) {
      expectedScore = 1f;
    } else if (mode == ScoreMode.None) {
      expectedScore = 0f;
    } else if (mode == ScoreMode.Total) {
      expectedScore = 3f;
    } else {
      // A new ScoreMode constant was added without extending this test.
      throw new AssertionError();
    }
    assertEquals(expectedScore, hits.scoreDocs[0].score, 0f);
  }

  reader.close();
  directory.close();
}
}