blob: db82dfb113bdd35b321da292b238cfd122e2530d [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.join;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleDocValuesField;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.packed.PackedInts;
import org.junit.Test;
public class TestJoinUtil extends LuceneTestCase {
public void testSimple() throws Exception {
final String idField = "id";
final String toField = "productId";
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(
random(),
dir,
newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
// 0
Document doc = new Document();
doc.add(new TextField("description", "random text", Field.Store.NO));
doc.add(new TextField("name", "name1", Field.Store.NO));
doc.add(new TextField(idField, "1", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("1")));
w.addDocument(doc);
// 1
doc = new Document();
doc.add(new TextField("price", "10.0", Field.Store.NO));
doc.add(new TextField(idField, "2", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("2")));
doc.add(new TextField(toField, "1", Field.Store.NO));
doc.add(new SortedDocValuesField(toField, new BytesRef("1")));
w.addDocument(doc);
// 2
doc = new Document();
doc.add(new TextField("price", "20.0", Field.Store.NO));
doc.add(new TextField(idField, "3", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("3")));
doc.add(new TextField(toField, "1", Field.Store.NO));
doc.add(new SortedDocValuesField(toField, new BytesRef("1")));
w.addDocument(doc);
// 3
doc = new Document();
doc.add(new TextField("description", "more random text", Field.Store.NO));
doc.add(new TextField("name", "name2", Field.Store.NO));
doc.add(new TextField(idField, "4", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("4")));
w.addDocument(doc);
w.commit();
// 4
doc = new Document();
doc.add(new TextField("price", "10.0", Field.Store.NO));
doc.add(new TextField(idField, "5", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("5")));
doc.add(new TextField(toField, "4", Field.Store.NO));
doc.add(new SortedDocValuesField(toField, new BytesRef("4")));
w.addDocument(doc);
// 5
doc = new Document();
doc.add(new TextField("price", "20.0", Field.Store.NO));
doc.add(new TextField(idField, "6", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("6")));
doc.add(new TextField(toField, "4", Field.Store.NO));
doc.add(new SortedDocValuesField(toField, new BytesRef("4")));
w.addDocument(doc);
IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
w.close();
// Search for product
Query joinQuery =
JoinUtil.createJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name2")), indexSearcher, ScoreMode.None);
TopDocs result = indexSearcher.search(joinQuery, 10);
assertEquals(2, result.totalHits.value);
assertEquals(4, result.scoreDocs[0].doc);
assertEquals(5, result.scoreDocs[1].doc);
joinQuery = JoinUtil.createJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name1")), indexSearcher, ScoreMode.None);
result = indexSearcher.search(joinQuery, 10);
assertEquals(2, result.totalHits.value);
assertEquals(1, result.scoreDocs[0].doc);
assertEquals(2, result.scoreDocs[1].doc);
// Search for offer
joinQuery = JoinUtil.createJoinQuery(toField, false, idField, new TermQuery(new Term("id", "5")), indexSearcher, ScoreMode.None);
result = indexSearcher.search(joinQuery, 10);
assertEquals(1, result.totalHits.value);
assertEquals(3, result.scoreDocs[0].doc);
indexSearcher.getIndexReader().close();
dir.close();
}
public void testSimpleOrdinalsJoin() throws Exception {
final String idField = "id";
final String productIdField = "productId";
// A field indicating to what type a document belongs, which is then used to distinques between documents during joining.
final String typeField = "type";
// A single sorted doc values field that holds the join values for all document types.
// Typically during indexing a schema will automatically create this field with the values
final String joinField = idField + productIdField;
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(
random(),
dir,
newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
// 0
Document doc = new Document();
doc.add(new TextField(idField, "1", Field.Store.NO));
doc.add(new TextField(typeField, "product", Field.Store.NO));
doc.add(new TextField("description", "random text", Field.Store.NO));
doc.add(new TextField("name", "name1", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
w.addDocument(doc);
// 1
doc = new Document();
doc.add(new TextField(productIdField, "1", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "10.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
w.addDocument(doc);
// 2
doc = new Document();
doc.add(new TextField(productIdField, "1", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "20.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
w.addDocument(doc);
// 3
doc = new Document();
doc.add(new TextField(idField, "2", Field.Store.NO));
doc.add(new TextField(typeField, "product", Field.Store.NO));
doc.add(new TextField("description", "more random text", Field.Store.NO));
doc.add(new TextField("name", "name2", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
w.addDocument(doc);
w.commit();
// 4
doc = new Document();
doc.add(new TextField(productIdField, "2", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "10.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
w.addDocument(doc);
// 5
doc = new Document();
doc.add(new TextField(productIdField, "2", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "20.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
w.addDocument(doc);
IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
w.close();
IndexReader r = indexSearcher.getIndexReader();
SortedDocValues[] values = new SortedDocValues[r.leaves().size()];
for (int i = 0; i < values.length; i++) {
LeafReader leafReader = r.leaves().get(i).reader();
values[i] = DocValues.getSorted(leafReader, joinField);
}
OrdinalMap ordinalMap = OrdinalMap.build(
null, values, PackedInts.DEFAULT
);
Query toQuery = new TermQuery(new Term(typeField, "price"));
Query fromQuery = new TermQuery(new Term("name", "name2"));
// Search for product and return prices
Query joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
TopDocs result = indexSearcher.search(joinQuery, 10);
assertEquals(2, result.totalHits.value);
assertEquals(4, result.scoreDocs[0].doc);
assertEquals(5, result.scoreDocs[1].doc);
fromQuery = new TermQuery(new Term("name", "name1"));
joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
result = indexSearcher.search(joinQuery, 10);
assertEquals(2, result.totalHits.value);
assertEquals(1, result.scoreDocs[0].doc);
assertEquals(2, result.scoreDocs[1].doc);
// Search for prices and return products
fromQuery = new TermQuery(new Term("price", "20.0"));
toQuery = new TermQuery(new Term(typeField, "product"));
joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, ScoreMode.None, ordinalMap);
result = indexSearcher.search(joinQuery, 10);
assertEquals(2, result.totalHits.value);
assertEquals(0, result.scoreDocs[0].doc);
assertEquals(3, result.scoreDocs[1].doc);
indexSearcher.getIndexReader().close();
dir.close();
}
public void testOrdinalsJoinExplainNoMatches() throws Exception {
final String idField = "id";
final String productIdField = "productId";
// A field indicating to what type a document belongs, which is then used to distinques between documents during joining.
final String typeField = "type";
// A single sorted doc values field that holds the join values for all document types.
// Typically during indexing a schema will automatically create this field with the values
final String joinField = idField + productIdField;
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(
dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE)
);
// 0
Document doc = new Document();
doc.add(new TextField(idField, "1", Field.Store.NO));
doc.add(new TextField(typeField, "product", Field.Store.NO));
doc.add(new TextField("description", "random text", Field.Store.NO));
doc.add(new TextField("name", "name1", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
w.addDocument(doc);
// 1
doc = new Document();
doc.add(new TextField(idField, "2", Field.Store.NO));
doc.add(new TextField(typeField, "product", Field.Store.NO));
doc.add(new TextField("description", "random text", Field.Store.NO));
doc.add(new TextField("name", "name2", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
w.addDocument(doc);
// 2
doc = new Document();
doc.add(new TextField(productIdField, "1", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "10.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
w.addDocument(doc);
// 3
doc = new Document();
doc.add(new TextField(productIdField, "2", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "20.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("1")));
w.addDocument(doc);
if (random().nextBoolean()) {
w.flush();
}
// 4
doc = new Document();
doc.add(new TextField(productIdField, "3", Field.Store.NO));
doc.add(new TextField(typeField, "price", Field.Store.NO));
doc.add(new TextField("price", "5.0", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("2")));
w.addDocument(doc);
// 5
doc = new Document();
doc.add(new TextField("field", "value", Field.Store.NO));
w.addDocument(doc);
IndexReader r = DirectoryReader.open(w);
IndexSearcher indexSearcher = new IndexSearcher(r);
SortedDocValues[] values = new SortedDocValues[r.leaves().size()];
for (int i = 0; i < values.length; i++) {
LeafReader leafReader = r.leaves().get(i).reader();
values[i] = DocValues.getSorted(leafReader, joinField);
}
OrdinalMap ordinalMap = OrdinalMap.build(
null, values, PackedInts.DEFAULT
);
Query toQuery = new TermQuery(new Term("price", "5.0"));
Query fromQuery = new TermQuery(new Term("name", "name2"));
for (ScoreMode scoreMode : ScoreMode.values()) {
Query joinQuery = JoinUtil.createJoinQuery(joinField, fromQuery, toQuery, indexSearcher, scoreMode, ordinalMap);
TopDocs result = indexSearcher.search(joinQuery, 10);
assertEquals(1, result.totalHits.value);
assertEquals(4, result.scoreDocs[0].doc); // doc with price: 5.0
Explanation explanation = indexSearcher.explain(joinQuery, 4);
assertTrue(explanation.isMatch());
assertEquals(explanation.getDescription(), "A match, join value 2");
explanation = indexSearcher.explain(joinQuery, 3);
assertFalse(explanation.isMatch());
assertEquals(explanation.getDescription(), "Not a match, join value 1");
explanation = indexSearcher.explain(joinQuery, 5);
assertFalse(explanation.isMatch());
assertEquals(explanation.getDescription(), "Not a match");
}
w.close();
indexSearcher.getIndexReader().close();
dir.close();
}
public void testRandomOrdinalsJoin() throws Exception {
IndexIterationContext context = createContext(128, false, true);
int searchIters = atLeast(1);
IndexSearcher indexSearcher = context.searcher;
for (int i = 0; i < searchIters; i++) {
if (VERBOSE) {
System.out.println("search iter=" + i);
}
int r = random().nextInt(context.randomUniqueValues.length);
boolean from = context.randomFrom[r];
String randomValue = context.randomUniqueValues[r];
BitSet expectedResult = createExpectedResult(randomValue, from, indexSearcher.getIndexReader(), context);
final Query actualQuery = new TermQuery(new Term("value", randomValue));
if (VERBOSE) {
System.out.println("actualQuery=" + actualQuery);
}
final ScoreMode scoreMode = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
if (VERBOSE) {
System.out.println("scoreMode=" + scoreMode);
}
final Query joinQuery;
if (from) {
BooleanQuery.Builder fromQuery = new BooleanQuery.Builder();
fromQuery.add(new TermQuery(new Term("type", "from")), BooleanClause.Occur.FILTER);
fromQuery.add(actualQuery, BooleanClause.Occur.MUST);
Query toQuery = new TermQuery(new Term("type", "to"));
joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery.build(), toQuery, indexSearcher, scoreMode, context.ordinalMap);
} else {
BooleanQuery.Builder fromQuery = new BooleanQuery.Builder();
fromQuery.add(new TermQuery(new Term("type", "to")), BooleanClause.Occur.FILTER);
fromQuery.add(actualQuery, BooleanClause.Occur.MUST);
Query toQuery = new TermQuery(new Term("type", "from"));
joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery.build(), toQuery, indexSearcher, scoreMode, context.ordinalMap);
}
if (VERBOSE) {
System.out.println("joinQuery=" + joinQuery);
}
final BitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10, Integer.MAX_VALUE);
indexSearcher.search(joinQuery, MultiCollector.wrap(new BitSetCollector(actualResult), topScoreDocCollector));
assertBitSet(expectedResult, actualResult, indexSearcher);
TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context);
TopDocs actualTopDocs = topScoreDocCollector.topDocs();
assertTopDocs(expectedTopDocs, actualTopDocs, scoreMode, indexSearcher, joinQuery);
}
context.close();
}
public void testMinMaxScore() throws Exception {
String priceField = "price";
Query priceQuery = numericDocValuesScoreQuery(priceField);
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(
random(),
dir,
newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false))
);
Map<String, Float> lowestScoresPerParent = new HashMap<>();
Map<String, Float> highestScoresPerParent = new HashMap<>();
int numParents = RandomNumbers.randomIntBetween(random(), 16, 64);
for (int p = 0; p < numParents; p++) {
String parentId = Integer.toString(p);
Document parentDoc = new Document();
parentDoc.add(new StringField("id", parentId, Field.Store.YES));
parentDoc.add(new StringField("type", "to", Field.Store.NO));
parentDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId)));
iw.addDocument(parentDoc);
int numChildren = RandomNumbers.randomIntBetween(random(), 2, 16);
int lowest = Integer.MAX_VALUE;
int highest = Integer.MIN_VALUE;
for (int c = 0; c < numChildren; c++) {
String childId = Integer.toString(p + c);
Document childDoc = new Document();
childDoc.add(new StringField("id", childId, Field.Store.YES));
childDoc.add(new StringField("type", "from", Field.Store.NO));
childDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId)));
int price = random().nextInt(1000);
childDoc.add(new NumericDocValuesField(priceField, price));
iw.addDocument(childDoc);
lowest = Math.min(lowest, price);
highest = Math.max(highest, price);
}
lowestScoresPerParent.put(parentId, (float) lowest);
highestScoresPerParent.put(parentId, (float) highest);
}
iw.close();
IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
SortedDocValues[] values = new SortedDocValues[searcher.getIndexReader().leaves().size()];
for (LeafReaderContext leadContext : searcher.getIndexReader().leaves()) {
values[leadContext.ord] = DocValues.getSorted(leadContext.reader(), "join_field");
}
OrdinalMap ordinalMap = OrdinalMap.build(
null, values, PackedInts.DEFAULT
);
BooleanQuery.Builder fromQuery = new BooleanQuery.Builder();
fromQuery.add(priceQuery, BooleanClause.Occur.MUST);
Query toQuery = new TermQuery(new Term("type", "to"));
Query joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery.build(), toQuery, searcher, ScoreMode.Min, ordinalMap);
TopDocs topDocs = searcher.search(joinQuery, numParents);
assertEquals(numParents, topDocs.totalHits.value);
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
ScoreDoc scoreDoc = topDocs.scoreDocs[i];
String id = searcher.doc(scoreDoc.doc).get("id");
assertEquals(lowestScoresPerParent.get(id), scoreDoc.score, 0f);
}
joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery.build(), toQuery, searcher, ScoreMode.Max, ordinalMap);
topDocs = searcher.search(joinQuery, numParents);
assertEquals(numParents, topDocs.totalHits.value);
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
ScoreDoc scoreDoc = topDocs.scoreDocs[i];
String id = searcher.doc(scoreDoc.doc).get("id");
assertEquals(highestScoresPerParent.get(id), scoreDoc.score, 0f);
}
searcher.getIndexReader().close();
dir.close();
}
// FunctionQuery would be helpful, but join module doesn't depend on queries module.
static Query numericDocValuesScoreQuery(final String field) {
return new Query() {
private final Query fieldQuery = new DocValuesFieldExistsQuery(field);
@Override
public Weight createWeight(IndexSearcher searcher, org.apache.lucene.search.ScoreMode scoreMode, float boost) throws IOException {
Weight fieldWeight = fieldQuery.createWeight(searcher, org.apache.lucene.search.ScoreMode.COMPLETE_NO_SCORES, boost);
return new Weight(this) {
@Override
public void extractTerms(Set<Term> terms) {
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
return null;
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
Scorer fieldScorer = fieldWeight.scorer(context);
if (fieldScorer == null) {
return null;
}
NumericDocValues price = context.reader().getNumericDocValues(field);
return new FilterScorer(fieldScorer, this) {
@Override
public float score() throws IOException {
assertEquals(in.docID(), price.advance(in.docID()));
return (float) price.longValue();
}
@Override
public float getMaxScore(int upTo) throws IOException {
return Float.POSITIVE_INFINITY;
}
};
}
@Override
public boolean isCacheable(LeafReaderContext ctx) {
return false;
}
};
}
@Override
public void visit(QueryVisitor visitor) {
}
@Override
public String toString(String field) {
return fieldQuery.toString(field);
}
@Override
public boolean equals(Object o) {
return o == this;
}
@Override
public int hashCode() {
return System.identityHashCode(this);
}
};
}
public void testMinMaxDocs() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(
random(),
dir,
newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false))
);
int minChildDocsPerParent = 2;
int maxChildDocsPerParent = 16;
int numParents = RandomNumbers.randomIntBetween(random(), 16, 64);
int[] childDocsPerParent = new int[numParents];
for (int p = 0; p < numParents; p++) {
String parentId = Integer.toString(p);
Document parentDoc = new Document();
parentDoc.add(new StringField("id", parentId, Field.Store.YES));
parentDoc.add(new StringField("type", "to", Field.Store.NO));
parentDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId)));
iw.addDocument(parentDoc);
int numChildren = RandomNumbers.randomIntBetween(random(), minChildDocsPerParent, maxChildDocsPerParent);
childDocsPerParent[p] = numChildren;
for (int c = 0; c < numChildren; c++) {
String childId = Integer.toString(p + c);
Document childDoc = new Document();
childDoc.add(new StringField("id", childId, Field.Store.YES));
childDoc.add(new StringField("type", "from", Field.Store.NO));
childDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId)));
iw.addDocument(childDoc);
}
}
iw.close();
IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
SortedDocValues[] values = new SortedDocValues[searcher.getIndexReader().leaves().size()];
for (LeafReaderContext leadContext : searcher.getIndexReader().leaves()) {
values[leadContext.ord] = DocValues.getSorted(leadContext.reader(), "join_field");
}
OrdinalMap ordinalMap = OrdinalMap.build(
null, values, PackedInts.DEFAULT
);
Query fromQuery = new TermQuery(new Term("type", "from"));
Query toQuery = new TermQuery(new Term("type", "to"));
int iters = RandomNumbers.randomIntBetween(random(), 3, 9);
for (int i = 1; i <= iters; i++) {
final ScoreMode scoreMode = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
int min = RandomNumbers.randomIntBetween(random(), minChildDocsPerParent, maxChildDocsPerParent - 1);
int max = RandomNumbers.randomIntBetween(random(), min, maxChildDocsPerParent);
if (VERBOSE) {
System.out.println("iter=" + i);
System.out.println("scoreMode=" + scoreMode);
System.out.println("min=" + min);
System.out.println("max=" + max);
}
Query joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery, toQuery, searcher, scoreMode, ordinalMap, min, max);
TotalHitCountCollector collector = new TotalHitCountCollector();
searcher.search(joinQuery, collector);
int expectedCount = 0;
for (int numChildDocs : childDocsPerParent) {
if (numChildDocs >= min && numChildDocs <= max) {
expectedCount++;
}
}
assertEquals(expectedCount, collector.getTotalHits());
}
searcher.getIndexReader().close();
dir.close();
}
public void testRewrite() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new SortedDocValuesField("join_field", new BytesRef("abc")));
w.addDocument(doc);
doc = new Document();
doc.add(new SortedDocValuesField("join_field", new BytesRef("abd")));
w.addDocument(doc);
IndexReader reader = w.getReader();
IndexSearcher searcher = newSearcher(reader);
OrdinalMap ordMap = OrdinalMap.build(null, new SortedDocValues[0], 0f);
{
Query joinQuery = JoinUtil.createJoinQuery("join_field", new MatchNoDocsQuery(),
new MatchNoDocsQuery(), searcher, RandomPicks.randomFrom(random(), ScoreMode.values()), ordMap, 0, Integer.MAX_VALUE);
searcher.search(joinQuery, 1); // no exception due to missing rewrites
}
{
Query joinQuery = JoinUtil.createJoinQuery("join_field", new MatchNoDocsQuery(),
new MatchNoDocsQuery(), searcher, ScoreMode.None, ordMap, 1, Integer.MAX_VALUE);
Query rewritten = searcher.rewrite(joinQuery);
// should simplify to GlobalOrdinalsQuery since min is set to 1
assertTrue(rewritten instanceof GlobalOrdinalsQuery);
searcher.search(joinQuery, 1); // no exception due to missing rewrites
}
reader.close();
w.close();
dir.close();
}
// TermsWithScoreCollector.MV.Avg forgets to grow beyond TermsWithScoreCollector.INITIAL_ARRAY_SIZE
public void testOverflowTermsWithScoreCollector() throws Exception {
test300spartans(true, ScoreMode.Avg);
}
public void testOverflowTermsWithScoreCollectorRandom() throws Exception {
test300spartans(random().nextBoolean(), ScoreMode.values()[random().nextInt(ScoreMode.values().length)]);
}
void test300spartans(boolean multipleValues, ScoreMode scoreMode) throws Exception {
final String idField = "id";
final String toField = "productId";
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(
random(),
dir,
newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
// 0
Document doc = new Document();
doc.add(new TextField("description", "random text", Field.Store.NO));
doc.add(new TextField("name", "name1", Field.Store.NO));
doc.add(new TextField(idField, "0", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("0")));
w.addDocument(doc);
doc = new Document();
doc.add(new TextField("price", "10.0", Field.Store.NO));
if (multipleValues) {
for(int i=0;i<300;i++) {
doc.add(new SortedSetDocValuesField(toField, new BytesRef(""+i)));
}
} else {
doc.add(new SortedDocValuesField(toField, new BytesRef("0")));
}
w.addDocument(doc);
IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
w.close();
// Search for product
Query joinQuery =
JoinUtil.createJoinQuery(toField, multipleValues, idField, new TermQuery(new Term("price", "10.0")), indexSearcher, scoreMode);
TopDocs result = indexSearcher.search(joinQuery, 10);
assertEquals(1, result.totalHits.value);
assertEquals(0, result.scoreDocs[0].doc);
indexSearcher.getIndexReader().close();
dir.close();
}
/** LUCENE-5487: verify a join query inside a SHOULD BQ
* will still use the join query's optimized BulkScorers */
public void testInsideBooleanQuery() throws Exception {
final String idField = "id";
final String toField = "productId";
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(
random(),
dir,
newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
// 0
Document doc = new Document();
doc.add(new TextField("description", "random text", Field.Store.NO));
doc.add(new TextField("name", "name1", Field.Store.NO));
doc.add(new TextField(idField, "7", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("7")));
w.addDocument(doc);
// 1
doc = new Document();
doc.add(new TextField("price", "10.0", Field.Store.NO));
doc.add(new TextField(idField, "2", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("2")));
doc.add(new TextField(toField, "7", Field.Store.NO));
w.addDocument(doc);
// 2
doc = new Document();
doc.add(new TextField("price", "20.0", Field.Store.NO));
doc.add(new TextField(idField, "3", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("3")));
doc.add(new TextField(toField, "7", Field.Store.NO));
w.addDocument(doc);
// 3
doc = new Document();
doc.add(new TextField("description", "more random text", Field.Store.NO));
doc.add(new TextField("name", "name2", Field.Store.NO));
doc.add(new TextField(idField, "0", Field.Store.NO));
w.addDocument(doc);
w.commit();
// 4
doc = new Document();
doc.add(new TextField("price", "10.0", Field.Store.NO));
doc.add(new TextField(idField, "5", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("5")));
doc.add(new TextField(toField, "0", Field.Store.NO));
w.addDocument(doc);
// 5
doc = new Document();
doc.add(new TextField("price", "20.0", Field.Store.NO));
doc.add(new TextField(idField, "6", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("6")));
doc.add(new TextField(toField, "0", Field.Store.NO));
w.addDocument(doc);
w.forceMerge(1);
IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
w.close();
// Search for product
Query joinQuery =
JoinUtil.createJoinQuery(idField, false, toField, new TermQuery(new Term("description", "random")), indexSearcher, ScoreMode.Avg);
BooleanQuery.Builder bq = new BooleanQuery.Builder();
bq.add(joinQuery, BooleanClause.Occur.SHOULD);
bq.add(new TermQuery(new Term("id", "3")), BooleanClause.Occur.SHOULD);
indexSearcher.search(bq.build(), new SimpleCollector() {
boolean sawFive;
@Override
public void collect(int docID) {
// Hairy / evil (depends on how BooleanScorer
// stores temporarily collected docIDs by
// appending to head of linked list):
if (docID == 5) {
sawFive = true;
} else if (docID == 1) {
assertFalse("optimized bulkScorer was not used for join query embedded in boolean query!", sawFive);
}
}
@Override
public org.apache.lucene.search.ScoreMode scoreMode() {
return org.apache.lucene.search.ScoreMode.COMPLETE_NO_SCORES;
}
});
indexSearcher.getIndexReader().close();
dir.close();
}
public void testSimpleWithScoring() throws Exception {
final String idField = "id";
final String toField = "movieId";
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(
random(),
dir,
newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
// 0
Document doc = new Document();
doc.add(new TextField("description", "A random movie", Field.Store.NO));
doc.add(new TextField("name", "Movie 1", Field.Store.NO));
doc.add(new TextField(idField, "1", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("1")));
w.addDocument(doc);
// 1
doc = new Document();
doc.add(new TextField("subtitle", "The first subtitle of this movie", Field.Store.NO));
doc.add(new TextField(idField, "2", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("2")));
doc.add(new TextField(toField, "1", Field.Store.NO));
doc.add(new SortedDocValuesField(toField, new BytesRef("1")));
w.addDocument(doc);
// 2
doc = new Document();
doc.add(new TextField("subtitle", "random subtitle; random event movie", Field.Store.NO));
doc.add(new TextField(idField, "3", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("3")));
doc.add(new TextField(toField, "1", Field.Store.NO));
doc.add(new SortedDocValuesField(toField, new BytesRef("1")));
w.addDocument(doc);
// 3
doc = new Document();
doc.add(new TextField("description", "A second random movie", Field.Store.NO));
doc.add(new TextField("name", "Movie 2", Field.Store.NO));
doc.add(new TextField(idField, "4", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("4")));
w.addDocument(doc);
w.commit();
// 4
doc = new Document();
doc.add(new TextField("subtitle", "a very random event happened during christmas night", Field.Store.NO));
doc.add(new TextField(idField, "5", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("5")));
doc.add(new TextField(toField, "4", Field.Store.NO));
doc.add(new SortedDocValuesField(toField, new BytesRef("4")));
w.addDocument(doc);
// 5
doc = new Document();
doc.add(new TextField("subtitle", "movie end movie test 123 test 123 random", Field.Store.NO));
doc.add(new TextField(idField, "6", Field.Store.NO));
doc.add(new SortedDocValuesField(idField, new BytesRef("6")));
doc.add(new TextField(toField, "4", Field.Store.NO));
doc.add(new SortedDocValuesField(toField, new BytesRef("4")));
w.addDocument(doc);
IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
w.close();
// Search for movie via subtitle
Query joinQuery =
JoinUtil.createJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "random")), indexSearcher, ScoreMode.Max);
TopDocs result = indexSearcher.search(joinQuery, 10);
assertEquals(2, result.totalHits.value);
assertEquals(0, result.scoreDocs[0].doc);
assertEquals(3, result.scoreDocs[1].doc);
// Score mode max.
joinQuery = JoinUtil.createJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")), indexSearcher, ScoreMode.Max);
result = indexSearcher.search(joinQuery, 10);
assertEquals(2, result.totalHits.value);
assertEquals(3, result.scoreDocs[0].doc);
assertEquals(0, result.scoreDocs[1].doc);
// Score mode total
joinQuery = JoinUtil.createJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")), indexSearcher, ScoreMode.Total);
result = indexSearcher.search(joinQuery, 10);
assertEquals(2, result.totalHits.value);
assertEquals(0, result.scoreDocs[0].doc);
assertEquals(3, result.scoreDocs[1].doc);
//Score mode avg
joinQuery = JoinUtil.createJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")), indexSearcher, ScoreMode.Avg);
result = indexSearcher.search(joinQuery, 10);
assertEquals(2, result.totalHits.value);
assertEquals(3, result.scoreDocs[0].doc);
assertEquals(0, result.scoreDocs[1].doc);
indexSearcher.getIndexReader().close();
dir.close();
}
public void testEquals() throws Exception {
final int numDocs = atLeast(random(), 50);
try (final Directory dir = newDirectory()) {
try (final RandomIndexWriter w = new RandomIndexWriter(random(), dir,
newIndexWriterConfig(new MockAnalyzer(random()))
.setMergePolicy(newLogMergePolicy()))) {
boolean multiValued = random().nextBoolean();
String joinField = multiValued ? "mvField" : "svField";
for (int id = 0; id < numDocs; id++) {
Document doc = new Document();
doc.add(new TextField("id", "" + id, Field.Store.NO));
doc.add(new TextField("name", "name" + (id % 7), Field.Store.NO));
if (multiValued) {
int numValues = 1 + random().nextInt(2);
for (int i = 0; i < numValues; i++) {
doc.add(new SortedSetDocValuesField(joinField, new BytesRef("" + random().nextInt(13))));
}
} else {
doc.add(new SortedDocValuesField(joinField, new BytesRef("" + random().nextInt(13))));
}
w.addDocument(doc);
}
Set<ScoreMode> scoreModes = EnumSet.allOf(ScoreMode.class);
ScoreMode scoreMode1 = RandomPicks.randomFrom(random(), scoreModes);
scoreModes.remove(scoreMode1);
ScoreMode scoreMode2 = RandomPicks.randomFrom(random(), scoreModes);
final Query x;
try (IndexReader r = w.getReader()) {
IndexSearcher indexSearcher = new IndexSearcher(r);
x = JoinUtil.createJoinQuery(joinField, multiValued, joinField,
new TermQuery(new Term("name", "name5")),
indexSearcher, scoreMode1);
assertEquals("identical calls to createJoinQuery",
x, JoinUtil.createJoinQuery(joinField, multiValued, joinField,
new TermQuery(new Term("name", "name5")),
indexSearcher, scoreMode1));
assertFalse("score mode (" + scoreMode1 + " != " + scoreMode2 + "), but queries are equal",
x.equals(JoinUtil.createJoinQuery(joinField, multiValued, joinField,
new TermQuery(new Term("name", "name5")),
indexSearcher, scoreMode2)));
assertFalse("from fields (joinField != \"other_field\") but queries equals",
x.equals(JoinUtil.createJoinQuery(joinField, multiValued, "other_field",
new TermQuery(new Term("name", "name5")),
indexSearcher, scoreMode1)));
assertFalse("from fields (\"other_field\" != joinField) but queries equals",
x.equals(JoinUtil.createJoinQuery("other_field", multiValued, joinField,
new TermQuery(new Term("name", "name5")),
indexSearcher, scoreMode1)));
assertFalse("fromQuery (name:name5 != name:name6) but queries equals",
x.equals(JoinUtil.createJoinQuery("other_field", multiValued, joinField,
new TermQuery(new Term("name", "name6")),
indexSearcher, scoreMode1)));
}
for (int i = 0; i < 13; i++) {
Document doc = new Document();
doc.add(new TextField("id", "new_id" , Field.Store.NO));
doc.add(new TextField("name", "name5", Field.Store.NO));
if (multiValued) {
int numValues = 1 + random().nextInt(2);
for (int j = 0; j < numValues; j++) {
doc.add(new SortedSetDocValuesField(joinField, new BytesRef("" + i)));
}
} else {
doc.add(new SortedDocValuesField(joinField, new BytesRef("" + i)));
}
w.addDocument(doc);
}
try (IndexReader r = w.getReader()) {
IndexSearcher indexSearcher = new IndexSearcher(r);
assertFalse("Query shouldn't be equal, because different index readers ",
x.equals(JoinUtil.createJoinQuery(joinField, multiValued, joinField,
new TermQuery(new Term("name", "name5")),
indexSearcher, scoreMode1)));
}
}
}
}
public void testEquals_globalOrdinalsJoin() throws Exception {
final int numDocs = atLeast(random(), 50);
try (final Directory dir = newDirectory()) {
try (final RandomIndexWriter w = new RandomIndexWriter(random(), dir,
newIndexWriterConfig(new MockAnalyzer(random()))
.setMergePolicy(newLogMergePolicy()))) {
String joinField = "field";
for (int id = 0; id < numDocs; id++) {
Document doc = new Document();
doc.add(new TextField("id", "" + id, Field.Store.NO));
doc.add(new TextField("name", "name" + (id % 7), Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("" + random().nextInt(13))));
w.addDocument(doc);
}
Set<ScoreMode> scoreModes = EnumSet.allOf(ScoreMode.class);
ScoreMode scoreMode1 = RandomPicks.randomFrom(random(), scoreModes);
scoreModes.remove(scoreMode1);
ScoreMode scoreMode2 = RandomPicks.randomFrom(random(), scoreModes);
final Query x;
try (IndexReader r = w.getReader()) {
SortedDocValues[] values = new SortedDocValues[r.leaves().size()];
for (int i = 0; i < values.length; i++) {
LeafReader leafReader = r.leaves().get(i).reader();
values[i] = DocValues.getSorted(leafReader, joinField);
}
OrdinalMap ordinalMap = OrdinalMap.build(
null, values, PackedInts.DEFAULT
);
IndexSearcher indexSearcher = new IndexSearcher(r);
x = JoinUtil.createJoinQuery(joinField, new TermQuery(new Term("name", "name5")), new MatchAllDocsQuery(),
indexSearcher, scoreMode1, ordinalMap);
assertEquals("identical calls to createJoinQuery",
x, JoinUtil.createJoinQuery(joinField, new TermQuery(new Term("name", "name5")), new MatchAllDocsQuery(),
indexSearcher, scoreMode1, ordinalMap));
assertFalse("score mode (" + scoreMode1 + " != " + scoreMode2 + "), but queries are equal",
x.equals(JoinUtil.createJoinQuery(joinField, new TermQuery(new Term("name", "name5")), new MatchAllDocsQuery(),
indexSearcher, scoreMode2, ordinalMap)));
assertFalse("fromQuery (name:name5 != name:name6) but queries equals",
x.equals(JoinUtil.createJoinQuery(joinField, new TermQuery(new Term("name", "name6")), new MatchAllDocsQuery(),
indexSearcher, scoreMode1, ordinalMap)));
}
for (int i = 0; i < 13; i++) {
Document doc = new Document();
doc.add(new TextField("id", "new_id" , Field.Store.NO));
doc.add(new TextField("name", "name5", Field.Store.NO));
doc.add(new SortedDocValuesField(joinField, new BytesRef("" + i)));
w.addDocument(doc);
}
try (IndexReader r = w.getReader()) {
SortedDocValues[] values = new SortedDocValues[r.leaves().size()];
for (int i = 0; i < values.length; i++) {
LeafReader leafReader = r.leaves().get(i).reader();
values[i] = DocValues.getSorted(leafReader, joinField);
}
OrdinalMap ordinalMap = OrdinalMap.build(
null, values, PackedInts.DEFAULT
);
IndexSearcher indexSearcher = new IndexSearcher(r);
assertFalse("Query shouldn't be equal, because different index readers ",
x.equals(JoinUtil.createJoinQuery(joinField, new TermQuery(new Term("name", "name5")), new MatchAllDocsQuery(),
indexSearcher, scoreMode1, ordinalMap)));
}
}
}
}
public void testEquals_numericJoin() throws Exception {
final int numDocs = atLeast(random(), 50);
try (final Directory dir = newDirectory()) {
try (final RandomIndexWriter w = new RandomIndexWriter(random(), dir,
newIndexWriterConfig(new MockAnalyzer(random()))
.setMergePolicy(newLogMergePolicy()))) {
boolean multiValued = random().nextBoolean();
String joinField = multiValued ? "mvField" : "svField";
for (int id = 0; id < numDocs; id++) {
Document doc = new Document();
doc.add(new TextField("id", "" + id, Field.Store.NO));
doc.add(new TextField("name", "name" + (id % 7), Field.Store.NO));
if (multiValued) {
int numValues = 1 + random().nextInt(2);
for (int i = 0; i < numValues; i++) {
doc.add(new IntPoint(joinField, random().nextInt(13)));
doc.add(new SortedNumericDocValuesField(joinField, random().nextInt(13)));
}
} else {
doc.add(new IntPoint(joinField, random().nextInt(13)));
doc.add(new NumericDocValuesField(joinField, random().nextInt(13)));
}
w.addDocument(doc);
}
Set<ScoreMode> scoreModes = EnumSet.allOf(ScoreMode.class);
ScoreMode scoreMode1 = scoreModes.toArray(new ScoreMode[0])[random().nextInt(scoreModes.size())];
scoreModes.remove(scoreMode1);
ScoreMode scoreMode2 = scoreModes.toArray(new ScoreMode[0])[random().nextInt(scoreModes.size())];
final Query x;
try (IndexReader r = w.getReader()) {
IndexSearcher indexSearcher = new IndexSearcher(r);
x = JoinUtil.createJoinQuery(joinField, multiValued, joinField,
Integer.class, new TermQuery(new Term("name", "name5")),
indexSearcher, scoreMode1);
assertEquals("identical calls to createJoinQuery",
x, JoinUtil.createJoinQuery(joinField, multiValued, joinField,
Integer.class, new TermQuery(new Term("name", "name5")),
indexSearcher, scoreMode1));
assertFalse("score mode (" + scoreMode1 + " != " + scoreMode2 + "), but queries are equal",
x.equals(JoinUtil.createJoinQuery(joinField, multiValued, joinField,
Integer.class, new TermQuery(new Term("name", "name5")),
indexSearcher, scoreMode2)));
assertFalse("from fields (joinField != \"other_field\") but queries equals",
x.equals(JoinUtil.createJoinQuery(joinField, multiValued, "other_field",
Integer.class, new TermQuery(new Term("name", "name5")),
indexSearcher, scoreMode1)));
assertFalse("from fields (\"other_field\" != joinField) but queries equals",
x.equals(JoinUtil.createJoinQuery("other_field", multiValued, joinField,
Integer.class, new TermQuery(new Term("name", "name5")),
indexSearcher, scoreMode1)));
assertFalse("fromQuery (name:name5 != name:name6) but queries equals",
x.equals(JoinUtil.createJoinQuery("other_field", multiValued, joinField,
Integer.class, new TermQuery(new Term("name", "name6")),
indexSearcher, scoreMode1)));
}
for (int i = 14; i < 26; i++) {
Document doc = new Document();
doc.add(new TextField("id", "new_id" , Field.Store.NO));
doc.add(new TextField("name", "name5", Field.Store.NO));
if (multiValued) {
int numValues = 1 + random().nextInt(2);
for (int j = 0; j < numValues; j++) {
doc.add(new SortedNumericDocValuesField(joinField, i));
doc.add(new IntPoint(joinField, i));
}
} else {
doc.add(new NumericDocValuesField(joinField, i));
doc.add(new IntPoint(joinField, i));
}
w.addDocument(doc);
}
try (IndexReader r = w.getReader()) {
IndexSearcher indexSearcher = new IndexSearcher(r);
assertFalse("Query shouldn't be equal, because new join values have been indexed",
x.equals(JoinUtil.createJoinQuery(joinField, multiValued, joinField,
Integer.class, new TermQuery(new Term("name", "name5")),
indexSearcher, scoreMode1)));
}
}
}
}
@Test
@Slow
public void testSingleValueRandomJoin() throws Exception {
int maxIndexIter = atLeast(1);
int maxSearchIter = atLeast(1);
executeRandomJoin(false, maxIndexIter, maxSearchIter, TestUtil.nextInt(random(), 87, 764));
}
@Test
@Slow
// This test really takes more time, that is why the number of iterations are smaller.
public void testMultiValueRandomJoin() throws Exception {
int maxIndexIter = atLeast(1);
int maxSearchIter = atLeast(1);
executeRandomJoin(true, maxIndexIter, maxSearchIter, TestUtil.nextInt(random(), 11, 57));
}
private void executeRandomJoin(boolean multipleValuesPerDocument, int maxIndexIter, int maxSearchIter, int numberOfDocumentsToIndex) throws Exception {
for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++) {
if (VERBOSE) {
System.out.println("TEST: indexIter=" + indexIter + " numDocs=" + numberOfDocumentsToIndex);
}
IndexIterationContext context = createContext(numberOfDocumentsToIndex, multipleValuesPerDocument, false);
IndexSearcher indexSearcher = context.searcher;
if (VERBOSE) {
System.out.println("TEST: got searcher=" + indexSearcher);
}
for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++) {
if (VERBOSE) {
System.out.println("TEST: searchIter=" + searchIter);
}
int r = random().nextInt(context.randomUniqueValues.length);
boolean from = context.randomFrom[r];
String randomValue = context.randomUniqueValues[r];
BitSet expectedResult = createExpectedResult(randomValue, from, indexSearcher.getIndexReader(), context);
final Query actualQuery = new TermQuery(new Term("value", randomValue));
if (VERBOSE) {
System.out.println("actualQuery=" + actualQuery);
}
final ScoreMode scoreMode = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
if (VERBOSE) {
System.out.println("scoreMode=" + scoreMode);
}
final Query joinQuery;
{
// single val can be handled by multiple-vals
final boolean muliValsQuery = multipleValuesPerDocument || random().nextBoolean();
final String fromField = from ? "from":"to";
final String toField = from ? "to":"from";
int surpriseMe = random().nextInt(2);
switch (surpriseMe) {
case 0:
Class<? extends Number> numType;
String suffix;
if (random().nextBoolean()) {
numType = Integer.class;
suffix = "INT";
} else if (random().nextBoolean()) {
numType = Float.class;
suffix = "FLOAT";
} else if (random().nextBoolean()) {
numType = Long.class;
suffix = "LONG";
} else {
numType = Double.class;
suffix = "DOUBLE";
}
joinQuery = JoinUtil.createJoinQuery(fromField + suffix, muliValsQuery, toField + suffix, numType, actualQuery, indexSearcher, scoreMode);
break;
case 1:
joinQuery = JoinUtil.createJoinQuery(fromField, muliValsQuery, toField, actualQuery, indexSearcher, scoreMode);
break;
default:
throw new RuntimeException("unexpected value " + surpriseMe);
}
}
if (VERBOSE) {
System.out.println("joinQuery=" + joinQuery);
}
// Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector...
final BitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10, Integer.MAX_VALUE);
indexSearcher.search(joinQuery, MultiCollector.wrap(new BitSetCollector(actualResult), topScoreDocCollector));
// Asserting bit set...
assertBitSet(expectedResult, actualResult, indexSearcher);
// Asserting TopDocs...
TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context);
TopDocs actualTopDocs = topScoreDocCollector.topDocs();
assertTopDocs(expectedTopDocs, actualTopDocs, scoreMode, indexSearcher, joinQuery);
}
context.close();
}
}
private void assertBitSet(BitSet expectedResult, BitSet actualResult, IndexSearcher indexSearcher) throws IOException {
if (VERBOSE) {
System.out.println("expected cardinality:" + expectedResult.cardinality());
DocIdSetIterator iterator = new BitSetIterator(expectedResult, expectedResult.cardinality());
for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
System.out.println(String.format(Locale.ROOT, "Expected doc[%d] with id value %s", doc, indexSearcher.doc(doc).get("id")));
}
System.out.println("actual cardinality:" + actualResult.cardinality());
iterator = new BitSetIterator(actualResult, actualResult.cardinality());
for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
System.out.println(String.format(Locale.ROOT, "Actual doc[%d] with id value %s", doc, indexSearcher.doc(doc).get("id")));
}
}
assertEquals(expectedResult, actualResult);
}
private void assertTopDocs(TopDocs expectedTopDocs, TopDocs actualTopDocs, ScoreMode scoreMode, IndexSearcher indexSearcher, Query joinQuery) throws IOException {
assertEquals(expectedTopDocs.totalHits.value, actualTopDocs.totalHits.value);
assertEquals(expectedTopDocs.scoreDocs.length, actualTopDocs.scoreDocs.length);
if (scoreMode == ScoreMode.None) {
return;
}
if (VERBOSE) {
for (int i = 0; i < expectedTopDocs.scoreDocs.length; i++) {
System.out.printf(Locale.ENGLISH, "Expected doc: %d | Actual doc: %d\n", expectedTopDocs.scoreDocs[i].doc, actualTopDocs.scoreDocs[i].doc);
System.out.printf(Locale.ENGLISH, "Expected score: %f | Actual score: %f\n", expectedTopDocs.scoreDocs[i].score, actualTopDocs.scoreDocs[i].score);
}
}
for (int i = 0; i < expectedTopDocs.scoreDocs.length; i++) {
assertEquals(expectedTopDocs.scoreDocs[i].doc, actualTopDocs.scoreDocs[i].doc);
assertEquals(expectedTopDocs.scoreDocs[i].score, actualTopDocs.scoreDocs[i].score, 0.0f);
Explanation explanation = indexSearcher.explain(joinQuery, expectedTopDocs.scoreDocs[i].doc);
assertEquals(expectedTopDocs.scoreDocs[i].score, explanation.getValue().doubleValue(), 0.0f);
}
}
private IndexIterationContext createContext(int nDocs, boolean multipleValuesPerDocument, boolean globalOrdinalJoin) throws IOException {
if (globalOrdinalJoin) {
assertFalse("ordinal join doesn't support multiple join values per document", multipleValuesPerDocument);
}
Directory dir = newDirectory();
final Random random = random();
RandomIndexWriter w = new RandomIndexWriter(
random,
dir,
newIndexWriterConfig(new MockAnalyzer(random, MockTokenizer.KEYWORD, false))
);
IndexIterationContext context = new IndexIterationContext();
int numRandomValues = nDocs / RandomNumbers.randomIntBetween(random, 1, 4);
context.randomUniqueValues = new String[numRandomValues];
Set<String> trackSet = new HashSet<>();
context.randomFrom = new boolean[numRandomValues];
for (int i = 0; i < numRandomValues; i++) {
String uniqueRandomValue;
do {
// the trick is to generate values which will be ordered similarly for string, ints&longs, positive nums makes it easier
//
// Additionally in order to avoid precision loss when joining via a float field we can't generate values higher than
// 0xFFFFFF, so we can't use Integer#MAX_VALUE as upper bound here:
final int nextInt = random.nextInt(0xFFFFFF);
uniqueRandomValue = String.format(Locale.ROOT, "%08x", nextInt);
assert nextInt == Integer.parseUnsignedInt(uniqueRandomValue,16);
} while ("".equals(uniqueRandomValue) || trackSet.contains(uniqueRandomValue));
// Generate unique values and empty strings aren't allowed.
trackSet.add(uniqueRandomValue);
context.randomFrom[i] = random.nextBoolean();
context.randomUniqueValues[i] = uniqueRandomValue;
}
List<String> randomUniqueValuesReplica = new ArrayList<>(Arrays.asList(context.randomUniqueValues));
RandomDoc[] docs = new RandomDoc[nDocs];
for (int i = 0; i < nDocs; i++) {
String id = Integer.toString(i);
int randomI = random.nextInt(context.randomUniqueValues.length);
String value = context.randomUniqueValues[randomI];
Document document = new Document();
document.add(newTextField(random, "id", id, Field.Store.YES));
document.add(newTextField(random, "value", value, Field.Store.NO));
boolean from = context.randomFrom[randomI];
int numberOfLinkValues = multipleValuesPerDocument ? Math.min(2 + random.nextInt(10), context.randomUniqueValues.length) : 1;
docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
if (globalOrdinalJoin) {
document.add(newStringField("type", from ? "from" : "to", Field.Store.NO));
}
final List<String> subValues;
{
int start = randomUniqueValuesReplica.size()==numberOfLinkValues? 0 : random.nextInt(randomUniqueValuesReplica.size()-numberOfLinkValues);
subValues = randomUniqueValuesReplica.subList(start, start+numberOfLinkValues);
Collections.shuffle(subValues, random);
}
for (String linkValue : subValues) {
assert !docs[i].linkValues.contains(linkValue);
docs[i].linkValues.add(linkValue);
if (from) {
if (!context.fromDocuments.containsKey(linkValue)) {
context.fromDocuments.put(linkValue, new ArrayList<>());
}
if (!context.randomValueFromDocs.containsKey(value)) {
context.randomValueFromDocs.put(value, new ArrayList<>());
}
context.fromDocuments.get(linkValue).add(docs[i]);
context.randomValueFromDocs.get(value).add(docs[i]);
addLinkFields(random, document, "from", linkValue, multipleValuesPerDocument, globalOrdinalJoin);
} else {
if (!context.toDocuments.containsKey(linkValue)) {
context.toDocuments.put(linkValue, new ArrayList<>());
}
if (!context.randomValueToDocs.containsKey(value)) {
context.randomValueToDocs.put(value, new ArrayList<>());
}
context.toDocuments.get(linkValue).add(docs[i]);
context.randomValueToDocs.get(value).add(docs[i]);
addLinkFields(random, document, "to", linkValue, multipleValuesPerDocument, globalOrdinalJoin);
}
}
w.addDocument(document);
if (random.nextInt(10) == 4) {
w.commit();
}
if (VERBOSE) {
System.out.println("Added document[" + docs[i].id + "]: " + document);
}
}
if (random.nextBoolean()) {
if (VERBOSE) {
System.out.println("TEST: now force merge");
}
w.forceMerge(1);
}
w.close();
// Pre-compute all possible hits for all unique random values. On top of this also compute all possible score for
// any ScoreMode.
DirectoryReader topLevelReader = DirectoryReader.open(dir);
IndexSearcher searcher = newSearcher(topLevelReader);
for (int i = 0; i < context.randomUniqueValues.length; i++) {
String uniqueRandomValue = context.randomUniqueValues[i];
final String fromField;
final String toField;
final Map<String, Map<Integer, JoinScore>> queryVals;
if (context.randomFrom[i]) {
fromField = "from";
toField = "to";
queryVals = context.fromHitsToJoinScore;
} else {
fromField = "to";
toField = "from";
queryVals = context.toHitsToJoinScore;
}
final Map<BytesRef, JoinScore> joinValueToJoinScores = new HashMap<>();
if (multipleValuesPerDocument) {
searcher.search(new TermQuery(new Term("value", uniqueRandomValue)), new SimpleCollector() {
private Scorable scorer;
private SortedSetDocValues docTermOrds;
@Override
public void collect(int doc) throws IOException {
if (doc > docTermOrds.docID()) {
docTermOrds.advance(doc);
}
if (doc == docTermOrds.docID()) {
long ord;
while ((ord = docTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
final BytesRef joinValue = docTermOrds.lookupOrd(ord);
JoinScore joinScore = joinValueToJoinScores.get(joinValue);
if (joinScore == null) {
joinValueToJoinScores.put(BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore());
}
joinScore.addScore(scorer.score());
}
}
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
docTermOrds = DocValues.getSortedSet(context.reader(), fromField);
}
@Override
public void setScorer(Scorable scorer) {
this.scorer = scorer;
}
@Override
public org.apache.lucene.search.ScoreMode scoreMode() {
return org.apache.lucene.search.ScoreMode.COMPLETE;
}
});
} else {
searcher.search(new TermQuery(new Term("value", uniqueRandomValue)), new SimpleCollector() {
private Scorable scorer;
private BinaryDocValues terms;
@Override
public void collect(int doc) throws IOException {
final BytesRef joinValue;
if (terms.advanceExact(doc)) {
joinValue = terms.binaryValue();
} else {
// missing;
return;
}
JoinScore joinScore = joinValueToJoinScores.get(joinValue);
if (joinScore == null) {
joinValueToJoinScores.put(BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore());
}
if (VERBOSE) {
System.out.println("expected val=" + joinValue.utf8ToString() + " expected score=" + scorer.score());
}
joinScore.addScore(scorer.score());
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
terms = DocValues.getBinary(context.reader(), fromField);
}
@Override
public void setScorer(Scorable scorer) {
this.scorer = scorer;
}
@Override
public org.apache.lucene.search.ScoreMode scoreMode() {
return org.apache.lucene.search.ScoreMode.COMPLETE;
}
});
}
final Map<Integer, JoinScore> docToJoinScore = new HashMap<>();
if (multipleValuesPerDocument) {
Terms terms = MultiTerms.getTerms(topLevelReader, toField);
if (terms != null) {
PostingsEnum postingsEnum = null;
SortedSet<BytesRef> joinValues = new TreeSet<>();
joinValues.addAll(joinValueToJoinScores.keySet());
for (BytesRef joinValue : joinValues) {
TermsEnum termsEnum = terms.iterator();
if (termsEnum.seekExact(joinValue)) {
postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
JoinScore joinScore = joinValueToJoinScores.get(joinValue);
for (int doc = postingsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = postingsEnum.nextDoc()) {
// First encountered join value determines the score.
// Something to keep in mind for many-to-many relations.
if (!docToJoinScore.containsKey(doc)) {
docToJoinScore.put(doc, joinScore);
}
}
}
}
}
} else {
searcher.search(new MatchAllDocsQuery(), new SimpleCollector() {
private BinaryDocValues terms;
private int docBase;
@Override
public void collect(int doc) throws IOException {
final BytesRef joinValue;
if (terms.advanceExact(doc)) {
joinValue = terms.binaryValue();
} else {
// missing;
joinValue = new BytesRef(BytesRef.EMPTY_BYTES);
}
JoinScore joinScore = joinValueToJoinScores.get(joinValue);
if (joinScore == null) {
return;
}
docToJoinScore.put(docBase + doc, joinScore);
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
terms = DocValues.getBinary(context.reader(), toField);
docBase = context.docBase;
}
@Override
public void setScorer(Scorable scorer) {
}
@Override
public org.apache.lucene.search.ScoreMode scoreMode() {
return org.apache.lucene.search.ScoreMode.COMPLETE_NO_SCORES;
}
});
}
queryVals.put(uniqueRandomValue, docToJoinScore);
}
if (globalOrdinalJoin) {
SortedDocValues[] values = new SortedDocValues[topLevelReader.leaves().size()];
for (LeafReaderContext leadContext : topLevelReader.leaves()) {
values[leadContext.ord] = DocValues.getSorted(leadContext.reader(), "join_field");
}
context.ordinalMap = OrdinalMap.build(
null, values, PackedInts.DEFAULT
);
}
context.searcher = searcher;
context.dir = dir;
return context;
}
private void addLinkFields(final Random random, Document document, final String fieldName, String linkValue,
boolean multipleValuesPerDocument, boolean globalOrdinalJoin) {
document.add(newTextField(random, fieldName, linkValue, Field.Store.NO));
final int linkInt = Integer.parseUnsignedInt(linkValue,16);
document.add(new IntPoint(fieldName + "INT", linkInt));
document.add(new FloatPoint(fieldName + "FLOAT", linkInt));
final long linkLong = linkInt<<32 | linkInt;
document.add(new LongPoint(fieldName + "LONG", linkLong));
document.add(new DoublePoint(fieldName + "DOUBLE", linkLong));
if (multipleValuesPerDocument) {
document.add(new SortedSetDocValuesField(fieldName, new BytesRef(linkValue)));
document.add(new SortedNumericDocValuesField(fieldName+ "INT", linkInt));
document.add(new SortedNumericDocValuesField(fieldName+ "FLOAT", Float.floatToRawIntBits(linkInt)));
document.add(new SortedNumericDocValuesField(fieldName+ "LONG", linkLong));
document.add(new SortedNumericDocValuesField(fieldName+ "DOUBLE", Double.doubleToRawLongBits(linkLong)));
} else {
document.add(new SortedDocValuesField(fieldName, new BytesRef(linkValue)));
document.add(new NumericDocValuesField(fieldName+ "INT", linkInt));
document.add(new FloatDocValuesField(fieldName+ "FLOAT", linkInt));
document.add(new NumericDocValuesField(fieldName+ "LONG", linkLong));
document.add(new DoubleDocValuesField(fieldName+ "DOUBLE", linkLong));
}
if (globalOrdinalJoin) {
document.add(new SortedDocValuesField("join_field", new BytesRef(linkValue)));
}
}
private TopDocs createExpectedTopDocs(String queryValue,
final boolean from,
final ScoreMode scoreMode,
IndexIterationContext context) {
Map<Integer, JoinScore> hitsToJoinScores;
if (from) {
hitsToJoinScores = context.fromHitsToJoinScore.get(queryValue);
} else {
hitsToJoinScores = context.toHitsToJoinScore.get(queryValue);
}
List<Map.Entry<Integer,JoinScore>> hits = new ArrayList<>(hitsToJoinScores.entrySet());
Collections.sort(hits, new Comparator<Map.Entry<Integer, JoinScore>>() {
@Override
public int compare(Map.Entry<Integer, JoinScore> hit1, Map.Entry<Integer, JoinScore> hit2) {
float score1 = hit1.getValue().score(scoreMode);
float score2 = hit2.getValue().score(scoreMode);
int cmp = Float.compare(score2, score1);
if (cmp != 0) {
return cmp;
}
return hit1.getKey() - hit2.getKey();
}
});
ScoreDoc[] scoreDocs = new ScoreDoc[Math.min(10, hits.size())];
for (int i = 0; i < scoreDocs.length; i++) {
Map.Entry<Integer,JoinScore> hit = hits.get(i);
scoreDocs[i] = new ScoreDoc(hit.getKey(), hit.getValue().score(scoreMode));
}
return new TopDocs(new TotalHits(hits.size(), TotalHits.Relation.EQUAL_TO), scoreDocs);
}
private BitSet createExpectedResult(String queryValue, boolean from, IndexReader topLevelReader, IndexIterationContext context) throws IOException {
final Map<String, List<RandomDoc>> randomValueDocs;
final Map<String, List<RandomDoc>> linkValueDocuments;
if (from) {
randomValueDocs = context.randomValueFromDocs;
linkValueDocuments = context.toDocuments;
} else {
randomValueDocs = context.randomValueToDocs;
linkValueDocuments = context.fromDocuments;
}
BitSet expectedResult = new FixedBitSet(topLevelReader.maxDoc());
List<RandomDoc> matchingDocs = randomValueDocs.get(queryValue);
if (matchingDocs == null) {
return new FixedBitSet(topLevelReader.maxDoc());
}
for (RandomDoc matchingDoc : matchingDocs) {
for (String linkValue : matchingDoc.linkValues) {
List<RandomDoc> otherMatchingDocs = linkValueDocuments.get(linkValue);
if (otherMatchingDocs == null) {
continue;
}
for (RandomDoc otherSideDoc : otherMatchingDocs) {
PostingsEnum postingsEnum = MultiTerms.getTermPostingsEnum(topLevelReader, "id", new BytesRef(otherSideDoc.id), 0);
assert postingsEnum != null;
int doc = postingsEnum.nextDoc();
expectedResult.set(doc);
}
}
}
return expectedResult;
}
private static class IndexIterationContext {
String[] randomUniqueValues;
boolean[] randomFrom;
Map<String, List<RandomDoc>> fromDocuments = new HashMap<>();
Map<String, List<RandomDoc>> toDocuments = new HashMap<>();
Map<String, List<RandomDoc>> randomValueFromDocs = new HashMap<>();
Map<String, List<RandomDoc>> randomValueToDocs = new HashMap<>();
Map<String, Map<Integer, JoinScore>> fromHitsToJoinScore = new HashMap<>();
Map<String, Map<Integer, JoinScore>> toHitsToJoinScore = new HashMap<>();
OrdinalMap ordinalMap;
Directory dir;
IndexSearcher searcher;
void close() throws IOException {
searcher.getIndexReader().close();
dir.close();
}
}
private static class RandomDoc {
final String id;
final List<String> linkValues;
final String value;
final boolean from;
private RandomDoc(String id, int numberOfLinkValues, String value, boolean from) {
this.id = id;
this.from = from;
linkValues = new ArrayList<>(numberOfLinkValues);
this.value = value;
}
}
private static class JoinScore {
float minScore = Float.POSITIVE_INFINITY;
float maxScore = Float.NEGATIVE_INFINITY;
float total;
int count;
void addScore(float score) {
if (score > maxScore) {
maxScore = score;
}
if (score < minScore) {
minScore = score;
}
total += score;
count++;
}
float score(ScoreMode mode) {
switch (mode) {
case None:
return 1f;
case Total:
return total;
case Avg:
return total / count;
case Min:
return minScore;
case Max:
return maxScore;
}
throw new IllegalArgumentException("Unsupported ScoreMode: " + mode);
}
}
private static class BitSetCollector extends SimpleCollector {
private final BitSet bitSet;
private int docBase;
private BitSetCollector(BitSet bitSet) {
this.bitSet = bitSet;
}
@Override
public void collect(int doc) throws IOException {
bitSet.set(docBase + doc);
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
docBase = context.docBase;
}
@Override
public org.apache.lucene.search.ScoreMode scoreMode() {
return org.apache.lucene.search.ScoreMode.COMPLETE_NO_SCORES;
}
}
}