blob: c8888becea32b0cded30e277c09e244f2f22b535 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
/**
 * Tests {@link Matches} and {@link MatchesIterator} implementations across term, boolean,
 * phrase, span and point queries, verifying reported positions and offsets against
 * hand-computed expectations over a small fixed corpus ({@link #docFields}).
 */
public class TestMatchesIterator extends LuceneTestCase {

  protected IndexSearcher searcher;
  protected Directory directory;
  protected IndexReader reader = null;

  private static final String FIELD_WITH_OFFSETS = "field_offsets";
  private static final String FIELD_NO_OFFSETS = "field_no_offsets";
  private static final String FIELD_DOCS_ONLY = "field_docs_only";
  private static final String FIELD_FREQS = "field_freqs";
  private static final String FIELD_POINT = "field_point";

  // Full postings (positions + offsets): matches report real offsets
  private static final FieldType OFFSETS = new FieldType(TextField.TYPE_STORED);

  static {
    OFFSETS.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  }

  // Docs-only postings: no positions, so matches report -1 for positions/offsets
  private static final FieldType DOCS = new FieldType(TextField.TYPE_STORED);

  static {
    DOCS.setIndexOptions(IndexOptions.DOCS);
  }

  // Docs+freqs postings: still no positions
  private static final FieldType DOCS_AND_FREQS = new FieldType(TextField.TYPE_STORED);

  static {
    DOCS_AND_FREQS.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  }

  @Override
  public void tearDown() throws Exception {
    reader.close();
    directory.close();
    super.tearDown();
  }

  @Override
  public void setUp() throws Exception {
    super.setUp();
    directory = newDirectory();
    RandomIndexWriter writer =
        new RandomIndexWriter(
            random(),
            directory,
            newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
    for (int i = 0; i < docFields.length; i++) {
      Document doc = new Document();
      doc.add(newField(FIELD_WITH_OFFSETS, docFields[i], OFFSETS));
      doc.add(newField(FIELD_NO_OFFSETS, docFields[i], TextField.TYPE_STORED));
      doc.add(newField(FIELD_DOCS_ONLY, docFields[i], DOCS));
      doc.add(newField(FIELD_FREQS, docFields[i], DOCS_AND_FREQS));
      doc.add(new IntPoint(FIELD_POINT, 10));
      doc.add(new NumericDocValuesField(FIELD_POINT, 10));
      doc.add(new NumericDocValuesField("id", i));
      doc.add(newField("id", Integer.toString(i), TextField.TYPE_STORED));
      writer.addDocument(doc);
    }
    // Single segment so that docids match array indices in the expectations below
    writer.forceMerge(1);
    reader = writer.getReader();
    writer.close();
    searcher = newSearcher(getOnlyLeafReader(reader));
  }

  protected String[] docFields = {
    "w1 w2 w3 w4 w5",
    "w1 w3 w2 w3 zz",
    "w1 xx w2 yy w4",
    "w1 w2 w1 w4 w2 w3",
    "a phrase sentence with many phrase sentence iterations of a phrase sentence",
    "nothing matches this document"
  };

  /**
   * Checks positions and offsets of matches on {@code field}.
   *
   * <p>Each row of {@code expected} is {@code {docid}} when the doc should have no match on the
   * field, or {@code {docid, startPos, endPos, startOffset, endOffset, ...}} with one
   * position/offset quadruple per expected match, in iteration order.
   */
  private void checkMatches(Query q, String field, int[][] expected) throws IOException {
    Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE, 1);
    for (int i = 0; i < expected.length; i++) {
      LeafReaderContext ctx =
          searcher.leafContexts.get(ReaderUtil.subIndex(expected[i][0], searcher.leafContexts));
      int doc = expected[i][0] - ctx.docBase;
      Matches matches = w.matches(ctx, doc);
      if (matches == null) {
        assertEquals(expected[i].length, 1);
        continue;
      }
      MatchesIterator it = matches.getMatches(field);
      if (expected[i].length == 1) {
        assertNull(it);
        continue;
      }
      checkFieldMatches(it, expected[i]);
      checkFieldMatches(matches.getMatches(field), expected[i]); // test multiple calls
    }
  }

  /**
   * Checks that the number of distinct leaf queries reported for doc {@code i} on {@code field}
   * equals {@code expected[i]} (0 meaning the doc should have no matches on the field).
   */
  private void checkLabelCount(Query q, String field, int[] expected) throws IOException {
    Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE, 1);
    for (int i = 0; i < expected.length; i++) {
      LeafReaderContext ctx =
          searcher.leafContexts.get(ReaderUtil.subIndex(i, searcher.leafContexts));
      int doc = i - ctx.docBase;
      Matches matches = w.matches(ctx, doc);
      if (matches == null) {
        assertEquals("Expected to get matches on document " + i, 0, expected[i]);
        continue;
      }
      MatchesIterator it = matches.getMatches(field);
      if (expected[i] == 0) {
        assertNull(it);
        continue;
      } else {
        assertNotNull(it);
      }
      // identity semantics: distinct Query instances count separately even if equal()
      IdentityHashMap<Query, Integer> labels = new IdentityHashMap<>();
      while (it.next()) {
        labels.put(it.getQuery(), 1);
      }
      assertEquals(expected[i], labels.size());
    }
  }

  /**
   * Consumes {@code it}, asserting each match against the quadruples in {@code expected}
   * (index 0 is the docid and is skipped; then startPos, endPos, startOffset, endOffset per
   * match). Also asserts the iterator is exhausted exactly when the expectations are.
   */
  private void checkFieldMatches(MatchesIterator it, int[] expected) throws IOException {
    int pos = 1;
    while (it.next()) {
      assertEquals("Wrong start position", expected[pos], it.startPosition());
      assertEquals("Wrong end position", expected[pos + 1], it.endPosition());
      assertEquals("Wrong start offset", expected[pos + 2], it.startOffset());
      assertEquals("Wrong end offset", expected[pos + 3], it.endOffset());
      pos += 4;
    }
    assertEquals(expected.length, pos);
  }

  /**
   * For fields indexed without positions, matches are document-level only: every match must
   * report a start position of -1. {@code expected[i]} says whether doc {@code i} matches at all.
   */
  private void checkNoPositionsMatches(Query q, String field, boolean[] expected)
      throws IOException {
    Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE, 1);
    for (int i = 0; i < expected.length; i++) {
      LeafReaderContext ctx =
          searcher.leafContexts.get(ReaderUtil.subIndex(i, searcher.leafContexts));
      int doc = i - ctx.docBase;
      Matches matches = w.matches(ctx, doc);
      if (expected[i]) {
        MatchesIterator mi = matches.getMatches(field);
        assertTrue(mi.next());
        assertEquals(-1, mi.startPosition());
        while (mi.next()) {
          assertEquals(-1, mi.startPosition());
        }
      } else {
        assertNull(matches);
      }
    }
  }

  /**
   * Checks that the set of {@link NamedMatches} labels reported for doc {@code i} is exactly
   * {@code expectedNames[i]} (an empty row means the doc should not match at all).
   */
  private void checkSubMatches(Query q, String[][] expectedNames) throws IOException {
    Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1);
    for (int i = 0; i < expectedNames.length; i++) {
      LeafReaderContext ctx =
          searcher.leafContexts.get(ReaderUtil.subIndex(i, searcher.leafContexts));
      int doc = i - ctx.docBase;
      Matches matches = w.matches(ctx, doc);
      if (matches == null) {
        assertEquals("Expected to get no matches on document " + i, 0, expectedNames[i].length);
        continue;
      }
      Set<String> expectedQueries = new HashSet<>(Arrays.asList(expectedNames[i]));
      Set<String> actualQueries =
          NamedMatches.findNamedMatches(matches).stream()
              .map(NamedMatches::getName)
              .collect(Collectors.toSet());
      Set<String> unexpected = new HashSet<>(actualQueries);
      unexpected.removeAll(expectedQueries);
      assertEquals("Unexpected matching leaf queries: " + unexpected, 0, unexpected.size());
      Set<String> missing = new HashSet<>(expectedQueries);
      missing.removeAll(actualQueries);
      assertEquals("Missing matching leaf queries: " + missing, 0, missing.size());
    }
  }

  /**
   * Asserts that every match of {@code q} on {@code field} is a leaf match, i.e. has no
   * sub-matches, for every document in the index.
   */
  private void assertIsLeafMatch(Query q, String field) throws IOException {
    Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE, 1);
    for (int i = 0; i < searcher.reader.maxDoc(); i++) {
      LeafReaderContext ctx =
          searcher.leafContexts.get(ReaderUtil.subIndex(i, searcher.leafContexts));
      int doc = i - ctx.docBase;
      Matches matches = w.matches(ctx, doc);
      if (matches == null) {
        continue; // was 'return': would silently skip all remaining docs
      }
      MatchesIterator mi = matches.getMatches(field);
      if (mi == null) {
        continue; // was 'return': would silently skip all remaining docs
      }
      while (mi.next()) {
        assertNull(mi.getSubMatches());
      }
    }
  }

  /**
   * Checks the term-level sub-matches for each document. {@code expected[doc]} is empty when the
   * doc should not match; otherwise it holds, per top-level match, the set of terms (as
   * position/offset triples) that contribute to that match.
   */
  private void checkTermMatches(Query q, String field, TermMatch[][][] expected)
      throws IOException {
    Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE, 1);
    for (int i = 0; i < expected.length; i++) {
      LeafReaderContext ctx =
          searcher.leafContexts.get(ReaderUtil.subIndex(i, searcher.leafContexts));
      int doc = i - ctx.docBase;
      Matches matches = w.matches(ctx, doc);
      if (matches == null) {
        assertEquals(expected[i].length, 0);
        continue;
      }
      MatchesIterator it = matches.getMatches(field);
      if (expected[i].length == 0) {
        assertNull(it);
        continue;
      }
      checkTerms(expected[i], it);
    }
  }

  /** Verifies each top-level match's sub-matches against {@code expected}, order-insensitively. */
  private void checkTerms(TermMatch[][] expected, MatchesIterator it) throws IOException {
    int upTo = 0;
    while (it.next()) {
      Set<TermMatch> expectedMatches = new HashSet<>(Arrays.asList(expected[upTo]));
      MatchesIterator submatches = it.getSubMatches();
      while (submatches.next()) {
        TermMatch tm =
            new TermMatch(
                submatches.startPosition(), submatches.startOffset(), submatches.endOffset());
        if (expectedMatches.remove(tm) == false) {
          fail("Unexpected term match: " + tm);
        }
      }
      if (expectedMatches.size() != 0) {
        fail(
            "Missing term matches: "
                + expectedMatches.stream().map(Object::toString).collect(Collectors.joining(", ")));
      }
      upTo++;
    }
    // every expected match group must have been consumed
    // (was 'upTo < expected.length - 1', which let exactly one missing group go unreported)
    if (upTo < expected.length) {
      fail("Missing expected match");
    }
  }

  /** Value object identifying a single term occurrence by position and offsets. */
  static class TermMatch {

    public final int position;

    public final int startOffset;

    public final int endOffset;

    public TermMatch(PostingsEnum pe, int position) throws IOException {
      this.position = position;
      this.startOffset = pe.startOffset();
      this.endOffset = pe.endOffset();
    }

    public TermMatch(int position, int startOffset, int endOffset) {
      this.position = position;
      this.startOffset = startOffset;
      this.endOffset = endOffset;
    }

    @Override
    public boolean equals(Object o) {
      if (this == o) return true;
      if (o == null || getClass() != o.getClass()) return false;
      TermMatch termMatch = (TermMatch) o;
      return position == termMatch.position
          && startOffset == termMatch.startOffset
          && endOffset == termMatch.endOffset;
    }

    @Override
    public int hashCode() {
      return Objects.hash(position, startOffset, endOffset);
    }

    @Override
    public String toString() {
      return position + "[" + startOffset + "->" + endOffset + "]";
    }
  }

  public void testTermQuery() throws IOException {
    Term t = new Term(FIELD_WITH_OFFSETS, "w1");
    Query q = NamedMatches.wrapQuery("q", new TermQuery(t));
    checkMatches(
        q,
        FIELD_WITH_OFFSETS,
        new int[][] {
          {0, 0, 0, 0, 2},
          {1, 0, 0, 0, 2},
          {2, 0, 0, 0, 2},
          {3, 0, 0, 0, 2, 2, 2, 6, 8},
          {4}
        });
    checkLabelCount(q, FIELD_WITH_OFFSETS, new int[] {1, 1, 1, 1, 0, 0});
    assertIsLeafMatch(q, FIELD_WITH_OFFSETS);
    checkSubMatches(q, new String[][] {{"q"}, {"q"}, {"q"}, {"q"}, {}, {}});
  }

  public void testTermQueryNoStoredOffsets() throws IOException {
    Query q = new TermQuery(new Term(FIELD_NO_OFFSETS, "w1"));
    checkMatches(
        q,
        FIELD_NO_OFFSETS,
        new int[][] {
          {0, 0, 0, -1, -1},
          {1, 0, 0, -1, -1},
          {2, 0, 0, -1, -1},
          {3, 0, 0, -1, -1, 2, 2, -1, -1},
          {4}
        });
  }

  public void testTermQueryNoPositions() throws IOException {
    for (String field : new String[] {FIELD_DOCS_ONLY, FIELD_FREQS}) {
      Query q = new TermQuery(new Term(field, "w1"));
      checkNoPositionsMatches(q, field, new boolean[] {true, true, true, true, false});
    }
  }

  public void testDisjunction() throws IOException {
    Query w1 = NamedMatches.wrapQuery("w1", new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1")));
    Query w3 = NamedMatches.wrapQuery("w3", new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")));
    Query q =
        new BooleanQuery.Builder()
            .add(w1, BooleanClause.Occur.SHOULD)
            .add(w3, BooleanClause.Occur.SHOULD)
            .build();
    checkMatches(
        q,
        FIELD_WITH_OFFSETS,
        new int[][] {
          {0, 0, 0, 0, 2, 2, 2, 6, 8},
          {1, 0, 0, 0, 2, 1, 1, 3, 5, 3, 3, 9, 11},
          {2, 0, 0, 0, 2},
          {3, 0, 0, 0, 2, 2, 2, 6, 8, 5, 5, 15, 17},
          {4}
        });
    checkLabelCount(q, FIELD_WITH_OFFSETS, new int[] {2, 2, 1, 2, 0, 0});
    assertIsLeafMatch(q, FIELD_WITH_OFFSETS);
    checkSubMatches(q, new String[][] {{"w1", "w3"}, {"w1", "w3"}, {"w1"}, {"w1", "w3"}, {}, {}});
  }

  public void testDisjunctionNoPositions() throws IOException {
    for (String field : new String[] {FIELD_DOCS_ONLY, FIELD_FREQS}) {
      Query q =
          new BooleanQuery.Builder()
              .add(new TermQuery(new Term(field, "w1")), BooleanClause.Occur.SHOULD)
              .add(new TermQuery(new Term(field, "w3")), BooleanClause.Occur.SHOULD)
              .build();
      checkNoPositionsMatches(q, field, new boolean[] {true, true, true, true, false});
    }
  }

  public void testReqOpt() throws IOException {
    Query q =
        new BooleanQuery.Builder()
            .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1")), BooleanClause.Occur.SHOULD)
            .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.MUST)
            .build();
    checkMatches(
        q,
        FIELD_WITH_OFFSETS,
        new int[][] {
          {0, 0, 0, 0, 2, 2, 2, 6, 8},
          {1, 0, 0, 0, 2, 1, 1, 3, 5, 3, 3, 9, 11},
          {2},
          {3, 0, 0, 0, 2, 2, 2, 6, 8, 5, 5, 15, 17},
          {4}
        });
    checkLabelCount(q, FIELD_WITH_OFFSETS, new int[] {2, 2, 0, 2, 0, 0});
  }

  public void testReqOptNoPositions() throws IOException {
    for (String field : new String[] {FIELD_DOCS_ONLY, FIELD_FREQS}) {
      Query q =
          new BooleanQuery.Builder()
              .add(new TermQuery(new Term(field, "w1")), BooleanClause.Occur.SHOULD)
              .add(new TermQuery(new Term(field, "w3")), BooleanClause.Occur.MUST)
              .build();
      checkNoPositionsMatches(q, field, new boolean[] {true, true, false, true, false});
    }
  }

  public void testMinShouldMatch() throws IOException {
    Query w1 = NamedMatches.wrapQuery("w1", new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1")));
    Query w3 = NamedMatches.wrapQuery("w3", new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")));
    Query w4 = new TermQuery(new Term(FIELD_WITH_OFFSETS, "w4"));
    Query xx = NamedMatches.wrapQuery("xx", new TermQuery(new Term(FIELD_WITH_OFFSETS, "xx")));
    Query q =
        new BooleanQuery.Builder()
            .add(w3, BooleanClause.Occur.SHOULD)
            .add(
                new BooleanQuery.Builder()
                    .add(w1, BooleanClause.Occur.SHOULD)
                    .add(w4, BooleanClause.Occur.SHOULD)
                    .add(xx, BooleanClause.Occur.SHOULD)
                    .setMinimumNumberShouldMatch(2)
                    .build(),
                BooleanClause.Occur.SHOULD)
            .build();
    checkMatches(
        q,
        FIELD_WITH_OFFSETS,
        new int[][] {
          {0, 0, 0, 0, 2, 2, 2, 6, 8, 3, 3, 9, 11},
          {1, 1, 1, 3, 5, 3, 3, 9, 11},
          {2, 0, 0, 0, 2, 1, 1, 3, 5, 4, 4, 12, 14},
          {3, 0, 0, 0, 2, 2, 2, 6, 8, 3, 3, 9, 11, 5, 5, 15, 17},
          {4}
        });
    checkLabelCount(q, FIELD_WITH_OFFSETS, new int[] {3, 1, 3, 3, 0, 0});
    assertIsLeafMatch(q, FIELD_WITH_OFFSETS);
    checkSubMatches(q, new String[][] {{"w1", "w3"}, {"w3"}, {"w1", "xx"}, {"w1", "w3"}, {}, {}});
  }

  public void testMinShouldMatchNoPositions() throws IOException {
    for (String field : new String[] {FIELD_FREQS, FIELD_DOCS_ONLY}) {
      Query q =
          new BooleanQuery.Builder()
              .add(new TermQuery(new Term(field, "w3")), BooleanClause.Occur.SHOULD)
              .add(
                  new BooleanQuery.Builder()
                      .add(new TermQuery(new Term(field, "w1")), BooleanClause.Occur.SHOULD)
                      .add(new TermQuery(new Term(field, "w4")), BooleanClause.Occur.SHOULD)
                      .add(new TermQuery(new Term(field, "xx")), BooleanClause.Occur.SHOULD)
                      .setMinimumNumberShouldMatch(2)
                      .build(),
                  BooleanClause.Occur.SHOULD)
              .build();
      checkNoPositionsMatches(q, field, new boolean[] {true, true, true, true, false});
    }
  }

  public void testExclusion() throws IOException {
    Query q =
        new BooleanQuery.Builder()
            .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.SHOULD)
            .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "zz")), BooleanClause.Occur.MUST_NOT)
            .build();
    checkMatches(
        q,
        FIELD_WITH_OFFSETS,
        new int[][] {
          {0, 2, 2, 6, 8},
          {1},
          {2},
          {3, 5, 5, 15, 17},
          {4}
        });
  }

  public void testExclusionNoPositions() throws IOException {
    for (String field : new String[] {FIELD_FREQS, FIELD_DOCS_ONLY}) {
      Query q =
          new BooleanQuery.Builder()
              .add(new TermQuery(new Term(field, "w3")), BooleanClause.Occur.SHOULD)
              .add(new TermQuery(new Term(field, "zz")), BooleanClause.Occur.MUST_NOT)
              .build();
      checkNoPositionsMatches(q, field, new boolean[] {true, false, false, true, false});
    }
  }

  public void testConjunction() throws IOException {
    Query q =
        new BooleanQuery.Builder()
            .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.MUST)
            .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w4")), BooleanClause.Occur.MUST)
            .build();
    checkMatches(
        q,
        FIELD_WITH_OFFSETS,
        new int[][] {
          {0, 2, 2, 6, 8, 3, 3, 9, 11},
          {1},
          {2},
          {3, 3, 3, 9, 11, 5, 5, 15, 17},
          {4}
        });
  }

  public void testConjunctionNoPositions() throws IOException {
    for (String field : new String[] {FIELD_FREQS, FIELD_DOCS_ONLY}) {
      Query q =
          new BooleanQuery.Builder()
              .add(new TermQuery(new Term(field, "w3")), BooleanClause.Occur.MUST)
              .add(new TermQuery(new Term(field, "w4")), BooleanClause.Occur.MUST)
              .build();
      checkNoPositionsMatches(q, field, new boolean[] {true, false, false, true, false});
    }
  }

  public void testWildcards() throws IOException {
    Query q = new PrefixQuery(new Term(FIELD_WITH_OFFSETS, "x"));
    checkMatches(q, FIELD_WITH_OFFSETS, new int[][] {{0}, {1}, {2, 1, 1, 3, 5}, {3}, {4}});
    Query rq = new RegexpQuery(new Term(FIELD_WITH_OFFSETS, "w[1-2]"));
    checkMatches(
        rq,
        FIELD_WITH_OFFSETS,
        new int[][] {
          {0, 0, 0, 0, 2, 1, 1, 3, 5},
          {1, 0, 0, 0, 2, 2, 2, 6, 8},
          {2, 0, 0, 0, 2, 2, 2, 6, 8},
          {3, 0, 0, 0, 2, 1, 1, 3, 5, 2, 2, 6, 8, 4, 4, 12, 14},
          {4}
        });
    checkLabelCount(rq, FIELD_WITH_OFFSETS, new int[] {1, 1, 1, 1, 0});
    assertIsLeafMatch(rq, FIELD_WITH_OFFSETS);
  }

  public void testNoMatchWildcards() throws IOException {
    Query nomatch = new PrefixQuery(new Term(FIELD_WITH_OFFSETS, "wibble"));
    Matches matches =
        searcher
            .createWeight(searcher.rewrite(nomatch), ScoreMode.COMPLETE_NO_SCORES, 1)
            .matches(searcher.leafContexts.get(0), 0);
    assertNull(matches);
  }

  public void testWildcardsNoPositions() throws IOException {
    for (String field : new String[] {FIELD_FREQS, FIELD_DOCS_ONLY}) {
      Query q = new PrefixQuery(new Term(field, "x"));
      checkNoPositionsMatches(q, field, new boolean[] {false, false, true, false, false});
    }
  }

  public void testSynonymQuery() throws IOException {
    Query q =
        new SynonymQuery.Builder(FIELD_WITH_OFFSETS)
            .addTerm(new Term(FIELD_WITH_OFFSETS, "w1"))
            .addTerm(new Term(FIELD_WITH_OFFSETS, "w2"))
            .build();
    checkMatches(
        q,
        FIELD_WITH_OFFSETS,
        new int[][] {
          {0, 0, 0, 0, 2, 1, 1, 3, 5},
          {1, 0, 0, 0, 2, 2, 2, 6, 8},
          {2, 0, 0, 0, 2, 2, 2, 6, 8},
          {3, 0, 0, 0, 2, 1, 1, 3, 5, 2, 2, 6, 8, 4, 4, 12, 14},
          {4}
        });
    assertIsLeafMatch(q, FIELD_WITH_OFFSETS);
  }

  public void testSynonymQueryNoPositions() throws IOException {
    for (String field : new String[] {FIELD_FREQS, FIELD_DOCS_ONLY}) {
      Query q =
          new SynonymQuery.Builder(field)
              .addTerm(new Term(field, "w1"))
              .addTerm(new Term(field, "w2"))
              .build();
      checkNoPositionsMatches(q, field, new boolean[] {true, true, true, true, false});
    }
  }

  public void testMultipleFields() throws IOException {
    Query q =
        new BooleanQuery.Builder()
            .add(new TermQuery(new Term("id", "1")), BooleanClause.Occur.SHOULD)
            .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.MUST)
            .build();
    Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE, 1);
    LeafReaderContext ctx =
        searcher.leafContexts.get(ReaderUtil.subIndex(1, searcher.leafContexts));
    Matches m = w.matches(ctx, 1 - ctx.docBase);
    assertNotNull(m);
    checkFieldMatches(m.getMatches("id"), new int[] {-1, 0, 0, -1, -1});
    checkFieldMatches(m.getMatches(FIELD_WITH_OFFSETS), new int[] {-1, 1, 1, 3, 5, 3, 3, 9, 11});
    assertNull(m.getMatches("bogus"));
    Set<String> fields = new HashSet<>();
    for (String field : m) {
      fields.add(field);
    }
    assertEquals(2, fields.size());
    assertTrue(fields.contains(FIELD_WITH_OFFSETS));
    assertTrue(fields.contains("id"));
    assertEquals(2, AssertingMatches.unWrap(m).getSubMatches().size());
  }

  //    0         1         2         3         4         5          6         7
  // "a phrase sentence with many phrase sentence iterations of a phrase sentence",
  public void testSloppyPhraseQueryWithRepeats() throws IOException {
    PhraseQuery pq = new PhraseQuery(10, FIELD_WITH_OFFSETS, "phrase", "sentence", "sentence");
    checkMatches(
        pq,
        FIELD_WITH_OFFSETS,
        new int[][] {
          {0}, {1}, {2}, {3}, {4, 1, 6, 2, 43, 2, 11, 9, 75, 5, 11, 28, 75, 6, 11, 35, 75}
        });
    checkLabelCount(pq, FIELD_WITH_OFFSETS, new int[] {0, 0, 0, 0, 1});
    assertIsLeafMatch(pq, FIELD_WITH_OFFSETS);
  }

  public void testSloppyPhraseQuery() throws IOException {
    PhraseQuery pq = new PhraseQuery(4, FIELD_WITH_OFFSETS, "a", "sentence");
    checkMatches(
        pq,
        FIELD_WITH_OFFSETS,
        new int[][] {{0}, {1}, {2}, {3}, {4, 0, 2, 0, 17, 6, 9, 35, 59, 9, 11, 58, 75}});
    assertIsLeafMatch(pq, FIELD_WITH_OFFSETS);
  }

  public void testExactPhraseQuery() throws IOException {
    PhraseQuery pq = new PhraseQuery(FIELD_WITH_OFFSETS, "phrase", "sentence");
    checkMatches(
        pq,
        FIELD_WITH_OFFSETS,
        new int[][] {{0}, {1}, {2}, {3}, {4, 1, 2, 2, 17, 5, 6, 28, 43, 10, 11, 60, 75}});
    Term a = new Term(FIELD_WITH_OFFSETS, "a");
    Term s = new Term(FIELD_WITH_OFFSETS, "sentence");
    PhraseQuery pq2 = new PhraseQuery.Builder().add(a).add(s, 2).build();
    checkMatches(
        pq2, FIELD_WITH_OFFSETS, new int[][] {{0}, {1}, {2}, {3}, {4, 0, 2, 0, 17, 9, 11, 58, 75}});
    assertIsLeafMatch(pq2, FIELD_WITH_OFFSETS);
  }

  //    0         1         2         3         4         5          6         7
  // "a phrase sentence with many phrase sentence iterations of a phrase sentence",
  public void testSloppyMultiPhraseQuery() throws IOException {
    Term p = new Term(FIELD_WITH_OFFSETS, "phrase");
    Term s = new Term(FIELD_WITH_OFFSETS, "sentence");
    Term i = new Term(FIELD_WITH_OFFSETS, "iterations");
    MultiPhraseQuery mpq =
        new MultiPhraseQuery.Builder().add(p).add(new Term[] {s, i}).setSlop(4).build();
    checkMatches(
        mpq,
        FIELD_WITH_OFFSETS,
        new int[][] {
          {0}, {1}, {2}, {3}, {4, 1, 2, 2, 17, 5, 6, 28, 43, 5, 7, 28, 54, 10, 11, 60, 75}
        });
    assertIsLeafMatch(mpq, FIELD_WITH_OFFSETS);
  }

  public void testExactMultiPhraseQuery() throws IOException {
    MultiPhraseQuery mpq =
        new MultiPhraseQuery.Builder()
            .add(new Term(FIELD_WITH_OFFSETS, "sentence"))
            .add(
                new Term[] {
                  new Term(FIELD_WITH_OFFSETS, "with"), new Term(FIELD_WITH_OFFSETS, "iterations")
                })
            .build();
    checkMatches(
        mpq, FIELD_WITH_OFFSETS, new int[][] {{0}, {1}, {2}, {3}, {4, 2, 3, 9, 22, 6, 7, 35, 54}});
    MultiPhraseQuery mpq2 =
        new MultiPhraseQuery.Builder()
            .add(
                new Term[] {
                  new Term(FIELD_WITH_OFFSETS, "a"), new Term(FIELD_WITH_OFFSETS, "many")
                })
            .add(new Term(FIELD_WITH_OFFSETS, "phrase"))
            .build();
    checkMatches(
        mpq2,
        FIELD_WITH_OFFSETS,
        new int[][] {{0}, {1}, {2}, {3}, {4, 0, 1, 0, 8, 4, 5, 23, 34, 9, 10, 58, 66}});
    assertIsLeafMatch(mpq2, FIELD_WITH_OFFSETS);
  }

  //    0         1         2         3         4         5          6         7
  // "a phrase sentence with many phrase sentence iterations of a phrase sentence",
  public void testSpanQuery() throws IOException {
    SpanQuery subq =
        SpanNearQuery.newOrderedNearQuery(FIELD_WITH_OFFSETS)
            .addClause(new SpanTermQuery(new Term(FIELD_WITH_OFFSETS, "with")))
            .addClause(new SpanTermQuery(new Term(FIELD_WITH_OFFSETS, "many")))
            .build();
    Query q =
        SpanNearQuery.newOrderedNearQuery(FIELD_WITH_OFFSETS)
            .addClause(new SpanTermQuery(new Term(FIELD_WITH_OFFSETS, "sentence")))
            .addClause(
                new SpanOrQuery(
                    subq, new SpanTermQuery(new Term(FIELD_WITH_OFFSETS, "iterations"))))
            .build();
    checkMatches(
        q, FIELD_WITH_OFFSETS, new int[][] {{0}, {1}, {2}, {3}, {4, 2, 4, 9, 27, 6, 7, 35, 54}});
    checkLabelCount(q, FIELD_WITH_OFFSETS, new int[] {0, 0, 0, 0, 1});
    checkTermMatches(
        q,
        FIELD_WITH_OFFSETS,
        new TermMatch[][][] {
          {},
          {},
          {},
          {},
          {
            {new TermMatch(2, 9, 17), new TermMatch(3, 18, 22), new TermMatch(4, 23, 27)},
            {new TermMatch(6, 35, 43), new TermMatch(7, 44, 54)}
          }
        });
  }

  public void testPointQuery() throws IOException {
    IndexOrDocValuesQuery pointQuery =
        new IndexOrDocValuesQuery(
            IntPoint.newExactQuery(FIELD_POINT, 10),
            NumericDocValuesField.newSlowExactQuery(FIELD_POINT, 10));
    Term t = new Term(FIELD_WITH_OFFSETS, "w1");
    Query query =
        new BooleanQuery.Builder()
            .add(new TermQuery(t), BooleanClause.Occur.MUST)
            .add(pointQuery, BooleanClause.Occur.MUST)
            .build();
    // a point query on its own reports no field matches
    checkMatches(pointQuery, FIELD_WITH_OFFSETS, new int[][] {});
    checkMatches(
        query,
        FIELD_WITH_OFFSETS,
        new int[][] {
          {0, 0, 0, 0, 2},
          {1, 0, 0, 0, 2},
          {2, 0, 0, 0, 2},
          {3, 0, 0, 0, 2, 2, 2, 6, 8},
          {4}
        });
    pointQuery =
        new IndexOrDocValuesQuery(
            IntPoint.newExactQuery(FIELD_POINT, 11),
            NumericDocValuesField.newSlowExactQuery(FIELD_POINT, 11));
    query =
        new BooleanQuery.Builder()
            .add(new TermQuery(t), BooleanClause.Occur.MUST)
            .add(pointQuery, BooleanClause.Occur.MUST)
            .build();
    // a non-matching MUST point clause suppresses all matches
    checkMatches(query, FIELD_WITH_OFFSETS, new int[][] {});
    query =
        new BooleanQuery.Builder()
            .add(new TermQuery(t), BooleanClause.Occur.MUST)
            .add(pointQuery, BooleanClause.Occur.SHOULD)
            .build();
    // ... but a SHOULD point clause does not
    checkMatches(
        query,
        FIELD_WITH_OFFSETS,
        new int[][] {
          {0, 0, 0, 0, 2},
          {1, 0, 0, 0, 2},
          {2, 0, 0, 0, 2},
          {3, 0, 0, 0, 2, 2, 2, 6, 8},
          {4}
        });
  }

  public void testMinimalSeekingWithWildcards() throws IOException {
    SeekCountingLeafReader reader = new SeekCountingLeafReader(getOnlyLeafReader(this.reader));
    this.searcher = new IndexSearcher(reader);
    Query query = new PrefixQuery(new Term(FIELD_WITH_OFFSETS, "w"));
    Weight w = searcher.createWeight(query.rewrite(reader), ScoreMode.COMPLETE, 1);
    // docs 0-3 match several different terms here, but we only seek to the first term and
    // then short-cut return; other terms are ignored until we try and iterate over matches
    int[] expectedSeeks = new int[] {1, 1, 1, 1, 6, 6};
    int i = 0;
    for (LeafReaderContext ctx : reader.leaves()) {
      for (int doc = 0; doc < ctx.reader().maxDoc(); doc++) {
        reader.seeks = 0;
        w.matches(ctx, doc);
        assertEquals("Unexpected seek count on doc " + doc, expectedSeeks[i], reader.seeks);
        i++;
      }
    }
  }

  public void testFromSubIteratorsMethod() throws IOException {
    /** Synthetic iterator that yields a fixed number of matches at increasing positions. */
    class CountIterator implements MatchesIterator {

      private int count;

      private int max;

      CountIterator(int count) {
        this.count = count;
        this.max = count;
      }

      @Override
      public boolean next() throws IOException {
        if (count == 0) {
          return false;
        } else {
          count--;
          return true;
        }
      }

      @Override
      public int startPosition() {
        return max - count;
      }

      @Override
      public int endPosition() {
        return max - count;
      }

      @Override
      public int startOffset() throws IOException {
        throw new AssertionError();
      }

      @Override
      public int endOffset() throws IOException {
        throw new AssertionError();
      }

      @Override
      public MatchesIterator getSubMatches() throws IOException {
        throw new AssertionError();
      }

      @Override
      public Query getQuery() {
        throw new AssertionError();
      }
    }
    int[][] checks = {
      {0}, {1}, {0, 0}, {0, 1}, {1, 0}, {1, 1}, {0, 0, 0}, {0, 0, 1}, {0, 1, 0}, {1, 0, 0},
      {1, 0, 1}, {1, 1, 0}, {1, 1, 1},
    };
    for (int[] counts : checks) {
      List<MatchesIterator> its =
          IntStream.of(counts).mapToObj(CountIterator::new).collect(Collectors.toList());
      int expectedCount = IntStream.of(counts).sum();
      MatchesIterator merged = DisjunctionMatchesIterator.fromSubIterators(its);
      int actualCount = 0;
      while (merged.next()) {
        actualCount++;
      }
      assertEquals(
          "Sub-iterator count is not right for: " + Arrays.toString(counts),
          expectedCount,
          actualCount);
    }
  }

  /** FilterLeafReader that counts {@code seekExact} calls on every TermsEnum it hands out. */
  private static class SeekCountingLeafReader extends FilterLeafReader {

    int seeks = 0;

    public SeekCountingLeafReader(LeafReader in) {
      super(in);
    }

    @Override
    public Terms terms(String field) throws IOException {
      Terms terms = super.terms(field);
      if (terms == null) {
        return null;
      }
      return new FilterTerms(terms) {
        @Override
        public TermsEnum iterator() throws IOException {
          return new FilterTermsEnum(super.iterator()) {
            @Override
            public boolean seekExact(BytesRef text) throws IOException {
              seeks++;
              return super.seekExact(text);
            }
          };
        }
      };
    }

    @Override
    public CacheHelper getCoreCacheHelper() {
      return null; // not cacheable: wrapping changes observable behavior
    }

    @Override
    public CacheHelper getReaderCacheHelper() {
      return null;
    }
  }
}