| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.search.spans; |
| |
| |
| import java.io.IOException; |
| |
| import org.apache.lucene.analysis.MockAnalyzer; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.index.DirectoryReader; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.IndexWriter; |
| import org.apache.lucene.index.IndexWriterConfig; |
| import org.apache.lucene.index.RandomIndexWriter; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.search.CheckHits; |
| import org.apache.lucene.search.FuzzyQuery; |
| import org.apache.lucene.search.IndexSearcher; |
| import org.apache.lucene.search.PrefixQuery; |
| import org.apache.lucene.search.Query; |
| import org.apache.lucene.search.ScoreMode; |
| import org.apache.lucene.search.TermQuery; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.util.LuceneTestCase; |
| |
| import static org.apache.lucene.search.spans.SpanTestUtil.assertFinished; |
| import static org.apache.lucene.search.spans.SpanTestUtil.assertNext; |
| import static org.apache.lucene.search.spans.SpanTestUtil.spanNearOrderedQuery; |
| import static org.apache.lucene.search.spans.SpanTestUtil.spanNearUnorderedQuery; |
| import static org.apache.lucene.search.spans.SpanTestUtil.spanNotQuery; |
| import static org.apache.lucene.search.spans.SpanTestUtil.spanOrQuery; |
| import static org.apache.lucene.search.spans.SpanTestUtil.spanTermQuery; |
| |
| public class TestSpans extends LuceneTestCase { |
| private IndexSearcher searcher; |
| private IndexReader reader; |
| private Directory directory; |
| |
| public static final String field = "field"; |
| |
| @Override |
| public void setUp() throws Exception { |
| super.setUp(); |
| directory = newDirectory(); |
| RandomIndexWriter writer= new RandomIndexWriter(random(), directory, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy())); |
| for (int i = 0; i < docFields.length; i++) { |
| Document doc = new Document(); |
| doc.add(newTextField(field, docFields[i], Field.Store.YES)); |
| writer.addDocument(doc); |
| } |
| writer.forceMerge(1); |
| reader = writer.getReader(); |
| writer.close(); |
| searcher = newSearcher(getOnlyLeafReader(reader)); |
| } |
| |
| @Override |
| public void tearDown() throws Exception { |
| reader.close(); |
| directory.close(); |
| super.tearDown(); |
| } |
| |
| private String[] docFields = { |
| "w1 w2 w3 w4 w5", |
| "w1 w3 w2 w3", |
| "w1 xx w2 yy w3", |
| "w1 w3 xx w2 yy w3", |
| "u2 u2 u1", |
| "u2 xx u2 u1", |
| "u2 u2 xx u1", |
| "u2 xx u2 yy u1", |
| "u2 xx u1 u2", |
| "u2 u1 xx u2", |
| "u1 u2 xx u2", |
| "t1 t2 t1 t3 t2 t3", |
| "s2 s1 s1 xx xx s2 xx s2 xx s1 xx xx xx xx xx s2 xx", |
| "r1 s11", |
| "r1 s21" |
| }; |
| |
| private void checkHits(Query query, int[] results) throws IOException { |
| CheckHits.checkHits(random(), query, field, searcher, results); |
| } |
| |
| private void orderedSlopTest3SQ( |
| SpanQuery q1, |
| SpanQuery q2, |
| SpanQuery q3, |
| int slop, |
| int[] expectedDocs) throws IOException { |
| SpanQuery query = spanNearOrderedQuery(slop, q1, q2, q3); |
| checkHits(query, expectedDocs); |
| } |
| |
| public void orderedSlopTest3(int slop, int[] expectedDocs) throws IOException { |
| orderedSlopTest3SQ( |
| spanTermQuery(field, "w1"), |
| spanTermQuery(field, "w2"), |
| spanTermQuery(field, "w3"), |
| slop, |
| expectedDocs); |
| } |
| |
| public void orderedSlopTest3Equal(int slop, int[] expectedDocs) throws IOException { |
| orderedSlopTest3SQ( |
| spanTermQuery(field, "w1"), |
| spanTermQuery(field, "w3"), |
| spanTermQuery(field, "w3"), |
| slop, |
| expectedDocs); |
| } |
| |
| public void orderedSlopTest1Equal(int slop, int[] expectedDocs) throws IOException { |
| orderedSlopTest3SQ( |
| spanTermQuery(field, "u2"), |
| spanTermQuery(field, "u2"), |
| spanTermQuery(field, "u1"), |
| slop, |
| expectedDocs); |
| } |
| |
| public void testSpanNearOrdered01() throws Exception { |
| orderedSlopTest3(0, new int[] {0}); |
| } |
| |
| public void testSpanNearOrdered02() throws Exception { |
| orderedSlopTest3(1, new int[] {0,1}); |
| } |
| |
| public void testSpanNearOrdered03() throws Exception { |
| orderedSlopTest3(2, new int[] {0,1,2}); |
| } |
| |
| public void testSpanNearOrdered04() throws Exception { |
| orderedSlopTest3(3, new int[] {0,1,2,3}); |
| } |
| |
| public void testSpanNearOrdered05() throws Exception { |
| orderedSlopTest3(4, new int[] {0,1,2,3}); |
| } |
| |
| public void testSpanNearOrderedEqual01() throws Exception { |
| orderedSlopTest3Equal(0, new int[] {}); |
| } |
| |
| public void testSpanNearOrderedEqual02() throws Exception { |
| orderedSlopTest3Equal(1, new int[] {1}); |
| } |
| |
| public void testSpanNearOrderedEqual03() throws Exception { |
| orderedSlopTest3Equal(2, new int[] {1}); |
| } |
| |
| public void testSpanNearOrderedEqual04() throws Exception { |
| orderedSlopTest3Equal(3, new int[] {1,3}); |
| } |
| |
| public void testSpanNearOrderedEqual11() throws Exception { |
| orderedSlopTest1Equal(0, new int[] {4}); |
| } |
| |
| public void testSpanNearOrderedEqual12() throws Exception { |
| orderedSlopTest1Equal(0, new int[] {4}); |
| } |
| |
| public void testSpanNearOrderedEqual13() throws Exception { |
| orderedSlopTest1Equal(1, new int[] {4,5,6}); |
| } |
| |
| public void testSpanNearOrderedEqual14() throws Exception { |
| orderedSlopTest1Equal(2, new int[] {4,5,6,7}); |
| } |
| |
| public void testSpanNearOrderedEqual15() throws Exception { |
| orderedSlopTest1Equal(3, new int[] {4,5,6,7}); |
| } |
| |
| public void testSpanNearOrderedOverlap() throws Exception { |
| final SpanQuery query = spanNearOrderedQuery(field, 1, "t1", "t2", "t3"); |
| |
| Spans spans = query.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); |
| |
| assertEquals("first doc", 11, spans.nextDoc()); |
| assertEquals("first start", 0, spans.nextStartPosition()); |
| assertEquals("first end", 4, spans.endPosition()); |
| |
| assertEquals("second start", 2, spans.nextStartPosition()); |
| assertEquals("second end", 6, spans.endPosition()); |
| |
| assertFinished(spans); |
| } |
| |
| public void testSpanNearUnOrdered() throws Exception { |
| //See http://www.gossamer-threads.com/lists/lucene/java-dev/52270 for discussion about this test |
| SpanQuery senq = spanNearUnorderedQuery(field, 0, "u1", "u2"); |
| Spans spans = senq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); |
| assertNext(spans, 4, 1, 3); |
| assertNext(spans, 5, 2, 4); |
| assertNext(spans, 8, 2, 4); |
| assertNext(spans, 9, 0, 2); |
| assertNext(spans, 10, 0, 2); |
| assertFinished(spans); |
| |
| senq = spanNearUnorderedQuery(1, senq, spanTermQuery(field, "u2")); |
| spans = senq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); |
| assertNext(spans, 4, 0, 3); |
| assertNext(spans, 4, 1, 3); // unordered spans can be subsets |
| assertNext(spans, 5, 0, 4); |
| assertNext(spans, 5, 2, 4); |
| assertNext(spans, 8, 0, 4); |
| assertNext(spans, 8, 2, 4); |
| assertNext(spans, 9, 0, 2); |
| assertNext(spans, 9, 0, 4); |
| assertNext(spans, 10, 0, 2); |
| assertFinished(spans); |
| } |
| |
| private Spans orSpans(String[] terms) throws Exception { |
| return spanOrQuery(field, terms).createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); |
| } |
| |
| public void testSpanOrEmpty() throws Exception { |
| Spans spans = orSpans(new String[0]); |
| assertFinished(spans); |
| } |
| |
| public void testSpanOrSingle() throws Exception { |
| Spans spans = orSpans(new String[] {"w5"}); |
| assertNext(spans, 0, 4, 5); |
| assertFinished(spans); |
| } |
| |
| public void testSpanOrDouble() throws Exception { |
| Spans spans = orSpans(new String[] {"w5", "yy"}); |
| assertNext(spans, 0, 4, 5); |
| assertNext(spans, 2, 3, 4); |
| assertNext(spans, 3, 4, 5); |
| assertNext(spans, 7, 3, 4); |
| assertFinished(spans); |
| } |
| |
| public void testSpanOrDoubleAdvance() throws Exception { |
| Spans spans = orSpans(new String[] {"w5", "yy"}); |
| assertEquals("initial advance", 3, spans.advance(3)); |
| assertNext(spans, 3, 4, 5); |
| assertNext(spans, 7, 3, 4); |
| assertFinished(spans); |
| } |
| |
| public void testSpanOrUnused() throws Exception { |
| Spans spans = orSpans(new String[] {"w5", "unusedTerm", "yy"}); |
| assertNext(spans, 0, 4, 5); |
| assertNext(spans, 2, 3, 4); |
| assertNext(spans, 3, 4, 5); |
| assertNext(spans, 7, 3, 4); |
| assertFinished(spans); |
| } |
| |
| public void testSpanOrTripleSameDoc() throws Exception { |
| Spans spans = orSpans(new String[] {"t1", "t2", "t3"}); |
| assertNext(spans, 11, 0, 1); |
| assertNext(spans, 11, 1, 2); |
| assertNext(spans, 11, 2, 3); |
| assertNext(spans, 11, 3, 4); |
| assertNext(spans, 11, 4, 5); |
| assertNext(spans, 11, 5, 6); |
| assertFinished(spans); |
| } |
| |
| // LUCENE-1404 |
| private void addDoc(IndexWriter writer, String id, String text) throws IOException { |
| final Document doc = new Document(); |
| doc.add( newStringField("id", id, Field.Store.YES) ); |
| doc.add( newTextField("text", text, Field.Store.YES) ); |
| writer.addDocument(doc); |
| } |
| |
| // LUCENE-1404 |
| private long hitCount(IndexSearcher searcher, String word) throws Throwable { |
| return searcher.count(new TermQuery(new Term("text", word))); |
| } |
| |
| // LUCENE-1404 |
| private SpanQuery createSpan(String value) { |
| return spanTermQuery("text", value); |
| } |
| |
| // LUCENE-1404 |
| private SpanQuery createSpan(int slop, boolean ordered, SpanQuery[] clauses) { |
| if (ordered) { |
| return spanNearOrderedQuery(slop, clauses); |
| } else { |
| return spanNearUnorderedQuery(slop, clauses); |
| } |
| } |
| |
| // LUCENE-1404 |
| private SpanQuery createSpan(int slop, boolean ordered, String term1, String term2) { |
| return createSpan(slop, ordered, new SpanQuery[] {createSpan(term1), createSpan(term2)}); |
| } |
| |
| // LUCENE-1404 |
| public void testNPESpanQuery() throws Throwable { |
| final Directory dir = newDirectory(); |
| final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random()))); |
| |
| // Add documents |
| addDoc(writer, "1", "the big dogs went running to the market"); |
| addDoc(writer, "2", "the cat chased the mouse, then the cat ate the mouse quickly"); |
| |
| // Commit |
| writer.close(); |
| |
| // Get searcher |
| final IndexReader reader = DirectoryReader.open(dir); |
| final IndexSearcher searcher = newSearcher(reader); |
| |
| // Control (make sure docs indexed) |
| assertEquals(2, hitCount(searcher, "the")); |
| assertEquals(1, hitCount(searcher, "cat")); |
| assertEquals(1, hitCount(searcher, "dogs")); |
| assertEquals(0, hitCount(searcher, "rabbit")); |
| |
| // This throws exception (it shouldn't) |
| assertEquals(1, |
| searcher.search(createSpan(0, true, |
| new SpanQuery[] {createSpan(4, false, "chased", "cat"), |
| createSpan("ate")}), 10).totalHits.value); |
| reader.close(); |
| dir.close(); |
| } |
| |
| public void testSpanNotWithMultiterm() throws Exception { |
| SpanQuery q = spanNotQuery( |
| spanTermQuery(field, "r1"), |
| new SpanMultiTermQueryWrapper<>(new PrefixQuery(new Term(field, "s1"))),3,3); |
| checkHits(q, new int[] {14}); |
| |
| q = spanNotQuery( |
| spanTermQuery(field, "r1"), |
| new SpanMultiTermQueryWrapper<>(new FuzzyQuery(new Term(field, "s12"), 1, 2)),3,3); |
| checkHits(q, new int[] {14}); |
| |
| q = spanNotQuery( |
| new SpanMultiTermQueryWrapper<>(new PrefixQuery(new Term(field, "r"))), |
| spanTermQuery(field, "s21"),3,3); |
| checkHits(q, new int[] {13}); |
| |
| |
| } |
| |
| public void testSpanNots() throws Throwable { |
| |
| assertEquals("SpanNotIncludeExcludeSame1", 0, spanCount("s2", 0, "s2", 0, 0), 0); |
| assertEquals("SpanNotIncludeExcludeSame2", 0, spanCount("s2", 0, "s2", 10, 10), 0); |
| |
| //focus on behind |
| assertEquals("SpanNotS2NotS1_6_0", 1, spanCount("s2", 0, "s1", 6, 0)); |
| assertEquals("SpanNotS2NotS1_5_0", 2, spanCount("s2", 0, "s1", 5, 0)); |
| assertEquals("SpanNotS2NotS1_3_0", 3, spanCount("s2", 0, "s1", 3, 0)); |
| assertEquals("SpanNotS2NotS1_2_0", 4, spanCount("s2", 0, "s1", 2, 0)); |
| assertEquals("SpanNotS2NotS1_0_0", 4, spanCount("s2", 0, "s1", 0, 0)); |
| |
| //focus on both |
| assertEquals("SpanNotS2NotS1_3_1", 2, spanCount("s2", 0, "s1", 3, 1)); |
| assertEquals("SpanNotS2NotS1_2_1", 3, spanCount("s2", 0, "s1", 2, 1)); |
| assertEquals("SpanNotS2NotS1_1_1", 3, spanCount("s2", 0, "s1", 1, 1)); |
| assertEquals("SpanNotS2NotS1_10_10", 0, spanCount("s2", 0, "s1", 10, 10)); |
| |
| //focus on ahead |
| assertEquals("SpanNotS1NotS2_10_10", 0, spanCount("s1", 0, "s2", 10, 10)); |
| assertEquals("SpanNotS1NotS2_0_1", 3, spanCount("s1", 0, "s2", 0, 1)); |
| assertEquals("SpanNotS1NotS2_0_2", 3, spanCount("s1", 0, "s2", 0, 2)); |
| assertEquals("SpanNotS1NotS2_0_3", 2, spanCount("s1", 0, "s2", 0, 3)); |
| assertEquals("SpanNotS1NotS2_0_4", 1, spanCount("s1", 0, "s2", 0, 4)); |
| assertEquals("SpanNotS1NotS2_0_8", 0, spanCount("s1", 0, "s2", 0, 8)); |
| |
| //exclude doesn't exist |
| assertEquals("SpanNotS1NotS3_8_8", 3, spanCount("s1", 0, "s3", 8, 8)); |
| |
| //include doesn't exist |
| assertEquals("SpanNotS3NotS1_8_8", 0, spanCount("s3", 0, "s1", 8, 8)); |
| |
| // Negative values |
| assertEquals("SpanNotS2S1NotXXNeg_0_0", 1, spanCount("s2 s1", 10, "xx", 0, 0)); |
| assertEquals("SpanNotS2S1NotXXNeg_1_1", 1, spanCount("s2 s1", 10, "xx", -1, -1)); |
| assertEquals("SpanNotS2S1NotXXNeg_0_2", 2, spanCount("s2 s1", 10, "xx", 0, -2)); |
| assertEquals("SpanNotS2S1NotXXNeg_1_2", 2, spanCount("s2 s1", 10, "xx", -1, -2)); |
| assertEquals("SpanNotS2S1NotXXNeg_2_1", 2, spanCount("s2 s1", 10, "xx", -2, -1)); |
| assertEquals("SpanNotS2S1NotXXNeg_3_1", 2, spanCount("s2 s1", 10, "xx", -3, -1)); |
| assertEquals("SpanNotS2S1NotXXNeg_1_3", 2, spanCount("s2 s1", 10, "xx", -1, -3)); |
| assertEquals("SpanNotS2S1NotXXNeg_2_2", 3, spanCount("s2 s1", 10, "xx", -2, -2)); |
| } |
| |
| |
| private int spanCount(String include, int slop, String exclude, int pre, int post) throws IOException{ |
| String[] includeTerms = include.split(" +"); |
| SpanQuery iq = includeTerms.length == 1 ? spanTermQuery(field, include) : spanNearOrderedQuery(field, slop, includeTerms); |
| SpanQuery eq = spanTermQuery(field, exclude); |
| SpanQuery snq = spanNotQuery(iq, eq, pre, post); |
| Spans spans = snq.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS); |
| |
| int i = 0; |
| if (spans != null) { |
| while (spans.nextDoc() != Spans.NO_MORE_DOCS){ |
| while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { |
| i++; |
| } |
| } |
| } |
| return i; |
| } |
| |
| } |