| Index: lucene/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java |
| =================================================================== |
| --- lucene/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java (revision 0) |
| +++ lucene/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java (revision 0) |
| @@ -0,0 +1,278 @@ |
| +package org.apache.lucene.search.spans; |
| + |
| +/** |
| + * Copyright 2004 The Apache Software Foundation |
| + * |
| + * Licensed under the Apache License, Version 2.0 (the "License"); |
| + * you may not use this file except in compliance with the License. |
| + * You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import org.apache.lucene.index.IndexReader; |
| +import org.apache.lucene.index.IndexReader.AtomicReaderContext; |
| +import org.apache.lucene.index.Term; |
| +import org.apache.lucene.search.Query; |
| +import org.apache.lucene.search.spans.SpanQuery; |
| +import org.apache.lucene.search.spans.Spans; |
| +import org.apache.lucene.util.ToStringUtils; |
| + |
| +import java.io.IOException; |
| + |
| +import java.util.ArrayList; |
| +import java.util.Collection; |
| +import java.util.Set; |
| + |
| +/** |
| + * Matches spans from <code>include</code> that are intersected by at most |
| + * <code>proximity</code> spans from <code>exclude</code>. |
| + * <p> |
| + * An exclude span counts as an intersection when it starts before the last |
| + * position of the include span (<code>start &lt; end - 1</code>) and the |
| + * include span covers more than one position. |
| + */ |
| +public class SpanWithinQuery extends SpanQuery { |
| + |
| +  private SpanQuery include; |
| +  private SpanQuery exclude; |
| +  private int proximity; |
| + |
| +  /** |
| +   * Construct a SpanWithinQuery matching spans from <code>include</code> which |
| +   * overlap with spans from <code>exclude</code> up to <code>proximity</code> |
| +   * times. |
| +   * |
| +   * @param include the query whose spans are candidates for matching |
| +   * @param exclude the query whose spans are counted as intersections |
| +   * @param proximity the maximum number of intersections a match may have |
| +   * @throws IllegalArgumentException if the clauses are on different fields |
| +   */ |
| +  public SpanWithinQuery(SpanQuery include, SpanQuery exclude, int proximity) { |
| +    if (!include.getField().equals(exclude.getField())) { |
| +      throw new IllegalArgumentException("Clauses must have same field."); |
| +    } |
| + |
| +    this.include = include; |
| +    this.exclude = exclude; |
| +    this.proximity = proximity; |
| +  } |
| + |
| +  /** Return the SpanQuery whose matches are filtered. */ |
| +  public SpanQuery getInclude() { |
| +    return include; |
| +  } |
| + |
| +  /** Return the SpanQuery whose matches are counted as intersections. */ |
| +  public SpanQuery getExclude() { |
| +    return exclude; |
| +  } |
| + |
| +  /** Return the field of the include clause. */ |
| +  @Override |
| +  public String getField() { |
| +    return include.getField(); |
| +  } |
| + |
| +  /** Collect the terms of the include clause only. */ |
| +  @Override |
| +  public void extractTerms(Set<Term> terms) { |
| +    include.extractTerms(terms); |
| +  } |
| + |
| +  /** Render as <code>spanWithin(include, proximity, exclude)</code> plus boost. */ |
| +  @Override |
| +  public String toString(String field) { |
| +    StringBuilder buffer = new StringBuilder(); |
| +    buffer.append("spanWithin("); |
| +    buffer.append(include.toString(field)); |
| +    buffer.append(", "); |
| +    buffer.append(proximity); |
| +    buffer.append(", "); |
| +    buffer.append(exclude.toString(field)); |
| +    buffer.append(")"); |
| +    buffer.append(ToStringUtils.boost(getBoost())); |
| +    return buffer.toString(); |
| +  } |
| + |
| +  /** |
| +   * Return the spans of <code>include</code> in this reader context that are |
| +   * intersected by no more than <code>proximity</code> spans of |
| +   * <code>exclude</code>. |
| +   */ |
| +  @Override |
| +  public Spans getSpans(final AtomicReaderContext context) throws IOException { |
| +    return new Spans() { |
| +      private Spans includeSpans = include.getSpans(context); |
| +      private boolean moreInclude = true; |
| +      private Spans excludeSpans = exclude.getSpans(context); |
| +      // Position excludeSpans on its first match up front: doc()/start() are |
| +      // consulted below and must not be called on an unpositioned Spans. |
| +      private boolean moreExclude = excludeSpans.next(); |
| + |
| +      @Override |
| +      public boolean next() throws IOException { |
| +        if (moreInclude) { // move to next include |
| +          moreInclude = includeSpans.next(); |
| +        } |
| + |
| +        while (moreInclude && moreExclude) { |
| +          if (acceptCurrentInclude()) { |
| +            break; // we found a match |
| +          } |
| + |
| +          moreInclude = includeSpans.next(); // too many intersections: keep scanning |
| +        } |
| + |
| +        return moreInclude; |
| +      } |
| + |
| +      @Override |
| +      public boolean skipTo(int target) throws IOException { |
| +        if (moreInclude) { // skip include |
| +          moreInclude = includeSpans.skipTo(target); |
| +        } |
| + |
| +        if (!moreInclude) { |
| +          return false; |
| +        } |
| + |
| +        if (acceptCurrentInclude()) { |
| +          return true; // we found a match |
| +        } |
| + |
| +        return next(); // scan to next match |
| +      } |
| + |
| +      /** |
| +       * Advance excludeSpans through the document of the current include span, |
| +       * counting the exclude spans that start before the include span's last |
| +       * position. Only include spans covering more than one position can be |
| +       * intersected. |
| +       * |
| +       * @return false as soon as the count exceeds <code>proximity</code>; |
| +       *         true once excludeSpans is exhausted or has moved past the |
| +       *         current document. |
| +       */ |
| +      private boolean acceptCurrentInclude() throws IOException { |
| +        if (moreExclude && includeSpans.doc() > excludeSpans.doc()) { |
| +          moreExclude = excludeSpans.skipTo(includeSpans.doc()); // skip exclude |
| +        } |
| + |
| +        // NOTE(review): exclude spans lying entirely before the include span |
| +        // are counted too, because only excludeSpans.start() is examined. |
| +        int count = 0; |
| +        while (moreExclude && includeSpans.doc() == excludeSpans.doc()) { |
| +          boolean intersects = (includeSpans.end() - 1) > includeSpans.start() |
| +              && excludeSpans.start() < (includeSpans.end() - 1); |
| +          if (intersects && ++count > proximity) { |
| +            return false; // excludeSpans stays on the offending span |
| +          } |
| + |
| +          moreExclude = excludeSpans.next(); // increment exclude |
| +        } |
| + |
| +        return true; |
| +      } |
| + |
| +      @Override |
| +      public int doc() { |
| +        return includeSpans.doc(); |
| +      } |
| + |
| +      @Override |
| +      public int start() { |
| +        return includeSpans.start(); |
| +      } |
| + |
| +      @Override |
| +      public int end() { |
| +        return includeSpans.end(); |
| +      } |
| + |
| +      /** Return a copy of the include span's payloads, or null if it has none. */ |
| +      @Override |
| +      public Collection<byte[]> getPayload() throws IOException { |
| +        ArrayList<byte[]> result = null; |
| +        if (includeSpans.isPayloadAvailable()) { |
| +          result = new ArrayList<byte[]>(includeSpans.getPayload()); |
| +        } |
| +        return result; |
| +      } |
| + |
| +      @Override |
| +      public boolean isPayloadAvailable() { |
| +        return includeSpans.isPayloadAvailable(); |
| +      } |
| + |
| +      @Override |
| +      public String toString() { |
| +        return "spans(" + SpanWithinQuery.this.toString() + ")"; |
| +      } |
| +    }; |
| +  } |
| + |
| +  /** |
| +   * Rewrite both clauses; return this query when neither changed, otherwise |
| +   * a clone carrying the rewritten clauses. |
| +   */ |
| +  @Override |
| +  public Query rewrite(IndexReader reader) throws IOException { |
| +    SpanWithinQuery clone = null; |
| + |
| +    SpanQuery rewrittenInclude = (SpanQuery) include.rewrite(reader); |
| +    if (rewrittenInclude != include) { |
| +      clone = (SpanWithinQuery) this.clone(); |
| +      clone.include = rewrittenInclude; |
| +    } |
| + |
| +    SpanQuery rewrittenExclude = (SpanQuery) exclude.rewrite(reader); |
| +    if (rewrittenExclude != exclude) { |
| +      if (clone == null) { |
| +        clone = (SpanWithinQuery) this.clone(); |
| +      } |
| +      clone.exclude = rewrittenExclude; |
| +    } |
| + |
| +    return (clone != null) ? clone : this; |
| +  } |
| + |
| +  /** Returns true iff <code>o</code> is equal to this. */ |
| +  @Override |
| +  public boolean equals(Object o) { |
| +    if (this == o) { |
| +      return true; |
| +    } |
| + |
| +    if (!(o instanceof SpanWithinQuery)) { |
| +      return false; |
| +    } |
| + |
| +    SpanWithinQuery other = (SpanWithinQuery) o; |
| + |
| +    return this.include.equals(other.include) |
| +        && this.exclude.equals(other.exclude) |
| +        && (this.getBoost() == other.getBoost()) |
| +        && (proximity == other.proximity); |
| +  } |
| + |
| +  /** Combine the hashes of both clauses, the boost and the proximity. */ |
| +  @Override |
| +  public int hashCode() { |
| +    int h = include.hashCode(); |
| +    h = (h << 1) | (h >>> 31); // rotate left |
| +    h ^= exclude.hashCode(); |
| +    h = (h << 1) | (h >>> 31); // rotate left |
| +    h ^= Float.floatToRawIntBits(getBoost()); |
| +    h ^= proximity; |
| + |
| +    return h; |
| +  } |
| + |
| +} |
| Index: lucene/src/test/org/apache/lucene/search/spans/TestSpans.java |
| =================================================================== |
| --- lucene/src/test/org/apache/lucene/search/spans/TestSpans.java (revision 1148004) |
| +++ lucene/src/test/org/apache/lucene/search/spans/TestSpans.java (working copy) |
| @@ -86,7 +86,8 @@ |
| "u2 xx u1 u2", |
| "u2 u1 xx u2", |
| "u1 u2 xx u2", |
| - "t1 t2 t1 t3 t2 t3" |
| + "t1 t2 t1 t3 t2 t3", |
| + "z1 z2 z3 bb bb bb z5 t6 z7" |
| }; |
| |
| public SpanTermQuery makeSpanTermQuery(String text) { |
| @@ -509,4 +510,33 @@ |
| reader.close(); |
| dir.close(); |
| } |
| + |
| +  /** |
| +   * Exercises SpanWithinQuery with increasing numbers of tolerated |
| +   * intersections; hit 12 is expected to be the |
| +   * "z1 z2 z3 bb bb bb z5 t6 z7" document. |
| +   */ |
| +  public void testSpanWithin() throws IOException { |
| +    // "t6" lies between "z5" and "z7", so zero tolerated intersections rejects it |
| +    SpanQuery z5NearZ7 = new SpanNearQuery(new SpanQuery[] { |
| +        makeSpanTermQuery("z5"), makeSpanTermQuery("z7")}, 5, false); |
| +    checkHits(new SpanWithinQuery(z5NearZ7, makeSpanTermQuery("t6"), 0), |
| +        new int[] {}); |
| + |
| +    // three "bb" tokens separate "z3" and "z5": at least 3 must be tolerated |
| +    checkHits(getWithinQuery(0), new int[] {}); |
| +    checkHits(getWithinQuery(1), new int[] {}); |
| +    checkHits(getWithinQuery(3), new int[] {12}); |
| +    checkHits(getWithinQuery(5), new int[] {12}); |
| +  } |
| + |
| +  /** |
| +   * Builds a query for unordered "z3"/"z5" spans (slop 4) that tolerates up to |
| +   * <code>maxIntersections</code> intersecting "bb" spans. |
| +   */ |
| +  private SpanWithinQuery getWithinQuery(int maxIntersections) { |
| +    SpanQuery[] clauses = {makeSpanTermQuery("z3"), makeSpanTermQuery("z5")}; |
| +    SpanQuery z3NearZ5 = new SpanNearQuery(clauses, 4, false); |
| +    return new SpanWithinQuery(z3NearZ5, makeSpanTermQuery("bb"), maxIntersections); |
| +  } |
| } |
| Index: lucene/src/test/org/apache/lucene/search/spans/TestSentence.java |
| =================================================================== |
| --- lucene/src/test/org/apache/lucene/search/spans/TestSentence.java (revision 0) |
| +++ lucene/src/test/org/apache/lucene/search/spans/TestSentence.java (revision 0) |
| @@ -0,0 +1,131 @@ |
| +package org.apache.lucene.search.spans; |
| + |
| +import java.io.Reader; |
| + |
| +import org.apache.lucene.analysis.Analyzer; |
| +import org.apache.lucene.analysis.TokenStream; |
| +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; |
| +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; |
| +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; |
| +import org.apache.lucene.document.Document; |
| +import org.apache.lucene.document.Field; |
| +import org.apache.lucene.index.IndexReader; |
| +import org.apache.lucene.index.RandomIndexWriter; |
| +import org.apache.lucene.index.Term; |
| +import org.apache.lucene.store.Directory; |
| +import org.apache.lucene.search.IndexSearcher; |
| +import org.apache.lucene.search.PhraseQuery; |
| +import org.apache.lucene.search.ScoreDoc; |
| +import org.apache.lucene.search.TermQuery; |
| +import org.apache.lucene.search.spans.SpanNearQuery; |
| +import org.apache.lucene.search.spans.SpanQuery; |
| +import org.apache.lucene.search.spans.SpanTermQuery; |
| +import org.apache.lucene.util.LuceneTestCase; |
| + |
| +/** |
| + * Tests that SpanWithinQuery can confine a span match to one "sentence" when |
| + * a sentence-end marker token is indexed at the same position as the last |
| + * word of each sentence. |
| + */ |
| +public class TestSentence extends LuceneTestCase { |
| +  public static final String field = "field"; |
| +  public static final String START = "^"; |
| +  public static final String END = "$"; |
| + |
| +  /** |
| +   * Indexes the tokens 1 2 3 $ 4 5 6 $ 9, where each $ has position increment |
| +   * 0, and checks which keyword pairs match without crossing a $ marker. |
| +   */ |
| +  public void testSetPosition() throws Exception { |
| +    Analyzer analyzer = new Analyzer() { |
| +      @Override |
| +      public TokenStream tokenStream(String fieldName, Reader reader) { |
| +        return new TokenStream() { |
| +          private final String[] TOKENS = {"1", "2", "3", END, "4", "5", "6", |
| +              END, "9"}; |
| +          private final int[] INCREMENTS = {1, 1, 1, 0, 1, 1, 1, 0, 1}; |
| +          private int i = 0; |
| + |
| +          PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); |
| +          CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); |
| +          OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); |
| + |
| +          @Override |
| +          public boolean incrementToken() { |
| +            assertEquals(TOKENS.length, INCREMENTS.length); |
| +            if (i == TOKENS.length) return false; |
| +            clearAttributes(); |
| +            termAtt.append(TOKENS[i]); |
| +            offsetAtt.setOffset(i, i); |
| +            posIncrAtt.setPositionIncrement(INCREMENTS[i]); |
| +            i++; |
| +            return true; |
| +          } |
| +        }; |
| +      } |
| +    }; |
| +    Directory store = newDirectory(); |
| +    RandomIndexWriter writer = new RandomIndexWriter(random, store, analyzer); |
| +    Document d = new Document(); |
| +    // the analyzer ignores the field text and always emits the fixed tokens |
| +    d.add(newField(field, "bogus", Field.Store.YES, Field.Index.ANALYZED)); |
| +    writer.addDocument(d); |
| +    IndexReader reader = writer.getReader(); |
| +    writer.close(); |
| +    IndexSearcher searcher = newSearcher(reader); |
| + |
| +    // "1" and "2" have no sentence end between them |
| +    SpanWithinQuery query = withinSentence("1", "2"); |
| +    if (VERBOSE) { |
| +      System.out.println("query: " + query); |
| +    } |
| +    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; |
| +    assertEquals(1, hits.length); |
| + |
| +    // "1" and "4" are separated by a sentence end |
| +    query = withinSentence("1", "4"); |
| +    if (VERBOSE) { |
| +      System.out.println("query: " + query); |
| +    } |
| +    hits = searcher.search(query, null, 1000).scoreDocs; |
| +    assertEquals(0, hits.length); |
| + |
| +    // a plain phrase query is unaffected by the sentence-end marker |
| +    PhraseQuery pq = new PhraseQuery(); |
| +    pq.add(new Term(field, "3")); |
| +    pq.add(new Term(field, "4")); |
| +    hits = searcher.search(pq, null, 1000).scoreDocs; |
| +    assertEquals(1, hits.length); |
| + |
| +    // "3" shares its position with the first END marker and still matches |
| +    query = withinSentence("1", "3"); |
| +    if (VERBOSE) { |
| +      System.out.println("query: " + query); |
| +    } |
| +    hits = searcher.search(query, null, 1000).scoreDocs; |
| +    assertEquals(1, hits.length); |
| + |
| +    searcher.close(); |
| +    reader.close(); |
| +    store.close(); |
| +  } |
| + |
| +  /** |
| +   * Builds a query matching both keywords in any order and at any distance, |
| +   * tolerating no intersecting END marker. |
| +   */ |
| +  private SpanWithinQuery withinSentence(String first, String second) { |
| +    SpanQuery[] clauses = {makeSpanTermQuery(first), makeSpanTermQuery(second)}; |
| +    // unbounded slop, unordered: SpanAndQuery equivalent |
| +    SpanNearQuery allKeywords = new SpanNearQuery(clauses, Integer.MAX_VALUE, false); |
| +    return new SpanWithinQuery(allKeywords, makeSpanTermQuery(END), 0); |
| +  } |
| + |
| +  public SpanTermQuery makeSpanTermQuery(String text) { |
| +    return new SpanTermQuery(new Term(field, text)); |
| +  } |
| + |
| +  public TermQuery makeTermQuery(String text) { |
| +    return new TermQuery(new Term(field, text)); |
| +  } |
| +} |