blob: e4dffa5acac1d2a007e40dd3c4d3d6a0f92aba9f [file] [log] [blame]
Index: lucene/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java (revision 0)
+++ lucene/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java (revision 0)
@@ -0,0 +1,259 @@
+package org.apache.lucene.search.spans;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.util.ToStringUtils;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Set;
+
+/**
+ * Matches spans from one clause that are intersected by spans from another clause at most a given number of times.
+ */
+public class SpanWithinQuery extends SpanQuery {
+
+ private SpanQuery include; // clause whose spans are reported as matches
+ private SpanQuery exclude; // clause whose spans are counted against each include span
+ private int proximity; // highest count an include span may reach and still be reported
+
+ /**
+ * Construct a SpanWithinQuery matching spans from <code>include</code> which
+ * overlap with spans from <code>exclude</code> up to <code>proximity</code>
+ * times.
+ */
+ public SpanWithinQuery(SpanQuery include, SpanQuery exclude, int proximity) {
+ this.include = include;
+ this.exclude = exclude;
+ this.proximity = proximity;
+
+ if (!include.getField().equals(exclude.getField())) {
+ throw new IllegalArgumentException("Clauses must have same field.");
+ }
+ }
+
+ /** Returns the clause whose spans this query reports as matches. */
+ public SpanQuery getInclude() {
+ return this.include;
+ }
+
+ /** Returns the clause whose spans are counted against each include span. */
+ public SpanQuery getExclude() {
+ return this.exclude;
+ }
+
+ public String getField() { // the constructor rejects clauses whose fields differ
+ return this.include.getField();
+ }
+
+ public void extractTerms(Set<Term> terms) { // collects the include clause's terms only
+ include.extractTerms(terms);
+ }
+
+ /**
+ * Renders this query as spanWithin(include, proximity, exclude) plus its boost.
+ */
+ public String toString(String field) {
+ StringBuilder buffer = new StringBuilder("spanWithin(");
+ buffer.append(include.toString(field));
+ buffer.append(", ");
+ buffer.append(proximity).append(", "); // separator was " ,", which printed "0 ,exclude"
+ buffer.append(exclude.toString(field)).append(")");
+ buffer.append(ToStringUtils.boost(getBoost()));
+ return buffer.toString();
+ }
+
+ public Spans getSpans(final AtomicReaderContext context) throws IOException {
+ return new Spans() { // reports include's spans, dropping those that count more than proximity exclude spans
+ private Spans includeSpans = include.getSpans(context);
+ private boolean moreInclude = true;
+ private Spans excludeSpans = exclude.getSpans(context);
+ private boolean moreExclude = true; // NOTE(review): excludeSpans is not advanced here, yet its doc() is read on first use - confirm that is safe
+ // Advances to the next include span that passes the count check; returns false once include is exhausted.
+ public boolean next() throws IOException {
+ if (moreInclude) { // move to next include
+ moreInclude = includeSpans.next();
+ }
+
+ while (moreInclude && moreExclude) {
+ if (includeSpans.doc() > excludeSpans.doc()) { // exclude is in an earlier doc: catch up
+ moreExclude = excludeSpans.skipTo(includeSpans.doc());
+ }
+ // number of exclude spans counted against the current include span
+ int count = 0;
+ // NOTE(review): exclude only moves forward, so spans consumed here are not re-counted for a later include span - confirm intended
+ while (moreExclude // walk the exclude spans of include's doc
+ && (includeSpans.doc() == excludeSpans.doc())) {
+ if ((includeSpans.end() - 1) > includeSpans.start() && excludeSpans.start() < (includeSpans.end() - 1)) { // an exclude span starting at end()-1 or later is not counted; NOTE(review): no lower bound on its start
+ count += 1;
+
+ if (count > proximity) { // over the limit: stop, leaving exclude on this span
+ break;
+ }
+ }
+
+ moreExclude = excludeSpans.next(); // increment exclude
+ }
+
+ if (!moreExclude // exclude ran out, or left this doc, without exceeding the limit
+ || (includeSpans.doc() != excludeSpans.doc())
+ || (includeSpans.end() <= excludeSpans.start())) {
+ break; // we found a match
+ }
+
+ moreInclude = includeSpans.next(); // limit exceeded: try the next include span
+ }
+
+ return moreInclude;
+ }
+ // Skips include to target, then applies the same count check as next() to the span it lands on.
+ public boolean skipTo(int target) throws IOException {
+ if (moreInclude) { // skip include
+ moreInclude = includeSpans.skipTo(target);
+ }
+
+ if (!moreInclude) {
+ return false;
+ }
+
+ if (moreExclude // skip exclude
+ && (includeSpans.doc() > excludeSpans.doc())) {
+ moreExclude = excludeSpans.skipTo(includeSpans.doc());
+ }
+
+ int count = 0;
+
+ while (moreExclude // walk the exclude spans of include's doc
+ && (includeSpans.doc() == excludeSpans.doc())) {
+ if ((includeSpans.end() - 1) > includeSpans.start() && excludeSpans.start() < (includeSpans.end() - 1)) {
+ count += 1;
+
+ if (count > proximity) {
+ break;
+ }
+ }
+
+ moreExclude = excludeSpans.next(); // increment exclude
+ }
+
+ if (!moreExclude // exclude ran out, or left this doc, without exceeding the limit
+ || (includeSpans.doc() != excludeSpans.doc())
+ || (includeSpans.end() <= excludeSpans.start())) {
+ return true; // we found a match
+ }
+
+ boolean returnboolean = next();
+
+ return returnboolean; // scan to next match
+ }
+ // doc(), start() and end() below all delegate to the current include span.
+ public int doc() {
+ return includeSpans.doc();
+ }
+
+ public int start() {
+ return includeSpans.start();
+ }
+
+ public int end() {
+ return includeSpans.end();
+ }
+
+ @Override
+ public Collection<byte[]> getPayload() throws IOException {
+ ArrayList<byte[]> result = null; // stays null when include reports no payload
+ if (includeSpans.isPayloadAvailable()) {
+ result = new ArrayList<byte[]>(includeSpans.getPayload());
+ }
+ return result;
+ }
+
+ @Override
+ public boolean isPayloadAvailable() {
+ return includeSpans.isPayloadAvailable();
+ }
+
+ public String toString() {
+ return "spans(" + SpanWithinQuery.this.toString() + ")";
+ }
+ };
+ }
+
+ /**
+ * Rewrites both clauses against <code>reader</code>. Returns this query when
+ * neither rewrite produced a different object, otherwise a clone that carries
+ * the rewritten clause(s).
+ */
+ public Query rewrite(IndexReader reader) throws IOException {
+ final SpanQuery newInclude = (SpanQuery) include.rewrite(reader);
+ final SpanQuery newExclude = (SpanQuery) exclude.rewrite(reader);
+ final boolean includeChanged = newInclude != include;
+ final boolean excludeChanged = newExclude != exclude;
+
+ if (!includeChanged && !excludeChanged) {
+ return this; // no clauses rewrote
+ }
+
+ // some clauses rewrote: leave this instance untouched and patch a clone
+ final SpanWithinQuery copy = (SpanWithinQuery) this.clone();
+ if (includeChanged) {
+ copy.include = newInclude;
+ }
+ if (excludeChanged) {
+ copy.exclude = newExclude;
+ }
+
+ return copy;
+ }
+
+ /**
+ * Compares by include clause, exclude clause, boost and proximity; any object
+ * that is not a SpanWithinQuery is unequal.
+ */
+ public boolean equals(Object o) {
+ if (o == this) {
+ return true;
+ }
+
+ if (o instanceof SpanWithinQuery) {
+ final SpanWithinQuery that = (SpanWithinQuery) o;
+ final boolean sameClauses = include.equals(that.include) && exclude.equals(that.exclude);
+ final boolean sameSettings = getBoost() == that.getBoost() && proximity == that.proximity;
+ return sameClauses && sameSettings;
+ }
+ return false;
+ }
+
+ /** Hashes the include clause, exclude clause, raw boost bits and proximity,
+ * rotating left by one bit after each clause. */
+ public int hashCode() {
+ int hash = Integer.rotateLeft(include.hashCode(), 1);
+ hash ^= exclude.hashCode();
+ hash = Integer.rotateLeft(hash, 1);
+ hash ^= Float.floatToRawIntBits(getBoost());
+ hash ^= proximity;
+ return hash;
+ }
+
+}
Index: lucene/src/test/org/apache/lucene/search/spans/TestSpans.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/spans/TestSpans.java (revision 1148004)
+++ lucene/src/test/org/apache/lucene/search/spans/TestSpans.java (working copy)
@@ -86,7 +86,8 @@
"u2 xx u1 u2",
"u2 u1 xx u2",
"u1 u2 xx u2",
- "t1 t2 t1 t3 t2 t3"
+ "t1 t2 t1 t3 t2 t3",
+ "z1 z2 z3 bb bb bb z5 t6 z7"
};
public SpanTermQuery makeSpanTermQuery(String text) {
@@ -509,4 +510,25 @@
reader.close();
dir.close();
}
+
+ public void testSpanWithin() throws IOException {
+ // Include spans run from z5 to z7 and t6 is the counted clause;
+ // with a limit of 0 no document is expected.
+ SpanQuery z5NearZ7 = new SpanNearQuery(
+ new SpanQuery[] {makeSpanTermQuery("z5"), makeSpanTermQuery("z7")}, 5, false);
+ checkHits(new SpanWithinQuery(z5NearZ7, makeSpanTermQuery("t6"), 0), new int[] {});
+
+ // Include spans run from z3 to z5 with bb as the counted clause; only the
+ // larger limits are expected to hit document 12.
+ checkHits(getWithinQuery(0), new int[] {});
+ checkHits(getWithinQuery(1), new int[] {});
+ checkHits(getWithinQuery(3), new int[] {12});
+ checkHits(getWithinQuery(5), new int[] {12});
+ }
+
+ private SpanWithinQuery getWithinQuery(int distance) { // distance is passed on as the proximity limit
+ SpanQuery[] ends = {makeSpanTermQuery("z3"), makeSpanTermQuery("z5")};
+ SpanNearQuery z3NearZ5 = new SpanNearQuery(ends, 4, false);
+ return new SpanWithinQuery(z3NearZ5, makeSpanTermQuery("bb"), distance);
+ }
}
Index: lucene/src/test/org/apache/lucene/search/spans/TestSentence.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/spans/TestSentence.java (revision 0)
+++ lucene/src/test/org/apache/lucene/search/spans/TestSentence.java (revision 0)
@@ -0,0 +1,112 @@
+package org.apache.lucene.search.spans;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.spans.SpanNearQuery;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestSentence extends LuceneTestCase {
+ public static final String field = "field"; // field name used by the term helpers below
+ public static final String START = "^"; // presumably a sentence-start marker token; the test analyzer never emits it
+ public static final String END = "$"; // sentence-end marker token, emitted with position increment 0 by the test analyzer
+
+ public void testSetPosition() throws Exception {
+ Analyzer analyzer = new Analyzer() {
+ @Override
+ public TokenStream tokenStream(String fieldName, Reader reader) {
+ return new TokenStream() {
+ private final String[] TOKENS = {"1", "2", "3", END, "4", "5", "6",
+ END, "9"};
+ private final int[] INCREMENTS = {1, 1, 1, 0, 1, 1, 1, 0, 1};
+ private int i = 0;
+ // END carries increment 0, so it shares the position of the token before it.
+ PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+ CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+
+ @Override
+ public boolean incrementToken() {
+ assertEquals(TOKENS.length, INCREMENTS.length);
+ if (i == TOKENS.length) return false;
+ clearAttributes();
+ termAtt.append(TOKENS[i]);
+ offsetAtt.setOffset(i, i);
+ posIncrAtt.setPositionIncrement(INCREMENTS[i]);
+ i++;
+ return true;
+ }
+ };
+ }
+ };
+ Directory store = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random, store, analyzer);
+ Document d = new Document();
+ d.add(newField(field, "bogus", Field.Store.YES, Field.Index.ANALYZED));
+ writer.addDocument(d);
+ IndexReader reader = writer.getReader();
+ writer.close();
+ IndexSearcher searcher = newSearcher(reader);
+
+ // Indexed positions: 1@0 2@1 3@2 $@2 4@3 5@4 6@5 $@5 9@6 (the analyzer ignores the field text).
+ SpanTermQuery endSentence = makeSpanTermQuery(END);
+ SpanQuery[] clauses = new SpanQuery[2];
+ clauses[0] = makeSpanTermQuery("1");
+ clauses[1] = makeSpanTermQuery("2");
+ SpanNearQuery allKeywords = new SpanNearQuery(clauses, Integer.MAX_VALUE,
+ false); // SpanAndQuery equivalent
+ SpanWithinQuery query = new SpanWithinQuery(allKeywords, endSentence, 0);
+ if (VERBOSE) { System.out.println("query: " + query); }
+ ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
+ assertEquals(1, hits.length); // "1" and "2" have no END marker between them
+
+ clauses[1] = makeSpanTermQuery("4");
+ allKeywords = new SpanNearQuery(clauses, Integer.MAX_VALUE, false); // SpanAndQuery
+ // equivalent
+ query = new SpanWithinQuery(allKeywords, endSentence, 0);
+ if (VERBOSE) { System.out.println("query: " + query); }
+ hits = searcher.search(query, null, 1000).scoreDocs;
+ assertEquals(0, hits.length); // an END marker lies between "1" and "4"
+
+ PhraseQuery pq = new PhraseQuery();
+ pq.add(new Term(field, "3"));
+ pq.add(new Term(field, "4"));
+ hits = searcher.search(pq, null, 1000).scoreDocs;
+ assertEquals(1, hits.length); // a plain phrase still matches across the END marker
+
+ clauses[1] = makeSpanTermQuery("3");
+ allKeywords = new SpanNearQuery(clauses, Integer.MAX_VALUE, false); // SpanAndQuery
+ // equivalent
+ query = new SpanWithinQuery(allKeywords, endSentence, 0);
+ if (VERBOSE) { System.out.println("query: " + query); }
+ hits = searcher.search(query, null, 1000).scoreDocs;
+ assertEquals(1, hits.length); // the END marker at the position of "3" is not counted
+ searcher.close();
+ reader.close();
+ store.close();
+ }
+
+ public SpanTermQuery makeSpanTermQuery(String text) { // span query for one term of the test field
+ return new SpanTermQuery(new Term(TestSentence.field, text));
+ }
+
+ public TermQuery makeTermQuery(String text) { // plain term query on the test field
+ return new TermQuery(new Term(TestSentence.field, text));
+ }
+}