blob: 81201244ca5e8e903111f656967af51d0a2d6e30 [file] [log] [blame]
Index: lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java (revision 1703369)
+++ lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java (working copy)
@@ -24,9 +24,7 @@
* The payload of a Token.
* <p>
* The payload is stored in the index at each position, and can
- * be used to influence scoring when using Payload-based queries
- * in the {@link org.apache.lucene.search.payloads} and
- * {@link org.apache.lucene.search.spans} packages.
+ * be used to influence scoring when using Payload-based queries.
* <p>
* NOTE: because the payload will be stored at each position, it's usually
* best to use the minimum number of bytes necessary. Some codec implementations
Index: lucene/core/src/java/org/apache/lucene/search/package-info.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/package-info.java (revision 1703369)
+++ lucene/core/src/java/org/apache/lucene/search/package-info.java (working copy)
@@ -33,7 +33,7 @@
* <h2>Search Basics</h2>
* <p>
* Lucene offers a wide variety of {@link org.apache.lucene.search.Query} implementations, most of which are in
- * this package, its subpackages ({@link org.apache.lucene.search.spans spans}, {@link org.apache.lucene.search.payloads payloads}),
+ * this package, its subpackage ({@link org.apache.lucene.search.spans spans}),
* or the <a href="{@docRoot}/../queries/overview-summary.html">queries module</a>. These implementations can be combined in a wide
* variety of ways to provide complex querying capabilities along with information about where matches took place in the document
* collection. The <a href="#query">Query Classes</a> section below highlights some of the more important Query classes. For details
Index: lucene/core/src/java/org/apache/lucene/search/payloads/AveragePayloadFunction.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/payloads/AveragePayloadFunction.java (revision 1703369)
+++ lucene/core/src/java/org/apache/lucene/search/payloads/AveragePayloadFunction.java (working copy)
@@ -1,57 +0,0 @@
-package org.apache.lucene.search.payloads;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-/**
- * Calculate the final score as the average score of all payloads seen.
- * <p>
- * Is thread safe and completely reusable.
- *
- **/
-public class AveragePayloadFunction extends PayloadFunction{
-
- @Override
- public float currentScore(int docId, String field, int start, int end, int numPayloadsSeen, float currentScore, float currentPayloadScore) {
- return currentPayloadScore + currentScore;
- }
-
- @Override
- public float docScore(int docId, String field, int numPayloadsSeen, float payloadScore) {
- return numPayloadsSeen > 0 ? (payloadScore / numPayloadsSeen) : 1;
- }
-
- @Override
- public int hashCode() {
- final int prime = 31;
- int result = 1;
- result = prime * result + this.getClass().hashCode();
- return result;
- }
-
- @Override
- public boolean equals(Object obj) {
- if (this == obj)
- return true;
- if (obj == null)
- return false;
- if (getClass() != obj.getClass())
- return false;
- return true;
- }
-}
Index: lucene/core/src/java/org/apache/lucene/search/payloads/MaxPayloadFunction.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/payloads/MaxPayloadFunction.java (revision 1703369)
+++ lucene/core/src/java/org/apache/lucene/search/payloads/MaxPayloadFunction.java (working copy)
@@ -1,60 +0,0 @@
-package org.apache.lucene.search.payloads;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-/**
- * Returns the maximum payload score seen, else 1 if there are no payloads on the doc.
- * <p>
- * Is thread safe and completely reusable.
- *
- **/
-public class MaxPayloadFunction extends PayloadFunction {
- @Override
- public float currentScore(int docId, String field, int start, int end, int numPayloadsSeen, float currentScore, float currentPayloadScore) {
- if (numPayloadsSeen == 0) {
- return currentPayloadScore;
- } else {
- return Math.max(currentPayloadScore, currentScore);
- }
- }
-
- @Override
- public float docScore(int docId, String field, int numPayloadsSeen, float payloadScore) {
- return numPayloadsSeen > 0 ? payloadScore : 1;
- }
-
- @Override
- public int hashCode() {
- final int prime = 31;
- int result = 1;
- result = prime * result + this.getClass().hashCode();
- return result;
- }
-
- @Override
- public boolean equals(Object obj) {
- if (this == obj)
- return true;
- if (obj == null)
- return false;
- if (getClass() != obj.getClass())
- return false;
- return true;
- }
-}
Index: lucene/core/src/java/org/apache/lucene/search/payloads/MinPayloadFunction.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/payloads/MinPayloadFunction.java (revision 1703369)
+++ lucene/core/src/java/org/apache/lucene/search/payloads/MinPayloadFunction.java (working copy)
@@ -1,59 +0,0 @@
-package org.apache.lucene.search.payloads;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Calculates the minimum payload seen
- *
- **/
-public class MinPayloadFunction extends PayloadFunction {
-
- @Override
- public float currentScore(int docId, String field, int start, int end, int numPayloadsSeen, float currentScore, float currentPayloadScore) {
- if (numPayloadsSeen == 0) {
- return currentPayloadScore;
- } else {
- return Math.min(currentPayloadScore, currentScore);
- }
- }
-
- @Override
- public float docScore(int docId, String field, int numPayloadsSeen, float payloadScore) {
- return numPayloadsSeen > 0 ? payloadScore : 1;
- }
-
- @Override
- public int hashCode() {
- final int prime = 31;
- int result = 1;
- result = prime * result + this.getClass().hashCode();
- return result;
- }
-
- @Override
- public boolean equals(Object obj) {
- if (this == obj)
- return true;
- if (obj == null)
- return false;
- if (getClass() != obj.getClass())
- return false;
- return true;
- }
-
-}
Index: lucene/core/src/java/org/apache/lucene/search/payloads/PayloadFunction.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/payloads/PayloadFunction.java (revision 1703369)
+++ lucene/core/src/java/org/apache/lucene/search/payloads/PayloadFunction.java (working copy)
@@ -1,70 +0,0 @@
-package org.apache.lucene.search.payloads;
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.search.Explanation;
-
-/**
- * An abstract class that defines a way for PayloadScoreQuery instances to transform
- * the cumulative effects of payload scores for a document.
- *
- * @see org.apache.lucene.search.payloads.PayloadScoreQuery for more information
- *
- * @lucene.experimental This class and its derivations are experimental and subject to
- * change
- *
- **/
-public abstract class PayloadFunction {
-
- /**
- * Calculate the score up to this point for this doc and field
- * @param docId The current doc
- * @param field The field
- * @param start The start position of the matching Span
- * @param end The end position of the matching Span
- * @param numPayloadsSeen The number of payloads seen so far
- * @param currentScore The current score so far
- * @param currentPayloadScore The score for the current payload
- * @return The new current Score
- *
- * @see org.apache.lucene.search.spans.Spans
- */
- public abstract float currentScore(int docId, String field, int start, int end, int numPayloadsSeen, float currentScore, float currentPayloadScore);
-
- /**
- * Calculate the final score for all the payloads seen so far for this doc/field
- * @param docId The current doc
- * @param field The current field
- * @param numPayloadsSeen The total number of payloads seen on this document
- * @param payloadScore The raw score for those payloads
- * @return The final score for the payloads
- */
- public abstract float docScore(int docId, String field, int numPayloadsSeen, float payloadScore);
-
- public Explanation explain(int docId, String field, int numPayloadsSeen, float payloadScore){
- return Explanation.match(
- docScore(docId, field, numPayloadsSeen, payloadScore),
- getClass().getSimpleName() + ".docScore()");
- };
-
- @Override
- public abstract int hashCode();
-
- @Override
- public abstract boolean equals(Object o);
-
-}
Index: lucene/core/src/java/org/apache/lucene/search/payloads/PayloadScoreQuery.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/payloads/PayloadScoreQuery.java (revision 1703369)
+++ lucene/core/src/java/org/apache/lucene/search/payloads/PayloadScoreQuery.java (working copy)
@@ -1,215 +0,0 @@
-package org.apache.lucene.search.payloads;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.PostingsEnum;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermContext;
-import org.apache.lucene.search.Explanation;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.similarities.ClassicSimilarity;
-import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.search.spans.SpanCollector;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.SpanScorer;
-import org.apache.lucene.search.spans.SpanWeight;
-import org.apache.lucene.search.spans.Spans;
-import org.apache.lucene.util.BytesRef;
-
-/**
- * A Query class that uses a {@link PayloadFunction} to modify the score of a
- * wrapped SpanQuery
- *
- * NOTE: In order to take advantage of this with the default scoring implementation
- * ({@link ClassicSimilarity}), you must override {@link ClassicSimilarity#scorePayload(int, int, int, BytesRef)},
- * which returns 1 by default.
- *
- * @see org.apache.lucene.search.similarities.Similarity.SimScorer#computePayloadFactor(int, int, int, BytesRef)
- */
-public class PayloadScoreQuery extends SpanQuery {
-
- private final SpanQuery wrappedQuery;
- private final PayloadFunction function;
-
- /**
- * Creates a new PayloadScoreQuery
- * @param wrappedQuery the query to wrap
- * @param function a PayloadFunction to use to modify the scores
- */
- public PayloadScoreQuery(SpanQuery wrappedQuery, PayloadFunction function) {
- this.wrappedQuery = wrappedQuery;
- this.function = function;
- }
-
- @Override
- public String getField() {
- return wrappedQuery.getField();
- }
-
- @Override
- public String toString(String field) {
- return "PayloadSpanQuery[" + wrappedQuery.toString(field) + "; " + function.toString() + "]";
- }
-
- @Override
- public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
- SpanWeight innerWeight = wrappedQuery.createWeight(searcher, needsScores);
- if (!needsScores)
- return innerWeight;
- return new PayloadSpanWeight(searcher, innerWeight);
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) return true;
- if (!(o instanceof PayloadScoreQuery)) return false;
- if (!super.equals(o)) return false;
-
- PayloadScoreQuery that = (PayloadScoreQuery) o;
-
- if (wrappedQuery != null ? !wrappedQuery.equals(that.wrappedQuery) : that.wrappedQuery != null) return false;
- return !(function != null ? !function.equals(that.function) : that.function != null);
-
- }
-
- @Override
- public int hashCode() {
- int result = super.hashCode();
- result = 31 * result + (wrappedQuery != null ? wrappedQuery.hashCode() : 0);
- result = 31 * result + (function != null ? function.hashCode() : 0);
- return result;
- }
-
- private class PayloadSpanWeight extends SpanWeight {
-
- private final SpanWeight innerWeight;
-
- public PayloadSpanWeight(IndexSearcher searcher, SpanWeight innerWeight) throws IOException {
- super(PayloadScoreQuery.this, searcher, null);
- this.innerWeight = innerWeight;
- }
-
- @Override
- public void extractTermContexts(Map<Term, TermContext> contexts) {
- innerWeight.extractTermContexts(contexts);
- }
-
- @Override
- public Spans getSpans(LeafReaderContext ctx, Postings requiredPostings) throws IOException {
- return innerWeight.getSpans(ctx, requiredPostings.atLeast(Postings.PAYLOADS));
- }
-
- @Override
- public Scorer scorer(LeafReaderContext context) throws IOException {
- Spans spans = getSpans(context, Postings.PAYLOADS);
- if (spans == null)
- return null;
- return new PayloadSpanScorer(spans, this, innerWeight.getSimScorer(context));
- }
-
- @Override
- public void extractTerms(Set<Term> terms) {
- innerWeight.extractTerms(terms);
- }
-
- @Override
- public float getValueForNormalization() throws IOException {
- return innerWeight.getValueForNormalization();
- }
-
- @Override
- public void normalize(float queryNorm, float topLevelBoost) {
- innerWeight.normalize(queryNorm, topLevelBoost);
- }
-
- @Override
- public Explanation explain(LeafReaderContext context, int doc) throws IOException {
- PayloadSpanScorer scorer = (PayloadSpanScorer) scorer(context);
- if (scorer == null || scorer.advance(doc) != doc)
- return Explanation.noMatch("No match");
-
- SpanWeight innerWeight = ((PayloadSpanWeight)scorer.getWeight()).innerWeight;
- Explanation innerExpl = innerWeight.explain(context, doc);
- scorer.freq(); // force freq calculation
- Explanation payloadExpl = scorer.getPayloadExplanation();
-
- return Explanation.match(scorer.scoreCurrentDoc(), "PayloadSpanQuery, product of:", innerExpl, payloadExpl);
- }
- }
-
- private class PayloadSpanScorer extends SpanScorer implements SpanCollector {
-
- private int payloadsSeen;
- private float payloadScore;
-
- private PayloadSpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException {
- super(spans, weight, docScorer);
- }
-
- @Override
- protected void doStartCurrentDoc() {
- payloadScore = 0;
- payloadsSeen = 0;
- }
-
- @Override
- protected void doCurrentSpans() throws IOException {
- spans.collect(this);
- }
-
- @Override
- public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
- BytesRef payload = postings.getPayload();
- if (payload == null)
- return;
- float payloadFactor = docScorer.computePayloadFactor(docID(), spans.startPosition(), spans.endPosition(), payload);
- payloadScore = function.currentScore(docID(), getField(), spans.startPosition(), spans.endPosition(),
- payloadsSeen, payloadScore, payloadFactor);
- payloadsSeen++;
- }
-
- protected float getPayloadScore() {
- return function.docScore(docID(), getField(), payloadsSeen, payloadScore);
- }
-
- protected Explanation getPayloadExplanation() {
- return function.explain(docID(), getField(), payloadsSeen, payloadScore);
- }
-
- protected float getSpanScore() throws IOException {
- return super.scoreCurrentDoc();
- }
-
- @Override
- protected float scoreCurrentDoc() throws IOException {
- return getSpanScore() * getPayloadScore();
- }
-
- @Override
- public void reset() {
-
- }
- }
-
-}
Index: lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanCollector.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanCollector.java (revision 1703369)
+++ lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanCollector.java (working copy)
@@ -1,57 +0,0 @@
-package org.apache.lucene.search.payloads;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.index.PostingsEnum;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.spans.SpanCollector;
-import org.apache.lucene.util.BytesRef;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-
-/**
- * SpanCollector for collecting payloads
- */
-public class PayloadSpanCollector implements SpanCollector {
-
- private final Collection<byte[]> payloads = new ArrayList<>();
-
- @Override
- public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
- BytesRef payload = postings.getPayload();
- if (payload == null)
- return;
- final byte[] bytes = new byte[payload.length];
- System.arraycopy(payload.bytes, payload.offset, bytes, 0, payload.length);
- payloads.add(bytes);
- }
-
- @Override
- public void reset() {
- payloads.clear();
- }
-
- /**
- * @return the collected payloads
- */
- public Collection<byte[]> getPayloads() {
- return payloads;
- }
-}
Index: lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java (revision 1703369)
+++ lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java (working copy)
@@ -1,191 +0,0 @@
-package org.apache.lucene.search.payloads;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexReaderContext;
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.DisjunctionMaxQuery;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.MultiPhraseQuery;
-import org.apache.lucene.search.PhraseQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.spans.SpanNearQuery;
-import org.apache.lucene.search.spans.SpanOrQuery;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
-import org.apache.lucene.search.spans.SpanWeight;
-import org.apache.lucene.search.spans.Spans;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Iterator;
-import java.util.List;
-
-/**
- * Experimental class to get set of payloads for most standard Lucene queries.
- * Operates like Highlighter - IndexReader should only contain doc of interest,
- * best to use MemoryIndex.
- *
- * @lucene.experimental
- *
- */
-public class PayloadSpanUtil {
- private IndexReaderContext context;
-
- /**
- * @param context
- * that contains doc with payloads to extract
- *
- * @see IndexReader#getContext()
- */
- public PayloadSpanUtil(IndexReaderContext context) {
- this.context = context;
- }
-
- /**
- * Query should be rewritten for wild/fuzzy support.
- *
- * @param query rewritten query
- * @return payloads Collection
- * @throws IOException if there is a low-level I/O error
- */
- public Collection<byte[]> getPayloadsForQuery(Query query) throws IOException {
- Collection<byte[]> payloads = new ArrayList<>();
- queryToSpanQuery(query, payloads);
- return payloads;
- }
-
- private void queryToSpanQuery(Query query, Collection<byte[]> payloads)
- throws IOException {
- if (query instanceof BooleanQuery) {
- for (BooleanClause clause : (BooleanQuery) query) {
- if (!clause.isProhibited()) {
- queryToSpanQuery(clause.getQuery(), payloads);
- }
- }
-
- } else if (query instanceof PhraseQuery) {
- Term[] phraseQueryTerms = ((PhraseQuery) query).getTerms();
- SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
- for (int i = 0; i < phraseQueryTerms.length; i++) {
- clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
- }
-
- int slop = ((PhraseQuery) query).getSlop();
- boolean inorder = false;
-
- if (slop == 0) {
- inorder = true;
- }
-
- SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
- getPayloads(payloads, sp);
- } else if (query instanceof TermQuery) {
- SpanTermQuery stq = new SpanTermQuery(((TermQuery) query).getTerm());
- getPayloads(payloads, stq);
- } else if (query instanceof SpanQuery) {
- getPayloads(payloads, (SpanQuery) query);
- } else if (query instanceof DisjunctionMaxQuery) {
-
- for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator
- .hasNext();) {
- queryToSpanQuery(iterator.next(), payloads);
- }
-
- } else if (query instanceof MultiPhraseQuery) {
- final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
- final List<Term[]> termArrays = mpq.getTermArrays();
- final int[] positions = mpq.getPositions();
- if (positions.length > 0) {
-
- int maxPosition = positions[positions.length - 1];
- for (int i = 0; i < positions.length - 1; ++i) {
- if (positions[i] > maxPosition) {
- maxPosition = positions[i];
- }
- }
-
- @SuppressWarnings({"rawtypes","unchecked"}) final List<Query>[] disjunctLists =
- new List[maxPosition + 1];
- int distinctPositions = 0;
-
- for (int i = 0; i < termArrays.size(); ++i) {
- final Term[] termArray = termArrays.get(i);
- List<Query> disjuncts = disjunctLists[positions[i]];
- if (disjuncts == null) {
- disjuncts = (disjunctLists[positions[i]] = new ArrayList<>(
- termArray.length));
- ++distinctPositions;
- }
- for (final Term term : termArray) {
- disjuncts.add(new SpanTermQuery(term));
- }
- }
-
- int positionGaps = 0;
- int position = 0;
- final SpanQuery[] clauses = new SpanQuery[distinctPositions];
- for (int i = 0; i < disjunctLists.length; ++i) {
- List<Query> disjuncts = disjunctLists[i];
- if (disjuncts != null) {
- clauses[position++] = new SpanOrQuery(disjuncts
- .toArray(new SpanQuery[disjuncts.size()]));
- } else {
- ++positionGaps;
- }
- }
-
- final int slop = mpq.getSlop();
- final boolean inorder = (slop == 0);
-
- SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps,
- inorder);
- getPayloads(payloads, sp);
- }
- }
- }
-
- private void getPayloads(Collection<byte []> payloads, SpanQuery query)
- throws IOException {
-
- final IndexSearcher searcher = new IndexSearcher(context);
- searcher.setQueryCache(null);
-
- SpanWeight w = (SpanWeight) searcher.createNormalizedWeight(query, false);
-
- PayloadSpanCollector collector = new PayloadSpanCollector();
- for (LeafReaderContext leafReaderContext : context.leaves()) {
- final Spans spans = w.getSpans(leafReaderContext, SpanWeight.Postings.PAYLOADS);
- if (spans != null) {
- while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
- while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
- collector.reset();
- spans.collect(collector);
- payloads.addAll(collector.getPayloads());
- }
- }
- }
- }
- }
-}
Index: lucene/core/src/java/org/apache/lucene/search/payloads/SpanPayloadCheckQuery.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/payloads/SpanPayloadCheckQuery.java (revision 1703369)
+++ lucene/core/src/java/org/apache/lucene/search/payloads/SpanPayloadCheckQuery.java (working copy)
@@ -1,189 +0,0 @@
-package org.apache.lucene.search.payloads;
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.PostingsEnum;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermContext;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.search.spans.FilterSpans;
-import org.apache.lucene.search.spans.FilterSpans.AcceptStatus;
-import org.apache.lucene.search.spans.SpanCollector;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.SpanScorer;
-import org.apache.lucene.search.spans.SpanWeight;
-import org.apache.lucene.search.spans.Spans;
-import org.apache.lucene.util.BytesRef;
-
-/**
- * Only return those matches that have a specific payload at the given position.
- */
-public class SpanPayloadCheckQuery extends SpanQuery {
-
- protected final List<BytesRef> payloadToMatch;
- protected final SpanQuery match;
-
- /**
- * @param match The underlying {@link org.apache.lucene.search.spans.SpanQuery} to check
- * @param payloadToMatch The {@link java.util.List} of payloads to match
- */
- public SpanPayloadCheckQuery(SpanQuery match, List<BytesRef> payloadToMatch) {
- this.match = match;
- this.payloadToMatch = payloadToMatch;
- }
-
- @Override
- public String getField() {
- return match.getField();
- }
-
- @Override
- public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
- SpanWeight matchWeight = match.createWeight(searcher, false);
- return new SpanPayloadCheckWeight(searcher, needsScores ? getTermContexts(matchWeight) : null, matchWeight);
- }
-
- /**
- * Weight that pulls its Spans using a PayloadSpanCollector
- */
- public class SpanPayloadCheckWeight extends SpanWeight {
-
- final SpanWeight matchWeight;
-
- public SpanPayloadCheckWeight(IndexSearcher searcher, Map<Term, TermContext> termContexts, SpanWeight matchWeight) throws IOException {
- super(SpanPayloadCheckQuery.this, searcher, termContexts);
- this.matchWeight = matchWeight;
- }
-
- @Override
- public void extractTerms(Set<Term> terms) {
- matchWeight.extractTerms(terms);
- }
-
- @Override
- public void extractTermContexts(Map<Term, TermContext> contexts) {
- matchWeight.extractTermContexts(contexts);
- }
-
- @Override
- public Spans getSpans(final LeafReaderContext context, Postings requiredPostings) throws IOException {
- final PayloadChecker collector = new PayloadChecker();
- Spans matchSpans = matchWeight.getSpans(context, requiredPostings.atLeast(Postings.PAYLOADS));
- return (matchSpans == null) ? null : new FilterSpans(matchSpans) {
- @Override
- protected AcceptStatus accept(Spans candidate) throws IOException {
- collector.reset();
- candidate.collect(collector);
- return collector.match();
- }
- };
- }
-
- @Override
- public Scorer scorer(LeafReaderContext context) throws IOException {
- if (field == null)
- return null;
-
- Terms terms = context.reader().terms(field);
- if (terms != null && terms.hasPositions() == false) {
- throw new IllegalStateException("field \"" + field + "\" was indexed without position data; cannot run SpanQuery (query=" + parentQuery + ")");
- }
-
- Spans spans = getSpans(context, Postings.PAYLOADS);
- Similarity.SimScorer simScorer = simWeight == null ? null : similarity.simScorer(simWeight, context);
- return (spans == null) ? null : new SpanScorer(spans, this, simScorer);
- }
- }
-
- private class PayloadChecker implements SpanCollector {
-
- int upto = 0;
- boolean matches = true;
-
- @Override
- public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
- if (!matches)
- return;
- if (upto >= payloadToMatch.size()) {
- matches = false;
- return;
- }
- BytesRef payload = postings.getPayload();
- if (payloadToMatch.get(upto) == null) {
- matches = payload == null;
- upto++;
- return;
- }
- if (payload == null) {
- matches = false;
- upto++;
- return;
- }
- matches = payloadToMatch.get(upto).bytesEquals(payload);
- upto++;
- }
-
- AcceptStatus match() {
- return matches && upto == payloadToMatch.size() ? AcceptStatus.YES : AcceptStatus.NO;
- }
-
- @Override
- public void reset() {
- this.upto = 0;
- this.matches = true;
- }
- }
-
- @Override
- public String toString(String field) {
- StringBuilder buffer = new StringBuilder();
- buffer.append("spanPayCheck(");
- buffer.append(match.toString(field));
- buffer.append(", payloadRef: ");
- for (BytesRef bytes : payloadToMatch) {
- buffer.append(Term.toString(bytes));
- buffer.append(';');
- }
- buffer.append(")");
- return buffer.toString();
- }
-
- @Override
- public boolean equals(Object o) {
- if (! super.equals(o)) {
- return false;
- }
- SpanPayloadCheckQuery other = (SpanPayloadCheckQuery)o;
- return this.payloadToMatch.equals(other.payloadToMatch);
- }
-
- @Override
- public int hashCode() {
- int h = super.hashCode();
- h = (h * 63) ^ payloadToMatch.hashCode();
- return h;
- }
-}
\ No newline at end of file
Index: lucene/core/src/java/org/apache/lucene/search/payloads/package-info.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/search/payloads/package-info.java (revision 1703369)
+++ lucene/core/src/java/org/apache/lucene/search/payloads/package-info.java (working copy)
@@ -1,27 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * The payloads package provides Query mechanisms for finding and using payloads.
- * <p>
- * The following Query implementations are provided:
- * <ol>
- * <li>{@link org.apache.lucene.search.payloads.PayloadScoreQuery PayloadScoreQuery} -- For all terms matched by
- * a SpanQuery, boost the score based on the value of the payload located at those terms.</li>
- * </ol>
- */
-package org.apache.lucene.search.payloads;
Index: lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java (revision 1703369)
+++ lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java (working copy)
@@ -19,8 +19,8 @@
import java.io.IOException;
import java.io.StringReader;
-import java.nio.charset.StandardCharsets;
-import java.util.Collection;
+import java.util.ArrayList;
+import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockPayloadAnalyzer;
@@ -38,9 +38,8 @@
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
-import org.apache.lucene.search.payloads.PayloadSpanCollector;
-import org.apache.lucene.search.payloads.PayloadSpanUtil;
import org.apache.lucene.search.spans.MultiSpansWrapper;
+import org.apache.lucene.search.spans.SpanCollector;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
@@ -201,6 +200,22 @@
store.close();
}
+ /** Test-only {@link SpanCollector} that keeps a deep copy of every non-null payload it visits. */
+ static class PayloadSpanCollector implements SpanCollector {
+ final List<BytesRef> payloads = new ArrayList<>();
+ @Override
+ public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
+ // Read the payload once; copy it so the stored value does not alias the enum's BytesRef.
+ final BytesRef payload = postings.getPayload();
+ if (payload != null) {
+ payloads.add(BytesRef.deepCopyOf(payload));
+ }
+ }
+ @Override
+ public void reset() {
+ payloads.clear();
+ }
+ }
public void testPayloadsPos0() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, new MockPayloadAnalyzer());
@@ -248,12 +263,11 @@
}
collector.reset();
pspans.collect(collector);
- Collection<byte[]> payloads = collector.getPayloads();
sawZero |= pspans.startPosition() == 0;
- for (byte[] bytes : payloads) {
+ for (BytesRef payload : collector.payloads) {
count++;
if (VERBOSE) {
- System.out.println(" payload: " + new String(bytes, StandardCharsets.UTF_8));
+ System.out.println(" payload: " + Term.toString(payload));
}
}
}
@@ -276,17 +290,6 @@
assertEquals(4, count);
assertTrue(sawZero);
- sawZero = false;
- PayloadSpanUtil psu = new PayloadSpanUtil(is.getTopReaderContext());
- Collection<byte[]> pls = psu.getPayloadsForQuery(snq);
- count = pls.size();
- for (byte[] bytes : pls) {
- String s = new String(bytes, StandardCharsets.UTF_8);
- //System.out.println(s);
- sawZero |= s.equals("pos: 0");
- }
- assertEquals(8, count);
- assertTrue(sawZero);
writer.close();
is.getIndexReader().close();
dir.close();
Index: lucene/core/src/test/org/apache/lucene/search/payloads/PayloadHelper.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/search/payloads/PayloadHelper.java (revision 1703369)
+++ lucene/core/src/test/org/apache/lucene/search/payloads/PayloadHelper.java (working copy)
@@ -1,141 +0,0 @@
-package org.apache.lucene.search.payloads;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.*;
-import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.TextField;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.English;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.MockDirectoryWrapper;
-import org.apache.lucene.store.RAMDirectory;
-
-import java.io.IOException;
-import java.util.Random;
-
-/**
- *
- *
- **/
-public class PayloadHelper {
-
- private byte[] payloadField = new byte[]{1};
- private byte[] payloadMultiField1 = new byte[]{2};
- private byte[] payloadMultiField2 = new byte[]{4};
- public static final String NO_PAYLOAD_FIELD = "noPayloadField";
- public static final String MULTI_FIELD = "multiField";
- public static final String FIELD = "field";
-
- public IndexReader reader;
-
- public final class PayloadAnalyzer extends Analyzer {
-
- public PayloadAnalyzer() {
- super(PER_FIELD_REUSE_STRATEGY);
- }
-
- @Override
- public TokenStreamComponents createComponents(String fieldName) {
- Tokenizer result = new MockTokenizer(MockTokenizer.SIMPLE, true);
- return new TokenStreamComponents(result, new PayloadFilter(result, fieldName));
- }
- }
-
- public final class PayloadFilter extends TokenFilter {
- private final String fieldName;
- private int numSeen = 0;
- private final PayloadAttribute payloadAtt;
-
- public PayloadFilter(TokenStream input, String fieldName) {
- super(input);
- this.fieldName = fieldName;
- payloadAtt = addAttribute(PayloadAttribute.class);
- }
-
- @Override
- public boolean incrementToken() throws IOException {
-
- if (input.incrementToken()) {
- if (fieldName.equals(FIELD)) {
- payloadAtt.setPayload(new BytesRef(payloadField));
- } else if (fieldName.equals(MULTI_FIELD)) {
- if (numSeen % 2 == 0) {
- payloadAtt.setPayload(new BytesRef(payloadMultiField1));
- }
- else {
- payloadAtt.setPayload(new BytesRef(payloadMultiField2));
- }
- numSeen++;
- }
- return true;
- }
- return false;
- }
-
- @Override
- public void reset() throws IOException {
- super.reset();
- this.numSeen = 0;
- }
- }
-
- /**
- * Sets up a RAMDirectory, and adds documents (using English.intToEnglish()) with two fields: field and multiField
- * and analyzes them using the PayloadAnalyzer
- * @param similarity The Similarity class to use in the Searcher
- * @param numDocs The num docs to add
- * @return An IndexSearcher
- */
- // TODO: randomize
- public IndexSearcher setUp(Random random, Similarity similarity, int numDocs) throws IOException {
- Directory directory = new MockDirectoryWrapper(random, new RAMDirectory());
- PayloadAnalyzer analyzer = new PayloadAnalyzer();
-
- // TODO randomize this
- IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(
- analyzer).setSimilarity(similarity));
- // writer.infoStream = System.out;
- for (int i = 0; i < numDocs; i++) {
- Document doc = new Document();
- doc.add(new TextField(FIELD, English.intToEnglish(i), Field.Store.YES));
- doc.add(new TextField(MULTI_FIELD, English.intToEnglish(i) + " " + English.intToEnglish(i), Field.Store.YES));
- doc.add(new TextField(NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES));
- writer.addDocument(doc);
- }
- reader = DirectoryReader.open(writer, true);
- writer.close();
-
- IndexSearcher searcher = LuceneTestCase.newSearcher(reader);
- searcher.setSimilarity(similarity);
- return searcher;
- }
-
- public void tearDown() throws Exception {
- reader.close();
- }
-}
Index: lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadCheckQuery.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadCheckQuery.java (revision 1703369)
+++ lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadCheckQuery.java (working copy)
@@ -1,192 +0,0 @@
-package org.apache.lucene.search.payloads;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.SimplePayloadFilter;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.CheckHits;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.spans.SpanNearQuery;
-import org.apache.lucene.search.spans.SpanPositionRangeQuery;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.English;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.TestUtil;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-
-/** basic test of payload-spans */
-public class TestPayloadCheckQuery extends LuceneTestCase {
- private static IndexSearcher searcher;
- private static IndexReader reader;
- private static Directory directory;
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- Analyzer simplePayloadAnalyzer = new Analyzer() {
- @Override
- public TokenStreamComponents createComponents(String fieldName) {
- Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
- return new TokenStreamComponents(tokenizer, new SimplePayloadFilter(tokenizer));
- }
- };
-
- directory = newDirectory();
- RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
- newIndexWriterConfig(simplePayloadAnalyzer)
- .setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000)).setMergePolicy(newLogMergePolicy()));
- //writer.infoStream = System.out;
- for (int i = 0; i < 2000; i++) {
- Document doc = new Document();
- doc.add(newTextField("field", English.intToEnglish(i), Field.Store.YES));
- writer.addDocument(doc);
- }
- reader = writer.getReader();
- searcher = newSearcher(reader);
- writer.close();
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- reader.close();
- directory.close();
- searcher = null;
- reader = null;
- directory = null;
- }
-
- private void checkHits(Query query, int[] results) throws IOException {
- CheckHits.checkHits(random(), query, "field", searcher, results);
- }
-
- public void testSpanPayloadCheck() throws Exception {
- SpanQuery term1 = new SpanTermQuery(new Term("field", "five"));
- BytesRef pay = new BytesRef("pos: " + 5);
- SpanQuery query = new SpanPayloadCheckQuery(term1, Collections.singletonList(pay));
- checkHits(query, new int[]
- {1125, 1135, 1145, 1155, 1165, 1175, 1185, 1195, 1225, 1235, 1245, 1255, 1265, 1275, 1285, 1295, 1325, 1335, 1345, 1355, 1365, 1375, 1385, 1395, 1425, 1435, 1445, 1455, 1465, 1475, 1485, 1495, 1525, 1535, 1545, 1555, 1565, 1575, 1585, 1595, 1625, 1635, 1645, 1655, 1665, 1675, 1685, 1695, 1725, 1735, 1745, 1755, 1765, 1775, 1785, 1795, 1825, 1835, 1845, 1855, 1865, 1875, 1885, 1895, 1925, 1935, 1945, 1955, 1965, 1975, 1985, 1995});
- assertTrue(searcher.explain(query, 1125).getValue() > 0.0f);
-
- SpanTermQuery term2 = new SpanTermQuery(new Term("field", "hundred"));
- SpanNearQuery snq;
- SpanQuery[] clauses;
- List<BytesRef> list;
- BytesRef pay2;
- clauses = new SpanQuery[2];
- clauses[0] = term1;
- clauses[1] = term2;
- snq = new SpanNearQuery(clauses, 0, true);
- pay = new BytesRef("pos: " + 0);
- pay2 = new BytesRef("pos: " + 1);
- list = new ArrayList<>();
- list.add(pay);
- list.add(pay2);
- query = new SpanPayloadCheckQuery(snq, list);
- checkHits(query, new int[]
- {500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599});
- clauses = new SpanQuery[3];
- clauses[0] = term1;
- clauses[1] = term2;
- clauses[2] = new SpanTermQuery(new Term("field", "five"));
- snq = new SpanNearQuery(clauses, 0, true);
- pay = new BytesRef("pos: " + 0);
- pay2 = new BytesRef("pos: " + 1);
- BytesRef pay3 = new BytesRef("pos: " + 2);
- list = new ArrayList<>();
- list.add(pay);
- list.add(pay2);
- list.add(pay3);
- query = new SpanPayloadCheckQuery(snq, list);
- checkHits(query, new int[]
- {505});
- }
-
- public void testUnorderedPayloadChecks() throws Exception {
-
- SpanTermQuery term5 = new SpanTermQuery(new Term("field", "five"));
- SpanTermQuery term100 = new SpanTermQuery(new Term("field", "hundred"));
- SpanTermQuery term4 = new SpanTermQuery(new Term("field", "four"));
- SpanNearQuery nearQuery = new SpanNearQuery(new SpanQuery[]{term5, term100, term4}, 0, false);
-
- List<BytesRef> payloads = new ArrayList<>();
- payloads.add(new BytesRef("pos: " + 2));
- payloads.add(new BytesRef("pos: " + 1));
- payloads.add(new BytesRef("pos: " + 0));
-
- SpanPayloadCheckQuery payloadQuery = new SpanPayloadCheckQuery(nearQuery, payloads);
- checkHits(payloadQuery, new int[]{ 405 });
-
- payloads.clear();
- payloads.add(new BytesRef("pos: " + 0));
- payloads.add(new BytesRef("pos: " + 1));
- payloads.add(new BytesRef("pos: " + 2));
-
- payloadQuery = new SpanPayloadCheckQuery(nearQuery, payloads);
- checkHits(payloadQuery, new int[]{ 504 });
-
- }
-
- public void testComplexSpanChecks() throws Exception {
- SpanTermQuery one = new SpanTermQuery(new Term("field", "one"));
- SpanTermQuery thous = new SpanTermQuery(new Term("field", "thousand"));
- //should be one position in between
- SpanTermQuery hundred = new SpanTermQuery(new Term("field", "hundred"));
- SpanTermQuery three = new SpanTermQuery(new Term("field", "three"));
-
- SpanNearQuery oneThous = new SpanNearQuery(new SpanQuery[]{one, thous}, 0, true);
- SpanNearQuery hundredThree = new SpanNearQuery(new SpanQuery[]{hundred, three}, 0, true);
- SpanNearQuery oneThousHunThree = new SpanNearQuery(new SpanQuery[]{oneThous, hundredThree}, 1, true);
- SpanQuery query;
- //this one's too small
- query = new SpanPositionRangeQuery(oneThousHunThree, 1, 2);
- checkHits(query, new int[]{});
- //this one's just right
- query = new SpanPositionRangeQuery(oneThousHunThree, 0, 6);
- checkHits(query, new int[]{1103, 1203,1303,1403,1503,1603,1703,1803,1903});
-
- List<BytesRef> payloads = new ArrayList<>();
- BytesRef pay = new BytesRef(("pos: " + 0).getBytes(StandardCharsets.UTF_8));
- BytesRef pay2 = new BytesRef(("pos: " + 1).getBytes(StandardCharsets.UTF_8));
- BytesRef pay3 = new BytesRef(("pos: " + 3).getBytes(StandardCharsets.UTF_8));
- BytesRef pay4 = new BytesRef(("pos: " + 4).getBytes(StandardCharsets.UTF_8));
- payloads.add(pay);
- payloads.add(pay2);
- payloads.add(pay3);
- payloads.add(pay4);
- query = new SpanPayloadCheckQuery(oneThousHunThree, payloads);
- checkHits(query, new int[]{1103, 1203,1303,1403,1503,1603,1703,1803,1903});
-
- }
-}
Index: lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadExplanations.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadExplanations.java (revision 1703369)
+++ lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadExplanations.java (working copy)
@@ -1,127 +0,0 @@
-package org.apache.lucene.search.payloads;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.BaseExplanationTestCase;
-import org.apache.lucene.search.similarities.ClassicSimilarity;
-import org.apache.lucene.search.spans.SpanBoostQuery;
-import org.apache.lucene.search.spans.SpanNearQuery;
-import org.apache.lucene.search.spans.SpanOrQuery;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
-import org.apache.lucene.util.BytesRef;
-
-/**
- * TestExplanations subclass focusing on payload queries
- */
-public class TestPayloadExplanations extends BaseExplanationTestCase {
-
- private static PayloadFunction functions[] = new PayloadFunction[] {
- new AveragePayloadFunction(),
- new MinPayloadFunction(),
- new MaxPayloadFunction(),
- };
-
- @Override
- public void setUp() throws Exception {
- super.setUp();
- searcher.setSimilarity(new ClassicSimilarity() {
- @Override
- public float scorePayload(int doc, int start, int end, BytesRef payload) {
- return 1 + (payload.hashCode() % 10);
- }
- });
- }
-
- /** macro for payloadscorequery */
- private SpanQuery pt(String s, PayloadFunction fn) {
- return new PayloadScoreQuery(new SpanTermQuery(new Term(FIELD,s)), fn);
- }
-
- /* simple PayloadTermQueries */
-
- public void testPT1() throws Exception {
- for (PayloadFunction fn : functions) {
- qtest(pt("w1", fn), new int[] {0,1,2,3});
- }
- }
-
- public void testPT2() throws Exception {
- for (PayloadFunction fn : functions) {
- SpanQuery q = pt("w1", fn);
- qtest(new SpanBoostQuery(q, 1000), new int[] {0,1,2,3});
- }
- }
-
- public void testPT4() throws Exception {
- for (PayloadFunction fn : functions) {
- qtest(pt("xx", fn), new int[] {2,3});
- }
- }
-
- public void testPT5() throws Exception {
- for (PayloadFunction fn : functions) {
- SpanQuery q = pt("xx", fn);
- qtest(new SpanBoostQuery(q, 1000), new int[] {2,3});
- }
- }
-
- // TODO: test the payloadnear query too!
-
- /*
- protected static final String[] docFields = {
- "w1 w2 w3 w4 w5",
- "w1 w3 w2 w3 zz",
- "w1 xx w2 yy w3",
- "w1 w3 xx w2 yy w3 zz"
- };
- */
-
- public void testAllFunctions(SpanQuery query, int[] expected) throws Exception {
- for (PayloadFunction fn : functions) {
- qtest(new PayloadScoreQuery(query, fn), expected);
- }
- }
-
- public void testSimpleTerm() throws Exception {
- SpanTermQuery q = new SpanTermQuery(new Term(FIELD, "w2"));
- testAllFunctions(q, new int[]{ 0, 1, 2, 3});
- }
-
- public void testOrTerm() throws Exception {
- SpanOrQuery q = new SpanOrQuery(
- new SpanTermQuery(new Term(FIELD, "xx")), new SpanTermQuery(new Term(FIELD, "yy"))
- );
- testAllFunctions(q, new int[]{ 2, 3 });
- }
-
- public void testOrderedNearQuery() throws Exception {
- SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{
- new SpanTermQuery(new Term(FIELD, "w3")), new SpanTermQuery(new Term(FIELD, "w2"))
- }, 1, true);
- testAllFunctions(q, new int[]{ 1, 3 });
- }
-
- public void testUnorderedNearQuery() throws Exception {
- SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{
- new SpanTermQuery(new Term(FIELD, "w2")), new SpanTermQuery(new Term(FIELD, "w3"))
- }, 1, false);
- testAllFunctions(q, new int[]{ 0, 1, 2, 3 });
- }
-}
Index: lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadScoreQuery.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadScoreQuery.java (revision 1703369)
+++ lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadScoreQuery.java (working copy)
@@ -1,286 +0,0 @@
-package org.apache.lucene.search.payloads;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.index.FieldInvertState;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.NoMergePolicy;
-import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.CollectionStatistics;
-import org.apache.lucene.search.Explanation;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.TermStatistics;
-import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.search.similarities.ClassicSimilarity;
-import org.apache.lucene.search.spans.SpanContainingQuery;
-import org.apache.lucene.search.spans.SpanNearQuery;
-import org.apache.lucene.search.spans.SpanOrQuery;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.English;
-import org.apache.lucene.util.LuceneTestCase;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-public class TestPayloadScoreQuery extends LuceneTestCase {
-
- private static void checkQuery(SpanQuery query, PayloadFunction function, int[] expectedDocs, float[] expectedScores) throws IOException {
-
- assertTrue("Expected docs and scores arrays must be the same length!", expectedDocs.length == expectedScores.length);
-
- PayloadScoreQuery psq = new PayloadScoreQuery(query, function);
- TopDocs hits = searcher.search(psq, expectedDocs.length);
-
- for (int i = 0; i < hits.scoreDocs.length; i++) {
- if (i > expectedDocs.length - 1)
- fail("Unexpected hit in document " + hits.scoreDocs[i].doc);
- if (hits.scoreDocs[i].doc != expectedDocs[i])
- fail("Unexpected hit in document " + hits.scoreDocs[i].doc);
- assertEquals("Bad score in document " + expectedDocs[i], expectedScores[i], hits.scoreDocs[i].score, 0.000001);
- }
-
- if (hits.scoreDocs.length > expectedDocs.length)
- fail("Unexpected hit in document " + hits.scoreDocs[expectedDocs.length]);
- }
-
- @Test
- public void testTermQuery() throws IOException {
-
- SpanTermQuery q = new SpanTermQuery(new Term("field", "eighteen"));
- for (PayloadFunction fn
- : new PayloadFunction[]{ new AveragePayloadFunction(), new MaxPayloadFunction(), new MinPayloadFunction() }) {
- checkQuery(q, fn, new int[]{ 118, 218, 18 },
- new float[] { 4.0f, 4.0f, 2.0f });
- }
-
- }
-
- @Test
- public void testOrQuery() throws IOException {
-
- SpanOrQuery q = new SpanOrQuery(new SpanTermQuery(new Term("field", "eighteen")),
- new SpanTermQuery(new Term("field", "nineteen")));
- for (PayloadFunction fn
- : new PayloadFunction[]{ new AveragePayloadFunction(), new MaxPayloadFunction(), new MinPayloadFunction() }) {
- checkQuery(q, fn, new int[]{ 118, 119, 218, 219, 18, 19 },
- new float[] { 4.0f, 4.0f, 4.0f, 4.0f, 2.0f, 2.0f });
- }
-
- }
-
- @Test
- public void testNearQuery() throws IOException {
-
- // 2 4
- // twenty two
- // 2 4 4 4
- // one hundred twenty two
-
- SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{
- new SpanTermQuery(new Term("field", "twenty")),
- new SpanTermQuery(new Term("field", "two"))
- }, 0, true);
-
- checkQuery(q, new MaxPayloadFunction(), new int[]{ 22, 122, 222 }, new float[]{ 4.0f, 4.0f, 4.0f });
- checkQuery(q, new MinPayloadFunction(), new int[]{ 122, 222, 22 }, new float[]{ 4.0f, 4.0f, 2.0f });
- checkQuery(q, new AveragePayloadFunction(), new int[] { 122, 222, 22 }, new float[] { 4.0f, 4.0f, 3.0f });
-
- }
-
- @Test
- public void testNestedNearQuery() throws Exception {
-
- // (one OR hundred) NEAR (twenty two) ~ 1
- // 2 4 4 4
- // one hundred twenty two
- // two hundred twenty two
-
- SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{
- new SpanOrQuery(new SpanTermQuery(new Term("field", "one")), new SpanTermQuery(new Term("field", "hundred"))),
- new SpanNearQuery(new SpanQuery[]{
- new SpanTermQuery(new Term("field", "twenty")),
- new SpanTermQuery(new Term("field", "two"))
- }, 0, true)
- }, 1, true);
-
- checkQuery(q, new MaxPayloadFunction(), new int[]{ 122, 222 }, new float[]{ 4.0f, 4.0f });
- checkQuery(q, new MinPayloadFunction(), new int[]{ 222, 122 }, new float[]{ 4.0f, 2.0f });
- checkQuery(q, new AveragePayloadFunction(), new int[] { 222, 122 }, new float[]{ 4.0f, 3.666666f });
-
- }
-
- @Test
- public void testSpanContainingQuery() throws Exception {
-
- // twenty WITHIN ((one OR hundred) NEAR two)~2
- SpanContainingQuery q = new SpanContainingQuery(
- new SpanNearQuery(new SpanQuery[]{
- new SpanOrQuery(new SpanTermQuery(new Term("field", "one")), new SpanTermQuery(new Term("field", "hundred"))),
- new SpanTermQuery(new Term("field", "two"))
- }, 2, true),
- new SpanTermQuery(new Term("field", "twenty"))
- );
-
- checkQuery(q, new AveragePayloadFunction(), new int[] { 222, 122 }, new float[]{ 4.0f, 3.666666f });
- checkQuery(q, new MaxPayloadFunction(), new int[]{ 122, 222 }, new float[]{ 4.0f, 4.0f });
- checkQuery(q, new MinPayloadFunction(), new int[]{ 222, 122 }, new float[]{ 4.0f, 2.0f });
-
- }
-
- private static IndexSearcher searcher;
- private static IndexReader reader;
- private static Directory directory;
- private static BoostingSimilarity similarity = new BoostingSimilarity();
- private static byte[] payload2 = new byte[]{2};
- private static byte[] payload4 = new byte[]{4};
-
- private static class PayloadAnalyzer extends Analyzer {
- @Override
- public TokenStreamComponents createComponents(String fieldName) {
- Tokenizer result = new MockTokenizer(MockTokenizer.SIMPLE, true);
- return new TokenStreamComponents(result, new PayloadFilter(result));
- }
- }
-
- private static class PayloadFilter extends TokenFilter {
-
- private int numSeen = 0;
- private final PayloadAttribute payAtt;
-
- public PayloadFilter(TokenStream input) {
- super(input);
- payAtt = addAttribute(PayloadAttribute.class);
- }
-
- @Override
- public boolean incrementToken() throws IOException {
- boolean result = false;
- if (input.incrementToken()) {
- if (numSeen % 4 == 0) {
- payAtt.setPayload(new BytesRef(payload2));
- } else {
- payAtt.setPayload(new BytesRef(payload4));
- }
- numSeen++;
- result = true;
- }
- return result;
- }
-
- @Override
- public void reset() throws IOException {
- super.reset();
- this.numSeen = 0;
- }
- }
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- directory = newDirectory();
- RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
- newIndexWriterConfig(new PayloadAnalyzer())
- .setMergePolicy(NoMergePolicy.INSTANCE)
- .setSimilarity(similarity));
- //writer.infoStream = System.out;
- for (int i = 0; i < 300; i++) {
- Document doc = new Document();
- doc.add(newTextField("field", English.intToEnglish(i), Field.Store.YES));
- String txt = English.intToEnglish(i) +' '+English.intToEnglish(i+1);
- doc.add(newTextField("field2", txt, Field.Store.YES));
- writer.addDocument(doc);
- }
- reader = writer.getReader();
- writer.close();
-
- searcher = newSearcher(reader);
- searcher.setSimilarity(similarity);
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- searcher = null;
- reader.close();
- reader = null;
- directory.close();
- directory = null;
- }
-
- static class BoostingSimilarity extends ClassicSimilarity {
-
- @Override
- public float queryNorm(float sumOfSquaredWeights) {
- return 1.0f;
- }
-
- @Override
- public float coord(int overlap, int maxOverlap) {
- return 1.0f;
- }
-
- @Override
- public float scorePayload(int docId, int start, int end, BytesRef payload) {
- //we know it is size 4 here, so ignore the offset/length
- return payload.bytes[payload.offset];
- }
-
- //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- //Make everything else 1 so we see the effect of the payload
- //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- @Override
- public float lengthNorm(FieldInvertState state) {
- return state.getBoost();
- }
-
- @Override
- public float sloppyFreq(int distance) {
- return 1.0f;
- }
-
- @Override
- public float tf(float freq) {
- return 1.0f;
- }
-
- // idf used for phrase queries
- @Override
- public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics[] termStats) {
- return Explanation.match(1.0f, "Inexplicable");
- }
-
- @Override
- public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
- return Explanation.match(1.0f, "Inexplicable");
- }
-
- }
-
-}
Index: lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadSpans.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadSpans.java (revision 1703369)
+++ lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadSpans.java (working copy)
@@ -1,551 +0,0 @@
-package org.apache.lucene.search.payloads;
-
-/*
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.PostingsEnum;
-import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.search.similarities.ClassicSimilarity;
-import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.search.spans.MultiSpansWrapper;
-import org.apache.lucene.search.spans.SpanFirstQuery;
-import org.apache.lucene.search.spans.SpanNearQuery;
-import org.apache.lucene.search.spans.SpanNotQuery;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
-import org.apache.lucene.search.spans.SpanWeight;
-import org.apache.lucene.search.spans.Spans;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.LuceneTestCase;
-
-import java.io.IOException;
-import java.io.StringReader;
-import java.nio.charset.StandardCharsets;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Set;
-
-public class TestPayloadSpans extends LuceneTestCase {
- private IndexSearcher searcher;
- private Similarity similarity = new ClassicSimilarity();
- protected IndexReader indexReader;
- private IndexReader closeIndexReader;
- private Directory directory;
-
- @Override
- public void setUp() throws Exception {
- super.setUp();
- PayloadHelper helper = new PayloadHelper();
- searcher = helper.setUp(random(), similarity, 1000);
- indexReader = searcher.getIndexReader();
- }
-
- public void testSpanTermQuery() throws Exception {
- SpanTermQuery stq;
- Spans spans;
- stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "seventy"));
- PayloadSpanCollector collector = new PayloadSpanCollector();
- spans = MultiSpansWrapper.wrap(indexReader, stq, SpanWeight.Postings.PAYLOADS);
- assertTrue("spans is null and it shouldn't be", spans != null);
- checkSpans(spans, collector, 100, 1, 1, 1);
-
- stq = new SpanTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "seventy"));
- spans = MultiSpansWrapper.wrap(indexReader, stq, SpanWeight.Postings.PAYLOADS);
- assertTrue("spans is null and it shouldn't be", spans != null);
- checkSpans(spans, collector, 100, 0, 0, 0);
- }
-
- public void testSpanFirst() throws IOException {
-
- SpanQuery match;
- SpanFirstQuery sfq;
- match = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
- sfq = new SpanFirstQuery(match, 2);
- PayloadSpanCollector collector = new PayloadSpanCollector();
- Spans spans = MultiSpansWrapper.wrap(indexReader, sfq, SpanWeight.Postings.PAYLOADS);
- checkSpans(spans, collector, 109, 1, 1, 1);
- //Test more complicated subclause
- SpanQuery[] clauses = new SpanQuery[2];
- clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
- clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "hundred"));
- match = new SpanNearQuery(clauses, 0, true);
- sfq = new SpanFirstQuery(match, 2);
- checkSpans(MultiSpansWrapper.wrap(indexReader, sfq, SpanWeight.Postings.PAYLOADS), collector, 100, 2, 1, 1);
-
- match = new SpanNearQuery(clauses, 0, false);
- sfq = new SpanFirstQuery(match, 2);
- checkSpans(MultiSpansWrapper.wrap(indexReader, sfq, SpanWeight.Postings.PAYLOADS), collector, 100, 2, 1, 1);
-
- }
-
- public void testSpanNot() throws Exception {
- SpanQuery[] clauses = new SpanQuery[2];
- clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
- clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "three"));
- SpanQuery spq = new SpanNearQuery(clauses, 5, true);
- SpanNotQuery snq = new SpanNotQuery(spq, new SpanTermQuery(new Term(PayloadHelper.FIELD, "two")));
-
-
-
- Directory directory = newDirectory();
- RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
- newIndexWriterConfig(new PayloadAnalyzer()).setSimilarity(similarity));
-
- Document doc = new Document();
- doc.add(newTextField(PayloadHelper.FIELD, "one two three one four three", Field.Store.YES));
- writer.addDocument(doc);
- IndexReader reader = writer.getReader();
- writer.close();
-
- PayloadSpanCollector collector = new PayloadSpanCollector();
- checkSpans(MultiSpansWrapper.wrap(reader, snq, SpanWeight.Postings.PAYLOADS), collector, 1, new int[]{2});
- reader.close();
- directory.close();
- }
-
- public void testNestedSpans() throws Exception {
- SpanTermQuery stq;
- Spans spans;
- IndexSearcher searcher = getSearcher();
- PayloadSpanCollector collector = new PayloadSpanCollector();
-
- stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "mark"));
- spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), stq, SpanWeight.Postings.PAYLOADS);
- assertNull(spans);
-
- SpanQuery[] clauses = new SpanQuery[3];
- clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr"));
- clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "yy"));
- clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx"));
- SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 12, false);
-
- spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery, SpanWeight.Postings.PAYLOADS);
- assertTrue("spans is null and it shouldn't be", spans != null);
- checkSpans(spans, collector, 2, new int[]{3,3});
-
-
- clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx"));
- clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr"));
- clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "yy"));
-
- spanNearQuery = new SpanNearQuery(clauses, 6, true);
-
- spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery, SpanWeight.Postings.PAYLOADS);
-
- assertTrue("spans is null and it shouldn't be", spans != null);
- checkSpans(spans, collector, 1, new int[]{3});
-
- clauses = new SpanQuery[2];
-
- clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx"));
- clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr"));
-
- spanNearQuery = new SpanNearQuery(clauses, 6, true);
-
- // xx within 6 of rr
-
- SpanQuery[] clauses2 = new SpanQuery[2];
-
- clauses2[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "yy"));
- clauses2[1] = spanNearQuery;
-
- SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses2, 6, false);
-
- // yy within 6 of xx within 6 of rr
- spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery, SpanWeight.Postings.PAYLOADS);
- assertTrue("spans is null and it shouldn't be", spans != null);
- checkSpans(spans, collector, 2, new int[]{3,3});
- closeIndexReader.close();
- directory.close();
- }
-
- public void testFirstClauseWithoutPayload() throws Exception {
- Spans spans;
- IndexSearcher searcher = getSearcher();
-
- SpanQuery[] clauses = new SpanQuery[3];
- clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "nopayload"));
- clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "qq"));
- clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "ss"));
-
- SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 6, true);
-
- SpanQuery[] clauses2 = new SpanQuery[2];
-
- clauses2[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "pp"));
- clauses2[1] = spanNearQuery;
-
- SpanNearQuery snq = new SpanNearQuery(clauses2, 6, false);
-
- SpanQuery[] clauses3 = new SpanQuery[2];
-
- clauses3[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "np"));
- clauses3[1] = snq;
-
- PayloadSpanCollector collector = new PayloadSpanCollector();
- SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false);
- spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery, SpanWeight.Postings.PAYLOADS);
-
- assertTrue("spans is null and it shouldn't be", spans != null);
- checkSpans(spans, collector, 1, new int[]{3});
- closeIndexReader.close();
- directory.close();
- }
-
- public void testHeavilyNestedSpanQuery() throws Exception {
- Spans spans;
- IndexSearcher searcher = getSearcher();
-
- SpanQuery[] clauses = new SpanQuery[3];
- clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
- clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "two"));
- clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "three"));
-
- SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 5, true);
-
- clauses = new SpanQuery[3];
- clauses[0] = spanNearQuery;
- clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "five"));
- clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "six"));
-
- SpanNearQuery spanNearQuery2 = new SpanNearQuery(clauses, 6, true);
-
- SpanQuery[] clauses2 = new SpanQuery[2];
- clauses2[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "eleven"));
- clauses2[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "ten"));
- SpanNearQuery spanNearQuery3 = new SpanNearQuery(clauses2, 2, false);
-
- SpanQuery[] clauses3 = new SpanQuery[3];
- clauses3[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "nine"));
- clauses3[1] = spanNearQuery2;
- clauses3[2] = spanNearQuery3;
-
- SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false);
-
- PayloadSpanCollector collector = new PayloadSpanCollector();
- spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery, SpanWeight.Postings.PAYLOADS);
- assertTrue("spans is null and it shouldn't be", spans != null);
- checkSpans(spans, collector, 2, new int[]{8, 8});
- closeIndexReader.close();
- directory.close();
- }
-
- public void testShrinkToAfterShortestMatch() throws IOException {
- Directory directory = newDirectory();
- RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
- newIndexWriterConfig(new TestPayloadAnalyzer()));
-
- Document doc = new Document();
- doc.add(new TextField("content", new StringReader("a b c d e f g h i j a k")));
- writer.addDocument(doc);
-
- IndexReader reader = writer.getReader();
- IndexSearcher is = newSearcher(reader);
- writer.close();
-
- SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
- SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
- SpanQuery[] sqs = { stq1, stq2 };
- SpanNearQuery snq = new SpanNearQuery(sqs, 1, true);
- PayloadSpanCollector collector = new PayloadSpanCollector();
- Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, SpanWeight.Postings.PAYLOADS);
-
- TopDocs topDocs = is.search(snq, 1);
- Set<String> payloadSet = new HashSet<>();
- for (int i = 0; i < topDocs.scoreDocs.length; i++) {
- while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
- while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
- collector.reset();
- spans.collect(collector);
- Collection<byte[]> payloads = collector.getPayloads();
- for (final byte [] payload : payloads) {
- payloadSet.add(new String(payload, StandardCharsets.UTF_8));
- }
- }
- }
- }
- assertEquals(2, payloadSet.size());
- assertTrue(payloadSet.contains("a:Noise:10"));
- assertTrue(payloadSet.contains("k:Noise:11"));
- reader.close();
- directory.close();
- }
-
- public void testShrinkToAfterShortestMatch2() throws IOException {
- Directory directory = newDirectory();
- RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
- newIndexWriterConfig(new TestPayloadAnalyzer()));
-
- Document doc = new Document();
- doc.add(new TextField("content", new StringReader("a b a d k f a h i k a k")));
- writer.addDocument(doc);
- IndexReader reader = writer.getReader();
- IndexSearcher is = newSearcher(reader);
- writer.close();
-
- SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
- SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
- SpanQuery[] sqs = { stq1, stq2 };
- SpanNearQuery snq = new SpanNearQuery(sqs, 0, true);
- PayloadSpanCollector collector = new PayloadSpanCollector();
- Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, SpanWeight.Postings.PAYLOADS);
-
- TopDocs topDocs = is.search(snq, 1);
- Set<String> payloadSet = new HashSet<>();
- for (int i = 0; i < topDocs.scoreDocs.length; i++) {
- while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
- while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
- collector.reset();
- spans.collect(collector);
- Collection<byte[]> payloads = collector.getPayloads();
-
- for (final byte [] payload : payloads) {
- payloadSet.add(new String(payload, StandardCharsets.UTF_8));
- }
- }
- }
- }
- assertEquals(2, payloadSet.size());
- assertTrue(payloadSet.contains("a:Noise:10"));
- assertTrue(payloadSet.contains("k:Noise:11"));
- reader.close();
- directory.close();
- }
-
- public void testShrinkToAfterShortestMatch3() throws IOException {
- Directory directory = newDirectory();
- RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
- newIndexWriterConfig(new TestPayloadAnalyzer()));
-
- Document doc = new Document();
- doc.add(new TextField("content", new StringReader("j k a l f k k p a t a k l k t a")));
- writer.addDocument(doc);
- IndexReader reader = writer.getReader();
- IndexSearcher is = newSearcher(reader);
- writer.close();
-
- SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
- SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
- SpanQuery[] sqs = { stq1, stq2 };
- SpanNearQuery snq = new SpanNearQuery(sqs, 0, true);
- PayloadSpanCollector collector = new PayloadSpanCollector();
- Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, SpanWeight.Postings.PAYLOADS);
-
- TopDocs topDocs = is.search(snq, 1);
- Set<String> payloadSet = new HashSet<>();
- for (int i = 0; i < topDocs.scoreDocs.length; i++) {
- while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
- while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
- collector.reset();
- spans.collect(collector);
- Collection<byte[]> payloads = collector.getPayloads();
-
- for (final byte [] payload : payloads) {
- payloadSet.add(new String(payload, StandardCharsets.UTF_8));
- }
- }
- }
- }
- assertEquals(2, payloadSet.size());
- if(VERBOSE) {
- for (final String payload : payloadSet)
- System.out.println("match:" + payload);
-
- }
- assertTrue(payloadSet.contains("a:Noise:10"));
- assertTrue(payloadSet.contains("k:Noise:11"));
- reader.close();
- directory.close();
- }
-
- public void testPayloadSpanUtil() throws Exception {
- Directory directory = newDirectory();
- RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
- newIndexWriterConfig(new PayloadAnalyzer()).setSimilarity(similarity));
-
- Document doc = new Document();
- doc.add(newTextField(PayloadHelper.FIELD, "xx rr yy mm pp", Field.Store.YES));
- writer.addDocument(doc);
-
- IndexReader reader = writer.getReader();
- writer.close();
- IndexSearcher searcher = newSearcher(reader);
-
- PayloadSpanUtil psu = new PayloadSpanUtil(searcher.getTopReaderContext());
-
- Collection<byte[]> payloads = psu.getPayloadsForQuery(new TermQuery(new Term(PayloadHelper.FIELD, "rr")));
- if(VERBOSE) {
- System.out.println("Num payloads:" + payloads.size());
- for (final byte [] bytes : payloads) {
- System.out.println(new String(bytes, StandardCharsets.UTF_8));
- }
- }
- reader.close();
- directory.close();
- }
-
- private void checkSpans(Spans spans, PayloadSpanCollector collector, int expectedNumSpans, int expectedNumPayloads,
- int expectedPayloadLength, int expectedFirstByte) throws IOException {
- assertTrue("spans is null and it shouldn't be", spans != null);
- //each position match should have a span associated with it, since there is just one underlying term query, there should
- //only be one entry in the span
- int seen = 0;
- while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
- while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
- collector.reset();
- spans.collect(collector);
-
- Collection<byte[]> payload = collector.getPayloads();
- assertEquals("payload size", expectedNumPayloads, payload.size());
- for (final byte [] thePayload : payload) {
- assertEquals("payload length", expectedPayloadLength, thePayload.length);
- assertEquals("payload first byte", expectedFirstByte, thePayload[0]);
- }
-
- seen++;
- }
- }
- assertEquals("expectedNumSpans", expectedNumSpans, seen);
- }
-
- private IndexSearcher getSearcher() throws Exception {
- directory = newDirectory();
- String[] docs = new String[]{"xx rr yy mm pp","xx yy mm rr pp", "nopayload qq ss pp np", "one two three four five six seven eight nine ten eleven", "nine one two three four five six seven eight eleven ten"};
- RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
- newIndexWriterConfig(new PayloadAnalyzer()).setSimilarity(similarity));
-
- Document doc = null;
- for(int i = 0; i < docs.length; i++) {
- doc = new Document();
- String docText = docs[i];
- doc.add(newTextField(PayloadHelper.FIELD, docText, Field.Store.YES));
- writer.addDocument(doc);
- }
-
- closeIndexReader = writer.getReader();
- writer.close();
-
- IndexSearcher searcher = newSearcher(closeIndexReader);
- return searcher;
- }
-
- private void checkSpans(Spans spans, PayloadSpanCollector collector, int numSpans, int[] numPayloads) throws IOException {
- int cnt = 0;
-
- while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
- while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
- if(VERBOSE)
- System.out.println("\nSpans Dump --");
- collector.reset();
- spans.collect(collector);
-
- Collection<byte[]> payload = collector.getPayloads();
- if(VERBOSE) {
- System.out.println("payloads for span:" + payload.size());
- for (final byte [] bytes : payload) {
- System.out.println("doc:" + spans.docID() + " s:" + spans.startPosition() + " e:" + spans.endPosition() + " "
- + new String(bytes, StandardCharsets.UTF_8));
- }
- }
- assertEquals("payload size", numPayloads[cnt], payload.size());
-
- cnt++;
- }
- }
-
- assertEquals("expected numSpans", numSpans, cnt);
- }
-
- final class PayloadAnalyzer extends Analyzer {
-
- @Override
- public TokenStreamComponents createComponents(String fieldName) {
- Tokenizer result = new MockTokenizer(MockTokenizer.SIMPLE, true);
- return new TokenStreamComponents(result, new PayloadFilter(result));
- }
- }
-
- final class PayloadFilter extends TokenFilter {
- Set<String> entities = new HashSet<>();
- Set<String> nopayload = new HashSet<>();
- int pos;
- PayloadAttribute payloadAtt;
- CharTermAttribute termAtt;
- PositionIncrementAttribute posIncrAtt;
-
- public PayloadFilter(TokenStream input) {
- super(input);
- pos = 0;
- entities.add("xx");
- entities.add("one");
- nopayload.add("nopayload");
- nopayload.add("np");
- termAtt = addAttribute(CharTermAttribute.class);
- posIncrAtt = addAttribute(PositionIncrementAttribute.class);
- payloadAtt = addAttribute(PayloadAttribute.class);
- }
-
- @Override
- public boolean incrementToken() throws IOException {
- if (input.incrementToken()) {
- String token = termAtt.toString();
-
- if (!nopayload.contains(token)) {
- if (entities.contains(token)) {
- payloadAtt.setPayload(new BytesRef(token + ":Entity:"+ pos ));
- } else {
- payloadAtt.setPayload(new BytesRef(token + ":Noise:" + pos ));
- }
- }
- pos += posIncrAtt.getPositionIncrement();
- return true;
- }
- return false;
- }
-
- @Override
- public void reset() throws IOException {
- super.reset();
- this.pos = 0;
- }
- }
-
- public final class TestPayloadAnalyzer extends Analyzer {
-
- @Override
- public TokenStreamComponents createComponents(String fieldName) {
- Tokenizer result = new MockTokenizer(MockTokenizer.SIMPLE, true);
- return new TokenStreamComponents(result, new PayloadFilter(result));
- }
- }
-}
Index: lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java (revision 1703369)
+++ lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java (working copy)
@@ -1,308 +0,0 @@
-package org.apache.lucene.search.payloads;
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.index.FieldInvertState;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.CheckHits;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.QueryUtils;
-import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.search.similarities.ClassicSimilarity;
-import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.search.spans.MultiSpansWrapper;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
-import org.apache.lucene.search.spans.Spans;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.English;
-import org.apache.lucene.util.LuceneTestCase;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-
-
-/**
- *
- *
- **/
-public class TestPayloadTermQuery extends LuceneTestCase {
- private static IndexSearcher searcher;
- private static IndexReader reader;
- private static Similarity similarity = new BoostingSimilarity();
- private static final byte[] payloadField = new byte[]{1};
- private static final byte[] payloadMultiField1 = new byte[]{2};
- private static final byte[] payloadMultiField2 = new byte[]{4};
- protected static Directory directory;
-
- private static class PayloadAnalyzer extends Analyzer {
-
- private PayloadAnalyzer() {
- super(PER_FIELD_REUSE_STRATEGY);
- }
-
- @Override
- public TokenStreamComponents createComponents(String fieldName) {
- Tokenizer result = new MockTokenizer(MockTokenizer.SIMPLE, true);
- return new TokenStreamComponents(result, new PayloadFilter(result, fieldName));
- }
- }
-
- private static class PayloadFilter extends TokenFilter {
- private final String fieldName;
- private int numSeen = 0;
-
- private final PayloadAttribute payloadAtt;
-
- public PayloadFilter(TokenStream input, String fieldName) {
- super(input);
- this.fieldName = fieldName;
- payloadAtt = addAttribute(PayloadAttribute.class);
- }
-
- @Override
- public boolean incrementToken() throws IOException {
- boolean hasNext = input.incrementToken();
- if (hasNext) {
- if (fieldName.equals("field")) {
- payloadAtt.setPayload(new BytesRef(payloadField));
- } else if (fieldName.equals("multiField")) {
- if (numSeen % 2 == 0) {
- payloadAtt.setPayload(new BytesRef(payloadMultiField1));
- } else {
- payloadAtt.setPayload(new BytesRef(payloadMultiField2));
- }
- numSeen++;
- }
- return true;
- } else {
- return false;
- }
- }
-
- @Override
- public void reset() throws IOException {
- super.reset();
- this.numSeen = 0;
- }
- }
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- directory = newDirectory();
- RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
- newIndexWriterConfig(new PayloadAnalyzer())
- .setSimilarity(similarity).setMergePolicy(newLogMergePolicy()));
- //writer.infoStream = System.out;
- for (int i = 0; i < 1000; i++) {
- Document doc = new Document();
- Field noPayloadField = newTextField(PayloadHelper.NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES);
- //noPayloadField.setBoost(0);
- doc.add(noPayloadField);
- doc.add(newTextField("field", English.intToEnglish(i), Field.Store.YES));
- doc.add(newTextField("multiField", English.intToEnglish(i) + " " + English.intToEnglish(i), Field.Store.YES));
- writer.addDocument(doc);
- }
- reader = writer.getReader();
- writer.close();
-
- searcher = newSearcher(reader);
- searcher.setSimilarity(similarity);
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- searcher = null;
- reader.close();
- reader = null;
- directory.close();
- directory = null;
- }
-
- public void test() throws IOException {
- SpanQuery query = new PayloadScoreQuery(new SpanTermQuery(new Term("field", "seventy")),
- new MaxPayloadFunction());
- TopDocs hits = searcher.search(query, 100);
- assertTrue("hits is null and it shouldn't be", hits != null);
- assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
-
- //they should all have the exact same score, because they all contain seventy once, and we set
- //all the other similarity factors to be 1
-
- assertTrue(hits.getMaxScore() + " does not equal: " + 1, hits.getMaxScore() == 1);
- for (int i = 0; i < hits.scoreDocs.length; i++) {
- ScoreDoc doc = hits.scoreDocs[i];
- assertTrue(doc.score + " does not equal: " + 1, doc.score == 1);
- }
- CheckHits.checkExplanations(query, PayloadHelper.FIELD, searcher, true);
- Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), query);
- assertTrue("spans is null and it shouldn't be", spans != null);
- /*float score = hits.score(0);
- for (int i =1; i < hits.length(); i++)
- {
- assertTrue("scores are not equal and they should be", score == hits.score(i));
- }*/
-
- }
-
- public void testQuery() {
- SpanQuery boostingFuncTermQuery = new PayloadScoreQuery(new SpanTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy")),
- new MaxPayloadFunction());
- QueryUtils.check(boostingFuncTermQuery);
-
- SpanTermQuery spanTermQuery = new SpanTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"));
-
- assertTrue(boostingFuncTermQuery.equals(spanTermQuery) == spanTermQuery.equals(boostingFuncTermQuery));
-
- SpanQuery boostingFuncTermQuery2 = new PayloadScoreQuery(new SpanTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy")),
- new AveragePayloadFunction());
-
- QueryUtils.checkUnequal(boostingFuncTermQuery, boostingFuncTermQuery2);
- }
-
- public void testMultipleMatchesPerDoc() throws Exception {
- SpanQuery query = new PayloadScoreQuery(new SpanTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy")),
- new MaxPayloadFunction());
- TopDocs hits = searcher.search(query, 100);
- assertTrue("hits is null and it shouldn't be", hits != null);
- assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
-
- //they should all have the exact same score, because they all contain seventy once, and we set
- //all the other similarity factors to be 1
-
- //System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash);
- assertTrue(hits.getMaxScore() + " does not equal: " + 4.0, hits.getMaxScore() == 4.0);
- //there should be exactly 10 items that score a 4, all the rest should score a 2
- //The 10 items are: 70 + i*100 where i in [0-9]
- int numTens = 0;
- for (int i = 0; i < hits.scoreDocs.length; i++) {
- ScoreDoc doc = hits.scoreDocs[i];
- if (doc.doc % 10 == 0) {
- numTens++;
- assertTrue(doc.score + " does not equal: " + 4.0, doc.score == 4.0);
- } else {
- assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
- }
- }
- assertTrue(numTens + " does not equal: " + 10, numTens == 10);
- CheckHits.checkExplanations(query, "field", searcher, true);
- Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), query);
- assertTrue("spans is null and it shouldn't be", spans != null);
- //should be two matches per document
- int count = 0;
- //100 hits times 2 matches per hit, we should have 200 in count
- while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
- while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
- count++;
- }
- }
- assertTrue(count + " does not equal: " + 200, count == 200);
- }
-
- public void testNoMatch() throws Exception {
- SpanQuery query = new PayloadScoreQuery(new SpanTermQuery(new Term(PayloadHelper.FIELD, "junk")),
- new MaxPayloadFunction());
- TopDocs hits = searcher.search(query, 100);
- assertTrue("hits is null and it shouldn't be", hits != null);
- assertTrue("hits Size: " + hits.totalHits + " is not: " + 0, hits.totalHits == 0);
-
- }
-
- public void testNoPayload() throws Exception {
- SpanQuery q1 = new PayloadScoreQuery(new SpanTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "zero")),
- new MaxPayloadFunction());
- SpanQuery q2 = new PayloadScoreQuery(new SpanTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "foo")),
- new MaxPayloadFunction());
- BooleanClause c1 = new BooleanClause(q1, BooleanClause.Occur.MUST);
- BooleanClause c2 = new BooleanClause(q2, BooleanClause.Occur.MUST_NOT);
- BooleanQuery.Builder query = new BooleanQuery.Builder();
- query.add(c1);
- query.add(c2);
- TopDocs hits = searcher.search(query.build(), 100);
- assertTrue("hits is null and it shouldn't be", hits != null);
- assertTrue("hits Size: " + hits.totalHits + " is not: " + 1, hits.totalHits == 1);
- int[] results = new int[1];
- results[0] = 0;//hits.scoreDocs[0].doc;
- CheckHits.checkHitCollector(random(), query.build(), PayloadHelper.NO_PAYLOAD_FIELD, searcher, results);
- }
-
- static class BoostingSimilarity extends ClassicSimilarity {
-
- @Override
- public float queryNorm(float sumOfSquaredWeights) {
- return 1;
- }
-
- @Override
- public float coord(int overlap, int maxOverlap) {
- return 1;
- }
-
- // TODO: Remove warning after API has been finalized
- @Override
- public float scorePayload(int docId, int start, int end, BytesRef payload) {
- //we know it is size 4 here, so ignore the offset/length
- return payload.bytes[payload.offset];
- }
-
- //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- //Make everything else 1 so we see the effect of the payload
- //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- @Override
- public float lengthNorm(FieldInvertState state) {
- return state.getBoost();
- }
-
- @Override
- public float sloppyFreq(int distance) {
- return 1;
- }
-
- @Override
- public float idf(long docFreq, long docCount) {
- return 1;
- }
-
- @Override
- public float tf(float freq) {
- return freq == 0 ? 0 : 1;
- }
- }
-
- static class FullSimilarity extends ClassicSimilarity{
- public float scorePayload(int docId, String fieldName, byte[] payload, int offset, int length) {
- //we know it is size 4 here, so ignore the offset/length
- return payload[offset];
- }
- }
-
-}
Index: lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java (revision 1703369)
+++ lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java (working copy)
@@ -1,52 +0,0 @@
-package org.apache.lucene.search.spans;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.LeafReader;
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.SlowCompositeReaderWrapper;
-import org.apache.lucene.search.IndexSearcher;
-
-import java.io.IOException;
-
-/**
- *
- * A wrapper to perform span operations on a non-leaf reader context
- * <p>
- * NOTE: This should be used for testing purposes only
- * @lucene.internal
- */
-public class MultiSpansWrapper {
-
- public static Spans wrap(IndexReader reader, SpanQuery spanQuery) throws IOException {
- return wrap(reader, spanQuery, SpanWeight.Postings.POSITIONS);
- }
-
- public static Spans wrap(IndexReader reader, SpanQuery spanQuery, SpanWeight.Postings requiredPostings) throws IOException {
-
- LeafReader lr = SlowCompositeReaderWrapper.wrap(reader); // slow, but ok for testing
- LeafReaderContext lrContext = lr.getContext();
- IndexSearcher searcher = new IndexSearcher(lr);
- searcher.setQueryCache(null);
-
- SpanWeight w = spanQuery.createWeight(searcher, false);
-
- return w.getSpans(lrContext, requiredPostings);
- }
-}
Index: lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
===================================================================
--- lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (revision 1703369)
+++ lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (working copy)
@@ -83,7 +83,7 @@
import org.apache.lucene.search.join.ScoreMode;
import org.apache.lucene.search.join.ToChildBlockJoinQuery;
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
-import org.apache.lucene.search.payloads.SpanPayloadCheckQuery;
+import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
Index: lucene/queries/src/java/org/apache/lucene/queries/payloads/AveragePayloadFunction.java
===================================================================
--- lucene/queries/src/java/org/apache/lucene/queries/payloads/AveragePayloadFunction.java (revision 1703369)
+++ lucene/queries/src/java/org/apache/lucene/queries/payloads/AveragePayloadFunction.java (working copy)
@@ -1,4 +1,4 @@
-package org.apache.lucene.search.payloads;
+package org.apache.lucene.queries.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
Index: lucene/queries/src/java/org/apache/lucene/queries/payloads/MaxPayloadFunction.java
===================================================================
--- lucene/queries/src/java/org/apache/lucene/queries/payloads/MaxPayloadFunction.java (revision 1703369)
+++ lucene/queries/src/java/org/apache/lucene/queries/payloads/MaxPayloadFunction.java (working copy)
@@ -1,4 +1,4 @@
-package org.apache.lucene.search.payloads;
+package org.apache.lucene.queries.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
Index: lucene/queries/src/java/org/apache/lucene/queries/payloads/MinPayloadFunction.java
===================================================================
--- lucene/queries/src/java/org/apache/lucene/queries/payloads/MinPayloadFunction.java (revision 1703369)
+++ lucene/queries/src/java/org/apache/lucene/queries/payloads/MinPayloadFunction.java (working copy)
@@ -1,4 +1,4 @@
-package org.apache.lucene.search.payloads;
+package org.apache.lucene.queries.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
Index: lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadFunction.java
===================================================================
--- lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadFunction.java (revision 1703369)
+++ lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadFunction.java (working copy)
@@ -1,4 +1,4 @@
-package org.apache.lucene.search.payloads;
+package org.apache.lucene.queries.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -22,7 +22,7 @@
* An abstract class that defines a way for PayloadScoreQuery instances to transform
* the cumulative effects of payload scores for a document.
*
- * @see org.apache.lucene.search.payloads.PayloadScoreQuery for more information
+ * @see org.apache.lucene.queries.payloads.PayloadScoreQuery for more information
*
* @lucene.experimental This class and its derivations are experimental and subject to
* change
Index: lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java
===================================================================
--- lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java (revision 1703369)
+++ lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java (working copy)
@@ -1,4 +1,4 @@
-package org.apache.lucene.search.payloads;
+package org.apache.lucene.queries.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
Index: lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java
===================================================================
--- lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java (revision 1703369)
+++ lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java (working copy)
@@ -1,4 +1,4 @@
-package org.apache.lucene.search.payloads;
+package org.apache.lucene.queries.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
Index: lucene/queries/src/java/org/apache/lucene/queries/payloads/package-info.java
===================================================================
--- lucene/queries/src/java/org/apache/lucene/queries/payloads/package-info.java (revision 1703369)
+++ lucene/queries/src/java/org/apache/lucene/queries/payloads/package-info.java (working copy)
@@ -20,8 +20,8 @@
* <p>
* The following Query implementations are provided:
* <ol>
- * <li>{@link org.apache.lucene.search.payloads.PayloadScoreQuery PayloadScoreQuery} -- For all terms matched by
+ * <li>{@link org.apache.lucene.queries.payloads.PayloadScoreQuery PayloadScoreQuery} -- For all terms matched by
* a SpanQuery, boost the score based on the value of the payload located at those terms.</li>
* </ol>
*/
-package org.apache.lucene.search.payloads;
+package org.apache.lucene.queries.payloads;
Index: lucene/queries/src/test/org/apache/lucene/queries/payloads/PayloadHelper.java
===================================================================
--- lucene/queries/src/test/org/apache/lucene/queries/payloads/PayloadHelper.java (revision 1703369)
+++ lucene/queries/src/test/org/apache/lucene/queries/payloads/PayloadHelper.java (working copy)
@@ -1,4 +1,4 @@
-package org.apache.lucene.search.payloads;
+package org.apache.lucene.queries.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
Index: lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadCheckQuery.java
===================================================================
--- lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadCheckQuery.java (revision 1703369)
+++ lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadCheckQuery.java (working copy)
@@ -1,4 +1,4 @@
-package org.apache.lucene.search.payloads;
+package org.apache.lucene.queries.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
Index: lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadExplanations.java
===================================================================
--- lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadExplanations.java (revision 1703369)
+++ lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadExplanations.java (working copy)
@@ -1,4 +1,4 @@
-package org.apache.lucene.search.payloads;
+package org.apache.lucene.queries.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
Index: lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadScoreQuery.java
===================================================================
--- lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadScoreQuery.java (revision 1703369)
+++ lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadScoreQuery.java (working copy)
@@ -1,4 +1,4 @@
-package org.apache.lucene.search.payloads;
+package org.apache.lucene.queries.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
Index: lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadSpans.java
===================================================================
--- lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadSpans.java (revision 1703369)
+++ lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadSpans.java (working copy)
@@ -1,4 +1,4 @@
-package org.apache.lucene.search.payloads;
+package org.apache.lucene.queries.payloads;
/*
* Copyright 2004 The Apache Software Foundation
@@ -16,6 +16,13 @@
* limitations under the License.
*/
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
@@ -32,11 +39,11 @@
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.spans.MultiSpansWrapper;
+import org.apache.lucene.search.spans.SpanCollector;
import org.apache.lucene.search.spans.SpanFirstQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
@@ -48,13 +55,6 @@
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
-import java.io.IOException;
-import java.io.StringReader;
-import java.nio.charset.StandardCharsets;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Set;
-
public class TestPayloadSpans extends LuceneTestCase {
private IndexSearcher searcher;
private Similarity similarity = new ClassicSimilarity();
@@ -74,15 +74,15 @@
SpanTermQuery stq;
Spans spans;
stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "seventy"));
- PayloadSpanCollector collector = new PayloadSpanCollector();
+
spans = MultiSpansWrapper.wrap(indexReader, stq, SpanWeight.Postings.PAYLOADS);
assertTrue("spans is null and it shouldn't be", spans != null);
- checkSpans(spans, collector, 100, 1, 1, 1);
+ checkSpans(spans, 100, 1, 1, 1);
stq = new SpanTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "seventy"));
spans = MultiSpansWrapper.wrap(indexReader, stq, SpanWeight.Postings.PAYLOADS);
assertTrue("spans is null and it shouldn't be", spans != null);
- checkSpans(spans, collector, 100, 0, 0, 0);
+ checkSpans(spans, 100, 0, 0, 0);
}
public void testSpanFirst() throws IOException {
@@ -91,9 +91,8 @@
SpanFirstQuery sfq;
match = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
sfq = new SpanFirstQuery(match, 2);
- PayloadSpanCollector collector = new PayloadSpanCollector();
Spans spans = MultiSpansWrapper.wrap(indexReader, sfq, SpanWeight.Postings.PAYLOADS);
- checkSpans(spans, collector, 109, 1, 1, 1);
+ checkSpans(spans, 109, 1, 1, 1);
//Test more complicated subclause
SpanQuery[] clauses = new SpanQuery[2];
clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
@@ -100,11 +99,11 @@
clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "hundred"));
match = new SpanNearQuery(clauses, 0, true);
sfq = new SpanFirstQuery(match, 2);
- checkSpans(MultiSpansWrapper.wrap(indexReader, sfq, SpanWeight.Postings.PAYLOADS), collector, 100, 2, 1, 1);
+ checkSpans(MultiSpansWrapper.wrap(indexReader, sfq, SpanWeight.Postings.PAYLOADS), 100, 2, 1, 1);
match = new SpanNearQuery(clauses, 0, false);
sfq = new SpanFirstQuery(match, 2);
- checkSpans(MultiSpansWrapper.wrap(indexReader, sfq, SpanWeight.Postings.PAYLOADS), collector, 100, 2, 1, 1);
+ checkSpans(MultiSpansWrapper.wrap(indexReader, sfq, SpanWeight.Postings.PAYLOADS), 100, 2, 1, 1);
}
@@ -127,8 +126,7 @@
IndexReader reader = writer.getReader();
writer.close();
- PayloadSpanCollector collector = new PayloadSpanCollector();
- checkSpans(MultiSpansWrapper.wrap(reader, snq, SpanWeight.Postings.PAYLOADS), collector, 1, new int[]{2});
+ checkSpans(MultiSpansWrapper.wrap(reader, snq, SpanWeight.Postings.PAYLOADS), 1, new int[]{2});
reader.close();
directory.close();
}
@@ -137,7 +135,6 @@
SpanTermQuery stq;
Spans spans;
IndexSearcher searcher = getSearcher();
- PayloadSpanCollector collector = new PayloadSpanCollector();
stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "mark"));
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), stq, SpanWeight.Postings.PAYLOADS);
@@ -151,7 +148,7 @@
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery, SpanWeight.Postings.PAYLOADS);
assertTrue("spans is null and it shouldn't be", spans != null);
- checkSpans(spans, collector, 2, new int[]{3,3});
+ checkSpans(spans, 2, new int[]{3,3});
clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx"));
@@ -163,7 +160,7 @@
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery, SpanWeight.Postings.PAYLOADS);
assertTrue("spans is null and it shouldn't be", spans != null);
- checkSpans(spans, collector, 1, new int[]{3});
+ checkSpans(spans, 1, new int[]{3});
clauses = new SpanQuery[2];
@@ -184,7 +181,7 @@
// yy within 6 of xx within 6 of rr
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery, SpanWeight.Postings.PAYLOADS);
assertTrue("spans is null and it shouldn't be", spans != null);
- checkSpans(spans, collector, 2, new int[]{3,3});
+ checkSpans(spans, 2, new int[]{3,3});
closeIndexReader.close();
directory.close();
}
@@ -212,12 +209,11 @@
clauses3[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "np"));
clauses3[1] = snq;
- PayloadSpanCollector collector = new PayloadSpanCollector();
SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false);
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery, SpanWeight.Postings.PAYLOADS);
assertTrue("spans is null and it shouldn't be", spans != null);
- checkSpans(spans, collector, 1, new int[]{3});
+ checkSpans(spans, 1, new int[]{3});
closeIndexReader.close();
directory.close();
}
@@ -252,10 +248,9 @@
SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false);
- PayloadSpanCollector collector = new PayloadSpanCollector();
spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery, SpanWeight.Postings.PAYLOADS);
assertTrue("spans is null and it shouldn't be", spans != null);
- checkSpans(spans, collector, 2, new int[]{8, 8});
+ checkSpans(spans, 2, new int[]{8, 8});
closeIndexReader.close();
directory.close();
}
@@ -277,7 +272,7 @@
SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
SpanQuery[] sqs = { stq1, stq2 };
SpanNearQuery snq = new SpanNearQuery(sqs, 1, true);
- PayloadSpanCollector collector = new PayloadSpanCollector();
+ VerifyingCollector collector = new VerifyingCollector();
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, SpanWeight.Postings.PAYLOADS);
TopDocs topDocs = is.search(snq, 1);
@@ -287,9 +282,8 @@
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
collector.reset();
spans.collect(collector);
- Collection<byte[]> payloads = collector.getPayloads();
- for (final byte [] payload : payloads) {
- payloadSet.add(new String(payload, StandardCharsets.UTF_8));
+ for (final BytesRef payload : collector.payloads) {
+ payloadSet.add(Term.toString(payload));
}
}
}
@@ -317,7 +311,7 @@
SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
SpanQuery[] sqs = { stq1, stq2 };
SpanNearQuery snq = new SpanNearQuery(sqs, 0, true);
- PayloadSpanCollector collector = new PayloadSpanCollector();
+ VerifyingCollector collector = new VerifyingCollector();
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, SpanWeight.Postings.PAYLOADS);
TopDocs topDocs = is.search(snq, 1);
@@ -327,10 +321,8 @@
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
collector.reset();
spans.collect(collector);
- Collection<byte[]> payloads = collector.getPayloads();
-
- for (final byte [] payload : payloads) {
- payloadSet.add(new String(payload, StandardCharsets.UTF_8));
+ for (final BytesRef payload: collector.payloads) {
+ payloadSet.add(Term.toString(payload));
}
}
}
@@ -358,20 +350,18 @@
SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
SpanQuery[] sqs = { stq1, stq2 };
SpanNearQuery snq = new SpanNearQuery(sqs, 0, true);
- PayloadSpanCollector collector = new PayloadSpanCollector();
Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, SpanWeight.Postings.PAYLOADS);
TopDocs topDocs = is.search(snq, 1);
Set<String> payloadSet = new HashSet<>();
+ VerifyingCollector collector = new VerifyingCollector();
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
collector.reset();
spans.collect(collector);
- Collection<byte[]> payloads = collector.getPayloads();
-
- for (final byte [] payload : payloads) {
- payloadSet.add(new String(payload, StandardCharsets.UTF_8));
+ for (final BytesRef payload : collector.payloads) {
+ payloadSet.add(Term.toString(payload));
}
}
}
@@ -387,57 +377,51 @@
reader.close();
directory.close();
}
-
- public void testPayloadSpanUtil() throws Exception {
- Directory directory = newDirectory();
- RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
- newIndexWriterConfig(new PayloadAnalyzer()).setSimilarity(similarity));
- Document doc = new Document();
- doc.add(newTextField(PayloadHelper.FIELD, "xx rr yy mm pp", Field.Store.YES));
- writer.addDocument(doc);
-
- IndexReader reader = writer.getReader();
- writer.close();
- IndexSearcher searcher = newSearcher(reader);
+ static class VerifyingCollector implements SpanCollector {
- PayloadSpanUtil psu = new PayloadSpanUtil(searcher.getTopReaderContext());
-
- Collection<byte[]> payloads = psu.getPayloadsForQuery(new TermQuery(new Term(PayloadHelper.FIELD, "rr")));
- if(VERBOSE) {
- System.out.println("Num payloads:" + payloads.size());
- for (final byte [] bytes : payloads) {
- System.out.println(new String(bytes, StandardCharsets.UTF_8));
+ List<BytesRef> payloads = new ArrayList<>();
+
+ @Override
+ public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
+ if (postings.getPayload() != null) { // positions that report no payload contribute nothing
+ payloads.add(BytesRef.deepCopyOf(postings.getPayload())); // keep an independent copy, not the BytesRef handed out by postings
}
}
- reader.close();
- directory.close();
+
+ @Override
+ public void reset() {
+ payloads.clear();
+ }
+
+ public void verify(int expectedLength, int expectedFirstByte) { // asserts on every payload collected since the last reset()
+ for (BytesRef payload : payloads) {
+ assertEquals("Incorrect payload length", expectedLength, payload.length);
+ assertEquals("Incorrect first byte", expectedFirstByte, payload.bytes[payload.offset]); // valid bytes start at offset, not at index 0
+ }
+ }
}
- private void checkSpans(Spans spans, PayloadSpanCollector collector, int expectedNumSpans, int expectedNumPayloads,
+ private void checkSpans(Spans spans, int expectedNumSpans, int expectedNumPayloads,
int expectedPayloadLength, int expectedFirstByte) throws IOException {
assertTrue("spans is null and it shouldn't be", spans != null);
//each position match should have a span associated with it, since there is just one underlying term query, there should
//only be one entry in the span
+ VerifyingCollector collector = new VerifyingCollector(); // created locally now that callers no longer pass a collector in
int seen = 0;
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
collector.reset();
spans.collect(collector);
-
- Collection<byte[]> payload = collector.getPayloads();
- assertEquals("payload size", expectedNumPayloads, payload.size());
- for (final byte [] thePayload : payload) {
- assertEquals("payload length", expectedPayloadLength, thePayload.length);
- assertEquals("payload first byte", expectedFirstByte, thePayload[0]);
- }
-
+ collector.verify(expectedPayloadLength, expectedFirstByte); // per-payload checks: length and first byte
+ assertEquals("expectedNumPayloads", expectedNumPayloads, collector.payloads.size()); // count check; verify() alone passes when nothing was collected
seen++;
}
}
assertEquals("expectedNumSpans", expectedNumSpans, seen);
}
-
+
+
private IndexSearcher getSearcher() throws Exception {
directory = newDirectory();
String[] docs = new String[]{"xx rr yy mm pp","xx yy mm rr pp", "nopayload qq ss pp np", "one two three four five six seven eight nine ten eleven", "nine one two three four five six seven eight eleven ten"};
@@ -459,9 +443,9 @@
return searcher;
}
- private void checkSpans(Spans spans, PayloadSpanCollector collector, int numSpans, int[] numPayloads) throws IOException {
+ private void checkSpans(Spans spans, int numSpans, int[] numPayloads) throws IOException {
int cnt = 0;
-
+ VerifyingCollector collector = new VerifyingCollector();
while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
if(VERBOSE)
@@ -468,17 +452,8 @@
System.out.println("\nSpans Dump --");
collector.reset();
spans.collect(collector);
+ assertEquals("payload size", numPayloads[cnt], collector.payloads.size());
- Collection<byte[]> payload = collector.getPayloads();
- if(VERBOSE) {
- System.out.println("payloads for span:" + payload.size());
- for (final byte [] bytes : payload) {
- System.out.println("doc:" + spans.docID() + " s:" + spans.startPosition() + " e:" + spans.endPosition() + " "
- + new String(bytes, StandardCharsets.UTF_8));
- }
- }
- assertEquals("payload size", numPayloads[cnt], payload.size());
-
cnt++;
}
}
Index: lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadTermQuery.java
===================================================================
--- lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadTermQuery.java (revision 1703369)
+++ lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadTermQuery.java (working copy)
@@ -1,4 +1,4 @@
-package org.apache.lucene.search.payloads;
+package org.apache.lucene.queries.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/BoostingTermBuilder.java
===================================================================
--- lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/BoostingTermBuilder.java (revision 1703369)
+++ lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/BoostingTermBuilder.java (working copy)
@@ -3,8 +3,8 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.xml.DOMUtils;
import org.apache.lucene.queryparser.xml.ParserException;
-import org.apache.lucene.search.payloads.AveragePayloadFunction;
-import org.apache.lucene.search.payloads.PayloadScoreQuery;
+import org.apache.lucene.queries.payloads.AveragePayloadFunction;
+import org.apache.lucene.queries.payloads.PayloadScoreQuery;
import org.apache.lucene.search.spans.SpanBoostQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
Index: lucene/sandbox/src/java/org/apache/lucene/payloads/PayloadSpanCollector.java
===================================================================
--- lucene/sandbox/src/java/org/apache/lucene/payloads/PayloadSpanCollector.java (revision 1703369)
+++ lucene/sandbox/src/java/org/apache/lucene/payloads/PayloadSpanCollector.java (working copy)
@@ -1,4 +1,4 @@
-package org.apache.lucene.search.payloads;
+package org.apache.lucene.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
Index: lucene/sandbox/src/java/org/apache/lucene/payloads/PayloadSpanUtil.java
===================================================================
--- lucene/sandbox/src/java/org/apache/lucene/payloads/PayloadSpanUtil.java (revision 1703369)
+++ lucene/sandbox/src/java/org/apache/lucene/payloads/PayloadSpanUtil.java (working copy)
@@ -1,4 +1,4 @@
-package org.apache.lucene.search.payloads;
+package org.apache.lucene.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -17,6 +17,12 @@
* limitations under the License.
*/
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.List;
+
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReaderContext;
@@ -36,12 +42,6 @@
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.Spans;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Iterator;
-import java.util.List;
-
/**
* Experimental class to get set of payloads for most standard Lucene queries.
* Operates like Highlighter - IndexReader should only contain doc of interest,
Index: lucene/sandbox/src/test/org/apache/lucene/payloads/TestPayloadSpanUtil.java
===================================================================
--- lucene/sandbox/src/test/org/apache/lucene/payloads/TestPayloadSpanUtil.java (revision 0)
+++ lucene/sandbox/src/test/org/apache/lucene/payloads/TestPayloadSpanUtil.java (working copy)
@@ -0,0 +1,150 @@
+package org.apache.lucene.payloads;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.similarities.ClassicSimilarity;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+
+/**
+ * Tests {@link PayloadSpanUtil} against a single-document index whose tokens
+ * carry the payloads produced by {@link PayloadFilter}.
+ */
+public class TestPayloadSpanUtil extends LuceneTestCase {
+
+  public static final String FIELD = "f";
+
+  /**
+   * Indexes one document and checks that a {@link TermQuery} on "rr" yields
+   * exactly the payload that {@link PayloadFilter} attached to that token.
+   */
+  public void testPayloadSpanUtil() throws Exception {
+    Directory directory = newDirectory();
+    RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
+        newIndexWriterConfig(new PayloadAnalyzer()).setSimilarity(new ClassicSimilarity()));
+
+    Document doc = new Document();
+    doc.add(newTextField(FIELD, "xx rr yy mm pp", Field.Store.YES));
+    writer.addDocument(doc);
+
+    IndexReader reader = writer.getReader();
+    writer.close();
+    IndexSearcher searcher = newSearcher(reader);
+
+    PayloadSpanUtil psu = new PayloadSpanUtil(searcher.getTopReaderContext());
+
+    Collection<byte[]> payloads = psu.getPayloadsForQuery(new TermQuery(new Term(FIELD, "rr")));
+    if (VERBOSE) {
+      System.out.println("Num payloads:" + payloads.size());
+      for (final byte[] bytes : payloads) {
+        System.out.println(new String(bytes, StandardCharsets.UTF_8));
+      }
+    }
+
+    // "rr" is the second token of the only document, so PayloadFilter tagged it
+    // with position 1 and, as it is not an entity, with the "Noise" marker.
+    assertEquals("Num payloads", 1, payloads.size());
+    assertEquals("rr:Noise:1", new String(payloads.iterator().next(), StandardCharsets.UTF_8));
+
+    reader.close();
+    directory.close();
+  }
+
+  /** Analyzer whose token stream is a {@link MockTokenizer} wrapped in a {@link PayloadFilter}. */
+  static final class PayloadAnalyzer extends Analyzer {
+
+    @Override
+    public TokenStreamComponents createComponents(String fieldName) {
+      Tokenizer result = new MockTokenizer(MockTokenizer.SIMPLE, true);
+      return new TokenStreamComponents(result, new PayloadFilter(result));
+    }
+  }
+
+  /**
+   * Attaches a payload of the form {@code term:Entity:pos} or {@code term:Noise:pos}
+   * to every token, except the terms listed in {@code nopayload}, which get none.
+   */
+  static final class PayloadFilter extends TokenFilter {
+    private final Set<String> entities = new HashSet<>();
+    private final Set<String> nopayload = new HashSet<>();
+    private final PayloadAttribute payloadAtt;
+    private final CharTermAttribute termAtt;
+    private final PositionIncrementAttribute posIncrAtt;
+    // sum of the position increments of the tokens already emitted; used as the payload suffix
+    private int pos;
+
+    public PayloadFilter(TokenStream input) {
+      super(input);
+      pos = 0;
+      entities.add("xx");
+      entities.add("one");
+      nopayload.add("nopayload");
+      nopayload.add("np");
+      termAtt = addAttribute(CharTermAttribute.class);
+      posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+      payloadAtt = addAttribute(PayloadAttribute.class);
+    }
+
+    @Override
+    public boolean incrementToken() throws IOException {
+      if (input.incrementToken()) {
+        String token = termAtt.toString();
+
+        if (!nopayload.contains(token)) {
+          if (entities.contains(token)) {
+            payloadAtt.setPayload(new BytesRef(token + ":Entity:" + pos));
+          } else {
+            payloadAtt.setPayload(new BytesRef(token + ":Noise:" + pos));
+          }
+        }
+        pos += posIncrAtt.getPositionIncrement();
+        return true;
+      }
+      return false;
+    }
+
+    @Override
+    public void reset() throws IOException {
+      super.reset();
+      this.pos = 0;
+    }
+  }
+
+}
Property changes on: lucene/sandbox/src/test/org/apache/lucene/payloads/TestPayloadSpanUtil.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/test-framework/src/java/org/apache/lucene/search/spans/MultiSpansWrapper.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/search/spans/MultiSpansWrapper.java (revision 1703369)
+++ lucene/test-framework/src/java/org/apache/lucene/search/spans/MultiSpansWrapper.java (working copy)
@@ -17,6 +17,8 @@
* limitations under the License.
*/
+import java.io.IOException;
+
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
@@ -23,8 +25,6 @@
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.search.IndexSearcher;
-import java.io.IOException;
-
/**
*
* A wrapper to perform span operations on a non-leaf reader context
Index: solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java
===================================================================
--- solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java (revision 1703369)
+++ solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java (working copy)
@@ -28,7 +28,7 @@
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
-import org.apache.lucene.search.payloads.SpanPayloadCheckQuery;
+import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.SolrTestCaseJ4;