| Index: lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java (revision 1703369) |
| +++ lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java (working copy) |
| @@ -24,9 +24,7 @@ |
| * The payload of a Token. |
| * <p> |
| * The payload is stored in the index at each position, and can |
| - * be used to influence scoring when using Payload-based queries |
| - * in the {@link org.apache.lucene.search.payloads} and |
| - * {@link org.apache.lucene.search.spans} packages. |
| + * be used to influence scoring when using Payload-based queries. |
| * <p> |
| * NOTE: because the payload will be stored at each position, it's usually |
| * best to use the minimum number of bytes necessary. Some codec implementations |
| Index: lucene/core/src/java/org/apache/lucene/search/package-info.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/search/package-info.java (revision 1703369) |
| +++ lucene/core/src/java/org/apache/lucene/search/package-info.java (working copy) |
| @@ -33,7 +33,7 @@ |
| * <h2>Search Basics</h2> |
| * <p> |
| * Lucene offers a wide variety of {@link org.apache.lucene.search.Query} implementations, most of which are in |
| - * this package, its subpackages ({@link org.apache.lucene.search.spans spans}, {@link org.apache.lucene.search.payloads payloads}), |
| + * this package, its subpackage ({@link org.apache.lucene.search.spans spans}), |
| * or the <a href="{@docRoot}/../queries/overview-summary.html">queries module</a>. These implementations can be combined in a wide |
| * variety of ways to provide complex querying capabilities along with information about where matches took place in the document |
| * collection. The <a href="#query">Query Classes</a> section below highlights some of the more important Query classes. For details |
| Index: lucene/core/src/java/org/apache/lucene/search/payloads/AveragePayloadFunction.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/search/payloads/AveragePayloadFunction.java (revision 1703369) |
| +++ lucene/core/src/java/org/apache/lucene/search/payloads/AveragePayloadFunction.java (working copy) |
| @@ -1,57 +0,0 @@ |
| -package org.apache.lucene.search.payloads; |
| - |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| - |
| -/** |
| - * Calculate the final score as the average score of all payloads seen. |
| - * <p> |
| - * Is thread safe and completely reusable. |
| - * |
| - **/ |
| -public class AveragePayloadFunction extends PayloadFunction{ |
| - |
| - @Override |
| - public float currentScore(int docId, String field, int start, int end, int numPayloadsSeen, float currentScore, float currentPayloadScore) { |
| - return currentPayloadScore + currentScore; |
| - } |
| - |
| - @Override |
| - public float docScore(int docId, String field, int numPayloadsSeen, float payloadScore) { |
| - return numPayloadsSeen > 0 ? (payloadScore / numPayloadsSeen) : 1; |
| - } |
| - |
| - @Override |
| - public int hashCode() { |
| - final int prime = 31; |
| - int result = 1; |
| - result = prime * result + this.getClass().hashCode(); |
| - return result; |
| - } |
| - |
| - @Override |
| - public boolean equals(Object obj) { |
| - if (this == obj) |
| - return true; |
| - if (obj == null) |
| - return false; |
| - if (getClass() != obj.getClass()) |
| - return false; |
| - return true; |
| - } |
| -} |
| Index: lucene/core/src/java/org/apache/lucene/search/payloads/MaxPayloadFunction.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/search/payloads/MaxPayloadFunction.java (revision 1703369) |
| +++ lucene/core/src/java/org/apache/lucene/search/payloads/MaxPayloadFunction.java (working copy) |
| @@ -1,60 +0,0 @@ |
| -package org.apache.lucene.search.payloads; |
| - |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| - |
| -/** |
| - * Returns the maximum payload score seen, else 1 if there are no payloads on the doc. |
| - * <p> |
| - * Is thread safe and completely reusable. |
| - * |
| - **/ |
| -public class MaxPayloadFunction extends PayloadFunction { |
| - @Override |
| - public float currentScore(int docId, String field, int start, int end, int numPayloadsSeen, float currentScore, float currentPayloadScore) { |
| - if (numPayloadsSeen == 0) { |
| - return currentPayloadScore; |
| - } else { |
| - return Math.max(currentPayloadScore, currentScore); |
| - } |
| - } |
| - |
| - @Override |
| - public float docScore(int docId, String field, int numPayloadsSeen, float payloadScore) { |
| - return numPayloadsSeen > 0 ? payloadScore : 1; |
| - } |
| - |
| - @Override |
| - public int hashCode() { |
| - final int prime = 31; |
| - int result = 1; |
| - result = prime * result + this.getClass().hashCode(); |
| - return result; |
| - } |
| - |
| - @Override |
| - public boolean equals(Object obj) { |
| - if (this == obj) |
| - return true; |
| - if (obj == null) |
| - return false; |
| - if (getClass() != obj.getClass()) |
| - return false; |
| - return true; |
| - } |
| -} |
| Index: lucene/core/src/java/org/apache/lucene/search/payloads/MinPayloadFunction.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/search/payloads/MinPayloadFunction.java (revision 1703369) |
| +++ lucene/core/src/java/org/apache/lucene/search/payloads/MinPayloadFunction.java (working copy) |
| @@ -1,59 +0,0 @@ |
| -package org.apache.lucene.search.payloads; |
| - |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -/** |
| - * Calculates the minimum payload seen |
| - * |
| - **/ |
| -public class MinPayloadFunction extends PayloadFunction { |
| - |
| - @Override |
| - public float currentScore(int docId, String field, int start, int end, int numPayloadsSeen, float currentScore, float currentPayloadScore) { |
| - if (numPayloadsSeen == 0) { |
| - return currentPayloadScore; |
| - } else { |
| - return Math.min(currentPayloadScore, currentScore); |
| - } |
| - } |
| - |
| - @Override |
| - public float docScore(int docId, String field, int numPayloadsSeen, float payloadScore) { |
| - return numPayloadsSeen > 0 ? payloadScore : 1; |
| - } |
| - |
| - @Override |
| - public int hashCode() { |
| - final int prime = 31; |
| - int result = 1; |
| - result = prime * result + this.getClass().hashCode(); |
| - return result; |
| - } |
| - |
| - @Override |
| - public boolean equals(Object obj) { |
| - if (this == obj) |
| - return true; |
| - if (obj == null) |
| - return false; |
| - if (getClass() != obj.getClass()) |
| - return false; |
| - return true; |
| - } |
| - |
| -} |
| Index: lucene/core/src/java/org/apache/lucene/search/payloads/PayloadFunction.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/search/payloads/PayloadFunction.java (revision 1703369) |
| +++ lucene/core/src/java/org/apache/lucene/search/payloads/PayloadFunction.java (working copy) |
| @@ -1,70 +0,0 @@ |
| -package org.apache.lucene.search.payloads; |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import org.apache.lucene.search.Explanation; |
| - |
| -/** |
| - * An abstract class that defines a way for PayloadScoreQuery instances to transform |
| - * the cumulative effects of payload scores for a document. |
| - * |
| - * @see org.apache.lucene.search.payloads.PayloadScoreQuery for more information |
| - * |
| - * @lucene.experimental This class and its derivations are experimental and subject to |
| - * change |
| - * |
| - **/ |
| -public abstract class PayloadFunction { |
| - |
| - /** |
| - * Calculate the score up to this point for this doc and field |
| - * @param docId The current doc |
| - * @param field The field |
| - * @param start The start position of the matching Span |
| - * @param end The end position of the matching Span |
| - * @param numPayloadsSeen The number of payloads seen so far |
| - * @param currentScore The current score so far |
| - * @param currentPayloadScore The score for the current payload |
| - * @return The new current Score |
| - * |
| - * @see org.apache.lucene.search.spans.Spans |
| - */ |
| - public abstract float currentScore(int docId, String field, int start, int end, int numPayloadsSeen, float currentScore, float currentPayloadScore); |
| - |
| - /** |
| - * Calculate the final score for all the payloads seen so far for this doc/field |
| - * @param docId The current doc |
| - * @param field The current field |
| - * @param numPayloadsSeen The total number of payloads seen on this document |
| - * @param payloadScore The raw score for those payloads |
| - * @return The final score for the payloads |
| - */ |
| - public abstract float docScore(int docId, String field, int numPayloadsSeen, float payloadScore); |
| - |
| - public Explanation explain(int docId, String field, int numPayloadsSeen, float payloadScore){ |
| - return Explanation.match( |
| - docScore(docId, field, numPayloadsSeen, payloadScore), |
| - getClass().getSimpleName() + ".docScore()"); |
| - }; |
| - |
| - @Override |
| - public abstract int hashCode(); |
| - |
| - @Override |
| - public abstract boolean equals(Object o); |
| - |
| -} |
| Index: lucene/core/src/java/org/apache/lucene/search/payloads/PayloadScoreQuery.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/search/payloads/PayloadScoreQuery.java (revision 1703369) |
| +++ lucene/core/src/java/org/apache/lucene/search/payloads/PayloadScoreQuery.java (working copy) |
| @@ -1,215 +0,0 @@ |
| -package org.apache.lucene.search.payloads; |
| - |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| -import java.util.Map; |
| -import java.util.Set; |
| - |
| -import org.apache.lucene.index.LeafReaderContext; |
| -import org.apache.lucene.index.PostingsEnum; |
| -import org.apache.lucene.index.Term; |
| -import org.apache.lucene.index.TermContext; |
| -import org.apache.lucene.search.Explanation; |
| -import org.apache.lucene.search.IndexSearcher; |
| -import org.apache.lucene.search.Scorer; |
| -import org.apache.lucene.search.similarities.ClassicSimilarity; |
| -import org.apache.lucene.search.similarities.Similarity; |
| -import org.apache.lucene.search.spans.SpanCollector; |
| -import org.apache.lucene.search.spans.SpanQuery; |
| -import org.apache.lucene.search.spans.SpanScorer; |
| -import org.apache.lucene.search.spans.SpanWeight; |
| -import org.apache.lucene.search.spans.Spans; |
| -import org.apache.lucene.util.BytesRef; |
| - |
| -/** |
| - * A Query class that uses a {@link PayloadFunction} to modify the score of a |
| - * wrapped SpanQuery |
| - * |
| - * NOTE: In order to take advantage of this with the default scoring implementation |
| - * ({@link ClassicSimilarity}), you must override {@link ClassicSimilarity#scorePayload(int, int, int, BytesRef)}, |
| - * which returns 1 by default. |
| - * |
| - * @see org.apache.lucene.search.similarities.Similarity.SimScorer#computePayloadFactor(int, int, int, BytesRef) |
| - */ |
| -public class PayloadScoreQuery extends SpanQuery { |
| - |
| - private final SpanQuery wrappedQuery; |
| - private final PayloadFunction function; |
| - |
| - /** |
| - * Creates a new PayloadScoreQuery |
| - * @param wrappedQuery the query to wrap |
| - * @param function a PayloadFunction to use to modify the scores |
| - */ |
| - public PayloadScoreQuery(SpanQuery wrappedQuery, PayloadFunction function) { |
| - this.wrappedQuery = wrappedQuery; |
| - this.function = function; |
| - } |
| - |
| - @Override |
| - public String getField() { |
| - return wrappedQuery.getField(); |
| - } |
| - |
| - @Override |
| - public String toString(String field) { |
| - return "PayloadSpanQuery[" + wrappedQuery.toString(field) + "; " + function.toString() + "]"; |
| - } |
| - |
| - @Override |
| - public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { |
| - SpanWeight innerWeight = wrappedQuery.createWeight(searcher, needsScores); |
| - if (!needsScores) |
| - return innerWeight; |
| - return new PayloadSpanWeight(searcher, innerWeight); |
| - } |
| - |
| - @Override |
| - public boolean equals(Object o) { |
| - if (this == o) return true; |
| - if (!(o instanceof PayloadScoreQuery)) return false; |
| - if (!super.equals(o)) return false; |
| - |
| - PayloadScoreQuery that = (PayloadScoreQuery) o; |
| - |
| - if (wrappedQuery != null ? !wrappedQuery.equals(that.wrappedQuery) : that.wrappedQuery != null) return false; |
| - return !(function != null ? !function.equals(that.function) : that.function != null); |
| - |
| - } |
| - |
| - @Override |
| - public int hashCode() { |
| - int result = super.hashCode(); |
| - result = 31 * result + (wrappedQuery != null ? wrappedQuery.hashCode() : 0); |
| - result = 31 * result + (function != null ? function.hashCode() : 0); |
| - return result; |
| - } |
| - |
| - private class PayloadSpanWeight extends SpanWeight { |
| - |
| - private final SpanWeight innerWeight; |
| - |
| - public PayloadSpanWeight(IndexSearcher searcher, SpanWeight innerWeight) throws IOException { |
| - super(PayloadScoreQuery.this, searcher, null); |
| - this.innerWeight = innerWeight; |
| - } |
| - |
| - @Override |
| - public void extractTermContexts(Map<Term, TermContext> contexts) { |
| - innerWeight.extractTermContexts(contexts); |
| - } |
| - |
| - @Override |
| - public Spans getSpans(LeafReaderContext ctx, Postings requiredPostings) throws IOException { |
| - return innerWeight.getSpans(ctx, requiredPostings.atLeast(Postings.PAYLOADS)); |
| - } |
| - |
| - @Override |
| - public Scorer scorer(LeafReaderContext context) throws IOException { |
| - Spans spans = getSpans(context, Postings.PAYLOADS); |
| - if (spans == null) |
| - return null; |
| - return new PayloadSpanScorer(spans, this, innerWeight.getSimScorer(context)); |
| - } |
| - |
| - @Override |
| - public void extractTerms(Set<Term> terms) { |
| - innerWeight.extractTerms(terms); |
| - } |
| - |
| - @Override |
| - public float getValueForNormalization() throws IOException { |
| - return innerWeight.getValueForNormalization(); |
| - } |
| - |
| - @Override |
| - public void normalize(float queryNorm, float topLevelBoost) { |
| - innerWeight.normalize(queryNorm, topLevelBoost); |
| - } |
| - |
| - @Override |
| - public Explanation explain(LeafReaderContext context, int doc) throws IOException { |
| - PayloadSpanScorer scorer = (PayloadSpanScorer) scorer(context); |
| - if (scorer == null || scorer.advance(doc) != doc) |
| - return Explanation.noMatch("No match"); |
| - |
| - SpanWeight innerWeight = ((PayloadSpanWeight)scorer.getWeight()).innerWeight; |
| - Explanation innerExpl = innerWeight.explain(context, doc); |
| - scorer.freq(); // force freq calculation |
| - Explanation payloadExpl = scorer.getPayloadExplanation(); |
| - |
| - return Explanation.match(scorer.scoreCurrentDoc(), "PayloadSpanQuery, product of:", innerExpl, payloadExpl); |
| - } |
| - } |
| - |
| - private class PayloadSpanScorer extends SpanScorer implements SpanCollector { |
| - |
| - private int payloadsSeen; |
| - private float payloadScore; |
| - |
| - private PayloadSpanScorer(Spans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException { |
| - super(spans, weight, docScorer); |
| - } |
| - |
| - @Override |
| - protected void doStartCurrentDoc() { |
| - payloadScore = 0; |
| - payloadsSeen = 0; |
| - } |
| - |
| - @Override |
| - protected void doCurrentSpans() throws IOException { |
| - spans.collect(this); |
| - } |
| - |
| - @Override |
| - public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException { |
| - BytesRef payload = postings.getPayload(); |
| - if (payload == null) |
| - return; |
| - float payloadFactor = docScorer.computePayloadFactor(docID(), spans.startPosition(), spans.endPosition(), payload); |
| - payloadScore = function.currentScore(docID(), getField(), spans.startPosition(), spans.endPosition(), |
| - payloadsSeen, payloadScore, payloadFactor); |
| - payloadsSeen++; |
| - } |
| - |
| - protected float getPayloadScore() { |
| - return function.docScore(docID(), getField(), payloadsSeen, payloadScore); |
| - } |
| - |
| - protected Explanation getPayloadExplanation() { |
| - return function.explain(docID(), getField(), payloadsSeen, payloadScore); |
| - } |
| - |
| - protected float getSpanScore() throws IOException { |
| - return super.scoreCurrentDoc(); |
| - } |
| - |
| - @Override |
| - protected float scoreCurrentDoc() throws IOException { |
| - return getSpanScore() * getPayloadScore(); |
| - } |
| - |
| - @Override |
| - public void reset() { |
| - |
| - } |
| - } |
| - |
| -} |
| Index: lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanCollector.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanCollector.java (revision 1703369) |
| +++ lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanCollector.java (working copy) |
| @@ -1,57 +0,0 @@ |
| -package org.apache.lucene.search.payloads; |
| - |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import org.apache.lucene.index.PostingsEnum; |
| -import org.apache.lucene.index.Term; |
| -import org.apache.lucene.search.spans.SpanCollector; |
| -import org.apache.lucene.util.BytesRef; |
| - |
| -import java.io.IOException; |
| -import java.util.ArrayList; |
| -import java.util.Collection; |
| - |
| -/** |
| - * SpanCollector for collecting payloads |
| - */ |
| -public class PayloadSpanCollector implements SpanCollector { |
| - |
| - private final Collection<byte[]> payloads = new ArrayList<>(); |
| - |
| - @Override |
| - public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException { |
| - BytesRef payload = postings.getPayload(); |
| - if (payload == null) |
| - return; |
| - final byte[] bytes = new byte[payload.length]; |
| - System.arraycopy(payload.bytes, payload.offset, bytes, 0, payload.length); |
| - payloads.add(bytes); |
| - } |
| - |
| - @Override |
| - public void reset() { |
| - payloads.clear(); |
| - } |
| - |
| - /** |
| - * @return the collected payloads |
| - */ |
| - public Collection<byte[]> getPayloads() { |
| - return payloads; |
| - } |
| -} |
| Index: lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java (revision 1703369) |
| +++ lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java (working copy) |
| @@ -1,191 +0,0 @@ |
| -package org.apache.lucene.search.payloads; |
| - |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import org.apache.lucene.index.IndexReader; |
| -import org.apache.lucene.index.IndexReaderContext; |
| -import org.apache.lucene.index.LeafReaderContext; |
| -import org.apache.lucene.index.Term; |
| -import org.apache.lucene.search.BooleanClause; |
| -import org.apache.lucene.search.BooleanQuery; |
| -import org.apache.lucene.search.DisjunctionMaxQuery; |
| -import org.apache.lucene.search.IndexSearcher; |
| -import org.apache.lucene.search.MultiPhraseQuery; |
| -import org.apache.lucene.search.PhraseQuery; |
| -import org.apache.lucene.search.Query; |
| -import org.apache.lucene.search.TermQuery; |
| -import org.apache.lucene.search.spans.SpanNearQuery; |
| -import org.apache.lucene.search.spans.SpanOrQuery; |
| -import org.apache.lucene.search.spans.SpanQuery; |
| -import org.apache.lucene.search.spans.SpanTermQuery; |
| -import org.apache.lucene.search.spans.SpanWeight; |
| -import org.apache.lucene.search.spans.Spans; |
| - |
| -import java.io.IOException; |
| -import java.util.ArrayList; |
| -import java.util.Collection; |
| -import java.util.Iterator; |
| -import java.util.List; |
| - |
| -/** |
| - * Experimental class to get set of payloads for most standard Lucene queries. |
| - * Operates like Highlighter - IndexReader should only contain doc of interest, |
| - * best to use MemoryIndex. |
| - * |
| - * @lucene.experimental |
| - * |
| - */ |
| -public class PayloadSpanUtil { |
| - private IndexReaderContext context; |
| - |
| - /** |
| - * @param context |
| - * that contains doc with payloads to extract |
| - * |
| - * @see IndexReader#getContext() |
| - */ |
| - public PayloadSpanUtil(IndexReaderContext context) { |
| - this.context = context; |
| - } |
| - |
| - /** |
| - * Query should be rewritten for wild/fuzzy support. |
| - * |
| - * @param query rewritten query |
| - * @return payloads Collection |
| - * @throws IOException if there is a low-level I/O error |
| - */ |
| - public Collection<byte[]> getPayloadsForQuery(Query query) throws IOException { |
| - Collection<byte[]> payloads = new ArrayList<>(); |
| - queryToSpanQuery(query, payloads); |
| - return payloads; |
| - } |
| - |
| - private void queryToSpanQuery(Query query, Collection<byte[]> payloads) |
| - throws IOException { |
| - if (query instanceof BooleanQuery) { |
| - for (BooleanClause clause : (BooleanQuery) query) { |
| - if (!clause.isProhibited()) { |
| - queryToSpanQuery(clause.getQuery(), payloads); |
| - } |
| - } |
| - |
| - } else if (query instanceof PhraseQuery) { |
| - Term[] phraseQueryTerms = ((PhraseQuery) query).getTerms(); |
| - SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length]; |
| - for (int i = 0; i < phraseQueryTerms.length; i++) { |
| - clauses[i] = new SpanTermQuery(phraseQueryTerms[i]); |
| - } |
| - |
| - int slop = ((PhraseQuery) query).getSlop(); |
| - boolean inorder = false; |
| - |
| - if (slop == 0) { |
| - inorder = true; |
| - } |
| - |
| - SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder); |
| - getPayloads(payloads, sp); |
| - } else if (query instanceof TermQuery) { |
| - SpanTermQuery stq = new SpanTermQuery(((TermQuery) query).getTerm()); |
| - getPayloads(payloads, stq); |
| - } else if (query instanceof SpanQuery) { |
| - getPayloads(payloads, (SpanQuery) query); |
| - } else if (query instanceof DisjunctionMaxQuery) { |
| - |
| - for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator |
| - .hasNext();) { |
| - queryToSpanQuery(iterator.next(), payloads); |
| - } |
| - |
| - } else if (query instanceof MultiPhraseQuery) { |
| - final MultiPhraseQuery mpq = (MultiPhraseQuery) query; |
| - final List<Term[]> termArrays = mpq.getTermArrays(); |
| - final int[] positions = mpq.getPositions(); |
| - if (positions.length > 0) { |
| - |
| - int maxPosition = positions[positions.length - 1]; |
| - for (int i = 0; i < positions.length - 1; ++i) { |
| - if (positions[i] > maxPosition) { |
| - maxPosition = positions[i]; |
| - } |
| - } |
| - |
| - @SuppressWarnings({"rawtypes","unchecked"}) final List<Query>[] disjunctLists = |
| - new List[maxPosition + 1]; |
| - int distinctPositions = 0; |
| - |
| - for (int i = 0; i < termArrays.size(); ++i) { |
| - final Term[] termArray = termArrays.get(i); |
| - List<Query> disjuncts = disjunctLists[positions[i]]; |
| - if (disjuncts == null) { |
| - disjuncts = (disjunctLists[positions[i]] = new ArrayList<>( |
| - termArray.length)); |
| - ++distinctPositions; |
| - } |
| - for (final Term term : termArray) { |
| - disjuncts.add(new SpanTermQuery(term)); |
| - } |
| - } |
| - |
| - int positionGaps = 0; |
| - int position = 0; |
| - final SpanQuery[] clauses = new SpanQuery[distinctPositions]; |
| - for (int i = 0; i < disjunctLists.length; ++i) { |
| - List<Query> disjuncts = disjunctLists[i]; |
| - if (disjuncts != null) { |
| - clauses[position++] = new SpanOrQuery(disjuncts |
| - .toArray(new SpanQuery[disjuncts.size()])); |
| - } else { |
| - ++positionGaps; |
| - } |
| - } |
| - |
| - final int slop = mpq.getSlop(); |
| - final boolean inorder = (slop == 0); |
| - |
| - SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, |
| - inorder); |
| - getPayloads(payloads, sp); |
| - } |
| - } |
| - } |
| - |
| - private void getPayloads(Collection<byte []> payloads, SpanQuery query) |
| - throws IOException { |
| - |
| - final IndexSearcher searcher = new IndexSearcher(context); |
| - searcher.setQueryCache(null); |
| - |
| - SpanWeight w = (SpanWeight) searcher.createNormalizedWeight(query, false); |
| - |
| - PayloadSpanCollector collector = new PayloadSpanCollector(); |
| - for (LeafReaderContext leafReaderContext : context.leaves()) { |
| - final Spans spans = w.getSpans(leafReaderContext, SpanWeight.Postings.PAYLOADS); |
| - if (spans != null) { |
| - while (spans.nextDoc() != Spans.NO_MORE_DOCS) { |
| - while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { |
| - collector.reset(); |
| - spans.collect(collector); |
| - payloads.addAll(collector.getPayloads()); |
| - } |
| - } |
| - } |
| - } |
| - } |
| -} |
| Index: lucene/core/src/java/org/apache/lucene/search/payloads/SpanPayloadCheckQuery.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/search/payloads/SpanPayloadCheckQuery.java (revision 1703369) |
| +++ lucene/core/src/java/org/apache/lucene/search/payloads/SpanPayloadCheckQuery.java (working copy) |
| @@ -1,189 +0,0 @@ |
| -package org.apache.lucene.search.payloads; |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| -import java.util.List; |
| -import java.util.Map; |
| -import java.util.Set; |
| - |
| -import org.apache.lucene.index.LeafReaderContext; |
| -import org.apache.lucene.index.PostingsEnum; |
| -import org.apache.lucene.index.Term; |
| -import org.apache.lucene.index.TermContext; |
| -import org.apache.lucene.index.Terms; |
| -import org.apache.lucene.search.IndexSearcher; |
| -import org.apache.lucene.search.Scorer; |
| -import org.apache.lucene.search.similarities.Similarity; |
| -import org.apache.lucene.search.spans.FilterSpans; |
| -import org.apache.lucene.search.spans.FilterSpans.AcceptStatus; |
| -import org.apache.lucene.search.spans.SpanCollector; |
| -import org.apache.lucene.search.spans.SpanQuery; |
| -import org.apache.lucene.search.spans.SpanScorer; |
| -import org.apache.lucene.search.spans.SpanWeight; |
| -import org.apache.lucene.search.spans.Spans; |
| -import org.apache.lucene.util.BytesRef; |
| - |
| -/** |
| - * Only return those matches that have a specific payload at the given position. |
| - */ |
| -public class SpanPayloadCheckQuery extends SpanQuery { |
| - |
| - protected final List<BytesRef> payloadToMatch; |
| - protected final SpanQuery match; |
| - |
| - /** |
| - * @param match The underlying {@link org.apache.lucene.search.spans.SpanQuery} to check |
| - * @param payloadToMatch The {@link java.util.List} of payloads to match |
| - */ |
| - public SpanPayloadCheckQuery(SpanQuery match, List<BytesRef> payloadToMatch) { |
| - this.match = match; |
| - this.payloadToMatch = payloadToMatch; |
| - } |
| - |
| - @Override |
| - public String getField() { |
| - return match.getField(); |
| - } |
| - |
| - @Override |
| - public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { |
| - SpanWeight matchWeight = match.createWeight(searcher, false); |
| - return new SpanPayloadCheckWeight(searcher, needsScores ? getTermContexts(matchWeight) : null, matchWeight); |
| - } |
| - |
| - /** |
| - * Weight that pulls its Spans using a PayloadSpanCollector |
| - */ |
| - public class SpanPayloadCheckWeight extends SpanWeight { |
| - |
| - final SpanWeight matchWeight; |
| - |
| - public SpanPayloadCheckWeight(IndexSearcher searcher, Map<Term, TermContext> termContexts, SpanWeight matchWeight) throws IOException { |
| - super(SpanPayloadCheckQuery.this, searcher, termContexts); |
| - this.matchWeight = matchWeight; |
| - } |
| - |
| - @Override |
| - public void extractTerms(Set<Term> terms) { |
| - matchWeight.extractTerms(terms); |
| - } |
| - |
| - @Override |
| - public void extractTermContexts(Map<Term, TermContext> contexts) { |
| - matchWeight.extractTermContexts(contexts); |
| - } |
| - |
| - @Override |
| - public Spans getSpans(final LeafReaderContext context, Postings requiredPostings) throws IOException { |
| - final PayloadChecker collector = new PayloadChecker(); |
| - Spans matchSpans = matchWeight.getSpans(context, requiredPostings.atLeast(Postings.PAYLOADS)); |
| - return (matchSpans == null) ? null : new FilterSpans(matchSpans) { |
| - @Override |
| - protected AcceptStatus accept(Spans candidate) throws IOException { |
| - collector.reset(); |
| - candidate.collect(collector); |
| - return collector.match(); |
| - } |
| - }; |
| - } |
| - |
| - @Override |
| - public Scorer scorer(LeafReaderContext context) throws IOException { |
| - if (field == null) |
| - return null; |
| - |
| - Terms terms = context.reader().terms(field); |
| - if (terms != null && terms.hasPositions() == false) { |
| - throw new IllegalStateException("field \"" + field + "\" was indexed without position data; cannot run SpanQuery (query=" + parentQuery + ")"); |
| - } |
| - |
| - Spans spans = getSpans(context, Postings.PAYLOADS); |
| - Similarity.SimScorer simScorer = simWeight == null ? null : similarity.simScorer(simWeight, context); |
| - return (spans == null) ? null : new SpanScorer(spans, this, simScorer); |
| - } |
| - } |
| - |
| - private class PayloadChecker implements SpanCollector { |
| - |
| - int upto = 0; |
| - boolean matches = true; |
| - |
| - @Override |
| - public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException { |
| - if (!matches) |
| - return; |
| - if (upto >= payloadToMatch.size()) { |
| - matches = false; |
| - return; |
| - } |
| - BytesRef payload = postings.getPayload(); |
| - if (payloadToMatch.get(upto) == null) { |
| - matches = payload == null; |
| - upto++; |
| - return; |
| - } |
| - if (payload == null) { |
| - matches = false; |
| - upto++; |
| - return; |
| - } |
| - matches = payloadToMatch.get(upto).bytesEquals(payload); |
| - upto++; |
| - } |
| - |
| - AcceptStatus match() { |
| - return matches && upto == payloadToMatch.size() ? AcceptStatus.YES : AcceptStatus.NO; |
| - } |
| - |
| - @Override |
| - public void reset() { |
| - this.upto = 0; |
| - this.matches = true; |
| - } |
| - } |
| - |
| - @Override |
| - public String toString(String field) { |
| - StringBuilder buffer = new StringBuilder(); |
| - buffer.append("spanPayCheck("); |
| - buffer.append(match.toString(field)); |
| - buffer.append(", payloadRef: "); |
| - for (BytesRef bytes : payloadToMatch) { |
| - buffer.append(Term.toString(bytes)); |
| - buffer.append(';'); |
| - } |
| - buffer.append(")"); |
| - return buffer.toString(); |
| - } |
| - |
| - @Override |
| - public boolean equals(Object o) { |
| - if (! super.equals(o)) { |
| - return false; |
| - } |
| - SpanPayloadCheckQuery other = (SpanPayloadCheckQuery)o; |
| - return this.payloadToMatch.equals(other.payloadToMatch); |
| - } |
| - |
| - @Override |
| - public int hashCode() { |
| - int h = super.hashCode(); |
| - h = (h * 63) ^ payloadToMatch.hashCode(); |
| - return h; |
| - } |
| -} |
| \ No newline at end of file |
| Index: lucene/core/src/java/org/apache/lucene/search/payloads/package-info.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/search/payloads/package-info.java (revision 1703369) |
| +++ lucene/core/src/java/org/apache/lucene/search/payloads/package-info.java (working copy) |
| @@ -1,27 +0,0 @@ |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -/** |
| - * The payloads package provides Query mechanisms for finding and using payloads. |
| - * <p> |
| - * The following Query implementations are provided: |
| - * <ol> |
| - * <li>{@link org.apache.lucene.search.payloads.PayloadScoreQuery PayloadScoreQuery} -- For all terms matched by |
| - * a SpanQuery, boost the score based on the value of the payload located at those terms.</li> |
| - * </ol> |
| - */ |
| -package org.apache.lucene.search.payloads; |
| Index: lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java (revision 1703369) |
| +++ lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java (working copy) |
| @@ -19,8 +19,8 @@ |
| |
| import java.io.IOException; |
| import java.io.StringReader; |
| -import java.nio.charset.StandardCharsets; |
| -import java.util.Collection; |
| +import java.util.ArrayList; |
| +import java.util.List; |
| |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.MockPayloadAnalyzer; |
| @@ -38,9 +38,8 @@ |
| import org.apache.lucene.index.RandomIndexWriter; |
| import org.apache.lucene.index.SlowCompositeReaderWrapper; |
| import org.apache.lucene.index.Term; |
| -import org.apache.lucene.search.payloads.PayloadSpanCollector; |
| -import org.apache.lucene.search.payloads.PayloadSpanUtil; |
| import org.apache.lucene.search.spans.MultiSpansWrapper; |
| +import org.apache.lucene.search.spans.SpanCollector; |
| import org.apache.lucene.search.spans.SpanNearQuery; |
| import org.apache.lucene.search.spans.SpanQuery; |
| import org.apache.lucene.search.spans.SpanTermQuery; |
| @@ -201,6 +200,22 @@ |
| store.close(); |
| } |
| |
| + static class PayloadSpanCollector implements SpanCollector { |
| + |
| + List<BytesRef> payloads = new ArrayList<>(); |
| + |
| + @Override |
| + public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException { |
| + if (postings.getPayload() != null) |
| + payloads.add(BytesRef.deepCopyOf(postings.getPayload())); |
| + } |
| + |
| + @Override |
| + public void reset() { |
| + payloads.clear(); |
| + } |
| + } |
| + |
| public void testPayloadsPos0() throws Exception { |
| Directory dir = newDirectory(); |
| RandomIndexWriter writer = new RandomIndexWriter(random(), dir, new MockPayloadAnalyzer()); |
| @@ -248,12 +263,11 @@ |
| } |
| collector.reset(); |
| pspans.collect(collector); |
| - Collection<byte[]> payloads = collector.getPayloads(); |
| sawZero |= pspans.startPosition() == 0; |
| - for (byte[] bytes : payloads) { |
| + for (BytesRef payload : collector.payloads) { |
| count++; |
| if (VERBOSE) { |
| - System.out.println(" payload: " + new String(bytes, StandardCharsets.UTF_8)); |
| + System.out.println(" payload: " + Term.toString(payload)); |
| } |
| } |
| } |
| @@ -276,17 +290,6 @@ |
| assertEquals(4, count); |
| assertTrue(sawZero); |
| |
| - sawZero = false; |
| - PayloadSpanUtil psu = new PayloadSpanUtil(is.getTopReaderContext()); |
| - Collection<byte[]> pls = psu.getPayloadsForQuery(snq); |
| - count = pls.size(); |
| - for (byte[] bytes : pls) { |
| - String s = new String(bytes, StandardCharsets.UTF_8); |
| - //System.out.println(s); |
| - sawZero |= s.equals("pos: 0"); |
| - } |
| - assertEquals(8, count); |
| - assertTrue(sawZero); |
| writer.close(); |
| is.getIndexReader().close(); |
| dir.close(); |
| Index: lucene/core/src/test/org/apache/lucene/search/payloads/PayloadHelper.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/search/payloads/PayloadHelper.java (revision 1703369) |
| +++ lucene/core/src/test/org/apache/lucene/search/payloads/PayloadHelper.java (working copy) |
| @@ -1,141 +0,0 @@ |
| -package org.apache.lucene.search.payloads; |
| - |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import org.apache.lucene.analysis.*; |
| -import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; |
| -import org.apache.lucene.index.DirectoryReader; |
| -import org.apache.lucene.index.IndexWriterConfig; |
| -import org.apache.lucene.index.IndexWriter; |
| -import org.apache.lucene.document.Document; |
| -import org.apache.lucene.document.Field; |
| -import org.apache.lucene.document.TextField; |
| -import org.apache.lucene.util.BytesRef; |
| -import org.apache.lucene.util.English; |
| -import org.apache.lucene.util.LuceneTestCase; |
| -import org.apache.lucene.index.IndexReader; |
| -import org.apache.lucene.search.IndexSearcher; |
| -import org.apache.lucene.search.similarities.Similarity; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.store.MockDirectoryWrapper; |
| -import org.apache.lucene.store.RAMDirectory; |
| - |
| -import java.io.IOException; |
| -import java.util.Random; |
| - |
| -/** |
| - * |
| - * |
| - **/ |
| -public class PayloadHelper { |
| - |
| - private byte[] payloadField = new byte[]{1}; |
| - private byte[] payloadMultiField1 = new byte[]{2}; |
| - private byte[] payloadMultiField2 = new byte[]{4}; |
| - public static final String NO_PAYLOAD_FIELD = "noPayloadField"; |
| - public static final String MULTI_FIELD = "multiField"; |
| - public static final String FIELD = "field"; |
| - |
| - public IndexReader reader; |
| - |
| - public final class PayloadAnalyzer extends Analyzer { |
| - |
| - public PayloadAnalyzer() { |
| - super(PER_FIELD_REUSE_STRATEGY); |
| - } |
| - |
| - @Override |
| - public TokenStreamComponents createComponents(String fieldName) { |
| - Tokenizer result = new MockTokenizer(MockTokenizer.SIMPLE, true); |
| - return new TokenStreamComponents(result, new PayloadFilter(result, fieldName)); |
| - } |
| - } |
| - |
| - public final class PayloadFilter extends TokenFilter { |
| - private final String fieldName; |
| - private int numSeen = 0; |
| - private final PayloadAttribute payloadAtt; |
| - |
| - public PayloadFilter(TokenStream input, String fieldName) { |
| - super(input); |
| - this.fieldName = fieldName; |
| - payloadAtt = addAttribute(PayloadAttribute.class); |
| - } |
| - |
| - @Override |
| - public boolean incrementToken() throws IOException { |
| - |
| - if (input.incrementToken()) { |
| - if (fieldName.equals(FIELD)) { |
| - payloadAtt.setPayload(new BytesRef(payloadField)); |
| - } else if (fieldName.equals(MULTI_FIELD)) { |
| - if (numSeen % 2 == 0) { |
| - payloadAtt.setPayload(new BytesRef(payloadMultiField1)); |
| - } |
| - else { |
| - payloadAtt.setPayload(new BytesRef(payloadMultiField2)); |
| - } |
| - numSeen++; |
| - } |
| - return true; |
| - } |
| - return false; |
| - } |
| - |
| - @Override |
| - public void reset() throws IOException { |
| - super.reset(); |
| - this.numSeen = 0; |
| - } |
| - } |
| - |
| - /** |
| - * Sets up a RAMDirectory, and adds documents (using English.intToEnglish()) with two fields: field and multiField |
| - * and analyzes them using the PayloadAnalyzer |
| - * @param similarity The Similarity class to use in the Searcher |
| - * @param numDocs The num docs to add |
| - * @return An IndexSearcher |
| - */ |
| - // TODO: randomize |
| - public IndexSearcher setUp(Random random, Similarity similarity, int numDocs) throws IOException { |
| - Directory directory = new MockDirectoryWrapper(random, new RAMDirectory()); |
| - PayloadAnalyzer analyzer = new PayloadAnalyzer(); |
| - |
| - // TODO randomize this |
| - IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig( |
| - analyzer).setSimilarity(similarity)); |
| - // writer.infoStream = System.out; |
| - for (int i = 0; i < numDocs; i++) { |
| - Document doc = new Document(); |
| - doc.add(new TextField(FIELD, English.intToEnglish(i), Field.Store.YES)); |
| - doc.add(new TextField(MULTI_FIELD, English.intToEnglish(i) + " " + English.intToEnglish(i), Field.Store.YES)); |
| - doc.add(new TextField(NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES)); |
| - writer.addDocument(doc); |
| - } |
| - reader = DirectoryReader.open(writer, true); |
| - writer.close(); |
| - |
| - IndexSearcher searcher = LuceneTestCase.newSearcher(reader); |
| - searcher.setSimilarity(similarity); |
| - return searcher; |
| - } |
| - |
| - public void tearDown() throws Exception { |
| - reader.close(); |
| - } |
| -} |
| Index: lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadCheckQuery.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadCheckQuery.java (revision 1703369) |
| +++ lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadCheckQuery.java (working copy) |
| @@ -1,192 +0,0 @@ |
| -package org.apache.lucene.search.payloads; |
| - |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| -import java.nio.charset.StandardCharsets; |
| -import java.util.ArrayList; |
| -import java.util.Collections; |
| -import java.util.List; |
| - |
| -import org.apache.lucene.analysis.Analyzer; |
| -import org.apache.lucene.analysis.MockTokenizer; |
| -import org.apache.lucene.analysis.SimplePayloadFilter; |
| -import org.apache.lucene.analysis.Tokenizer; |
| -import org.apache.lucene.document.Document; |
| -import org.apache.lucene.document.Field; |
| -import org.apache.lucene.index.IndexReader; |
| -import org.apache.lucene.index.RandomIndexWriter; |
| -import org.apache.lucene.index.Term; |
| -import org.apache.lucene.search.CheckHits; |
| -import org.apache.lucene.search.IndexSearcher; |
| -import org.apache.lucene.search.Query; |
| -import org.apache.lucene.search.spans.SpanNearQuery; |
| -import org.apache.lucene.search.spans.SpanPositionRangeQuery; |
| -import org.apache.lucene.search.spans.SpanQuery; |
| -import org.apache.lucene.search.spans.SpanTermQuery; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.util.BytesRef; |
| -import org.apache.lucene.util.English; |
| -import org.apache.lucene.util.LuceneTestCase; |
| -import org.apache.lucene.util.TestUtil; |
| -import org.junit.AfterClass; |
| -import org.junit.BeforeClass; |
| - |
| -/** basic test of payload-spans */ |
| -public class TestPayloadCheckQuery extends LuceneTestCase { |
| - private static IndexSearcher searcher; |
| - private static IndexReader reader; |
| - private static Directory directory; |
| - |
| - @BeforeClass |
| - public static void beforeClass() throws Exception { |
| - Analyzer simplePayloadAnalyzer = new Analyzer() { |
| - @Override |
| - public TokenStreamComponents createComponents(String fieldName) { |
| - Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true); |
| - return new TokenStreamComponents(tokenizer, new SimplePayloadFilter(tokenizer)); |
| - } |
| - }; |
| - |
| - directory = newDirectory(); |
| - RandomIndexWriter writer = new RandomIndexWriter(random(), directory, |
| - newIndexWriterConfig(simplePayloadAnalyzer) |
| - .setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000)).setMergePolicy(newLogMergePolicy())); |
| - //writer.infoStream = System.out; |
| - for (int i = 0; i < 2000; i++) { |
| - Document doc = new Document(); |
| - doc.add(newTextField("field", English.intToEnglish(i), Field.Store.YES)); |
| - writer.addDocument(doc); |
| - } |
| - reader = writer.getReader(); |
| - searcher = newSearcher(reader); |
| - writer.close(); |
| - } |
| - |
| - @AfterClass |
| - public static void afterClass() throws Exception { |
| - reader.close(); |
| - directory.close(); |
| - searcher = null; |
| - reader = null; |
| - directory = null; |
| - } |
| - |
| - private void checkHits(Query query, int[] results) throws IOException { |
| - CheckHits.checkHits(random(), query, "field", searcher, results); |
| - } |
| - |
| - public void testSpanPayloadCheck() throws Exception { |
| - SpanQuery term1 = new SpanTermQuery(new Term("field", "five")); |
| - BytesRef pay = new BytesRef("pos: " + 5); |
| - SpanQuery query = new SpanPayloadCheckQuery(term1, Collections.singletonList(pay)); |
| - checkHits(query, new int[] |
| - {1125, 1135, 1145, 1155, 1165, 1175, 1185, 1195, 1225, 1235, 1245, 1255, 1265, 1275, 1285, 1295, 1325, 1335, 1345, 1355, 1365, 1375, 1385, 1395, 1425, 1435, 1445, 1455, 1465, 1475, 1485, 1495, 1525, 1535, 1545, 1555, 1565, 1575, 1585, 1595, 1625, 1635, 1645, 1655, 1665, 1675, 1685, 1695, 1725, 1735, 1745, 1755, 1765, 1775, 1785, 1795, 1825, 1835, 1845, 1855, 1865, 1875, 1885, 1895, 1925, 1935, 1945, 1955, 1965, 1975, 1985, 1995}); |
| - assertTrue(searcher.explain(query, 1125).getValue() > 0.0f); |
| - |
| - SpanTermQuery term2 = new SpanTermQuery(new Term("field", "hundred")); |
| - SpanNearQuery snq; |
| - SpanQuery[] clauses; |
| - List<BytesRef> list; |
| - BytesRef pay2; |
| - clauses = new SpanQuery[2]; |
| - clauses[0] = term1; |
| - clauses[1] = term2; |
| - snq = new SpanNearQuery(clauses, 0, true); |
| - pay = new BytesRef("pos: " + 0); |
| - pay2 = new BytesRef("pos: " + 1); |
| - list = new ArrayList<>(); |
| - list.add(pay); |
| - list.add(pay2); |
| - query = new SpanPayloadCheckQuery(snq, list); |
| - checkHits(query, new int[] |
| - {500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599}); |
| - clauses = new SpanQuery[3]; |
| - clauses[0] = term1; |
| - clauses[1] = term2; |
| - clauses[2] = new SpanTermQuery(new Term("field", "five")); |
| - snq = new SpanNearQuery(clauses, 0, true); |
| - pay = new BytesRef("pos: " + 0); |
| - pay2 = new BytesRef("pos: " + 1); |
| - BytesRef pay3 = new BytesRef("pos: " + 2); |
| - list = new ArrayList<>(); |
| - list.add(pay); |
| - list.add(pay2); |
| - list.add(pay3); |
| - query = new SpanPayloadCheckQuery(snq, list); |
| - checkHits(query, new int[] |
| - {505}); |
| - } |
| - |
| - public void testUnorderedPayloadChecks() throws Exception { |
| - |
| - SpanTermQuery term5 = new SpanTermQuery(new Term("field", "five")); |
| - SpanTermQuery term100 = new SpanTermQuery(new Term("field", "hundred")); |
| - SpanTermQuery term4 = new SpanTermQuery(new Term("field", "four")); |
| - SpanNearQuery nearQuery = new SpanNearQuery(new SpanQuery[]{term5, term100, term4}, 0, false); |
| - |
| - List<BytesRef> payloads = new ArrayList<>(); |
| - payloads.add(new BytesRef("pos: " + 2)); |
| - payloads.add(new BytesRef("pos: " + 1)); |
| - payloads.add(new BytesRef("pos: " + 0)); |
| - |
| - SpanPayloadCheckQuery payloadQuery = new SpanPayloadCheckQuery(nearQuery, payloads); |
| - checkHits(payloadQuery, new int[]{ 405 }); |
| - |
| - payloads.clear(); |
| - payloads.add(new BytesRef("pos: " + 0)); |
| - payloads.add(new BytesRef("pos: " + 1)); |
| - payloads.add(new BytesRef("pos: " + 2)); |
| - |
| - payloadQuery = new SpanPayloadCheckQuery(nearQuery, payloads); |
| - checkHits(payloadQuery, new int[]{ 504 }); |
| - |
| - } |
| - |
| - public void testComplexSpanChecks() throws Exception { |
| - SpanTermQuery one = new SpanTermQuery(new Term("field", "one")); |
| - SpanTermQuery thous = new SpanTermQuery(new Term("field", "thousand")); |
| - //should be one position in between |
| - SpanTermQuery hundred = new SpanTermQuery(new Term("field", "hundred")); |
| - SpanTermQuery three = new SpanTermQuery(new Term("field", "three")); |
| - |
| - SpanNearQuery oneThous = new SpanNearQuery(new SpanQuery[]{one, thous}, 0, true); |
| - SpanNearQuery hundredThree = new SpanNearQuery(new SpanQuery[]{hundred, three}, 0, true); |
| - SpanNearQuery oneThousHunThree = new SpanNearQuery(new SpanQuery[]{oneThous, hundredThree}, 1, true); |
| - SpanQuery query; |
| - //this one's too small |
| - query = new SpanPositionRangeQuery(oneThousHunThree, 1, 2); |
| - checkHits(query, new int[]{}); |
| - //this one's just right |
| - query = new SpanPositionRangeQuery(oneThousHunThree, 0, 6); |
| - checkHits(query, new int[]{1103, 1203,1303,1403,1503,1603,1703,1803,1903}); |
| - |
| - List<BytesRef> payloads = new ArrayList<>(); |
| - BytesRef pay = new BytesRef(("pos: " + 0).getBytes(StandardCharsets.UTF_8)); |
| - BytesRef pay2 = new BytesRef(("pos: " + 1).getBytes(StandardCharsets.UTF_8)); |
| - BytesRef pay3 = new BytesRef(("pos: " + 3).getBytes(StandardCharsets.UTF_8)); |
| - BytesRef pay4 = new BytesRef(("pos: " + 4).getBytes(StandardCharsets.UTF_8)); |
| - payloads.add(pay); |
| - payloads.add(pay2); |
| - payloads.add(pay3); |
| - payloads.add(pay4); |
| - query = new SpanPayloadCheckQuery(oneThousHunThree, payloads); |
| - checkHits(query, new int[]{1103, 1203,1303,1403,1503,1603,1703,1803,1903}); |
| - |
| - } |
| -} |
| Index: lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadExplanations.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadExplanations.java (revision 1703369) |
| +++ lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadExplanations.java (working copy) |
| @@ -1,127 +0,0 @@ |
| -package org.apache.lucene.search.payloads; |
| - |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import org.apache.lucene.index.Term; |
| -import org.apache.lucene.search.BaseExplanationTestCase; |
| -import org.apache.lucene.search.similarities.ClassicSimilarity; |
| -import org.apache.lucene.search.spans.SpanBoostQuery; |
| -import org.apache.lucene.search.spans.SpanNearQuery; |
| -import org.apache.lucene.search.spans.SpanOrQuery; |
| -import org.apache.lucene.search.spans.SpanQuery; |
| -import org.apache.lucene.search.spans.SpanTermQuery; |
| -import org.apache.lucene.util.BytesRef; |
| - |
| -/** |
| - * TestExplanations subclass focusing on payload queries |
| - */ |
| -public class TestPayloadExplanations extends BaseExplanationTestCase { |
| - |
| - private static PayloadFunction functions[] = new PayloadFunction[] { |
| - new AveragePayloadFunction(), |
| - new MinPayloadFunction(), |
| - new MaxPayloadFunction(), |
| - }; |
| - |
| - @Override |
| - public void setUp() throws Exception { |
| - super.setUp(); |
| - searcher.setSimilarity(new ClassicSimilarity() { |
| - @Override |
| - public float scorePayload(int doc, int start, int end, BytesRef payload) { |
| - return 1 + (payload.hashCode() % 10); |
| - } |
| - }); |
| - } |
| - |
| - /** macro for payloadscorequery */ |
| - private SpanQuery pt(String s, PayloadFunction fn) { |
| - return new PayloadScoreQuery(new SpanTermQuery(new Term(FIELD,s)), fn); |
| - } |
| - |
| - /* simple PayloadTermQueries */ |
| - |
| - public void testPT1() throws Exception { |
| - for (PayloadFunction fn : functions) { |
| - qtest(pt("w1", fn), new int[] {0,1,2,3}); |
| - } |
| - } |
| - |
| - public void testPT2() throws Exception { |
| - for (PayloadFunction fn : functions) { |
| - SpanQuery q = pt("w1", fn); |
| - qtest(new SpanBoostQuery(q, 1000), new int[] {0,1,2,3}); |
| - } |
| - } |
| - |
| - public void testPT4() throws Exception { |
| - for (PayloadFunction fn : functions) { |
| - qtest(pt("xx", fn), new int[] {2,3}); |
| - } |
| - } |
| - |
| - public void testPT5() throws Exception { |
| - for (PayloadFunction fn : functions) { |
| - SpanQuery q = pt("xx", fn); |
| - qtest(new SpanBoostQuery(q, 1000), new int[] {2,3}); |
| - } |
| - } |
| - |
| - // TODO: test the payloadnear query too! |
| - |
| - /* |
| - protected static final String[] docFields = { |
| - "w1 w2 w3 w4 w5", |
| - "w1 w3 w2 w3 zz", |
| - "w1 xx w2 yy w3", |
| - "w1 w3 xx w2 yy w3 zz" |
| - }; |
| - */ |
| - |
| - public void testAllFunctions(SpanQuery query, int[] expected) throws Exception { |
| - for (PayloadFunction fn : functions) { |
| - qtest(new PayloadScoreQuery(query, fn), expected); |
| - } |
| - } |
| - |
| - public void testSimpleTerm() throws Exception { |
| - SpanTermQuery q = new SpanTermQuery(new Term(FIELD, "w2")); |
| - testAllFunctions(q, new int[]{ 0, 1, 2, 3}); |
| - } |
| - |
| - public void testOrTerm() throws Exception { |
| - SpanOrQuery q = new SpanOrQuery( |
| - new SpanTermQuery(new Term(FIELD, "xx")), new SpanTermQuery(new Term(FIELD, "yy")) |
| - ); |
| - testAllFunctions(q, new int[]{ 2, 3 }); |
| - } |
| - |
| - public void testOrderedNearQuery() throws Exception { |
| - SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{ |
| - new SpanTermQuery(new Term(FIELD, "w3")), new SpanTermQuery(new Term(FIELD, "w2")) |
| - }, 1, true); |
| - testAllFunctions(q, new int[]{ 1, 3 }); |
| - } |
| - |
| - public void testUnorderedNearQuery() throws Exception { |
| - SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{ |
| - new SpanTermQuery(new Term(FIELD, "w2")), new SpanTermQuery(new Term(FIELD, "w3")) |
| - }, 1, false); |
| - testAllFunctions(q, new int[]{ 0, 1, 2, 3 }); |
| - } |
| -} |
| Index: lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadScoreQuery.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadScoreQuery.java (revision 1703369) |
| +++ lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadScoreQuery.java (working copy) |
| @@ -1,286 +0,0 @@ |
| -package org.apache.lucene.search.payloads; |
| - |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| - |
| -import org.apache.lucene.analysis.Analyzer; |
| -import org.apache.lucene.analysis.MockTokenizer; |
| -import org.apache.lucene.analysis.TokenFilter; |
| -import org.apache.lucene.analysis.TokenStream; |
| -import org.apache.lucene.analysis.Tokenizer; |
| -import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; |
| -import org.apache.lucene.document.Document; |
| -import org.apache.lucene.document.Field; |
| -import org.apache.lucene.index.FieldInvertState; |
| -import org.apache.lucene.index.IndexReader; |
| -import org.apache.lucene.index.NoMergePolicy; |
| -import org.apache.lucene.index.RandomIndexWriter; |
| -import org.apache.lucene.index.Term; |
| -import org.apache.lucene.search.CollectionStatistics; |
| -import org.apache.lucene.search.Explanation; |
| -import org.apache.lucene.search.IndexSearcher; |
| -import org.apache.lucene.search.TermStatistics; |
| -import org.apache.lucene.search.TopDocs; |
| -import org.apache.lucene.search.similarities.ClassicSimilarity; |
| -import org.apache.lucene.search.spans.SpanContainingQuery; |
| -import org.apache.lucene.search.spans.SpanNearQuery; |
| -import org.apache.lucene.search.spans.SpanOrQuery; |
| -import org.apache.lucene.search.spans.SpanQuery; |
| -import org.apache.lucene.search.spans.SpanTermQuery; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.util.BytesRef; |
| -import org.apache.lucene.util.English; |
| -import org.apache.lucene.util.LuceneTestCase; |
| -import org.junit.AfterClass; |
| -import org.junit.BeforeClass; |
| -import org.junit.Test; |
| - |
| -public class TestPayloadScoreQuery extends LuceneTestCase { |
| - |
| - private static void checkQuery(SpanQuery query, PayloadFunction function, int[] expectedDocs, float[] expectedScores) throws IOException { |
| - |
| - assertTrue("Expected docs and scores arrays must be the same length!", expectedDocs.length == expectedScores.length); |
| - |
| - PayloadScoreQuery psq = new PayloadScoreQuery(query, function); |
| - TopDocs hits = searcher.search(psq, expectedDocs.length); |
| - |
| - for (int i = 0; i < hits.scoreDocs.length; i++) { |
| - if (i > expectedDocs.length - 1) |
| - fail("Unexpected hit in document " + hits.scoreDocs[i].doc); |
| - if (hits.scoreDocs[i].doc != expectedDocs[i]) |
| - fail("Unexpected hit in document " + hits.scoreDocs[i].doc); |
| - assertEquals("Bad score in document " + expectedDocs[i], expectedScores[i], hits.scoreDocs[i].score, 0.000001); |
| - } |
| - |
| - if (hits.scoreDocs.length > expectedDocs.length) |
| - fail("Unexpected hit in document " + hits.scoreDocs[expectedDocs.length]); |
| - } |
| - |
| - @Test |
| - public void testTermQuery() throws IOException { |
| - |
| - SpanTermQuery q = new SpanTermQuery(new Term("field", "eighteen")); |
| - for (PayloadFunction fn |
| - : new PayloadFunction[]{ new AveragePayloadFunction(), new MaxPayloadFunction(), new MinPayloadFunction() }) { |
| - checkQuery(q, fn, new int[]{ 118, 218, 18 }, |
| - new float[] { 4.0f, 4.0f, 2.0f }); |
| - } |
| - |
| - } |
| - |
| - @Test |
| - public void testOrQuery() throws IOException { |
| - |
| - SpanOrQuery q = new SpanOrQuery(new SpanTermQuery(new Term("field", "eighteen")), |
| - new SpanTermQuery(new Term("field", "nineteen"))); |
| - for (PayloadFunction fn |
| - : new PayloadFunction[]{ new AveragePayloadFunction(), new MaxPayloadFunction(), new MinPayloadFunction() }) { |
| - checkQuery(q, fn, new int[]{ 118, 119, 218, 219, 18, 19 }, |
| - new float[] { 4.0f, 4.0f, 4.0f, 4.0f, 2.0f, 2.0f }); |
| - } |
| - |
| - } |
| - |
| - @Test |
| - public void testNearQuery() throws IOException { |
| - |
| - // 2 4 |
| - // twenty two |
| - // 2 4 4 4 |
| - // one hundred twenty two |
| - |
| - SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{ |
| - new SpanTermQuery(new Term("field", "twenty")), |
| - new SpanTermQuery(new Term("field", "two")) |
| - }, 0, true); |
| - |
| - checkQuery(q, new MaxPayloadFunction(), new int[]{ 22, 122, 222 }, new float[]{ 4.0f, 4.0f, 4.0f }); |
| - checkQuery(q, new MinPayloadFunction(), new int[]{ 122, 222, 22 }, new float[]{ 4.0f, 4.0f, 2.0f }); |
| - checkQuery(q, new AveragePayloadFunction(), new int[] { 122, 222, 22 }, new float[] { 4.0f, 4.0f, 3.0f }); |
| - |
| - } |
| - |
| - @Test |
| - public void testNestedNearQuery() throws Exception { |
| - |
| - // (one OR hundred) NEAR (twenty two) ~ 1 |
| - // 2 4 4 4 |
| - // one hundred twenty two |
| - // two hundred twenty two |
| - |
| - SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{ |
| - new SpanOrQuery(new SpanTermQuery(new Term("field", "one")), new SpanTermQuery(new Term("field", "hundred"))), |
| - new SpanNearQuery(new SpanQuery[]{ |
| - new SpanTermQuery(new Term("field", "twenty")), |
| - new SpanTermQuery(new Term("field", "two")) |
| - }, 0, true) |
| - }, 1, true); |
| - |
| - checkQuery(q, new MaxPayloadFunction(), new int[]{ 122, 222 }, new float[]{ 4.0f, 4.0f }); |
| - checkQuery(q, new MinPayloadFunction(), new int[]{ 222, 122 }, new float[]{ 4.0f, 2.0f }); |
| - checkQuery(q, new AveragePayloadFunction(), new int[] { 222, 122 }, new float[]{ 4.0f, 3.666666f }); |
| - |
| - } |
| - |
| - @Test |
| - public void testSpanContainingQuery() throws Exception { |
| - |
| - // twenty WITHIN ((one OR hundred) NEAR two)~2 |
| - SpanContainingQuery q = new SpanContainingQuery( |
| - new SpanNearQuery(new SpanQuery[]{ |
| - new SpanOrQuery(new SpanTermQuery(new Term("field", "one")), new SpanTermQuery(new Term("field", "hundred"))), |
| - new SpanTermQuery(new Term("field", "two")) |
| - }, 2, true), |
| - new SpanTermQuery(new Term("field", "twenty")) |
| - ); |
| - |
| - checkQuery(q, new AveragePayloadFunction(), new int[] { 222, 122 }, new float[]{ 4.0f, 3.666666f }); |
| - checkQuery(q, new MaxPayloadFunction(), new int[]{ 122, 222 }, new float[]{ 4.0f, 4.0f }); |
| - checkQuery(q, new MinPayloadFunction(), new int[]{ 222, 122 }, new float[]{ 4.0f, 2.0f }); |
| - |
| - } |
| - |
| - private static IndexSearcher searcher; |
| - private static IndexReader reader; |
| - private static Directory directory; |
| - private static BoostingSimilarity similarity = new BoostingSimilarity(); |
| - private static byte[] payload2 = new byte[]{2}; |
| - private static byte[] payload4 = new byte[]{4}; |
| - |
| - private static class PayloadAnalyzer extends Analyzer { |
| - @Override |
| - public TokenStreamComponents createComponents(String fieldName) { |
| - Tokenizer result = new MockTokenizer(MockTokenizer.SIMPLE, true); |
| - return new TokenStreamComponents(result, new PayloadFilter(result)); |
| - } |
| - } |
| - |
| - private static class PayloadFilter extends TokenFilter { |
| - |
| - private int numSeen = 0; |
| - private final PayloadAttribute payAtt; |
| - |
| - public PayloadFilter(TokenStream input) { |
| - super(input); |
| - payAtt = addAttribute(PayloadAttribute.class); |
| - } |
| - |
| - @Override |
| - public boolean incrementToken() throws IOException { |
| - boolean result = false; |
| - if (input.incrementToken()) { |
| - if (numSeen % 4 == 0) { |
| - payAtt.setPayload(new BytesRef(payload2)); |
| - } else { |
| - payAtt.setPayload(new BytesRef(payload4)); |
| - } |
| - numSeen++; |
| - result = true; |
| - } |
| - return result; |
| - } |
| - |
| - @Override |
| - public void reset() throws IOException { |
| - super.reset(); |
| - this.numSeen = 0; |
| - } |
| - } |
| - |
| - @BeforeClass |
| - public static void beforeClass() throws Exception { |
| - directory = newDirectory(); |
| - RandomIndexWriter writer = new RandomIndexWriter(random(), directory, |
| - newIndexWriterConfig(new PayloadAnalyzer()) |
| - .setMergePolicy(NoMergePolicy.INSTANCE) |
| - .setSimilarity(similarity)); |
| - //writer.infoStream = System.out; |
| - for (int i = 0; i < 300; i++) { |
| - Document doc = new Document(); |
| - doc.add(newTextField("field", English.intToEnglish(i), Field.Store.YES)); |
| - String txt = English.intToEnglish(i) +' '+English.intToEnglish(i+1); |
| - doc.add(newTextField("field2", txt, Field.Store.YES)); |
| - writer.addDocument(doc); |
| - } |
| - reader = writer.getReader(); |
| - writer.close(); |
| - |
| - searcher = newSearcher(reader); |
| - searcher.setSimilarity(similarity); |
| - } |
| - |
| - @AfterClass |
| - public static void afterClass() throws Exception { |
| - searcher = null; |
| - reader.close(); |
| - reader = null; |
| - directory.close(); |
| - directory = null; |
| - } |
| - |
| - static class BoostingSimilarity extends ClassicSimilarity { |
| - |
| - @Override |
| - public float queryNorm(float sumOfSquaredWeights) { |
| - return 1.0f; |
| - } |
| - |
| - @Override |
| - public float coord(int overlap, int maxOverlap) { |
| - return 1.0f; |
| - } |
| - |
| - @Override |
| - public float scorePayload(int docId, int start, int end, BytesRef payload) { |
| - //we know it is size 4 here, so ignore the offset/length |
| - return payload.bytes[payload.offset]; |
| - } |
| - |
| - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
| - //Make everything else 1 so we see the effect of the payload |
| - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
| - @Override |
| - public float lengthNorm(FieldInvertState state) { |
| - return state.getBoost(); |
| - } |
| - |
| - @Override |
| - public float sloppyFreq(int distance) { |
| - return 1.0f; |
| - } |
| - |
| - @Override |
| - public float tf(float freq) { |
| - return 1.0f; |
| - } |
| - |
| - // idf used for phrase queries |
| - @Override |
| - public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics[] termStats) { |
| - return Explanation.match(1.0f, "Inexplicable"); |
| - } |
| - |
| - @Override |
| - public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) { |
| - return Explanation.match(1.0f, "Inexplicable"); |
| - } |
| - |
| - } |
| - |
| -} |
| Index: lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadSpans.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadSpans.java (revision 1703369) |
| +++ lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadSpans.java (working copy) |
| @@ -1,551 +0,0 @@ |
| -package org.apache.lucene.search.payloads; |
| - |
| -/* |
| - * Copyright 2004 The Apache Software Foundation |
| - * |
| - * Licensed under the Apache License, Version 2.0 (the "License"); |
| - * you may not use this file except in compliance with the License. |
| - * You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import org.apache.lucene.analysis.Analyzer; |
| -import org.apache.lucene.analysis.MockTokenizer; |
| -import org.apache.lucene.analysis.TokenFilter; |
| -import org.apache.lucene.analysis.TokenStream; |
| -import org.apache.lucene.analysis.Tokenizer; |
| -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; |
| -import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; |
| -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; |
| -import org.apache.lucene.document.Document; |
| -import org.apache.lucene.document.Field; |
| -import org.apache.lucene.document.TextField; |
| -import org.apache.lucene.index.IndexReader; |
| -import org.apache.lucene.index.PostingsEnum; |
| -import org.apache.lucene.index.RandomIndexWriter; |
| -import org.apache.lucene.index.Term; |
| -import org.apache.lucene.search.IndexSearcher; |
| -import org.apache.lucene.search.TermQuery; |
| -import org.apache.lucene.search.TopDocs; |
| -import org.apache.lucene.search.similarities.ClassicSimilarity; |
| -import org.apache.lucene.search.similarities.Similarity; |
| -import org.apache.lucene.search.spans.MultiSpansWrapper; |
| -import org.apache.lucene.search.spans.SpanFirstQuery; |
| -import org.apache.lucene.search.spans.SpanNearQuery; |
| -import org.apache.lucene.search.spans.SpanNotQuery; |
| -import org.apache.lucene.search.spans.SpanQuery; |
| -import org.apache.lucene.search.spans.SpanTermQuery; |
| -import org.apache.lucene.search.spans.SpanWeight; |
| -import org.apache.lucene.search.spans.Spans; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.util.BytesRef; |
| -import org.apache.lucene.util.LuceneTestCase; |
| - |
| -import java.io.IOException; |
| -import java.io.StringReader; |
| -import java.nio.charset.StandardCharsets; |
| -import java.util.Collection; |
| -import java.util.HashSet; |
| -import java.util.Set; |
| - |
| -public class TestPayloadSpans extends LuceneTestCase { |
| - private IndexSearcher searcher; |
| - private Similarity similarity = new ClassicSimilarity(); |
| - protected IndexReader indexReader; |
| - private IndexReader closeIndexReader; |
| - private Directory directory; |
| - |
| - @Override |
| - public void setUp() throws Exception { |
| - super.setUp(); |
| - PayloadHelper helper = new PayloadHelper(); |
| - searcher = helper.setUp(random(), similarity, 1000); |
| - indexReader = searcher.getIndexReader(); |
| - } |
| - |
| - public void testSpanTermQuery() throws Exception { |
| - SpanTermQuery stq; |
| - Spans spans; |
| - stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "seventy")); |
| - PayloadSpanCollector collector = new PayloadSpanCollector(); |
| - spans = MultiSpansWrapper.wrap(indexReader, stq, SpanWeight.Postings.PAYLOADS); |
| - assertTrue("spans is null and it shouldn't be", spans != null); |
| - checkSpans(spans, collector, 100, 1, 1, 1); |
| - |
| - stq = new SpanTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "seventy")); |
| - spans = MultiSpansWrapper.wrap(indexReader, stq, SpanWeight.Postings.PAYLOADS); |
| - assertTrue("spans is null and it shouldn't be", spans != null); |
| - checkSpans(spans, collector, 100, 0, 0, 0); |
| - } |
| - |
| - public void testSpanFirst() throws IOException { |
| - |
| - SpanQuery match; |
| - SpanFirstQuery sfq; |
| - match = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one")); |
| - sfq = new SpanFirstQuery(match, 2); |
| - PayloadSpanCollector collector = new PayloadSpanCollector(); |
| - Spans spans = MultiSpansWrapper.wrap(indexReader, sfq, SpanWeight.Postings.PAYLOADS); |
| - checkSpans(spans, collector, 109, 1, 1, 1); |
| - //Test more complicated subclause |
| - SpanQuery[] clauses = new SpanQuery[2]; |
| - clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one")); |
| - clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "hundred")); |
| - match = new SpanNearQuery(clauses, 0, true); |
| - sfq = new SpanFirstQuery(match, 2); |
| - checkSpans(MultiSpansWrapper.wrap(indexReader, sfq, SpanWeight.Postings.PAYLOADS), collector, 100, 2, 1, 1); |
| - |
| - match = new SpanNearQuery(clauses, 0, false); |
| - sfq = new SpanFirstQuery(match, 2); |
| - checkSpans(MultiSpansWrapper.wrap(indexReader, sfq, SpanWeight.Postings.PAYLOADS), collector, 100, 2, 1, 1); |
| - |
| - } |
| - |
| - public void testSpanNot() throws Exception { |
| - SpanQuery[] clauses = new SpanQuery[2]; |
| - clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one")); |
| - clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "three")); |
| - SpanQuery spq = new SpanNearQuery(clauses, 5, true); |
| - SpanNotQuery snq = new SpanNotQuery(spq, new SpanTermQuery(new Term(PayloadHelper.FIELD, "two"))); |
| - |
| - |
| - |
| - Directory directory = newDirectory(); |
| - RandomIndexWriter writer = new RandomIndexWriter(random(), directory, |
| - newIndexWriterConfig(new PayloadAnalyzer()).setSimilarity(similarity)); |
| - |
| - Document doc = new Document(); |
| - doc.add(newTextField(PayloadHelper.FIELD, "one two three one four three", Field.Store.YES)); |
| - writer.addDocument(doc); |
| - IndexReader reader = writer.getReader(); |
| - writer.close(); |
| - |
| - PayloadSpanCollector collector = new PayloadSpanCollector(); |
| - checkSpans(MultiSpansWrapper.wrap(reader, snq, SpanWeight.Postings.PAYLOADS), collector, 1, new int[]{2}); |
| - reader.close(); |
| - directory.close(); |
| - } |
| - |
| - public void testNestedSpans() throws Exception { |
| - SpanTermQuery stq; |
| - Spans spans; |
| - IndexSearcher searcher = getSearcher(); |
| - PayloadSpanCollector collector = new PayloadSpanCollector(); |
| - |
| - stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "mark")); |
| - spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), stq, SpanWeight.Postings.PAYLOADS); |
| - assertNull(spans); |
| - |
| - SpanQuery[] clauses = new SpanQuery[3]; |
| - clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr")); |
| - clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "yy")); |
| - clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx")); |
| - SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 12, false); |
| - |
| - spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery, SpanWeight.Postings.PAYLOADS); |
| - assertTrue("spans is null and it shouldn't be", spans != null); |
| - checkSpans(spans, collector, 2, new int[]{3,3}); |
| - |
| - |
| - clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx")); |
| - clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr")); |
| - clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "yy")); |
| - |
| - spanNearQuery = new SpanNearQuery(clauses, 6, true); |
| - |
| - spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery, SpanWeight.Postings.PAYLOADS); |
| - |
| - assertTrue("spans is null and it shouldn't be", spans != null); |
| - checkSpans(spans, collector, 1, new int[]{3}); |
| - |
| - clauses = new SpanQuery[2]; |
| - |
| - clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx")); |
| - clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr")); |
| - |
| - spanNearQuery = new SpanNearQuery(clauses, 6, true); |
| - |
| - // xx within 6 of rr |
| - |
| - SpanQuery[] clauses2 = new SpanQuery[2]; |
| - |
| - clauses2[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "yy")); |
| - clauses2[1] = spanNearQuery; |
| - |
| - SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses2, 6, false); |
| - |
| - // yy within 6 of xx within 6 of rr |
| - spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery, SpanWeight.Postings.PAYLOADS); |
| - assertTrue("spans is null and it shouldn't be", spans != null); |
| - checkSpans(spans, collector, 2, new int[]{3,3}); |
| - closeIndexReader.close(); |
| - directory.close(); |
| - } |
| - |
| - public void testFirstClauseWithoutPayload() throws Exception { |
| - Spans spans; |
| - IndexSearcher searcher = getSearcher(); |
| - |
| - SpanQuery[] clauses = new SpanQuery[3]; |
| - clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "nopayload")); |
| - clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "qq")); |
| - clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "ss")); |
| - |
| - SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 6, true); |
| - |
| - SpanQuery[] clauses2 = new SpanQuery[2]; |
| - |
| - clauses2[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "pp")); |
| - clauses2[1] = spanNearQuery; |
| - |
| - SpanNearQuery snq = new SpanNearQuery(clauses2, 6, false); |
| - |
| - SpanQuery[] clauses3 = new SpanQuery[2]; |
| - |
| - clauses3[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "np")); |
| - clauses3[1] = snq; |
| - |
| - PayloadSpanCollector collector = new PayloadSpanCollector(); |
| - SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false); |
| - spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery, SpanWeight.Postings.PAYLOADS); |
| - |
| - assertTrue("spans is null and it shouldn't be", spans != null); |
| - checkSpans(spans, collector, 1, new int[]{3}); |
| - closeIndexReader.close(); |
| - directory.close(); |
| - } |
| - |
| - public void testHeavilyNestedSpanQuery() throws Exception { |
| - Spans spans; |
| - IndexSearcher searcher = getSearcher(); |
| - |
| - SpanQuery[] clauses = new SpanQuery[3]; |
| - clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one")); |
| - clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "two")); |
| - clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "three")); |
| - |
| - SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 5, true); |
| - |
| - clauses = new SpanQuery[3]; |
| - clauses[0] = spanNearQuery; |
| - clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "five")); |
| - clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "six")); |
| - |
| - SpanNearQuery spanNearQuery2 = new SpanNearQuery(clauses, 6, true); |
| - |
| - SpanQuery[] clauses2 = new SpanQuery[2]; |
| - clauses2[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "eleven")); |
| - clauses2[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "ten")); |
| - SpanNearQuery spanNearQuery3 = new SpanNearQuery(clauses2, 2, false); |
| - |
| - SpanQuery[] clauses3 = new SpanQuery[3]; |
| - clauses3[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "nine")); |
| - clauses3[1] = spanNearQuery2; |
| - clauses3[2] = spanNearQuery3; |
| - |
| - SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false); |
| - |
| - PayloadSpanCollector collector = new PayloadSpanCollector(); |
| - spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery, SpanWeight.Postings.PAYLOADS); |
| - assertTrue("spans is null and it shouldn't be", spans != null); |
| - checkSpans(spans, collector, 2, new int[]{8, 8}); |
| - closeIndexReader.close(); |
| - directory.close(); |
| - } |
| - |
| - public void testShrinkToAfterShortestMatch() throws IOException { |
| - Directory directory = newDirectory(); |
| - RandomIndexWriter writer = new RandomIndexWriter(random(), directory, |
| - newIndexWriterConfig(new TestPayloadAnalyzer())); |
| - |
| - Document doc = new Document(); |
| - doc.add(new TextField("content", new StringReader("a b c d e f g h i j a k"))); |
| - writer.addDocument(doc); |
| - |
| - IndexReader reader = writer.getReader(); |
| - IndexSearcher is = newSearcher(reader); |
| - writer.close(); |
| - |
| - SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a")); |
| - SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); |
| - SpanQuery[] sqs = { stq1, stq2 }; |
| - SpanNearQuery snq = new SpanNearQuery(sqs, 1, true); |
| - PayloadSpanCollector collector = new PayloadSpanCollector(); |
| - Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, SpanWeight.Postings.PAYLOADS); |
| - |
| - TopDocs topDocs = is.search(snq, 1); |
| - Set<String> payloadSet = new HashSet<>(); |
| - for (int i = 0; i < topDocs.scoreDocs.length; i++) { |
| - while (spans.nextDoc() != Spans.NO_MORE_DOCS) { |
| - while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { |
| - collector.reset(); |
| - spans.collect(collector); |
| - Collection<byte[]> payloads = collector.getPayloads(); |
| - for (final byte [] payload : payloads) { |
| - payloadSet.add(new String(payload, StandardCharsets.UTF_8)); |
| - } |
| - } |
| - } |
| - } |
| - assertEquals(2, payloadSet.size()); |
| - assertTrue(payloadSet.contains("a:Noise:10")); |
| - assertTrue(payloadSet.contains("k:Noise:11")); |
| - reader.close(); |
| - directory.close(); |
| - } |
| - |
| - public void testShrinkToAfterShortestMatch2() throws IOException { |
| - Directory directory = newDirectory(); |
| - RandomIndexWriter writer = new RandomIndexWriter(random(), directory, |
| - newIndexWriterConfig(new TestPayloadAnalyzer())); |
| - |
| - Document doc = new Document(); |
| - doc.add(new TextField("content", new StringReader("a b a d k f a h i k a k"))); |
| - writer.addDocument(doc); |
| - IndexReader reader = writer.getReader(); |
| - IndexSearcher is = newSearcher(reader); |
| - writer.close(); |
| - |
| - SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a")); |
| - SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); |
| - SpanQuery[] sqs = { stq1, stq2 }; |
| - SpanNearQuery snq = new SpanNearQuery(sqs, 0, true); |
| - PayloadSpanCollector collector = new PayloadSpanCollector(); |
| - Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, SpanWeight.Postings.PAYLOADS); |
| - |
| - TopDocs topDocs = is.search(snq, 1); |
| - Set<String> payloadSet = new HashSet<>(); |
| - for (int i = 0; i < topDocs.scoreDocs.length; i++) { |
| - while (spans.nextDoc() != Spans.NO_MORE_DOCS) { |
| - while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { |
| - collector.reset(); |
| - spans.collect(collector); |
| - Collection<byte[]> payloads = collector.getPayloads(); |
| - |
| - for (final byte [] payload : payloads) { |
| - payloadSet.add(new String(payload, StandardCharsets.UTF_8)); |
| - } |
| - } |
| - } |
| - } |
| - assertEquals(2, payloadSet.size()); |
| - assertTrue(payloadSet.contains("a:Noise:10")); |
| - assertTrue(payloadSet.contains("k:Noise:11")); |
| - reader.close(); |
| - directory.close(); |
| - } |
| - |
| - public void testShrinkToAfterShortestMatch3() throws IOException { |
| - Directory directory = newDirectory(); |
| - RandomIndexWriter writer = new RandomIndexWriter(random(), directory, |
| - newIndexWriterConfig(new TestPayloadAnalyzer())); |
| - |
| - Document doc = new Document(); |
| - doc.add(new TextField("content", new StringReader("j k a l f k k p a t a k l k t a"))); |
| - writer.addDocument(doc); |
| - IndexReader reader = writer.getReader(); |
| - IndexSearcher is = newSearcher(reader); |
| - writer.close(); |
| - |
| - SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a")); |
| - SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); |
| - SpanQuery[] sqs = { stq1, stq2 }; |
| - SpanNearQuery snq = new SpanNearQuery(sqs, 0, true); |
| - PayloadSpanCollector collector = new PayloadSpanCollector(); |
| - Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, SpanWeight.Postings.PAYLOADS); |
| - |
| - TopDocs topDocs = is.search(snq, 1); |
| - Set<String> payloadSet = new HashSet<>(); |
| - for (int i = 0; i < topDocs.scoreDocs.length; i++) { |
| - while (spans.nextDoc() != Spans.NO_MORE_DOCS) { |
| - while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { |
| - collector.reset(); |
| - spans.collect(collector); |
| - Collection<byte[]> payloads = collector.getPayloads(); |
| - |
| - for (final byte [] payload : payloads) { |
| - payloadSet.add(new String(payload, StandardCharsets.UTF_8)); |
| - } |
| - } |
| - } |
| - } |
| - assertEquals(2, payloadSet.size()); |
| - if(VERBOSE) { |
| - for (final String payload : payloadSet) |
| - System.out.println("match:" + payload); |
| - |
| - } |
| - assertTrue(payloadSet.contains("a:Noise:10")); |
| - assertTrue(payloadSet.contains("k:Noise:11")); |
| - reader.close(); |
| - directory.close(); |
| - } |
| - |
| - public void testPayloadSpanUtil() throws Exception { |
| - Directory directory = newDirectory(); |
| - RandomIndexWriter writer = new RandomIndexWriter(random(), directory, |
| - newIndexWriterConfig(new PayloadAnalyzer()).setSimilarity(similarity)); |
| - |
| - Document doc = new Document(); |
| - doc.add(newTextField(PayloadHelper.FIELD, "xx rr yy mm pp", Field.Store.YES)); |
| - writer.addDocument(doc); |
| - |
| - IndexReader reader = writer.getReader(); |
| - writer.close(); |
| - IndexSearcher searcher = newSearcher(reader); |
| - |
| - PayloadSpanUtil psu = new PayloadSpanUtil(searcher.getTopReaderContext()); |
| - |
| - Collection<byte[]> payloads = psu.getPayloadsForQuery(new TermQuery(new Term(PayloadHelper.FIELD, "rr"))); |
| - if(VERBOSE) { |
| - System.out.println("Num payloads:" + payloads.size()); |
| - for (final byte [] bytes : payloads) { |
| - System.out.println(new String(bytes, StandardCharsets.UTF_8)); |
| - } |
| - } |
| - reader.close(); |
| - directory.close(); |
| - } |
| - |
| - private void checkSpans(Spans spans, PayloadSpanCollector collector, int expectedNumSpans, int expectedNumPayloads, |
| - int expectedPayloadLength, int expectedFirstByte) throws IOException { |
| - assertTrue("spans is null and it shouldn't be", spans != null); |
| - //each position match should have a span associated with it, since there is just one underlying term query, there should |
| - //only be one entry in the span |
| - int seen = 0; |
| - while (spans.nextDoc() != Spans.NO_MORE_DOCS) { |
| - while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { |
| - collector.reset(); |
| - spans.collect(collector); |
| - |
| - Collection<byte[]> payload = collector.getPayloads(); |
| - assertEquals("payload size", expectedNumPayloads, payload.size()); |
| - for (final byte [] thePayload : payload) { |
| - assertEquals("payload length", expectedPayloadLength, thePayload.length); |
| - assertEquals("payload first byte", expectedFirstByte, thePayload[0]); |
| - } |
| - |
| - seen++; |
| - } |
| - } |
| - assertEquals("expectedNumSpans", expectedNumSpans, seen); |
| - } |
| - |
| - private IndexSearcher getSearcher() throws Exception { |
| - directory = newDirectory(); |
| - String[] docs = new String[]{"xx rr yy mm pp","xx yy mm rr pp", "nopayload qq ss pp np", "one two three four five six seven eight nine ten eleven", "nine one two three four five six seven eight eleven ten"}; |
| - RandomIndexWriter writer = new RandomIndexWriter(random(), directory, |
| - newIndexWriterConfig(new PayloadAnalyzer()).setSimilarity(similarity)); |
| - |
| - Document doc = null; |
| - for(int i = 0; i < docs.length; i++) { |
| - doc = new Document(); |
| - String docText = docs[i]; |
| - doc.add(newTextField(PayloadHelper.FIELD, docText, Field.Store.YES)); |
| - writer.addDocument(doc); |
| - } |
| - |
| - closeIndexReader = writer.getReader(); |
| - writer.close(); |
| - |
| - IndexSearcher searcher = newSearcher(closeIndexReader); |
| - return searcher; |
| - } |
| - |
| - private void checkSpans(Spans spans, PayloadSpanCollector collector, int numSpans, int[] numPayloads) throws IOException { |
| - int cnt = 0; |
| - |
| - while (spans.nextDoc() != Spans.NO_MORE_DOCS) { |
| - while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { |
| - if(VERBOSE) |
| - System.out.println("\nSpans Dump --"); |
| - collector.reset(); |
| - spans.collect(collector); |
| - |
| - Collection<byte[]> payload = collector.getPayloads(); |
| - if(VERBOSE) { |
| - System.out.println("payloads for span:" + payload.size()); |
| - for (final byte [] bytes : payload) { |
| - System.out.println("doc:" + spans.docID() + " s:" + spans.startPosition() + " e:" + spans.endPosition() + " " |
| - + new String(bytes, StandardCharsets.UTF_8)); |
| - } |
| - } |
| - assertEquals("payload size", numPayloads[cnt], payload.size()); |
| - |
| - cnt++; |
| - } |
| - } |
| - |
| - assertEquals("expected numSpans", numSpans, cnt); |
| - } |
| - |
| - final class PayloadAnalyzer extends Analyzer { |
| - |
| - @Override |
| - public TokenStreamComponents createComponents(String fieldName) { |
| - Tokenizer result = new MockTokenizer(MockTokenizer.SIMPLE, true); |
| - return new TokenStreamComponents(result, new PayloadFilter(result)); |
| - } |
| - } |
| - |
| - final class PayloadFilter extends TokenFilter { |
| - Set<String> entities = new HashSet<>(); |
| - Set<String> nopayload = new HashSet<>(); |
| - int pos; |
| - PayloadAttribute payloadAtt; |
| - CharTermAttribute termAtt; |
| - PositionIncrementAttribute posIncrAtt; |
| - |
| - public PayloadFilter(TokenStream input) { |
| - super(input); |
| - pos = 0; |
| - entities.add("xx"); |
| - entities.add("one"); |
| - nopayload.add("nopayload"); |
| - nopayload.add("np"); |
| - termAtt = addAttribute(CharTermAttribute.class); |
| - posIncrAtt = addAttribute(PositionIncrementAttribute.class); |
| - payloadAtt = addAttribute(PayloadAttribute.class); |
| - } |
| - |
| - @Override |
| - public boolean incrementToken() throws IOException { |
| - if (input.incrementToken()) { |
| - String token = termAtt.toString(); |
| - |
| - if (!nopayload.contains(token)) { |
| - if (entities.contains(token)) { |
| - payloadAtt.setPayload(new BytesRef(token + ":Entity:"+ pos )); |
| - } else { |
| - payloadAtt.setPayload(new BytesRef(token + ":Noise:" + pos )); |
| - } |
| - } |
| - pos += posIncrAtt.getPositionIncrement(); |
| - return true; |
| - } |
| - return false; |
| - } |
| - |
| - @Override |
| - public void reset() throws IOException { |
| - super.reset(); |
| - this.pos = 0; |
| - } |
| - } |
| - |
| - public final class TestPayloadAnalyzer extends Analyzer { |
| - |
| - @Override |
| - public TokenStreamComponents createComponents(String fieldName) { |
| - Tokenizer result = new MockTokenizer(MockTokenizer.SIMPLE, true); |
| - return new TokenStreamComponents(result, new PayloadFilter(result)); |
| - } |
| - } |
| -} |
| Index: lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java (revision 1703369) |
| +++ lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java (working copy) |
| @@ -1,308 +0,0 @@ |
| -package org.apache.lucene.search.payloads; |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.IOException; |
| - |
| -import org.apache.lucene.analysis.Analyzer; |
| -import org.apache.lucene.analysis.MockTokenizer; |
| -import org.apache.lucene.analysis.TokenFilter; |
| -import org.apache.lucene.analysis.TokenStream; |
| -import org.apache.lucene.analysis.Tokenizer; |
| -import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; |
| -import org.apache.lucene.document.Document; |
| -import org.apache.lucene.document.Field; |
| -import org.apache.lucene.index.FieldInvertState; |
| -import org.apache.lucene.index.IndexReader; |
| -import org.apache.lucene.index.RandomIndexWriter; |
| -import org.apache.lucene.index.Term; |
| -import org.apache.lucene.search.BooleanClause; |
| -import org.apache.lucene.search.BooleanQuery; |
| -import org.apache.lucene.search.CheckHits; |
| -import org.apache.lucene.search.IndexSearcher; |
| -import org.apache.lucene.search.QueryUtils; |
| -import org.apache.lucene.search.ScoreDoc; |
| -import org.apache.lucene.search.TopDocs; |
| -import org.apache.lucene.search.similarities.ClassicSimilarity; |
| -import org.apache.lucene.search.similarities.Similarity; |
| -import org.apache.lucene.search.spans.MultiSpansWrapper; |
| -import org.apache.lucene.search.spans.SpanQuery; |
| -import org.apache.lucene.search.spans.SpanTermQuery; |
| -import org.apache.lucene.search.spans.Spans; |
| -import org.apache.lucene.store.Directory; |
| -import org.apache.lucene.util.BytesRef; |
| -import org.apache.lucene.util.English; |
| -import org.apache.lucene.util.LuceneTestCase; |
| -import org.junit.AfterClass; |
| -import org.junit.BeforeClass; |
| - |
| - |
| -/** |
| - * |
| - * |
| - **/ |
| -public class TestPayloadTermQuery extends LuceneTestCase { |
| - private static IndexSearcher searcher; |
| - private static IndexReader reader; |
| - private static Similarity similarity = new BoostingSimilarity(); |
| - private static final byte[] payloadField = new byte[]{1}; |
| - private static final byte[] payloadMultiField1 = new byte[]{2}; |
| - private static final byte[] payloadMultiField2 = new byte[]{4}; |
| - protected static Directory directory; |
| - |
| - private static class PayloadAnalyzer extends Analyzer { |
| - |
| - private PayloadAnalyzer() { |
| - super(PER_FIELD_REUSE_STRATEGY); |
| - } |
| - |
| - @Override |
| - public TokenStreamComponents createComponents(String fieldName) { |
| - Tokenizer result = new MockTokenizer(MockTokenizer.SIMPLE, true); |
| - return new TokenStreamComponents(result, new PayloadFilter(result, fieldName)); |
| - } |
| - } |
| - |
| - private static class PayloadFilter extends TokenFilter { |
| - private final String fieldName; |
| - private int numSeen = 0; |
| - |
| - private final PayloadAttribute payloadAtt; |
| - |
| - public PayloadFilter(TokenStream input, String fieldName) { |
| - super(input); |
| - this.fieldName = fieldName; |
| - payloadAtt = addAttribute(PayloadAttribute.class); |
| - } |
| - |
| - @Override |
| - public boolean incrementToken() throws IOException { |
| - boolean hasNext = input.incrementToken(); |
| - if (hasNext) { |
| - if (fieldName.equals("field")) { |
| - payloadAtt.setPayload(new BytesRef(payloadField)); |
| - } else if (fieldName.equals("multiField")) { |
| - if (numSeen % 2 == 0) { |
| - payloadAtt.setPayload(new BytesRef(payloadMultiField1)); |
| - } else { |
| - payloadAtt.setPayload(new BytesRef(payloadMultiField2)); |
| - } |
| - numSeen++; |
| - } |
| - return true; |
| - } else { |
| - return false; |
| - } |
| - } |
| - |
| - @Override |
| - public void reset() throws IOException { |
| - super.reset(); |
| - this.numSeen = 0; |
| - } |
| - } |
| - |
| - @BeforeClass |
| - public static void beforeClass() throws Exception { |
| - directory = newDirectory(); |
| - RandomIndexWriter writer = new RandomIndexWriter(random(), directory, |
| - newIndexWriterConfig(new PayloadAnalyzer()) |
| - .setSimilarity(similarity).setMergePolicy(newLogMergePolicy())); |
| - //writer.infoStream = System.out; |
| - for (int i = 0; i < 1000; i++) { |
| - Document doc = new Document(); |
| - Field noPayloadField = newTextField(PayloadHelper.NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES); |
| - //noPayloadField.setBoost(0); |
| - doc.add(noPayloadField); |
| - doc.add(newTextField("field", English.intToEnglish(i), Field.Store.YES)); |
| - doc.add(newTextField("multiField", English.intToEnglish(i) + " " + English.intToEnglish(i), Field.Store.YES)); |
| - writer.addDocument(doc); |
| - } |
| - reader = writer.getReader(); |
| - writer.close(); |
| - |
| - searcher = newSearcher(reader); |
| - searcher.setSimilarity(similarity); |
| - } |
| - |
| - @AfterClass |
| - public static void afterClass() throws Exception { |
| - searcher = null; |
| - reader.close(); |
| - reader = null; |
| - directory.close(); |
| - directory = null; |
| - } |
| - |
| - public void test() throws IOException { |
| - SpanQuery query = new PayloadScoreQuery(new SpanTermQuery(new Term("field", "seventy")), |
| - new MaxPayloadFunction()); |
| - TopDocs hits = searcher.search(query, 100); |
| - assertTrue("hits is null and it shouldn't be", hits != null); |
| - assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100); |
| - |
| - //they should all have the exact same score, because they all contain seventy once, and we set |
| - //all the other similarity factors to be 1 |
| - |
| - assertTrue(hits.getMaxScore() + " does not equal: " + 1, hits.getMaxScore() == 1); |
| - for (int i = 0; i < hits.scoreDocs.length; i++) { |
| - ScoreDoc doc = hits.scoreDocs[i]; |
| - assertTrue(doc.score + " does not equal: " + 1, doc.score == 1); |
| - } |
| - CheckHits.checkExplanations(query, PayloadHelper.FIELD, searcher, true); |
| - Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), query); |
| - assertTrue("spans is null and it shouldn't be", spans != null); |
| - /*float score = hits.score(0); |
| - for (int i =1; i < hits.length(); i++) |
| - { |
| - assertTrue("scores are not equal and they should be", score == hits.score(i)); |
| - }*/ |
| - |
| - } |
| - |
| - public void testQuery() { |
| - SpanQuery boostingFuncTermQuery = new PayloadScoreQuery(new SpanTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy")), |
| - new MaxPayloadFunction()); |
| - QueryUtils.check(boostingFuncTermQuery); |
| - |
| - SpanTermQuery spanTermQuery = new SpanTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy")); |
| - |
| - assertTrue(boostingFuncTermQuery.equals(spanTermQuery) == spanTermQuery.equals(boostingFuncTermQuery)); |
| - |
| - SpanQuery boostingFuncTermQuery2 = new PayloadScoreQuery(new SpanTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy")), |
| - new AveragePayloadFunction()); |
| - |
| - QueryUtils.checkUnequal(boostingFuncTermQuery, boostingFuncTermQuery2); |
| - } |
| - |
| - public void testMultipleMatchesPerDoc() throws Exception { |
| - SpanQuery query = new PayloadScoreQuery(new SpanTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy")), |
| - new MaxPayloadFunction()); |
| - TopDocs hits = searcher.search(query, 100); |
| - assertTrue("hits is null and it shouldn't be", hits != null); |
| - assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100); |
| - |
| - //they should all have the exact same score, because they all contain seventy once, and we set |
| - //all the other similarity factors to be 1 |
| - |
| - //System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash); |
| - assertTrue(hits.getMaxScore() + " does not equal: " + 4.0, hits.getMaxScore() == 4.0); |
| - //there should be exactly 10 items that score a 4, all the rest should score a 2 |
| - //The 10 items are: 70 + i*100 where i in [0-9] |
| - int numTens = 0; |
| - for (int i = 0; i < hits.scoreDocs.length; i++) { |
| - ScoreDoc doc = hits.scoreDocs[i]; |
| - if (doc.doc % 10 == 0) { |
| - numTens++; |
| - assertTrue(doc.score + " does not equal: " + 4.0, doc.score == 4.0); |
| - } else { |
| - assertTrue(doc.score + " does not equal: " + 2, doc.score == 2); |
| - } |
| - } |
| - assertTrue(numTens + " does not equal: " + 10, numTens == 10); |
| - CheckHits.checkExplanations(query, "field", searcher, true); |
| - Spans spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), query); |
| - assertTrue("spans is null and it shouldn't be", spans != null); |
| - //should be two matches per document |
| - int count = 0; |
| - //100 hits times 2 matches per hit, we should have 200 in count |
| - while (spans.nextDoc() != Spans.NO_MORE_DOCS) { |
| - while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { |
| - count++; |
| - } |
| - } |
| - assertTrue(count + " does not equal: " + 200, count == 200); |
| - } |
| - |
| - public void testNoMatch() throws Exception { |
| - SpanQuery query = new PayloadScoreQuery(new SpanTermQuery(new Term(PayloadHelper.FIELD, "junk")), |
| - new MaxPayloadFunction()); |
| - TopDocs hits = searcher.search(query, 100); |
| - assertTrue("hits is null and it shouldn't be", hits != null); |
| - assertTrue("hits Size: " + hits.totalHits + " is not: " + 0, hits.totalHits == 0); |
| - |
| - } |
| - |
| - public void testNoPayload() throws Exception { |
| - SpanQuery q1 = new PayloadScoreQuery(new SpanTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "zero")), |
| - new MaxPayloadFunction()); |
| - SpanQuery q2 = new PayloadScoreQuery(new SpanTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "foo")), |
| - new MaxPayloadFunction()); |
| - BooleanClause c1 = new BooleanClause(q1, BooleanClause.Occur.MUST); |
| - BooleanClause c2 = new BooleanClause(q2, BooleanClause.Occur.MUST_NOT); |
| - BooleanQuery.Builder query = new BooleanQuery.Builder(); |
| - query.add(c1); |
| - query.add(c2); |
| - TopDocs hits = searcher.search(query.build(), 100); |
| - assertTrue("hits is null and it shouldn't be", hits != null); |
| - assertTrue("hits Size: " + hits.totalHits + " is not: " + 1, hits.totalHits == 1); |
| - int[] results = new int[1]; |
| - results[0] = 0;//hits.scoreDocs[0].doc; |
| - CheckHits.checkHitCollector(random(), query.build(), PayloadHelper.NO_PAYLOAD_FIELD, searcher, results); |
| - } |
| - |
| - static class BoostingSimilarity extends ClassicSimilarity { |
| - |
| - @Override |
| - public float queryNorm(float sumOfSquaredWeights) { |
| - return 1; |
| - } |
| - |
| - @Override |
| - public float coord(int overlap, int maxOverlap) { |
| - return 1; |
| - } |
| - |
| - // TODO: Remove warning after API has been finalized |
| - @Override |
| - public float scorePayload(int docId, int start, int end, BytesRef payload) { |
| - //we know it is size 4 here, so ignore the offset/length |
| - return payload.bytes[payload.offset]; |
| - } |
| - |
| - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
| - //Make everything else 1 so we see the effect of the payload |
| - //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
| - @Override |
| - public float lengthNorm(FieldInvertState state) { |
| - return state.getBoost(); |
| - } |
| - |
| - @Override |
| - public float sloppyFreq(int distance) { |
| - return 1; |
| - } |
| - |
| - @Override |
| - public float idf(long docFreq, long docCount) { |
| - return 1; |
| - } |
| - |
| - @Override |
| - public float tf(float freq) { |
| - return freq == 0 ? 0 : 1; |
| - } |
| - } |
| - |
| - static class FullSimilarity extends ClassicSimilarity{ |
| - public float scorePayload(int docId, String fieldName, byte[] payload, int offset, int length) { |
| - //we know it is size 4 here, so ignore the offset/length |
| - return payload[offset]; |
| - } |
| - } |
| - |
| -} |
| Index: lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java (revision 1703369) |
| +++ lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java (working copy) |
| @@ -1,52 +0,0 @@ |
| -package org.apache.lucene.search.spans; |
| - |
| -/* |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import org.apache.lucene.index.IndexReader; |
| -import org.apache.lucene.index.LeafReader; |
| -import org.apache.lucene.index.LeafReaderContext; |
| -import org.apache.lucene.index.SlowCompositeReaderWrapper; |
| -import org.apache.lucene.search.IndexSearcher; |
| - |
| -import java.io.IOException; |
| - |
| -/** |
| - * |
| - * A wrapper to perform span operations on a non-leaf reader context |
| - * <p> |
| - * NOTE: This should be used for testing purposes only |
| - * @lucene.internal |
| - */ |
| -public class MultiSpansWrapper { |
| - |
| - public static Spans wrap(IndexReader reader, SpanQuery spanQuery) throws IOException { |
| - return wrap(reader, spanQuery, SpanWeight.Postings.POSITIONS); |
| - } |
| - |
| - public static Spans wrap(IndexReader reader, SpanQuery spanQuery, SpanWeight.Postings requiredPostings) throws IOException { |
| - |
| - LeafReader lr = SlowCompositeReaderWrapper.wrap(reader); // slow, but ok for testing |
| - LeafReaderContext lrContext = lr.getContext(); |
| - IndexSearcher searcher = new IndexSearcher(lr); |
| - searcher.setQueryCache(null); |
| - |
| - SpanWeight w = spanQuery.createWeight(searcher, false); |
| - |
| - return w.getSpans(lrContext, requiredPostings); |
| - } |
| -} |
| Index: lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java |
| =================================================================== |
| --- lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (revision 1703369) |
| +++ lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (working copy) |
| @@ -83,7 +83,7 @@ |
| import org.apache.lucene.search.join.ScoreMode; |
| import org.apache.lucene.search.join.ToChildBlockJoinQuery; |
| import org.apache.lucene.search.join.ToParentBlockJoinQuery; |
| -import org.apache.lucene.search.payloads.SpanPayloadCheckQuery; |
| +import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery; |
| import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; |
| import org.apache.lucene.search.spans.SpanNearQuery; |
| import org.apache.lucene.search.spans.SpanNotQuery; |
| Index: lucene/queries/src/java/org/apache/lucene/queries/payloads/AveragePayloadFunction.java |
| =================================================================== |
| --- lucene/queries/src/java/org/apache/lucene/queries/payloads/AveragePayloadFunction.java (revision 1703369) |
| +++ lucene/queries/src/java/org/apache/lucene/queries/payloads/AveragePayloadFunction.java (working copy) |
| @@ -1,4 +1,4 @@ |
| -package org.apache.lucene.search.payloads; |
| +package org.apache.lucene.queries.payloads; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| Index: lucene/queries/src/java/org/apache/lucene/queries/payloads/MaxPayloadFunction.java |
| =================================================================== |
| --- lucene/queries/src/java/org/apache/lucene/queries/payloads/MaxPayloadFunction.java (revision 1703369) |
| +++ lucene/queries/src/java/org/apache/lucene/queries/payloads/MaxPayloadFunction.java (working copy) |
| @@ -1,4 +1,4 @@ |
| -package org.apache.lucene.search.payloads; |
| +package org.apache.lucene.queries.payloads; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| Index: lucene/queries/src/java/org/apache/lucene/queries/payloads/MinPayloadFunction.java |
| =================================================================== |
| --- lucene/queries/src/java/org/apache/lucene/queries/payloads/MinPayloadFunction.java (revision 1703369) |
| +++ lucene/queries/src/java/org/apache/lucene/queries/payloads/MinPayloadFunction.java (working copy) |
| @@ -1,4 +1,4 @@ |
| -package org.apache.lucene.search.payloads; |
| +package org.apache.lucene.queries.payloads; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| Index: lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadFunction.java |
| =================================================================== |
| --- lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadFunction.java (revision 1703369) |
| +++ lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadFunction.java (working copy) |
| @@ -1,4 +1,4 @@ |
| -package org.apache.lucene.search.payloads; |
| +package org.apache.lucene.queries.payloads; |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| @@ -22,7 +22,7 @@ |
| * An abstract class that defines a way for PayloadScoreQuery instances to transform |
| * the cumulative effects of payload scores for a document. |
| * |
| - * @see org.apache.lucene.search.payloads.PayloadScoreQuery for more information |
| + * @see org.apache.lucene.queries.payloads.PayloadScoreQuery for more information |
| * |
| * @lucene.experimental This class and its derivations are experimental and subject to |
| * change |
| Index: lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java |
| =================================================================== |
| --- lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java (revision 1703369) |
| +++ lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java (working copy) |
| @@ -1,4 +1,4 @@ |
| -package org.apache.lucene.search.payloads; |
| +package org.apache.lucene.queries.payloads; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| Index: lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java |
| =================================================================== |
| --- lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java (revision 1703369) |
| +++ lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java (working copy) |
| @@ -1,4 +1,4 @@ |
| -package org.apache.lucene.search.payloads; |
| +package org.apache.lucene.queries.payloads; |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| Index: lucene/queries/src/java/org/apache/lucene/queries/payloads/package-info.java |
| =================================================================== |
| --- lucene/queries/src/java/org/apache/lucene/queries/payloads/package-info.java (revision 1703369) |
| +++ lucene/queries/src/java/org/apache/lucene/queries/payloads/package-info.java (working copy) |
| @@ -20,8 +20,8 @@ |
| * <p> |
| * The following Query implementations are provided: |
| * <ol> |
| - * <li>{@link org.apache.lucene.search.payloads.PayloadScoreQuery PayloadScoreQuery} -- For all terms matched by |
| + * <li>{@link org.apache.lucene.queries.payloads.PayloadScoreQuery PayloadScoreQuery} -- For all terms matched by |
| * a SpanQuery, boost the score based on the value of the payload located at those terms.</li> |
| * </ol> |
| */ |
| -package org.apache.lucene.search.payloads; |
| +package org.apache.lucene.queries.payloads; |
| Index: lucene/queries/src/test/org/apache/lucene/queries/payloads/PayloadHelper.java |
| =================================================================== |
| --- lucene/queries/src/test/org/apache/lucene/queries/payloads/PayloadHelper.java (revision 1703369) |
| +++ lucene/queries/src/test/org/apache/lucene/queries/payloads/PayloadHelper.java (working copy) |
| @@ -1,4 +1,4 @@ |
| -package org.apache.lucene.search.payloads; |
| +package org.apache.lucene.queries.payloads; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| Index: lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadCheckQuery.java |
| =================================================================== |
| --- lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadCheckQuery.java (revision 1703369) |
| +++ lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadCheckQuery.java (working copy) |
| @@ -1,4 +1,4 @@ |
| -package org.apache.lucene.search.payloads; |
| +package org.apache.lucene.queries.payloads; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| Index: lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadExplanations.java |
| =================================================================== |
| --- lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadExplanations.java (revision 1703369) |
| +++ lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadExplanations.java (working copy) |
| @@ -1,4 +1,4 @@ |
| -package org.apache.lucene.search.payloads; |
| +package org.apache.lucene.queries.payloads; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| Index: lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadScoreQuery.java |
| =================================================================== |
| --- lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadScoreQuery.java (revision 1703369) |
| +++ lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadScoreQuery.java (working copy) |
| @@ -1,4 +1,4 @@ |
| -package org.apache.lucene.search.payloads; |
| +package org.apache.lucene.queries.payloads; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| Index: lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadSpans.java |
| =================================================================== |
| --- lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadSpans.java (revision 1703369) |
| +++ lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadSpans.java (working copy) |
| @@ -1,4 +1,4 @@ |
| -package org.apache.lucene.search.payloads; |
| +package org.apache.lucene.queries.payloads; |
| |
| /* |
| * Copyright 2004 The Apache Software Foundation |
| @@ -16,6 +16,13 @@ |
| * limitations under the License. |
| */ |
| |
| +import java.io.IOException; |
| +import java.io.StringReader; |
| +import java.util.ArrayList; |
| +import java.util.HashSet; |
| +import java.util.List; |
| +import java.util.Set; |
| + |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.MockTokenizer; |
| import org.apache.lucene.analysis.TokenFilter; |
| @@ -32,11 +39,11 @@ |
| import org.apache.lucene.index.RandomIndexWriter; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.search.IndexSearcher; |
| -import org.apache.lucene.search.TermQuery; |
| import org.apache.lucene.search.TopDocs; |
| import org.apache.lucene.search.similarities.ClassicSimilarity; |
| import org.apache.lucene.search.similarities.Similarity; |
| import org.apache.lucene.search.spans.MultiSpansWrapper; |
| +import org.apache.lucene.search.spans.SpanCollector; |
| import org.apache.lucene.search.spans.SpanFirstQuery; |
| import org.apache.lucene.search.spans.SpanNearQuery; |
| import org.apache.lucene.search.spans.SpanNotQuery; |
| @@ -48,13 +55,6 @@ |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.LuceneTestCase; |
| |
| -import java.io.IOException; |
| -import java.io.StringReader; |
| -import java.nio.charset.StandardCharsets; |
| -import java.util.Collection; |
| -import java.util.HashSet; |
| -import java.util.Set; |
| - |
| public class TestPayloadSpans extends LuceneTestCase { |
| private IndexSearcher searcher; |
| private Similarity similarity = new ClassicSimilarity(); |
| @@ -74,15 +74,15 @@ |
| SpanTermQuery stq; |
| Spans spans; |
| stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "seventy")); |
| - PayloadSpanCollector collector = new PayloadSpanCollector(); |
| + |
| spans = MultiSpansWrapper.wrap(indexReader, stq, SpanWeight.Postings.PAYLOADS); |
| assertTrue("spans is null and it shouldn't be", spans != null); |
| - checkSpans(spans, collector, 100, 1, 1, 1); |
| + checkSpans(spans, 100, 1, 1, 1); |
| |
| stq = new SpanTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "seventy")); |
| spans = MultiSpansWrapper.wrap(indexReader, stq, SpanWeight.Postings.PAYLOADS); |
| assertTrue("spans is null and it shouldn't be", spans != null); |
| - checkSpans(spans, collector, 100, 0, 0, 0); |
| + checkSpans(spans, 100, 0, 0, 0); |
| } |
| |
| public void testSpanFirst() throws IOException { |
| @@ -91,9 +91,8 @@ |
| SpanFirstQuery sfq; |
| match = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one")); |
| sfq = new SpanFirstQuery(match, 2); |
| - PayloadSpanCollector collector = new PayloadSpanCollector(); |
| Spans spans = MultiSpansWrapper.wrap(indexReader, sfq, SpanWeight.Postings.PAYLOADS); |
| - checkSpans(spans, collector, 109, 1, 1, 1); |
| + checkSpans(spans, 109, 1, 1, 1); |
| //Test more complicated subclause |
| SpanQuery[] clauses = new SpanQuery[2]; |
| clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one")); |
| @@ -100,11 +99,11 @@ |
| clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "hundred")); |
| match = new SpanNearQuery(clauses, 0, true); |
| sfq = new SpanFirstQuery(match, 2); |
| - checkSpans(MultiSpansWrapper.wrap(indexReader, sfq, SpanWeight.Postings.PAYLOADS), collector, 100, 2, 1, 1); |
| + checkSpans(MultiSpansWrapper.wrap(indexReader, sfq, SpanWeight.Postings.PAYLOADS), 100, 2, 1, 1); |
| |
| match = new SpanNearQuery(clauses, 0, false); |
| sfq = new SpanFirstQuery(match, 2); |
| - checkSpans(MultiSpansWrapper.wrap(indexReader, sfq, SpanWeight.Postings.PAYLOADS), collector, 100, 2, 1, 1); |
| + checkSpans(MultiSpansWrapper.wrap(indexReader, sfq, SpanWeight.Postings.PAYLOADS), 100, 2, 1, 1); |
| |
| } |
| |
| @@ -127,8 +126,7 @@ |
| IndexReader reader = writer.getReader(); |
| writer.close(); |
| |
| - PayloadSpanCollector collector = new PayloadSpanCollector(); |
| - checkSpans(MultiSpansWrapper.wrap(reader, snq, SpanWeight.Postings.PAYLOADS), collector, 1, new int[]{2}); |
| + checkSpans(MultiSpansWrapper.wrap(reader, snq, SpanWeight.Postings.PAYLOADS), 1, new int[]{2}); |
| reader.close(); |
| directory.close(); |
| } |
| @@ -137,7 +135,6 @@ |
| SpanTermQuery stq; |
| Spans spans; |
| IndexSearcher searcher = getSearcher(); |
| - PayloadSpanCollector collector = new PayloadSpanCollector(); |
| |
| stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "mark")); |
| spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), stq, SpanWeight.Postings.PAYLOADS); |
| @@ -151,7 +148,7 @@ |
| |
| spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery, SpanWeight.Postings.PAYLOADS); |
| assertTrue("spans is null and it shouldn't be", spans != null); |
| - checkSpans(spans, collector, 2, new int[]{3,3}); |
| + checkSpans(spans, 2, new int[]{3,3}); |
| |
| |
| clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx")); |
| @@ -163,7 +160,7 @@ |
| spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), spanNearQuery, SpanWeight.Postings.PAYLOADS); |
| |
| assertTrue("spans is null and it shouldn't be", spans != null); |
| - checkSpans(spans, collector, 1, new int[]{3}); |
| + checkSpans(spans, 1, new int[]{3}); |
| |
| clauses = new SpanQuery[2]; |
| |
| @@ -184,7 +181,7 @@ |
| // yy within 6 of xx within 6 of rr |
| spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery, SpanWeight.Postings.PAYLOADS); |
| assertTrue("spans is null and it shouldn't be", spans != null); |
| - checkSpans(spans, collector, 2, new int[]{3,3}); |
| + checkSpans(spans, 2, new int[]{3,3}); |
| closeIndexReader.close(); |
| directory.close(); |
| } |
| @@ -212,12 +209,11 @@ |
| clauses3[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "np")); |
| clauses3[1] = snq; |
| |
| - PayloadSpanCollector collector = new PayloadSpanCollector(); |
| SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false); |
| spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery, SpanWeight.Postings.PAYLOADS); |
| |
| assertTrue("spans is null and it shouldn't be", spans != null); |
| - checkSpans(spans, collector, 1, new int[]{3}); |
| + checkSpans(spans, 1, new int[]{3}); |
| closeIndexReader.close(); |
| directory.close(); |
| } |
| @@ -252,10 +248,9 @@ |
| |
| SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false); |
| |
| - PayloadSpanCollector collector = new PayloadSpanCollector(); |
| spans = MultiSpansWrapper.wrap(searcher.getIndexReader(), nestedSpanNearQuery, SpanWeight.Postings.PAYLOADS); |
| assertTrue("spans is null and it shouldn't be", spans != null); |
| - checkSpans(spans, collector, 2, new int[]{8, 8}); |
| + checkSpans(spans, 2, new int[]{8, 8}); |
| closeIndexReader.close(); |
| directory.close(); |
| } |
| @@ -277,7 +272,7 @@ |
| SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); |
| SpanQuery[] sqs = { stq1, stq2 }; |
| SpanNearQuery snq = new SpanNearQuery(sqs, 1, true); |
| - PayloadSpanCollector collector = new PayloadSpanCollector(); |
| + VerifyingCollector collector = new VerifyingCollector(); |
| Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, SpanWeight.Postings.PAYLOADS); |
| |
| TopDocs topDocs = is.search(snq, 1); |
| @@ -287,9 +282,8 @@ |
| while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { |
| collector.reset(); |
| spans.collect(collector); |
| - Collection<byte[]> payloads = collector.getPayloads(); |
| - for (final byte [] payload : payloads) { |
| - payloadSet.add(new String(payload, StandardCharsets.UTF_8)); |
| + for (final BytesRef payload : collector.payloads) { |
| + payloadSet.add(Term.toString(payload)); |
| } |
| } |
| } |
| @@ -317,7 +311,7 @@ |
| SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); |
| SpanQuery[] sqs = { stq1, stq2 }; |
| SpanNearQuery snq = new SpanNearQuery(sqs, 0, true); |
| - PayloadSpanCollector collector = new PayloadSpanCollector(); |
| + VerifyingCollector collector = new VerifyingCollector(); |
| Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, SpanWeight.Postings.PAYLOADS); |
| |
| TopDocs topDocs = is.search(snq, 1); |
| @@ -327,10 +321,8 @@ |
| while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { |
| collector.reset(); |
| spans.collect(collector); |
| - Collection<byte[]> payloads = collector.getPayloads(); |
| - |
| - for (final byte [] payload : payloads) { |
| - payloadSet.add(new String(payload, StandardCharsets.UTF_8)); |
| + for (final BytesRef payload: collector.payloads) { |
| + payloadSet.add(Term.toString(payload)); |
| } |
| } |
| } |
| @@ -358,20 +350,18 @@ |
| SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); |
| SpanQuery[] sqs = { stq1, stq2 }; |
| SpanNearQuery snq = new SpanNearQuery(sqs, 0, true); |
| - PayloadSpanCollector collector = new PayloadSpanCollector(); |
| Spans spans = MultiSpansWrapper.wrap(is.getIndexReader(), snq, SpanWeight.Postings.PAYLOADS); |
| |
| TopDocs topDocs = is.search(snq, 1); |
| Set<String> payloadSet = new HashSet<>(); |
| + VerifyingCollector collector = new VerifyingCollector(); |
| for (int i = 0; i < topDocs.scoreDocs.length; i++) { |
| while (spans.nextDoc() != Spans.NO_MORE_DOCS) { |
| while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { |
| collector.reset(); |
| spans.collect(collector); |
| - Collection<byte[]> payloads = collector.getPayloads(); |
| - |
| - for (final byte [] payload : payloads) { |
| - payloadSet.add(new String(payload, StandardCharsets.UTF_8)); |
| + for (final BytesRef payload : collector.payloads) { |
| + payloadSet.add(Term.toString(payload)); |
| } |
| } |
| } |
| @@ -387,57 +377,51 @@ |
| reader.close(); |
| directory.close(); |
| } |
| - |
| - public void testPayloadSpanUtil() throws Exception { |
| - Directory directory = newDirectory(); |
| - RandomIndexWriter writer = new RandomIndexWriter(random(), directory, |
| - newIndexWriterConfig(new PayloadAnalyzer()).setSimilarity(similarity)); |
| |
| - Document doc = new Document(); |
| - doc.add(newTextField(PayloadHelper.FIELD, "xx rr yy mm pp", Field.Store.YES)); |
| - writer.addDocument(doc); |
| - |
| - IndexReader reader = writer.getReader(); |
| - writer.close(); |
| - IndexSearcher searcher = newSearcher(reader); |
| + static class VerifyingCollector implements SpanCollector { |
| |
| - PayloadSpanUtil psu = new PayloadSpanUtil(searcher.getTopReaderContext()); |
| - |
| - Collection<byte[]> payloads = psu.getPayloadsForQuery(new TermQuery(new Term(PayloadHelper.FIELD, "rr"))); |
| - if(VERBOSE) { |
| - System.out.println("Num payloads:" + payloads.size()); |
| - for (final byte [] bytes : payloads) { |
| - System.out.println(new String(bytes, StandardCharsets.UTF_8)); |
| + List<BytesRef> payloads = new ArrayList<>(); |
| + |
| + @Override |
| + public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException { |
| + if (postings.getPayload() != null) { |
| + payloads.add(BytesRef.deepCopyOf(postings.getPayload())); |
| } |
| } |
| - reader.close(); |
| - directory.close(); |
| + |
| + @Override |
| + public void reset() { |
| + payloads.clear(); |
| + } |
| + |
| +    public void verify(int expectedLength, int expectedFirstByte) { // asserts length and first byte of every collected payload |
| +      for (BytesRef payload : payloads) { |
| +        assertEquals("Incorrect payload length", expectedLength, payload.length); |
| +        assertEquals("Incorrect first byte", expectedFirstByte, payload.bytes[payload.offset]); // valid bytes start at offset, not necessarily 0 |
| +      } |
| +    } |
| } |
| |
| - private void checkSpans(Spans spans, PayloadSpanCollector collector, int expectedNumSpans, int expectedNumPayloads, |
| + private void checkSpans(Spans spans, int expectedNumSpans, int expectedNumPayloads, |
| int expectedPayloadLength, int expectedFirstByte) throws IOException { |
| assertTrue("spans is null and it shouldn't be", spans != null); |
| //each position match should have a span associated with it, since there is just one underlying term query, there should |
| //only be one entry in the span |
| + VerifyingCollector collector = new VerifyingCollector(); |
| int seen = 0; |
| while (spans.nextDoc() != Spans.NO_MORE_DOCS) { |
| while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { |
| collector.reset(); |
| spans.collect(collector); |
| - |
| - Collection<byte[]> payload = collector.getPayloads(); |
| - assertEquals("payload size", expectedNumPayloads, payload.size()); |
| - for (final byte [] thePayload : payload) { |
| - assertEquals("payload length", expectedPayloadLength, thePayload.length); |
| - assertEquals("payload first byte", expectedFirstByte, thePayload[0]); |
| - } |
| - |
| + collector.verify(expectedPayloadLength, expectedFirstByte); |
| + assertEquals("expectedNumPayloads", expectedNumPayloads, collector.payloads.size()); |
| seen++; |
| } |
| } |
| assertEquals("expectedNumSpans", expectedNumSpans, seen); |
| } |
| - |
| + |
| + |
| private IndexSearcher getSearcher() throws Exception { |
| directory = newDirectory(); |
| String[] docs = new String[]{"xx rr yy mm pp","xx yy mm rr pp", "nopayload qq ss pp np", "one two three four five six seven eight nine ten eleven", "nine one two three four five six seven eight eleven ten"}; |
| @@ -459,9 +443,9 @@ |
| return searcher; |
| } |
| |
| - private void checkSpans(Spans spans, PayloadSpanCollector collector, int numSpans, int[] numPayloads) throws IOException { |
| + private void checkSpans(Spans spans, int numSpans, int[] numPayloads) throws IOException { |
| int cnt = 0; |
| - |
| + VerifyingCollector collector = new VerifyingCollector(); |
| while (spans.nextDoc() != Spans.NO_MORE_DOCS) { |
| while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { |
| if(VERBOSE) |
| @@ -468,17 +452,8 @@ |
| System.out.println("\nSpans Dump --"); |
| collector.reset(); |
| spans.collect(collector); |
| + assertEquals("payload size", numPayloads[cnt], collector.payloads.size()); |
| |
| - Collection<byte[]> payload = collector.getPayloads(); |
| - if(VERBOSE) { |
| - System.out.println("payloads for span:" + payload.size()); |
| - for (final byte [] bytes : payload) { |
| - System.out.println("doc:" + spans.docID() + " s:" + spans.startPosition() + " e:" + spans.endPosition() + " " |
| - + new String(bytes, StandardCharsets.UTF_8)); |
| - } |
| - } |
| - assertEquals("payload size", numPayloads[cnt], payload.size()); |
| - |
| cnt++; |
| } |
| } |
| Index: lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadTermQuery.java |
| =================================================================== |
| --- lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadTermQuery.java (revision 1703369) |
| +++ lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadTermQuery.java (working copy) |
| @@ -1,4 +1,4 @@ |
| -package org.apache.lucene.search.payloads; |
| +package org.apache.lucene.queries.payloads; |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/BoostingTermBuilder.java |
| =================================================================== |
| --- lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/BoostingTermBuilder.java (revision 1703369) |
| +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/BoostingTermBuilder.java (working copy) |
| @@ -3,8 +3,8 @@ |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.queryparser.xml.DOMUtils; |
| import org.apache.lucene.queryparser.xml.ParserException; |
| -import org.apache.lucene.search.payloads.AveragePayloadFunction; |
| -import org.apache.lucene.search.payloads.PayloadScoreQuery; |
| +import org.apache.lucene.queries.payloads.AveragePayloadFunction; |
| +import org.apache.lucene.queries.payloads.PayloadScoreQuery; |
| import org.apache.lucene.search.spans.SpanBoostQuery; |
| import org.apache.lucene.search.spans.SpanQuery; |
| import org.apache.lucene.search.spans.SpanTermQuery; |
| Index: lucene/sandbox/src/java/org/apache/lucene/payloads/PayloadSpanCollector.java |
| =================================================================== |
| --- lucene/sandbox/src/java/org/apache/lucene/payloads/PayloadSpanCollector.java (revision 1703369) |
| +++ lucene/sandbox/src/java/org/apache/lucene/payloads/PayloadSpanCollector.java (working copy) |
| @@ -1,4 +1,4 @@ |
| -package org.apache.lucene.search.payloads; |
| +package org.apache.lucene.payloads; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| Index: lucene/sandbox/src/java/org/apache/lucene/payloads/PayloadSpanUtil.java |
| =================================================================== |
| --- lucene/sandbox/src/java/org/apache/lucene/payloads/PayloadSpanUtil.java (revision 1703369) |
| +++ lucene/sandbox/src/java/org/apache/lucene/payloads/PayloadSpanUtil.java (working copy) |
| @@ -1,4 +1,4 @@ |
| -package org.apache.lucene.search.payloads; |
| +package org.apache.lucene.payloads; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| @@ -17,6 +17,12 @@ |
| * limitations under the License. |
| */ |
| |
| +import java.io.IOException; |
| +import java.util.ArrayList; |
| +import java.util.Collection; |
| +import java.util.Iterator; |
| +import java.util.List; |
| + |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.IndexReaderContext; |
| import org.apache.lucene.index.LeafReaderContext; |
| @@ -36,12 +42,6 @@ |
| import org.apache.lucene.search.spans.SpanWeight; |
| import org.apache.lucene.search.spans.Spans; |
| |
| -import java.io.IOException; |
| -import java.util.ArrayList; |
| -import java.util.Collection; |
| -import java.util.Iterator; |
| -import java.util.List; |
| - |
| /** |
| * Experimental class to get set of payloads for most standard Lucene queries. |
| * Operates like Highlighter - IndexReader should only contain doc of interest, |
| Index: lucene/sandbox/src/test/org/apache/lucene/payloads/TestPayloadSpanUtil.java |
| =================================================================== |
| --- lucene/sandbox/src/test/org/apache/lucene/payloads/TestPayloadSpanUtil.java (revision 0) |
| +++ lucene/sandbox/src/test/org/apache/lucene/payloads/TestPayloadSpanUtil.java (working copy) |
| @@ -0,0 +1,151 @@ |
| +package org.apache.lucene.payloads; |
| + |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.IOException; |
| +import java.nio.charset.StandardCharsets; |
| +import java.util.Collection; |
| +import java.util.HashSet; |
| +import java.util.Set; |
| + |
| +import org.apache.lucene.analysis.Analyzer; |
| +import org.apache.lucene.analysis.MockTokenizer; |
| +import org.apache.lucene.analysis.TokenFilter; |
| +import org.apache.lucene.analysis.TokenStream; |
| +import org.apache.lucene.analysis.Tokenizer; |
| +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; |
| +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; |
| +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; |
| +import org.apache.lucene.document.Document; |
| +import org.apache.lucene.document.Field; |
| +import org.apache.lucene.index.IndexReader; |
| +import org.apache.lucene.index.RandomIndexWriter; |
| +import org.apache.lucene.index.Term; |
| +import org.apache.lucene.search.IndexSearcher; |
| +import org.apache.lucene.search.TermQuery; |
| +import org.apache.lucene.search.similarities.ClassicSimilarity; |
| +import org.apache.lucene.store.Directory; |
| +import org.apache.lucene.util.BytesRef; |
| +import org.apache.lucene.util.IOUtils; |
| +import org.apache.lucene.util.LuceneTestCase; |
| + |
| +/** |
| + * Tests {@link PayloadSpanUtil} on a single-document index whose tokens carry |
| + * payloads produced by {@link PayloadFilter}. |
| + */ |
| +public class TestPayloadSpanUtil extends LuceneTestCase { |
| + |
| +  public static final String FIELD = "f"; |
| + |
| +  /** |
| +   * Indexes "xx rr yy mm pp" and checks that a term query for "rr" yields |
| +   * exactly the one payload attached to that term's single position. |
| +   */ |
| +  public void testPayloadSpanUtil() throws Exception { |
| +    Directory directory = newDirectory(); |
| +    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, |
| +        newIndexWriterConfig(new PayloadAnalyzer()).setSimilarity(new ClassicSimilarity())); |
| + |
| +    Document doc = new Document(); |
| +    doc.add(newTextField(FIELD, "xx rr yy mm pp", Field.Store.YES)); |
| +    writer.addDocument(doc); |
| + |
| +    IndexReader reader = writer.getReader(); |
| +    writer.close(); |
| +    try { |
| +      IndexSearcher searcher = newSearcher(reader); |
| +      PayloadSpanUtil psu = new PayloadSpanUtil(searcher.getTopReaderContext()); |
| + |
| +      Collection<byte[]> payloads = psu.getPayloadsForQuery(new TermQuery(new Term(FIELD, "rr"))); |
| +      if (VERBOSE) { |
| +        System.out.println("Num payloads:" + payloads.size()); |
| +        for (final byte[] bytes : payloads) { |
| +          System.out.println(new String(bytes, StandardCharsets.UTF_8)); |
| +        } |
| +      } |
| + |
| +      // "rr" occurs once, as the second token (running position 1), and is not an entity term. |
| +      assertEquals("number of payloads for 'rr'", 1, payloads.size()); |
| +      assertEquals("rr:Noise:1", new String(payloads.iterator().next(), StandardCharsets.UTF_8)); |
| +    } finally { |
| +      // Close both resources even if an assertion above fails. |
| +      IOUtils.close(reader, directory); |
| +    } |
| +  } |
| + |
| +  /** Analyzer whose token stream is a {@link MockTokenizer} wrapped in a {@link PayloadFilter}. */ |
| +  static final class PayloadAnalyzer extends Analyzer { |
| + |
| +    @Override |
| +    public TokenStreamComponents createComponents(String fieldName) { |
| +      Tokenizer result = new MockTokenizer(MockTokenizer.SIMPLE, true); |
| +      return new TokenStreamComponents(result, new PayloadFilter(result)); |
| +    } |
| +  } |
| + |
| +  /** |
| +   * Sets a payload of the form {@code term:Entity:pos} or {@code term:Noise:pos} |
| +   * on every token whose term is not in {@code nopayload}; {@code pos} is the |
| +   * sum of the position increments of the tokens seen earlier in the stream. |
| +   */ |
| +  static final class PayloadFilter extends TokenFilter { |
| +    final Set<String> entities = new HashSet<>(); |
| +    final Set<String> nopayload = new HashSet<>(); |
| +    int pos; |
| +    final PayloadAttribute payloadAtt; |
| +    final CharTermAttribute termAtt; |
| +    final PositionIncrementAttribute posIncrAtt; |
| + |
| +    public PayloadFilter(TokenStream input) { |
| +      super(input); |
| +      pos = 0; |
| +      entities.add("xx"); |
| +      entities.add("one"); |
| +      nopayload.add("nopayload"); |
| +      nopayload.add("np"); |
| +      termAtt = addAttribute(CharTermAttribute.class); |
| +      posIncrAtt = addAttribute(PositionIncrementAttribute.class); |
| +      payloadAtt = addAttribute(PayloadAttribute.class); |
| +    } |
| + |
| +    @Override |
| +    public boolean incrementToken() throws IOException { |
| +      if (input.incrementToken()) { |
| +        String token = termAtt.toString(); |
| + |
| +        if (!nopayload.contains(token)) { |
| +          if (entities.contains(token)) { |
| +            payloadAtt.setPayload(new BytesRef(token + ":Entity:" + pos)); |
| +          } else { |
| +            payloadAtt.setPayload(new BytesRef(token + ":Noise:" + pos)); |
| +          } |
| +        } |
| +        pos += posIncrAtt.getPositionIncrement(); |
| +        return true; |
| +      } |
| +      return false; |
| +    } |
| + |
| +    @Override |
| +    public void reset() throws IOException { |
| +      super.reset(); |
| +      this.pos = 0; |
| +    } |
| +  } |
| + |
| +} |
| |
| Property changes on: lucene/sandbox/src/test/org/apache/lucene/payloads/TestPayloadSpanUtil.java |
| ___________________________________________________________________ |
| Added: svn:eol-style |
| ## -0,0 +1 ## |
| +native |
| \ No newline at end of property |
| Index: lucene/test-framework/src/java/org/apache/lucene/search/spans/MultiSpansWrapper.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/search/spans/MultiSpansWrapper.java (revision 1703369) |
| +++ lucene/test-framework/src/java/org/apache/lucene/search/spans/MultiSpansWrapper.java (working copy) |
| @@ -17,6 +17,8 @@ |
| * limitations under the License. |
| */ |
| |
| +import java.io.IOException; |
| + |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.LeafReader; |
| import org.apache.lucene.index.LeafReaderContext; |
| @@ -23,8 +25,6 @@ |
| import org.apache.lucene.index.SlowCompositeReaderWrapper; |
| import org.apache.lucene.search.IndexSearcher; |
| |
| -import java.io.IOException; |
| - |
| /** |
| * |
| * A wrapper to perform span operations on a non-leaf reader context |
| Index: solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java |
| =================================================================== |
| --- solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java (revision 1703369) |
| +++ solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java (working copy) |
| @@ -28,7 +28,7 @@ |
| import org.apache.lucene.analysis.core.WhitespaceAnalyzer; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.search.Query; |
| -import org.apache.lucene.search.payloads.SpanPayloadCheckQuery; |
| +import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery; |
| import org.apache.lucene.search.spans.SpanTermQuery; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.solr.SolrTestCaseJ4; |