| Index: lucene/queries/src/test/org/apache/lucene/queries/CommonTermsFieldsQueryTest.java |
| =================================================================== |
| --- lucene/queries/src/test/org/apache/lucene/queries/CommonTermsFieldsQueryTest.java (revision 0) |
| +++ lucene/queries/src/test/org/apache/lucene/queries/CommonTermsFieldsQueryTest.java (working copy) |
| @@ -0,0 +1,94 @@ |
| +package org.apache.lucene.queries; |
| + |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.IOException; |
| +import java.util.Arrays; |
| + |
| +import org.apache.lucene.document.Document; |
| +import org.apache.lucene.document.Field; |
| +import org.apache.lucene.index.IndexReader; |
| +import org.apache.lucene.index.RandomIndexWriter; |
| +import org.apache.lucene.index.Term; |
| +import org.apache.lucene.search.BooleanClause.Occur; |
| +import org.apache.lucene.search.IndexSearcher; |
| +import org.apache.lucene.search.ScoreDoc; |
| +import org.apache.lucene.search.TopDocs; |
| +import org.apache.lucene.store.Directory; |
| +import org.apache.lucene.util.LuceneTestCase; |
| + |
| +public class CommonTermsFieldsQueryTest extends LuceneTestCase { // Exercises CommonTermsFieldsQuery on a four-document index with "title" and "text" fields. |
| + public void testExtraFields() throws IOException { |
| + Directory dir = newDirectory(); |
| + RandomIndexWriter w = new RandomIndexWriter(random(), dir); |
| + String[] docs = new String[] { // alternating (title, text) pairs, one pair per document |
| + "one", "this is the end of the world right", |
| + "two", "is this it or maybe not", |
| + "three", "this is the end of the universe as we know it", |
| + "this is four", "there is the famous restaurant at the end of the universe",}; |
| + for (int i = 0; i < docs.length; i += 2) { |
| + Document doc = new Document(); |
| + doc.add(newStringField("id", "" + (i / 2), Field.Store.YES)); // stored id is the pair's index: "0".."3" |
| + doc.add(newTextField("title", docs[i], Field.Store.NO)); |
| + doc.add(newTextField("text", docs[i+1], Field.Store.NO)); |
| + w.addDocument(doc); |
| + } |
| + |
| + IndexReader r = w.getReader(); |
| + IndexSearcher s = newSearcher(r); |
| + // Case 1: the only searched field ("text") is the terms' own field, i.e. used like a regular common terms query |
| + { |
| + CommonTermsFieldsQuery query = new CommonTermsFieldsQuery(Occur.SHOULD, Occur.SHOULD, |
| + random().nextBoolean() ? 2.0f : 0.5f, Arrays.asList("text")); // threshold given as absolute (2.0) or fractional (0.5), picked at random |
| + query.add(new Term("text", "is")); |
| + query.add(new Term("text", "this")); |
| + query.add(new Term("text", "end")); |
| + query.add(new Term("text", "world")); |
| + query.add(new Term("text", "universe")); |
| + query.add(new Term("text", "right")); |
| + query.setLowFreqMinimumNumberShouldMatch(0.5f); |
| + TopDocs search = s.search(query, 10); |
| + assertEquals(1, search.totalHits); |
| + assertEquals("0", r.document(search.scoreDocs[0].doc).get("id")); |
| + } |
| + // Case 2: the searched field ("title") need not be the field the terms were added with ("text") |
| + { |
| + CommonTermsFieldsQuery query = new CommonTermsFieldsQuery(Occur.SHOULD, Occur.MUST, |
| + random().nextBoolean() ? 2.0f : 0.5f, Arrays.asList("title")); |
| + query.add(new Term("text", "four")); |
| + query.add(new Term("text", "this")); |
| + TopDocs search = s.search(query, 10); |
| + assertEquals(1, search.totalHits); |
| + assertEquals("3", r.document(search.scoreDocs[0].doc).get("id")); // only doc 3 has "four" in its title |
| + } |
| + // Case 3: searching both fields at once; high-frequency terms use Occur.SHOULD, so they aren't required in either |
| + { |
| + CommonTermsFieldsQuery query = new CommonTermsFieldsQuery(Occur.SHOULD, Occur.MUST, |
| + random().nextBoolean() ? 2.0f : 0.5f, Arrays.asList("title", "text")); |
| + query.add(new Term("text", "four")); |
| + query.add(new Term("text", "this")); |
| + query.add(new Term("text", "universe")); |
| + TopDocs search = s.search(query, 10); |
| + assertEquals(1, search.totalHits); |
| + assertEquals("3", r.document(search.scoreDocs[0].doc).get("id")); // doc 3: "four" in the title, "universe" in the text |
| + } |
| + r.close(); |
| + w.close(); |
| + dir.close(); |
| + } |
| +} |
| Index: lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java |
| =================================================================== |
| --- lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java (revision 1564816) |
| +++ lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java (working copy) |
| @@ -186,15 +186,15 @@ |
| for (int i = 0; i < queryTerms.length; i++) { |
| TermContext termContext = contextArray[i]; |
| if (termContext == null) { |
| - lowFreq.add(new TermQuery(queryTerms[i]), lowFreqOccur); |
| + lowFreq.add(buildQueryForTerm(queryTerms[i], null), lowFreqOccur); |
| } else { |
| if ((maxTermFrequency >= 1f && termContext.docFreq() > maxTermFrequency) |
| || (termContext.docFreq() > (int) Math.ceil(maxTermFrequency |
| * (float) maxDoc))) { |
| highFreq |
| - .add(new TermQuery(queryTerms[i], termContext), highFreqOccur); |
| + .add(buildQueryForTerm(queryTerms[i], termContext), highFreqOccur); |
| } else { |
| - lowFreq.add(new TermQuery(queryTerms[i], termContext), lowFreqOccur); |
| + lowFreq.add(buildQueryForTerm(queryTerms[i], termContext), lowFreqOccur); |
| } |
| } |
| |
| @@ -232,6 +232,18 @@ |
| return query; |
| } |
| } |
| + |
| + /** |
| + * Creates the query that matches a single term; subclasses may override this. |
| + * @param term the term to match |
| + * @param termContext the context for that term, may be {@code null} |
| + * @return a {@link TermQuery} for the term, carrying the context when one is given |
| + */ |
| + protected Query buildQueryForTerm(Term term, TermContext termContext) { |
| + return termContext == null |
| + ? new TermQuery(term) |
| + : new TermQuery(term, termContext); |
| + } |
| |
| public void collectTermContext(IndexReader reader, |
| List<AtomicReaderContext> leaves, TermContext[] contextArray, |
| Index: lucene/queries/src/java/org/apache/lucene/queries/CommonTermsFieldsQuery.java |
| =================================================================== |
| --- lucene/queries/src/java/org/apache/lucene/queries/CommonTermsFieldsQuery.java (revision 0) |
| +++ lucene/queries/src/java/org/apache/lucene/queries/CommonTermsFieldsQuery.java (working copy) |
| @@ -0,0 +1,182 @@ |
| +package org.apache.lucene.queries; |
| + |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| +import java.util.List; |
| + |
| +import org.apache.lucene.index.Term; |
| +import org.apache.lucene.index.TermContext; |
| +import org.apache.lucene.index.Terms; |
| +import org.apache.lucene.search.BooleanClause.Occur; |
| +import org.apache.lucene.search.similarities.Similarity; |
| +import org.apache.lucene.search.BooleanQuery; |
| +import org.apache.lucene.search.Query; |
| +import org.apache.lucene.search.TermQuery; |
| +import org.apache.lucene.util.ToStringUtils; |
| + |
| +/** |
| + * A {@link CommonTermsQuery} that matches each added term against a configurable |
| + * list of fields rather than only against the term's own field. |
| + * <p> |
| + * For every term one clause is built per configured field. With a single field |
| + * that clause is used directly; with several fields the per-field clauses are |
| + * combined as {@link Occur#SHOULD} clauses of a {@link BooleanQuery}. |
| + */ |
| +public class CommonTermsFieldsQuery extends CommonTermsQuery { |
| +  /** Names of the fields each term is matched against; never {@code null}. */ |
| +  private final List<String> fields; |
| + |
| +  /** |
| +   * Creates a new {@link CommonTermsFieldsQuery} with {@code disableCoord} set to |
| +   * {@code false}. |
| +   * |
| +   * @param highFreqOccur |
| +   *          {@link Occur} used for high frequency terms |
| +   * @param lowFreqOccur |
| +   *          {@link Occur} used for low frequency terms |
| +   * @param maxTermFrequency |
| +   *          a value in [0..1) (or absolute number >=1) representing the |
| +   *          maximum threshold of a terms document frequency to be considered a |
| +   *          low frequency term. |
| +   * @param fields |
| +   *          fields to match; must not be {@code null} |
| +   * @throws IllegalArgumentException |
| +   *          if {@link Occur#MUST_NOT} is passed as lowFreqOccur or |
| +   *          highFreqOccur, or if {@code fields} is {@code null} |
| +   */ |
| +  public CommonTermsFieldsQuery(Occur highFreqOccur, Occur lowFreqOccur, |
| +      float maxTermFrequency, List<String> fields) { |
| +    this(highFreqOccur, lowFreqOccur, maxTermFrequency, fields, false); |
| +  } |
| + |
| +  /** |
| +   * Creates a new {@link CommonTermsFieldsQuery} |
| +   * |
| +   * @param highFreqOccur |
| +   *          {@link Occur} used for high frequency terms |
| +   * @param lowFreqOccur |
| +   *          {@link Occur} used for low frequency terms |
| +   * @param maxTermFrequency |
| +   *          a value in [0..1) (or absolute number >=1) representing the |
| +   *          maximum threshold of a terms document frequency to be considered a |
| +   *          low frequency term. |
| +   * @param fields |
| +   *          fields to match; must not be {@code null} |
| +   * @param disableCoord |
| +   *          disables {@link Similarity#coord(int,int)} in scoring for the low |
| +   *          / high frequency sub-queries |
| +   * @throws IllegalArgumentException |
| +   *          if {@link Occur#MUST_NOT} is passed as lowFreqOccur or |
| +   *          highFreqOccur, or if {@code fields} is {@code null} |
| +   */ |
| +  public CommonTermsFieldsQuery(Occur highFreqOccur, Occur lowFreqOccur, |
| +      float maxTermFrequency, List<String> fields, boolean disableCoord) { |
| +    super(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoord); |
| +    // Fail fast here rather than with a NullPointerException when clauses are built. |
| +    if (fields == null) { |
| +      throw new IllegalArgumentException("fields must not be null"); |
| +    } |
| +    this.fields = fields; |
| +  } |
| + |
| +  /** |
| +   * Get the fields to match. |
| +   * |
| +   * @return the list passed to the constructor (not a copy) |
| +   */ |
| +  public List<String> getFields() { |
| +    return fields; |
| +  } |
| + |
| +  /** |
| +   * Builds the clause for one term: the single-field query when exactly one |
| +   * field is configured, otherwise a {@link BooleanQuery} holding one |
| +   * {@link Occur#SHOULD} clause per configured field. |
| +   * |
| +   * @param term the term to match |
| +   * @param termContext the context for that term, may be {@code null} |
| +   * @return the query matching the term in the configured fields |
| +   */ |
| +  @Override |
| +  protected Query buildQueryForTerm(Term term, TermContext termContext) { |
| +    if (fields.size() == 1) { |
| +      return buildQueryForSingleField(fields.get(0), term, termContext); |
| +    } |
| +    BooleanQuery query = new BooleanQuery(disableCoord); |
| +    for (String field : fields) { |
| +      query.add(buildQueryForSingleField(field, term, termContext), Occur.SHOULD); |
| +    } |
| +    return query; |
| +  } |
| + |
| +  /** |
| +   * Builds the query matching the term's text in {@code field}. The supplied |
| +   * context is only handed on when {@code field} is the term's own field; for any |
| +   * other field a fresh {@link TermQuery} without a context is created. |
| +   */ |
| +  private Query buildQueryForSingleField(String field, Term term, TermContext termContext) { |
| +    if (field.equals(term.field())) { |
| +      return super.buildQueryForTerm(term, termContext); |
| +    } |
| +    return new TermQuery(new Term(field, term.bytes())); |
| +  } |
| + |
| +  /** |
| +   * Renders the superclass representation in brackets followed by the |
| +   * comma-separated field list, e.g. {@code [...](for title, text)}. |
| +   */ |
| +  @Override |
| +  public String toString(String field) { |
| +    StringBuilder buffer = new StringBuilder(); |
| +    buffer.append('['); |
| +    buffer.append(super.toString(field)); |
| +    buffer.append("](for "); |
| +    for (int i = 0; i < fields.size(); i++) { |
| +      buffer.append(fields.get(i)); |
| +      if (i != fields.size() - 1) { |
| +        buffer.append(", "); |
| +      } |
| +    } |
| +    buffer.append(')'); |
| +    return buffer.toString(); |
| +  } |
| + |
| +  /** |
| +   * Extends the superclass hash code with the field list, keeping it consistent |
| +   * with {@link #equals(Object)}. |
| +   */ |
| +  @Override |
| +  public int hashCode() { |
| +    return 31 * super.hashCode() + fields.hashCode(); |
| +  } |
| + |
| +  /** |
| +   * Two instances are equal when the superclass considers them equal and their |
| +   * field lists are equal. Without this check the inherited comparison would |
| +   * ignore {@code fields}. |
| +   */ |
| +  @Override |
| +  public boolean equals(Object obj) { |
| +    if (this == obj) { |
| +      return true; |
| +    } |
| +    if (!super.equals(obj) || !(obj instanceof CommonTermsFieldsQuery)) { |
| +      return false; |
| +    } |
| +    return fields.equals(((CommonTermsFieldsQuery) obj).fields); |
| +  } |
| +} |