blob: 95228d0af16eec7343529327c84e0badcecdd200 [file] [log] [blame]
Index: lucene/queries/src/test/org/apache/lucene/queries/CommonTermsFieldsQueryTest.java
===================================================================
--- lucene/queries/src/test/org/apache/lucene/queries/CommonTermsFieldsQueryTest.java (revision 0)
+++ lucene/queries/src/test/org/apache/lucene/queries/CommonTermsFieldsQueryTest.java (working copy)
@@ -0,0 +1,94 @@
+package org.apache.lucene.queries;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class CommonTermsFieldsQueryTest extends LuceneTestCase {
+  /** Checks matching when the searched fields differ from the field of the added terms. */
+  public void testExtraFields() throws IOException {
+    Directory directory = newDirectory();
+    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
+    // Flattened (title, text) pairs; the position of each pair is stored as "id".
+    String[] titlesAndTexts = {
+      "one", "this is the end of the world right",
+      "two", "is this it or maybe not",
+      "three", "this is the end of the universe as we know it",
+      "this is four", "there is the famous restaurant at the end of the universe"
+    };
+    for (int pair = 0; 2 * pair < titlesAndTexts.length; pair++) {
+      Document document = new Document();
+      document.add(newStringField("id", "" + pair, Field.Store.YES));
+      document.add(newTextField("title", titlesAndTexts[2 * pair], Field.Store.NO));
+      document.add(newTextField("text", titlesAndTexts[2 * pair + 1], Field.Store.NO));
+      writer.addDocument(document);
+    }
+
+    IndexReader reader = writer.getReader();
+    IndexSearcher searcher = newSearcher(reader);
+
+    // Searching the field the terms were created with works like a regular common terms query.
+    CommonTermsFieldsQuery sameField = new CommonTermsFieldsQuery(Occur.SHOULD, Occur.SHOULD,
+        random().nextBoolean() ? 2.0f : 0.5f, Arrays.asList("text"));
+    for (String word : new String[] {"is", "this", "end", "world", "universe", "right"}) {
+      sameField.add(new Term("text", word));
+    }
+    sameField.setLowFreqMinimumNumberShouldMatch(0.5f);
+    assertOnlyHit("0", sameField, searcher, reader);
+
+    // The searched field does not have to be the field the terms were created with.
+    CommonTermsFieldsQuery otherField = new CommonTermsFieldsQuery(Occur.SHOULD, Occur.MUST,
+        random().nextBoolean() ? 2.0f : 0.5f, Arrays.asList("title"));
+    otherField.add(new Term("text", "four"));
+    otherField.add(new Term("text", "this"));
+    assertOnlyHit("3", otherField, searcher, reader);
+
+    // Both fields can be searched together; high frequency terms stay optional (SHOULD).
+    CommonTermsFieldsQuery bothFields = new CommonTermsFieldsQuery(Occur.SHOULD, Occur.MUST,
+        random().nextBoolean() ? 2.0f : 0.5f, Arrays.asList("title", "text"));
+    bothFields.add(new Term("text", "four"));
+    bothFields.add(new Term("text", "this"));
+    bothFields.add(new Term("text", "universe"));
+    assertOnlyHit("3", bothFields, searcher, reader);
+
+    reader.close();
+    writer.close();
+    directory.close();
+  }
+
+  /** Asserts that the query has exactly one hit and that its stored "id" is the expected one. */
+  private static void assertOnlyHit(String expectedId, CommonTermsFieldsQuery query,
+      IndexSearcher searcher, IndexReader reader) throws IOException {
+    TopDocs hits = searcher.search(query, 10);
+    assertEquals(1, hits.totalHits);
+    assertEquals(expectedId, reader.document(hits.scoreDocs[0].doc).get("id"));
+  }
+}
Index: lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java
===================================================================
--- lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java (revision 1564816)
+++ lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java (working copy)
@@ -186,15 +186,15 @@
for (int i = 0; i < queryTerms.length; i++) {
TermContext termContext = contextArray[i];
if (termContext == null) {
- lowFreq.add(new TermQuery(queryTerms[i]), lowFreqOccur);
+ lowFreq.add(buildQueryForTerm(queryTerms[i], null), lowFreqOccur);
} else {
if ((maxTermFrequency >= 1f && termContext.docFreq() > maxTermFrequency)
|| (termContext.docFreq() > (int) Math.ceil(maxTermFrequency
* (float) maxDoc))) {
highFreq
- .add(new TermQuery(queryTerms[i], termContext), highFreqOccur);
+ .add(buildQueryForTerm(queryTerms[i], termContext), highFreqOccur);
} else {
- lowFreq.add(new TermQuery(queryTerms[i], termContext), lowFreqOccur);
+ lowFreq.add(buildQueryForTerm(queryTerms[i], termContext), lowFreqOccur);
}
}
@@ -232,6 +232,18 @@
return query;
}
}
+
+  /**
+   * Builds the query used to match a single term of this query.
+   * @param term the term to match
+   * @param termContext the context for that term, or {@code null} to match without one
+   * @return the query that matches {@code term}
+   */
+  protected Query buildQueryForTerm(Term term, TermContext termContext) {
+    return termContext != null
+        ? new TermQuery(term, termContext)
+        : new TermQuery(term);
+  }
public void collectTermContext(IndexReader reader,
List<AtomicReaderContext> leaves, TermContext[] contextArray,
Index: lucene/queries/src/java/org/apache/lucene/queries/CommonTermsFieldsQuery.java
===================================================================
--- lucene/queries/src/java/org/apache/lucene/queries/CommonTermsFieldsQuery.java (revision 0)
+++ lucene/queries/src/java/org/apache/lucene/queries/CommonTermsFieldsQuery.java (working copy)
@@ -0,0 +1,189 @@
+package org.apache.lucene.queries;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.util.ToStringUtils;
+
+/**
+ * A {@link CommonTermsQuery} whose per-term queries are matched against a configurable
+ * list of fields rather than only against the field of each added {@link Term}.
+ * <p>
+ * With a single configured field every term becomes one term query on that field; with
+ * several fields it becomes a {@link BooleanQuery} holding one {@link Occur#SHOULD} term
+ * query per field.
+ */
+public class CommonTermsFieldsQuery extends CommonTermsQuery {
+  private final List<String> fields;
+
+  /**
+   * Creates a new {@link CommonTermsFieldsQuery}
+   *
+   * @param highFreqOccur
+   *          {@link Occur} used for high frequency terms
+   * @param lowFreqOccur
+   *          {@link Occur} used for low frequency terms
+   * @param maxTermFrequency
+   *          a value in [0..1) (or absolute number >=1) representing the
+   *          maximum threshold of a terms document frequency to be considered a
+   *          low frequency term.
+   * @param fields
+   *          fields to match; must contain at least one field and no null
+   *          entries. The list is copied, so later changes to it are ignored.
+   * @throws IllegalArgumentException
+   *          if {@link Occur#MUST_NOT} is passed as lowFreqOccur or
+   *          highFreqOccur, or if fields is null, empty or contains null
+   */
+  public CommonTermsFieldsQuery(Occur highFreqOccur, Occur lowFreqOccur,
+      float maxTermFrequency, List<String> fields) {
+    this(highFreqOccur, lowFreqOccur, maxTermFrequency, fields, false);
+  }
+
+  /**
+   * Creates a new {@link CommonTermsFieldsQuery}
+   *
+   * @param highFreqOccur
+   *          {@link Occur} used for high frequency terms
+   * @param lowFreqOccur
+   *          {@link Occur} used for low frequency terms
+   * @param maxTermFrequency
+   *          a value in [0..1) (or absolute number >=1) representing the
+   *          maximum threshold of a terms document frequency to be considered a
+   *          low frequency term.
+   * @param fields
+   *          fields to match; must contain at least one field and no null
+   *          entries. The list is copied, so later changes to it are ignored.
+   * @param disableCoord
+   *          disables {@link Similarity#coord(int,int)} in scoring for the low
+   *          / high frequency sub-queries
+   * @throws IllegalArgumentException
+   *          if {@link Occur#MUST_NOT} is passed as lowFreqOccur or
+   *          highFreqOccur, or if fields is null, empty or contains null
+   */
+  public CommonTermsFieldsQuery(Occur highFreqOccur, Occur lowFreqOccur,
+      float maxTermFrequency, List<String> fields, boolean disableCoord) {
+    super(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoord);
+    this.fields = validatedCopy(fields);
+  }
+
+  /**
+   * Returns an unmodifiable snapshot of fields after checking that it is usable.
+   */
+  private static List<String> validatedCopy(List<String> fields) {
+    if (fields == null || fields.isEmpty()) {
+      throw new IllegalArgumentException("fields must contain at least one field");
+    }
+    List<String> copy = new ArrayList<String>(fields);
+    for (String field : copy) {
+      if (field == null) {
+        throw new IllegalArgumentException("fields must not contain null: " + fields);
+      }
+    }
+    return Collections.unmodifiableList(copy);
+  }
+
+  /**
+   * Get the fields to match.
+   *
+   * @return the fields to match, as an unmodifiable list
+   */
+  public List<String> getFields() {
+    return fields;
+  }
+
+  /**
+   * Builds the query for one term: a single per-field query when only one field is
+   * configured, otherwise a {@link BooleanQuery} with one {@link Occur#SHOULD} clause
+   * per configured field.
+   */
+  @Override
+  protected Query buildQueryForTerm(Term term, TermContext termContext) {
+    if (fields.size() == 1) {
+      return buildQueryForSingleField(fields.get(0), term, termContext);
+    }
+    BooleanQuery query = new BooleanQuery(disableCoord);
+    for (String field : fields) {
+      query.add(buildQueryForSingleField(field, term, termContext), Occur.SHOULD);
+    }
+    return query;
+  }
+
+  /**
+   * Builds the query matching the text of term in field. The term context is only
+   * passed on when field is the term's own field; for any other field a plain
+   * {@link TermQuery} on the same bytes is created.
+   */
+  private Query buildQueryForSingleField(String field, Term term, TermContext termContext) {
+    if (field.equals(term.field())) {
+      return super.buildQueryForTerm(term, termContext);
+    }
+    return new TermQuery(new Term(field, term.bytes()));
+  }
+
+  /**
+   * Renders the superclass representation in square brackets followed by the
+   * comma separated list of fields to match.
+   */
+  @Override
+  public String toString(String field) {
+    StringBuilder buffer = new StringBuilder();
+    buffer.append('[');
+    buffer.append(super.toString(field));
+    buffer.append("](for ");
+    for (int i = 0; i < fields.size(); i++) {
+      if (i > 0) {
+        buffer.append(", ");
+      }
+      buffer.append(fields.get(i));
+    }
+    buffer.append(')');
+    return buffer.toString();
+  }
+
+  /** Combines the superclass hash with the fields so that it stays consistent with equals. */
+  @Override
+  public int hashCode() {
+    return 31 * super.hashCode() + fields.hashCode();
+  }
+
+  /**
+   * Two queries are equal when the superclass considers them equal and they match the
+   * same fields in the same order.
+   */
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
+    // The instanceof test guards the cast even if super.equals accepts other classes.
+    if (!super.equals(obj) || !(obj instanceof CommonTermsFieldsQuery)) {
+      return false;
+    }
+    return fields.equals(((CommonTermsFieldsQuery) obj).fields);
+  }
+}