docs/attachments/LUCENE-5435/LUCENE-5435.patch - lucene-jira-archive - Git at Google

 Index: lucene/queries/src/test/org/apache/lucene/queries/CommonTermsFieldsQueryTest.java
 ===================================================================
 --- lucene/queries/src/test/org/apache/lucene/queries/CommonTermsFieldsQueryTest.java	(revision 0)
 +++ lucene/queries/src/test/org/apache/lucene/queries/CommonTermsFieldsQueryTest.java	(working copy)
 @@ -0,0 +1,94 @@
 +package org.apache.lucene.queries;
 +
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +
 +import java.io.IOException;
 +import java.util.Arrays;
 +
 +import org.apache.lucene.document.Document;
 +import org.apache.lucene.document.Field;
 +import org.apache.lucene.index.IndexReader;
 +import org.apache.lucene.index.RandomIndexWriter;
 +import org.apache.lucene.index.Term;
 +import org.apache.lucene.search.BooleanClause.Occur;
 +import org.apache.lucene.search.IndexSearcher;
 +import org.apache.lucene.search.ScoreDoc;
 +import org.apache.lucene.search.TopDocs;
 +import org.apache.lucene.store.Directory;
 +import org.apache.lucene.util.LuceneTestCase;
 +
 +public class CommonTermsFieldsQueryTest extends LuceneTestCase {
 +  public void testExtraFields() throws IOException {
 +    Directory dir = newDirectory();
 +    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
 +    String[] docs = new String[] {
 +        "one", "this is the end of the world right",
 +        "two", "is this it or maybe not",
 +        "three", "this is the end of the universe as we know it",
 +        "this is four", "there is the famous restaurant at the end of the universe",};
 +    for (int i = 0; i < docs.length; i += 2) {
 +      Document doc = new Document();
 +      doc.add(newStringField("id", "" + (i / 2), Field.Store.YES));
 +      doc.add(newTextField("title", docs[i], Field.Store.NO));
 +      doc.add(newTextField("text", docs[i+1], Field.Store.NO));
 +      w.addDocument(doc);
 +    }
 +
 +    IndexReader r = w.getReader();
 +    IndexSearcher s = newSearcher(r);
 +    // With the terms' own field ("text") as the only target field, this behaves like a regular CommonTermsQuery
 +    {
 +      CommonTermsFieldsQuery query = new CommonTermsFieldsQuery(Occur.SHOULD, Occur.SHOULD,
 +          random().nextBoolean() ? 2.0f : 0.5f, Arrays.asList("text"));
 +      query.add(new Term("text", "is"));
 +      query.add(new Term("text", "this"));
 +      query.add(new Term("text", "end"));
 +      query.add(new Term("text", "world"));
 +      query.add(new Term("text", "universe"));
 +      query.add(new Term("text", "right"));
 +      query.setLowFreqMinimumNumberShouldMatch(0.5f);
 +      TopDocs search = s.search(query, 10);
 +      assertEquals(1, search.totalHits);
 +      assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
 +    }
 +    // The terms are added for "text" but are matched only against "title", so only the title of doc 3 can hit
 +    {
 +      CommonTermsFieldsQuery query = new CommonTermsFieldsQuery(Occur.SHOULD, Occur.MUST,
 +          random().nextBoolean() ? 2.0f : 0.5f, Arrays.asList("title"));
 +      query.add(new Term("text", "four"));
 +      query.add(new Term("text", "this"));
 +      TopDocs search = s.search(query, 10);
 +      assertEquals(1, search.totalHits);
 +      assertEquals("3", r.document(search.scoreDocs[0].doc).get("id"));
 +    }
 +    // Both fields can be searched at once; high frequency terms are SHOULD clauses, so neither field has to contain them
 +    {
 +      CommonTermsFieldsQuery query = new CommonTermsFieldsQuery(Occur.SHOULD, Occur.MUST,
 +          random().nextBoolean() ? 2.0f : 0.5f, Arrays.asList("title", "text"));
 +      query.add(new Term("text", "four"));
 +      query.add(new Term("text", "this"));
 +      query.add(new Term("text", "universe"));
 +      TopDocs search = s.search(query, 10);
 +      assertEquals(1, search.totalHits);
 +      assertEquals("3", r.document(search.scoreDocs[0].doc).get("id"));
 +    }
 +    r.close();
 +    w.close();
 +    dir.close();
 +  }
 +}
 Index: lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java
 ===================================================================
 --- lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java	(revision 1564816)
 +++ lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java	(working copy)
 @@ -186,15 +186,15 @@
      for (int i = 0; i < queryTerms.length; i++) {
        TermContext termContext = contextArray[i];
        if (termContext == null) {
 -        lowFreq.add(new TermQuery(queryTerms[i]), lowFreqOccur);
 +        lowFreq.add(buildQueryForTerm(queryTerms[i], null), lowFreqOccur);
        } else {
          if ((maxTermFrequency >= 1f && termContext.docFreq() > maxTermFrequency)
              || (termContext.docFreq() > (int) Math.ceil(maxTermFrequency
                  * (float) maxDoc))) {
            highFreq
 -              .add(new TermQuery(queryTerms[i], termContext), highFreqOccur);
 +              .add(buildQueryForTerm(queryTerms[i], termContext), highFreqOccur);
          } else {
 -          lowFreq.add(new TermQuery(queryTerms[i], termContext), lowFreqOccur);
 +          lowFreq.add(buildQueryForTerm(queryTerms[i], termContext), lowFreqOccur);
          }
        }

 @@ -232,6 +232,18 @@
        return query;
      }
    }
 +
 +  /**
 +   * Builds the sub-query for a single term; subclasses can override this to change how a term is matched.
 +   * @param term the term to match
 +   * @param termContext the context for that term, or {@code null} when the caller has none
 +   */
 +  protected Query buildQueryForTerm(Term term, TermContext termContext) {
 +    if (termContext == null) {
 +      return new TermQuery(term);
 +    }
 +    return new TermQuery(term, termContext);
 +  }

    public void collectTermContext(IndexReader reader,
        List<AtomicReaderContext> leaves, TermContext[] contextArray,
 Index: lucene/queries/src/java/org/apache/lucene/queries/CommonTermsFieldsQuery.java
 ===================================================================
 --- lucene/queries/src/java/org/apache/lucene/queries/CommonTermsFieldsQuery.java	(revision 0)
 +++ lucene/queries/src/java/org/apache/lucene/queries/CommonTermsFieldsQuery.java	(working copy)
 @@ -0,0 +1,168 @@
 +package org.apache.lucene.queries;
 +
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +import java.util.ArrayList;
 +import java.util.Collections;
 +import java.util.List;
 +
 +import org.apache.lucene.index.Term;
 +import org.apache.lucene.index.TermContext;
 +import org.apache.lucene.index.Terms;
 +import org.apache.lucene.search.BooleanClause.Occur;
 +import org.apache.lucene.search.similarities.Similarity;
 +import org.apache.lucene.search.BooleanQuery;
 +import org.apache.lucene.search.Query;
 +import org.apache.lucene.search.TermQuery;
 +import org.apache.lucene.util.ToStringUtils;
 +
 +/**
 + * A {@link CommonTermsQuery} that matches every added term against a configurable
 + * list of fields instead of only the field carried by the term itself.
 + * <p>
 + * The split into low and high frequency terms is inherited unchanged from
 + * {@link CommonTermsQuery}; this class only replaces the per-term sub-query. With
 + * more than one field, each term becomes a {@link BooleanQuery} holding one
 + * {@link Occur#SHOULD} clause per field.
 + */
 +public class CommonTermsFieldsQuery extends CommonTermsQuery {
 +  private final List<String> fields;
 +
 +  /**
 +   * Creates a new {@link CommonTermsFieldsQuery} that does not disable coord.
 +   *
 +   * @param highFreqOccur
 +   *          {@link Occur} used for high frequency terms
 +   * @param lowFreqOccur
 +   *          {@link Occur} used for low frequency terms
 +   * @param maxTermFrequency
 +   *          a value in [0..1) (or absolute number >=1) representing the
 +   *          maximum threshold of a term's document frequency to be considered a
 +   *          low frequency term.
 +   * @param fields
 +   *          fields to match; must contain at least one field name
 +   * @throws IllegalArgumentException
 +   *           if {@link Occur#MUST_NOT} is passed as lowFreqOccur or
 +   *           highFreqOccur, or if fields is null or empty
 +   */
 +  public CommonTermsFieldsQuery(Occur highFreqOccur, Occur lowFreqOccur,
 +      float maxTermFrequency, List<String> fields) {
 +    this(highFreqOccur, lowFreqOccur, maxTermFrequency, fields, false);
 +  }
 +
 +  /**
 +   * Creates a new {@link CommonTermsFieldsQuery}
 +   *
 +   * @param highFreqOccur
 +   *          {@link Occur} used for high frequency terms
 +   * @param lowFreqOccur
 +   *          {@link Occur} used for low frequency terms
 +   * @param maxTermFrequency
 +   *          a value in [0..1) (or absolute number >=1) representing the
 +   *          maximum threshold of a term's document frequency to be considered a
 +   *          low frequency term.
 +   * @param fields
 +   *          fields to match; must contain at least one field name. The list is
 +   *          copied, so later changes to it do not affect this query.
 +   * @param disableCoord
 +   *          disables {@link Similarity#coord(int,int)} in scoring for the low
 +   *          / high frequency sub-queries
 +   * @throws IllegalArgumentException
 +   *           if {@link Occur#MUST_NOT} is passed as lowFreqOccur or
 +   *           highFreqOccur, or if fields is null or empty
 +   */
 +  public CommonTermsFieldsQuery(Occur highFreqOccur, Occur lowFreqOccur,
 +      float maxTermFrequency, List<String> fields, boolean disableCoord) {
 +    super(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoord);
 +    if (fields == null || fields.isEmpty()) {
 +      throw new IllegalArgumentException("fields must contain at least one field name");
 +    }
 +    // Snapshot the list: equals() and hashCode() depend on it, so it must not
 +    // change behind this query's back once the query has been built.
 +    this.fields = Collections.unmodifiableList(new ArrayList<String>(fields));
 +  }
 +
 +  /**
 +   * Get the fields to match.
 +   *
 +   * @return an unmodifiable view of the fields every term is matched against
 +   */
 +  public List<String> getFields() {
 +    return fields;
 +  }
 +
 +  /**
 +   * Builds the sub-query for one term: a single query when only one field is
 +   * configured, otherwise a {@link BooleanQuery} with one optional clause per field.
 +   */
 +  @Override
 +  protected Query buildQueryForTerm(Term term, TermContext termContext) {
 +    if (fields.size() == 1) {
 +      return buildQueryForSingleField(fields.get(0), term, termContext);
 +    }
 +    BooleanQuery query = new BooleanQuery(disableCoord);
 +    for (String field: fields) {
 +      query.add(buildQueryForSingleField(field, term, termContext), Occur.SHOULD);
 +    }
 +    return query;
 +  }
 +
 +  /**
 +   * Builds the query matching the text of term in field. termContext is the one
 +   * supplied for term, so it is only reused when field is the term's own field;
 +   * any other field gets a fresh {@link TermQuery} without a context.
 +   */
 +  private Query buildQueryForSingleField(String field, Term term, TermContext termContext) {
 +    if (field.equals(term.field())) {
 +      return super.buildQueryForTerm(term, termContext);
 +    }
 +    return new TermQuery(new Term(field, term.bytes()));
 +  }
 +
 +  @Override
 +  public String toString(String field) {
 +    StringBuilder buffer = new StringBuilder();
 +    buffer.append('[');
 +    buffer.append(super.toString(field));
 +    buffer.append("](for ");
 +    for (int i = 0; i < fields.size(); i++) {
 +      buffer.append(fields.get(i));
 +      if (i != fields.size() - 1) buffer.append(", ");
 +    }
 +    buffer.append(')');
 +    return buffer.toString();
 +  }
 +
 +  @Override
 +  public int hashCode() {
 +    // Fold the target fields into the inherited hash so that queries differing
 +    // only in their fields do not collide by construction.
 +    return 31 * super.hashCode() + fields.hashCode();
 +  }
 +
 +  @Override
 +  public boolean equals(Object obj) {
 +    if (this == obj) {
 +      return true;
 +    }
 +    // The class check keeps the cast below safe whatever super.equals() accepts.
 +    if (obj == null || getClass() != obj.getClass() || !super.equals(obj)) {
 +      return false;
 +    }
 +    return fields.equals(((CommonTermsFieldsQuery) obj).fields);
 +  }
 +}
	Index: lucene/queries/src/test/org/apache/lucene/queries/CommonTermsFieldsQueryTest.java
	===================================================================
	--- lucene/queries/src/test/org/apache/lucene/queries/CommonTermsFieldsQueryTest.java (revision 0)
	+++ lucene/queries/src/test/org/apache/lucene/queries/CommonTermsFieldsQueryTest.java (working copy)
	@@ -0,0 +1,94 @@
	+package org.apache.lucene.queries;
	+
	+/*
	+ * Licensed to the Apache Software Foundation (ASF) under one or more
	+ * contributor license agreements. See the NOTICE file distributed with
	+ * this work for additional information regarding copyright ownership.
	+ * The ASF licenses this file to You under the Apache License, Version 2.0
	+ * (the "License"); you may not use this file except in compliance with
	+ * the License. You may obtain a copy of the License at
	+ *
	+ * http://www.apache.org/licenses/LICENSE-2.0
	+ *
	+ * Unless required by applicable law or agreed to in writing, software
	+ * distributed under the License is distributed on an "AS IS" BASIS,
	+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	+ * See the License for the specific language governing permissions and
	+ * limitations under the License.
	+ */
	+
	+import java.io.IOException;
	+import java.util.Arrays;
	+
	+import org.apache.lucene.document.Document;
	+import org.apache.lucene.document.Field;
	+import org.apache.lucene.index.IndexReader;
	+import org.apache.lucene.index.RandomIndexWriter;
	+import org.apache.lucene.index.Term;
	+import org.apache.lucene.search.BooleanClause.Occur;
	+import org.apache.lucene.search.IndexSearcher;
	+import org.apache.lucene.search.ScoreDoc;
	+import org.apache.lucene.search.TopDocs;
	+import org.apache.lucene.store.Directory;
	+import org.apache.lucene.util.LuceneTestCase;
	+
	+public class CommonTermsFieldsQueryTest extends LuceneTestCase {
	+ public void testExtraFields() throws IOException {
	+ Directory dir = newDirectory();
	+ RandomIndexWriter w = new RandomIndexWriter(random(), dir);
	+ String[] docs = new String[] {
	+ "one", "this is the end of the world right",
	+ "two", "is this it or maybe not",
	+ "three", "this is the end of the universe as we know it",
	+ "this is four", "there is the famous restaurant at the end of the universe",};
	+ for (int i = 0; i < docs.length; i += 2) {
	+ Document doc = new Document();
	+ doc.add(newStringField("id", "" + (i / 2), Field.Store.YES));
	+ doc.add(newTextField("title", docs[i], Field.Store.NO));
	+ doc.add(newTextField("text", docs[i+1], Field.Store.NO));
	+ w.addDocument(doc);
	+ }
	+
	+ IndexReader r = w.getReader();
	+ IndexSearcher s = newSearcher(r);
	+ // Fields query can still be used as a regular commonterms query
	+ {
	+ CommonTermsFieldsQuery query = new CommonTermsFieldsQuery(Occur.SHOULD, Occur.SHOULD,
	+ random().nextBoolean() ? 2.0f : 0.5f, Arrays.asList("text"));
	+ query.add(new Term("text", "is"));
	+ query.add(new Term("text", "this"));
	+ query.add(new Term("text", "end"));
	+ query.add(new Term("text", "world"));
	+ query.add(new Term("text", "universe"));
	+ query.add(new Term("text", "right"));
	+ query.setLowFreqMinimumNumberShouldMatch(0.5f);
	+ TopDocs search = s.search(query, 10);
	+ assertEquals(1, search.totalHits);
	+ assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
	+ }
	+ // But you don't have to search the source of common-ness
	+ {
	+ CommonTermsFieldsQuery query = new CommonTermsFieldsQuery(Occur.SHOULD, Occur.MUST,
	+ random().nextBoolean() ? 2.0f : 0.5f, Arrays.asList("title"));
	+ query.add(new Term("text", "four"));
	+ query.add(new Term("text", "this"));
	+ TopDocs search = s.search(query, 10);
	+ assertEquals(1, search.totalHits);
	+ assertEquals("3", r.document(search.scoreDocs[0].doc).get("id"));
	+ }
	+ // And you can search across both fields but the common terms aren't required in either
	+ {
	+ CommonTermsFieldsQuery query = new CommonTermsFieldsQuery(Occur.SHOULD, Occur.MUST,
	+ random().nextBoolean() ? 2.0f : 0.5f, Arrays.asList("title", "text"));
	+ query.add(new Term("text", "four"));
	+ query.add(new Term("text", "this"));
	+ query.add(new Term("text", "universe"));
	+ TopDocs search = s.search(query, 10);
	+ assertEquals(1, search.totalHits);
	+ assertEquals("3", r.document(search.scoreDocs[0].doc).get("id"));
	+ }
	+ r.close();
	+ w.close();
	+ dir.close();
	+ }
	+}
	Index: lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java
	===================================================================
	--- lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java (revision 1564816)
	+++ lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java (working copy)
	@@ -186,15 +186,15 @@
	for (int i = 0; i < queryTerms.length; i++) {
	TermContext termContext = contextArray[i];
	if (termContext == null) {
	- lowFreq.add(new TermQuery(queryTerms[i]), lowFreqOccur);
	+ lowFreq.add(buildQueryForTerm(queryTerms[i], null), lowFreqOccur);
	} else {
	if ((maxTermFrequency >= 1f && termContext.docFreq() > maxTermFrequency)
	|| (termContext.docFreq() > (int) Math.ceil(maxTermFrequency
	* (float) maxDoc))) {
	highFreq
	- .add(new TermQuery(queryTerms[i], termContext), highFreqOccur);
	+ .add(buildQueryForTerm(queryTerms[i], termContext), highFreqOccur);
	} else {
	- lowFreq.add(new TermQuery(queryTerms[i], termContext), lowFreqOccur);
	+ lowFreq.add(buildQueryForTerm(queryTerms[i], termContext), lowFreqOccur);
	}
	}

	@@ -232,6 +232,18 @@
	return query;
	}
	}
	+
	+ /**
	+ * Build the query to match term.
	+ * @param term the term to match
	+ * @param termContext the context for that term
	+ */
	+ protected Query buildQueryForTerm(Term term, TermContext termContext) {
	+ if (termContext == null) {
	+ return new TermQuery(term);
	+ }
	+ return new TermQuery(term, termContext);
	+ }

	public void collectTermContext(IndexReader reader,
	List<AtomicReaderContext> leaves, TermContext[] contextArray,
	Index: lucene/queries/src/java/org/apache/lucene/queries/CommonTermsFieldsQuery.java
	===================================================================
	--- lucene/queries/src/java/org/apache/lucene/queries/CommonTermsFieldsQuery.java (revision 0)
	+++ lucene/queries/src/java/org/apache/lucene/queries/CommonTermsFieldsQuery.java (working copy)
	@@ -0,0 +1,122 @@
	+package org.apache.lucene.queries;
	+
	+/*
	+ * Licensed to the Apache Software Foundation (ASF) under one or more
	+ * contributor license agreements. See the NOTICE file distributed with
	+ * this work for additional information regarding copyright ownership.
	+ * The ASF licenses this file to You under the Apache License, Version 2.0
	+ * (the "License"); you may not use this file except in compliance with
	+ * the License. You may obtain a copy of the License at
	+ *
	+ * http://www.apache.org/licenses/LICENSE-2.0
	+ *
	+ * Unless required by applicable law or agreed to in writing, software
	+ * distributed under the License is distributed on an "AS IS" BASIS,
	+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	+ * See the License for the specific language governing permissions and
	+ * limitations under the License.
	+ */
	+import java.util.List;
	+
	+import org.apache.lucene.index.Term;
	+import org.apache.lucene.index.TermContext;
	+import org.apache.lucene.index.Terms;
	+import org.apache.lucene.search.BooleanClause.Occur;
	+import org.apache.lucene.search.similarities.Similarity;
	+import org.apache.lucene.search.BooleanQuery;
	+import org.apache.lucene.search.Query;
	+import org.apache.lucene.search.TermQuery;
	+import org.apache.lucene.util.ToStringUtils;
	+
	+public class CommonTermsFieldsQuery extends CommonTermsQuery {
	+ private final List<String> fields;
	+
	+ /**
	+ * Creates a new {@link CommonTermsFieldsQuery}
	+ *
	+ * @param highFreqOccur
	+ * {@link Occur} used for high frequency terms
	+ * @param lowFreqOccur
	+ * {@link Occur} used for low frequency terms
	+ * @param maxTermFrequency
	+ * a value in [0..1) (or absolute number >=1) representing the
	+ * maximum threshold of a term's document frequency to be considered a
	+ * low frequency term.
	+ * @param fields
	+ * fields to match
	+ * @throws IllegalArgumentException
	+ * if {@link Occur#MUST_NOT} is passed as lowFreqOccur or
	+ * highFreqOccur
	+ */
	+ public CommonTermsFieldsQuery(Occur highFreqOccur, Occur lowFreqOccur,
	+ float maxTermFrequency, List<String> fields) {
	+ this(highFreqOccur, lowFreqOccur, maxTermFrequency, fields, false);
	+ }
	+
	+ /**
	+ * Creates a new {@link CommonTermsFieldsQuery}
	+ *
	+ * @param highFreqOccur
	+ * {@link Occur} used for high frequency terms
	+ * @param lowFreqOccur
	+ * {@link Occur} used for low frequency terms
	+ * @param maxTermFrequency
	+ * a value in [0..1) (or absolute number >=1) representing the
	+ * maximum threshold of a term's document frequency to be considered a
	+ * low frequency term.
	+ * @param fields
	+ * fields to match
	+ * @param disableCoord
	+ * disables {@link Similarity#coord(int,int)} in scoring for the low
	+ * / high frequency sub-queries
	+ * @throws IllegalArgumentException
	+ * if {@link Occur#MUST_NOT} is passed as lowFreqOccur or
	+ * highFreqOccur
	+ */
	+ public CommonTermsFieldsQuery(Occur highFreqOccur, Occur lowFreqOccur,
	+ float maxTermFrequency, List<String> fields, boolean disableCoord) {
	+ super(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoord);
	+
	+ this.fields = fields;
	+ }
	+
	+ /**
	+ * Get the fields to match.
	+ */
	+ public List<String> getFields() {
	+ return fields;
	+ }
	+
	+ @Override
	+ protected Query buildQueryForTerm(Term term, TermContext termContext) {
	+ if (fields.size() == 1) {
	+ return buildQueryForSingleField(fields.get(0), term, termContext);
	+ }
	+ BooleanQuery query = new BooleanQuery(disableCoord);
	+ for (String field: fields) {
	+ query.add(buildQueryForSingleField(field, term, termContext), Occur.SHOULD);
	+ }
	+ return query;
	+ }
	+
	+ private Query buildQueryForSingleField(String field, Term term, TermContext termContext) {
	+ if (field.equals(term.field())) {
	+ return super.buildQueryForTerm(term, termContext);
	+ }
	+ return new TermQuery(new Term(field, term.bytes()));
	+ }
	+
	+ @Override
	+ public String toString(String field) {
	+ StringBuilder buffer = new StringBuilder();
	+ buffer.append('[');
	+ buffer.append(super.toString(field));
	+ buffer.append("](for ");
	+ for (int i = 0; i < fields.size(); i++) {
	+ buffer.append(fields.get(i));
	+ if (i != fields.size() - 1) buffer.append(", ");
	+ }
	+ buffer.append(')');
	+ return buffer.toString();
	+ }
	+}