blob: b7b434edcfcd0a42480cc8928ed67527699c062c [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using Lucene.Net.Search;
using NUnit.Framework;
using Analyzer = Lucene.Net.Analysis.Analyzer;
using StandardAnalyzer = Lucene.Net.Analysis.Standard.StandardAnalyzer;
using Document = Lucene.Net.Documents.Document;
using Field = Lucene.Net.Documents.Field;
using Directory = Lucene.Net.Store.Directory;
using MockRAMDirectory = Lucene.Net.Store.MockRAMDirectory;
using BooleanQuery = Lucene.Net.Search.BooleanQuery;
using Collector = Lucene.Net.Search.Collector;
using IndexSearcher = Lucene.Net.Search.IndexSearcher;
using Scorer = Lucene.Net.Search.Scorer;
using Searcher = Lucene.Net.Search.Searcher;
using Similarity = Lucene.Net.Search.Similarity;
using TermQuery = Lucene.Net.Search.TermQuery;
using Occur = Lucene.Net.Search.Occur;
using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
using _TestUtil = Lucene.Net.Util._TestUtil;
namespace Lucene.Net.Index
{
[TestFixture]
public class TestOmitTf:LuceneTestCase
{
// Collector used by TestBasic for the TermQuery on field "noTf" / term "term".
// Each collected hit must score exactly 1.0 before it is counted by the base class.
private class AnonymousClassCountingHitCollector : CountingHitCollector
{
    // Fixture that created this collector (kept from the Java anonymous-class port).
    private TestOmitTf owner;

    // Scorer handed over by the searcher; queried once per collected hit.
    private Scorer currentScorer;

    public AnonymousClassCountingHitCollector(TestOmitTf enclosingInstance)
    {
        this.owner = enclosingInstance;
    }

    public TestOmitTf Enclosing_Instance
    {
        get { return this.owner; }
    }

    public override void SetScorer(Scorer scorer)
    {
        this.currentScorer = scorer;
    }

    public override void Collect(int doc)
    {
        float hitScore = this.currentScorer.Score();
        bool scoreIsOne = hitScore == 1.0f;
        Assert.IsTrue(scoreIsOne);

        // Let the base class update the shared hit count and doc sum.
        base.Collect(doc);
    }
}
// Collector used by TestBasic for the TermQuery on field "tf" / term "term".
// Each collected hit must score exactly 1.0 + doc before it is counted.
// (TestBasic indexes document i with i + 1 occurrences of the term, and
// SimpleSimilarity.Tf returns the raw frequency.)
private class AnonymousClassCountingHitCollector1 : CountingHitCollector
{
    // Fixture that created this collector (kept from the Java anonymous-class port).
    private TestOmitTf owner;

    // Scorer handed over by the searcher; queried once per collected hit.
    private Scorer currentScorer;

    public AnonymousClassCountingHitCollector1(TestOmitTf enclosingInstance)
    {
        this.owner = enclosingInstance;
    }

    public TestOmitTf Enclosing_Instance
    {
        get { return this.owner; }
    }

    public override void SetScorer(Scorer scorer)
    {
        this.currentScorer = scorer;
    }

    public override void Collect(int doc)
    {
        float hitScore = this.currentScorer.Score();
        Assert.IsTrue(hitScore == 1.0f + doc);

        // Let the base class update the shared hit count and doc sum.
        base.Collect(doc);
    }
}
// Collector used by TestBasic for the TermQuery on field "noTf" / term "notf".
// Each hit must score exactly 1.0 and must have an odd doc value, since
// TestBasic only appends " notf" to odd-numbered documents.
private class AnonymousClassCountingHitCollector2 : CountingHitCollector
{
    // Fixture that created this collector (kept from the Java anonymous-class port).
    private TestOmitTf owner;

    // Scorer handed over by the searcher; queried once per collected hit.
    private Scorer currentScorer;

    public AnonymousClassCountingHitCollector2(TestOmitTf enclosingInstance)
    {
        this.owner = enclosingInstance;
    }

    public TestOmitTf Enclosing_Instance
    {
        get { return this.owner; }
    }

    public override void SetScorer(Scorer scorer)
    {
        this.currentScorer = scorer;
    }

    public override void Collect(int doc)
    {
        float hitScore = this.currentScorer.Score();
        Assert.IsTrue(hitScore == 1.0f);

        bool docIsEven = doc % 2 == 0;
        Assert.IsFalse(docIsEven);

        // Let the base class update the shared hit count and doc sum.
        base.Collect(doc);
    }
}
// Collector used by TestBasic for the TermQuery on field "tf" / term "tf".
// Each hit must score exactly 1.0 and must have an even doc value, since
// TestBasic only appends " tf" to even-numbered documents.
private class AnonymousClassCountingHitCollector3 : CountingHitCollector
{
    // Fixture that created this collector (kept from the Java anonymous-class port).
    private TestOmitTf owner;

    // Scorer handed over by the searcher; queried once per collected hit.
    private Scorer currentScorer;

    public AnonymousClassCountingHitCollector3(TestOmitTf enclosingInstance)
    {
        this.owner = enclosingInstance;
    }

    public TestOmitTf Enclosing_Instance
    {
        get { return this.owner; }
    }

    public override void SetScorer(Scorer scorer)
    {
        this.currentScorer = scorer;
    }

    public override void Collect(int doc)
    {
        float hitScore = this.currentScorer.Score();
        Assert.IsTrue(hitScore == 1.0f);

        bool docIsEven = doc % 2 == 0;
        Assert.IsTrue(docIsEven);

        // Let the base class update the shared hit count and doc sum.
        base.Collect(doc);
    }
}
// Collector used by TestBasic for the BooleanQuery. It performs no score
// checks of its own and simply forwards every hit to the counting base class.
private class AnonymousClassCountingHitCollector4 : CountingHitCollector
{
    // Fixture that created this collector (kept from the Java anonymous-class port).
    private TestOmitTf owner;

    public AnonymousClassCountingHitCollector4(TestOmitTf enclosingInstance)
    {
        this.owner = enclosingInstance;
    }

    public TestOmitTf Enclosing_Instance
    {
        get { return this.owner; }
    }

    public override void Collect(int doc)
    {
        // Counting only; the caller reads the total via CountingHitCollector.GetCount().
        base.Collect(doc);
    }
}
// Fixed IDF explanation returned by SimpleSimilarity.IdfExplain: the IDF is
// always 1.0 and the textual explanation is a placeholder string.
private class AnonymousIDFExplanation : Explanation.IDFExplanation
{
    private const float ConstantIdf = 1.0f;
    private const string ExplanationText = "Inexplicable";

    public override float Idf
    {
        get
        {
            return ConstantIdf;
        }
    }

    public override string Explain()
    {
        return ExplanationText;
    }
}
// Similarity that makes scores easy to predict in TestBasic: every factor is
// the constant 1.0 except Tf (the raw frequency) and SloppyFreq (always 2.0).
[Serializable]
public class SimpleSimilarity : Similarity
{
    // Value returned by every factor that should not influence the score.
    private const float NeutralFactor = 1.0f;

    // Field length has no effect on the score.
    public override float LengthNorm(string field, int numTerms)
    {
        return NeutralFactor;
    }

    // Queries are not normalized.
    public override float QueryNorm(float sumOfSquaredWeights)
    {
        return NeutralFactor;
    }

    // The term-frequency factor is the frequency itself.
    public override float Tf(float freq)
    {
        return freq;
    }

    // Constant sloppy frequency, independent of the distance.
    public override float SloppyFreq(int distance)
    {
        return 2.0f;
    }

    // Document frequency has no effect on the score.
    public override float Idf(int docFreq, int numDocs)
    {
        return NeutralFactor;
    }

    // No coordination bonus for matching more clauses.
    public override float Coord(int overlap, int maxOverlap)
    {
        return NeutralFactor;
    }

    // Always hands back a fresh constant explanation (IDF 1.0).
    public override Search.Explanation.IDFExplanation IdfExplain(System.Collections.Generic.ICollection<Term> terms, Searcher searcher)
    {
        return new AnonymousIDFExplanation();
    }
}
// Tests whether the DocumentWriter correctly enables the
// omitTermFreqAndPositions bit in the FieldInfo, and whether that bit
// survives a merge with a document that sets it the other way round.
[Test]
public virtual void TestOmitTermFreqAndPositions()
{
    Directory ram = new MockRAMDirectory();
    Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
    IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

    Document d = new Document();

    // f1 keeps term freqs and positions in the first document
    Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
    d.Add(f1);

    // f2 omits term freqs and positions in the first document
    Field f2 = new Field("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
    f2.OmitTermFreqAndPositions = true;
    d.Add(f2);

    writer.AddDocument(d);
    writer.Optimize();

    // Now add a second document with the settings reversed (f1 omits,
    // f2 does not) and verify what the SegmentMerger makes of the mix.
    d = new Document();

    // Reverse: the same Field instances are reused with the flag flipped
    f1.OmitTermFreqAndPositions = true;
    d.Add(f1);

    f2.OmitTermFreqAndPositions = false;
    d.Add(f2);

    writer.AddDocument(d);

    // force merge
    writer.Optimize();

    // flush
    writer.Close();

    _TestUtil.CheckIndex(ram);

    // After Optimize() the index is expected to consist of a single segment.
    SegmentReader reader = SegmentReader.GetOnlySegmentReader(ram);
    FieldInfos fi = reader.FieldInfos();

    // Each field omitted term freqs in one of the two documents, so both
    // must carry the bit after the merge.
    Assert.IsTrue(fi.FieldInfo("f1").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set.");
    Assert.IsTrue(fi.FieldInfo("f2").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set.");

    reader.Close();
    ram.Close();
}
// Tests whether merging of docs that have different
// omitTermFreqAndPositions settings for the same field works: after the
// merge both fields must report the bit as set.
[Test]
public virtual void TestMixedMerge()
{
    const int docsPerBatch = 30;

    Directory directory = new MockRAMDirectory();
    Analyzer standardAnalyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
    IndexWriter indexWriter = new IndexWriter(directory, standardAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED);

    // Small buffer and merge factor so that adding 60 docs triggers several flushes and merges.
    indexWriter.SetMaxBufferedDocs(3);
    indexWriter.MergeFactor = 2;

    // First batch: f1 keeps term freqs, f2 omits them.
    Field withFreqs = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
    Field withoutFreqs = new Field("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
    withoutFreqs.OmitTermFreqAndPositions = true;

    Document firstBatchDoc = new Document();
    firstBatchDoc.Add(withFreqs);
    firstBatchDoc.Add(withoutFreqs);

    for (int added = 0; added < docsPerBatch; added++)
    {
        indexWriter.AddDocument(firstBatchDoc);
    }

    // Second batch: same Field instances with the settings reversed
    // (f1 omits term freqs, f2 keeps them).
    Document secondBatchDoc = new Document();
    withFreqs.OmitTermFreqAndPositions = true;
    secondBatchDoc.Add(withFreqs);
    withoutFreqs.OmitTermFreqAndPositions = false;
    secondBatchDoc.Add(withoutFreqs);

    for (int added = 0; added < docsPerBatch; added++)
    {
        indexWriter.AddDocument(secondBatchDoc);
    }

    // Force a merge, then flush by closing the writer.
    indexWriter.Optimize();
    indexWriter.Close();

    _TestUtil.CheckIndex(directory);

    SegmentReader segmentReader = SegmentReader.GetOnlySegmentReader(directory);
    FieldInfos fieldInfos = segmentReader.FieldInfos();

    Assert.IsTrue(fieldInfos.FieldInfo("f1").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set.");
    Assert.IsTrue(fieldInfos.FieldInfo("f2").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set.");

    segmentReader.Close();
    directory.Close();
}
// Make sure that first adding docs that do not omitTermFreqAndPositions for
// field X, then adding docs that do omitTermFreqAndPositions for that same
// field, leaves the bit set for that field once everything has been merged.
[Test]
public virtual void TestMixedRAM()
{
    Directory directory = new MockRAMDirectory();
    Analyzer standardAnalyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
    IndexWriter indexWriter = new IndexWriter(directory, standardAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    indexWriter.SetMaxBufferedDocs(10);
    indexWriter.MergeFactor = 2;

    // f1 keeps term freqs throughout; f2 starts out keeping them as well.
    Field alwaysWithFreqs = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
    Field switchedField = new Field("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);

    Document doc = new Document();
    doc.Add(alwaysWithFreqs);
    doc.Add(switchedField);

    // 5 docs while f2 still has term freqs ...
    for (int added = 0; added < 5; added++)
    {
        indexWriter.AddDocument(doc);
    }

    // ... then 20 more of the same document after f2 is switched to omit them.
    switchedField.OmitTermFreqAndPositions = true;
    for (int added = 0; added < 20; added++)
    {
        indexWriter.AddDocument(doc);
    }

    // Force a merge, then flush by closing the writer.
    indexWriter.Optimize();
    indexWriter.Close();

    _TestUtil.CheckIndex(directory);

    SegmentReader segmentReader = SegmentReader.GetOnlySegmentReader(directory);
    FieldInfos fieldInfos = segmentReader.FieldInfos();

    Assert.IsTrue(!fieldInfos.FieldInfo("f1").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should not be set.");
    Assert.IsTrue(fieldInfos.FieldInfo("f2").omitTermFreqAndPositions_ForNUnit, "OmitTermFreqAndPositions field bit should be set.");

    segmentReader.Close();
    directory.Close();
}
// Asserts that the directory contains no file whose name ends in ".prx".
// The extension check is ordinal so the result does not depend on the
// current thread culture; a failure names the offending file.
private void AssertNoPrx(Directory dir)
{
    System.String[] files = dir.ListAll();
    foreach (System.String fileName in files)
    {
        Assert.IsFalse(fileName.EndsWith(".prx", System.StringComparison.Ordinal), "unexpected .prx file: " + fileName);
    }
}
// Verifies no *.prx file exists when all fields omit term freqs and
// positions, both right after a commit and after the final merge.
[Test]
public virtual void TestNoPrxFile()
{
    Directory directory = new MockRAMDirectory();
    Analyzer standardAnalyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
    IndexWriter indexWriter = new IndexWriter(directory, standardAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    indexWriter.SetMaxBufferedDocs(3);
    indexWriter.MergeFactor = 2;

    // Keep the individual index files visible in the directory listing
    // instead of packing them into a compound file.
    indexWriter.UseCompoundFile = false;

    // The only field in the index omits term freqs and positions.
    Field onlyField = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
    onlyField.OmitTermFreqAndPositions = true;

    Document doc = new Document();
    doc.Add(onlyField);

    for (int added = 0; added < 30; added++)
    {
        indexWriter.AddDocument(doc);
    }

    indexWriter.Commit();
    AssertNoPrx(directory);

    // Force a merge, then flush by closing the writer.
    indexWriter.Optimize();
    indexWriter.Close();

    AssertNoPrx(directory);
    _TestUtil.CheckIndex(directory);
    directory.Close();
}
// Test scores with one field with term freqs and one without, otherwise
// with equal content. Uses SimpleSimilarity so the expected scores are
// simple: 1.0 where term freqs are omitted, the raw frequency otherwise.
[Test]
public virtual void TestBasic()
{
    Directory dir = new MockRAMDirectory();
    Analyzer analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
    IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    writer.MergeFactor = 2;
    writer.SetMaxBufferedDocs(2);
    writer.SetSimilarity(new SimpleSimilarity());

    System.Text.StringBuilder sb = new System.Text.StringBuilder(265);
    System.String term = "term";
    for (int i = 0; i < 30; i++)
    {
        Document d = new Document();

        // Document i contains the term i + 1 times in both fields.
        sb.Append(term).Append(" ");
        System.String content = sb.ToString();

        // Odd documents additionally get "notf" in the field without term freqs ...
        Field noTf = new Field("noTf", content + (i % 2 == 0 ? "" : " notf"), Field.Store.NO, Field.Index.ANALYZED);
        noTf.OmitTermFreqAndPositions = true;
        d.Add(noTf);

        // ... and even documents additionally get "tf" in the field with term freqs.
        Field tf = new Field("tf", content + (i % 2 == 0 ? " tf" : ""), Field.Store.NO, Field.Index.ANALYZED);
        d.Add(tf);

        writer.AddDocument(d);
    }

    writer.Optimize();
    // flush
    writer.Close();
    _TestUtil.CheckIndex(dir);

    /*
    * Verify the index
    */
    Searcher searcher = new IndexSearcher(dir, true);
    searcher.Similarity = new SimpleSimilarity();

    Term a = new Term("noTf", term);
    Term b = new Term("tf", term);
    Term c = new Term("noTf", "notf");
    Term d2 = new Term("tf", "tf");
    TermQuery q1 = new TermQuery(a);
    TermQuery q2 = new TermQuery(b);
    TermQuery q3 = new TermQuery(c);
    TermQuery q4 = new TermQuery(d2);

    // Each collector asserts the per-hit expectations for its query while collecting.
    searcher.Search(q1, new AnonymousClassCountingHitCollector(this));
    searcher.Search(q2, new AnonymousClassCountingHitCollector1(this));
    searcher.Search(q3, new AnonymousClassCountingHitCollector2(this));
    searcher.Search(q4, new AnonymousClassCountingHitCollector3(this));

    // "term" in noTf matches every document, "tf" in tf only the 15 even
    // ones, so requiring both must yield exactly 15 hits.
    BooleanQuery bq = new BooleanQuery();
    bq.Add(q1, Occur.MUST);
    bq.Add(q4, Occur.MUST);
    searcher.Search(bq, new AnonymousClassCountingHitCollector4(this));
    Assert.AreEqual(15, CountingHitCollector.GetCount(), "BooleanQuery requiring noTf:term and tf:tf should match the 15 even-numbered documents.");

    searcher.Close();
    dir.Close();
}
// Collector that counts the hits it receives and accumulates doc + docBase
// for each of them. The totals live in static fields that are reset whenever
// a new collector is constructed, so only the most recent search is reflected
// by GetCount() / GetSum().
public class CountingHitCollector : Collector
{
    internal static int count = 0;
    internal static int sum = 0;

    // Doc base of the reader currently being collected; -1 until SetNextReader is called.
    private int currentDocBase = -1;

    internal CountingHitCollector()
    {
        // Start every search from zero.
        count = 0;
        sum = 0;
    }

    // Scores are ignored by the plain counting collector.
    public override void SetScorer(Scorer scorer)
    {
    }

    public override void Collect(int doc)
    {
        count += 1;
        // use it to avoid any possibility of being optimized away
        sum += doc + this.currentDocBase;
    }

    // Number of hits collected since the last collector was constructed.
    public static int GetCount()
    {
        return count;
    }

    // Sum of doc + docBase over the hits collected since the last collector was constructed.
    public static int GetSum()
    {
        return sum;
    }

    public override void SetNextReader(IndexReader reader, int docBase)
    {
        this.currentDocBase = docBase;
    }

    // Collection order does not matter for counting.
    public override bool AcceptsDocsOutOfOrder
    {
        get
        {
            return true;
        }
    }
}
}
}