// src/Lucene.Net.Tests/Search/Similarities/TestSimilarityBase.cs - lucenenet - Git at Google

 using System;
 using System.Collections.Generic;
 using Lucene.Net.Documents;
 using NUnit.Framework;
 using JCG = J2N.Collections.Generic;
 using Assert = Lucene.Net.TestFramework.Assert;

 namespace Lucene.Net.Search.Similarities
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
      * this work for additional information regarding copyright ownership.
      * The ASF licenses this file to You under the Apache License, Version 2.0
      * (the "License"); you may not use this file except in compliance with
      * the License.  You may obtain a copy of the License at
      *
      *     http://www.apache.org/licenses/LICENSE-2.0
      *
      * Unless required by applicable law or agreed to in writing, software
      * distributed under the License is distributed on an "AS IS" BASIS,
      * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */

     using BytesRef = Lucene.Net.Util.BytesRef;
     using Codec = Lucene.Net.Codecs.Codec;
     using Directory = Lucene.Net.Store.Directory;
     using Document = Documents.Document;
     using Field = Field;
     using FieldInvertState = Lucene.Net.Index.FieldInvertState;
     using FieldType = FieldType;
     using IndexReader = Lucene.Net.Index.IndexReader;
     using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
     using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter;
     using Term = Lucene.Net.Index.Term;
     using TextField = TextField;

     /// <summary>
     /// Tests the <see cref="SimilarityBase"/>-based Similarities. Contains unit tests and
     /// integration tests for all Similarities and correctness tests for a select
     /// few.
     /// <para>This class maintains a list of
     /// <see cref="SimilarityBase"/> subclasses. Each test case performs its test on all
     /// items in the list. If a test case fails, the name of the Similarity that
     /// caused the failure is returned as part of the assertion error message.</para>
     /// <para>Unit testing is performed by constructing statistics manually and calling
     /// the <see cref="SimilarityBase.Score(BasicStats, float, float)"/> method of the
     /// Similarities. The statistics represent corner cases of corpus distributions.
     /// </para>
     /// <para>For the integration tests, a small (8-document) collection is indexed. The
     /// tests verify that for a specific query, all relevant documents are returned
     /// in the correct order. The collection consists of two poems of English poet
     /// <a href="http://en.wikipedia.org/wiki/William_blake">William Blake</a>.</para>
     /// <para>Note: the list of Similarities is maintained by hand. If a new Similarity
     /// is added to the <see cref="Lucene.Net.Search.Similarities"/> package, the
     /// list should be updated accordingly.</para>
     /// <para>
     /// In the correctness tests, the score is verified against the result of manual
     /// computation. Since it would be impossible to test all Similarities
     /// (e.g. all possible DFR combinations, all parameter values for LM), only
     /// the best performing setups in the original papers are verified.
     /// </para>
     /// </summary>
     [TestFixture]
     public class TestSimilarityBase : LuceneTestCase
     {
         /// <summary>
         /// Name of the body field that the integration-test queries search. </summary>
         private static readonly string FIELD_BODY = "body";

         /// <summary>
         /// Name of the stored, non-indexed field that holds each document's index in <c>docs</c>. </summary>
         private static readonly string FIELD_ID = "id";

         /// <summary>
         /// The tolerance range for float equality. </summary>
         private static readonly float FLOAT_EPSILON = 1e-5f;

         /// <summary>
         /// DFR basic models; SetUp combines each one with every after effect and normalization. </summary>
         internal static BasicModel[] BASIC_MODELS = new BasicModel[]
         {
             new BasicModelBE(),
             new BasicModelD(),
             new BasicModelG(),
             new BasicModelIF(),
             new BasicModelIn(),
             new BasicModelIne(),
             new BasicModelP()
         };

         /// <summary>
         /// DFR after effects, including the "no after effect" variant. </summary>
         internal static AfterEffect[] AFTER_EFFECTS = new AfterEffect[]
         {
             new AfterEffectB(),
             new AfterEffectL(),
             new AfterEffect.NoAfterEffect()
         };

         /// <summary>
         /// Normalizations, including the "no normalization" variant; used for both the DFR
         /// and the IB similarities built in SetUp. </summary>
         internal static Normalization[] NORMALIZATIONS = new Normalization[]
         {
             new NormalizationH1(),
             new NormalizationH2(),
             new NormalizationH3(),
             new NormalizationZ(),
             new Normalization.NoNormalization()
         };

         /// <summary>
         /// Probability distributions used to build the IB similarities. </summary>
         internal static Distribution[] DISTRIBUTIONS = new Distribution[]
         {
             new DistributionLL(),
             new DistributionSPL()
         };

         /// <summary>
         /// Lambda implementations used to build the IB similarities. </summary>
         internal static Lambda[] LAMBDAS = new Lambda[]
         {
             new LambdaDF(),
             new LambdaTTF()
         };

         /// <summary>
         /// Searcher over <c>reader</c>, created in SetUp. The integration tests assign its
         /// Similarity before each search. </summary>
         private IndexSearcher searcher;

         /// <summary>
         /// Directory holding the integration-test index; created in SetUp, disposed in TearDown. </summary>
         private Directory dir;

         /// <summary>
         /// Reader obtained from the index writer in SetUp; disposed in TearDown. </summary>
         private IndexReader reader;

         /// <summary>
         /// The list of similarities to test. Rebuilt by SetUp before every test. </summary>
         private IList<SimilarityBase> sims;

         /// <summary>
         /// Indexes the <c>docs</c> collection into a fresh directory, opens a searcher over it
         /// and builds the list of similarities that every test iterates over: all
         /// basic model / after effect / normalization combinations for DFR, all
         /// distribution / lambda / normalization combinations for IB, and three LM similarities.
         /// </summary>
         [SetUp]
         public override void SetUp()
         {
             base.SetUp();

             dir = NewDirectory();
             RandomIndexWriter writer = new RandomIndexWriter(Random, dir);

             for (int i = 0; i < docs.Length; i++)
             {
                 Document d = new Document();
                 // The id is stored but not indexed; it is only read back from search hits.
                 FieldType ft = new FieldType(TextField.TYPE_STORED);
                 ft.IsIndexed = false;
                 // Format with the invariant culture: TestHeartRanking compares the stored id
                 // against the literal "2", so the text must not depend on the current culture.
                 d.Add(NewField(FIELD_ID, Convert.ToString(i, System.Globalization.CultureInfo.InvariantCulture), ft));
                 d.Add(NewTextField(FIELD_BODY, docs[i], Field.Store.YES));
                 writer.AddDocument(d);
             }

             // The reader is obtained from the writer before the writer is disposed.
             reader = writer.GetReader();
             searcher = NewSearcher(reader);
             writer.Dispose();

             sims = new JCG.List<SimilarityBase>();
             foreach (BasicModel basicModel in BASIC_MODELS)
             {
                 foreach (AfterEffect afterEffect in AFTER_EFFECTS)
                 {
                     foreach (Normalization normalization in NORMALIZATIONS)
                     {
                         sims.Add(new DFRSimilarity(basicModel, afterEffect, normalization));
                     }
                 }
             }
             foreach (Distribution distribution in DISTRIBUTIONS)
             {
                 foreach (Lambda lambda in LAMBDAS)
                 {
                     foreach (Normalization normalization in NORMALIZATIONS)
                     {
                         sims.Add(new IBSimilarity(distribution, lambda, normalization));
                     }
                 }
             }
             sims.Add(new LMDirichletSimilarity());
             sims.Add(new LMJelinekMercerSimilarity(0.1f));
             sims.Add(new LMJelinekMercerSimilarity(0.7f));
         }

         // ------------------------------- Unit tests --------------------------------

         /// <summary>
         /// The default number of documents in the unit tests. </summary>
         private static readonly int NUMBER_OF_DOCUMENTS = 100;

         /// <summary>
         /// The default total number of tokens in the field in the unit tests. </summary>
         private static readonly long NUMBER_OF_FIELD_TOKENS = 5000;

         /// <summary>
         /// The default average field length in the unit tests. </summary>
         private static readonly float AVG_FIELD_LENGTH = 50;

         /// <summary>
         /// The default document frequency in the unit tests. </summary>
         private static readonly int DOC_FREQ = 10;

         /// <summary>
         /// The default total number of occurrences of this term across all documents
         /// in the unit tests.
         /// </summary>
         private static readonly long TOTAL_TERM_FREQ = 70;

         /// <summary>
         /// The default tf in the unit tests. </summary>
         private static readonly float FREQ = 7;

         /// <summary>
         /// The default document length in the unit tests. </summary>
         private static readonly int DOC_LEN = 40;

         /// <summary>
         /// Builds a new statistics object for the field "spoof" (boost 1) filled with the
         /// default values declared above; individual tests then overwrite the values they
         /// want to vary.
         /// </summary>
         private BasicStats CreateStats()
         {
             return new BasicStats("spoof", 1)
             {
                 NumberOfDocuments = NUMBER_OF_DOCUMENTS,
                 NumberOfFieldTokens = NUMBER_OF_FIELD_TOKENS,
                 AvgFieldLength = AVG_FIELD_LENGTH,
                 DocFreq = DOC_FREQ,
                 TotalTermFreq = TOTAL_TERM_FREQ
             };
         }

         /// <summary>
         /// Converts the field name, document count and field token count of
         /// <paramref name="stats"/> into a <see cref="CollectionStatistics"/>.
         /// </summary>
         private CollectionStatistics ToCollectionStats(BasicStats stats)
         {
             // The third and fifth arguments are passed as -1; presumably that marks the
             // statistic as unavailable - TODO confirm against CollectionStatistics.
             CollectionStatistics collectionStats = new CollectionStatistics(
                 stats.Field,
                 stats.NumberOfDocuments,
                 -1,
                 stats.NumberOfFieldTokens,
                 -1);
             return collectionStats;
         }

         /// <summary>
         /// Converts the document frequency and total term frequency of
         /// <paramref name="stats"/> into a <see cref="TermStatistics"/> for a placeholder term.
         /// </summary>
         private TermStatistics ToTermStats(BasicStats stats)
         {
             // The term text is a fixed placeholder; only the two frequencies come from stats.
             BytesRef placeholderTerm = new BytesRef("spoofyText");
             return new TermStatistics(placeholderTerm, stats.DocFreq, stats.TotalTermFreq);
         }

         /// <summary>
         /// The generic test core called by all unit test methods. For every similarity in
         /// <c>sims</c> it computes the weight from <paramref name="stats"/>, scores
         /// <paramref name="freq"/> / <paramref name="docLen"/> with
         /// <see cref="SimilarityBase.Score(BasicStats, float, float)"/> and checks that the
         /// score is finite, not NaN and not negative, and that the explanation reports the
         /// same value within <c>FLOAT_EPSILON</c>.
         /// </summary>
         private void UnitTestCore(BasicStats stats, float freq, int docLen)
         {
             foreach (SimilarityBase similarity in sims)
             {
                 CollectionStatistics collectionStats = ToCollectionStats(stats);
                 TermStatistics termStats = ToTermStats(stats);
                 BasicStats computed = (BasicStats)similarity.ComputeWeight(stats.TotalBoost, collectionStats, termStats);

                 float score = similarity.Score(computed, freq, docLen);
                 Explanation explanation = similarity.Explain(computed, 1, new Explanation(freq, "freq"), docLen);
                 float explained = explanation.Value;

                 // The similarity's string form identifies the failing configuration.
                 string label = similarity.ToString();
                 Assert.IsFalse(float.IsInfinity(score), "Score infinite: " + label);
                 Assert.IsFalse(float.IsNaN(score), "Score NaN: " + label);
                 Assert.IsTrue(score >= 0, "Score negative: " + label);
                 Assert.AreEqual(score, explained, FLOAT_EPSILON, "score() and explain() return different values: " + label);
             }
         }

         /// <summary>
         /// Scores the unmodified default statistics with every similarity. </summary>
         [Test]
         public virtual void TestDefault()
         {
             BasicStats defaults = CreateStats();
             UnitTestCore(defaults, FREQ, DOC_LEN);
         }

         /// <summary>
         /// Corner case: the field holds exactly as many tokens as there are documents
         /// (<c>numberOfDocuments = numberOfFieldTokens</c>).
         /// </summary>
         [Test]
         public virtual void TestSparseDocuments()
         {
             BasicStats sparse = CreateStats();
             sparse.NumberOfFieldTokens = sparse.NumberOfDocuments;
             sparse.TotalTermFreq = sparse.DocFreq;
             // Keep the average length consistent with the new token count.
             sparse.AvgFieldLength = (float)sparse.NumberOfFieldTokens / sparse.NumberOfDocuments;
             UnitTestCore(sparse, FREQ, DOC_LEN);
         }

         /// <summary>
         /// Corner case: there are fewer field tokens than documents
         /// (<c>numberOfDocuments > numberOfFieldTokens</c>).
         /// </summary>
         [Test]
         public virtual void TestVerySparseDocuments()
         {
             BasicStats verySparse = CreateStats();
             // Two thirds as many tokens as documents.
             verySparse.NumberOfFieldTokens = verySparse.NumberOfDocuments * 2 / 3;
             verySparse.TotalTermFreq = verySparse.DocFreq;
             verySparse.AvgFieldLength = (float)verySparse.NumberOfFieldTokens / verySparse.NumberOfDocuments;
             UnitTestCore(verySparse, FREQ, DOC_LEN);
         }

         /// <summary>
         /// Corner case: the collection consists of a single document
         /// (<c>NumberOfDocuments = 1</c>), so all collection statistics equal that
         /// document's own values.
         /// </summary>
         [Test]
         public virtual void TestOneDocument()
         {
             BasicStats single = CreateStats();
             single.NumberOfDocuments = 1;
             single.NumberOfFieldTokens = DOC_LEN;
             single.AvgFieldLength = DOC_LEN;
             single.DocFreq = 1;
             single.TotalTermFreq = (int)FREQ;
             UnitTestCore(single, FREQ, DOC_LEN);
         }

         /// <summary>
         /// Corner case: the term occurs in every document
         /// (<c>docFreq = numberOfDocuments</c>).
         /// </summary>
         [Test]
         public virtual void TestAllDocumentsRelevant()
         {
             BasicStats allRelevant = CreateStats();
             // Scale totalTermFreq by the same factor by which docFreq is about to grow.
             float growth = (0.0f + allRelevant.NumberOfDocuments) / allRelevant.DocFreq;
             allRelevant.TotalTermFreq = (int)(allRelevant.TotalTermFreq * growth);
             allRelevant.DocFreq = allRelevant.NumberOfDocuments;
             UnitTestCore(allRelevant, FREQ, DOC_LEN);
         }

         /// <summary>
         /// Corner case: the term occurs in more than half of the documents
         /// (<c>docFreq > numberOfDocuments / 2</c>); 60% is used here.
         /// </summary>
         [Test]
         public virtual void TestMostDocumentsRelevant()
         {
             BasicStats mostRelevant = CreateStats();
             // Scale totalTermFreq by the same factor by which docFreq is about to grow.
             float growth = (0.6f * mostRelevant.NumberOfDocuments) / mostRelevant.DocFreq;
             mostRelevant.TotalTermFreq = (int)(mostRelevant.TotalTermFreq * growth);
             mostRelevant.DocFreq = (int)(mostRelevant.NumberOfDocuments * 0.6);
             UnitTestCore(mostRelevant, FREQ, DOC_LEN);
         }

         /// <summary>
         /// Corner case: the term occurs in exactly one document (<c>docFreq = 1</c>)
         /// while <c>totalTermFreq</c> is set three higher than the scored frequency.
         /// </summary>
         [Test]
         public virtual void TestOnlyOneRelevantDocument()
         {
             BasicStats oneHit = CreateStats();
             oneHit.DocFreq = 1;
             oneHit.TotalTermFreq = (int)FREQ + 3;
             UnitTestCore(oneHit, FREQ, DOC_LEN);
         }

         /// <summary>
         /// Corner case: every token of the field is the query term
         /// (<c>totalTermFreq = numberOfFieldTokens</c>). Scored twice: once with the
         /// default average field length and once with it set to <c>DOC_LEN + 10</c>.
         /// </summary>
         [Test]
         public virtual void TestAllTermsRelevant()
         {
             BasicStats allTerms = CreateStats();
             allTerms.TotalTermFreq = allTerms.NumberOfFieldTokens;
             // The scored document consists of the term only: freq equals the document length.
             UnitTestCore(allTerms, DOC_LEN, DOC_LEN);

             allTerms.AvgFieldLength = DOC_LEN + 10;
             UnitTestCore(allTerms, DOC_LEN, DOC_LEN);
         }

         /// <summary>
         /// Corner case: the term occurs more often than there are documents
         /// (<c>totalTermFreq > numberOfDocuments</c>).
         /// </summary>
         [Test]
         public virtual void TestMoreTermsThanDocuments()
         {
             BasicStats frequent = CreateStats();
             frequent.TotalTermFreq += frequent.NumberOfDocuments;
             // The scored document carries twice the default term frequency.
             float doubledFreq = 2 * FREQ;
             UnitTestCore(frequent, doubledFreq, DOC_LEN);
         }

         /// <summary>
         /// Corner case: the term occurs exactly as many times as there are documents
         /// (<c>totalTermFreq = numberOfDocuments</c>).
         /// </summary>
         [Test]
         public virtual void TestNumberOfTermsAsDocuments()
         {
             BasicStats balanced = CreateStats();
             balanced.TotalTermFreq = balanced.NumberOfDocuments;
             UnitTestCore(balanced, FREQ, DOC_LEN);
         }

         /// <summary>
         /// Corner case: the term occurs exactly once in the whole collection
         /// (<c>totalTermFreq = 1</c>, hence <c>docFreq = 1</c> and a scored frequency of 1).
         /// </summary>
         [Test]
         public virtual void TestOneTerm()
         {
             BasicStats singleOccurrence = CreateStats();
             singleOccurrence.DocFreq = 1;
             singleOccurrence.TotalTermFreq = 1;
             UnitTestCore(singleOccurrence, 1, DOC_LEN);
         }

         /// <summary>
         /// Corner case: all occurrences of the term are in the scored document
         /// (<c>totalTermFreq = freq</c>, <c>docFreq = 1</c>).
         /// </summary>
         [Test]
         public virtual void TestOneRelevantDocument()
         {
             BasicStats concentrated = CreateStats();
             concentrated.DocFreq = 1;
             concentrated.TotalTermFreq = (int)FREQ;
             UnitTestCore(concentrated, FREQ, DOC_LEN);
         }

         /// <summary>
         /// Corner case: every token of the field is the term and all of them sit in the
         /// scored document (<c>numberOfFieldTokens = freq</c>): 10 documents, 50 tokens,
         /// all 50 in one document of length 50.
         /// </summary>
         [Test]
         public virtual void TestAllTermsRelevantOnlyOneDocument()
         {
             BasicStats packed = CreateStats();
             packed.NumberOfDocuments = 10;
             packed.NumberOfFieldTokens = 50;
             packed.AvgFieldLength = 5;
             packed.DocFreq = 1;
             packed.TotalTermFreq = 50;
             UnitTestCore(packed, 50, 50);
         }

         /// <summary>
         /// Corner case: the collection is a single document that contains a single
         /// token, so every statistic is 1.
         /// </summary>
         [Test]
         public virtual void TestOnlyOneTermOneDocument()
         {
             BasicStats minimal = CreateStats();
             minimal.NumberOfDocuments = 1;
             minimal.NumberOfFieldTokens = 1;
             minimal.AvgFieldLength = 1;
             minimal.DocFreq = 1;
             minimal.TotalTermFreq = 1;
             UnitTestCore(minimal, 1, 1);
         }

         /// <summary>
         /// Corner case: the field contains a single token in total although the
         /// collection keeps its default number of documents.
         /// </summary>
         [Test]
         public virtual void TestOnlyOneTerm()
         {
             BasicStats lonely = CreateStats();
             lonely.NumberOfFieldTokens = 1;
             // One token spread over all documents gives an average length below 1.
             lonely.AvgFieldLength = 1.0f / lonely.NumberOfDocuments;
             lonely.DocFreq = 1;
             lonely.TotalTermFreq = 1;
             UnitTestCore(lonely, 1, DOC_LEN);
         }

         /// <summary>
         /// Corner case: the scored document is exactly as long as the average field
         /// length (<c>avgFieldLength = docLen</c>).
         /// </summary>
         [Test]
         public virtual void TestDocumentLengthAverage()
         {
             BasicStats defaults = CreateStats();
             int averageLength = (int)defaults.AvgFieldLength;
             UnitTestCore(defaults, FREQ, averageLength);
         }

         // ---------------------------- Correctness tests ----------------------------

         /// <summary>
         /// Correctness test for the Dirichlet LM model: computes the expected score by
         /// hand from the default statistics and compares it with the score of a
         /// default-constructed <see cref="LMDirichletSimilarity"/>.
         /// </summary>
         [Test]
         public virtual void TestLMDirichlet()
         {
             // 2000.0f is the smoothing constant of the hand computation; presumably it matches
             // the default parameter of LMDirichletSimilarity - TODO confirm.
             // (TOTAL_TERM_FREQ + 1) / (NUMBER_OF_FIELD_TOKENS + 1.0f) is the collection
             // probability of the term with +1 added to numerator and denominator.
             float p = (FREQ + 2000.0f * (TOTAL_TERM_FREQ + 1) / (NUMBER_OF_FIELD_TOKENS + 1.0f)) / (DOC_LEN + 2000.0f);
             float a = 2000.0f / (DOC_LEN + 2000.0f);
             // gold = log(p / (a * collection probability)) + log(a)
             float gold = (float)(Math.Log(p / (a * (TOTAL_TERM_FREQ + 1) / (NUMBER_OF_FIELD_TOKENS + 1.0f))) + Math.Log(a));
             CorrectnessTestCore(new LMDirichletSimilarity(), gold);
         }

         /// <summary>
         /// Correctness test for the Jelinek-Mercer LM model: computes the expected score
         /// by hand for the value 0.1 and compares it with the score of
         /// <c>new LMJelinekMercerSimilarity(0.1f)</c>.
         /// </summary>
         [Test]
         public virtual void TestLMJelinekMercer()
         {
             // Mixture of the in-document frequency ratio (weight 0.9) and the +1-smoothed
             // collection probability of the term (weight 0.1).
             float p = (1 - 0.1f) * FREQ / DOC_LEN + 0.1f * (TOTAL_TERM_FREQ + 1) / (NUMBER_OF_FIELD_TOKENS + 1.0f);
             // gold = log(p / (0.1 * collection probability))
             float gold = (float)(Math.Log(p / (0.1f * (TOTAL_TERM_FREQ + 1) / (NUMBER_OF_FIELD_TOKENS + 1.0f))));
             CorrectnessTestCore(new LMJelinekMercerSimilarity(0.1f), gold);
         }

         /// <summary>
         /// Correctness test for the IB similarity built from the LL distribution, the
         /// DF-based lambda and no normalization; the expected score is a fixed value.
         /// </summary>
         [Test]
         public virtual void TestLLForIB()
         {
             const float gold = 4.178574562072754f;
             CorrectnessTestCore(
                 new IBSimilarity(new DistributionLL(), new LambdaDF(), new Normalization.NoNormalization()),
                 gold);
         }

         /// <summary>
         /// Correctness test for the IB similarity built from the SPL distribution, the
         /// TTF-based lambda and no normalization; the expected score is a fixed value.
         /// </summary>
         [Test]
         public virtual void TestSPLForIB()
         {
             const float gold = 2.2387237548828125f;
             CorrectnessTestCore(
                 new IBSimilarity(new DistributionSPL(), new LambdaTTF(), new Normalization.NoNormalization()),
                 gold);
         }

         /// <summary>
         /// Correctness test for the PL2 DFR model (basic model P, after effect L,
         /// normalization H2): the expected score is computed by hand from the default
         /// statistics. The trailing comments give the intermediate values.
         /// </summary>
         [Test]
         public virtual void TestPL2()
         {
             SimilarityBase sim = new DFRSimilarity(new BasicModelP(), new AfterEffectL(), new NormalizationH2());
             // Normalized term frequency as computed by hand for H2.
             float tfn = (float)(FREQ * SimilarityBase.Log2(1 + AVG_FIELD_LENGTH / DOC_LEN)); // 8.1894750101
             // After-effect factor.
             float l = 1.0f / (tfn + 1.0f); // 0.108820144666
             float lambda = (1.0f + TOTAL_TERM_FREQ) / (1f + NUMBER_OF_DOCUMENTS); // 0.7029703
             // Basic-model term.
             float p = (float)(tfn * SimilarityBase.Log2(tfn / lambda) + (lambda + 1 / (12 * tfn) - tfn) * SimilarityBase.Log2(Math.E) + 0.5 * SimilarityBase.Log2(2 * Math.PI * tfn)); // 21.065619
             float gold = l * p; // 2.2923636
             CorrectnessTestCore(sim, gold);
         }

         /// <summary>
         /// Correctness test for the IneB2 DFR model (basic model Ine, after effect B,
         /// normalization H2); the expected score is a fixed value.
         /// </summary>
         [Test]
         public virtual void TestIneB2()
         {
             const float gold = 5.747603416442871f;
             CorrectnessTestCore(
                 new DFRSimilarity(new BasicModelIne(), new AfterEffectB(), new NormalizationH2()),
                 gold);
         }

         /// <summary>
         /// Correctness test for the GL1 DFR model (basic model G, after effect L,
         /// normalization H1); the expected score is a fixed value.
         /// </summary>
         [Test]
         public virtual void TestGL1()
         {
             const float gold = 1.6390540599822998f;
             CorrectnessTestCore(
                 new DFRSimilarity(new BasicModelG(), new AfterEffectL(), new NormalizationH1()),
                 gold);
         }

         /// <summary>
         /// Correctness test for the BEB1 DFR model (basic model BE, after effect B,
         /// normalization H1): the expected score is computed by hand from the default
         /// statistics. The trailing comments give the intermediate values.
         /// </summary>
         [Test]
         public virtual void TestBEB1()
         {
             SimilarityBase sim = new DFRSimilarity(new BasicModelBE(), new AfterEffectB(), new NormalizationH1());
             // Normalized term frequency as computed by hand for H1.
             float tfn = FREQ * AVG_FIELD_LENGTH / DOC_LEN; // 8.75
             // After-effect factor.
             float b = (TOTAL_TERM_FREQ + 1 + 1) / ((DOC_FREQ + 1) * (tfn + 1)); // 0.67132866
             // Intermediate quantities of the basic-model term, kept in double precision.
             double f = TOTAL_TERM_FREQ + 1 + tfn;
             double n = f + NUMBER_OF_DOCUMENTS;
             double n1 = n + f - 1; // 258.5
             double m1 = n + f - tfn - 2; // 248.75
             double n2 = f; // 79.75
             double m2 = f - tfn; // 71.0
             float be = (float)(-SimilarityBase.Log2(n - 1) - SimilarityBase.Log2(Math.E) + ((m1 + 0.5f) * SimilarityBase.Log2(n1 / m1) + (n1 - m1) * SimilarityBase.Log2(n1)) - ((m2 + 0.5f) * SimilarityBase.Log2(n2 / m2) + (n2 - m2) * SimilarityBase.Log2(n2))); // 67.26544321004599 -  91.9620374903885 -  -8.924494472554715
             // 15.7720995
             float gold = b * be; // 10.588263
             CorrectnessTestCore(sim, gold);
         }

         /// <summary>
         /// Correctness test for the D DFR model (basic model only, with no after effect
         /// and no normalization): the expected score is computed by hand from the
         /// default statistics. The trailing comments give the intermediate values.
         /// </summary>
         [Test]
         public virtual void TestD()
         {
             SimilarityBase sim = new DFRSimilarity(new BasicModelD(), new AfterEffect.NoAfterEffect(), new Normalization.NoNormalization());
             // 70 + 7 + 1 = 78
             double totalTermFreqNorm = TOTAL_TERM_FREQ + FREQ + 1;
             double p = 1.0 / (NUMBER_OF_DOCUMENTS + 1); // 0.009900990099009901
             double phi = FREQ / totalTermFreqNorm; // 0.08974358974358974
             double D = phi * SimilarityBase.Log2(phi / p) + (1 - phi) * SimilarityBase.Log2((1 - phi) / (1 - p)); // 0.17498542370019005
             float gold = (float)(totalTermFreqNorm * D + 0.5 * SimilarityBase.Log2(1 + 2 * Math.PI * FREQ * (1 - phi))); // 16.328257
             CorrectnessTestCore(sim, gold);
         }

         /// <summary>
         /// Correctness test for the In2 DFR model with no aftereffect (basic model In,
         /// normalization H2): the expected score is computed by hand from the default
         /// statistics.
         /// </summary>
         [Test]
         public virtual void TestIn2()
         {
             SimilarityBase sim = new DFRSimilarity(new BasicModelIn(), new AfterEffect.NoAfterEffect(), new NormalizationH2());
             // Normalized term frequency as computed by hand for H2.
             float tfn = (float)(FREQ * SimilarityBase.Log2(1 + AVG_FIELD_LENGTH / DOC_LEN)); // 8.1894750101
             // tfn * log2(101 / 10.5)
             float gold = (float)(tfn * SimilarityBase.Log2((NUMBER_OF_DOCUMENTS + 1) / (DOC_FREQ + 0.5))); // 26.7459577898
             CorrectnessTestCore(sim, gold);
         }

         /// <summary>
         /// Correctness test for the IFB DFR model with no normalization (basic model IF,
         /// after effect B): the expected score is computed by hand from the default
         /// statistics.
         /// </summary>
         [Test]
         public virtual void TestIFB()
         {
             SimilarityBase sim = new DFRSimilarity(new BasicModelIF(), new AfterEffectB(), new Normalization.NoNormalization());
             // NOTE(review): earlier comments here quoted 0.8875 and 7.96761458307, which do not
             // match these expressions with the default constants; the values below do.
             float B = (TOTAL_TERM_FREQ + 1 + 1) / ((DOC_FREQ + 1) * (FREQ + 1)); // 72 / 88 = 0.8181818
             float IF = (float)(FREQ * SimilarityBase.Log2(1 + (NUMBER_OF_DOCUMENTS + 1) / (TOTAL_TERM_FREQ + 0.5))); // 8.97759389642
             float gold = B * IF; // approx. 7.3453041
             CorrectnessTestCore(sim, gold);
         }

         /// <summary>
         /// The generic test core called by all correctness test methods. It computes the
         /// weight of <paramref name="sim"/> from the default statistics, calls its
         /// <see cref="SimilarityBase.Score(BasicStats, float, float)"/> method with the
         /// default frequency and document length, and compares the score against the
         /// manually computed <paramref name="gold"/> within <c>FLOAT_EPSILON</c>.
         /// </summary>
         private void CorrectnessTestCore(SimilarityBase sim, float gold)
         {
             BasicStats defaults = CreateStats();
             CollectionStatistics collectionStats = ToCollectionStats(defaults);
             TermStatistics termStats = ToTermStats(defaults);
             BasicStats computed = (BasicStats)sim.ComputeWeight(defaults.TotalBoost, collectionStats, termStats);

             float actual = sim.Score(computed, FREQ, DOC_LEN);
             string failure = sim.ToString() + " score not correct.";
             Assert.AreEqual(gold, actual, FLOAT_EPSILON, failure);
         }

         // ---------------------------- Integration tests ----------------------------

         /// <summary>
         /// The "collection" for the integration tests. SetUp indexes each entry as the
         /// body of one document and stores the entry's array index as the document id,
         /// so the id values run from "0" to "7".
         /// </summary>
         internal string[] docs = new string[]
         {
             "Tiger, tiger burning bright   In the forest of the night   What immortal hand or eye   Could frame thy fearful symmetry ?",
             "In what distant depths or skies   Burnt the fire of thine eyes ?   On what wings dare he aspire ?   What the hands the seize the fire ?",
             "And what shoulder and what art   Could twist the sinews of thy heart ?   And when thy heart began to beat What dread hand ? And what dread feet ?",
             "What the hammer? What the chain ?   In what furnace was thy brain ?   What the anvil ? And what dread grasp   Dare its deadly terrors clasp ?",
             "And when the stars threw down their spears   And water'd heaven with their tear   Did he smile his work to see ?   Did he, who made the lamb, made thee ?",
             "Tiger, tiger burning bright   In the forest of the night   What immortal hand or eye   Dare frame thy fearful symmetry ?",
             "Cruelty has a human heart   And jealousy a human face   Terror the human form divine   And Secrecy the human dress .",
             "The human dress is forg'd iron   The human form a fiery forge   The human face a furnace seal'd   The human heart its fiery gorge ."
         };

         /// <summary>
         /// Checks that, whichever similarity is installed on the searcher, the term
         /// query for "heart" on the body field reports exactly three hits.
         /// </summary>
         [Test]
         public virtual void TestHeartList()
         {
             Query heartQuery = new TermQuery(new Term(FIELD_BODY, "heart"));

             foreach (SimilarityBase similarity in sims)
             {
                 searcher.Similarity = similarity;
                 TopDocs hits = searcher.Search(heartQuery, 1000);
                 var hitCount = hits.TotalHits;
                 Assert.AreEqual(3, hitCount, "Failed: " + similarity.ToString());
             }
         }

         /// <summary>
         /// Checks that, for every similarity, the best hit of the "heart" query is the
         /// document stored with id "2", i.e. the third entry of <c>docs</c>. Skipped when
         /// the default codec is "Lucene3x".
         /// </summary>
         [Test]
         public virtual void TestHeartRanking()
         {
             bool isPreFlexCodec = "Lucene3x".Equals(Codec.Default.Name, StringComparison.Ordinal);
             AssumeFalse("PreFlex codec does not support the stats necessary for this test!", isPreFlexCodec);

             Query heartQuery = new TermQuery(new Term(FIELD_BODY, "heart"));

             foreach (SimilarityBase similarity in sims)
             {
                 searcher.Similarity = similarity;
                 TopDocs hits = searcher.Search(heartQuery, 1000);
                 // Resolve the top hit back to the id that SetUp stored for it.
                 string topId = reader.Document(hits.ScoreDocs[0].Doc).Get(FIELD_ID);
                 Assert.AreEqual("2", topId, "Failed: " + similarity.ToString());
             }
         }

         /// <summary>
         /// Releases the reader and the directory opened by SetUp, then runs the base
         /// class tear-down. The nested finally blocks make sure the directory is still
         /// disposed and the base tear-down still runs when an earlier step throws.
         /// </summary>
         [TearDown]
         public override void TearDown()
         {
             try
             {
                 reader.Dispose();
             }
             finally
             {
                 try
                 {
                     dir.Dispose();
                 }
                 finally
                 {
                     base.TearDown();
                 }
             }
         }

         /// <summary>
         /// LUCENE-5221: a <see cref="SimilarityBase"/> implementation must compute the same
         /// norm as <see cref="DefaultSimilarity"/> for a boosted field state, both with
         /// overlap discounting switched off and switched on.
         /// </summary>
         [Test]
         public virtual void TestDiscountOverlapsBoost()
         {
             DefaultSimilarity expected = new DefaultSimilarity();
             SimilarityBase actual = new DFRSimilarity(new BasicModelIne(), new AfterEffectB(), new NormalizationH2());
             FieldInvertState state = new FieldInvertState("foo")
             {
                 Length = 5,
                 NumOverlap = 2,
                 Boost = 3
             };

             // First without, then with overlap discounting.
             foreach (bool discountOverlaps in new bool[] { false, true })
             {
                 expected.DiscountOverlaps = discountOverlaps;
                 actual.DiscountOverlaps = discountOverlaps;
                 Assert.AreEqual(expected.ComputeNorm(state), actual.ComputeNorm(state));
             }
         }
     }
 }