using Lucene.Net.Documents;
using Lucene.Net.Index.Extensions;
using NUnit.Framework;
using Assert = Lucene.Net.TestFramework.Assert;

namespace Lucene.Net.Search
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
     */

using Directory = Lucene.Net.Store.Directory;
using DirectoryReader = Lucene.Net.Index.DirectoryReader;
using DocsAndPositionsEnum = Lucene.Net.Index.DocsAndPositionsEnum;
using Document = Documents.Document;
using English = Lucene.Net.Util.English;
using Field = Field;
using Fields = Lucene.Net.Index.Fields;
using FieldType = FieldType;
using IndexReader = Lucene.Net.Index.IndexReader;
using IndexWriter = Lucene.Net.Index.IndexWriter;
using IOUtils = Lucene.Net.Util.IOUtils;
using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
using MockTokenizer = Lucene.Net.Analysis.MockTokenizer;
using OpenMode = Lucene.Net.Index.OpenMode;
using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter;
using Term = Lucene.Net.Index.Term;
using Terms = Lucene.Net.Index.Terms;
using TermsEnum = Lucene.Net.Index.TermsEnum;
    using TextField = TextField;

public class TestTermVectors : LuceneTestCase
{
private static IndexReader reader;
        private static Directory directory;

        /// <summary>
        /// LUCENENET specific: this method is non-static because
        /// NewIndexWriterConfig is no longer static.
        /// </summary>
[OneTimeSetUp]
public override void BeforeClass()
{
base.BeforeClass();
directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random, directory,
                NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.SIMPLE, true))
                    .SetMergePolicy(NewLogMergePolicy()));
//writer.setNoCFSRatio(1.0);
//writer.infoStream = System.out;
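            // Cycle through all four term vector configurations: docs where both
            // i % 2 == 0 and i % 3 == 0 store positions and offsets, docs where
            // only i % 2 == 0 store positions, docs where only i % 3 == 0 store
            // offsets, and the rest store bare term vectors.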
for (int i = 0; i < 1000; i++)
{
Document doc = new Document();
FieldType ft = new FieldType(TextField.TYPE_STORED);
int mod3 = i % 3;
int mod2 = i % 2;
if (mod2 == 0 && mod3 == 0)
{
ft.StoreTermVectors = true;
ft.StoreTermVectorOffsets = true;
ft.StoreTermVectorPositions = true;
}
else if (mod2 == 0)
{
ft.StoreTermVectors = true;
ft.StoreTermVectorPositions = true;
}
else if (mod3 == 0)
{
ft.StoreTermVectors = true;
ft.StoreTermVectorOffsets = true;
}
else
{
ft.StoreTermVectors = true;
}
doc.Add(new Field("field", English.Int32ToEnglish(i), ft));
//test no term vectors too
doc.Add(new TextField("noTV", English.Int32ToEnglish(i), Field.Store.YES));
writer.AddDocument(doc);
}
reader = writer.GetReader();
writer.Dispose();
        }

[OneTimeTearDown]
public override void AfterClass()
{
reader.Dispose();
directory.Dispose();
reader = null;
directory = null;
base.AfterClass();
        }

// In a single doc, for the same field, mix the term
// vectors up
[Test]
        public virtual void TestMixedVectorsVectors()
        {
            RandomIndexWriter writer = new RandomIndexWriter(Random, directory,
                NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.SIMPLE, true))
                    .SetOpenMode(OpenMode.CREATE));
Document doc = new Document();
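            // Four field types over the same stored text field, each enabling a
            // different combination of term vector data: ft2 = vectors only,
            // ft3 = vectors + positions, ft4 = vectors + offsets,
            // ft5 = vectors + positions + offsets.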
FieldType ft2 = new FieldType(TextField.TYPE_STORED);
ft2.StoreTermVectors = true;
FieldType ft3 = new FieldType(TextField.TYPE_STORED);
ft3.StoreTermVectors = true;
ft3.StoreTermVectorPositions = true;
FieldType ft4 = new FieldType(TextField.TYPE_STORED);
ft4.StoreTermVectors = true;
ft4.StoreTermVectorOffsets = true;
FieldType ft5 = new FieldType(TextField.TYPE_STORED);
ft5.StoreTermVectors = true;
ft5.StoreTermVectorOffsets = true;
ft5.StoreTermVectorPositions = true;
doc.Add(NewTextField("field", "one", Field.Store.YES));
doc.Add(NewField("field", "one", ft2));
doc.Add(NewField("field", "one", ft3));
doc.Add(NewField("field", "one", ft4));
doc.Add(NewField("field", "one", ft5));
writer.AddDocument(doc);
IndexReader reader = writer.GetReader();
writer.Dispose();
IndexSearcher searcher = NewSearcher(reader);
Query query = new TermQuery(new Term("field", "one"));
ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
Assert.AreEqual(1, hits.Length);
Fields vectors = searcher.IndexReader.GetTermVectors(hits[0].Doc);
Assert.IsNotNull(vectors);
Assert.AreEqual(1, vectors.Count);
Terms vector = vectors.GetTerms("field");
Assert.IsNotNull(vector);
Assert.AreEqual(1, vector.Count);
TermsEnum termsEnum = vector.GetEnumerator();
Assert.IsTrue(termsEnum.MoveNext());
Assert.AreEqual("one", termsEnum.Term.Utf8ToString());
Assert.AreEqual(5, termsEnum.TotalTermFreq);
DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);
Assert.IsNotNull(dpEnum);
Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
Assert.AreEqual(5, dpEnum.Freq);
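            // Positions 0..4: the five single-token field instances are
            // concatenated, each token taking the next position.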
for (int i = 0; i < 5; i++)
{
Assert.AreEqual(i, dpEnum.NextPosition());
}
dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
Assert.IsNotNull(dpEnum);
Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
Assert.AreEqual(5, dpEnum.Freq);
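            // Offsets run over the concatenated text: each "one" token spans
            // 3 chars and consecutive field instances are separated by an offset
            // gap of 1, so instance i covers [4 * i, 4 * i + 3).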
for (int i = 0; i < 5; i++)
{
dpEnum.NextPosition();
Assert.AreEqual(4 * i, dpEnum.StartOffset);
Assert.AreEqual(4 * i + 3, dpEnum.EndOffset);
}
reader.Dispose();
        }

private IndexWriter CreateWriter(Directory dir)
{
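            // maxBufferedDocs = 2 flushes a new segment every two documents, so
            // the merge tests below always start from a multi-segment index.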
return new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2));
        }

private void CreateDir(Directory dir)
{
IndexWriter writer = CreateWriter(dir);
writer.AddDocument(CreateDoc());
writer.Dispose();
        }

private Document CreateDoc()
{
Document doc = new Document();
FieldType ft = new FieldType(TextField.TYPE_STORED);
ft.StoreTermVectors = true;
ft.StoreTermVectorOffsets = true;
ft.StoreTermVectorPositions = true;
doc.Add(NewField("c", "aaa", ft));
return doc;
        }

private void VerifyIndex(Directory dir)
{
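            // Every document was indexed with term vectors on field "c"; verify
            // they survived flushing and merging.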
IndexReader r = DirectoryReader.Open(dir);
int numDocs = r.NumDocs;
for (int i = 0; i < numDocs; i++)
{
Assert.IsNotNull(r.GetTermVectors(i).GetTerms("c"), "term vectors should not have been null for document " + i);
}
r.Dispose();
        }

[Test]
public virtual void TestFullMergeAddDocs()
{
Directory target = NewDirectory();
IndexWriter writer = CreateWriter(target);
// with maxBufferedDocs=2, this results in two segments, so that forceMerge
// actually does something.
for (int i = 0; i < 4; i++)
{
writer.AddDocument(CreateDoc());
}
writer.ForceMerge(1);
writer.Dispose();
VerifyIndex(target);
target.Dispose();
        }

[Test]
public virtual void TestFullMergeAddIndexesDir()
{
Directory[] input = new Directory[] { NewDirectory(), NewDirectory() };
Directory target = NewDirectory();
foreach (Directory dir in input)
{
CreateDir(dir);
}
IndexWriter writer = CreateWriter(target);
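            // AddIndexes(Directory[]) copies the incoming segments rather than
            // re-indexing them; the subsequent ForceMerge must still preserve
            // the term vectors.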
writer.AddIndexes(input);
writer.ForceMerge(1);
writer.Dispose();
VerifyIndex(target);
IOUtils.Dispose(target, input[0], input[1]);
        }

[Test]
public virtual void TestFullMergeAddIndexesReader()
{
Directory[] input = new Directory[] { NewDirectory(), NewDirectory() };
Directory target = NewDirectory();
foreach (Directory dir in input)
{
CreateDir(dir);
}
IndexWriter writer = CreateWriter(target);
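            // AddIndexes(IndexReader) feeds the readers through the merge
            // machinery instead of copying files, exercising a different code
            // path than the Directory overload above.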
foreach (Directory dir in input)
{
IndexReader r = DirectoryReader.Open(dir);
writer.AddIndexes(r);
r.Dispose();
}
writer.ForceMerge(1);
writer.Dispose();
VerifyIndex(target);
IOUtils.Dispose(target, input[0], input[1]);
}
}
}