// blob: 4faf72df52cb30d993187bf41004374726a5179e [file] [log] [blame]
using Lucene.Net.Documents;
using Lucene.Net.Index.Extensions;
using NUnit.Framework;
using System;
using System.Collections.Generic;
using System.Text;
using Assert = Lucene.Net.TestFramework.Assert;
namespace Lucene.Net.Index
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using IBits = Lucene.Net.Util.IBits;
using BytesRef = Lucene.Net.Util.BytesRef;
using Directory = Lucene.Net.Store.Directory;
using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
using Document = Documents.Document;
using Field = Field;
using FieldType = FieldType;
using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
using TestUtil = Lucene.Net.Util.TestUtil;
using TextField = TextField;
[TestFixture]
public class TestDocsAndPositions : LuceneTestCase
{
private string fieldName;
/// <summary>
/// Runs the base fixture set-up and then assigns <c>fieldName</c> a new value:
/// the prefix "field" followed by a random integer.
/// </summary>
[SetUp]
public override void SetUp()
{
    base.SetUp();

    int suffix = Random.Next();
    fieldName = "field" + suffix;
}
/// <summary>
/// Simple testcase for <see cref="DocsAndPositionsEnum"/>.
/// <para/>
/// Adds 39 documents whose field value repeats the sequence "1 2 ... 10" four
/// times, then, for every segment, starts at a random document and asserts that
/// the term "1" has frequency 4 and positions 0, 10, 20 and 30 in that document
/// and in every following one.
/// </summary>
[Test]
public virtual void TestPositionsSimple()
{
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random, directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));

    // The field type is the same for every document, so it is built once up front
    // (this matches how the other tests in this fixture set up their field type).
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.OmitNorms = true;
    for (int i = 0; i < 39; i++)
    {
        Document doc = new Document();
        doc.Add(NewField(fieldName, "1 2 3 4 5 6 7 8 9 10 " + "1 2 3 4 5 6 7 8 9 10 " + "1 2 3 4 5 6 7 8 9 10 " + "1 2 3 4 5 6 7 8 9 10", customType));
        writer.AddDocument(doc);
    }
    IndexReader reader = writer.GetReader();
    writer.Dispose();

    int num = AtLeast(13);
    for (int i = 0; i < num; i++)
    {
        BytesRef bytes = new BytesRef("1");
        IndexReaderContext topReaderContext = reader.Context;
        foreach (AtomicReaderContext atomicReaderContext in topReaderContext.Leaves)
        {
            DocsAndPositionsEnum docsAndPosEnum = GetDocsAndPositions((AtomicReader)atomicReaderContext.Reader, bytes, null);
            Assert.IsNotNull(docsAndPosEnum);
            if (atomicReaderContext.Reader.MaxDoc == 0)
            {
                // nothing to advance to in an empty segment
                continue;
            }
            int advance = docsAndPosEnum.Advance(Random.Next(atomicReaderContext.Reader.MaxDoc));
            do
            {
                string msg = "Advanced to: " + advance + " current doc: " + docsAndPosEnum.DocID; // TODO: + " usePayloads: " + usePayload;
                // Freq is re-checked before every position read; it must stay at 4 throughout.
                Assert.AreEqual(4, docsAndPosEnum.Freq, msg);
                Assert.AreEqual(0, docsAndPosEnum.NextPosition(), msg);
                Assert.AreEqual(4, docsAndPosEnum.Freq, msg);
                Assert.AreEqual(10, docsAndPosEnum.NextPosition(), msg);
                Assert.AreEqual(4, docsAndPosEnum.Freq, msg);
                Assert.AreEqual(20, docsAndPosEnum.NextPosition(), msg);
                Assert.AreEqual(4, docsAndPosEnum.Freq, msg);
                Assert.AreEqual(30, docsAndPosEnum.NextPosition(), msg);
            } while (docsAndPosEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
        }
    }
    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Looks up <paramref name="bytes"/> in the terms of the test field of
/// <paramref name="reader"/> and returns a positions enum for it.
/// </summary>
/// <param name="reader">Reader whose terms for <c>fieldName</c> are consulted.</param>
/// <param name="bytes">The term to seek to.</param>
/// <param name="liveDocs">Passed through unchanged to <c>DocsAndPositions</c>; may be <c>null</c>.</param>
/// <returns>
/// The enum for the term, or <c>null</c> when the reader has no terms for the
/// field or the exact term is not found.
/// </returns>
public virtual DocsAndPositionsEnum GetDocsAndPositions(AtomicReader reader, BytesRef bytes, IBits liveDocs)
{
    Terms fieldTerms = reader.GetTerms(fieldName);
    if (fieldTerms == null)
    {
        return null;
    }

    TermsEnum termsEnum = fieldTerms.GetEnumerator();
    if (!termsEnum.SeekExact(bytes))
    {
        return null;
    }

    return termsEnum.DocsAndPositions(liveDocs, null);
}
/// <summary>
/// This test indexes random numbers within a range into a field and checks
/// their occurrences by searching for a number from that range selected at
/// random. All positions for that number are saved up front and compared to
/// the enum's positions.
/// </summary>
[Test]
public virtual void TestRandomPositions()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy()));
    int numDocs = AtLeast(47);
    int max = 1051;
    int term = Random.Next(max);

    // positionsInDoc[d] holds the expected positions of the term in the d-th added document
    int[][] positionsInDoc = new int[numDocs][];
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.OmitNorms = true;
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        List<int> positions = new List<int>();
        StringBuilder builder = new StringBuilder();
        int num = AtLeast(131);
        for (int j = 0; j < num; j++)
        {
            int nextInt = Random.Next(max);
            builder.Append(nextInt).Append(" ");
            if (nextInt == term)
            {
                positions.Add(j);
            }
        }
        if (positions.Count == 0)
        {
            // make sure the term occurs at least once: append it after the
            // num random values and record that position
            builder.Append(term);
            positions.Add(num);
        }
        doc.Add(NewField(fieldName, builder.ToString(), customType));
        positionsInDoc[i] = positions.ToArray();
        writer.AddDocument(doc);
    }

    IndexReader reader = writer.GetReader();
    writer.Dispose();

    int iterations = AtLeast(13);
    for (int i = 0; i < iterations; i++)
    {
        BytesRef bytes = new BytesRef("" + term);
        IndexReaderContext topReaderContext = reader.Context;
        foreach (AtomicReaderContext atomicReaderContext in topReaderContext.Leaves)
        {
            DocsAndPositionsEnum docsAndPosEnum = GetDocsAndPositions((AtomicReader)atomicReaderContext.Reader, bytes, null);
            Assert.IsNotNull(docsAndPosEnum);
            int initDoc = 0;
            int maxDoc = atomicReaderContext.Reader.MaxDoc;
            // initially advance or do next doc
            if (Random.NextBoolean())
            {
                initDoc = docsAndPosEnum.NextDoc();
            }
            else
            {
                initDoc = docsAndPosEnum.Advance(Random.Next(maxDoc));
            }
            // now run through the scorer and check if all positions are there...
            do
            {
                int docID = docsAndPosEnum.DocID;
                if (docID == DocIdSetIterator.NO_MORE_DOCS)
                {
                    break;
                }
                int[] pos = positionsInDoc[atomicReaderContext.DocBase + docID];
                Assert.AreEqual(pos.Length, docsAndPosEnum.Freq);
                // number of positions read should be random - don't read all of them
                // always
                int howMany = Random.Next(20) == 0 ? pos.Length - Random.Next(pos.Length) : pos.Length;

                // The failure message does not depend on the position index, so it is
                // built once per document. The expected positions are joined explicitly;
                // concatenating the array itself would only print its type name.
                string msg = "iteration: " + i + " initDoc: " + initDoc + " doc: " + docID + " base: " + atomicReaderContext.DocBase + " positions: " + string.Join<int>(", ", pos); // TODO: + " usePayloads: " + usePayload
                for (int j = 0; j < howMany; j++)
                {
                    Assert.AreEqual(pos[j], docsAndPosEnum.NextPosition(), msg);
                }

                if (Random.Next(10) == 0) // once in a while advance
                {
                    if (docsAndPosEnum.Advance(docID + 1 + Random.Next((maxDoc - docID))) == DocIdSetIterator.NO_MORE_DOCS)
                    {
                        break;
                    }
                }
            } while (docsAndPosEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
        }
    }
    reader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Indexes documents of 199 random numbers each and records, per document, how
/// often one randomly chosen number occurs. Then, for every segment, walks the
/// <see cref="DocsEnum"/> of that number with a random mix of NextDoc and Advance
/// calls, asserting the current document and frequency against the recorded
/// values and that the enum is exhausted at the end of the segment.
/// </summary>
[Test]
public virtual void TestRandomDocs()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy()));
    int numDocs = AtLeast(49);
    int max = 15678;
    int term = Random.Next(max);

    // freqInDoc[d] is the number of times the term was written into the d-th added document
    int[] freqInDoc = new int[numDocs];
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.OmitNorms = true;
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        StringBuilder builder = new StringBuilder();
        for (int j = 0; j < 199; j++)
        {
            int nextInt = Random.Next(max);
            builder.Append(nextInt).Append(' ');
            if (nextInt == term)
            {
                freqInDoc[i]++;
            }
        }
        doc.Add(NewField(fieldName, builder.ToString(), customType));
        writer.AddDocument(doc);
    }

    IndexReader reader = writer.GetReader();
    writer.Dispose();

    int num = AtLeast(13);
    for (int i = 0; i < num; i++)
    {
        BytesRef bytes = new BytesRef("" + term);
        IndexReaderContext topReaderContext = reader.Context;
        foreach (AtomicReaderContext context in topReaderContext.Leaves)
        {
            int maxDoc = context.AtomicReader.MaxDoc;
            DocsEnum docsEnum = TestUtil.Docs(Random, context.Reader, fieldName, bytes, null, null, DocsFlags.FREQS);
            if (FindNext(freqInDoc, context.DocBase, context.DocBase + maxDoc) == int.MaxValue)
            {
                // no document of this segment contains the term, so no enum is expected
                Assert.IsNull(docsEnum);
                continue;
            }
            Assert.IsNotNull(docsEnum);
            docsEnum.NextDoc();
            for (int j = 0; j < maxDoc; j++)
            {
                if (freqInDoc[context.DocBase + j] != 0)
                {
                    Assert.AreEqual(j, docsEnum.DocID);
                    // expected value first, so a failure reports the two values the right way round
                    Assert.AreEqual(freqInDoc[context.DocBase + j], docsEnum.Freq);
                    if (i % 2 == 0 && Random.Next(10) == 0)
                    {
                        // jump straight to the next document known to contain the term
                        // (FindNext yields int.MaxValue when there is none left)
                        int next = FindNext(freqInDoc, context.DocBase + j + 1, context.DocBase + maxDoc) - context.DocBase;
                        int advancedTo = docsEnum.Advance(next);
                        if (next >= maxDoc)
                        {
                            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, advancedTo);
                        }
                        else
                        {
                            Assert.IsTrue(next >= advancedTo, "advanced to: " + advancedTo + " but should be <= " + next);
                        }
                    }
                    else
                    {
                        docsEnum.NextDoc();
                    }
                }
            }
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.DocID, "DocBase: " + context.DocBase + " maxDoc: " + maxDoc + " " + docsEnum.GetType());
        }
    }
    reader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Scans <paramref name="docs"/> from index <paramref name="pos"/> (inclusive) up to
/// <paramref name="max"/> (exclusive) for the first non-zero entry.
/// </summary>
/// <returns>
/// The index of the first non-zero entry in that range, or
/// <see cref="int.MaxValue"/> when the range is empty or holds only zeros.
/// </returns>
private static int FindNext(int[] docs, int pos, int max)
{
    int index = pos;
    while (index < max)
    {
        if (docs[index] != 0)
        {
            return index;
        }
        index++;
    }
    return int.MaxValue;
}
/// <summary>
/// Tests retrieval of positions for terms that have a large number of
/// occurrences to force test of buffer refill during positions iteration.
/// <para/>
/// Every one of the 39 documents alternates "even" and "odd" for 1000 tokens;
/// the term "even" is then expected with frequency 500 at positions 0, 2, 4, ...
/// in the first document reached in each segment.
/// </summary>
[Test]
public virtual void TestLargeNumberOfPositions()
{
    Directory dir = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
    int howMany = 1000;
    FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED) { OmitNorms = true };

    for (int docIndex = 0; docIndex < 39; docIndex++)
    {
        Document document = new Document();
        StringBuilder text = new StringBuilder();
        for (int token = 0; token < howMany; token++)
        {
            text.Append(token % 2 == 0 ? "even " : "odd ");
        }
        document.Add(NewField(fieldName, text.ToString(), fieldType));
        indexWriter.AddDocument(document);
    }

    // now do searches
    IndexReader reader = indexWriter.GetReader();
    indexWriter.Dispose();

    int iterations = AtLeast(13);
    for (int iteration = 0; iteration < iterations; iteration++)
    {
        BytesRef evenTerm = new BytesRef("even");
        foreach (AtomicReaderContext leaf in reader.Context.Leaves)
        {
            DocsAndPositionsEnum positionsEnum = GetDocsAndPositions((AtomicReader)leaf.Reader, evenTerm, null);
            Assert.IsNotNull(positionsEnum);
            int maxDoc = leaf.Reader.MaxDoc;

            // initially advance or do next doc
            int initDoc = Random.NextBoolean()
                ? positionsEnum.NextDoc()
                : positionsEnum.Advance(Random.Next(maxDoc));

            string msg = "Iteration: " + iteration + " initDoc: " + initDoc; // TODO: + " payloads: " + usePayload;
            Assert.AreEqual(howMany / 2, positionsEnum.Freq);
            for (int expected = 0; expected < howMany; expected += 2)
            {
                Assert.AreEqual(expected, positionsEnum.NextPosition(), "position missmatch index: " + expected + " with freq: " + positionsEnum.Freq + " -- " + msg);
            }
        }
    }

    reader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Checks that a <see cref="DocsEnum"/> reports doc id -1 before the first call to
/// NextDoc and then yields a document, both for a newly obtained enum and for
/// one obtained by passing the previous enum in for reuse.
/// </summary>
[Test]
public virtual void TestDocsEnumStart()
{
    Directory dir = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);

    Document document = new Document();
    document.Add(NewStringField("foo", "bar", Field.Store.NO));
    indexWriter.AddDocument(document);

    DirectoryReader topReader = indexWriter.GetReader();
    AtomicReader segmentReader = GetOnlySegmentReader(topReader);

    DocsEnum docs = TestUtil.Docs(Random, segmentReader, "foo", new BytesRef("bar"), null, null, DocsFlags.NONE);
    Assert.AreEqual(-1, docs.DocID);
    bool hasDoc = docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS;
    Assert.IsTrue(hasDoc);

    // now reuse and check again
    TermsEnum termsEnum = segmentReader.GetTerms("foo").GetEnumerator();
    bool found = termsEnum.SeekExact(new BytesRef("bar"));
    Assert.IsTrue(found);
    docs = TestUtil.Docs(Random, termsEnum, null, docs, DocsFlags.NONE);
    Assert.AreEqual(-1, docs.DocID);
    hasDoc = docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS;
    Assert.IsTrue(hasDoc);

    indexWriter.Dispose();
    segmentReader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Checks that a <see cref="DocsAndPositionsEnum"/> reports doc id -1 before the
/// first call to NextDoc and then yields a document, both for a newly obtained
/// enum and for one obtained by passing the previous enum in for reuse.
/// </summary>
[Test]
public virtual void TestDocsAndPositionsEnumStart()
{
    Directory dir = NewDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);

    Document document = new Document();
    document.Add(NewTextField("foo", "bar", Field.Store.NO));
    indexWriter.AddDocument(document);

    DirectoryReader topReader = indexWriter.GetReader();
    AtomicReader segmentReader = GetOnlySegmentReader(topReader);

    DocsAndPositionsEnum positions = segmentReader.GetTermPositionsEnum(new Term("foo", "bar"));
    Assert.AreEqual(-1, positions.DocID);
    bool hasDoc = positions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS;
    Assert.IsTrue(hasDoc);

    // now reuse and check again
    TermsEnum termsEnum = segmentReader.GetTerms("foo").GetEnumerator();
    bool found = termsEnum.SeekExact(new BytesRef("bar"));
    Assert.IsTrue(found);
    positions = termsEnum.DocsAndPositions(null, positions);
    Assert.AreEqual(-1, positions.DocID);
    hasDoc = positions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS;
    Assert.IsTrue(hasDoc);

    indexWriter.Dispose();
    segmentReader.Dispose();
    dir.Dispose();
}
}
}