using J2N;
using Lucene.Net.Index.Extensions;
using NUnit.Framework;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Text;
using Assert = Lucene.Net.TestFramework.Assert;
using Console = Lucene.Net.Util.SystemConsole;
namespace Lucene.Net.Index
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
using BaseDirectoryWrapper = Lucene.Net.Store.BaseDirectoryWrapper;
using BinaryDocValuesField = Lucene.Net.Documents.BinaryDocValuesField;
using BytesRef = Lucene.Net.Util.BytesRef;
using Constants = Lucene.Net.Util.Constants;
using Directory = Lucene.Net.Store.Directory;
//using IndexOptions = Lucene.Net.Index.IndexOptions;
using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
using Document = Lucene.Net.Documents.Document;
using DoubleDocValuesField = Lucene.Net.Documents.DoubleDocValuesField;
using Field = Lucene.Net.Documents.Field;
using FieldCache = Lucene.Net.Search.FieldCache;
using FieldType = Lucene.Net.Documents.FieldType;
using IBits = Lucene.Net.Util.IBits;
using IndexSearcher = Lucene.Net.Search.IndexSearcher;
using Int32Field = Lucene.Net.Documents.Int32Field;
using Int64Field = Lucene.Net.Documents.Int64Field;
using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
using NumericDocValuesField = Lucene.Net.Documents.NumericDocValuesField;
using NumericRangeQuery = Lucene.Net.Search.NumericRangeQuery;
using PhraseQuery = Lucene.Net.Search.PhraseQuery;
using RAMDirectory = Lucene.Net.Store.RAMDirectory;
using ScoreDoc = Lucene.Net.Search.ScoreDoc;
using SingleDocValuesField = Lucene.Net.Documents.SingleDocValuesField;
using SortedDocValuesField = Lucene.Net.Documents.SortedDocValuesField;
using StringField = Lucene.Net.Documents.StringField;
using StringHelper = Lucene.Net.Util.StringHelper;
using TermQuery = Lucene.Net.Search.TermQuery;
using TestUtil = Lucene.Net.Util.TestUtil;
using TextField = Lucene.Net.Documents.TextField;
using TopDocs = Lucene.Net.Search.TopDocs;
// Verify we can read the pre-4.0 file format, do searches
// against it, and add documents to it.
// don't use 3.x codec, its unrealistic since it means
// we won't even be running the actual code, only the impostor
// Sep codec cannot yet handle the offsets we add when changing indexes!
[SuppressCodecs("Lucene3x", "MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene40", "Lucene41", "Appending", "Lucene42", "Lucene45")]
public class TestBackwardsCompatibility3x : LuceneTestCase
// Uncomment these cases & run them on an older Lucene
// version, to generate an index to test backwards
// compatibility. Then, cd to build/test/index.cfs and
// run "zip index.<VERSION> *"; cd to
// build/test/index.nocfs and run "zip
// index.<VERSION> *". Then move those 2 zip
// files to your trunk checkout and add them to the
// oldNames array.
// public void testCreateCFS() throws IOException {
//   createIndex("index.cfs", true, false);
// }
// public void testCreateNoCFS() throws IOException {
//   createIndex("index.nocfs", false, false);
// }
// These are only needed for the special upgrade test to verify
// that also single-segment indexes are correctly upgraded by IndexUpgrader.
// You don't need them to be build for non-3.1 (the test is happy with just one
// "old" segment format, version is unimportant:
// public void testCreateSingleSegmentCFS() throws IOException {
//   createIndex("index.singlesegment.cfs", true, true);
// }
// public void testCreateSingleSegmentNoCFS() throws IOException {
//   createIndex("index.singlesegment.nocfs", false, true);
// }
// LUCENENET specific to load resources for this type
internal const string CURRENT_RESOURCE_DIRECTORY = "Lucene.Net.Tests.Index.";
// Names of the old-format indexes most tests iterate over; each name is also
// used as a key into oldIndexDirs.
internal static readonly string[] oldNames = new string[] {
"30.cfs", "30.nocfs", "31.cfs", "31.nocfs", "32.cfs",
"32.nocfs", "34.cfs", "34.nocfs"
// Names of indexes that TestUnsupportedOldIndexes unpacks from the embedded
// resource "unsupported." + name + ".zip" and expects to be rejected.
internal readonly string[] unsupportedNames = new string[] {
"19.cfs", "19.nocfs", "20.cfs", "20.nocfs", "21.cfs",
"21.nocfs", "22.cfs", "22.nocfs", "23.cfs", "23.nocfs",
"24.cfs", "24.nocfs", "29.cfs", "29.nocfs"
// Names of the indexes iterated by TestUpgradeOldSingleSegmentIndexWithAdditions,
// which asserts that each one starts out as a single segment.
internal static readonly string[] oldSingleSegmentNames = new string[] {
"31.optimized.cfs", "31.optimized.nocfs"
// Maps an index name to its opened Directory; created in BeforeClass and
// reset to null in AfterClass.
internal static IDictionary<string, Directory> oldIndexDirs;
/// <summary>
/// Unpacks every old index (regular and single-segment) from its embedded
/// "index.&lt;name&gt;.zip" resource into a temp directory and registers the opened
/// <see cref="Directory"/> in <see cref="oldIndexDirs"/> under the index name.
/// </summary>
// NOTE(review): this override does not call base.BeforeClass() in the visible code — confirm whether the base class requires it.
public override void BeforeClass()
{
    assertFalse("test infra is broken!", OldFormatImpersonationIsActive);

    // Both name arrays must be registered: the tests look up oldNames as well as
    // oldSingleSegmentNames entries in oldIndexDirs.
    List<string> names = new List<string>(oldNames.Length + oldSingleSegmentNames.Length);
    names.AddRange(oldNames);
    names.AddRange(oldSingleSegmentNames);

    oldIndexDirs = new Dictionary<string, Directory>();
    foreach (string name in names)
    {
        DirectoryInfo dir = CreateTempDir(name);
        using (Stream zipFileStream = this.GetType().FindAndGetManifestResourceStream("index." + name + ".zip"))
        {
            TestUtil.Unzip(zipFileStream, dir);
        }
        oldIndexDirs[name] = NewFSDirectory(dir);
    }
}
/// <summary>
/// Disposes every directory registered in <see cref="oldIndexDirs"/> and clears the map.
/// </summary>
public override void AfterClass()
{
    // oldIndexDirs is still null if BeforeClass failed before creating it;
    // skip the cleanup then so the original failure is not masked.
    if (oldIndexDirs != null)
    {
        foreach (Directory d in oldIndexDirs.Values)
        {
            d.Dispose();
        }
    }
    oldIndexDirs = null;
}
/// <summary>
/// this test checks that *only* IndexFormatTooOldExceptions are thrown when you open and operate on too old indexes! </summary>
// NOTE(review): the try/catch/finally scaffolding around the statements below is not visible in this view.
public virtual void TestUnsupportedOldIndexes()
for (int i = 0; i < unsupportedNames.Length; i++)
if (Verbose)
Console.WriteLine("TEST: index " + unsupportedNames[i]);
// Unpack the embedded resource "unsupported.<name>.zip" into a fresh temp directory.
DirectoryInfo oldIndexDir = CreateTempDir(unsupportedNames[i]);
using (Stream dataFile = this.GetType().FindAndGetManifestResourceStream("unsupported." + unsupportedNames[i] + ".zip"))
TestUtil.Unzip(dataFile, oldIndexDir);
BaseDirectoryWrapper dir = NewFSDirectory(oldIndexDir);
// don't checkindex, these are intentionally not supported
dir.CheckIndexOnDispose = false;
IndexReader reader = null;
IndexWriter writer = null;
// Step 1: opening a reader must throw; reaching Assert.Fail means it did not.
reader = DirectoryReader.Open(dir);
Assert.Fail(" should not pass for " + unsupportedNames[i]);
#pragma warning disable 168
catch (IndexFormatTooOldException e)
#pragma warning restore 168
// pass
if (reader != null)
reader = null;
// Step 2: creating a writer on the same directory must throw as well.
writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
Assert.Fail("IndexWriter creation should not pass for " + unsupportedNames[i]);
catch (IndexFormatTooOldException e)
// pass
if (Verbose)
Console.WriteLine("TEST: got expected exc:");
// Make sure exc message includes a path=
Assert.IsTrue(e.Message.IndexOf("path=\"", StringComparison.Ordinal) != -1, "got exc message: " + e.Message);
// we should fail to open IW, and so it should be null when we get here.
// However, if the test fails (i.e., IW did not fail on open), we need
// to close IW. However, if merges are run, IW may throw
// IndexFormatTooOldException, and we don't want to mask the Assert.Fail()
// above, so close without waiting for merges.
if (writer != null)
writer = null;
// Step 3: run CheckIndex over the directory, capturing its output in bos.
StringBuilder bos = new StringBuilder();
CheckIndex checker = new CheckIndex(dir);
checker.InfoStream = new StringWriter(bos);
CheckIndex.Status indexStatus = checker.DoCheckIndex();
// For every supported old index: copies it into a new directory and opens an
// IndexWriter on the copy with the current version and a MockAnalyzer.
// NOTE(review): no merge or dispose call on the writer/directory is visible in this view — confirm.
public virtual void TestFullyMergeOldIndex()
foreach (string name in oldNames)
if (Verbose)
Console.WriteLine("\nTEST: index=" + name);
Directory dir = NewDirectory(oldIndexDirs[name]);
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
// For every supported old index: creates an empty target directory and opens
// an IndexWriter on it.
// NOTE(review): the call that adds the old index to the writer, and the cleanup, are not visible in this view.
public virtual void TestAddOldIndexes()
foreach (string name in oldNames)
if (Verbose)
Console.WriteLine("\nTEST: old index " + name);
Directory targetDir = NewDirectory();
IndexWriter w = new IndexWriter(targetDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
if (Verbose)
Console.WriteLine("\nTEST: done adding indices; now close");
// For every supported old index: opens a reader on it, then creates an empty
// target directory with an IndexWriter.
// NOTE(review): the call that feeds the reader into the writer, and the cleanup, are not visible in this view.
public virtual void TestAddOldIndexesReader()
foreach (string name in oldNames)
IndexReader reader = DirectoryReader.Open(oldIndexDirs[name]);
Directory targetDir = NewDirectory();
IndexWriter w = new IndexWriter(targetDir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
/// <summary>
/// Runs <see cref="SearchIndex"/> against each supported old index, in array order.
/// </summary>
public virtual void TestSearchOldIndex()
{
    for (int idx = 0; idx < oldNames.Length; idx++)
    {
        string indexName = oldNames[idx];
        SearchIndex(oldIndexDirs[indexName], indexName);
    }
}
/// <summary>
/// For each supported old index, runs <see cref="ChangeIndexNoAdds"/> on a private copy of it.
/// </summary>
public virtual void TestIndexOldIndexNoAdds()
{
    foreach (string name in oldNames)
    {
        // Work on a copy so the shared directory in oldIndexDirs is left untouched;
        // the using block releases the copy even when an assertion fails.
        using (Directory dir = NewDirectory(oldIndexDirs[name]))
        {
            ChangeIndexNoAdds(Random, dir);
        }
    }
}
/// <summary>
/// For each supported old index, runs <see cref="ChangeIndexWithAdds"/> on a private copy of it.
/// </summary>
public virtual void TestIndexOldIndex()
{
    foreach (string name in oldNames)
    {
        if (Verbose)
        {
            Console.WriteLine("TEST: oldName=" + name);
        }

        // Work on a copy so the shared directory in oldIndexDirs is left untouched;
        // the using block releases the copy even when an assertion fails.
        using (Directory dir = NewDirectory(oldIndexDirs[name]))
        {
            ChangeIndexWithAdds(Random, dir, name);
        }
    }
}
/// @deprecated 3.x transition mechanism
// Checks document counts on a copy of each old index after deleting by "id" term:
// first a single document, then ids 0..34, verified through an NRT reader and a
// reader opened from the directory.
// NOTE(review): dispose/commit calls between the steps are not visible in this view.
[Obsolete("3.x transition mechanism")]
public virtual void TestDeleteOldIndex()
foreach (string name in oldNames)
if (Verbose)
Console.WriteLine("TEST: oldName=" + name);
// Try one delete:
Directory dir = NewDirectory(oldIndexDirs[name]);
IndexReader ir = DirectoryReader.Open(dir);
Assert.AreEqual(35, ir.NumDocs);
// The writer is configured with a null analyzer; only deletes are issued through it here.
IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
iw.DeleteDocuments(new Term("id", "3"));
ir = DirectoryReader.Open(dir);
Assert.AreEqual(34, ir.NumDocs);
// Delete all but 1 document:
iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
for (int i = 0; i < 35; i++)
iw.DeleteDocuments(new Term("id", "" + i));
// Verify NRT reader takes:
ir = DirectoryReader.Open(iw, true);
Assert.AreEqual(1, ir.NumDocs, "index " + name);
// Verify non-NRT reader takes:
ir = DirectoryReader.Open(dir);
Assert.AreEqual(1, ir.NumDocs, "index " + name);
// Asserts that hits contains exactly expectedCount entries, then walks every hit.
// NOTE(review): the per-hit loop body is not visible in this view — confirm what is checked against reader for each hit.
private void DoTestHits(ScoreDoc[] hits, int expectedCount, IndexReader reader)
int hitCount = hits.Length;
Assert.AreEqual(expectedCount, hitCount, "wrong number of hits");
for (int i = 0; i < hitCount; i++)
// Opens a reader on dir and verifies stored fields, term vectors, doc values
// (only when the index has a "content5" field) and several term queries.
// oldName is only used in assertion messages.
// NOTE(review): the else branches and block boundaries are not visible in this view.
public virtual void SearchIndex(Directory dir, string oldName)
//QueryParser parser = new QueryParser("contents", new MockAnalyzer(random));
//Query query = parser.parse("handle:1");
IndexReader reader = DirectoryReader.Open(dir);
IndexSearcher searcher = new IndexSearcher(reader);
// true if this is a 4.0+ index
bool is40Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("content5") != null;
IBits liveDocs = MultiFields.GetLiveDocs(reader);
// Walk doc ids 0..34 and check the stored fields of each live document.
for (int i = 0; i < 35; i++)
if (liveDocs.Get(i))
Document d = reader.Document(i);
IList<IIndexableField> fields = d.Fields;
// Documents without a "content3" field are the regular (prox) documents.
bool isProxDoc = d.GetField("content3") == null;
if (isProxDoc)
int numFields = is40Index ? 7 : 5;
Assert.AreEqual(numFields, fields.Count);
IIndexableField f = d.GetField("id");
Assert.AreEqual("" + i, f.GetStringValue());
f = d.GetField("utf8");
Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.GetStringValue());
f = d.GetField("autf8");
Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.GetStringValue());
f = d.GetField("content2");
Assert.AreEqual("here is more content with aaa aaa aaa", f.GetStringValue());
f = d.GetField("fie\u2C77ld");
Assert.AreEqual("field with non-ascii name", f.GetStringValue());
Fields tfvFields = reader.GetTermVectors(i);
Assert.IsNotNull(tfvFields, "i=" + i);
Terms tfv = tfvFields.GetTerms("utf8");
Assert.IsNotNull(tfv, "docID=" + i + " index=" + oldName);
// Only ID 7 is deleted
Assert.AreEqual(7, i);
if (is40Index)
// check docvalues fields
NumericDocValues dvByte = MultiDocValues.GetNumericValues(reader, "dvByte");
BinaryDocValues dvBytesDerefFixed = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefFixed");
BinaryDocValues dvBytesDerefVar = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefVar");
SortedDocValues dvBytesSortedFixed = MultiDocValues.GetSortedValues(reader, "dvBytesSortedFixed");
SortedDocValues dvBytesSortedVar = MultiDocValues.GetSortedValues(reader, "dvBytesSortedVar");
BinaryDocValues dvBytesStraightFixed = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightFixed");
BinaryDocValues dvBytesStraightVar = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightVar");
NumericDocValues dvDouble = MultiDocValues.GetNumericValues(reader, "dvDouble");
NumericDocValues dvFloat = MultiDocValues.GetNumericValues(reader, "dvFloat");
NumericDocValues dvInt = MultiDocValues.GetNumericValues(reader, "dvInt");
NumericDocValues dvLong = MultiDocValues.GetNumericValues(reader, "dvLong");
NumericDocValues dvPacked = MultiDocValues.GetNumericValues(reader, "dvPacked");
NumericDocValues dvShort = MultiDocValues.GetNumericValues(reader, "dvShort");
for (int i = 0; i < 35; i++)
int id = Convert.ToInt32(reader.Document(i).Get("id"));
Assert.AreEqual(id, dvByte.Get(i));
// Expected binary value: the four bytes of id, most significant byte first
// (the same encoding AddDoc writes).
sbyte[] bytes = new sbyte[] { (sbyte)((int)((uint)id >> 24)), (sbyte)((int)((uint)id >> 16)), (sbyte)((int)((uint)id >> 8)), (sbyte)id };
BytesRef expectedRef = new BytesRef((byte[])(Array)bytes);
BytesRef scratch = new BytesRef();
dvBytesDerefFixed.Get(i, scratch);
Assert.AreEqual(expectedRef, scratch);
dvBytesDerefVar.Get(i, scratch);
Assert.AreEqual(expectedRef, scratch);
dvBytesSortedFixed.Get(i, scratch);
Assert.AreEqual(expectedRef, scratch);
dvBytesSortedVar.Get(i, scratch);
Assert.AreEqual(expectedRef, scratch);
dvBytesStraightFixed.Get(i, scratch);
Assert.AreEqual(expectedRef, scratch);
dvBytesStraightVar.Get(i, scratch);
Assert.AreEqual(expectedRef, scratch);
// Double/float doc values are read back as raw bits and converted before comparing.
Assert.AreEqual((double)id, J2N.BitConversion.Int64BitsToDouble(dvDouble.Get(i)), 0D);
Assert.AreEqual((float)id, J2N.BitConversion.Int32BitsToSingle((int)dvFloat.Get(i)), 0F);
Assert.AreEqual(id, dvInt.Get(i));
Assert.AreEqual(id, dvLong.Get(i));
Assert.AreEqual(id, dvPacked.Get(i));
Assert.AreEqual(id, dvShort.Get(i));
ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
// First document should be #21 since it's norm was
// increased:
Document d_ = searcher.IndexReader.Document(hits[0].Doc);
assertEquals("didn't get the right document first", "21", d_.Get("id"));
DoTestHits(hits, 34, searcher.IndexReader);
if (is40Index)
hits = searcher.Search(new TermQuery(new Term("content5", "aaa")), null, 1000).ScoreDocs;
DoTestHits(hits, 34, searcher.IndexReader);
hits = searcher.Search(new TermQuery(new Term("content6", "aaa")), null, 1000).ScoreDocs;
DoTestHits(hits, 34, searcher.IndexReader);
// Each token of the non-ASCII "utf8" field value must match 34 documents.
hits = searcher.Search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).ScoreDocs;
Assert.AreEqual(34, hits.Length);
hits = searcher.Search(new TermQuery(new Term("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).ScoreDocs;
Assert.AreEqual(34, hits.Length);
hits = searcher.Search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).ScoreDocs;
Assert.AreEqual(34, hits.Length);
/// <summary>
/// Compares the two-character numeric prefix of <paramref name="name"/> with the
/// number in <paramref name="v"/>.
/// </summary>
/// <param name="name">Index name whose first two characters are parsed as an integer (e.g. "30.cfs").</param>
/// <param name="v">Version number as a string (e.g. "24").</param>
/// <returns>The prefix value minus <paramref name="v"/>: negative, zero, or positive.</returns>
private int Compare(string name, string v)
{
    string prefix = name.Substring(0, 2);
    return Convert.ToInt32(prefix) - Convert.ToInt32(v);
}
// Appends 10 documents (ids 35..44) to the index in dir, then checks the
// writer's document count and the "content:aaa" hits before and after
// re-opening the writer under the "fully merge" step.
// origOldName is the index name; its numeric prefix selects the expected count.
// NOTE(review): the else keyword, the merge call and the dispose calls are not visible in this view.
public virtual void ChangeIndexWithAdds(Random random, Directory dir, string origOldName)
// open writer
IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetOpenMode(OpenMode.APPEND));
// add 10 docs
for (int i = 0; i < 10; i++)
AddDoc(writer, 35 + i);
// make sure writer sees right total -- writer seems not to know about deletes in .del?
int expected;
if (Compare(origOldName, "24") < 0)
expected = 44;
expected = 45;
Assert.AreEqual(expected, writer.NumDocs, "wrong doc count");
// make sure searching sees right # hits
IndexReader reader = DirectoryReader.Open(dir);
IndexSearcher searcher = new IndexSearcher(reader);
ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
Document d = searcher.IndexReader.Document(hits[0].Doc);
assertEquals("wrong first document", "21", d.Get("id"));
DoTestHits(hits, 44, searcher.IndexReader);
// fully merge
writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetOpenMode(OpenMode.APPEND));
reader = DirectoryReader.Open(dir);
searcher = new IndexSearcher(reader);
hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
Assert.AreEqual(44, hits.Length, "wrong number of hits");
d = searcher.Doc(hits[0].Doc);
DoTestHits(hits, 44, searcher.IndexReader);
assertEquals("wrong first document", "21", d.Get("id"));
// Checks that "content:aaa" yields 34 hits with document id "21" first, then
// re-opens a writer in APPEND mode under the "fully merge" step and checks the
// hit count again.
// NOTE(review): the merge call and the dispose calls are not visible in this view.
public virtual void ChangeIndexNoAdds(Random random, Directory dir)
// make sure searching sees right # hits
DirectoryReader reader = DirectoryReader.Open(dir);
IndexSearcher searcher = new IndexSearcher(reader);
ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
Assert.AreEqual(34, hits.Length, "wrong number of hits");
Document d = searcher.Doc(hits[0].Doc);
assertEquals("wrong first document", "21", d.Get("id"));
// fully merge
IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetOpenMode(OpenMode.APPEND));
reader = DirectoryReader.Open(dir);
searcher = new IndexSearcher(reader);
hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
Assert.AreEqual(34, hits.Length, "wrong number of hits");
DoTestHits(hits, 34, searcher.IndexReader);
// Builds a 35-document index under "/tmp/4x/" + dirName and returns that directory.
// doCFS sets the merge policy's NoCFSRatio (1.0 or 0.0) and the writer's
// UseCompoundFile flag; fullyMerged selects between the two branches below.
// NOTE(review): the bodies of the fullyMerged branches and the delete of the
// "id:7" term are not visible in this view.
public virtual DirectoryInfo CreateIndex(string dirName, bool doCFS, bool fullyMerged)
// we use a real directory name that is not cleaned up, because this method is only used to create backwards indexes:
DirectoryInfo indexDir = new DirectoryInfo(Path.Combine("/tmp/4x/", dirName));
Directory dir = NewFSDirectory(indexDir);
LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
mp.NoCFSRatio = doCFS ? 1.0 : 0.0;
mp.MaxCFSSegmentSizeMB = double.PositiveInfinity;
// TODO: remove randomness
IndexWriterConfig conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))).SetMaxBufferedDocs(10).SetMergePolicy(mp).SetUseCompoundFile(doCFS);
IndexWriter writer = new IndexWriter(dir, conf);
for (int i = 0; i < 35; i++)
AddDoc(writer, i);
Assert.AreEqual(35, writer.MaxDoc, "wrong doc count");
if (fullyMerged)
if (!fullyMerged)
// open fresh writer so we get no prx file in the added segment
mp = new LogByteSizeMergePolicy();
mp.NoCFSRatio = doCFS ? 1.0 : 0.0;
// TODO: remove randomness
conf = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))).SetMaxBufferedDocs(10).SetMergePolicy(mp).SetUseCompoundFile(doCFS);
writer = new IndexWriter(dir, conf);
// Re-open with a no-merge policy matching the compound-file setting.
writer = new IndexWriter(dir, conf.SetMergePolicy(doCFS ? NoMergePolicy.COMPOUND_FILES : NoMergePolicy.NO_COMPOUND_FILES));
Term searchTerm = new Term("id", "7");
return indexDir;
/// <summary>
/// Builds the standard test document for <paramref name="id"/> — text, stored,
/// term-vector, numeric (trie) and doc-values fields — and adds it to
/// <paramref name="writer"/>.
/// </summary>
/// <param name="writer">Writer that receives the document.</param>
/// <param name="id">Document id; stored in the "id" field and used as the value of every numeric and doc-values field.</param>
private void AddDoc(IndexWriter writer, int id)
{
    Document doc = new Document();
    doc.Add(new TextField("content", "aaa", Field.Store.NO));
    doc.Add(new StringField("id", Convert.ToString(id), Field.Store.YES));

    // stored text fields with term vectors, including positions and offsets
    FieldType customType2 = new FieldType(TextField.TYPE_STORED);
    customType2.StoreTermVectors = true;
    customType2.StoreTermVectorPositions = true;
    customType2.StoreTermVectorOffsets = true;
    doc.Add(new Field("autf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", customType2));
    doc.Add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", customType2));
    doc.Add(new Field("content2", "here is more content with aaa aaa aaa", customType2));
    doc.Add(new Field("fie\u2C77ld", "field with non-ascii name", customType2));

    // add numeric fields, to test if flex preserves encoding
    doc.Add(new Int32Field("trieInt", id, Field.Store.NO));
    doc.Add(new Int64Field("trieLong", (long)id, Field.Store.NO));

    // add docvalues fields
    doc.Add(new NumericDocValuesField("dvByte", (sbyte)id));
    // the four bytes of id, most significant byte first
    sbyte[] bytes = new sbyte[] { (sbyte)((int)((uint)id >> 24)), (sbyte)((int)((uint)id >> 16)), (sbyte)((int)((uint)id >> 8)), (sbyte)id };
    BytesRef @ref = new BytesRef((byte[])(Array)bytes);
    doc.Add(new BinaryDocValuesField("dvBytesDerefFixed", @ref));
    doc.Add(new BinaryDocValuesField("dvBytesDerefVar", @ref));
    doc.Add(new SortedDocValuesField("dvBytesSortedFixed", @ref));
    doc.Add(new SortedDocValuesField("dvBytesSortedVar", @ref));
    doc.Add(new BinaryDocValuesField("dvBytesStraightFixed", @ref));
    doc.Add(new BinaryDocValuesField("dvBytesStraightVar", @ref));
    doc.Add(new DoubleDocValuesField("dvDouble", (double)id));
    doc.Add(new SingleDocValuesField("dvFloat", (float)id));
    doc.Add(new NumericDocValuesField("dvInt", id));
    doc.Add(new NumericDocValuesField("dvLong", id));
    doc.Add(new NumericDocValuesField("dvPacked", id));
    doc.Add(new NumericDocValuesField("dvShort", (short)id));

    // a field with both offsets and term vectors for a cross-check
    FieldType customType3 = new FieldType(TextField.TYPE_STORED);
    customType3.StoreTermVectors = true;
    customType3.StoreTermVectorPositions = true;
    customType3.StoreTermVectorOffsets = true;
    customType3.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
    doc.Add(new Field("content5", "here is more content with aaa aaa aaa", customType3));

    // a field that omits only positions
    FieldType customType4 = new FieldType(TextField.TYPE_STORED);
    customType4.StoreTermVectors = true;
    customType4.StoreTermVectorPositions = false;
    customType4.StoreTermVectorOffsets = true;
    customType4.IndexOptions = IndexOptions.DOCS_AND_FREQS;
    doc.Add(new Field("content6", "here is more content with aaa aaa aaa", customType4));

    // TODO:
    //   index different norms types via similarity (we use a random one currently?!)
    //   remove any analyzer randomness, explicitly add payloads for certain fields.

    // Hand the finished document to the writer; callers assert on the writer's
    // document counts after calling this method.
    writer.AddDocument(doc);
}
/// <summary>
/// Adds a document whose two fields ("content3" and "content4") are both
/// indexed with <c>IndexOptions.DOCS_ONLY</c>.
/// </summary>
/// <param name="writer">Writer that receives the document.</param>
private void AddNoProxDoc(IndexWriter writer)
{
    Document doc = new Document();

    // "content3": stored text field type, restricted to DOCS_ONLY
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.IndexOptions = IndexOptions.DOCS_ONLY;
    Field f = new Field("content3", "aaa", customType);
    doc.Add(f);

    // "content4": bare field type with only IsStored and DOCS_ONLY set
    FieldType customType2 = new FieldType();
    customType2.IsStored = true;
    customType2.IndexOptions = IndexOptions.DOCS_ONLY;
    f = new Field("content4", "aaa", customType2);
    doc.Add(f);

    writer.AddDocument(doc);
}
/// <summary>
/// Exhausts <paramref name="docs"/> and returns how many documents it produced.
/// </summary>
/// <param name="docs">Enumerator to drain; it is advanced until it reports <c>NO_MORE_DOCS</c>.</param>
/// <returns>The number of documents returned before exhaustion.</returns>
private int CountDocs(DocsEnum docs)
{
    int count = 0;
    while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
    {
        count++;
    }
    return count;
}
// flex: test basics of TermsEnum api on non-flex index
// For each old index: iterates the terms of the "content" field and checks
// Next()/SeekCeil() results and the document count of the term "aaa".
public virtual void TestNextIntoWrongField()
foreach (string name in oldNames)
Directory dir = oldIndexDirs[name];
IndexReader r = DirectoryReader.Open(dir);
TermsEnum terms = MultiFields.GetFields(r).GetTerms("content").GetIterator(null);
BytesRef t = terms.Next();
// content field only has term aaa:
Assert.AreEqual("aaa", t.Utf8ToString());
BytesRef aaaTerm = new BytesRef("aaa");
// should be found exactly
Assert.AreEqual(TermsEnum.SeekStatus.FOUND, terms.SeekCeil(aaaTerm));
Assert.AreEqual(35, CountDocs(TestUtil.Docs(Random, terms, null, null, 0)));
// should hit end of field
Assert.AreEqual(TermsEnum.SeekStatus.END, terms.SeekCeil(new BytesRef("bbb")));
// should seek to aaa
Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, terms.SeekCeil(new BytesRef("a")));
Assert.AreEqual(35, CountDocs(TestUtil.Docs(Random, terms, null, null, 0)));
Assert.AreEqual(TermsEnum.SeekStatus.FOUND, terms.SeekCeil(aaaTerm));
Assert.AreEqual(35, CountDocs(TestUtil.Docs(Random, terms, null, null, 0)));
/// <summary>
/// Test that we didn't forget to bump the current Constants.LUCENE_MAIN_VERSION.
/// this is important so that we can determine which version of lucene wrote the segment.
/// </summary>
public virtual void TestOldVersions()
// first create a little index with the current code and get the version
Directory currentDir = NewDirectory();
RandomIndexWriter riw = new RandomIndexWriter(
Random, currentDir);
riw.AddDocument(new Document());
DirectoryReader ir = DirectoryReader.Open(currentDir);
// The version string is taken from the first leaf's segment info.
SegmentReader air = (SegmentReader)ir.Leaves[0].Reader;
string currentVersion = air.SegmentInfo.Info.Version;
Assert.IsNotNull(currentVersion); // only 3.0 segments can have a null version
IComparer<string> comparer = StringHelper.VersionComparer;
// now check all the old indexes, their version should be < the current version
foreach (string name in oldNames)
Directory dir = oldIndexDirs[name];
DirectoryReader r = DirectoryReader.Open(dir);
foreach (AtomicReaderContext context in r.Leaves)
air = (SegmentReader)context.Reader;
string oldVersion = air.SegmentInfo.Info.Version;
// TODO: does preflex codec actually set "3.0" here? this is safe to do I think.
// Assert.IsNotNull(oldVersion);
// A null version is accepted; otherwise it must compare strictly lower than the current one.
Assert.IsTrue(oldVersion == null || comparer.Compare(oldVersion, currentVersion) < 0, "current Constants.LUCENE_MAIN_VERSION is <= an old index: did you forget to bump it?!");
// For each old index: checks that exact-match and open numeric range queries
// on "trieInt"/"trieLong" return the expected hits, and that the values
// decoded through the field cache lie in [0, 35).
public virtual void TestNumericFields()
foreach (string name in oldNames)
Directory dir = oldIndexDirs[name];
IndexReader reader = DirectoryReader.Open(dir);
IndexSearcher searcher = new IndexSearcher(reader);
// Exact-match ranges [id, id] must return exactly the document with that id.
for (int id = 10; id < 15; id++)
ScoreDoc[] hits = searcher.Search(NumericRangeQuery.NewInt32Range("trieInt", 4, Convert.ToInt32(id), Convert.ToInt32(id), true, true), 100).ScoreDocs;
Assert.AreEqual(1, hits.Length, "wrong number of hits");
Document d = searcher.Doc(hits[0].Doc);
Assert.AreEqual(Convert.ToString(id), d.Get("id"));
hits = searcher.Search(NumericRangeQuery.NewInt64Range("trieLong", 4, Convert.ToInt64(id), Convert.ToInt64(id), true, true), 100).ScoreDocs;
Assert.AreEqual(1, hits.Length, "wrong number of hits");
d = searcher.Doc(hits[0].Doc);
Assert.AreEqual(Convert.ToString(id), d.Get("id"));
// check that also lower-precision fields are ok
ScoreDoc[] hits_ = searcher.Search(NumericRangeQuery.NewInt32Range("trieInt", 4, int.MinValue, int.MaxValue, false, false), 100).ScoreDocs;
Assert.AreEqual(34, hits_.Length, "wrong number of hits");
hits_ = searcher.Search(NumericRangeQuery.NewInt64Range("trieLong", 4, long.MinValue, long.MaxValue, false, false), 100).ScoreDocs;
Assert.AreEqual(34, hits_.Length, "wrong number of hits");
// check decoding into field cache
FieldCache.Int32s fci = FieldCache.DEFAULT.GetInt32s(SlowCompositeReaderWrapper.Wrap(searcher.IndexReader), "trieInt", false);
int maxDoc = searcher.IndexReader.MaxDoc;
for (int doc = 0; doc < maxDoc; doc++)
int val = fci.Get(doc);
Assert.IsTrue(val >= 0 && val < 35, "value in id bounds");
FieldCache.Int64s fcl = FieldCache.DEFAULT.GetInt64s(SlowCompositeReaderWrapper.Wrap(searcher.IndexReader), "trieLong", false);
for (int doc = 0; doc < maxDoc; doc++)
long val = fcl.Get(doc);
Assert.IsTrue(val >= 0L && val < 35L, "value in id bounds");
/// <summary>
/// Asserts that every segment in <paramref name="dir"/> carries the current
/// <c>Constants.LUCENE_MAIN_VERSION</c>.
/// </summary>
/// <param name="dir">Directory whose segment infos are read and checked.</param>
/// <returns>The number of segments found in <paramref name="dir"/>.</returns>
private int CheckAllSegmentsUpgraded(Directory dir)
{
    SegmentInfos infos = new SegmentInfos();
    // Load the segment infos from the directory; without this the loop below
    // would iterate over an empty set and the check would be vacuous.
    infos.Read(dir);
    if (Verbose)
    {
        Console.WriteLine("checkAllSegmentsUpgraded: " + infos);
    }
    foreach (SegmentCommitInfo si in infos.Segments)
    {
        Assert.AreEqual(Constants.LUCENE_MAIN_VERSION, si.Info.Version);
    }
    return infos.Count;
}
/// <summary>
/// Returns the number of segments in the index stored in <paramref name="dir"/>.
/// </summary>
/// <param name="dir">Directory whose segment infos are read.</param>
/// <returns>The segment count.</returns>
private int GetNumberOfSegments(Directory dir)
{
    SegmentInfos infos = new SegmentInfos();
    // Load the segment infos from the directory before counting.
    infos.Read(dir);
    return infos.Count;
}
/// <summary>
/// Runs <c>IndexUpgrader</c> on a copy of every old index (regular and
/// single-segment) and checks the resulting segments with
/// <see cref="CheckAllSegmentsUpgraded"/>.
/// </summary>
public virtual void TestUpgradeOldIndex()
{
    List<string> names = new List<string>(oldNames.Length + oldSingleSegmentNames.Length);
    names.AddRange(oldNames);
    names.AddRange(oldSingleSegmentNames);

    foreach (string name in names)
    {
        if (Verbose)
        {
            Console.WriteLine("testUpgradeOldIndex: index=" + name);
        }

        // Upgrade a copy so the shared directory in oldIndexDirs is left untouched;
        // the using block releases the copy even when an assertion fails.
        using (Directory dir = NewDirectory(oldIndexDirs[name]))
        {
            (new IndexUpgrader(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null), false)).Upgrade();
            CheckAllSegmentsUpgraded(dir);
        }
    }
}
// For each single-segment old index: builds extra current-version segments in
// a RAMDirectory, opens a writer on a copy of the old index, runs
// IndexUpgrader, and asserts that the segment count is unchanged by the upgrade.
// NOTE(review): the call that adds ramDir's segments to the old index, and the
// dispose calls, are not visible in this view.
public virtual void TestUpgradeOldSingleSegmentIndexWithAdditions()
foreach (string name in oldSingleSegmentNames)
if (Verbose)
Console.WriteLine("testUpgradeOldSingleSegmentIndexWithAdditions: index=" + name);
Directory dir = NewDirectory(oldIndexDirs[name]);
Assert.AreEqual(1, GetNumberOfSegments(dir), "Original index must be single segment");
// create a bunch of dummy segments
int id = 40;
RAMDirectory ramDir = new RAMDirectory();
for (int i = 0; i < 3; i++)
// only use Log- or TieredMergePolicy, to make document addition predictable and not suddenly merge:
MergePolicy mp = Random.NextBoolean() ? (MergePolicy)NewLogMergePolicy() : NewTieredMergePolicy();
IndexWriterConfig iwc = (new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))).SetMergePolicy(mp);
IndexWriter w = new IndexWriter(ramDir, iwc);
// add few more docs:
// (the loop bound is re-evaluated on every iteration, drawing a new random number each time)
for (int j = 0; j < RandomMultiplier * Random.Next(30); j++)
AddDoc(w, id++);
// add dummy segments (which are all in current
// version) to single segment index
MergePolicy mp_ = Random.NextBoolean() ? (MergePolicy)NewLogMergePolicy() : NewTieredMergePolicy();
IndexWriterConfig iwc_ = (new IndexWriterConfig(TEST_VERSION_CURRENT, null)).SetMergePolicy(mp_);
IndexWriter w_ = new IndexWriter(dir, iwc_);
// determine count of segments in modified index
int origSegCount = GetNumberOfSegments(dir);
(new IndexUpgrader(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null), false)).Upgrade();
int segCount = CheckAllSegmentsUpgraded(dir);
Assert.AreEqual(origSegCount, segCount, "Index must still contain the same number of segments, as only one segment was upgraded and nothing else merged");
// Name of the embedded resource that TestSurrogates unzips.
// NOTE(review): the value is the empty string here — confirm the intended resource name.
public const string SurrogatesIndexName = "";
// Unzips the resource named by SurrogatesIndexName into a temp directory and
// opens it as a Directory.
// NOTE(review): the checks performed on dir are not visible in this view.
public virtual void TestSurrogates()
DirectoryInfo oldIndexDir = CreateTempDir("surrogates");
using (Stream dataFile = this.GetType().FindAndGetManifestResourceStream(SurrogatesIndexName))
TestUtil.Unzip(dataFile, oldIndexDir);
Directory dir = NewFSDirectory(oldIndexDir);
// TODO: more tests
/*
 * Index with negative positions (LUCENE-1542)
 * Created with this code, using a 2.4.0 jar, then upgraded with 3.6 upgrader:
 *
 * public class CreateBogusIndexes {
 * public static void main(String args[]) throws Exception {
 * Directory d = FSDirectory.getDirectory("/tmp/bogus24");
 * IndexWriter iw = new IndexWriter(d, new StandardAnalyzer());
 * Document doc = new Document();
 * Token brokenToken = new Token("broken", 0, 3);
 * brokenToken.setPositionIncrement(0);
 * Token okToken = new Token("ok", 0, 2);
 * doc.Add(new Field("field1", new CannedTokenStream(brokenToken), Field.TermVector.NO));
 * doc.Add(new Field("field2", new CannedTokenStream(brokenToken), Field.TermVector.WITH_POSITIONS));
 * doc.Add(new Field("field3", new CannedTokenStream(brokenToken, okToken), Field.TermVector.NO));
 * doc.Add(new Field("field4", new CannedTokenStream(brokenToken, okToken), Field.TermVector.WITH_POSITIONS));
 * iw.AddDocument(doc);
 * doc = new Document();
 * doc.Add(new Field("field1", "just more text, not broken", Field.Store.NO, Field.Index.ANALYZED));
 * doc.Add(new Field("field2", "just more text, not broken", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
 * doc.Add(new Field("field3", "just more text, not broken", Field.Store.NO, Field.Index.ANALYZED));
 * doc.Add(new Field("field4", "just more text, not broken", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
 * iw.AddDocument(doc);
 * iw.Dispose();
 * d.Dispose();
 * }
 * static class CannedTokenStream extends TokenStream {
 * private final Token[] tokens;
 * private int upto = 0;
 * CannedTokenStream(Token... tokens) {
 * this.tokens = tokens;
 * }
 * @Override
 * public Token next() {
 * if (upto < tokens.Length) {
 * return tokens[upto++];
 * } else {
 * return null;
 * }
 * }
 * }
 * }
 */
// Name of the embedded resource that TestNegativePositions unzips.
// NOTE(review): the value is the empty string here — confirm the intended resource name.
public const string Bogus24IndexName = "";
// Unzips the resource named by Bogus24IndexName, then checks that a phrase
// query for "more text" on field3 finds one document and that the term
// "broken" in field3 is reported for doc 0 at position 0.
public virtual void TestNegativePositions()
DirectoryInfo oldIndexDir = CreateTempDir("negatives");
using (Stream dataFile = this.GetType().FindAndGetManifestResourceStream(Bogus24IndexName))
TestUtil.Unzip(dataFile, oldIndexDir);
Directory dir = NewFSDirectory(oldIndexDir);
DirectoryReader ir = DirectoryReader.Open(dir);
IndexSearcher @is = new IndexSearcher(ir);
PhraseQuery pq = new PhraseQuery();
pq.Add(new Term("field3", "more"));
pq.Add(new Term("field3", "text"));
TopDocs td = @is.Search(pq, 10);
Assert.AreEqual(1, td.TotalHits);
AtomicReader wrapper = SlowCompositeReaderWrapper.Wrap(ir);
DocsAndPositionsEnum de = wrapper.GetTermPositionsEnum(new Term("field3", "broken"));
// System.Diagnostics.Debug.Assert (not the test framework's Assert) guards the enum being non-null.
Debug.Assert(de != null);
Assert.AreEqual(0, de.NextDoc());
Assert.AreEqual(0, de.NextPosition());