// blob: 72a70e3833f8b1b0a52a65328933aa9a60fd267e [file] [log] [blame]
using System;
using System.Collections.Generic;
using System.Diagnostics;
using Lucene.Net.Util;
namespace Lucene.Net.Index
{
using Lucene.Net.Support;
using AssertingDocValuesFormat = Lucene.Net.Codecs.asserting.AssertingDocValuesFormat;
using AssertingPostingsFormat = Lucene.Net.Codecs.asserting.AssertingPostingsFormat;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using DocValuesFormat = Lucene.Net.Codecs.DocValuesFormat;
//using TestBloomFilteredLucene41Postings = Lucene.Net.Codecs.bloom.TestBloomFilteredLucene41Postings;
//using DiskDocValuesFormat = Lucene.Net.Codecs.diskdv.DiskDocValuesFormat;
using Lucene41PostingsFormat = Lucene.Net.Codecs.Lucene41.Lucene41PostingsFormat;
//using Lucene41WithOrds = Lucene.Net.Codecs.Lucene41ords.Lucene41WithOrds;
using Lucene45DocValuesFormat = Lucene.Net.Codecs.Lucene45.Lucene45DocValuesFormat;
using Lucene46Codec = Lucene.Net.Codecs.Lucene46.Lucene46Codec;
using PostingsFormat = Lucene.Net.Codecs.PostingsFormat;
//using DirectPostingsFormat = Lucene.Net.Codecs.memory.DirectPostingsFormat;
//using MemoryDocValuesFormat = Lucene.Net.Codecs.memory.MemoryDocValuesFormat;
//using MemoryPostingsFormat = Lucene.Net.Codecs.memory.MemoryPostingsFormat;
//using MockFixedIntBlockPostingsFormat = Lucene.Net.Codecs.mockintblock.MockFixedIntBlockPostingsFormat;
//using MockVariableIntBlockPostingsFormat = Lucene.Net.Codecs.mockintblock.MockVariableIntBlockPostingsFormat;
//using MockRandomPostingsFormat = Lucene.Net.Codecs.mockrandom.MockRandomPostingsFormat;
//using MockSepPostingsFormat = Lucene.Net.Codecs.mocksep.MockSepPostingsFormat;
//using NestedPulsingPostingsFormat = Lucene.Net.Codecs.nestedpulsing.NestedPulsingPostingsFormat;
//using Pulsing41PostingsFormat = Lucene.Net.Codecs.pulsing.Pulsing41PostingsFormat;
//using SimpleTextDocValuesFormat = Lucene.Net.Codecs.simpletext.SimpleTextDocValuesFormat;
//using SimpleTextPostingsFormat = Lucene.Net.Codecs.simpletext.SimpleTextPostingsFormat;
//using FSTOrdPostingsFormat = Lucene.Net.Codecs.memory.FSTOrdPostingsFormat;
//using FSTOrdPulsing41PostingsFormat = Lucene.Net.Codecs.memory.FSTOrdPulsing41PostingsFormat;
//using FSTPostingsFormat = Lucene.Net.Codecs.memory.FSTPostingsFormat;
//using FSTPulsing41PostingsFormat = Lucene.Net.Codecs.memory.FSTPulsing41PostingsFormat;
using TestUtil = Lucene.Net.Util.TestUtil;
/// <summary>
/// Codec that assigns per-field random postings formats.
/// <para>
/// The same field/format assignment will happen regardless of order:
/// a hash is computed up front that determines the mapping.
/// This means fields can be put into things like HashSets and added to
/// documents in different orders and the test will still be deterministic
/// and reproducible.
/// </para>
/// </summary>
public class RandomCodec : Lucene46Codec
{
    /// <summary>
    /// Shuffled list of postings formats to use for new mappings </summary>
    private IList<PostingsFormat> Formats = new List<PostingsFormat>();

    /// <summary>
    /// Shuffled list of docvalues formats to use for new mappings </summary>
    private IList<DocValuesFormat> DvFormats = new List<DocValuesFormat>();

    /// <summary>
    /// unique set of format names this codec knows about </summary>
    public HashSet<string> FormatNames = new HashSet<string>();

    /// <summary>
    /// unique set of docvalues format names this codec knows about </summary>
    public HashSet<string> DvFormatNames = new HashSet<string>();

    /// <summary>
    /// memorized field->postingsformat mappings </summary>
    // note: we have to sync this map even though its just for debugging/toString,
    // otherwise DWPT's .toString() calls that iterate over the map can
    // cause concurrentmodificationexception if indexwriter's infostream is on
    private readonly IDictionary<string, PostingsFormat> PreviousMappings = new ConcurrentHashMapWrapper<string, PostingsFormat>(new Dictionary<string, PostingsFormat>());

    /// <summary>
    /// memorized field->docvaluesformat mappings </summary>
    private readonly IDictionary<string, DocValuesFormat> PreviousDVMappings = new ConcurrentHashMapWrapper<string, DocValuesFormat>(new Dictionary<string, DocValuesFormat>());

    /// <summary>
    /// Per-instance seed, drawn once in the constructor, that is mixed with a
    /// field name's hash code to pick that field's format. </summary>
    private readonly int PerFieldSeed;

    /// <summary>
    /// Returns the postings format assigned to the field <paramref name="name"/>,
    /// choosing and memorizing one from the shuffled list on first use.
    /// </summary>
    /// <param name="name"> field name </param>
    /// <returns> the postings format used for this field by this codec instance </returns>
    public override PostingsFormat GetPostingsFormatForField(string name)
    {
        PostingsFormat codec;
        // TryGetValue instead of the indexer: the first request for a field has no
        // entry yet, and a dictionary indexer may throw KeyNotFoundException for a
        // missing key rather than hand back null.
        if (!PreviousMappings.TryGetValue(name, out codec) || codec == null)
        {
            codec = Formats[IndexFor(name, Formats.Count)];
            /*if (codec is SimpleTextPostingsFormat && PerFieldSeed % 5 != 0)
            {
              // make simpletext rarer, choose again
              codec = Formats[Math.Abs(PerFieldSeed ^ name.ToUpper(CultureInfo.InvariantCulture).GetHashCode()) % Formats.Count];
            }*/
            PreviousMappings[name] = codec;
            // Safety:
            Debug.Assert(PreviousMappings.Count < 10000, "test went insane");
        }
        return codec;
    }

    /// <summary>
    /// Returns the doc values format assigned to the field <paramref name="name"/>,
    /// choosing and memorizing one from the shuffled list on first use.
    /// </summary>
    /// <param name="name"> field name </param>
    /// <returns> the doc values format used for this field by this codec instance </returns>
    public override DocValuesFormat GetDocValuesFormatForField(string name)
    {
        DocValuesFormat codec;
        // Same lookup pattern as for postings formats: a miss is the normal
        // first-use case and must not surface as an exception.
        if (!PreviousDVMappings.TryGetValue(name, out codec) || codec == null)
        {
            codec = DvFormats[IndexFor(name, DvFormats.Count)];
            /*if (codec is SimpleTextDocValuesFormat && PerFieldSeed % 5 != 0)
            {
              // make simpletext rarer, choose again
              codec = DvFormats[Math.Abs(PerFieldSeed ^ name.ToUpper(CultureInfo.InvariantCulture).GetHashCode()) % DvFormats.Count];
            }*/
            PreviousDVMappings[name] = codec;
            // Safety:
            Debug.Assert(PreviousDVMappings.Count < 10000, "test went insane");
        }
        return codec;
    }

    /// <summary>
    /// Maps a field name onto an index in <c>[0, count)</c> by mixing the name's
    /// hash code with <see cref="PerFieldSeed"/>.
    /// </summary>
    /// <param name="name"> field name whose hash code is mixed with the seed </param>
    /// <param name="count"> number of candidate formats; zero results in a <see cref="DivideByZeroException"/> </param>
    /// <returns> a non-negative index smaller than <paramref name="count"/> </returns>
    // NOTE(review): string.GetHashCode() is not documented as stable across
    // processes or runtime versions - confirm that cross-run reproducibility of
    // the field->format mapping is not relied upon.
    private int IndexFor(string name, int count)
    {
        // Widen to long before taking the absolute value: Math.Abs(int) throws
        // OverflowException when the XOR happens to be int.MinValue. For every
        // other value the resulting index is the same as with the int overload.
        long mixed = PerFieldSeed ^ name.GetHashCode();
        return (int)(Math.Abs(mixed) % count);
    }

    /// <summary>
    /// Creates a codec whose candidate formats are registered unless their name
    /// is listed in <paramref name="avoidCodecs"/>, then shuffled and capped at
    /// four postings formats and four doc values formats.
    /// </summary>
    /// <param name="random"> source of the per-field seed and of the format parameters </param>
    /// <param name="avoidCodecs"> names of formats that must not be used </param>
    public RandomCodec(Random random, ISet<string> avoidCodecs)
    {
        this.PerFieldSeed = random.Next();
        // TODO: make it possible to specify min/max items per
        // block via CL:
        int minItemsPerBlock = TestUtil.NextInt(random, 2, 100);
        int maxItemsPerBlock = 2 * (Math.Max(2, minItemsPerBlock - 1)) + random.Next(100);
        // Only referenced by the disabled DirectPostingsFormat entry below; the
        // draw is kept so that the sequence of values taken from 'random' does
        // not change.
        int lowFreqCutoff = TestUtil.NextInt(random, 2, 100);
        Add(avoidCodecs,
            new Lucene41PostingsFormat(minItemsPerBlock, maxItemsPerBlock),
            /*
            new FSTPostingsFormat(),
            new FSTOrdPostingsFormat(),
            new FSTPulsing41PostingsFormat(1 + random.Next(20)), new FSTOrdPulsing41PostingsFormat(1 + random.Next(20)),
            new DirectPostingsFormat(LuceneTestCase.Rarely(random) ? 1 : (LuceneTestCase.Rarely(random) ? int.MaxValue : maxItemsPerBlock), LuceneTestCase.Rarely(random) ? 1 : (LuceneTestCase.Rarely(random) ? int.MaxValue : lowFreqCutoff)),
            new Pulsing41PostingsFormat(1 + random.Next(20), minItemsPerBlock, maxItemsPerBlock), new Pulsing41PostingsFormat(1 + random.Next(20), minItemsPerBlock, maxItemsPerBlock),
            new TestBloomFilteredLucene41Postings(), new MockSepPostingsFormat(), new MockFixedIntBlockPostingsFormat(TestUtil.NextInt(random, 1, 2000)),
            new MockVariableIntBlockPostingsFormat(TestUtil.NextInt(random, 1, 127)), new MockRandomPostingsFormat(random),
            new NestedPulsingPostingsFormat(), new Lucene41WithOrds(), new SimpleTextPostingsFormat(),
            */
            new AssertingPostingsFormat()
            /*new MemoryPostingsFormat(true, random.nextFloat()), new MemoryPostingsFormat(false, random.nextFloat())*/
            );
        // add pulsing again with (usually) different parameters
        //TODO as a PostingsFormat which wraps others, we should allow TestBloomFilteredLucene41Postings to be constructed
        //with a choice of concrete PostingsFormats. Maybe useful to have a generic means of marking and dealing
        //with such "wrapper" classes?
        AddDocValues(avoidCodecs, new Lucene45DocValuesFormat(), /*new DiskDocValuesFormat(), new MemoryDocValuesFormat(), new SimpleTextDocValuesFormat(),*/ new AssertingDocValuesFormat());

        Formats = CollectionsHelper.Shuffle(Formats);
        DvFormats = CollectionsHelper.Shuffle(DvFormats);

        // Avoid too many open files:
        if (Formats.Count > 4)
        {
            Formats = Formats.SubList(0, 4);
        }
        if (DvFormats.Count > 4)
        {
            DvFormats = DvFormats.SubList(0, 4);
        }
    }

    /// <summary>
    /// Creates a codec that does not avoid any format.
    /// </summary>
    /// <param name="random"> source of the per-field seed and of the format parameters </param>
    public RandomCodec(Random random)
        : this(random, new HashSet<string>())
    {
    }

    /// <summary>
    /// Registers every postings format whose name is not in
    /// <paramref name="avoidCodecs"/>, recording its name in <see cref="FormatNames"/>.
    /// </summary>
    private void Add(ISet<string> avoidCodecs, params PostingsFormat[] postings)
    {
        foreach (PostingsFormat p in postings)
        {
            if (!avoidCodecs.Contains(p.Name))
            {
                Formats.Add(p);
                FormatNames.Add(p.Name);
            }
        }
    }

    /// <summary>
    /// Registers every doc values format whose name is not in
    /// <paramref name="avoidCodecs"/>, recording its name in <see cref="DvFormatNames"/>.
    /// </summary>
    private void AddDocValues(ISet<string> avoidCodecs, params DocValuesFormat[] docvalues)
    {
        foreach (DocValuesFormat d in docvalues)
        {
            if (!avoidCodecs.Contains(d.Name))
            {
                DvFormats.Add(d);
                DvFormatNames.Add(d.Name);
            }
        }
    }

    /// <summary>
    /// Describes this codec together with the memorized postings and doc values mappings.
    /// </summary>
    // NOTE(review): the mapping part relies on the map wrapper's ToString();
    // confirm that it prints the entries rather than just a type name.
    public override string ToString()
    {
        return base.ToString() + ": " + PreviousMappings.ToString() + ", docValues:" + PreviousDVMappings.ToString();
    }
}
}