blob: 64f0484b059c97649cdeedbdfd6e70ad0fa7d40b [file] [log] [blame]
using Lucene.Net.Codecs;
using Lucene.Net.Codecs.Asserting;
using Lucene.Net.Codecs.Bloom;
using Lucene.Net.Codecs.DiskDV;
using Lucene.Net.Codecs.Lucene41;
using Lucene.Net.Codecs.Lucene41Ords;
using Lucene.Net.Codecs.Lucene45;
using Lucene.Net.Codecs.Lucene46;
using Lucene.Net.Codecs.Memory;
using Lucene.Net.Codecs.MockIntBlock;
using Lucene.Net.Codecs.MockRandom;
using Lucene.Net.Codecs.MockSep;
using Lucene.Net.Codecs.NestedPulsing;
using Lucene.Net.Codecs.Pulsing;
using Lucene.Net.Codecs.SimpleText;
using Lucene.Net.Support;
using Lucene.Net.Util;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using JCG = J2N.Collections.Generic;
using Console = Lucene.Net.Support.SystemConsole;
using Debug = Lucene.Net.Diagnostics.Debug; // LUCENENET NOTE: We cannot use System.Diagnostics.Debug because those calls will be optimized out of the release!
using J2N.Collections.Generic.Extensions;
namespace Lucene.Net.Index
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// <see cref="Codec"/> that assigns per-field random <see cref="Codecs.PostingsFormat"/>s.
/// <para/>
/// The same field/format assignment will happen regardless of order,
/// a hash is computed up front that determines the mapping.
/// This means fields can be put into things like <see cref="HashSet{T}"/>s and added to
/// documents in different orders and the test will still be deterministic
/// and reproducible.
/// </summary>
[ExcludeCodecFromScan] // LUCENENET specific - we don't want this codec to replace Lucene46Codec during testing - some of these codecs are read-only
public class RandomCodec : Lucene46Codec
{
    /// <summary>
    /// Shuffled list of postings formats to use for new mappings </summary>
    private IList<PostingsFormat> formats = new List<PostingsFormat>();

    /// <summary>
    /// Shuffled list of docvalues formats to use for new mappings </summary>
    private IList<DocValuesFormat> dvFormats = new List<DocValuesFormat>();

    /// <summary>
    /// unique set of format names this codec knows about </summary>
    public ISet<string> FormatNames { get; set; } = new JCG.HashSet<string>();

    /// <summary>
    /// unique set of docvalues format names this codec knows about </summary>
    public ISet<string> DvFormatNames { get; set; } = new JCG.HashSet<string>();

    // note: we have to sync this map even though its just for debugging/toString,
    // otherwise DWPT's .toString() calls that iterate over the map can
    // cause concurrentmodificationexception if indexwriter's infostream is on
    /// <summary>
    /// memoized field->postingsformat mappings </summary>
    private readonly IDictionary<string, PostingsFormat> previousMappings = new ConcurrentDictionary<string, PostingsFormat>(StringComparer.Ordinal);

    /// <summary>
    /// memoized field->docvaluesformat mappings </summary>
    private readonly IDictionary<string, DocValuesFormat> previousDVMappings = new ConcurrentDictionary<string, DocValuesFormat>(StringComparer.Ordinal);

    /// <summary>
    /// Seed drawn once in the constructor and mixed into every field-name hash. </summary>
    private readonly int perFieldSeed;

    /// <summary>
    /// Maps <paramref name="seed"/> XOR the hash code of <paramref name="key"/> onto
    /// an index in the range <c>[0, count)</c>.
    /// </summary>
    /// <param name="seed">The per-codec seed.</param>
    /// <param name="key">The string whose hash code is mixed with the seed.</param>
    /// <param name="count">Number of available slots.</param>
    /// <returns>The selected slot index.</returns>
    /// <exception cref="DivideByZeroException"><paramref name="count"/> is zero.</exception>
    private static int ChooseIndex(int seed, string key, int count)
    {
        // Widen to long before taking the absolute value: Math.Abs(int.MinValue)
        // throws OverflowException in .NET. For every other value the result is
        // identical to Math.Abs(int) % count.
        // NOTE(review): string.GetHashCode() may differ between processes on runtimes
        // that randomize string hashing - confirm whether a stable hash is needed for
        // cross-run reproducibility.
        long mixed = seed ^ key.GetHashCode();
        return (int)(Math.Abs(mixed) % count);
    }

    /// <summary>
    /// Returns the postings format assigned to the field <paramref name="name"/>,
    /// choosing and remembering one on first use.
    /// </summary>
    /// <param name="name">The field name.</param>
    /// <returns>The postings format mapped to the field.</returns>
    public override PostingsFormat GetPostingsFormatForField(string name)
    {
        if (!previousMappings.TryGetValue(name, out PostingsFormat codec) || codec == null)
        {
            codec = formats[ChooseIndex(perFieldSeed, name, formats.Count)];
            if (codec is SimpleTextPostingsFormat && perFieldSeed % 5 != 0)
            {
                // make simpletext rarer, choose again
                codec = formats[ChooseIndex(perFieldSeed, name.ToUpperInvariant(), formats.Count)];
            }
            previousMappings[name] = codec;
            // Safety:
            Debug.Assert(previousMappings.Count < 10000, "test went insane");
        }

        //if (LuceneTestCase.VERBOSE)
        //{
        Console.WriteLine("RandomCodec.GetPostingsFormatForField(\"" + name + "\") returned '" + codec.Name + "' with underlying type '" + codec.GetType().ToString() + "'.");
        //}

        return codec;
    }

    /// <summary>
    /// Returns the doc values format assigned to the field <paramref name="name"/>,
    /// choosing and remembering one on first use.
    /// </summary>
    /// <param name="name">The field name.</param>
    /// <returns>The doc values format mapped to the field.</returns>
    public override DocValuesFormat GetDocValuesFormatForField(string name)
    {
        if (!previousDVMappings.TryGetValue(name, out DocValuesFormat codec) || codec == null)
        {
            codec = dvFormats[ChooseIndex(perFieldSeed, name, dvFormats.Count)];
            if (codec is SimpleTextDocValuesFormat && perFieldSeed % 5 != 0)
            {
                // make simpletext rarer, choose again
                codec = dvFormats[ChooseIndex(perFieldSeed, name.ToUpperInvariant(), dvFormats.Count)];
            }
            previousDVMappings[name] = codec;
            // Safety:
            Debug.Assert(previousDVMappings.Count < 10000, "test went insane");
        }

        //if (LuceneTestCase.VERBOSE)
        //{
        Console.WriteLine("RandomCodec.GetDocValuesFormatForField(\"" + name + "\") returned '" + codec.Name + "' with underlying type '" + codec.GetType().ToString() + "'.");
        //}

        return codec;
    }

    /// <summary>
    /// Creates a codec whose candidate postings and doc values formats are built
    /// with parameters drawn from <paramref name="random"/>, shuffled, and then
    /// trimmed to at most four of each kind.
    /// </summary>
    /// <param name="random">Source of randomness for the seed, the format parameters and the shuffle.</param>
    /// <param name="avoidCodecs">Format names to leave out; a format is skipped when its <c>Name</c> is in this set.</param>
    public RandomCodec(Random random, ISet<string> avoidCodecs)
    {
        // The order of the calls on 'random' below determines the generated values;
        // keep it stable.
        this.perFieldSeed = random.Next();

        // TODO: make it possible to specify min/max items per
        // block via CL:
        int minItemsPerBlock = TestUtil.NextInt32(random, 2, 100);
        int maxItemsPerBlock = 2 * (Math.Max(2, minItemsPerBlock - 1)) + random.Next(100);
        int lowFreqCutoff = TestUtil.NextInt32(random, 2, 100);

        Add(avoidCodecs,
            new Lucene41PostingsFormat(minItemsPerBlock, maxItemsPerBlock),
            new FSTPostingsFormat(),
            new FSTOrdPostingsFormat(),
            new FSTPulsing41PostingsFormat(1 + random.Next(20)), new FSTOrdPulsing41PostingsFormat(1 + random.Next(20)),
            new DirectPostingsFormat(LuceneTestCase.Rarely(random) ? 1 : (LuceneTestCase.Rarely(random) ? int.MaxValue : maxItemsPerBlock),
                                     LuceneTestCase.Rarely(random) ? 1 : (LuceneTestCase.Rarely(random) ? int.MaxValue : lowFreqCutoff)),
            new Pulsing41PostingsFormat(1 + random.Next(20), minItemsPerBlock, maxItemsPerBlock),
            // add pulsing again with (usually) different parameters
            new Pulsing41PostingsFormat(1 + random.Next(20), minItemsPerBlock, maxItemsPerBlock),
            //TODO as a PostingsFormat which wraps others, we should allow TestBloomFilteredLucene41Postings to be constructed
            //with a choice of concrete PostingsFormats. Maybe useful to have a generic means of marking and dealing
            //with such "wrapper" classes?
            new TestBloomFilteredLucene41Postings(),
            new MockSepPostingsFormat(),
            new MockFixedInt32BlockPostingsFormat(TestUtil.NextInt32(random, 1, 2000)),
            new MockVariableInt32BlockPostingsFormat(TestUtil.NextInt32(random, 1, 127)),
            new MockRandomPostingsFormat(random),
            new NestedPulsingPostingsFormat(),
            new Lucene41WithOrds(),
            new SimpleTextPostingsFormat(),
            new AssertingPostingsFormat(),
            new MemoryPostingsFormat(true, random.nextFloat()),
            new MemoryPostingsFormat(false, random.nextFloat())
            );

        AddDocValues(avoidCodecs,
            new Lucene45DocValuesFormat(),
            new DiskDocValuesFormat(),
            new MemoryDocValuesFormat(),
            new SimpleTextDocValuesFormat(),
            new AssertingDocValuesFormat());

        formats.Shuffle(random);
        dvFormats.Shuffle(random);

        // Avoid too many open files:
        if (formats.Count > 4)
        {
            formats = formats.SubList(0, 4);
        }
        if (dvFormats.Count > 4)
        {
            dvFormats = dvFormats.SubList(0, 4);
        }
    }

    /// <summary>
    /// Creates a codec that does not exclude any format by name.
    /// </summary>
    /// <param name="random">Source of randomness passed to the main constructor.</param>
    public RandomCodec(Random random)
        : this(random, new JCG.HashSet<string>())
    {
    }

    /// <summary>
    /// Registers each postings format whose name is not in <paramref name="avoidCodecs"/>,
    /// recording both the format and its name.
    /// </summary>
    /// <param name="avoidCodecs">Format names to skip.</param>
    /// <param name="postings">Candidate postings formats.</param>
    private void Add(ISet<string> avoidCodecs, params PostingsFormat[] postings)
    {
        foreach (PostingsFormat p in postings)
        {
            if (!avoidCodecs.Contains(p.Name))
            {
                formats.Add(p);
                FormatNames.Add(p.Name);
            }
        }
    }

    /// <summary>
    /// Registers each doc values format whose name is not in <paramref name="avoidCodecs"/>,
    /// recording both the format and its name.
    /// </summary>
    /// <param name="avoidCodecs">Format names to skip.</param>
    /// <param name="docvalues">Candidate doc values formats.</param>
    private void AddDocValues(ISet<string> avoidCodecs, params DocValuesFormat[] docvalues)
    {
        foreach (DocValuesFormat d in docvalues)
        {
            if (!avoidCodecs.Contains(d.Name))
            {
                dvFormats.Add(d);
                DvFormatNames.Add(d.Name);
            }
        }
    }

    /// <summary>
    /// Returns the base codec description followed by the field mappings memoized so far.
    /// </summary>
    public override string ToString()
    {
        // LUCENENET NOTE: using StringFormatter on dictionaries to print out their contents
        return string.Format(J2N.Text.StringFormatter.InvariantCulture, "{0}: {1}, docValues:{2}", base.ToString(), previousMappings, previousDVMappings);
    }
}
}