blob: 1c4bd1b6a08066e047b9462513aafea9ad145ad2 [file] [log] [blame]
using J2N;
using J2N.IO;
using J2N.Text;
using Lucene.Net.Codecs;
using Lucene.Net.Codecs.Lucene46;
using Lucene.Net.Codecs.PerField;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Support.IO;
using RandomizedTesting.Generators;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.IO.Compression;
using System.Runtime.CompilerServices;
using System.Text;
using System.Text.RegularExpressions;
using Assert = Lucene.Net.TestFramework.Assert;
using Console = Lucene.Net.Util.SystemConsole;
using Directory = Lucene.Net.Store.Directory;
using JCG = J2N.Collections.Generic;
using RandomInts = RandomizedTesting.Generators.RandomNumbers;
namespace Lucene.Net.Util
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// General utility methods for Lucene unit tests.
/// </summary>
public static class TestUtil // LUCENENET specific - made static rather than making a private constructor
{
/// <summary>
/// Removes the given files and directories, together with everything located beneath
/// each directory.
/// </summary>
/// <param name="locations">The files and/or directories to delete.</param>
/// <exception cref="IOException">At least one of the given locations (or something
/// underneath a given directory) could not be removed. The message lists every
/// path that was left behind.</exception>
public static void Rm(params FileSystemInfo[] locations)
{
    ISet<FileSystemInfo> leftovers = Rm(new JCG.HashSet<FileSystemInfo>(), locations);
    if (leftovers.Count == 0)
    {
        return; // everything was removed
    }

    // Build one message naming every path that survived the deletion attempt.
    StringBuilder message = new StringBuilder("Could not remove the following files (in the order of attempts):\n");
    foreach (FileSystemInfo leftover in leftovers)
    {
        message.Append(" ");
        message.Append(leftover.FullName);
        message.Append('\n');
    }
    throw new IOException(message.ToString());
}
/// <summary>
/// Recursive worker for the public <c>Rm</c> overload. Attempts to delete each existing
/// location (children of a directory first) and records every location whose deletion threw.
/// </summary>
/// <param name="unremoved">Accumulator that receives the locations that could not be deleted.</param>
/// <param name="locations">The files and/or directories to delete.</param>
/// <returns>The same <paramref name="unremoved"/> instance that was passed in.</returns>
private static ISet<FileSystemInfo> Rm(ISet<FileSystemInfo> unremoved, params FileSystemInfo[] locations)
{
    foreach (FileSystemInfo entry in locations)
    {
        // LUCENENET: Refresh the cached state of the FileSystemInfo object so the
        // Exists property is (somewhat) current.
        entry.Refresh();
        if (!entry.Exists)
        {
            continue;
        }

        if (entry is DirectoryInfo folder)
        {
            // Empty the directory before attempting to delete the directory itself.
            Rm(unremoved, folder.GetFileSystemInfos());
        }

        try
        {
            entry.Delete();
        }
        catch (Exception)
        {
            // Deliberately best-effort: remember the failure and keep going.
            unremoved.Add(entry);
        }
    }
    return unremoved;
}
/// <summary>
/// Extracts the zip archive read from <paramref name="zipFileStream"/> into
/// <paramref name="destDir"/>. The destination is deleted and re-created first.
/// </summary>
/// <param name="zipFileStream">A stream positioned at the zip archive to extract.</param>
/// <param name="destDir">The directory that receives the extracted files.</param>
public static void Unzip(Stream zipFileStream, DirectoryInfo destDir)
{
    Rm(destDir);
    destDir.Create();

    using ZipArchive archive = new ZipArchive(zipFileStream);
    foreach (ZipArchiveEntry item in archive.Entries)
    {
        string entryName = item.FullName;

        // Skip pure folder entries - folder names are part of each file's FullName anyway.
        bool isFolderEntry = entryName.EndsWith("/", StringComparison.Ordinal)
            || entryName.EndsWith("\\", StringComparison.Ordinal);
        if (isFolderEntry)
        {
            continue;
        }

        using Stream input = item.Open();
        FileInfo targetFile = new FileInfo(CorrectPath(Path.Combine(destDir.FullName, entryName)));

        // Make sure the folder that will hold the file exists.
        DirectoryInfo parent = targetFile.Directory;
        if (!parent.Exists)
        {
            parent.Create();
        }

        using Stream output = new FileStream(targetFile.FullName, FileMode.Create, FileAccess.Write);
        input.CopyTo(output);
    }
}
/// <summary>
/// LUCENENET specific helper that rewrites the directory separators in
/// <paramref name="input"/> to the style of the current operating system.
/// </summary>
/// <param name="input">The path to normalize.</param>
/// <returns><paramref name="input"/> with every backslash turned into a forward slash when
/// <see cref="Path.DirectorySeparatorChar"/> is '/', otherwise with every forward slash
/// turned into a backslash.</returns>
private static string CorrectPath(string input)
{
    bool forwardSlashPlatform = Path.DirectorySeparatorChar == '/';
    return forwardSlashPlatform
        ? input.Replace('\\', '/')
        : input.Replace('/', '\\');
}
/// <summary>
/// Syncs the merge scheduler configured on <paramref name="writer"/>; see the
/// <see cref="IMergeScheduler"/> overload for details.
/// </summary>
/// <param name="writer">The writer whose configured merge scheduler is synced.</param>
public static void SyncConcurrentMerges(IndexWriter writer)
    => SyncConcurrentMerges(writer.Config.MergeScheduler);
/// <summary>
/// Calls <c>Sync()</c> on <paramref name="ms"/> when it is an
/// <see cref="IConcurrentMergeScheduler"/>; any other scheduler is left untouched.
/// </summary>
/// <param name="ms">The merge scheduler to sync.</param>
public static void SyncConcurrentMerges(IMergeScheduler ms)
{
    // "as" yields null for non-concurrent schedulers, turning the call into a no-op.
    (ms as IConcurrentMergeScheduler)?.Sync();
}
/// <summary>
/// Runs the <see cref="Index.CheckIndex"/> tool on the index in <paramref name="dir"/>,
/// with term vector cross-checking enabled. If any issues are hit, an
/// <see cref="Exception"/> is thrown; otherwise the resulting status is returned.
/// </summary>
/// <param name="dir">The directory containing the index to check.</param>
/// <returns>The status reported by the checker.</returns>
public static CheckIndex.Status CheckIndex(Directory dir)
    => CheckIndex(dir, true);
/// <summary>
/// Runs the <see cref="Index.CheckIndex"/> tool on the index in <paramref name="dir"/>.
/// The checker's diagnostic output is captured in memory and written to the console
/// when the check fails, or when <c>LuceneTestCase.UseInfoStream</c> is set.
/// </summary>
/// <param name="dir">The directory containing the index to check.</param>
/// <param name="crossCheckTermVectors">Value assigned to the checker's
/// <c>CrossCheckTermVectors</c> property.</param>
/// <returns>The status reported by the checker.</returns>
/// <exception cref="Exception">The checker returned no status, or a status that is not clean.</exception>
public static CheckIndex.Status CheckIndex(Directory dir, bool crossCheckTermVectors)
{
    ByteArrayOutputStream captured = new ByteArrayOutputStream(1024);
    CheckIndex checker = new CheckIndex(dir)
    {
        CrossCheckTermVectors = crossCheckTermVectors,
        InfoStream = new StreamWriter(captured, Encoding.UTF8)
    };
    CheckIndex.Status status = checker.DoCheckIndex(null);

    bool failed = status is null || status.Clean == false;
    if (failed)
    {
        Console.WriteLine("CheckIndex failed");
        // Flush first so the buffered diagnostics actually reach the capture stream.
        checker.FlushInfoStream();
        Console.WriteLine(captured.ToString());
        throw RuntimeException.Create("CheckIndex failed");
    }

    if (LuceneTestCase.UseInfoStream)
    {
        checker.FlushInfoStream();
        Console.WriteLine(captured.ToString());
    }
    return status;
}
/// <summary>
/// Runs the <see cref="Index.CheckIndex"/> checks on every leaf of <paramref name="reader"/>,
/// with term vector cross-checking enabled. If any issues are hit, an
/// <see cref="Exception"/> is thrown.
/// </summary>
/// <param name="reader">The reader whose leaves are checked.</param>
public static void CheckReader(IndexReader reader)
{
    foreach (AtomicReaderContext leaf in reader.Leaves)
    {
        CheckReader(leaf.AtomicReader, crossCheckTermVectors: true);
    }
}
/// <summary>
/// Checks the integrity of <paramref name="reader"/> and then runs the field norms, postings,
/// stored fields, term vectors and doc values tests of <see cref="Index.CheckIndex"/> on it.
/// The diagnostic output is captured in memory and written to the console when a test
/// reports an error, or when <c>LuceneTestCase.UseInfoStream</c> is set.
/// </summary>
/// <param name="reader">The reader to check.</param>
/// <param name="crossCheckTermVectors">Passed through to the term vectors test.</param>
/// <exception cref="Exception">Any of the tests reported an error.</exception>
public static void CheckReader(AtomicReader reader, bool crossCheckTermVectors)
{
    ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
    StreamWriter infoStream = new StreamWriter(bos, Encoding.UTF8);

    reader.CheckIntegrity();
    CheckIndex.Status.FieldNormStatus fieldNormStatus = Index.CheckIndex.TestFieldNorms(reader, infoStream);
    CheckIndex.Status.TermIndexStatus termIndexStatus = Index.CheckIndex.TestPostings(reader, infoStream);
    CheckIndex.Status.StoredFieldStatus storedFieldStatus = Index.CheckIndex.TestStoredFields(reader, infoStream);
    CheckIndex.Status.TermVectorStatus termVectorStatus = Index.CheckIndex.TestTermVectors(reader, infoStream, false, crossCheckTermVectors);
    CheckIndex.Status.DocValuesStatus docValuesStatus = Index.CheckIndex.TestDocValues(reader, infoStream);

    bool failed = fieldNormStatus.Error != null
        || termIndexStatus.Error != null
        || storedFieldStatus.Error != null
        || termVectorStatus.Error != null
        || docValuesStatus.Error != null;

    if (failed)
    {
        Console.WriteLine("CheckReader failed");
        infoStream.Flush();
        Console.WriteLine(bos.ToString());
        throw RuntimeException.Create("CheckReader failed");
    }

    if (LuceneTestCase.UseInfoStream)
    {
        // StreamWriter buffers internally: without this flush the captured output
        // printed below could be empty or truncated.
        infoStream.Flush();
        Console.WriteLine(bos.ToString());
    }
}
/// <summary>
/// Picks a random <see cref="int"/> between <paramref name="minValue"/> and
/// <paramref name="maxValue"/>, both bounds inclusive.
/// </summary>
/// <param name="random">The <see cref="Random"/> instance to draw from.</param>
/// <param name="minValue">Lowest value that may be returned (inclusive).</param>
/// <param name="maxValue">Highest value that may be returned (inclusive).</param>
/// <returns>A random <see cref="int"/> within the inclusive range.</returns>
/// <exception cref="ArgumentException"><paramref name="minValue"/> is greater than <paramref name="maxValue"/>.</exception>
/// <exception cref="ArgumentNullException"><paramref name="random"/> is <c>null</c>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int NextInt32(Random random, int minValue, int maxValue)
    // LUCENENET: general random data generation lives in RandomizedTesting.Generators
    => RandomInts.RandomInt32Between(random, minValue, maxValue);
/// <summary>
/// Picks a random <see cref="long"/> between <paramref name="minValue"/> and
/// <paramref name="maxValue"/>, both bounds inclusive.
/// </summary>
/// <param name="random">The <see cref="Random"/> instance to draw from.</param>
/// <param name="minValue">Lowest value that may be returned (inclusive).</param>
/// <param name="maxValue">Highest value that may be returned (inclusive).</param>
/// <returns>A random <see cref="long"/> within the inclusive range.</returns>
/// <exception cref="ArgumentException"><paramref name="minValue"/> is greater than <paramref name="maxValue"/>.</exception>
/// <exception cref="ArgumentNullException"><paramref name="random"/> is <c>null</c>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long NextInt64(Random random, long minValue, long maxValue)
    // LUCENENET: general random data generation lives in RandomizedTesting.Generators
    => RandomInts.RandomInt64Between(random, minValue, maxValue);
/// <summary>
/// Produces a random string made up solely of the lowercase letters 'a' through 'z'.
/// The result may be empty.
/// </summary>
/// <param name="random">The <see cref="Random"/> instance to draw from.</param>
/// <param name="maxLength">Upper bound (inclusive) on the length of the returned string.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="maxLength"/> is less than 0.</exception>
/// <exception cref="ArgumentNullException"><paramref name="random"/> is <c>null</c>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static string RandomSimpleString(Random random, int maxLength)
    // LUCENENET: general random data generation lives in RandomizedTesting.Generators
    => RandomizedTesting.Generators.RandomExtensions.NextSimpleString(random, maxLength);
/// <summary>
/// Produces a random string made up solely of the lowercase letters 'a' through 'z',
/// with a length inside the given bounds. The result may be empty.
/// </summary>
/// <param name="random">The <see cref="Random"/> instance to draw from.</param>
/// <param name="minLength">Lower bound (inclusive) on the length of the returned string.</param>
/// <param name="maxLength">Upper bound (inclusive) on the length of the returned string.</param>
/// <exception cref="ArgumentException"><paramref name="minLength"/> is greater than <paramref name="maxLength"/>.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="minLength"/> or <paramref name="maxLength"/> is less than 0.</exception>
/// <exception cref="ArgumentNullException"><paramref name="random"/> is <c>null</c>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static string RandomSimpleString(Random random, int minLength, int maxLength)
    // LUCENENET: general random data generation lives in RandomizedTesting.Generators
    => RandomizedTesting.Generators.RandomExtensions.NextSimpleString(random, minLength, maxLength);
/// <summary>
/// Produces a random string whose characters all lie between <paramref name="minChar"/>
/// and <paramref name="maxChar"/>, both inclusive.
/// </summary>
/// <param name="random">The <see cref="Random"/> instance to draw from.</param>
/// <param name="minChar">Lowest <see cref="char"/> value that may appear (inclusive).</param>
/// <param name="maxChar">Highest <see cref="char"/> value that may appear (inclusive).</param>
/// <param name="maxLength">Upper bound on the length of the generated string.</param>
/// <returns>A random string containing only characters from the inclusive range
/// <paramref name="minChar"/> to <paramref name="maxChar"/>.</returns>
/// <exception cref="ArgumentException"><paramref name="minChar"/> is greater than <paramref name="maxChar"/>.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="minChar"/> or <paramref name="maxChar"/> is not in
/// the range between <see cref="char.MinValue"/> and <see cref="char.MaxValue"/>.
/// <para/>
/// -or-
/// <para/>
/// <paramref name="maxLength"/> is less than 0.
/// </exception>
/// <exception cref="ArgumentNullException"><paramref name="random"/> is <c>null</c>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static string RandomSimpleStringRange(Random random, char minChar, char maxChar, int maxLength)
    // LUCENENET: general random data generation lives in RandomizedTesting.Generators
    => RandomizedTesting.Generators.RandomExtensions.NextSimpleStringRange(random, minChar, maxChar, maxLength);
/// <summary>
/// Produces a random string made up solely of the lowercase letters 'a' through 'z',
/// between 0 and 10 characters long.
/// </summary>
/// <param name="random">The <see cref="Random"/> instance to draw from.</param>
/// <exception cref="ArgumentNullException"><paramref name="random"/> is <c>null</c>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static string RandomSimpleString(Random random)
    // LUCENENET: general random data generation lives in RandomizedTesting.Generators
    => RandomizedTesting.Generators.RandomExtensions.NextSimpleString(random);
/// <summary>
/// Produces a random string of up to 20 characters drawn from the full Unicode range.
/// </summary>
/// <param name="random">The <see cref="Random"/> instance to draw from.</param>
/// <exception cref="ArgumentNullException"><paramref name="random"/> is <c>null</c>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static string RandomUnicodeString(Random random)
    // LUCENENET: general random data generation lives in RandomizedTesting.Generators
    => RandomizedTesting.Generators.RandomExtensions.NextUnicodeString(random);
/// <summary>
/// Produces a random string whose length does not exceed <paramref name="maxLength"/>.
/// </summary>
/// <param name="random">The <see cref="Random"/> instance to draw from.</param>
/// <param name="maxLength">Upper bound on the length of the returned string.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="maxLength"/> is less than 0.</exception>
/// <exception cref="ArgumentNullException"><paramref name="random"/> is <c>null</c>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static string RandomUnicodeString(Random random, int maxLength)
    // LUCENENET: general random data generation lives in RandomizedTesting.Generators
    => RandomizedTesting.Generators.RandomExtensions.NextUnicodeString(random, maxLength);
/// <summary>
/// Writes a valid random Unicode code unit sequence into the supplied
/// <see cref="T:char[]"/>.
/// </summary>
/// <param name="random">The <see cref="Random"/> instance to draw from.</param>
/// <param name="chars">Preallocated <see cref="T:char[]"/> that receives the characters.</param>
/// <param name="startIndex">Index in <paramref name="chars"/> at which population begins.</param>
/// <param name="length">How many characters to populate.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="startIndex"/> or <paramref name="length"/> is less than 0.
/// <para/>
/// -or-
/// <para/>
/// <paramref name="startIndex"/> + <paramref name="length"/> refers to a position outside of the range of <paramref name="chars"/>.
/// </exception>
/// <exception cref="ArgumentNullException"><paramref name="random"/> or <paramref name="chars"/> is <c>null</c>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void RandomFixedLengthUnicodeString(Random random, char[] chars, int startIndex, int length)
    // LUCENENET: general random data generation lives in RandomizedTesting.Generators
    => RandomizedTesting.Generators.RandomExtensions.NextFixedLengthUnicodeString(random, chars, startIndex, length);
/// <summary>
/// Produces a <see cref="string"/> that is "regexish", i.e. full of operators typically
/// found in regular expressions. Call it often enough and you might even get a valid regex!
/// </summary>
/// <param name="random">The <see cref="Random"/> instance to draw from.</param>
/// <exception cref="ArgumentNullException"><paramref name="random"/> is <c>null</c>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static string RandomRegexishString(Random random) // LUCENENET: Renamed from regexpish to make the name .NET-like
    // LUCENENET: general random data generation lives in RandomizedTesting.Generators
    => RandomizedTesting.Generators.RandomExtensions.NextRegexishString(random);
/// <summary>
/// Produces a <see cref="string"/> that is "regexish", i.e. full of operators typically
/// found in regular expressions. Call it often enough and you might even get a valid regex!
///
/// <para/>Note: to avoid practically endless backtracking patterns, asterisk and plus
/// operators are replaced with bounded repetitions. See LUCENE-4111 for more info.
/// </summary>
/// <param name="random">The <see cref="Random"/> instance to draw from.</param>
/// <param name="maxLength">A hint about the maximum length of the regexish string. It may be exceeded by a few characters.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="maxLength"/> is less than 0.</exception>
/// <exception cref="ArgumentNullException"><paramref name="random"/> is <c>null</c>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static string RandomRegexpishString(Random random, int maxLength)
    // LUCENENET: general random data generation lives in RandomizedTesting.Generators
    => RandomizedTesting.Generators.RandomExtensions.NextRegexishString(random, maxLength);
/// <summary>
/// Produces a random string that resembles HTML.
/// </summary>
/// <param name="random">The <see cref="Random"/> instance to draw from.</param>
/// <param name="numElements">Upper bound on the number of HTML elements placed in the string.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="numElements"/> is less than 0.</exception>
/// <exception cref="ArgumentNullException"><paramref name="random"/> is <c>null</c>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static string RandomHtmlishString(Random random, int numElements)
    // LUCENENET: general random data generation lives in RandomizedTesting.Generators
    => RandomizedTesting.Generators.RandomExtensions.NextHtmlishString(random, numElements);
/// <summary>
/// For each code point of <paramref name="value"/>, randomly upcases it, downcases it,
/// or leaves it intact, using the current culture.
/// </summary>
/// <param name="random">The <see cref="Random"/> instance to draw from.</param>
/// <param name="value">The string whose casing is randomized.</param>
/// <exception cref="ArgumentNullException"><paramref name="random"/> or <paramref name="value"/> is <c>null</c>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static string RandomlyRecaseString(Random random, string value)
    // LUCENENET: general random data generation lives in RandomizedTesting.Generators
    => RandomizedTesting.Generators.RandomExtensions.NextStringRecasing(random, value);
/// <summary>
/// For each code point of <paramref name="value"/>, randomly upcases it, downcases it,
/// or leaves it intact, using the given <paramref name="culture"/>.
/// </summary>
/// <param name="random">The <see cref="Random"/> instance to draw from.</param>
/// <param name="value">The string whose casing is randomized.</param>
/// <param name="culture">The culture applied when changing case.</param>
/// <exception cref="ArgumentNullException"><paramref name="random"/>, <paramref name="value"/> or <paramref name="culture"/> is <c>null</c>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static string RandomlyRecaseString(Random random, string value, CultureInfo culture) // LUCENENET specific - allow a way to pass culture
    // LUCENENET: general random data generation lives in RandomizedTesting.Generators
    => RandomizedTesting.Generators.RandomExtensions.NextStringRecasing(random, value, culture);
/// <summary>
/// Produces a random string of 0 to 20 code points, all taken from the same Unicode block.
/// </summary>
/// <param name="random">The <see cref="Random"/> instance to draw from.</param>
/// <exception cref="ArgumentNullException"><paramref name="random"/> is <c>null</c>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static string RandomRealisticUnicodeString(Random random)
    // LUCENENET: general random data generation lives in RandomizedTesting.Generators
    => RandomizedTesting.Generators.RandomExtensions.NextRealisticUnicodeString(random);
/// <summary>
/// Produces a random string of at most <paramref name="maxLength"/> code points, all taken
/// from the same Unicode block.
/// </summary>
/// <param name="random">The <see cref="Random"/> instance to draw from.</param>
/// <param name="maxLength">Upper bound (inclusive) on the length of the returned string.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="maxLength"/> is less than 0.</exception>
/// <exception cref="ArgumentNullException"><paramref name="random"/> is <c>null</c>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static string RandomRealisticUnicodeString(Random random, int maxLength)
    // LUCENENET: general random data generation lives in RandomizedTesting.Generators
    => RandomizedTesting.Generators.RandomExtensions.NextRealisticUnicodeString(random, maxLength);
/// <summary>
/// Produces a random string whose length is between <paramref name="minLength"/> and
/// <paramref name="maxLength"/> code points, all taken from the same Unicode block.
/// </summary>
/// <param name="random">The <see cref="Random"/> instance to draw from.</param>
/// <param name="minLength">Lower bound (inclusive) on the length of the returned string.</param>
/// <param name="maxLength">Upper bound (inclusive) on the length of the returned string.</param>
/// <exception cref="ArgumentException"><paramref name="minLength"/> is greater than <paramref name="maxLength"/>.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="minLength"/> or <paramref name="maxLength"/> is less than 0.</exception>
/// <exception cref="ArgumentNullException"><paramref name="random"/> is <c>null</c>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static string RandomRealisticUnicodeString(Random random, int minLength, int maxLength)
    // LUCENENET: general random data generation lives in RandomizedTesting.Generators
    => RandomizedTesting.Generators.RandomExtensions.NextRealisticUnicodeString(random, minLength, maxLength);
/// <summary>
/// Produces a random string whose UTF-8 encoding is <paramref name="length"/> bytes long.
/// </summary>
/// <param name="random">The <see cref="Random"/> instance to draw from.</param>
/// <param name="length">The UTF-8 byte length of the string to produce.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="length"/> is less than 0.</exception>
/// <exception cref="ArgumentNullException"><paramref name="random"/> is <c>null</c>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static string RandomFixedByteLengthUnicodeString(Random random, int length)
    // LUCENENET: general random data generation lives in RandomizedTesting.Generators
    => RandomizedTesting.Generators.RandomExtensions.NextFixedByteLengthUnicodeString(random, length);
/// <summary>
/// Creates a <see cref="Codec"/> that can read any of the default codecs and formats,
/// but always writes postings in the specified <paramref name="format"/>.
/// </summary>
/// <param name="format">The postings format the returned codec reports for every field.</param>
/// <returns>A codec derived from <see cref="Lucene46Codec"/> that forces <paramref name="format"/>.</returns>
public static Codec AlwaysPostingsFormat(PostingsFormat format)
{
    // TODO: we really need for postings impls etc to announce themselves
    // (and maybe their params, too) to infostream on flush and merge.
    // otherwise in a real debugging situation we won't know whats going on!
    if (LuceneTestCase.Verbose)
    {
        string notice = "forcing postings format to:" + format;
        Console.WriteLine(notice);
    }

    Codec forcingCodec = new Lucene46CodecAnonymousClass(format);
    return forcingCodec;
}
/// <summary>
/// <see cref="Lucene46Codec"/> variant that reports one fixed <see cref="PostingsFormat"/>
/// for every field.
/// </summary>
private sealed class Lucene46CodecAnonymousClass : Lucene46Codec
{
    private readonly PostingsFormat forcedFormat;

    public Lucene46CodecAnonymousClass(PostingsFormat format)
        => forcedFormat = format;

    // The field name is ignored on purpose: every field gets the same format.
    public override PostingsFormat GetPostingsFormatForField(string field)
        => forcedFormat;
}
/// <summary>
/// Creates a <see cref="Codec"/> that can read any of the default codecs and formats,
/// but always writes doc values in the specified <paramref name="format"/>.
/// </summary>
/// <param name="format">The doc values format the returned codec reports for every field.</param>
/// <returns>A codec derived from <see cref="Lucene46Codec"/> that forces <paramref name="format"/>.</returns>
public static Codec AlwaysDocValuesFormat(DocValuesFormat format)
{
    // TODO: we really need for docvalues impls etc to announce themselves
    // (and maybe their params, too) to infostream on flush and merge.
    // otherwise in a real debugging situation we won't know whats going on!
    if (LuceneTestCase.Verbose)
    {
        string notice = "forcing docvalues format to:" + format;
        Console.WriteLine(notice);
    }

    Codec forcingCodec = new Lucene46CodecAnonymousClass2(format);
    return forcingCodec;
}
/// <summary>
/// <see cref="Lucene46Codec"/> variant that reports one fixed <see cref="DocValuesFormat"/>
/// for every field.
/// </summary>
private sealed class Lucene46CodecAnonymousClass2 : Lucene46Codec
{
    private readonly DocValuesFormat forcedFormat;

    public Lucene46CodecAnonymousClass2(DocValuesFormat format)
        => forcedFormat = format;

    // The field name is ignored on purpose: every field gets the same format.
    public override DocValuesFormat GetDocValuesFormatForField(string field)
        => forcedFormat;
}
// TODO: generalize all 'test-checks-for-crazy-codecs' to
// annotations (LUCENE-3489)
/// <summary>
/// Looks up the name of the postings format that <see cref="Codec.Default"/> uses
/// for <paramref name="field"/>.
/// </summary>
/// <param name="field">The field name to look up.</param>
/// <returns>The postings format name.</returns>
public static string GetPostingsFormat(string field)
    => GetPostingsFormat(Codec.Default, field);
/// <summary>
/// Looks up the name of the postings format that <paramref name="codec"/> uses for
/// <paramref name="field"/>. When the codec's postings format is a
/// <see cref="PerFieldPostingsFormat"/>, the format it selects for the field is used;
/// otherwise the codec's postings format itself is used.
/// </summary>
/// <param name="codec">The codec to inspect.</param>
/// <param name="field">The field name to look up.</param>
/// <returns>The postings format name.</returns>
public static string GetPostingsFormat(Codec codec, string field)
{
    PostingsFormat configured = codec.PostingsFormat;
    PostingsFormat effective = configured is PerFieldPostingsFormat perField
        ? perField.GetPostingsFormatForField(field)
        : configured;
    return effective.Name;
}
/// <summary>
/// Looks up the name of the doc values format that <see cref="Codec.Default"/> uses
/// for <paramref name="field"/>.
/// </summary>
/// <param name="field">The field name to look up.</param>
/// <returns>The doc values format name.</returns>
public static string GetDocValuesFormat(string field)
    => GetDocValuesFormat(Codec.Default, field);
/// <summary>
/// Looks up the name of the doc values format that <paramref name="codec"/> uses for
/// <paramref name="field"/>. When the codec's doc values format is a
/// <see cref="PerFieldDocValuesFormat"/>, the format it selects for the field is used;
/// otherwise the codec's doc values format itself is used.
/// </summary>
/// <param name="codec">The codec to inspect.</param>
/// <param name="field">The field name to look up.</param>
/// <returns>The doc values format name.</returns>
public static string GetDocValuesFormat(Codec codec, string field)
{
    DocValuesFormat configured = codec.DocValuesFormat;
    DocValuesFormat effective = configured is PerFieldDocValuesFormat perField
        ? perField.GetDocValuesFormatForField(field)
        : configured;
    return effective.Name;
}
// TODO: remove this, push this test to Lucene40/Lucene42 codec tests
/// <summary>
/// Reports whether the doc values format used for <paramref name="field"/> by the default
/// codec is anything other than "Lucene40", "Lucene42" or "Memory".
/// </summary>
/// <param name="field">The field name to look up.</param>
/// <returns><c>false</c> when the format name is one of the three listed names; otherwise <c>true</c>.</returns>
public static bool FieldSupportsHugeBinaryDocValues(string field)
{
    string formatName = GetDocValuesFormat(field);
    return !formatName.Equals("Lucene40", StringComparison.Ordinal)
        && !formatName.Equals("Lucene42", StringComparison.Ordinal)
        && !formatName.Equals("Memory", StringComparison.Ordinal);
}
/// <summary>
/// Reports whether <paramref name="dir"/> lists any file other than "write.lock".
/// </summary>
/// <param name="dir">The directory to inspect.</param>
/// <returns><c>true</c> if more than one file is listed, or if the only listed file is not
/// named "write.lock"; otherwise <c>false</c>.</returns>
public static bool AnyFilesExceptWriteLock(Directory dir)
{
    string[] names = dir.ListAll();
    if (names.Length == 0)
    {
        return false;
    }
    if (names.Length > 1)
    {
        return true;
    }

    // Exactly one file: it only counts when it is not the write lock.
    return !names[0].Equals("write.lock", StringComparison.Ordinal);
}
/// <summary>
/// Tweaks the merge policy and merge scheduler configured on <paramref name="w"/>
/// in an attempt to keep the number of open files lowish.
/// </summary>
/// <param name="w">The writer whose configuration is adjusted in place.</param>
public static void ReduceOpenFiles(IndexWriter w)
{
    // Cap the merge width at 5 and set the no-CFS ratio to 1.0.
    switch (w.Config.MergePolicy)
    {
        case LogMergePolicy logPolicy:
            logPolicy.MergeFactor = Math.Min(5, logPolicy.MergeFactor);
            logPolicy.NoCFSRatio = 1.0;
            break;

        case TieredMergePolicy tieredPolicy:
            tieredPolicy.MaxMergeAtOnce = Math.Min(5, tieredPolicy.MaxMergeAtOnce);
            tieredPolicy.SegmentsPerTier = Math.Min(5, tieredPolicy.SegmentsPerTier);
            tieredPolicy.NoCFSRatio = 1.0;
            break;
    }

    if (w.Config.MergeScheduler is IConcurrentMergeScheduler concurrentScheduler)
    {
        // NOTE(review): the original author questioned whether these limits should be
        // even lower, stating that the default is 1 - confirm before changing.
        concurrentScheduler.SetMaxMergesAndThreads(3, 2);
    }
}
/// <summary>
/// Checks some basic behaviour of an <see cref="Attribute"/>: reflects it into a map and
/// asserts that the map equals <paramref name="reflectedValues"/>.
/// </summary>
/// <param name="att"><see cref="Attribute"/> to reflect.</param>
/// <param name="reflectedValues">The expected <see cref="IDictionary{String, Object}"/>. Keys are
/// built as the attribute type's name, a '#' character, and the reflected key.</param>
public static void AssertAttributeReflection(Attribute att, IDictionary<string, object> reflectedValues)
{
    IDictionary<string, object> collected = new JCG.Dictionary<string, object>();
    AttributeReflectorAnonymousClass collector = new AttributeReflectorAnonymousClass(collected);
    att.ReflectWith(collector);

    Assert.AreEqual(reflectedValues, collected, aggressive: false, "Reflection does not produce same map");
}
/// <summary>
/// <see cref="IAttributeReflector"/> that stores every reflected value in a dictionary,
/// keyed by the attribute type's name, a '#' character, and the reflected key.
/// </summary>
private sealed class AttributeReflectorAnonymousClass : IAttributeReflector
{
    private readonly IDictionary<string, object> sink;

    public AttributeReflectorAnonymousClass(IDictionary<string, object> map)
        => sink = map;

    // The generic form simply forwards to the Type-based overload.
    public void Reflect<T>(string key, object value)
        where T : IAttribute
        => Reflect(typeof(T), key, value);

    public void Reflect(Type attClass, string key, object value)
        => sink[attClass.Name + '#' + key] = value;
}
/// <summary>
/// Asserts that two <see cref="TopDocs"/> agree on total hits, max score, hit count, and,
/// for every hit, doc ID and score. When an expected hit is a <see cref="FieldDoc"/>, the
/// actual hit must be one too and carry equal sort field values; otherwise the actual hit
/// must not be a <see cref="FieldDoc"/>.
/// </summary>
/// <param name="expected">The reference results.</param>
/// <param name="actual">The results under test.</param>
public static void AssertEquals(TopDocs expected, TopDocs actual)
{
    Assert.AreEqual(expected.TotalHits, actual.TotalHits, "wrong total hits");
    Assert.AreEqual(expected.MaxScore, actual.MaxScore, "wrong maxScore");
    Assert.AreEqual(expected.ScoreDocs.Length, actual.ScoreDocs.Length, "wrong hit count");

    int hitCount = expected.ScoreDocs.Length;
    for (int i = 0; i < hitCount; i++)
    {
        ScoreDoc wanted = expected.ScoreDocs[i];
        ScoreDoc got = actual.ScoreDocs[i];

        Assert.AreEqual(wanted.Doc, got.Doc, "wrong hit docID");
        Assert.AreEqual(wanted.Score, got.Score, "wrong hit score");

        if (!(wanted is FieldDoc wantedFieldDoc))
        {
            Assert.IsFalse(got is FieldDoc);
            continue;
        }

        Assert.IsTrue(got is FieldDoc);
        Assert.AreEqual(wantedFieldDoc.Fields, ((FieldDoc)got).Fields, "wrong sort field values");
    }
}
// NOTE: this is likely buggy, and cannot clone fields
// with tokenStreamValues, etc. Use at your own risk!!
// TODO: is there a pre-existing way to do this!!!
/// <summary>
/// Builds a new <see cref="Document"/> holding a copy of every field of <paramref name="doc1"/>.
/// Doc values fields (numeric, binary, sorted) are re-created as the matching doc values
/// field type, numeric fields as the matching numeric field type, and every other field
/// as a plain <see cref="Field"/> built from the field's string value.
/// </summary>
/// <param name="doc1">The document to copy. Every field must be a <see cref="Field"/> instance.</param>
/// <returns>The newly created document.</returns>
/// <exception cref="Exception">A field has a doc values type or numeric type that is not handled here.</exception>
public static Document CloneDocument(Document doc1)
{
    Document doc2 = new Document();
    foreach (IIndexableField f in doc1.Fields)
    {
        Field field1 = (Field)f;
        Field field2;
        DocValuesType dvType = field1.FieldType.DocValueType;
        NumericType numType = field1.FieldType.NumericType;
        if (dvType != DocValuesType.NONE)
        {
            switch (dvType)
            {
                case DocValuesType.NUMERIC:
                    field2 = new NumericDocValuesField(field1.Name, field1.GetInt64Value().Value);
                    break;
                case DocValuesType.BINARY:
                    field2 = new BinaryDocValuesField(field1.Name, field1.GetBinaryValue());
                    break;
                case DocValuesType.SORTED:
                    field2 = new SortedDocValuesField(field1.Name, field1.GetBinaryValue());
                    break;
                default:
                    throw IllegalStateException.Create("unknown Type: " + dvType);
            }
        }
        else if (numType != NumericType.NONE)
        {
            // Read each value with the getter matching its numeric type. Going through
            // GetInt32Value() for every type would truncate float/double values and
            // overflow long values.
            switch (numType)
            {
                case NumericType.INT32:
                    field2 = new Int32Field(field1.Name, field1.GetInt32Value().Value, field1.FieldType);
                    break;
                case NumericType.SINGLE:
                    field2 = new SingleField(field1.Name, field1.GetSingleValue().Value, field1.FieldType);
                    break;
                case NumericType.INT64:
                    field2 = new Int64Field(field1.Name, field1.GetInt64Value().Value, field1.FieldType);
                    break;
                case NumericType.DOUBLE:
                    field2 = new DoubleField(field1.Name, field1.GetDoubleValue().Value, field1.FieldType);
                    break;
                default:
                    throw IllegalStateException.Create("unknown Type: " + numType);
            }
        }
        else
        {
            field2 = new Field(field1.Name, field1.GetStringValue(), field1.FieldType);
        }
        doc2.Add(field2);
    }
    return doc2;
}
/// <summary>
/// Looks up <paramref name="term"/> in <paramref name="field"/> and returns a
/// <see cref="DocsEnum"/> for it, but randomly sometimes uses a
/// <see cref="MultiDocsEnum"/>, <see cref="DocsAndPositionsEnum"/>. Returns <c>null</c>
/// if the field or the term doesn't exist.
/// </summary>
/// <param name="random">Source of randomness for choosing the enum flavor.</param>
/// <param name="r">The reader to search.</param>
/// <param name="field">The field whose terms are consulted.</param>
/// <param name="term">The term to seek to.</param>
/// <param name="liveDocs">Passed through to the enum request.</param>
/// <param name="reuse">Passed through to the enum request.</param>
/// <param name="flags">Passed through to the enum request.</param>
public static DocsEnum Docs(Random random, IndexReader r, string field, BytesRef term, IBits liveDocs, DocsEnum reuse, DocsFlags flags)
{
    Terms fieldTerms = MultiFields.GetTerms(r, field);
    if (fieldTerms is null)
    {
        return null; // no such field
    }

    TermsEnum cursor = fieldTerms.GetEnumerator();
    return cursor.SeekExact(term)
        ? Docs(random, cursor, liveDocs, reuse, flags)
        : null; // no such term
}
/// <summary>
/// Returns a <see cref="DocsEnum"/> from a positioned <see cref="TermsEnum"/>, but
/// randomly sometimes uses a <see cref="MultiDocsEnum"/>, <see cref="DocsAndPositionsEnum"/>.
/// </summary>
/// <param name="random">Source of randomness for choosing the enum flavor.</param>
/// <param name="termsEnum">The terms enum, already positioned on the desired term.</param>
/// <param name="liveDocs">Passed through to the enum request.</param>
/// <param name="reuse">Passed to the plain docs request only; the positions request is made with <c>null</c>.</param>
/// <param name="flags">Flags for the plain docs request; <c>FREQS</c> is randomly added.</param>
public static DocsEnum Docs(Random random, TermsEnum termsEnum, IBits liveDocs, DocsEnum reuse, DocsFlags flags)
{
    // First coin flip: use the caller's request untouched.
    if (!random.NextBoolean())
    {
        return termsEnum.Docs(liveDocs, reuse, flags);
    }

    // Second coin flip: try a positions enum with randomly chosen flags.
    if (random.NextBoolean())
    {
        int pick = random.Next(4);
        DocsAndPositionsFlags posFlags;
        if (pick == 0)
        {
            posFlags = 0;
        }
        else if (pick == 1)
        {
            posFlags = DocsAndPositionsFlags.OFFSETS;
        }
        else if (pick == 2)
        {
            posFlags = DocsAndPositionsFlags.PAYLOADS;
        }
        else
        {
            posFlags = DocsAndPositionsFlags.OFFSETS | DocsAndPositionsFlags.PAYLOADS;
        }

        // TODO: cast to DocsAndPositionsEnum?
        DocsAndPositionsEnum withPositions = termsEnum.DocsAndPositions(liveDocs, null, posFlags);
        if (withPositions != null)
        {
            return withPositions;
        }
    }

    // Otherwise request a plain docs enum with FREQS added to the flags.
    flags |= DocsFlags.FREQS;
    return termsEnum.Docs(liveDocs, reuse, flags);
}
/// <summary>
/// Wraps <paramref name="string"/> in a randomly chosen <see cref="ICharSequence"/>
/// implementation by converting it to a <see cref="BytesRef"/> and delegating to
/// <c>BytesToCharSequence</c>.
/// </summary>
/// <param name="string">The text to wrap.</param>
/// <param name="random">Source of randomness for choosing the implementation.</param>
public static ICharSequence StringToCharSequence(string @string, Random random)
    => BytesToCharSequence(new BytesRef(@string), random);
/// <summary>
/// Converts <paramref name="ref"/> to an <see cref="ICharSequence"/>, choosing the
/// concrete representation from a single <c>random.Next(5)</c> draw:
/// <c>4</c> yields a <see cref="CharsRef"/> filled by <see cref="UnicodeUtil"/>,
/// <c>3</c> yields a <see cref="CharBuffer"/> wrapping the decoded string, and any
/// other value yields a <see cref="StringCharSequence"/> over the decoded string.
/// </summary>
public static ICharSequence BytesToCharSequence(BytesRef @ref, Random random)
{
    int variant = random.Next(5);

    if (variant == 4)
    {
        CharsRef decoded = new CharsRef(@ref.Length);
        UnicodeUtil.UTF8toUTF16(@ref.Bytes, @ref.Offset, @ref.Length, decoded);
        return decoded;
    }

    if (variant == 3)
    {
        return CharBuffer.Wrap(@ref.Utf8ToString());
    }

    return new StringCharSequence(@ref.Utf8ToString());
}
///// <summary>
///// Shuts down the <see cref="TaskScheduler"/> and waits for its termination.
///// </summary>
//public static void ShutdownExecutorService(TaskScheduler ex) // LUCENENET: TaskScheduler doesn't have a way to terminate in .NET
//{
// /*if (ex != null)
// {
// try
// {
// ex.shutdown();
// ex.awaitTermination(1, TimeUnit.SECONDS);
// }
// catch (Exception e) when (e.IsInterruptedException())
// {
// // Just report it on the syserr.
// Console.Error.WriteLine("Could not properly shutdown executor service.");
// Console.Error.WriteLine(e.StackTrace);
// }
// }*/
//}
/// <summary>
/// Produces a valid (compiling) <see cref="Regex"/> with random content. Take care
/// when matching such patterns against longer strings: some pattern shapes can
/// blow up to exponential running time in backtracking implementations (such as Java's).
/// </summary>
/// <param name="random">Source of randomness handed to the generator.</param>
public static Regex RandomRegex(Random random) // LUCENENET specific - renamed from RandomPattern()
    => RandomizedTesting.Generators.RandomExtensions.NextRegex(random); // LUCENENET: Moved general random data generation to RandomizedTesting.Generators
/// <summary>
/// Picks a <see cref="FilteredQuery.FilterStrategy"/> from a single
/// <c>random.Next(6)</c> draw. Draws of <c>4</c> and <c>5</c> give a new strategy that
/// decides random access by coin flip, <c>0</c> to <c>2</c> map to the query-first and
/// leap-frog strategies, and everything else gives
/// <see cref="FilteredQuery.RANDOM_ACCESS_FILTER_STRATEGY"/>.
/// </summary>
public static FilteredQuery.FilterStrategy RandomFilterStrategy(Random random)
{
    int pick = random.Next(6);

    if (pick == 5 || pick == 4)
    {
        return new RandomAccessFilterStrategyAnonymousClass();
    }

    if (pick == 2)
    {
        return FilteredQuery.LEAP_FROG_FILTER_FIRST_STRATEGY;
    }

    if (pick == 1)
    {
        return FilteredQuery.LEAP_FROG_QUERY_FIRST_STRATEGY;
    }

    if (pick == 0)
    {
        return FilteredQuery.QUERY_FIRST_FILTER_STRATEGY;
    }

    // Covers a draw of 3 as well as the catch-all case.
    return FilteredQuery.RANDOM_ACCESS_FILTER_STRATEGY;
}
/// <summary>
/// A <see cref="FilteredQuery.RandomAccessFilterStrategy"/> whose random-access
/// decision ignores both arguments and is instead a fresh coin flip taken from
/// <see cref="LuceneTestCase.Random"/> on every call.
/// </summary>
private sealed class RandomAccessFilterStrategyAnonymousClass : FilteredQuery.RandomAccessFilterStrategy
{
    protected override bool UseRandomAccess(IBits bits, int firstFilterDoc)
        => LuceneTestCase.Random.NextBoolean();
}
/// <summary>
/// Builds a random string made up solely of characters drawn from
/// <see cref="WHITESPACE_CHARACTERS"/>. The length is chosen with
/// <c>NextInt32(random, minLength, maxLength)</c>.
/// </summary>
/// <param name="random">Source of randomness for the length and for each character.</param>
/// <param name="minLength">Lower bound passed to <c>NextInt32</c> when choosing the length.</param>
/// <param name="maxLength">Upper bound passed to <c>NextInt32</c> when choosing the length.</param>
/// <returns>The generated whitespace-only string.</returns>
/// <seealso cref="WHITESPACE_CHARACTERS"/>
public static string RandomWhitespace(Random random, int minLength, int maxLength)
{
    int count = NextInt32(random, minLength, maxLength);
    StringBuilder whitespace = new StringBuilder();

    int produced = 0;
    while (produced < count)
    {
        int index = NextInt32(random, 0, WHITESPACE_CHARACTERS.Length - 1);
        char candidate = WHITESPACE_CHARACTERS[index];

        // sanity check: the table must only contain what .NET considers whitespace
        Assert.IsTrue(char.IsWhiteSpace(candidate), "Not really whitespace? (@" + index + "): " + candidate);

        whitespace.Append(candidate);
        produced++;
    }

    return whitespace.ToString();
}
/// <summary>
/// Generates a random string intended as analyzer input. The effective length limit
/// is first reduced to <c>random.Next(maxLength)</c> (or kept at 0). Then, roughly one
/// time in 31, the result is a single <see cref="RandomSubString(Random, int, bool)"/>
/// of that length; otherwise it is a sequence of space-separated random "words" whose
/// lengths follow a Gaussian around a randomly chosen average of 3 to 8.
/// </summary>
/// <param name="random">Source of randomness.</param>
/// <param name="maxLength">Non-negative upper bound used to draw the effective length limit.</param>
/// <param name="simple">Forwarded to <see cref="RandomSubString(Random, int, bool)"/>.</param>
public static string RandomAnalysisString(Random random, int maxLength, bool simple)
{
    Assert.True(maxLength >= 0);

    // LUCENENET: Lucene bug - random.Next(int) in Java must be > 0, and so must J2N.Randomizer. So, just pass through 0 as .NET would.
    if (maxLength != 0)
    {
        maxLength = random.Next(maxLength);
    }

    // sometimes just a purely random string
    bool purelyRandom = random.Next(31) == 0;
    if (purelyRandom)
    {
        // LUCENENET specific - We need to pass the value from the random class or 0, just like the remainder of the code
        return RandomSubString(random, maxLength, simple);
    }

    // otherwise, try to make it more realistic with 'words' since most tests use MockTokenizer
    int meanWordLength = NextInt32(random, 3, 8);
    StringBuilder text = new StringBuilder();
    while (text.Length < maxLength)
    {
        // separate consecutive words with a single space
        if (text.Length != 0)
        {
            text.Append(' ');
        }

        // redraw until the Gaussian sample is not negative
        int wordLength;
        do
        {
            wordLength = (int)(random.NextGaussian() * 3 + meanWordLength);
        }
        while (wordLength < 0);

        // never ask for more than the room that is left
        int roomLeft = maxLength - text.Length;
        text.Append(RandomSubString(random, Math.Min(wordLength, roomLeft), simple));
    }

    return text.ToString();
}
/// <summary>
/// Generates a random string of at most <paramref name="wordLength"/> chars by
/// appending output from one of several random-string generators until the requested
/// length is reached, trimming any excess, and occasionally (one time in 17) randomly
/// re-casing the result.
/// </summary>
/// <param name="random">Source of randomness.</param>
/// <param name="wordLength">Requested length; <c>0</c> yields the empty string immediately.</param>
/// <param name="simple">
/// When <c>true</c>, each appended piece is either a simple or an HTML-ish string
/// (coin flip per piece). When <c>false</c>, the generator is selected by an
/// "evilness" level drawn once per call.
/// </param>
public static string RandomSubString(Random random, int wordLength, bool simple)
{
    if (wordLength == 0)
    {
        return "";
    }

    // Drawn once per call; only consulted in the non-simple mode.
    int evilness = TestUtil.NextInt32(random, 0, 20);

    StringBuilder buffer = new StringBuilder();
    while (buffer.Length < wordLength)
    {
        if (simple)
        {
            if (random.NextBoolean())
            {
                buffer.Append(TestUtil.RandomSimpleString(random, wordLength));
            }
            else
            {
                buffer.Append(TestUtil.RandomHtmlishString(random, wordLength));
            }
        }
        else if (evilness < 10)
        {
            buffer.Append(TestUtil.RandomSimpleString(random, wordLength));
        }
        else if (evilness < 15)
        {
            Assert.AreEqual(0, buffer.Length); // we should always get wordLength back!
            buffer.Append(TestUtil.RandomRealisticUnicodeString(random, wordLength, wordLength));
        }
        else if (evilness == 16)
        {
            buffer.Append(TestUtil.RandomHtmlishString(random, wordLength));
        }
        else if (evilness == 17)
        {
            // gives a lot of punctuation
            buffer.Append(TestUtil.RandomRegexpishString(random, wordLength));
        }
        else
        {
            buffer.Append(TestUtil.RandomUnicodeString(random, wordLength));
        }
    }

    if (buffer.Length > wordLength)
    {
        buffer.Length = wordLength;

        // Do not leave the first half of a surrogate pair dangling at the end.
        if (char.IsHighSurrogate(buffer[wordLength - 1]))
        {
            buffer.Length = wordLength - 1;
        }
    }

    if (random.Next(17) != 0)
    {
        return buffer.ToString();
    }

    // mix up case
    string mixedUp = TestUtil.RandomlyRecaseString(random, buffer.ToString());
    Assert.True(mixedUp.Length == buffer.Length, "Lengths are not the same: mixedUp = " + mixedUp + ", length = " + mixedUp.Length + ", sb = " + buffer + ", length = " + buffer.Length);
    return mixedUp;
}
/// <summary>
/// Table of characters expected to match <see cref="char.IsWhiteSpace(char)"/>.
/// <see cref="RandomWhitespace(Random, int, int)"/> draws from this table and asserts
/// that expectation for every character it picks.
/// </summary>
public static readonly char[] WHITESPACE_CHARACTERS =
{
    // :TODO: is this list exhaustive?
    '\u0009', // CHARACTER TABULATION
    '\u000A', // LINE FEED ('\n')
    '\u000B', // LINE TABULATION
    '\u000C', // FORM FEED
    '\u000D', // CARRIAGE RETURN ('\r')
    '\u001C', // INFORMATION SEPARATOR FOUR
    '\u001D', // INFORMATION SEPARATOR THREE
    '\u001E', // INFORMATION SEPARATOR TWO
    '\u001F', // INFORMATION SEPARATOR ONE
    '\u0020', // SPACE
    // '\u0085', failed sanity check?
    '\u1680', // OGHAM SPACE MARK
    '\u180E', // MONGOLIAN VOWEL SEPARATOR
    '\u2000', // EN QUAD
    '\u2001', // EM QUAD
    '\u2002', // EN SPACE
    '\u2003', // EM SPACE
    '\u2004', // THREE-PER-EM SPACE
    '\u2005', // FOUR-PER-EM SPACE
    '\u2006', // SIX-PER-EM SPACE
    '\u2008', // PUNCTUATION SPACE
    '\u2009', // THIN SPACE
    '\u200A', // HAIR SPACE
    '\u2028', // LINE SEPARATOR
    '\u2029', // PARAGRAPH SEPARATOR
    '\u205F', // MEDIUM MATHEMATICAL SPACE
    '\u3000'  // IDEOGRAPHIC SPACE
};
}
}