lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java - lucene - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.util;

 import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
 import com.carrotsearch.randomizedtesting.generators.RandomPicks;
 import java.io.BufferedInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.PrintStream;
 import java.math.BigDecimal;
 import java.math.BigInteger;
 import java.nio.CharBuffer;
 import java.nio.file.FileSystem;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.NoSuchElementException;
 import java.util.Random;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.TimeUnit;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipInputStream;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.VectorFormat;
 import org.apache.lucene.codecs.asserting.AssertingCodec;
 import org.apache.lucene.codecs.blockterms.LuceneFixedGap;
 import org.apache.lucene.codecs.blocktreeords.BlockTreeOrdsPostingsFormat;
 import org.apache.lucene.codecs.lucene90.Lucene90Codec;
 import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
 import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat;
 import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat;
 import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
 import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
 import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.BinaryPoint;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedDocValuesField;
 import org.apache.lucene.index.CheckIndex;
 import org.apache.lucene.index.CodecReader;
 import org.apache.lucene.index.ConcurrentMergeScheduler;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.LogMergePolicy;
 import org.apache.lucene.index.MergePolicy;
 import org.apache.lucene.index.MergeScheduler;
 import org.apache.lucene.index.MultiTerms;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.SlowCodecReaderWrapper;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.TieredMergePolicy;
 import org.apache.lucene.mockfile.FilterFileSystem;
 import org.apache.lucene.mockfile.VirusCheckingFS;
 import org.apache.lucene.mockfile.WindowsFS;
 import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.TotalHits;
 import org.apache.lucene.store.ByteBuffersDirectory;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.FilterDirectory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.NoLockFactory;
 import org.junit.Assert;

 /** General utility methods for Lucene unit tests. */
 public final class TestUtil {
   private TestUtil() {
     // Static utility holder: the private constructor keeps it from being instantiated.
   }

   /**
    * Orders UTF-16 strings / char sequences by Unicode code point instead of by UTF-16 code unit.
    * This can be used to verify {@link BytesRef} order.
    *
    * <p><b>Warning:</b> This comparator is slow: every invocation decodes both arguments into
    * freshly allocated {@code int[]} arrays of code points.
    */
   public static final Comparator<CharSequence> STRING_CODEPOINT_COMPARATOR =
       (a, b) -> {
         final int[] left = a.codePoints().toArray();
         final int[] right = b.codePoints().toArray();
         final int shared = Math.min(left.length, right.length);
         int pos = 0;
         // skip over the common prefix
         while (pos < shared && left[pos] == right[pos]) {
           pos++;
         }
         if (pos < shared) {
           // the first differing code point decides the order
           return left[pos] < right[pos] ? -1 : 1;
         }
         // one sequence is a prefix of the other: the shorter one sorts first
         return left.length - right.length;
       };

   /**
    * Convenience method unzipping the archive read from {@code in} into {@code destDir}. You must
    * pass it a clean destDir.
    *
    * <p>Closes the given InputStream after extracting!
    *
    * @param in stream holding the zip archive; it is always closed by this method
    * @param destDir directory the entries are extracted into
    * @throws IOException if reading or writing fails, or if an entry name resolves to a location
    *     outside of {@code destDir}
    */
   public static void unzip(InputStream in, Path destDir) throws IOException {
     in = new BufferedInputStream(in);

     try (ZipInputStream zipInput = new ZipInputStream(in)) {
       final Path destRoot = destDir.toAbsolutePath().normalize();
       ZipEntry entry;
       byte[] buffer = new byte[8192];
       while ((entry = zipInput.getNextEntry()) != null) {
         Path targetFile = destDir.resolve(entry.getName());

         // refuse entries (e.g. "../x" or absolute names) that would be written outside destDir
         if (!targetFile.toAbsolutePath().normalize().startsWith(destRoot)) {
           throw new IOException(
               "zip entry resolves outside of the destination directory: " + entry.getName());
         }

         // be on the safe side: do not rely on that directories are always extracted
         // before their children (although this makes sense, but is it guaranteed?)
         Files.createDirectories(targetFile.getParent());
         if (!entry.isDirectory()) {
           // try-with-resources: the file handle is released even if the copy fails midway
           try (OutputStream out = Files.newOutputStream(targetFile)) {
             int len;
             while ((len = zipInput.read(buffer)) >= 0) {
               out.write(buffer, 0, len);
             }
           }
         }
         zipInput.closeEntry();
       }
     }
   }

   /**
    * Verifies that the given iterator is well-formed.
    *
    * <ul>
    *   <li>it is read-only: {@code remove} must throw {@link UnsupportedOperationException}
    *   <li>it yields exactly {@code expectedSize} elements
    *   <li>it never yields null, unless {@code allowNull} is true
    *   <li>calling {@code next} once {@code hasNext} has returned false must throw {@link
    *       NoSuchElementException}
    * </ul>
    *
    * @throws AssertionError if any of the above is violated (some checks use {@code assert})
    */
   public static <T> void checkIterator(Iterator<T> iterator, long expectedSize, boolean allowNull) {
     long consumed = 0;
     while (consumed < expectedSize) {
       final boolean more = iterator.hasNext();
       assert more;
       final T element = iterator.next();
       assert allowNull || element != null;
       if (consumed == 0) {
         // probing remove() on the first element only
         boolean removeSucceeded = true;
         try {
           iterator.remove();
         } catch (
             @SuppressWarnings("unused")
             UnsupportedOperationException expected) {
           removeSucceeded = false;
         }
         if (removeSucceeded) {
           throw new AssertionError("broken iterator (supports remove): " + iterator);
         }
       }
       consumed++;
     }
     assert !iterator.hasNext();
     boolean nextSucceeded = true;
     try {
       iterator.next();
     } catch (
         @SuppressWarnings("unused")
         NoSuchElementException expected) {
       nextSucceeded = false;
     }
     if (nextSucceeded) {
       throw new AssertionError("broken iterator (allows next() when hasNext==false) " + iterator);
     }
   }

   /**
    * Verifies that the given iterator is well-formed, consuming it completely.
    *
    * <ul>
    *   <li>it is read-only: {@code remove} must throw {@link UnsupportedOperationException} after
    *       every element
    *   <li>it never yields null elements
    *   <li>calling {@code next} once {@code hasNext} has returned false must throw {@link
    *       NoSuchElementException}
    * </ul>
    *
    * @throws AssertionError if any of the above is violated (the null check uses {@code assert})
    */
   public static <T> void checkIterator(Iterator<T> iterator) {
     for (; iterator.hasNext(); ) {
       final T element = iterator.next();
       assert element != null;
       boolean removeSucceeded = true;
       try {
         iterator.remove();
       } catch (
           @SuppressWarnings("unused")
           UnsupportedOperationException expected) {
         removeSucceeded = false;
       }
       if (removeSucceeded) {
         throw new AssertionError("broken iterator (supports remove): " + iterator);
       }
     }
     boolean nextSucceeded = true;
     try {
       iterator.next();
     } catch (
         @SuppressWarnings("unused")
         NoSuchElementException expected) {
       nextSucceeded = false;
     }
     if (nextSucceeded) {
       throw new AssertionError("broken iterator (allows next() when hasNext==false) " + iterator);
     }
   }

   /**
    * Verifies that the given collection is read-only: its reported size must match the number of
    * iterated elements, {@code remove}, {@code add} and {@code addAll} must throw {@link
    * UnsupportedOperationException}, and its iterator must itself be well-formed.
    *
    * @throws AssertionError if the collection violates any of these expectations
    * @see #checkIterator(Iterator)
    */
   public static <T> void checkReadOnly(Collection<T> coll) {
     // count what the iterator really yields and compare it with the advertised size
     int iterated = 0;
     final Iterator<?> counter = coll.iterator();
     while (counter.hasNext()) {
       counter.next();
       iterated++;
     }
     if (iterated != coll.size()) {
       throw new AssertionError(
           "broken collection, reported size is "
               + coll.size()
               + " but iterator has "
               + iterated
               + " elements: "
               + coll);
     }

     // remove can only be probed when there is an element to remove
     if (!coll.isEmpty()) {
       boolean removeSucceeded = true;
       try {
         coll.remove(coll.iterator().next());
       } catch (
           @SuppressWarnings("unused")
           UnsupportedOperationException e) {
         removeSucceeded = false;
       }
       if (removeSucceeded) {
         throw new AssertionError("broken collection (supports remove): " + coll);
       }
     }

     boolean addSucceeded = true;
     try {
       coll.add(null);
     } catch (
         @SuppressWarnings("unused")
         UnsupportedOperationException e) {
       addSucceeded = false;
     }
     if (addSucceeded) {
       throw new AssertionError("broken collection (supports add): " + coll);
     }

     boolean addAllSucceeded = true;
     try {
       coll.addAll(Collections.singleton(null));
     } catch (
         @SuppressWarnings("unused")
         UnsupportedOperationException e) {
       addAllSucceeded = false;
     }
     if (addAllSucceeded) {
       throw new AssertionError("broken collection (supports addAll): " + coll);
     }

     checkIterator(coll.iterator());
   }

   /**
    * Looks up the merge scheduler configured on {@code writer} and hands it to {@link
    * #syncConcurrentMerges(MergeScheduler)}.
    */
   public static void syncConcurrentMerges(IndexWriter writer) {
     syncConcurrentMerges(writer.getConfig().getMergeScheduler());
   }

   /**
    * Calls {@code sync()} on {@code ms} when it is a {@link ConcurrentMergeScheduler}; any other
    * scheduler (or {@code null}) is left alone.
    */
   public static void syncConcurrentMerges(MergeScheduler ms) {
     if (!(ms instanceof ConcurrentMergeScheduler)) {
       return;
     }
     final ConcurrentMergeScheduler concurrent = (ConcurrentMergeScheduler) ms;
     concurrent.sync();
   }

   /**
    * This runs the CheckIndex tool on the index in {@code dir}, with slow checks enabled. If any
    * issues are hit, a RuntimeException is thrown; else, the resulting {@link CheckIndex.Status} is
    * returned.
    */
   public static CheckIndex.Status checkIndex(Directory dir) throws IOException {
     return checkIndex(dir, true);
   }

   /**
    * Runs the CheckIndex tool on the index in {@code dir} with {@code failFast} disabled and a
    * private output buffer.
    *
    * @param doSlowChecks forwarded to {@link #checkIndex(Directory, boolean, boolean,
    *     ByteArrayOutputStream)}
    */
   public static CheckIndex.Status checkIndex(Directory dir, boolean doSlowChecks)
       throws IOException {
     return checkIndex(dir, doSlowChecks, false, null);
   }

   /**
    * Runs the CheckIndex tool on the index in {@code dir}.
    *
    * <p>If failFast is true, then throw the first exception when index corruption is hit, instead of
    * moving on to other fields/segments to look for any other corruption.
    *
    * @param output buffer receiving the tool's info stream; when {@code null} a private buffer is
    *     used
    * @return the status reported by CheckIndex, which is always clean
    * @throws RuntimeException if CheckIndex returns no status or a status that is not clean; the
    *     captured output is printed to {@code System.out} first
    */
   public static CheckIndex.Status checkIndex(
       Directory dir, boolean doSlowChecks, boolean failFast, ByteArrayOutputStream output)
       throws IOException {
     final ByteArrayOutputStream sink = output != null ? output : new ByteArrayOutputStream(1024);
     // TODO: actually use the dir's locking, unless test uses a special method?
     // some tests e.g. exception tests become much more complicated if they have to close the writer
     try (CheckIndex checker =
         new CheckIndex(dir, NoLockFactory.INSTANCE.obtainLock(dir, "bogus"))) {
       checker.setDoSlowChecks(doSlowChecks);
       checker.setFailFast(failFast);
       checker.setInfoStream(new PrintStream(sink, false, IOUtils.UTF_8), false);
       final CheckIndex.Status indexStatus = checker.checkIndex(null);

       final boolean healthy = indexStatus != null && indexStatus.clean;
       if (!healthy) {
         System.out.println("CheckIndex failed");
         System.out.println(sink.toString(IOUtils.UTF_8));
         throw new RuntimeException("CheckIndex failed");
       }
       // on success the captured output is only shown when the info stream is enabled
       if (LuceneTestCase.INFOSTREAM) {
         System.out.println(sink.toString(IOUtils.UTF_8));
       }
       return indexStatus;
     }
   }

   /**
    * Runs the CheckIndex-based reader checks, with slow checks enabled, on every leaf of the given
    * reader. If any issues are hit, a RuntimeException is thrown.
    */
   public static void checkReader(IndexReader reader) throws IOException {
     final List<LeafReaderContext> leaves = reader.leaves();
     for (LeafReaderContext leaf : leaves) {
       final LeafReader leafReader = leaf.reader();
       checkReader(leafReader, true);
     }
   }

   /**
    * Runs the individual CheckIndex tests (live docs, field infos, norms, postings, stored fields,
    * term vectors, doc values, points) against a single leaf reader, followed by a few sanity checks
    * of the reader API itself.
    *
    * @param reader leaf to check; readers that are not a {@link CodecReader} are wrapped with
    *     {@link SlowCodecReaderWrapper}
    * @param doSlowChecks forwarded to the postings and term vectors tests
    * @throws RuntimeException if {@code getFieldInfos()} does not return a cached instance
    */
   public static void checkReader(LeafReader reader, boolean doSlowChecks) throws IOException {
     final ByteArrayOutputStream captured = new ByteArrayOutputStream(1024);
     final PrintStream infoStream = new PrintStream(captured, false, IOUtils.UTF_8);

     final CodecReader codecReader;
     if (!(reader instanceof CodecReader)) {
       codecReader = SlowCodecReaderWrapper.wrap(reader);
     } else {
       codecReader = (CodecReader) reader;
       reader.checkIntegrity();
     }
     CheckIndex.testLiveDocs(codecReader, infoStream, true);
     CheckIndex.testFieldInfos(codecReader, infoStream, true);
     CheckIndex.testFieldNorms(codecReader, infoStream, true);
     CheckIndex.testPostings(codecReader, infoStream, false, doSlowChecks, true);
     CheckIndex.testStoredFields(codecReader, infoStream, true);
     CheckIndex.testTermVectors(codecReader, infoStream, false, doSlowChecks, true);
     CheckIndex.testDocValues(codecReader, infoStream, true);
     CheckIndex.testPoints(codecReader, infoStream, true);

     // some checks really against the reader API
     checkReaderSanity(reader);

     if (LuceneTestCase.INFOSTREAM) {
       System.out.println(captured.toString(IOUtils.UTF_8));
     }

     // FieldInfos should be cached at the reader and always return the same instance
     final Object firstLookup = reader.getFieldInfos();
     final Object secondLookup = reader.getFieldInfos();
     if (firstLookup != secondLookup) {
       throw new RuntimeException(
           "getFieldInfos() returned different instances for class: " + reader.getClass());
     }
   }

   /**
    * Used by {@link #checkReader(LeafReader, boolean)} to check some things really unrelated to the
    * index, just looking for bugs in index reader implementations.
    *
    * <p>For every field this verifies that norms are only returned when the field has them, and
    * that no doc values accessor other than the one matching the field's declared doc values type
    * returns a non-null instance.
    *
    * @throws RuntimeException if the reader hands out norms or doc values it should not have
    */
   private static void checkReaderSanity(LeafReader reader) throws IOException {
     for (FieldInfo info : reader.getFieldInfos()) {

       // reader shouldn't return normValues if the field does not have them
       if (!info.hasNorms()) {
         if (reader.getNormValues(info.name) != null) {
           throw new RuntimeException("field: " + info.name + " should omit norms but has them!");
         }
       }

       // reader shouldn't return docValues if the field does not have them
       // reader shouldn't return multiple docvalues types for the same field.
       switch (info.getDocValuesType()) {
         case NONE:
           // all five accessors must come back empty, including the sorted-numeric one
           if (reader.getBinaryDocValues(info.name) != null
               || reader.getNumericDocValues(info.name) != null
               || reader.getSortedDocValues(info.name) != null
               || reader.getSortedNumericDocValues(info.name) != null
               || reader.getSortedSetDocValues(info.name) != null) {
             throw new RuntimeException(
                 "field: " + info.name + " has docvalues but should omit them!");
           }
           break;
         case SORTED:
           if (reader.getBinaryDocValues(info.name) != null
               || reader.getNumericDocValues(info.name) != null
               || reader.getSortedNumericDocValues(info.name) != null
               || reader.getSortedSetDocValues(info.name) != null) {
             throw new RuntimeException(info.name + " returns multiple docvalues types!");
           }
           break;
         case SORTED_NUMERIC:
           if (reader.getBinaryDocValues(info.name) != null
               || reader.getNumericDocValues(info.name) != null
               || reader.getSortedSetDocValues(info.name) != null
               || reader.getSortedDocValues(info.name) != null) {
             throw new RuntimeException(info.name + " returns multiple docvalues types!");
           }
           break;
         case SORTED_SET:
           if (reader.getBinaryDocValues(info.name) != null
               || reader.getNumericDocValues(info.name) != null
               || reader.getSortedNumericDocValues(info.name) != null
               || reader.getSortedDocValues(info.name) != null) {
             throw new RuntimeException(info.name + " returns multiple docvalues types!");
           }
           break;
         case BINARY:
           if (reader.getNumericDocValues(info.name) != null
               || reader.getSortedDocValues(info.name) != null
               || reader.getSortedNumericDocValues(info.name) != null
               || reader.getSortedSetDocValues(info.name) != null) {
             throw new RuntimeException(info.name + " returns multiple docvalues types!");
           }
           break;
         case NUMERIC:
           if (reader.getBinaryDocValues(info.name) != null
               || reader.getSortedDocValues(info.name) != null
               || reader.getSortedNumericDocValues(info.name) != null
               || reader.getSortedSetDocValues(info.name) != null) {
             throw new RuntimeException(info.name + " returns multiple docvalues types!");
           }
           break;
         default:
           // a doc values type this check does not know about
           throw new AssertionError();
       }
     }
   }

   /**
    * Returns a random int between {@code start} and {@code end}; start and end are BOTH inclusive.
    * Delegates to {@link RandomNumbers#randomIntBetween(Random, int, int)}.
    */
   public static int nextInt(Random r, int start, int end) {
     return RandomNumbers.randomIntBetween(r, start, end);
   }

   /**
    * Returns a random long between {@code start} and {@code end}; start and end are BOTH inclusive.
    *
    * <p>The size of the interval is computed with {@link BigInteger} so that it cannot overflow.
    * Intervals of at most {@link Integer#MAX_VALUE} values are served by {@link Random#nextInt(int)},
    * larger ones by scaling {@link Random#nextDouble()}.
    */
   public static long nextLong(Random r, long start, long end) {
     assert end >= start : "start=" + start + ",end=" + end;
     final BigInteger lower = BigInteger.valueOf(start);
     final BigInteger span = BigInteger.valueOf(end).add(BigInteger.ONE).subtract(lower);
     final boolean fitsInInt = span.compareTo(BigInteger.valueOf(Integer.MAX_VALUE)) <= 0;
     if (fitsInInt) {
       return start + r.nextInt(span.intValue());
     }
     // probably not evenly distributed when range is large, but OK for tests
     final BigDecimal fraction = new BigDecimal(r.nextDouble());
     final BigInteger delta = new BigDecimal(span).multiply(fraction).toBigInteger();
     final long picked = lower.add(delta).longValue();
     assert picked >= start;
     assert picked <= end;
     return picked;
   }

   /**
    * Returns a randomish big integer with {@code 1 .. maxBytes} storage. The random bytes are
    * handed to {@link BigInteger#BigInteger(byte[])} as-is.
    */
   public static BigInteger nextBigInteger(Random random, int maxBytes) {
     final byte[] magnitude = new byte[TestUtil.nextInt(random, 1, maxBytes)];
     random.nextBytes(magnitude);
     return new BigInteger(magnitude);
   }

   /** Returns a random string of {@code 0 .. maxLength} characters, each between 'a' and 'z'. */
   public static String randomSimpleString(Random r, int maxLength) {
     return randomSimpleString(r, 0, maxLength);
   }

   /**
    * Returns a random string of {@code minLength .. maxLength} characters (both inclusive), each
    * between 'a' and 'z'.
    */
   public static String randomSimpleString(Random r, int minLength, int maxLength) {
     final int size = nextInt(r, minLength, maxLength);
     if (size == 0) {
       // allow 0 length
       return "";
     }
     final char[] letters = new char[size];
     int filled = 0;
     while (filled < size) {
       letters[filled++] = (char) TestUtil.nextInt(r, 'a', 'z');
     }
     return new String(letters);
   }

   /**
    * Returns a random string of {@code 0 .. maxLength} characters, each between {@code minChar}
    * and {@code maxChar} (both inclusive).
    */
   public static String randomSimpleStringRange(
       Random r, char minChar, char maxChar, int maxLength) {
     final int size = nextInt(r, 0, maxLength);
     if (size == 0) {
       // allow 0 length
       return "";
     }
     final char[] picked = new char[size];
     int filled = 0;
     while (filled < size) {
       picked[filled++] = (char) TestUtil.nextInt(r, minChar, maxChar);
     }
     return new String(picked);
   }

   /** Returns a random string of {@code 0 .. 10} characters, each between 'a' and 'z'. */
   public static String randomSimpleString(Random r) {
     return randomSimpleString(r, 0, 10);
   }

   /** Returns a random string of up to 20 chars, including the full unicode range. */
   public static String randomUnicodeString(Random r) {
     return randomUnicodeString(r, 20);
   }

   /**
    * Returns a random unicode string of {@code 0 .. maxLength} UTF-16 code units, filled by {@link
    * #randomFixedLengthUnicodeString(Random, char[], int, int)}.
    */
   public static String randomUnicodeString(Random r, int maxLength) {
     final int size = nextInt(r, 0, maxLength);
     if (size == 0) {
       // allow 0 length
       return "";
     }
     final char[] units = new char[size];
     randomFixedLengthUnicodeString(r, units, 0, size);
     return new String(units);
   }

   /**
    * Fills provided char[] with valid random unicode code unit sequence.
    *
    * <p>Exactly {@code length} code units are written, starting at {@code offset}. Surrogates are
    * only ever written as a complete high/low pair, and only when two slots are still free.
    *
    * @param random source of randomness
    * @param chars destination array
    * @param offset index of the first code unit to write
    * @param length number of code units to write
    */
   public static void randomFixedLengthUnicodeString(
       Random random, char[] chars, int offset, int length) {
     int i = offset;
     final int end = offset + length;
     while (i < end) {
       final int t = random.nextInt(5);
       // a pair needs two free slots; compare against end (not length) so offset > 0 works too
       if (0 == t && i < end - 1) {
         // Make a surrogate pair
         // High surrogate
         chars[i++] = (char) nextInt(random, 0xd800, 0xdbff);
         // Low surrogate
         chars[i++] = (char) nextInt(random, 0xdc00, 0xdfff);
       } else if (t <= 1) {
         // ASCII; also the fallback when t == 0 but only one slot is left
         chars[i++] = (char) random.nextInt(0x80);
       } else if (2 == t) {
         chars[i++] = (char) nextInt(random, 0x80, 0x7ff);
       } else if (3 == t) {
         chars[i++] = (char) nextInt(random, 0x800, 0xd7ff);
       } else if (4 == t) {
         // BMP above the surrogate block
         chars[i++] = (char) nextInt(random, 0xe000, 0xffff);
       }
     }
   }

   /**
    * Returns a String that's "regexpish" (contains lots of operators typically found in regular
    * expressions). If you call this enough times, you might get a valid regex! Uses a maximum
    * length hint of 20.
    */
   public static String randomRegexpishString(Random r) {
     return randomRegexpishString(r, 20);
   }

   /**
    * Upper repetition bound of the bounded quantifiers that stand in for '+' and '*' in {@link
    * #randomRegexpishString(Random, int)}.
    */
   private static final int maxRecursionBound = 5;

   /**
    * Operators for {@link #randomRegexpishString(Random, int)}. The unbounded '*' and '+' are
    * represented by repetitions bounded by {@link #maxRecursionBound}.
    */
   private static final List<String> ops =
       Arrays.asList(
           ".",
           "?",
           "{0," + maxRecursionBound + "}", // bounded replacement for '*'
           "{1," + maxRecursionBound + "}", // bounded replacement for '+'
           "(",
           ")",
           "-",
           "[",
           "]",
           "|");

   /**
    * Returns a String that's "regexpish" (contains lots of operators typically found in regular
    * expressions). If you call this enough times, you might get a valid regex!
    *
    * <p>Each of the randomly many parts is, with equal chance, either a letter between 'a' and 'z'
    * or one of the operators in {@code ops}.
    *
    * <p>Note: to avoid practically endless backtracking patterns we replace asterisk and plus
    * operators with bounded repetitions. See LUCENE-4111 for more info.
    *
    * @param maxLength A hint about maximum length of the regexpish string. It may be exceeded by a
    *     few characters.
    */
   public static String randomRegexpishString(Random r, int maxLength) {
     final StringBuilder out = new StringBuilder(maxLength);
     int remaining = nextInt(r, 0, maxLength);
     while (remaining > 0) {
       final boolean letter = r.nextBoolean();
       if (!letter) {
         out.append(RandomPicks.randomFrom(r, ops));
       } else {
         out.append((char) RandomNumbers.randomIntBetween(r, 'a', 'z'));
       }
       remaining--;
     }
     return out.toString();
   }

   /**
    * HTML character entity names, without the leading '&amp;' and the trailing ';'. {@link
    * #randomHtmlishString(Random, int)} picks from this table when it emits a named entity.
    */
   private static final String[] HTML_CHAR_ENTITIES = {
     "AElig",
     "Aacute",
     "Acirc",
     "Agrave",
     "Alpha",
     "AMP",
     "Aring",
     "Atilde",
     "Auml",
     "Beta",
     "COPY",
     "Ccedil",
     "Chi",
     "Dagger",
     "Delta",
     "ETH",
     "Eacute",
     "Ecirc",
     "Egrave",
     "Epsilon",
     "Eta",
     "Euml",
     "Gamma",
     "GT",
     "Iacute",
     "Icirc",
     "Igrave",
     "Iota",
     "Iuml",
     "Kappa",
     "Lambda",
     "LT",
     "Mu",
     "Ntilde",
     "Nu",
     "OElig",
     "Oacute",
     "Ocirc",
     "Ograve",
     "Omega",
     "Omicron",
     "Oslash",
     "Otilde",
     "Ouml",
     "Phi",
     "Pi",
     "Prime",
     "Psi",
     "QUOT",
     "REG",
     "Rho",
     "Scaron",
     "Sigma",
     "THORN",
     "Tau",
     "Theta",
     "Uacute",
     "Ucirc",
     "Ugrave",
     "Upsilon",
     "Uuml",
     "Xi",
     "Yacute",
     "Yuml",
     "Zeta",
     "aacute",
     "acirc",
     "acute",
     "aelig",
     "agrave",
     "alefsym",
     "alpha",
     "amp",
     "and",
     "ang",
     "apos",
     "aring",
     "asymp",
     "atilde",
     "auml",
     "bdquo",
     "beta",
     "brvbar",
     "bull",
     "cap",
     "ccedil",
     "cedil",
     "cent",
     "chi",
     "circ",
     "clubs",
     "cong",
     "copy",
     "crarr",
     "cup",
     "curren",
     "dArr",
     "dagger",
     "darr",
     "deg",
     "delta",
     "diams",
     "divide",
     "eacute",
     "ecirc",
     "egrave",
     "empty",
     "emsp",
     "ensp",
     "epsilon",
     "equiv",
     "eta",
     "eth",
     "euml",
     "euro",
     "exist",
     "fnof",
     "forall",
     "frac12",
     "frac14",
     "frac34",
     "frasl",
     "gamma",
     "ge",
     "gt",
     "hArr",
     "harr",
     "hearts",
     "hellip",
     "iacute",
     "icirc",
     "iexcl",
     "igrave",
     "image",
     "infin",
     "int",
     "iota",
     "iquest",
     "isin",
     "iuml",
     "kappa",
     "lArr",
     "lambda",
     "lang",
     "laquo",
     "larr",
     "lceil",
     "ldquo",
     "le",
     "lfloor",
     "lowast",
     "loz",
     "lrm",
     "lsaquo",
     "lsquo",
     "lt",
     "macr",
     "mdash",
     "micro",
     "middot",
     "minus",
     "mu",
     "nabla",
     "nbsp",
     "ndash",
     "ne",
     "ni",
     "not",
     "notin",
     "nsub",
     "ntilde",
     "nu",
     "oacute",
     "ocirc",
     "oelig",
     "ograve",
     "oline",
     "omega",
     "omicron",
     "oplus",
     "or",
     "ordf",
     "ordm",
     "oslash",
     "otilde",
     "otimes",
     "ouml",
     "para",
     "part",
     "permil",
     "perp",
     "phi",
     "pi",
     "piv",
     "plusmn",
     "pound",
     "prime",
     "prod",
     "prop",
     "psi",
     "quot",
     "rArr",
     "radic",
     "rang",
     "raquo",
     "rarr",
     "rceil",
     "rdquo",
     "real",
     "reg",
     "rfloor",
     "rho",
     "rlm",
     "rsaquo",
     "rsquo",
     "sbquo",
     "scaron",
     "sdot",
     "sect",
     "shy",
     "sigma",
     "sigmaf",
     "sim",
     "spades",
     "sub",
     "sube",
     "sum",
     "sup",
     "sup1",
     "sup2",
     "sup3",
     "supe",
     "szlig",
     "tau",
     "there4",
     "theta",
     "thetasym",
     "thinsp",
     "thorn",
     "tilde",
     "times",
     "trade",
     "uArr",
     "uacute",
     "uarr",
     "ucirc",
     "ugrave",
     "uml",
     "upsih",
     "upsilon",
     "uuml",
     "weierp",
     "xi",
     "yacute",
     "yen",
     "yuml",
     "zeta",
     "zwj",
     "zwnj"
   };

   /**
    * Returns a random string assembled from HTML-like fragments: opening and closing tags, comment
    * and script delimiters, processing instruction markers, quotes, named and numeric character
    * references, numbers, whitespace and plain lowercase words. The result is generally NOT
    * well-formed HTML.
    *
    * <p>Throughout the method, {@code "x".substring(nextInt(random, 0, 1))} is used to emit a token
    * or nothing, and {@code "    ".substring(nextInt(random, 0, n))} to emit between 0 and n spaces.
    *
    * @param random source of randomness
    * @param numElements upper bound (inclusive) for the number of fragments that are appended
    * @return the generated string; empty when zero fragments were drawn
    */
   public static String randomHtmlishString(Random random, int numElements) {
     final int end = nextInt(random, 0, numElements);
     if (end == 0) {
       // allow 0 length
       return "";
     }
     StringBuilder sb = new StringBuilder();
     for (int i = 0; i < end; i++) {
       // 25 equally likely fragment kinds; 24 is handled by the default branch
       int val = random.nextInt(25);
       switch (val) {
         case 0:
           sb.append("<p>");
           break;
         case 1:
           {
             // opening tag with a random name and random attributes, possibly left unterminated
             sb.append("<");
             sb.append("    ".substring(nextInt(random, 0, 4)));
             sb.append(randomSimpleString(random));
             // note: the loop bound is drawn anew on every iteration
             for (int j = 0; j < nextInt(random, 0, 10); ++j) {
               sb.append(' ');
               sb.append(randomSimpleString(random));
               sb.append(" ".substring(nextInt(random, 0, 1)));
               sb.append('=');
               sb.append(" ".substring(nextInt(random, 0, 1)));
               sb.append("\"".substring(nextInt(random, 0, 1)));
               sb.append(randomSimpleString(random));
               sb.append("\"".substring(nextInt(random, 0, 1)));
             }
             sb.append("    ".substring(nextInt(random, 0, 4)));
             sb.append("/".substring(nextInt(random, 0, 1)));
             sb.append(">".substring(nextInt(random, 0, 1)));
             break;
           }
         case 2:
           {
             // closing tag with a random name, possibly left unterminated
             sb.append("</");
             sb.append("    ".substring(nextInt(random, 0, 4)));
             sb.append(randomSimpleString(random));
             sb.append("    ".substring(nextInt(random, 0, 4)));
             sb.append(">".substring(nextInt(random, 0, 1)));
             break;
           }
         case 3:
           sb.append(">");
           break;
         case 4:
           sb.append("</p>");
           break;
         case 5:
           sb.append("<!--");
           break;
         case 6:
           sb.append("<!--#");
           break;
         case 7:
           sb.append("<script><!-- f('");
           break;
         case 8:
           sb.append("</script>");
           break;
         case 9:
           sb.append("<?");
           break;
         case 10:
           sb.append("?>");
           break;
         case 11:
           sb.append("\"");
           break;
         case 12:
           sb.append("\\\"");
           break;
         case 13:
           sb.append("'");
           break;
         case 14:
           sb.append("\\'");
           break;
         case 15:
           sb.append("-->");
           break;
         case 16:
           {
             // named character reference: random name, known entity name, or (for 2) no name
             sb.append("&");
             switch (nextInt(random, 0, 2)) {
               case 0:
                 sb.append(randomSimpleString(random));
                 break;
               case 1:
                 sb.append(HTML_CHAR_ENTITIES[random.nextInt(HTML_CHAR_ENTITIES.length)]);
                 break;
             }
             sb.append(";".substring(nextInt(random, 0, 1)));
             break;
           }
         case 17:
           {
             // decimal character reference, half of the time without any digits
             sb.append("&#");
             if (0 == nextInt(random, 0, 1)) {
               sb.append(nextInt(random, 0, Integer.MAX_VALUE - 1));
               sb.append(";".substring(nextInt(random, 0, 1)));
             }
             break;
           }
         case 18:
           {
             // hexadecimal character reference, half of the time without any digits
             sb.append("&#x");
             if (0 == nextInt(random, 0, 1)) {
               sb.append(Integer.toString(nextInt(random, 0, Integer.MAX_VALUE - 1), 16));
               sb.append(";".substring(nextInt(random, 0, 1)));
             }
             break;
           }

         case 19:
           sb.append(";");
           break;
         case 20:
           sb.append(nextInt(random, 0, Integer.MAX_VALUE - 1));
           break;
         case 21:
           sb.append("\n");
           break;
         case 22:
           // between 0 and 10 spaces
           sb.append("          ".substring(nextInt(random, 0, 10)));
           break;
         case 23:
           {
             // opening or closing script/style/br tag in random case, with optional padding
             sb.append("<");
             if (0 == nextInt(random, 0, 3)) {
               sb.append("          ".substring(nextInt(random, 1, 10)));
             }
             if (0 == nextInt(random, 0, 1)) {
               sb.append("/");
               if (0 == nextInt(random, 0, 3)) {
                 sb.append("          ".substring(nextInt(random, 1, 10)));
               }
             }
             switch (nextInt(random, 0, 3)) {
               case 0:
                 sb.append(randomlyRecaseCodePoints(random, "script"));
                 break;
               case 1:
                 sb.append(randomlyRecaseCodePoints(random, "style"));
                 break;
               case 2:
                 sb.append(randomlyRecaseCodePoints(random, "br"));
                 break;
                 // default: append nothing
             }
             sb.append(">".substring(nextInt(random, 0, 1)));
             break;
           }
         default:
           // plain lowercase word
           sb.append(randomSimpleString(random));
       }
     }
     return sb.toString();
   }

   /**
    * Builds a copy of {@code str} in which each code point is, independently and at random,
    * upper-cased, lower-cased, or kept unchanged.
    *
    * @param random source of randomness; consulted once per code point
    * @param str the string to recase
    * @return the recased string
    */
   public static String randomlyRecaseCodePoints(Random random, String str) {
     final StringBuilder recased = new StringBuilder();
     for (int offset = 0; offset < str.length(); ) {
       final int original = str.codePointAt(offset);
       // Advance by one or two chars depending on whether this is a supplementary code point.
       offset += Character.charCount(original);
       final int choice = nextInt(random, 0, 2);
       if (choice == 0) {
         recased.appendCodePoint(Character.toUpperCase(original));
       } else if (choice == 1) {
         recased.appendCodePoint(Character.toLowerCase(original));
       } else if (choice == 2) {
         recased.appendCodePoint(original); // leave intact
       }
     }
     return recased.toString();
   }

   // First code point of each range used by randomRealisticUnicodeString; entry i is paired with
   // blockEnds[i], and both bounds are passed to nextInt as an inclusive range.
   private static final int[] blockStarts = {
     0x0000, 0x0080, 0x0100, 0x0180, 0x0250, 0x02B0, 0x0300, 0x0370, 0x0400, 0x0500, 0x0530, 0x0590,
     0x0600, 0x0700, 0x0750, 0x0780, 0x07C0, 0x0800, 0x0900, 0x0980, 0x0A00, 0x0A80, 0x0B00, 0x0B80,
     0x0C00, 0x0C80, 0x0D00, 0x0D80, 0x0E00, 0x0E80, 0x0F00, 0x1000, 0x10A0, 0x1100, 0x1200, 0x1380,
     0x13A0, 0x1400, 0x1680, 0x16A0, 0x1700, 0x1720, 0x1740, 0x1760, 0x1780, 0x1800, 0x18B0, 0x1900,
     0x1950, 0x1980, 0x19E0, 0x1A00, 0x1A20, 0x1B00, 0x1B80, 0x1C00, 0x1C50, 0x1CD0, 0x1D00, 0x1D80,
     0x1DC0, 0x1E00, 0x1F00, 0x2000, 0x2070, 0x20A0, 0x20D0, 0x2100, 0x2150, 0x2190, 0x2200, 0x2300,
     0x2400, 0x2440, 0x2460, 0x2500, 0x2580, 0x25A0, 0x2600, 0x2700, 0x27C0, 0x27F0, 0x2800, 0x2900,
     0x2980, 0x2A00, 0x2B00, 0x2C00, 0x2C60, 0x2C80, 0x2D00, 0x2D30, 0x2D80, 0x2DE0, 0x2E00, 0x2E80,
     0x2F00, 0x2FF0, 0x3000, 0x3040, 0x30A0, 0x3100, 0x3130, 0x3190, 0x31A0, 0x31C0, 0x31F0, 0x3200,
     0x3300, 0x3400, 0x4DC0, 0x4E00, 0xA000, 0xA490, 0xA4D0, 0xA500, 0xA640, 0xA6A0, 0xA700, 0xA720,
     0xA800, 0xA830, 0xA840, 0xA880, 0xA8E0, 0xA900, 0xA930, 0xA960, 0xA980, 0xAA00, 0xAA60, 0xAA80,
     0xABC0, 0xAC00, 0xD7B0, 0xE000, 0xF900, 0xFB00, 0xFB50, 0xFE00, 0xFE10, 0xFE20, 0xFE30, 0xFE50,
     0xFE70, 0xFF00, 0xFFF0, 0x10000, 0x10080, 0x10100, 0x10140, 0x10190, 0x101D0, 0x10280, 0x102A0,
     0x10300, 0x10330, 0x10380, 0x103A0, 0x10400, 0x10450, 0x10480, 0x10800, 0x10840, 0x10900,
     0x10920, 0x10A00, 0x10A60, 0x10B00, 0x10B40, 0x10B60, 0x10C00, 0x10E60, 0x11080, 0x12000,
     0x12400, 0x13000, 0x1D000, 0x1D100, 0x1D200, 0x1D300, 0x1D360, 0x1D400, 0x1F000, 0x1F030,
     0x1F100, 0x1F200, 0x20000, 0x2A700, 0x2F800, 0xE0000, 0xE0100, 0xF0000, 0x100000
   };

   // Last code point of each range used by randomRealisticUnicodeString; entry i closes the
   // range opened by blockStarts[i], and both bounds are passed to nextInt as an inclusive range.
   private static final int[] blockEnds = {
     0x007F, 0x00FF, 0x017F, 0x024F, 0x02AF, 0x02FF, 0x036F, 0x03FF, 0x04FF, 0x052F, 0x058F, 0x05FF,
     0x06FF, 0x074F, 0x077F, 0x07BF, 0x07FF, 0x083F, 0x097F, 0x09FF, 0x0A7F, 0x0AFF, 0x0B7F, 0x0BFF,
     0x0C7F, 0x0CFF, 0x0D7F, 0x0DFF, 0x0E7F, 0x0EFF, 0x0FFF, 0x109F, 0x10FF, 0x11FF, 0x137F, 0x139F,
     0x13FF, 0x167F, 0x169F, 0x16FF, 0x171F, 0x173F, 0x175F, 0x177F, 0x17FF, 0x18AF, 0x18FF, 0x194F,
     0x197F, 0x19DF, 0x19FF, 0x1A1F, 0x1AAF, 0x1B7F, 0x1BBF, 0x1C4F, 0x1C7F, 0x1CFF, 0x1D7F, 0x1DBF,
     0x1DFF, 0x1EFF, 0x1FFF, 0x206F, 0x209F, 0x20CF, 0x20FF, 0x214F, 0x218F, 0x21FF, 0x22FF, 0x23FF,
     0x243F, 0x245F, 0x24FF, 0x257F, 0x259F, 0x25FF, 0x26FF, 0x27BF, 0x27EF, 0x27FF, 0x28FF, 0x297F,
     0x29FF, 0x2AFF, 0x2BFF, 0x2C5F, 0x2C7F, 0x2CFF, 0x2D2F, 0x2D7F, 0x2DDF, 0x2DFF, 0x2E7F, 0x2EFF,
     0x2FDF, 0x2FFF, 0x303F, 0x309F, 0x30FF, 0x312F, 0x318F, 0x319F, 0x31BF, 0x31EF, 0x31FF, 0x32FF,
     0x33FF, 0x4DBF, 0x4DFF, 0x9FFF, 0xA48F, 0xA4CF, 0xA4FF, 0xA63F, 0xA69F, 0xA6FF, 0xA71F, 0xA7FF,
     0xA82F, 0xA83F, 0xA87F, 0xA8DF, 0xA8FF, 0xA92F, 0xA95F, 0xA97F, 0xA9DF, 0xAA5F, 0xAA7F, 0xAADF,
     0xABFF, 0xD7AF, 0xD7FF, 0xF8FF, 0xFAFF, 0xFB4F, 0xFDFF, 0xFE0F, 0xFE1F, 0xFE2F, 0xFE4F, 0xFE6F,
     0xFEFF, 0xFFEF, 0xFFFF, 0x1007F, 0x100FF, 0x1013F, 0x1018F, 0x101CF, 0x101FF, 0x1029F, 0x102DF,
     0x1032F, 0x1034F, 0x1039F, 0x103DF, 0x1044F, 0x1047F, 0x104AF, 0x1083F, 0x1085F, 0x1091F,
     0x1093F, 0x10A5F, 0x10A7F, 0x10B3F, 0x10B5F, 0x10B7F, 0x10C4F, 0x10E7F, 0x110CF, 0x123FF,
     0x1247F, 0x1342F, 0x1D0FF, 0x1D1FF, 0x1D24F, 0x1D35F, 0x1D37F, 0x1D7FF, 0x1F02F, 0x1F09F,
     0x1F1FF, 0x1F2FF, 0x2A6DF, 0x2B73F, 0x2FA1F, 0xE007F, 0xE01EF, 0xFFFFF, 0x10FFFF
   };

   /**
    * Returns a random string of 0 to 20 code points, all taken from the same Unicode block.
    *
    * @param r source of randomness
    * @return the generated string, possibly empty
    */
   public static String randomRealisticUnicodeString(Random r) {
     return randomRealisticUnicodeString(r, 20);
   }

   /**
    * Returns a random string of up to {@code maxLength} code points, all taken from the same
    * Unicode block.
    *
    * @param r source of randomness
    * @param maxLength largest number of code points the result may contain
    * @return the generated string, possibly empty
    */
   public static String randomRealisticUnicodeString(Random r, int maxLength) {
     return randomRealisticUnicodeString(r, 0, maxLength);
   }

   /**
    * Returns a random string whose number of code points lies between {@code minLength} and {@code
    * maxLength}, with every code point drawn from one randomly selected Unicode block.
    *
    * @param r source of randomness
    * @param minLength smallest number of code points to generate
    * @param maxLength largest number of code points to generate
    * @return the generated string
    */
   public static String randomRealisticUnicodeString(Random r, int minLength, int maxLength) {
     final int codePointCount = nextInt(r, minLength, maxLength);
     final int blockIndex = r.nextInt(blockStarts.length);
     final int first = blockStarts[blockIndex];
     final int last = blockEnds[blockIndex];
     final StringBuilder out = new StringBuilder();
     int remaining = codePointCount;
     while (remaining-- > 0) {
       out.appendCodePoint(nextInt(r, first, last));
     }
     return out.toString();
   }

   /**
    * Returns a random string whose UTF-8 encoding occupies {@code length} bytes.
    *
    * <p>Each step picks a character class (1, 2, 3 or 4 UTF-8 bytes) that still fits into the
    * remaining byte budget, so the budget is consumed exactly.
    *
    * @param r source of randomness
    * @param length the UTF-8 byte length to produce
    * @return the generated string
    */
   public static String randomFixedByteLengthUnicodeString(Random r, int length) {
     // Every char written accounts for at least one UTF-8 byte, so this capacity is ample.
     final char[] chars = new char[length * 3];
     int remaining = length;
     int used = 0;
     while (used < chars.length && remaining != 0) {
       // Restrict the choice to encodings that do not overshoot the remaining budget.
       final int kind;
       if (remaining >= 4) {
         kind = r.nextInt(5);
       } else if (remaining >= 3) {
         kind = r.nextInt(4);
       } else if (remaining >= 2) {
         kind = r.nextInt(2);
       } else {
         kind = 0;
       }
       switch (kind) {
         case 0: // one byte
           chars[used] = (char) r.nextInt(0x80);
           remaining -= 1;
           break;
         case 1: // two bytes
           chars[used] = (char) nextInt(r, 0x80, 0x7ff);
           remaining -= 2;
           break;
         case 2: // three bytes, below the surrogate range
           chars[used] = (char) nextInt(r, 0x800, 0xd7ff);
           remaining -= 3;
           break;
         case 3: // three bytes, above the surrogate range
           chars[used] = (char) nextInt(r, 0xe000, 0xffff);
           remaining -= 3;
           break;
         case 4: // four bytes: a high surrogate followed by a low surrogate
           chars[used++] = (char) nextInt(r, 0xd800, 0xdbff);
           chars[used] = (char) nextInt(r, 0xdc00, 0xdfff);
           remaining -= 4;
           break;
       }
       used++;
     }
     return new String(chars, 0, used);
   }

   /**
    * Returns a random binary term whose length is between 0 and 14 bytes.
    *
    * @param r source of randomness for both the length and the byte content
    */
   public static BytesRef randomBinaryTerm(Random r) {
     final int termLength = r.nextInt(15);
     final BytesRef term = new BytesRef(termLength);
     // Fill the backing array with random bytes, then mark them as valid content.
     r.nextBytes(term.bytes);
     term.length = termLength;
     return term;
   }

   /**
    * Returns a Codec that can read any of the default codecs and formats, but reports the given
    * postings format for every field, so it always writes in that format.
    *
    * @param format the postings format to force
    */
   public static Codec alwaysPostingsFormat(final PostingsFormat format) {
     // TODO: we really need for postings impls etc to announce themselves
     // (and maybe their params, too) to infostream on flush and merge.
     // otherwise in a real debugging situation we won't know whats going on!
     if (LuceneTestCase.VERBOSE) {
       System.out.println("forcing postings format to:" + format);
     }
     final Codec forcingCodec =
         new AssertingCodec() {
           @Override
           public PostingsFormat getPostingsFormatForField(String fieldName) {
             // Same format regardless of the field.
             return format;
           }
         };
     return forcingCodec;
   }

   /**
    * Returns a Codec that can read any of the default codecs and formats, but reports the given
    * doc values format for every field, so it always writes in that format.
    *
    * @param format the doc values format to force
    */
   public static Codec alwaysDocValuesFormat(final DocValuesFormat format) {
     // TODO: we really need for docvalues impls etc to announce themselves
     // (and maybe their params, too) to infostream on flush and merge.
     // otherwise in a real debugging situation we won't know whats going on!
     if (LuceneTestCase.VERBOSE) {
       System.out.println("TestUtil: forcing docvalues format to:" + format);
     }
     final Codec forcingCodec =
         new AssertingCodec() {
           @Override
           public DocValuesFormat getDocValuesFormatForField(String fieldName) {
             // Same format regardless of the field.
             return format;
           }
         };
     return forcingCodec;
   }

   /**
    * Returns the actual default codec (e.g. LuceneMNCodec) for this version of Lucene. This may be
    * different than {@link Codec#getDefault()} because that is randomized.
    *
    * @return a newly created {@code Lucene90Codec}
    */
   public static Codec getDefaultCodec() {
     return new Lucene90Codec();
   }

   /**
    * Returns the actual default postings format (e.g. LuceneMNPostingsFormat) for this version of
    * Lucene.
    *
    * @return a newly created {@code Lucene90PostingsFormat}
    */
   public static PostingsFormat getDefaultPostingsFormat() {
     return new Lucene90PostingsFormat();
   }

   /**
    * Returns the actual default postings format (e.g. LuceneMNPostingsFormat) for this version of
    * Lucene, constructed with the given block size limits.
    *
    * @param minItemsPerBlock forwarded unchanged to the {@code Lucene90PostingsFormat} constructor
    * @param maxItemsPerBlock forwarded unchanged to the {@code Lucene90PostingsFormat} constructor
    * @lucene.internal this may disappear at any time
    */
   public static PostingsFormat getDefaultPostingsFormat(
       int minItemsPerBlock, int maxItemsPerBlock) {
     return new Lucene90PostingsFormat(minItemsPerBlock, maxItemsPerBlock);
   }

   /**
    * Returns a randomly chosen postings format that supports term ordinals: either {@code
    * LuceneFixedGap} or {@code BlockTreeOrdsPostingsFormat}.
    */
   public static PostingsFormat getPostingsFormatWithOrds(Random r) {
     final int pick = r.nextInt(2);
     if (pick == 0) {
       return new LuceneFixedGap();
     }
     if (pick == 1) {
       return new BlockTreeOrdsPostingsFormat();
     }
     // TODO: these don't actually support ords!
     // a third option would be: new FSTOrdPostingsFormat()
     throw new AssertionError();
   }

   /**
    * Returns the actual default docvalues format (e.g. LuceneMNDocValuesFormat) for this version of
    * Lucene.
    *
    * @return a newly created {@code Lucene90DocValuesFormat}
    */
   public static DocValuesFormat getDefaultDocValuesFormat() {
     return new Lucene90DocValuesFormat();
   }

   // TODO: generalize all 'test-checks-for-crazy-codecs' to
   // annotations (LUCENE-3489)
   /**
    * Returns the name of the postings format that {@link Codec#getDefault()} uses for the given
    * field.
    */
   public static String getPostingsFormat(String field) {
     return getPostingsFormat(Codec.getDefault(), field);
   }

   /**
    * Returns the name of the postings format that {@code codec} uses for {@code field}. When the
    * codec's postings format is a {@code PerFieldPostingsFormat}, the per-field format is looked
    * up; otherwise the codec-wide format's name is returned.
    */
   public static String getPostingsFormat(Codec codec, String field) {
     final PostingsFormat format = codec.postingsFormat();
     if (!(format instanceof PerFieldPostingsFormat)) {
       return format.getName();
     }
     final PerFieldPostingsFormat perField = (PerFieldPostingsFormat) format;
     return perField.getPostingsFormatForField(field).getName();
   }

   /**
    * Returns the name of the doc values format that {@link Codec#getDefault()} uses for the given
    * field.
    */
   public static String getDocValuesFormat(String field) {
     return getDocValuesFormat(Codec.getDefault(), field);
   }

   /**
    * Returns the name of the doc values format that {@code codec} uses for {@code field}. When the
    * codec's doc values format is a {@code PerFieldDocValuesFormat}, the per-field format is looked
    * up; otherwise the codec-wide format's name is returned.
    */
   public static String getDocValuesFormat(Codec codec, String field) {
     final DocValuesFormat format = codec.docValuesFormat();
     if (!(format instanceof PerFieldDocValuesFormat)) {
       return format.getName();
     }
     final PerFieldDocValuesFormat perField = (PerFieldDocValuesFormat) format;
     return perField.getDocValuesFormatForField(field).getName();
   }

   // TODO: remove this, push this test to Lucene40/Lucene42 codec tests
   /**
    * Returns false when the doc values format in effect for {@code field} is named "Lucene40" or
    * "Lucene42", and true for every other format.
    */
   public static boolean fieldSupportsHugeBinaryDocValues(String field) {
     final String formatName = getDocValuesFormat(field);
     return !(formatName.equals("Lucene40") || formatName.equals("Lucene42"));
   }

   /**
    * Returns the actual default vector format (e.g. LuceneMNVectorFormat) for this version of
    * Lucene.
    *
    * @return a newly created {@code Lucene90HnswVectorFormat}
    */
   public static VectorFormat getDefaultVectorFormat() {
     return new Lucene90HnswVectorFormat();
   }

   /**
    * Returns true if {@code dir} lists at least one file that is not named "write.lock".
    *
    * @throws IOException if listing the directory fails
    */
   public static boolean anyFilesExceptWriteLock(Directory dir) throws IOException {
     final String[] names = dir.listAll();
     if (names.length == 1) {
       // A lone file only counts when it is not the write lock.
       return !names[0].equals("write.lock");
     }
     return names.length > 1;
   }

   /**
    * Adds every leaf of the given readers to {@code writer}, wrapping each leaf reader with {@code
    * SlowCodecReaderWrapper} first.
    *
    * @throws IOException if wrapping a leaf or adding the indexes fails
    */
   public static void addIndexesSlowly(IndexWriter writer, DirectoryReader... readers)
       throws IOException {
     final List<CodecReader> wrapped = new ArrayList<>();
     for (DirectoryReader reader : readers) {
       for (LeafReaderContext leaf : reader.leaves()) {
         wrapped.add(SlowCodecReaderWrapper.wrap(leaf.reader()));
       }
     }
     writer.addIndexes(wrapped.toArray(new CodecReader[0]));
   }

   /**
    * Best-effort tuning of the writer's merge policy and merge scheduler to keep the number of
    * open files lowish.
    */
   public static void reduceOpenFiles(IndexWriter w) {
     final MergePolicy policy = w.getConfig().getMergePolicy();
     policy.setNoCFSRatio(1.0);
     if (policy instanceof LogMergePolicy) {
       final LogMergePolicy logPolicy = (LogMergePolicy) policy;
       // Cap at 5, but never raise a value that is already lower.
       logPolicy.setMergeFactor(Math.min(5, logPolicy.getMergeFactor()));
     } else if (policy instanceof TieredMergePolicy) {
       final TieredMergePolicy tieredPolicy = (TieredMergePolicy) policy;
       tieredPolicy.setMaxMergeAtOnce(Math.min(5, tieredPolicy.getMaxMergeAtOnce()));
       tieredPolicy.setSegmentsPerTier(Math.min(5, tieredPolicy.getSegmentsPerTier()));
     }

     final MergeScheduler scheduler = w.getConfig().getMergeScheduler();
     if (scheduler instanceof ConcurrentMergeScheduler) {
       // wtf... shouldnt it be even lower since it's 1 by default?!?!
       final ConcurrentMergeScheduler concurrent = (ConcurrentMergeScheduler) scheduler;
       concurrent.setMaxMergesAndThreads(3, 2);
     }
   }

   /**
    * Checks some basic behaviour of an AttributeImpl: collects everything the attribute reports
    * through {@code reflectWith} and asserts that it equals the expected map.
    *
    * @param att the attribute to reflect
    * @param reflectedValues the expected values, keyed by "AttributeClass#key" where the class part
    *     is the attribute interface's fully qualified name
    */
   public static <T> void assertAttributeReflection(
       final AttributeImpl att, Map<String, T> reflectedValues) {
     final Map<String, Object> actual = new HashMap<>();
     final AttributeReflector collector =
         new AttributeReflector() {
           @Override
           public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
             actual.put(attClass.getName() + '#' + key, value);
           }
         };
     att.reflectWith(collector);
     Assert.assertEquals("Reflection does not produce same map", reflectedValues, actual);
   }

   /**
    * Asserts that two {@link TopDocs} contain the same hits (doc, score and, for {@code FieldDoc}s,
    * sort values) and that their total hit counts do not contradict each other.
    *
    * <p>Exact counts must match; when only one side is exact, the other side's value must not
    * exceed it.
    */
   public static void assertConsistent(TopDocs expected, TopDocs actual) {
     final TotalHits expectedTotal = expected.totalHits;
     final TotalHits actualTotal = actual.totalHits;
     Assert.assertEquals("wrong total hits", expectedTotal.value == 0, actualTotal.value == 0);

     final boolean expectedExact = expectedTotal.relation == TotalHits.Relation.EQUAL_TO;
     final boolean actualExact = actualTotal.relation == TotalHits.Relation.EQUAL_TO;
     if (expectedExact && actualExact) {
       Assert.assertEquals("wrong total hits", expectedTotal.value, actualTotal.value);
     } else if (expectedExact) {
       Assert.assertTrue("wrong total hits", expectedTotal.value >= actualTotal.value);
     } else if (actualExact) {
       Assert.assertTrue("wrong total hits", expectedTotal.value <= actualTotal.value);
     }

     final ScoreDoc[] expectedHits = expected.scoreDocs;
     final ScoreDoc[] actualHits = actual.scoreDocs;
     Assert.assertEquals("wrong hit count", expectedHits.length, actualHits.length);
     for (int i = 0; i < expectedHits.length; i++) {
       final ScoreDoc want = expectedHits[i];
       final ScoreDoc got = actualHits[i];
       Assert.assertEquals("wrong hit docID", want.doc, got.doc);
       Assert.assertEquals("wrong hit score", want.score, got.score, 0.0);
       if (!(want instanceof FieldDoc)) {
         Assert.assertFalse(got instanceof FieldDoc);
         continue;
       }
       Assert.assertTrue(got instanceof FieldDoc);
       Assert.assertArrayEquals(
           "wrong sort field values", ((FieldDoc) want).fields, ((FieldDoc) got).fields);
     }
   }

   // NOTE: this is likely buggy, and cannot clone fields
   // with tokenStreamValues, etc.  Use at your own risk!!

   // TODO: is there a pre-existing way to do this!!!
   /**
    * Creates a new Document containing one re-created field per field of {@code doc1}.
    *
    * <p>NUMERIC, BINARY and SORTED doc values fields and point fields are rebuilt from their
    * numeric/binary value; every other field is rebuilt from its string value and field type.
    *
    * @throws IllegalStateException for doc values types other than NUMERIC, BINARY and SORTED
    */
   public static Document cloneDocument(Document doc1) {
     final Document copy = new Document();
     for (IndexableField indexable : doc1.getFields()) {
       final Field source = (Field) indexable;
       final DocValuesType dvType = source.fieldType().docValuesType();
       final int dimCount = source.fieldType().pointDimensionCount();
       final Field cloned;
       if (dvType == DocValuesType.NONE) {
         if (dimCount == 0) {
           cloned = new Field(source.name(), source.stringValue(), source.fieldType());
         } else {
           // Point field: copy just the referenced slice of the backing array.
           final BytesRef value = source.binaryValue();
           final byte[] packed = new byte[value.length];
           System.arraycopy(value.bytes, value.offset, packed, 0, value.length);
           cloned = new BinaryPoint(source.name(), packed, source.fieldType());
         }
       } else {
         switch (dvType) {
           case NUMERIC:
             cloned = new NumericDocValuesField(source.name(), source.numericValue().longValue());
             break;
           case BINARY:
             cloned = new BinaryDocValuesField(source.name(), source.binaryValue());
             break;
           case SORTED:
             cloned = new SortedDocValuesField(source.name(), source.binaryValue());
             break;
           default:
             throw new IllegalStateException("unknown Type: " + dvType);
         }
       }
       copy.add(cloned);
     }
     return copy;
   }

   /**
    * Looks up {@code term} in {@code field} of the reader and returns a PostingsEnum for it, with
    * randomly chosen features (see the TermsEnum overload).
    *
    * @return the postings, or null if the field or the term does not exist
    */
   public static PostingsEnum docs(
       Random random, IndexReader r, String field, BytesRef term, PostingsEnum reuse, int flags)
       throws IOException {
     final Terms fieldTerms = MultiTerms.getTerms(r, field);
     if (fieldTerms != null) {
       final TermsEnum cursor = fieldTerms.iterator();
       if (cursor.seekExact(term)) {
         return docs(random, cursor, reuse, flags);
       }
     }
     return null;
   }

   /**
    * Returns a PostingsEnum for the term {@code termsEnum} is positioned on, with random features
    * available.
    *
    * <p>Half of the time the caller's {@code reuse} and {@code flags} are used as given. A quarter
    * of the time FREQS is added to {@code flags}. The remaining quarter ignores both and requests
    * a fresh enum with POSITIONS, OFFSETS, PAYLOADS or ALL.
    */
   public static PostingsEnum docs(Random random, TermsEnum termsEnum, PostingsEnum reuse, int flags)
       throws IOException {
     // TODO: simplify this method? it would be easier to randomly either use the flags passed, or do
     // the random selection,
     // FREQS should be part fo the random selection instead of outside on its own?
     if (!random.nextBoolean()) {
       return termsEnum.postings(reuse, flags);
     }
     if (!random.nextBoolean()) {
       return termsEnum.postings(reuse, flags | PostingsEnum.FREQS);
     }
     final int pick = random.nextInt(4);
     final int posFlags;
     if (pick == 0) {
       posFlags = PostingsEnum.POSITIONS;
     } else if (pick == 1) {
       posFlags = PostingsEnum.OFFSETS;
     } else if (pick == 2) {
       posFlags = PostingsEnum.PAYLOADS;
     } else {
       posFlags = PostingsEnum.ALL;
     }
     return termsEnum.postings(null, posFlags);
   }

   /**
    * Converts {@code string} to a BytesRef and hands it to {@link #bytesToCharSequence}, which
    * picks the CharSequence implementation at random.
    */
   public static CharSequence stringToCharSequence(String string, Random random) {
     return bytesToCharSequence(new BytesRef(string), random);
   }

   /**
    * Decodes the UTF-8 bytes in {@code ref} into a CharSequence whose implementation is chosen at
    * random: a CharsRef or a CharBuffer one time in five each, and a plain String otherwise.
    */
   public static CharSequence bytesToCharSequence(BytesRef ref, Random random) {
     final int choice = random.nextInt(5);
     if (choice == 4) {
       final char[] utf16 = new char[ref.length];
       final int utf16Length = UnicodeUtil.UTF8toUTF16(ref.bytes, ref.offset, ref.length, utf16);
       return new CharsRef(utf16, 0, utf16Length);
     }
     if (choice == 3) {
       return CharBuffer.wrap(ref.utf8ToString());
     }
     return ref.utf8ToString();
   }

   /**
    * Shuts down the given {@link ExecutorService} and waits up to one second for it to terminate.
    *
    * <p>If the calling thread is interrupted while waiting, the problem is reported on {@code
    * System.err} and the thread's interrupt status is restored so that callers can still observe
    * the interruption.
    *
    * @param ex the executor to shut down; {@code null} is ignored
    */
   public static void shutdownExecutorService(ExecutorService ex) {
     if (ex == null) {
       return;
     }
     ex.shutdown();
     try {
       ex.awaitTermination(1, TimeUnit.SECONDS);
     } catch (InterruptedException e) {
       // Just report it on the syserr.
       System.err.println("Could not properly close executor service.");
       e.printStackTrace(System.err);
       // Catching InterruptedException clears the interrupt flag; set it again so the
       // interruption is not silently lost.
       Thread.currentThread().interrupt();
     }
   }

   /**
    * Returns a valid (compiling) Pattern instance with random stuff inside. Candidates are
    * generated until one both compiles and, when used to replace matches in a sample string
    * containing a non-BMP character, yields a valid UTF-16 string.
    *
    * <p>Be careful when applying random patterns to longer strings as certain types of patterns may
    * explode into exponential times in backtracking implementations (such as Java's).
    */
   public static Pattern randomPattern(Random random) {
     final String nonBmpString = "AB\uD840\uDC00C";
     for (; ; ) {
       try {
         final Pattern candidate = Pattern.compile(TestUtil.randomRegexpishString(random));
         String replaced;
         // ignore bugs in Sun's regex impl
         try {
           replaced = candidate.matcher(nonBmpString).replaceAll("_");
         } catch (@SuppressWarnings("unused") StringIndexOutOfBoundsException jdkBug) {
           replaced = null;
           System.out.println("WARNING: your jdk is buggy!");
           System.out.println(
               "Pattern.compile(\""
                   + candidate.pattern()
                   + "\").matcher(\"AB\\uD840\\uDC00C\").replaceAll(\"_\"); should not throw IndexOutOfBounds!");
         }
         // Make sure the result of applying the pattern to a string with extended
         // unicode characters is a valid utf16 string. See LUCENE-4078 for discussion.
         if (replaced == null || !UnicodeUtil.validUTF16String(replaced)) {
           continue;
         }
         return candidate;
       } catch (@SuppressWarnings("unused") PatternSyntaxException ignored) {
         // Loop trying until we hit something that compiles.
       }
     }
   }

   /**
    * Returns a random string intended as analyzer input.
    *
    * <p>About one call in 31 returns a single {@link #randomSubString} chunk. Otherwise the result
    * is built from space-separated "words" whose lengths are spread around a randomly chosen
    * average of 3 to 8. In both cases the target length is drawn from {@code [0, maxLength)}.
    *
    * @param random source of randomness
    * @param maxLength exclusive upper bound for the target length; must be &gt;= 0, and 0 yields
    *     the empty string
    * @param simple passed through to {@link #randomSubString}
    * @return the generated string
    */
   public static String randomAnalysisString(Random random, int maxLength, boolean simple) {
     assert maxLength >= 0;

     // Random.nextInt(bound) rejects a bound of 0, and only "" fits into zero chars anyway.
     if (maxLength == 0) {
       return "";
     }

     // sometimes just a purely random string
     if (random.nextInt(31) == 0) {
       return randomSubString(random, random.nextInt(maxLength), simple);
     }

     // otherwise, try to make it more realistic with 'words' since most tests use MockTokenizer
     // first decide how big the string will really be: 0..n
     maxLength = random.nextInt(maxLength);
     int avgWordLength = TestUtil.nextInt(random, 3, 8);
     StringBuilder sb = new StringBuilder();
     while (sb.length() < maxLength) {
       if (sb.length() > 0) {
         sb.append(' ');
       }
       // Redraw until the Gaussian sample is non-negative.
       int wordLength = -1;
       while (wordLength < 0) {
         wordLength = (int) (random.nextGaussian() * 3 + avgWordLength);
       }
       // Never let a word push the result past the target length.
       wordLength = Math.min(wordLength, maxLength - sb.length());
       sb.append(randomSubString(random, wordLength, simple));
     }
     return sb.toString();
   }

   /**
    * Returns a random string of at most {@code wordLength} chars, built from one of several random
    * string generators.
    *
    * <p>With {@code simple} set, each piece is either a simple or an HTML-ish string. Otherwise a
    * single "evilness" level, drawn once per call, selects the generator used for every piece. The
    * result is cut back to {@code wordLength} chars without leaving a dangling high surrogate, and
    * occasionally has its case mixed up.
    */
   public static String randomSubString(Random random, int wordLength, boolean simple) {
     if (wordLength == 0) {
       return "";
     }

     final int evilness = TestUtil.nextInt(random, 0, 20);

     final StringBuilder word = new StringBuilder();
     while (word.length() < wordLength) {
       final String piece;
       if (simple) {
         if (random.nextBoolean()) {
           piece = TestUtil.randomSimpleString(random, wordLength);
         } else {
           piece = TestUtil.randomHtmlishString(random, wordLength);
         }
       } else if (evilness < 10) {
         piece = TestUtil.randomSimpleString(random, wordLength);
       } else if (evilness < 15) {
         assert word.length() == 0; // we should always get wordLength back!
         piece = TestUtil.randomRealisticUnicodeString(random, wordLength, wordLength);
       } else if (evilness == 16) {
         piece = TestUtil.randomHtmlishString(random, wordLength);
       } else if (evilness == 17) {
         // gives a lot of punctuation
         piece = TestUtil.randomRegexpishString(random, wordLength);
       } else {
         piece = TestUtil.randomUnicodeString(random, wordLength);
       }
       word.append(piece);
     }

     if (word.length() > wordLength) {
       word.setLength(wordLength);
       // Do not end on the first half of a surrogate pair.
       if (Character.isHighSurrogate(word.charAt(wordLength - 1))) {
         word.setLength(wordLength - 1);
       }
     }

     if (random.nextInt(17) != 0) {
       return word.toString();
     }
     // mix up case
     final String mixedUp = TestUtil.randomlyRecaseCodePoints(random, word.toString());
     assert mixedUp.length() == word.length();
     return mixedUp;
   }

   /**
    * For debugging: renders {@code br} as its UTF-8 decoded text followed by its ordinary {@code
    * toString()}. If decoding fails (because the bytes are not valid UTF-8, which is fine!), only
    * the ordinary {@code toString()} is returned. A null argument yields "(null)".
    */
   public static String bytesRefToString(BytesRef br) {
     if (br == null) {
       return "(null)";
     }
     try {
       final String decoded = br.utf8ToString();
       return decoded + " " + br.toString();
     } catch (@SuppressWarnings("unused") AssertionError | IllegalArgumentException t) {
       // If BytesRef isn't actually UTF8, or it's eg a
       // prefix of UTF8 that ends mid-unicode-char, we
       // fallback to hex:
       return br.toString();
     }
   }

   /**
    * Returns a copy of the source directory, with file contents stored in RAM. Only files whose
    * name starts with {@code IndexFileNames.SEGMENTS} or matches {@code
    * IndexFileNames.CODEC_FILE_PATTERN} are copied.
    *
    * @throws IOException if listing or copying fails
    */
   public static Directory ramCopyOf(Directory dir) throws IOException {
     final Directory ram = new ByteBuffersDirectory();
     for (String name : dir.listAll()) {
       final boolean isIndexFile =
           name.startsWith(IndexFileNames.SEGMENTS)
               || IndexFileNames.CODEC_FILE_PATTERN.matcher(name).matches();
       if (!isIndexFile) {
         continue;
       }
       ram.copyFrom(dir, name, name, IOContext.DEFAULT);
     }
     return ram;
   }

   /**
    * Returns true if {@code dir}, once unwrapped, is an FSDirectory whose path lives on a file
    * system chain that includes a {@code WindowsFS}.
    */
   public static boolean hasWindowsFS(Directory dir) {
     final Directory unwrapped = FilterDirectory.unwrap(dir);
     if (!(unwrapped instanceof FSDirectory)) {
       return false;
     }
     // The Path overload performs the walk over the filter file systems.
     return hasWindowsFS(((FSDirectory) unwrapped).getDirectory());
   }

   /**
    * Returns true if any {@code FilterFileSystem} in the delegate chain of {@code path}'s file
    * system has a {@code WindowsFS} as its parent.
    */
   public static boolean hasWindowsFS(Path path) {
     for (FileSystem current = path.getFileSystem();
         current instanceof FilterFileSystem;
         current = ((FilterFileSystem) current).getDelegate()) {
       if (((FilterFileSystem) current).getParent() instanceof WindowsFS) {
         return true;
       }
     }
     return false;
   }

   /**
    * Returns true if {@code dir}, once unwrapped, is an FSDirectory whose path lives on a file
    * system chain that includes a {@code VirusCheckingFS}.
    */
   public static boolean hasVirusChecker(Directory dir) {
     final Directory unwrapped = FilterDirectory.unwrap(dir);
     return unwrapped instanceof FSDirectory
         && hasVirusChecker(((FSDirectory) unwrapped).getDirectory());
   }

   /**
    * Returns true if any {@code FilterFileSystem} in the delegate chain of {@code path}'s file
    * system has a {@code VirusCheckingFS} as its parent.
    */
   public static boolean hasVirusChecker(Path path) {
     for (FileSystem current = path.getFileSystem();
         current instanceof FilterFileSystem;
         current = ((FilterFileSystem) current).getDelegate()) {
       if (((FilterFileSystem) current).getParent() instanceof VirusCheckingFS) {
         return true;
       }
     }
     return false;
   }

   /**
    * Disables the first {@code VirusCheckingFS} found on the file system chain behind {@code in}.
    *
    * @return true if a VirusCheckingFS is in use and was in fact enabled before this call; false
    *     if it was already disabled or none was found
    */
   public static boolean disableVirusChecker(Directory in) {
     final Directory unwrapped = FilterDirectory.unwrap(in);
     if (!(unwrapped instanceof FSDirectory)) {
       return false;
     }

     FileSystem current = ((FSDirectory) unwrapped).getDirectory().getFileSystem();
     while (current instanceof FilterFileSystem) {
       final FilterFileSystem filter = (FilterFileSystem) current;
       if (filter.getParent() instanceof VirusCheckingFS) {
         final VirusCheckingFS checker = (VirusCheckingFS) filter.getParent();
         // Capture the state before switching it off so the caller can restore it later.
         final boolean wasEnabled = checker.isEnabled();
         checker.disable();
         return wasEnabled;
       }
       current = filter.getDelegate();
     }
     return false;
   }

   /**
    * Enables the first {@code VirusCheckingFS} found on the file system chain behind {@code in}.
    * Does nothing if the unwrapped directory is not an FSDirectory or no VirusCheckingFS is found.
    */
   public static void enableVirusChecker(Directory in) {
     final Directory unwrapped = FilterDirectory.unwrap(in);
     if (!(unwrapped instanceof FSDirectory)) {
       return;
     }

     FileSystem current = ((FSDirectory) unwrapped).getDirectory().getFileSystem();
     while (current instanceof FilterFileSystem) {
       final FilterFileSystem filter = (FilterFileSystem) current;
       if (filter.getParent() instanceof VirusCheckingFS) {
         final VirusCheckingFS checker = (VirusCheckingFS) filter.getParent();
         checker.enable();
         return;
       }
       current = filter.getDelegate();
     }
   }
 }