lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java - lucene-solr - Git at Google

 package org.apache.lucene.util;

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 import java.io.BufferedOutputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.PrintStream;
 import java.lang.reflect.Method;
 import java.math.BigDecimal;
 import java.math.BigInteger;
 import java.nio.CharBuffer;
 import java.util.Arrays;
 import java.util.Enumeration;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Random;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.TimeUnit;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipFile;

 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.lucene42.Lucene42Codec;
 import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
 import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.DoubleField;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType.NumericType;
 import org.apache.lucene.document.FloatField;
 import org.apache.lucene.document.IntField;
 import org.apache.lucene.document.LongField;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedDocValuesField;
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.CheckIndex;
 import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
 import org.apache.lucene.index.CheckIndex.Status.FieldNormStatus;
 import org.apache.lucene.index.CheckIndex.Status.StoredFieldStatus;
 import org.apache.lucene.index.CheckIndex.Status.TermIndexStatus;
 import org.apache.lucene.index.CheckIndex.Status.TermVectorStatus;
 import org.apache.lucene.index.ConcurrentMergeScheduler;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo.DocValuesType;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.LogMergePolicy;
 import org.apache.lucene.index.MergePolicy;
 import org.apache.lucene.index.MergeScheduler;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.TieredMergePolicy;
 import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.FilteredQuery;
 import org.apache.lucene.search.FilteredQuery.FilterStrategy;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.CompoundFileDirectory;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.junit.Assert;

 import com.carrotsearch.randomizedtesting.RandomizedContext;
 import com.carrotsearch.randomizedtesting.generators.RandomInts;
 import com.carrotsearch.randomizedtesting.generators.RandomPicks;

 /**
  * General utility methods for Lucene unit tests.
  */
 public class _TestUtil {

   /**
    * Maximum number of times {@link #getTempDir(String)} retries after a
    * failed directory creation before it gives up with an exception.
    */
   private static final int GET_TEMP_DIR_RETRY_THRESHOLD = 1000;

   /**
    * Creates and returns a new temporary directory whose name is derived from
    * the given description. The directory is registered for cleanup after the
    * test suite finishes.
    *
    * @param desc description used in the directory name; at least 3 characters
    * @return the freshly created directory
    * @throws IllegalArgumentException if {@code desc} is shorter than 3 characters
    * @throws RuntimeException if no directory could be created within the retry limit
    */
   public static File getTempDir(String desc) {
     if (desc.length() < 3) {
       throw new IllegalArgumentException("description must be at least 3 characters");
     }
     // Exactly one long is always drawn from the master random, so the test's
     // random sequence does not depend on what genTempFile does internally.
     // The derived Random below is private to name generation.
     final long seed = RandomizedContext.current().getRandom().nextLong();
     final Random nameRandom = new Random(seed);

     File candidate = genTempFile(nameRandom, desc, "tmp", LuceneTestCase.TEMP_DIR);
     int failures = 0;
     while (!candidate.mkdir()) {
       if (failures++ >= GET_TEMP_DIR_RETRY_THRESHOLD) {
         throw new RuntimeException(
             "failed to get a temporary dir too many times. check your temp directory and consider manually cleaning it.");
       }
       candidate = genTempFile(nameRandom, desc, "tmp", LuceneTestCase.TEMP_DIR);
     }

     LuceneTestCase.closeAfterSuite(new CloseableFile(candidate, LuceneTestCase.suiteFailureMarker));
     return candidate;
   }

   /**
    * Deletes a file, or a directory and everything underneath it. Does nothing
    * if the path does not exist.
    *
    * @param dir the file or directory to remove
    * @throws IOException if a directory cannot be listed or any entry cannot be deleted
    */
   public static void rmDir(File dir) throws IOException {
     if (!dir.exists()) {
       return;
     }
     if (dir.isDirectory()) {
       // listFiles() returns null on an I/O error; report that instead of
       // failing with a NullPointerException.
       final File[] children = dir.listFiles();
       if (children == null) {
         throw new IOException("could not list " + dir);
       }
       for (File f : children) {
         if (f.isDirectory()) {
           rmDir(f);
         } else if (!f.delete()) {
           throw new IOException("could not delete " + f);
         }
       }
     }
     // Single delete for both cases: a plain file, or the now-empty directory.
     if (!dir.delete()) {
       throw new IOException("could not delete " + dir);
     }
   }

   /**
    * Convenience method: unzips the archive {@code zipName} under
    * {@code destDir}, removing {@code destDir} first. The destination is
    * registered for cleanup after the test suite finishes.
    *
    * @param zipName the zip archive to extract
    * @param destDir directory to (re)create and extract into
    * @throws IOException if the archive cannot be read or a file cannot be written
    */
   public static void unzip(File zipName, File destDir) throws IOException {
     final ZipFile zipFile = new ZipFile(zipName);
     try {
       rmDir(destDir);

       destDir.mkdir();
       LuceneTestCase.closeAfterSuite(new CloseableFile(destDir, LuceneTestCase.suiteFailureMarker));

       final byte[] buffer = new byte[8192];
       final Enumeration<? extends ZipEntry> entries = zipFile.entries();
       while (entries.hasMoreElements()) {
         final ZipEntry entry = entries.nextElement();
         final File targetFile = new File(destDir, entry.getName());
         if (entry.isDirectory()) {
           // allow unzipping with directory structure
           targetFile.mkdirs();
           continue;
         }

         final File parent = targetFile.getParentFile();
         if (parent != null) {
           // do not rely on directories being listed before their children
           parent.mkdirs();
         }

         // Streams are opened only for file entries and closed in finally
         // blocks, so nothing leaks if a read or write fails midway.
         final InputStream in = zipFile.getInputStream(entry);
         try {
           final OutputStream out = new BufferedOutputStream(new FileOutputStream(targetFile));
           try {
             int len;
             while ((len = in.read(buffer)) >= 0) {
               out.write(buffer, 0, len);
             }
           } finally {
             out.close();
           }
         } finally {
           in.close();
         }
       }
     } finally {
       zipFile.close();
     }
   }

   /**
    * Looks up the merge scheduler configured on the writer and hands it to
    * {@link #syncConcurrentMerges(MergeScheduler)}.
    */
   public static void syncConcurrentMerges(IndexWriter writer) {
     final MergeScheduler scheduler = writer.getConfig().getMergeScheduler();
     syncConcurrentMerges(scheduler);
   }

   /**
    * Calls {@code sync()} on the scheduler if it is a
    * {@link ConcurrentMergeScheduler}; any other scheduler is left untouched.
    */
   public static void syncConcurrentMerges(MergeScheduler ms) {
     if (!(ms instanceof ConcurrentMergeScheduler)) {
       return;
     }
     final ConcurrentMergeScheduler concurrent = (ConcurrentMergeScheduler) ms;
     concurrent.sync();
   }

   /**
    * Runs the CheckIndex tool on the index in {@code dir} with term vector
    * cross-checking enabled.
    *
    * @return the status reported by CheckIndex when the index is clean
    * @throws RuntimeException if CheckIndex reports any problem
    */
   public static CheckIndex.Status checkIndex(Directory dir) throws IOException {
     final boolean crossCheckTermVectors = true;
     return checkIndex(dir, crossCheckTermVectors);
   }

   /**
    * Runs the CheckIndex tool on the index in {@code dir}, capturing its
    * output in memory. The captured output is printed on failure, or when
    * {@code LuceneTestCase.INFOSTREAM} is set.
    *
    * @param crossCheckTermVectors passed through to the checker
    * @return the status reported by CheckIndex when the index is clean
    * @throws RuntimeException if the status is null or not clean
    */
   public static CheckIndex.Status checkIndex(Directory dir, boolean crossCheckTermVectors) throws IOException {
     final ByteArrayOutputStream captured = new ByteArrayOutputStream(1024);
     final CheckIndex checker = new CheckIndex(dir);
     checker.setCrossCheckTermVectors(crossCheckTermVectors);
     checker.setInfoStream(new PrintStream(captured, false, "UTF-8"), false);

     final CheckIndex.Status status = checker.checkIndex(null);
     final boolean failed = status == null || !status.clean;
     if (failed) {
       System.out.println("CheckIndex failed");
       System.out.println(captured.toString("UTF-8"));
       throw new RuntimeException("CheckIndex failed");
     }

     if (LuceneTestCase.INFOSTREAM) {
       System.out.println(captured.toString("UTF-8"));
     }
     return status;
   }

   /**
    * Runs the CheckIndex checks on every leaf of the given reader, with term
    * vector cross-checking enabled.
    *
    * @throws RuntimeException if any check reports a problem
    */
   public static void checkReader(IndexReader reader) throws IOException {
     final boolean crossCheckTermVectors = true;
     for (AtomicReaderContext leaf : reader.leaves()) {
       final AtomicReader leafReader = leaf.reader();
       checkReader(leafReader, crossCheckTermVectors);
     }
   }

   /**
    * Runs the CheckIndex checks for norms, postings, stored fields, term
    * vectors and doc values against a single atomic reader. Check output is
    * captured in memory and printed on failure, or when
    * {@code LuceneTestCase.INFOSTREAM} is set.
    *
    * @param crossCheckTermVectors passed through to the term vector check
    * @throws RuntimeException if any of the checks reports an error
    */
   public static void checkReader(AtomicReader reader, boolean crossCheckTermVectors) throws IOException {
     final ByteArrayOutputStream captured = new ByteArrayOutputStream(1024);
     final PrintStream infoStream = new PrintStream(captured, false, "UTF-8");

     final FieldNormStatus norms = CheckIndex.testFieldNorms(reader, infoStream);
     final TermIndexStatus postings = CheckIndex.testPostings(reader, infoStream);
     final StoredFieldStatus storedFields = CheckIndex.testStoredFields(reader, infoStream);
     final TermVectorStatus termVectors = CheckIndex.testTermVectors(reader, infoStream, false, crossCheckTermVectors);
     final DocValuesStatus docValues = CheckIndex.testDocValues(reader, infoStream);

     final boolean clean = norms.error == null
         && postings.error == null
         && storedFields.error == null
         && termVectors.error == null
         && docValues.error == null;

     if (clean) {
       if (LuceneTestCase.INFOSTREAM) {
         System.out.println(captured.toString("UTF-8"));
       }
       return;
     }

     System.out.println("CheckReader failed");
     System.out.println(captured.toString("UTF-8"));
     throw new RuntimeException("CheckReader failed");
   }

   /**
    * Returns a random int between {@code start} and {@code end}, both
    * inclusive, by delegating to {@code RandomInts.randomIntBetween}.
    */
   public static int nextInt(Random r, int start, int end) {
     final int picked = RandomInts.randomIntBetween(r, start, end);
     return picked;
   }

   /**
    * Returns a random long between {@code start} and {@code end}, both
    * inclusive. The width of the range is computed with BigInteger so that
    * ranges wider than a long do not overflow.
    */
   public static long nextLong(Random r, long start, long end) {
     assert end >= start;
     final BigInteger low = BigInteger.valueOf(start);
     final BigInteger span = BigInteger.valueOf(end).add(BigInteger.valueOf(1)).subtract(low);

     final boolean fitsInInt = span.compareTo(BigInteger.valueOf(Integer.MAX_VALUE)) <= 0;
     if (fitsInInt) {
       return start + r.nextInt(span.intValue());
     }

     // Scale the span by a random fraction; probably not evenly distributed
     // for large ranges, but good enough for tests.
     final BigDecimal fraction = new BigDecimal(r.nextDouble());
     final BigInteger offset = new BigDecimal(span).multiply(fraction).toBigInteger();
     final long picked = low.add(offset).longValue();
     assert picked >= start;
     assert picked <= end;
     return picked;
   }

   /**
    * Returns a random string of lowercase a-z characters with a length between
    * 0 and {@code maxLength}, inclusive.
    */
   public static String randomSimpleString(Random r, int maxLength) {
     final int minLength = 0;
     return randomSimpleString(r, minLength, maxLength);
   }

   /**
    * Returns a random string of lowercase a-z characters whose length is
    * chosen between {@code minLength} and {@code maxLength}, inclusive.
    */
   public static String randomSimpleString(Random r, int minLength, int maxLength) {
     final int length = nextInt(r, minLength, maxLength);
     if (length == 0) {
       // zero-length strings are allowed
       return "";
     }
     final StringBuilder sb = new StringBuilder(length);
     while (sb.length() < length) {
       sb.append((char) nextInt(r, 'a', 'z'));
     }
     return sb.toString();
   }

   /**
    * Returns a random string of length 0 to {@code maxLength} (inclusive)
    * whose characters are each drawn from {@code minChar} to {@code maxChar},
    * inclusive.
    */
   public static String randomSimpleStringRange(Random r, char minChar, char maxChar, int maxLength) {
     final int length = nextInt(r, 0, maxLength);
     if (length == 0) {
       // zero-length strings are allowed
       return "";
     }
     final StringBuilder sb = new StringBuilder(length);
     while (sb.length() < length) {
       sb.append((char) nextInt(r, minChar, maxChar));
     }
     return sb.toString();
   }

   /** Returns a random string of lowercase a-z characters, 0 to 10 characters long. */
   public static String randomSimpleString(Random r) {
     final int minLength = 0;
     final int maxLength = 10;
     return randomSimpleString(r, minLength, maxLength);
   }

   /**
    * Returns a random string of up to 20 chars that may draw from the full
    * unicode range.
    */
   public static String randomUnicodeString(Random r) {
     final int maxLength = 20;
     return randomUnicodeString(r, maxLength);
   }

   /**
    * Returns a random unicode string whose length in chars is chosen between
    * 0 and {@code maxLength}, inclusive.
    */
   public static String randomUnicodeString(Random r, int maxLength) {
     final int length = nextInt(r, 0, maxLength);
     if (length == 0) {
       // zero-length strings are allowed
       return "";
     }
     final char[] chars = new char[length];
     randomFixedLengthUnicodeString(r, chars, 0, length);
     return new String(chars);
   }

   /**
    * Fills {@code length} chars of the provided array, starting at
    * {@code offset}, with a valid random unicode code unit sequence.
    * Surrogates are only ever written as a complete high/low pair.
    *
    * @param random source of randomness
    * @param chars  destination array
    * @param offset index of the first char to fill
    * @param length number of chars to fill
    */
   public static void randomFixedLengthUnicodeString(Random random, char[] chars, int offset, int length) {
     int i = offset;
     final int end = offset + length;
     while(i < end) {
       final int t = random.nextInt(5);
       // A pair needs two slots. i is an absolute index, so the room check
       // must be against end, not length; otherwise a non-zero offset
       // suppresses pairs near the tail of the filled region.
       if (0 == t && i < end - 1) {
         // Make a surrogate pair
         // High surrogate
         chars[i++] = (char) nextInt(random, 0xd800, 0xdbff);
         // Low surrogate
         chars[i++] = (char) nextInt(random, 0xdc00, 0xdfff);
       } else if (t <= 1) {
         // also reached when t == 0 but only one slot is left
         chars[i++] = (char) random.nextInt(0x80);
       } else if (2 == t) {
         chars[i++] = (char) nextInt(random, 0x80, 0x7ff);
       } else if (3 == t) {
         chars[i++] = (char) nextInt(random, 0x800, 0xd7ff);
       } else if (4 == t) {
         chars[i++] = (char) nextInt(random, 0xe000, 0xffff);
       }
     }
   }

   /**
    * Returns a "regexpish" string (one containing lots of operators typically
    * found in regular expressions) built from at most 20 pieces.
    * If you call this enough times, you might get a valid regex!
    */
   public static String randomRegexpishString(Random r) {
     final int maxLength = 20;
     return randomRegexpishString(r, maxLength);
   }

   /**
    * Upper repetition bound used in the bounded operators ({@code {0,n}} and
    * {@code {1,n}}) that stand in for '*' and '+' in
    * {@link #randomRegexpishString(Random, int)}.
    */
   private final static int maxRecursionBound = 5;

   /**
    * Operators that {@link #randomRegexpishString(Random, int)} picks from
    * when it appends an operator instead of a letter.
    */
   private final static List<String> ops = Arrays.asList(
       ".", "?",
       "{0," + maxRecursionBound + "}",  // bounded replacement for '*'
       "{1," + maxRecursionBound + "}",  // bounded replacement for '+'
       "(",
       ")",
       "-",
       "[",
       "]",
       "|"
   );

   /**
    * Returns a "regexpish" string: a random mix of lowercase letters and
    * regular expression operators. If you call this enough times, you might
    * get a valid regex!
    *
    * <P>Note: asterisk and plus are replaced with bounded repetitions to avoid
    * practically endless backtracking patterns. See LUCENE-4111 for more info.
    *
    * @param maxLength upper bound on the number of pieces appended. Because an
    *        operator can be several characters long, the resulting string may
    *        exceed this by a few characters.
    */
   public static String randomRegexpishString(Random r, int maxLength) {
     final StringBuilder sb = new StringBuilder(maxLength);
     int remaining = nextInt(r, 0, maxLength);
     while (remaining > 0) {
       if (r.nextBoolean()) {
         sb.append((char) RandomInts.randomIntBetween(r, 'a', 'z'));
       } else {
         sb.append(RandomPicks.randomFrom(r, ops));
       }
       remaining--;
     }
     return sb.toString();
   }

   /**
    * HTML character entity names, without the leading '&amp;' or trailing ';'.
    * {@link #randomHtmlishString(Random, int)} picks from this list when it
    * emits a named entity reference.
    */
   private static final String[] HTML_CHAR_ENTITIES = {
       "AElig", "Aacute", "Acirc", "Agrave", "Alpha", "AMP", "Aring", "Atilde",
       "Auml", "Beta", "COPY", "Ccedil", "Chi", "Dagger", "Delta", "ETH",
       "Eacute", "Ecirc", "Egrave", "Epsilon", "Eta", "Euml", "Gamma", "GT",
       "Iacute", "Icirc", "Igrave", "Iota", "Iuml", "Kappa", "Lambda", "LT",
       "Mu", "Ntilde", "Nu", "OElig", "Oacute", "Ocirc", "Ograve", "Omega",
       "Omicron", "Oslash", "Otilde", "Ouml", "Phi", "Pi", "Prime", "Psi",
       "QUOT", "REG", "Rho", "Scaron", "Sigma", "THORN", "Tau", "Theta",
       "Uacute", "Ucirc", "Ugrave", "Upsilon", "Uuml", "Xi", "Yacute", "Yuml",
       "Zeta", "aacute", "acirc", "acute", "aelig", "agrave", "alefsym",
       "alpha", "amp", "and", "ang", "apos", "aring", "asymp", "atilde",
       "auml", "bdquo", "beta", "brvbar", "bull", "cap", "ccedil", "cedil",
       "cent", "chi", "circ", "clubs", "cong", "copy", "crarr", "cup",
       "curren", "dArr", "dagger", "darr", "deg", "delta", "diams", "divide",
       "eacute", "ecirc", "egrave", "empty", "emsp", "ensp", "epsilon",
       "equiv", "eta", "eth", "euml", "euro", "exist", "fnof", "forall",
       "frac12", "frac14", "frac34", "frasl", "gamma", "ge", "gt", "hArr",
       "harr", "hearts", "hellip", "iacute", "icirc", "iexcl", "igrave",
       "image", "infin", "int", "iota", "iquest", "isin", "iuml", "kappa",
       "lArr", "lambda", "lang", "laquo", "larr", "lceil", "ldquo", "le",
       "lfloor", "lowast", "loz", "lrm", "lsaquo", "lsquo", "lt", "macr",
       "mdash", "micro", "middot", "minus", "mu", "nabla", "nbsp", "ndash",
       "ne", "ni", "not", "notin", "nsub", "ntilde", "nu", "oacute", "ocirc",
       "oelig", "ograve", "oline", "omega", "omicron", "oplus", "or", "ordf",
       "ordm", "oslash", "otilde", "otimes", "ouml", "para", "part", "permil",
       "perp", "phi", "pi", "piv", "plusmn", "pound", "prime", "prod", "prop",
       "psi", "quot", "rArr", "radic", "rang", "raquo", "rarr", "rceil",
       "rdquo", "real", "reg", "rfloor", "rho", "rlm", "rsaquo", "rsquo",
       "sbquo", "scaron", "sdot", "sect", "shy", "sigma", "sigmaf", "sim",
       "spades", "sub", "sube", "sum", "sup", "sup1", "sup2", "sup3", "supe",
       "szlig", "tau", "there4", "theta", "thetasym", "thinsp", "thorn",
       "tilde", "times", "trade", "uArr", "uacute", "uarr", "ucirc", "ugrave",
       "uml", "upsih", "upsilon", "uuml", "weierp", "xi", "yacute", "yen",
       "yuml", "zeta", "zwj", "zwnj"
   };

   /**
    * Returns a random string assembled from HTML-like fragments: tags,
    * comment and script markers, processing-instruction markers, quotes,
    * entity references, numbers, whitespace and plain lowercase text.
    * Fragments are chosen independently, so the result is generally not
    * well-formed HTML.
    *
    * <p>Throughout the method, {@code "x".substring(nextInt(random, 0, 1))}
    * yields either {@code "x"} or the empty string, i.e. it appends the
    * text only some of the time. The same trick with a run of spaces appends
    * a random amount of whitespace.
    *
    * @param random      source of randomness
    * @param numElements maximum number of fragments to append; the actual
    *                    count is drawn between 0 and this value, inclusive
    */
   public static String randomHtmlishString(Random random, int numElements) {
     final int end = nextInt(random, 0, numElements);
     if (end == 0) {
       // allow 0 length
       return "";
     }
     StringBuilder sb = new StringBuilder();
     for (int i = 0; i < end; i++) {
       // 25 outcomes: 0-23 are handled explicitly, 24 falls to default
       int val = random.nextInt(25);
       switch(val) {
         case 0: sb.append("<p>"); break;
         case 1: {
           // opening tag: random name, random attributes, optional "/" and ">"
           sb.append("<");
           sb.append("    ".substring(nextInt(random, 0, 4)));
           sb.append(randomSimpleString(random));
           // note: the loop bound is re-drawn on every iteration
           for (int j = 0 ; j < nextInt(random, 0, 10) ; ++j) {
             sb.append(' ');
             sb.append(randomSimpleString(random));
             sb.append(" ".substring(nextInt(random, 0, 1)));
             sb.append('=');
             sb.append(" ".substring(nextInt(random, 0, 1)));
             // opening and closing quotes are each optional, independently
             sb.append("\"".substring(nextInt(random, 0, 1)));
             sb.append(randomSimpleString(random));
             sb.append("\"".substring(nextInt(random, 0, 1)));
           }
           sb.append("    ".substring(nextInt(random, 0, 4)));
           sb.append("/".substring(nextInt(random, 0, 1)));
           sb.append(">".substring(nextInt(random, 0, 1)));
           break;
         }
         case 2: {
           // closing tag with a random name and an optional ">"
           sb.append("</");
           sb.append("    ".substring(nextInt(random, 0, 4)));
           sb.append(randomSimpleString(random));
           sb.append("    ".substring(nextInt(random, 0, 4)));
           sb.append(">".substring(nextInt(random, 0, 1)));
           break;
         }
         case 3: sb.append(">"); break;
         case 4: sb.append("</p>"); break;
         case 5: sb.append("<!--"); break;
         case 6: sb.append("<!--#"); break;
         case 7: sb.append("<script><!-- f('"); break;
         case 8: sb.append("</script>"); break;
         case 9: sb.append("<?"); break;
         case 10: sb.append("?>"); break;
         case 11: sb.append("\""); break;
         case 12: sb.append("\\\""); break;
         case 13: sb.append("'"); break;
         case 14: sb.append("\\'"); break;
         case 15: sb.append("-->"); break;
         case 16: {
           // named entity reference; the inner draw is 0..2 but only 0 and 1
           // are handled, so a draw of 2 appends no name after the '&'
           sb.append("&");
           switch(nextInt(random, 0, 2)) {
             case 0: sb.append(randomSimpleString(random)); break;
             case 1: sb.append(HTML_CHAR_ENTITIES[random.nextInt(HTML_CHAR_ENTITIES.length)]); break;
           }
           sb.append(";".substring(nextInt(random, 0, 1)));
           break;
         }
         case 17: {
           // decimal numeric character reference; the number is sometimes omitted
           sb.append("&#");
           if (0 == nextInt(random, 0, 1)) {
             sb.append(nextInt(random, 0, Integer.MAX_VALUE - 1));
             sb.append(";".substring(nextInt(random, 0, 1)));
           }
           break;
         }
         case 18: {
           // hexadecimal numeric character reference; the number is sometimes omitted
           sb.append("&#x");
           if (0 == nextInt(random, 0, 1)) {
             sb.append(Integer.toString(nextInt(random, 0, Integer.MAX_VALUE - 1), 16));
             sb.append(";".substring(nextInt(random, 0, 1)));
           }
           break;
         }

         case 19: sb.append(";"); break;
         case 20: sb.append(nextInt(random, 0, Integer.MAX_VALUE - 1)); break;
         case 21: sb.append("\n"); break;
         case 22: sb.append("          ".substring(nextInt(random, 0, 10))); break;
         case 23: {
           // opening or closing script/style/br tag in random letter case,
           // with optional whitespace after '<' and after '/'
           sb.append("<");
           if (0 == nextInt(random, 0, 3)) {
             sb.append("          ".substring(nextInt(random, 1, 10)));
           }
           if (0 == nextInt(random, 0, 1)) {
             sb.append("/");
             if (0 == nextInt(random, 0, 3)) {
               sb.append("          ".substring(nextInt(random, 1, 10)));
             }
           }
           // the draw is 0..3; a draw of 3 appends no tag name
           switch (nextInt(random, 0, 3)) {
             case 0: sb.append(randomlyRecaseCodePoints(random, "script")); break;
             case 1: sb.append(randomlyRecaseCodePoints(random, "style")); break;
             case 2: sb.append(randomlyRecaseCodePoints(random, "br")); break;
             // default: append nothing
           }
           sb.append(">".substring(nextInt(random, 0, 1)));
           break;
         }
         // plain text
         default: sb.append(randomSimpleString(random));
       }
     }
     return sb.toString();
   }

   /**
    * Returns a copy of {@code str} in which each code point has been
    * independently upper-cased, lower-cased, or left as is, chosen at random.
    */
   public static String randomlyRecaseCodePoints(Random random, String str) {
     final StringBuilder recased = new StringBuilder();
     for (int i = 0; i < str.length(); ) {
       final int cp = str.codePointAt(i);
       i += Character.charCount(cp);
       final int choice = nextInt(random, 0, 2);
       if (choice == 0) {
         recased.appendCodePoint(Character.toUpperCase(cp));
       } else if (choice == 1) {
         recased.appendCodePoint(Character.toLowerCase(cp));
       } else if (choice == 2) {
         // leave intact
         recased.appendCodePoint(cp);
       }
     }
     return recased.toString();
   }

   /**
    * First code point of each block that the randomRealisticUnicodeString
    * methods can pick from. Parallel to {@link #blockEnds}: entry {@code i}
    * here pairs with entry {@code i} there. The table jumps from 0xD7B0 to
    * 0xE000, so the surrogate range 0xD800-0xDFFF is not covered.
    */
   private static final int[] blockStarts = {
     0x0000, 0x0080, 0x0100, 0x0180, 0x0250, 0x02B0, 0x0300, 0x0370, 0x0400,
     0x0500, 0x0530, 0x0590, 0x0600, 0x0700, 0x0750, 0x0780, 0x07C0, 0x0800,
     0x0900, 0x0980, 0x0A00, 0x0A80, 0x0B00, 0x0B80, 0x0C00, 0x0C80, 0x0D00,
     0x0D80, 0x0E00, 0x0E80, 0x0F00, 0x1000, 0x10A0, 0x1100, 0x1200, 0x1380,
     0x13A0, 0x1400, 0x1680, 0x16A0, 0x1700, 0x1720, 0x1740, 0x1760, 0x1780,
     0x1800, 0x18B0, 0x1900, 0x1950, 0x1980, 0x19E0, 0x1A00, 0x1A20, 0x1B00,
     0x1B80, 0x1C00, 0x1C50, 0x1CD0, 0x1D00, 0x1D80, 0x1DC0, 0x1E00, 0x1F00,
     0x2000, 0x2070, 0x20A0, 0x20D0, 0x2100, 0x2150, 0x2190, 0x2200, 0x2300,
     0x2400, 0x2440, 0x2460, 0x2500, 0x2580, 0x25A0, 0x2600, 0x2700, 0x27C0,
     0x27F0, 0x2800, 0x2900, 0x2980, 0x2A00, 0x2B00, 0x2C00, 0x2C60, 0x2C80,
     0x2D00, 0x2D30, 0x2D80, 0x2DE0, 0x2E00, 0x2E80, 0x2F00, 0x2FF0, 0x3000,
     0x3040, 0x30A0, 0x3100, 0x3130, 0x3190, 0x31A0, 0x31C0, 0x31F0, 0x3200,
     0x3300, 0x3400, 0x4DC0, 0x4E00, 0xA000, 0xA490, 0xA4D0, 0xA500, 0xA640,
     0xA6A0, 0xA700, 0xA720, 0xA800, 0xA830, 0xA840, 0xA880, 0xA8E0, 0xA900,
     0xA930, 0xA960, 0xA980, 0xAA00, 0xAA60, 0xAA80, 0xABC0, 0xAC00, 0xD7B0,
     0xE000, 0xF900, 0xFB00, 0xFB50, 0xFE00, 0xFE10,
     0xFE20, 0xFE30, 0xFE50, 0xFE70, 0xFF00, 0xFFF0,
     0x10000, 0x10080, 0x10100, 0x10140, 0x10190, 0x101D0, 0x10280, 0x102A0,
     0x10300, 0x10330, 0x10380, 0x103A0, 0x10400, 0x10450, 0x10480, 0x10800,
     0x10840, 0x10900, 0x10920, 0x10A00, 0x10A60, 0x10B00, 0x10B40, 0x10B60,
     0x10C00, 0x10E60, 0x11080, 0x12000, 0x12400, 0x13000, 0x1D000, 0x1D100,
     0x1D200, 0x1D300, 0x1D360, 0x1D400, 0x1F000, 0x1F030, 0x1F100, 0x1F200,
     0x20000, 0x2A700, 0x2F800, 0xE0000, 0xE0100, 0xF0000, 0x100000
   };

   /**
    * Last code point (inclusive) of each block that the
    * randomRealisticUnicodeString methods can pick from. Parallel to
    * {@link #blockStarts}: entry {@code i} here closes the block that
    * starts at entry {@code i} there.
    */
   private static final int[] blockEnds = {
     0x007F, 0x00FF, 0x017F, 0x024F, 0x02AF, 0x02FF, 0x036F, 0x03FF, 0x04FF,
     0x052F, 0x058F, 0x05FF, 0x06FF, 0x074F, 0x077F, 0x07BF, 0x07FF, 0x083F,
     0x097F, 0x09FF, 0x0A7F, 0x0AFF, 0x0B7F, 0x0BFF, 0x0C7F, 0x0CFF, 0x0D7F,
     0x0DFF, 0x0E7F, 0x0EFF, 0x0FFF, 0x109F, 0x10FF, 0x11FF, 0x137F, 0x139F,
     0x13FF, 0x167F, 0x169F, 0x16FF, 0x171F, 0x173F, 0x175F, 0x177F, 0x17FF,
     0x18AF, 0x18FF, 0x194F, 0x197F, 0x19DF, 0x19FF, 0x1A1F, 0x1AAF, 0x1B7F,
     0x1BBF, 0x1C4F, 0x1C7F, 0x1CFF, 0x1D7F, 0x1DBF, 0x1DFF, 0x1EFF, 0x1FFF,
     0x206F, 0x209F, 0x20CF, 0x20FF, 0x214F, 0x218F, 0x21FF, 0x22FF, 0x23FF,
     0x243F, 0x245F, 0x24FF, 0x257F, 0x259F, 0x25FF, 0x26FF, 0x27BF, 0x27EF,
     0x27FF, 0x28FF, 0x297F, 0x29FF, 0x2AFF, 0x2BFF, 0x2C5F, 0x2C7F, 0x2CFF,
     0x2D2F, 0x2D7F, 0x2DDF, 0x2DFF, 0x2E7F, 0x2EFF, 0x2FDF, 0x2FFF, 0x303F,
     0x309F, 0x30FF, 0x312F, 0x318F, 0x319F, 0x31BF, 0x31EF, 0x31FF, 0x32FF,
     0x33FF, 0x4DBF, 0x4DFF, 0x9FFF, 0xA48F, 0xA4CF, 0xA4FF, 0xA63F, 0xA69F,
     0xA6FF, 0xA71F, 0xA7FF, 0xA82F, 0xA83F, 0xA87F, 0xA8DF, 0xA8FF, 0xA92F,
     0xA95F, 0xA97F, 0xA9DF, 0xAA5F, 0xAA7F, 0xAADF, 0xABFF, 0xD7AF, 0xD7FF,
     0xF8FF, 0xFAFF, 0xFB4F, 0xFDFF, 0xFE0F, 0xFE1F,
     0xFE2F, 0xFE4F, 0xFE6F, 0xFEFF, 0xFFEF, 0xFFFF,
     0x1007F, 0x100FF, 0x1013F, 0x1018F, 0x101CF, 0x101FF, 0x1029F, 0x102DF,
     0x1032F, 0x1034F, 0x1039F, 0x103DF, 0x1044F, 0x1047F, 0x104AF, 0x1083F,
     0x1085F, 0x1091F, 0x1093F, 0x10A5F, 0x10A7F, 0x10B3F, 0x10B5F, 0x10B7F,
     0x10C4F, 0x10E7F, 0x110CF, 0x123FF, 0x1247F, 0x1342F, 0x1D0FF, 0x1D1FF,
     0x1D24F, 0x1D35F, 0x1D37F, 0x1D7FF, 0x1F02F, 0x1F09F, 0x1F1FF, 0x1F2FF,
     0x2A6DF, 0x2B73F, 0x2FA1F, 0xE007F, 0xE01EF, 0xFFFFF, 0x10FFFF
   };

   /**
    * Returns a random string of 0 to 20 code points, all taken from the same
    * unicode block.
    */
   public static String randomRealisticUnicodeString(Random r) {
     final int maxLength = 20;
     return randomRealisticUnicodeString(r, maxLength);
   }

   /**
    * Returns a random string of 0 to {@code maxLength} code points, all taken
    * from the same unicode block.
    */
   public static String randomRealisticUnicodeString(Random r, int maxLength) {
     final int minLength = 0;
     return randomRealisticUnicodeString(r, minLength, maxLength);
   }

   /**
    * Returns a random string of {@code minLength} to {@code maxLength} code
    * points (inclusive). One unicode block is picked at random and every
    * code point is drawn from it.
    */
   public static String randomRealisticUnicodeString(Random r, int minLength, int maxLength) {
     final int codePointCount = nextInt(r, minLength, maxLength);
     final int block = r.nextInt(blockStarts.length);
     final int first = blockStarts[block];
     final int last = blockEnds[block];

     final StringBuilder sb = new StringBuilder();
     int remaining = codePointCount;
     while (remaining-- > 0) {
       sb.appendCodePoint(nextInt(r, first, last));
     }
     return sb.toString();
   }

   /**
    * Returns a random string whose UTF-8 encoding is {@code length} bytes
    * long. Each step picks a character class that still fits in the
    * remaining byte budget and subtracts its encoded size (1 to 4 bytes).
    */
   public static String randomFixedByteLengthUnicodeString(Random r, int length) {

     final char[] chars = new char[length * 3];
     int remaining = length;
     int pos = 0;
     while (pos < chars.length && remaining != 0) {
       // Only offer the classes whose encoded size fits in what is left.
       final int kind;
       if (remaining >= 4) {
         kind = r.nextInt(5);
       } else if (remaining >= 3) {
         kind = r.nextInt(4);
       } else if (remaining >= 2) {
         kind = r.nextInt(2);
       } else {
         kind = 0;
       }

       switch (kind) {
         case 0:
           // one byte
           chars[pos++] = (char) r.nextInt(0x80);
           remaining -= 1;
           break;
         case 1:
           // two bytes
           chars[pos++] = (char) nextInt(r, 0x80, 0x7ff);
           remaining -= 2;
           break;
         case 2:
           // three bytes, below the surrogate range
           chars[pos++] = (char) nextInt(r, 0x800, 0xd7ff);
           remaining -= 3;
           break;
         case 3:
           // three bytes, above the surrogate range
           chars[pos++] = (char) nextInt(r, 0xe000, 0xffff);
           remaining -= 3;
           break;
         case 4:
           // surrogate pair: high then low, four bytes in total
           chars[pos++] = (char) nextInt(r, 0xd800, 0xdbff);
           chars[pos++] = (char) nextInt(r, 0xdc00, 0xdfff);
           remaining -= 4;
           break;
       }
     }
     return new String(chars, 0, pos);
   }


   /**
    * Returns a {@link Lucene42Codec} subclass that reports the given postings
    * format for every field. The choice is printed when
    * {@code LuceneTestCase.VERBOSE} is set.
    */
   public static Codec alwaysPostingsFormat(final PostingsFormat format) {
     // TODO: postings impls should really announce themselves (and maybe
     // their params, too) to infostream on flush and merge; otherwise in a
     // real debugging situation we won't know what is going on.
     if (LuceneTestCase.VERBOSE) {
       System.out.println("forcing postings format to:" + format);
     }
     final Codec forcingCodec = new Lucene42Codec() {
       @Override
       public PostingsFormat getPostingsFormatForField(String field) {
         return format;
       }
     };
     return forcingCodec;
   }

   /**
    * Returns a {@link Lucene42Codec} subclass that reports the given doc
    * values format for every field. The choice is printed when
    * {@code LuceneTestCase.VERBOSE} is set.
    */
   public static Codec alwaysDocValuesFormat(final DocValuesFormat format) {
     // TODO: docvalues impls should really announce themselves (and maybe
     // their params, too) to infostream on flush and merge; otherwise in a
     // real debugging situation we won't know what is going on.
     if (LuceneTestCase.VERBOSE) {
       System.out.println("forcing docvalues format to:" + format);
     }
     final Codec forcingCodec = new Lucene42Codec() {
       @Override
       public DocValuesFormat getDocValuesFormatForField(String field) {
         return format;
       }
     };
     return forcingCodec;
   }

   // TODO: generalize all 'test-checks-for-crazy-codecs' to
   // annotations (LUCENE-3489)
   /**
    * Returns the name of the postings format that the default codec uses for
    * the given field.
    */
   public static String getPostingsFormat(String field) {
     final Codec defaultCodec = Codec.getDefault();
     return getPostingsFormat(defaultCodec, field);
   }

   /**
    * Returns the name of the postings format that {@code codec} uses for the
    * given field. A per-field postings format is asked for its format for
    * that field; any other format is reported directly.
    */
   public static String getPostingsFormat(Codec codec, String field) {
     final PostingsFormat format = codec.postingsFormat();
     if (!(format instanceof PerFieldPostingsFormat)) {
       return format.getName();
     }
     final PerFieldPostingsFormat perField = (PerFieldPostingsFormat) format;
     return perField.getPostingsFormatForField(field).getName();
   }

   /**
    * Returns true if the directory lists any file other than a lone
    * {@code write.lock}.
    */
   public static boolean anyFilesExceptWriteLock(Directory dir) throws IOException {
     final String[] names = dir.listAll();
     if (names.length == 0) {
       return false;
     }
     if (names.length > 1) {
       return true;
     }
     // exactly one entry: it only counts if it is not the write lock
     return !names[0].equals("write.lock");
   }

   /**
    * Best-effort tuning of the writer's merge policy and merge scheduler to
    * keep the number of open files lowish: merge widths are capped at 5, the
    * no-CFS ratio is set to 1.0, and a concurrent merge scheduler is set to
    * at most 3 merges and 2 threads.
    */
   public static void reduceOpenFiles(IndexWriter w) {
     final MergePolicy policy = w.getConfig().getMergePolicy();
     if (policy instanceof LogMergePolicy) {
       final LogMergePolicy logPolicy = (LogMergePolicy) policy;
       logPolicy.setMergeFactor(Math.min(5, logPolicy.getMergeFactor()));
       logPolicy.setNoCFSRatio(1.0);
     } else if (policy instanceof TieredMergePolicy) {
       final TieredMergePolicy tieredPolicy = (TieredMergePolicy) policy;
       tieredPolicy.setMaxMergeAtOnce(Math.min(5, tieredPolicy.getMaxMergeAtOnce()));
       tieredPolicy.setSegmentsPerTier(Math.min(5, tieredPolicy.getSegmentsPerTier()));
       tieredPolicy.setNoCFSRatio(1.0);
     }

     final MergeScheduler scheduler = w.getConfig().getMergeScheduler();
     if (scheduler instanceof ConcurrentMergeScheduler) {
       // open question from the original author: shouldn't this be even
       // lower, given that the default is 1?
       final ConcurrentMergeScheduler concurrent = (ConcurrentMergeScheduler) scheduler;
       concurrent.setMaxMergesAndThreads(3, 2);
     }
   }

   /**
    * Checks some basic behaviour of an AttributeImpl: everything it reports
    * through {@code reflectWith} is collected into a map and compared with the
    * expected map.
    *
    * @param att the attribute implementation to reflect
    * @param reflectedValues the expected values, keyed by "AttributeClass#key"
    */
   public static <T> void assertAttributeReflection(final AttributeImpl att, Map<String,T> reflectedValues) {
     final Map<String,Object> collected = new HashMap<String,Object>();
     final AttributeReflector collector = new AttributeReflector() {
       @Override
       public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
         collected.put(attClass.getName() + '#' + key, value);
       }
     };
     att.reflectWith(collector);
     Assert.assertEquals("Reflection does not produce same map", reflectedValues, collected);
   }

   /**
    * Reflectively calls the non-public, no-argument
    * {@code keepFullyDeletedSegments} method declared on {@link IndexWriter}.
    *
    * @param w the writer to invoke the method on
    * @throws RuntimeException wrapping any exception raised by the lookup or the call
    */
   public static void keepFullyDeletedSegments(IndexWriter w) {
     try {
       // Carefully invoke what is a package-private (test
       // only, internal) method on IndexWriter:
       final Method hook = IndexWriter.class.getDeclaredMethod("keepFullyDeletedSegments");
       hook.setAccessible(true);
       hook.invoke(w);
     } catch (Exception failure) {
       // Should not happen?
       throw new RuntimeException(failure);
     }
   }

   /**
    * Insecure, fast variant of File.createTempFile: names are derived from a
    * plain {@link Random} instead of SecureRandom. Candidate names are
    * generated until one of them can be created as a new file.
    *
    * @param prefix name prefix, at least 3 characters long
    * @param suffix name suffix, or {@code null} to use ".tmp"
    * @param directory directory in which the file is created
    * @return the newly created file
    * @throws IllegalArgumentException if {@code prefix} is shorter than 3 characters
    * @throws IOException if creating a candidate file fails
    */
   public static File createTempFile(String prefix, String suffix, File directory)
       throws IOException {
     if (prefix.length() < 3) {
       throw new IllegalArgumentException("prefix must be at least 3 characters");
     }
     final String effectiveSuffix;
     if (suffix == null) {
       effectiveSuffix = ".tmp";
     } else {
       effectiveSuffix = suffix;
     }
     // always pull a long from master random. that way, the randomness of the test
     // is not affected by whether it initialized the counter (in genTempFile) or not.
     // note that the Random used by genTempFile is *not* the master Random, and therefore
     // does not affect the randomness of the test.
     final long seed = RandomizedContext.current().getRandom().nextLong();
     final Random nameRandom = new Random(seed);
     while (true) {
       final File candidate = genTempFile(nameRandom, prefix, effectiveSuffix, directory);
       if (candidate.createNewFile()) {
         return candidate;
       }
     }
   }

   /* Identifier intended to tell apart temp files of different VM processes;
    * set once by genTempFile to the decimal form of the initial counter value. */
   private static String counterBase;

   /* Temp file counter; advanced by one for every generated name. */
   private static int counter;
   /* Guards the one-time setup of counterBase and every update of counter. */
   private static final Object counterLock = new Object();

   /**
    * Produces the next candidate temp file. The file name is the prefix,
    * followed by the process-wide counter base, the current counter value and
    * the suffix. On the very first call the counter is seeded from
    * {@code random} and the counter base is derived from that seed.
    *
    * @param random source for the initial counter value (used on first call only)
    * @param prefix file name prefix
    * @param suffix file name suffix
    * @param directory parent directory of the returned file
    * @return a file object for the candidate name; the file itself is not created here
    */
   private static File genTempFile(Random random, String prefix, String suffix, File directory) {
     final int sequence;
     synchronized (counterLock) {
       if (counterBase == null) { // init once
         counter = random.nextInt() & 0xFFFF; // up to five digits number
         counterBase = Integer.toString(counter);
       }
       sequence = counter;
       counter++;
     }
     final String fileName = new StringBuilder()
         .append(prefix)
         .append(counterBase)
         .append(sequence)
         .append(suffix)
         .toString();
     return new File(directory, fileName);
   }

   /**
    * Asserts that two {@link TopDocs} agree: same total hit count, same max
    * score, same number of returned hits, and for every hit the same doc id and
    * score. When an expected hit is a {@link FieldDoc}, the actual hit must be
    * one too and carry equal sort field values; otherwise the actual hit must
    * not be a {@link FieldDoc}.
    *
    * @param expected the reference results
    * @param actual the results under test
    */
   public static void assertEquals(TopDocs expected, TopDocs actual) {
     Assert.assertEquals("wrong total hits", expected.totalHits, actual.totalHits);
     Assert.assertEquals("wrong maxScore", expected.getMaxScore(), actual.getMaxScore(), 0.0);
     final ScoreDoc[] expectedHits = expected.scoreDocs;
     final ScoreDoc[] actualHits = actual.scoreDocs;
     Assert.assertEquals("wrong hit count", expectedHits.length, actualHits.length);
     for (int i = 0; i < expectedHits.length; i++) {
       final ScoreDoc expectedHit = expectedHits[i];
       final ScoreDoc actualHit = actualHits[i];
       Assert.assertEquals("wrong hit docID", expectedHit.doc, actualHit.doc);
       Assert.assertEquals("wrong hit score", expectedHit.score, actualHit.score, 0.0);
       if (!(expectedHit instanceof FieldDoc)) {
         Assert.assertFalse(actualHit instanceof FieldDoc);
         continue;
       }
       Assert.assertTrue(actualHit instanceof FieldDoc);
       Assert.assertArrayEquals("wrong sort field values",
                           ((FieldDoc) expectedHit).fields,
                           ((FieldDoc) actualHit).fields);
     }
   }

   // NOTE: this is likely buggy, and cannot clone fields
   // with tokenStreamValues, etc.  Use at your own risk!!

   // TODO: is there a pre-existing way to do this!!!
   /**
    * Builds a new document holding a re-created copy of every field of
    * {@code doc1}. Each field is cast to {@link Field} and rebuilt as follows:
    * <ul>
    *   <li>fields with a doc values type become the matching numeric, binary or
    *       sorted doc values field;</li>
    *   <li>fields with a numeric type become the matching Int/Float/Long/Double
    *       field, reusing the original field type and the numeric value in its
    *       own precision;</li>
    *   <li>every other field is rebuilt from its string value and field type.</li>
    * </ul>
    *
    * @param doc1 the document to copy
    * @return a new document with the copied fields, in the original order
    * @throws IllegalStateException if a doc values or numeric type is not handled here
    */
   public static Document cloneDocument(Document doc1) {
     final Document doc2 = new Document();
     for(IndexableField f : doc1.getFields()) {
       final Field field1 = (Field) f;
       final Field field2;
       final DocValuesType dvType = field1.fieldType().docValueType();
       final NumericType numType = field1.fieldType().numericType();
       if (dvType != null) {
         switch(dvType) {
           case NUMERIC:
             field2 = new NumericDocValuesField(field1.name(), field1.numericValue().longValue());
             break;
           case BINARY:
             field2 = new BinaryDocValuesField(field1.name(), field1.binaryValue());
             break;
           case SORTED:
             field2 = new SortedDocValuesField(field1.name(), field1.binaryValue());
             break;
           default:
             throw new IllegalStateException("unknown Type: " + dvType);
         }
       } else if (numType != null) {
         // Read the value with the accessor matching the numeric type: going
         // through intValue() for every type would drop fractions and
         // truncate values outside the int range.
         switch (numType) {
           case INT:
             field2 = new IntField(field1.name(), field1.numericValue().intValue(), field1.fieldType());
             break;
           case FLOAT:
             field2 = new FloatField(field1.name(), field1.numericValue().floatValue(), field1.fieldType());
             break;
           case LONG:
             field2 = new LongField(field1.name(), field1.numericValue().longValue(), field1.fieldType());
             break;
           case DOUBLE:
             field2 = new DoubleField(field1.name(), field1.numericValue().doubleValue(), field1.fieldType());
             break;
           default:
             throw new IllegalStateException("unknown Type: " + numType);
         }
       } else {
         field2 = new Field(field1.name(), field1.stringValue(), field1.fieldType());
       }
       doc2.add(field2);
     }

     return doc2;
   }

   /**
    * Returns a DocsEnum for the given field and term, but randomly sometimes
    * uses a DocsAndFreqsEnum, DocsAndPositionsEnum. Returns null if the
    * field or the term doesn't exist.
    *
    * @param random source of the random choices
    * @param r reader to look the field up in
    * @param field field name
    * @param term term to seek to
    * @param liveDocs live docs passed on when pulling the enum
    * @param reuse enum offered for reuse
    * @param flags DocsEnum flags requested by the caller
    * @throws IOException if reading the index fails
    */
   public static DocsEnum docs(Random random, IndexReader r, String field, BytesRef term, Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
     final Terms fieldTerms = MultiFields.getTerms(r, field);
     if (fieldTerms != null) {
       final TermsEnum positioned = fieldTerms.iterator(null);
       // the second seekExact argument is picked at random
       final boolean secondArg = random.nextBoolean();
       if (positioned.seekExact(term, secondArg)) {
         return docs(random, positioned, liveDocs, reuse, flags);
       }
     }
     return null;
   }

   /**
    * Returns a DocsEnum from a positioned TermsEnum, but randomly sometimes
    * uses a DocsAndFreqsEnum, DocsAndPositionsEnum.
    * <p>
    * Half of the time the caller's flags are used as-is. Otherwise, again half
    * of the time, a positions enum with randomly chosen offset/payload flags is
    * tried first and returned if available; failing that, a plain enum is
    * pulled with {@code FLAG_FREQS} added to the caller's flags.
    *
    * @throws IOException if pulling an enum fails
    */
   public static DocsEnum docs(Random random, TermsEnum termsEnum, Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
     if (!random.nextBoolean()) {
       return termsEnum.docs(liveDocs, reuse, flags);
     }
     if (random.nextBoolean()) {
       final int pick = random.nextInt(4);
       final int posFlags;
       if (pick == 0) {
         posFlags = 0;
       } else if (pick == 1) {
         posFlags = DocsAndPositionsEnum.FLAG_OFFSETS;
       } else if (pick == 2) {
         posFlags = DocsAndPositionsEnum.FLAG_PAYLOADS;
       } else {
         posFlags = DocsAndPositionsEnum.FLAG_OFFSETS | DocsAndPositionsEnum.FLAG_PAYLOADS;
       }
       // TODO: cast to DocsAndPositionsEnum?
       final DocsAndPositionsEnum withPositions = termsEnum.docsAndPositions(liveDocs, null, posFlags);
       if (withPositions != null) {
         return withPositions;
       }
     }
     return termsEnum.docs(liveDocs, reuse, flags | DocsEnum.FLAG_FREQS);
   }

   /**
    * Wraps the string in a {@link BytesRef} and returns it as a randomly
    * chosen kind of CharSequence via {@code bytesToCharSequence}.
    *
    * @param string the text to convert
    * @param random source of the random choice
    */
   public static CharSequence stringToCharSequence(String string, Random random) {
     final BytesRef asBytes = new BytesRef(string);
     return bytesToCharSequence(asBytes, random);
   }

   /**
    * Decodes the UTF-8 bytes into one of three CharSequence flavours, chosen
    * at random: a {@link CharsRef} (1 in 5), a {@link CharBuffer} (1 in 5) or
    * a plain String (3 in 5).
    *
    * @param ref the UTF-8 bytes to decode
    * @param random source of the random choice
    */
   public static CharSequence bytesToCharSequence(BytesRef ref, Random random) {
     final int choice = random.nextInt(5);
     if (choice == 4) {
       final CharsRef utf16 = new CharsRef(ref.length);
       UnicodeUtil.UTF8toUTF16(ref.bytes, ref.offset, ref.length, utf16);
       return utf16;
     }
     final String decoded = ref.utf8ToString();
     if (choice == 3) {
       return CharBuffer.wrap(decoded);
     }
     return decoded;
   }

   /**
    * Shuts down the given {@link ExecutorService} and waits up to one second
    * for it to terminate.
    * <p>
    * A {@code null} executor is ignored. If the calling thread is interrupted
    * while waiting, the problem is reported on {@code System.err} and the
    * thread's interrupt status is restored so that callers can still observe it.
    *
    * @param ex the executor to shut down, may be {@code null}
    */
   public static void shutdownExecutorService(ExecutorService ex) {
     if (ex == null) {
       return;
     }
     try {
       ex.shutdown();
       ex.awaitTermination(1, TimeUnit.SECONDS);
     } catch (InterruptedException e) {
       // Just report it on the syserr.
       System.err.println("Could not properly shutdown executor service.");
       e.printStackTrace(System.err);
       // Throwing InterruptedException cleared the interrupt flag; set it
       // again so the interruption is not silently lost.
       Thread.currentThread().interrupt();
     }
   }

   /**
    * Reads the {@link FieldInfos} of a segment through the segment's codec.
    * For a segment stored as a compound file, a {@link CompoundFileDirectory}
    * is opened for the read and closed again afterwards; otherwise the
    * segment's own directory is read directly and left open.
    *
    * @param info the segment to read field infos for
    * @return the field infos read by the codec's field infos reader
    * @throws IOException if opening, reading or closing fails
    */
   public static FieldInfos getFieldInfos(SegmentInfo info) throws IOException {
     if (!info.getUseCompoundFile()) {
       return info.getCodec().fieldInfosFormat().getFieldInfosReader().read(info.dir,
                                                                            info.name,
                                                                            IOContext.READONCE);
     }
     final String compoundName =
         IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION);
     final Directory compoundDir =
         new CompoundFileDirectory(info.dir, compoundName, IOContext.READONCE, false);
     try {
       return info.getCodec().fieldInfosFormat().getFieldInfosReader().read(compoundDir,
                                                                            info.name,
                                                                            IOContext.READONCE);
     } finally {
       // only the directory opened here is closed, never info.dir
       compoundDir.close();
     }
   }

   /**
    * Returns a valid (compiling) Pattern instance with random stuff inside.
    * Candidates are drawn until one compiles and, when used to replace matches
    * in a string containing a non-BMP character, yields valid UTF-16.
    * <p>
    * Be careful when applying random patterns to longer strings as certain
    * types of patterns may explode into exponential times in backtracking
    * implementations (such as Java's).
    *
    * @param random source of the random regular expressions
    */
   public static Pattern randomPattern(Random random) {
     final String nonBmpString = "AB\uD840\uDC00C";
     for (;;) {
       try {
         final Pattern candidate = Pattern.compile(_TestUtil.randomRegexpishString(random));
         final String replaced;
         try {
           replaced = candidate.matcher(nonBmpString).replaceAll("_");
         } catch (StringIndexOutOfBoundsException jdkBug) {
           // ignore bugs in Sun's regex impl: report and try another pattern
           System.out.println("WARNING: your jdk is buggy!");
           System.out.println("Pattern.compile(\"" + candidate.pattern() +
               "\").matcher(\"AB\\uD840\\uDC00C\").replaceAll(\"_\"); should not throw IndexOutOfBounds!");
           continue;
         }
         // Make sure the result of applying the pattern to a string with extended
         // unicode characters is a valid utf16 string. See LUCENE-4078 for discussion.
         if (UnicodeUtil.validUTF16String(replaced)) {
           return candidate;
         }
       } catch (PatternSyntaxException ignored) {
         // Loop trying until we hit something that compiles.
       }
     }
   }


   /**
    * Picks a filter strategy at random. One draw in six each goes to the
    * query-first, leap-frog-query-first, leap-frog-filter-first and
    * random-access strategies; the remaining two in six return a random-access
    * strategy that decides per call, via {@code LuceneTestCase.random()},
    * whether random access is used.
    *
    * @param random source for choosing the strategy
    */
   public static final FilterStrategy randomFilterStrategy(final Random random) {
     final int pick = random.nextInt(6);
     if (pick == 0) {
       return FilteredQuery.QUERY_FIRST_FILTER_STRATEGY;
     }
     if (pick == 1) {
       return FilteredQuery.LEAP_FROG_QUERY_FIRST_STRATEGY;
     }
     if (pick == 2) {
       return FilteredQuery.LEAP_FROG_FILTER_FIRST_STRATEGY;
     }
     if (pick == 4 || pick == 5) {
       return new FilteredQuery.RandomAccessFilterStrategy() {
         @Override
         protected boolean useRandomAccess(Bits bits, int firstFilterDoc) {
           return LuceneTestCase.random().nextBoolean();
         }
       };
     }
     // pick == 3, and the fallback for any other value
     return FilteredQuery.RANDOM_ACCESS_FILTER_STRATEGY;
   }

   /**
    * Returns a random string whose length lies in the specified range and
    * which consists entirely of whitespace characters. Every character drawn
    * is asserted to satisfy {@link Character#isWhitespace(char)}.
    *
    * @param r source of randomness
    * @param minLength smallest length passed to the length draw
    * @param maxLength largest length passed to the length draw
    * @see #WHITESPACE_CHARACTERS
    */
   public static String randomWhitespace(Random r, int minLength, int maxLength) {
     final int length = nextInt(r, minLength, maxLength);
     final StringBuilder whitespace = new StringBuilder();
     for (int written = 0; written < length; written++) {
       final int index = nextInt(r, 0, WHITESPACE_CHARACTERS.length-1);
       final char picked = WHITESPACE_CHARACTERS[index];
       // sanity check
       Assert.assertTrue("Not really whitespace? (@"+index+"): " + picked, Character.isWhitespace(picked));
       whitespace.append(picked);
     }
     return whitespace.toString();
   }

   /**
    * List of characters that match {@link Character#isWhitespace}.
    * <p>
    * Only characters that are whitespace on every supported JVM belong here:
    * entries are commented out when {@link Character#isWhitespace} rejects
    * them, or rejects them on some JVMs.
    */
   public static final char[] WHITESPACE_CHARACTERS = new char[] {
     // :TODO: is this list exhaustive?
     '\u0009',
     '\n',
     '\u000B',
     '\u000C',
     '\r',
     '\u001C',
     '\u001D',
     '\u001E',
     '\u001F',
     '\u0020',
     // '\u0085', failed sanity check?
     '\u1680',
     // '\u180E', no longer whitespace on JVMs using Unicode 6.3 or newer (Java 9+)
     '\u2000',
     '\u2001',
     '\u2002',
     '\u2003',
     '\u2004',
     '\u2005',
     '\u2006',
     '\u2008',
     '\u2009',
     '\u200A',
     '\u2028',
     '\u2029',
     '\u205F',
     '\u3000',
   };
 }