| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.solr.bench.generators; |
| |
| import static org.apache.solr.bench.generators.SourceDSL.checkArguments; |
| import static org.apache.solr.bench.generators.SourceDSL.integers; |
| |
| import java.io.InputStream; |
| import java.nio.charset.StandardCharsets; |
| import java.util.ArrayList; |
| import java.util.Collections; |
| import java.util.List; |
| import java.util.Objects; |
| import java.util.Random; |
| import java.util.Scanner; |
| import java.util.SplittableRandom; |
| import org.apache.solr.bench.BaseBenchState; |
| import org.apache.solr.bench.SolrGenerate; |
| import org.apache.solr.bench.SolrRandomnessSource; |
| import org.quicktheories.core.Gen; |
| import org.quicktheories.core.RandomnessSource; |
| import org.quicktheories.impl.SplittableRandomSource; |
| |
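/**
 * Entry point of the string-generator DSL: factory methods for word-list, numeric, "realistic"
 * Unicode and code-point-range {@code String} generators.
 */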
| public class StringsDSL { |
| |
| private static final int BASIC_LATIN_LAST_CODEPOINT = 0x007E; |
| private static final int BASIC_LATIN_FIRST_CODEPOINT = 0x0020; |
| private static final int ASCII_LAST_CODEPOINT = 0x007F; |
| private static final int LARGEST_DEFINED_BMP_CODEPOINT = 65533; |
| |
| private static final List<String> words; |
| |
| private static final int WORD_SIZE; |
| |
| static { |
| // english word list via https://github.com/dwyl/english-words |
| |
| words = new ArrayList<>(1000); |
| InputStream inputStream = StringsDSL.class.getClassLoader().getResourceAsStream("words.txt"); |
| try (Scanner scanner = |
| new Scanner(Objects.requireNonNull(inputStream), StandardCharsets.UTF_8.name())) { |
| while (scanner.hasNextLine()) { |
| words.add(scanner.nextLine()); |
| } |
| } |
| Collections.shuffle(words, new Random(BaseBenchState.RANDOM_SEED)); |
| WORD_SIZE = words.size(); |
| } |
| |
| /** |
| * Word list word list generator builder. |
| * |
| * @return the word list generator builder |
| */ |
| public WordListGeneratorBuilder wordList() { |
| return new WordListGeneratorBuilder( |
| new SolrGen<>(new WordListStringSolrGen(), String.class).describedAs("WordList Word")); |
| } |
| |
| /** |
| * Generates integers as Strings, and shrinks towards "0". |
| * |
| * @return a Source of type String |
| */ |
| public SolrGen<String> numeric() { |
| return new SolrGen<>(numericBetween(Integer.MIN_VALUE, Integer.MAX_VALUE), String.class); |
| } |
| |
| /** |
| * Generates integers within the interval as Strings. |
| * |
| * @param startInclusive - lower inclusive bound of integer domain |
| * @param endInclusive - upper inclusive bound of integer domain |
| * @return a Source of type String |
| */ |
| public SolrGen<String> numericBetween(int startInclusive, int endInclusive) { |
| checkArguments( |
| startInclusive <= endInclusive, |
| "There are no Integer values to be generated between startInclusive (%s) and endInclusive (%s)", |
| startInclusive, |
| endInclusive); |
| return new SolrGen<>(Strings.boundedNumericStrings(startInclusive, endInclusive), String.class); |
| } |
| |
| /** |
| * Constructs a StringGeneratorBuilder which will build Strings composed of all defined code |
| * points |
| * |
| * @return a StringGeneratorBuilder |
| */ |
| public StringGeneratorBuilder allPossible() { |
| return betweenCodePoints(Character.MIN_CODE_POINT, Character.MAX_CODE_POINT); |
| } |
| |
| /** |
| * Realistic unicode generator builder. No whitespace. |
| * |
| * @param minLength the min length |
| * @param maxLength the max length |
| * @return the realistic unicode generator builder |
| */ |
| public RealisticUnicodeGeneratorBuilder realisticUnicode(int minLength, int maxLength) { |
| final var randomUnicodeGen = |
| new SolrGen<String>() { |
| @Override |
| public String generate(SolrRandomnessSource in) { |
| int block = |
| integers() |
| .between(0, blockStarts.length - 1) |
| .describedAs("Realistic Unicode BLock Index") |
| .generate(in); |
| |
| return Strings.ofBoundedLengthStrings( |
| blockStarts[block], blockEnds[block], minLength, maxLength) |
| .describedAs("Realistic Unicode") |
| .generate(in); |
| } |
| }.assuming( |
| str -> { |
| // The string must not have whitespace |
| for (int i = 0; i < str.length(); i++) { |
| char c = str.charAt(i); |
| if (Character.isWhitespace(c)) { |
| return false; |
| } |
| } |
| return true; |
| }); |
| return new RealisticUnicodeGeneratorBuilder( |
| new SolrDescribingGenerator<>(randomUnicodeGen, Objects::toString)); |
| } |
| |
| /** |
| * Constructs a StringGeneratorBuilder which will build Strings composed of all defined code |
| * points in the Basic Multilingual Plane |
| * |
| * @return a StringGeneratorBuilder |
| */ |
| public StringGeneratorBuilder basicMultilingualPlaneAlphabet() { |
| return betweenCodePoints(Character.MIN_CODE_POINT, LARGEST_DEFINED_BMP_CODEPOINT); |
| } |
| |
| /** |
| * Constructs a StringGeneratorBuilder which will build Strings composed of Unicode Basic Latin |
| * Alphabet |
| * |
| * @return a StringGeneratorBuilder |
| */ |
| public StringGeneratorBuilder basicLatinAlphabet() { |
| return betweenCodePoints(BASIC_LATIN_FIRST_CODEPOINT, BASIC_LATIN_LAST_CODEPOINT); |
| } |
| |
| /** |
| * Alpha string generator builder. |
| * |
| * @return the string generator builder |
| */ |
| public StringGeneratorBuilder alpha() { |
| return betweenCodePoints('a', 'z' + 1); |
| } |
| |
| /** |
| * Alpha numeric string generator builder. |
| * |
| * @return the string generator builder |
| */ |
| public StringGeneratorBuilder alphaNumeric() { |
| return betweenCodePoints(' ', 'z' + 1); |
| } |
| |
| /** |
| * Constructs a StringGeneratorBuilder which will build Strings composed of Unicode Ascii Alphabet |
| * |
| * @return a StringGeneratorBuilder |
| */ |
| public StringGeneratorBuilder ascii() { |
| return betweenCodePoints(Character.MIN_CODE_POINT, ASCII_LAST_CODEPOINT); |
| } |
| |
| /** |
| * Strings with characters between two (inclusive) code points |
| * |
| * @param minInclusive minimum code point |
| * @param maxInclusive max code point |
| * @return Builder for strings |
| */ |
| public StringGeneratorBuilder betweenCodePoints(int minInclusive, int maxInclusive) { |
| return new StringGeneratorBuilder(minInclusive, maxInclusive); |
| } |
| |
| /** The type Word list generator builder. */ |
| public static class WordListGeneratorBuilder { |
| private final SolrGen<String> strings; |
| |
| /** |
| * Instantiates a new Word list generator builder. |
| * |
| * @param strings the strings |
| */ |
| WordListGeneratorBuilder(SolrGen<String> strings) { |
| this.strings = strings; |
| } |
| |
| /** |
| * Of one solr gen. |
| * |
| * @return the solr gen |
| */ |
| public SolrGen<String> ofOne() { |
| return strings; |
| } |
| |
| /** |
| * Multi solr gen. |
| * |
| * @param count the count |
| * @return the solr gen |
| */ |
| public SolrGen<String> multi(int count) { |
| return multiStringGen(strings, count); |
| } |
| |
| /** |
| * With distribution word list generator builder. |
| * |
| * @param distribution the distribution |
| * @return the word list generator builder |
| */ |
| public WordListGeneratorBuilder withDistribution(Distribution distribution) { |
| this.strings.withDistribution(distribution); |
| return this; |
| } |
| } |
| |
| /** The type Realistic unicode generator builder. */ |
| public static class RealisticUnicodeGeneratorBuilder { |
| private final SolrGen<String> strings; |
| |
| /** |
| * Instantiates a new Realistic unicode generator builder. |
| * |
| * @param strings the strings |
| */ |
| RealisticUnicodeGeneratorBuilder(SolrGen<String> strings) { |
| this.strings = strings; |
| } |
| |
| /** |
| * Of one solr gen. |
| * |
| * @return the solr gen |
| */ |
| public SolrGen<String> ofOne() { |
| return strings; |
| } |
| |
| /** |
| * Multi solr gen. |
| * |
| * @param count the count |
| * @return the solr gen |
| */ |
| public SolrGen<String> multi(int count) { |
| return multiStringGen(strings, count); |
| } |
| } |
| |
| /** The type String generator builder. */ |
| public static class StringGeneratorBuilder { |
| |
| private final int minCodePoint; |
| private final int maxCodePoint; |
| private Integer cardinalityStart; |
| private Gen<Integer> maxCardinality; |
| private int multi; |
| |
| private StringGeneratorBuilder(int minCodePoint, int maxCodePoint) { |
| this.minCodePoint = minCodePoint; |
| this.maxCodePoint = maxCodePoint; |
| } |
| |
| /** |
| * Generates Strings of a fixed number of code points. |
| * |
| * @param codePoints - the fixed number of code points for the String |
| * @return a a Source of type String |
| */ |
| public SolrGen<String> ofFixedNumberOfCodePoints(int codePoints) { |
| checkArguments( |
| codePoints >= 0, |
| "The number of codepoints cannot be negative; %s is not an accepted argument", |
| codePoints); |
| return new SolrGen<>( |
| Strings.withCodePoints(minCodePoint, maxCodePoint, SolrGenerate.constant(codePoints)), |
| String.class); |
| } |
| |
| /** |
| * Generates Strings of a fixed length. |
| * |
| * @param fixedLength - the fixed length for the Strings |
| * @return a Source of type String |
| */ |
| public SolrGen<String> ofLength(int fixedLength) { |
| return ofLengthBetween(fixedLength, fixedLength); |
| } |
| |
| /** |
| * Max cardinality string generator builder. |
| * |
| * @param max the max |
| * @return the string generator builder |
| */ |
| public StringGeneratorBuilder maxCardinality(int max) { |
| maxCardinality = SolrGenerate.constant(max); |
| return this; |
| } |
| |
| /** |
| * Max cardinality string generator builder. |
| * |
| * @param max the max |
| * @return the string generator builder |
| */ |
| public StringGeneratorBuilder maxCardinality(Gen<Integer> max) { |
| maxCardinality = max; |
| return this; |
| } |
| |
| /** |
| * Multi string generator builder. |
| * |
| * @param count the count |
| * @return the string generator builder |
| */ |
| public StringGeneratorBuilder multi(int count) { |
| this.multi = count; |
| return this; |
| } |
| |
| /** |
| * Generates Strings of length bounded between minLength and maxLength inclusively. |
| * |
| * @param minLength - minimum inclusive length of String |
| * @param maxLength - maximum inclusive length of String |
| * @return a Source of type String |
| */ |
| public SolrGen<String> ofLengthBetween(int minLength, int maxLength) { |
| checkArguments( |
| minLength <= maxLength, |
| "The minLength (%s) is longer than the maxLength(%s)", |
| minLength, |
| maxLength); |
| checkArguments( |
| minLength >= 0, |
| "The length of a String cannot be negative; %s is not an accepted argument", |
| minLength); |
| SolrGen<String> strings = |
| Strings.ofBoundedLengthStrings(minCodePoint, maxCodePoint, minLength, maxLength); |
| |
| if (maxCardinality != null) { |
| SolrGen<String> gen = |
| new SolrGen<>( |
| new SolrGen<>() { |
| @Override |
| public String generate(SolrRandomnessSource in) { |
| Integer maxCard = maxCardinality.generate(in); |
| |
| if (cardinalityStart == null) { |
| cardinalityStart = |
| SolrGenerate.range(0, Integer.MAX_VALUE - maxCard - 1).generate(in); |
| } |
| |
| long seed = |
| SolrGenerate.range(cardinalityStart, cardinalityStart + maxCard - 1) |
| .generate(in); |
| return strings.generate( |
| (RandomnessSource) new SplittableRandomSource(new SplittableRandom(seed))); |
| } |
| |
| @Override |
| public String generate(RandomnessSource in) { |
| return generate((SolrRandomnessSource) in); |
| } |
| }, |
| String.class); |
| if (multi > 1) { |
| return multiStringGen(gen, multi); |
| } |
| return new SolrGen<>(gen, String.class); |
| } else { |
| if (multi > 1) { |
| return multiStringGen(strings, multi); |
| } |
| return new SolrGen<>(strings, String.class); |
| } |
| } |
| } |
| |
| private static SolrGen<String> multiStringGen(SolrGen<String> strings, int multi) { |
| return new SolrGen<>(MultiString.class) { |
| |
| { |
| describedAs("MultiString"); |
| } |
| |
| @Override |
| public String generate(SolrRandomnessSource in) { |
| StringBuilder sb = new StringBuilder(64); |
| for (int i = 0; i < multi; i++) { |
| sb.append(strings.generate(in)); |
| if (i < multi - 1) { |
| sb.append(' '); |
| } |
| } |
| return sb.toString(); |
| } |
| }; |
| } |
| |
| private static class WordListStringSolrGen extends SolrGen<String> { |
| /** Instantiates a new Word list string solr gen. */ |
| public WordListStringSolrGen() { |
| super(String.class); |
| } |
| |
| @Override |
| public String generate(RandomnessSource in) { |
| return words.get( |
| integers() |
| .between(0, WORD_SIZE - 1) |
| .describedAs("WordList Index") |
| .withDistribution(this.getDistribution()) |
| .generate(in)); |
| } |
| } |
| |
| private static final int[] blockStarts = { |
| 0x0000, 0x0080, 0x0100, 0x0180, 0x0250, 0x02B0, 0x0300, 0x0370, 0x0400, 0x0500, 0x0530, 0x0590, |
| 0x0600, 0x0700, 0x0750, 0x0780, 0x07C0, 0x0800, 0x0900, 0x0980, 0x0A00, 0x0A80, 0x0B00, 0x0B80, |
| 0x0C00, 0x0C80, 0x0D00, 0x0D80, 0x0E00, 0x0E80, 0x0F00, 0x1000, 0x10A0, 0x1100, 0x1200, 0x1380, |
| 0x13A0, 0x1400, 0x1680, 0x16A0, 0x1700, 0x1720, 0x1740, 0x1760, 0x1780, 0x1800, 0x18B0, 0x1900, |
| 0x1950, 0x1980, 0x19E0, 0x1A00, 0x1A20, 0x1B00, 0x1B80, 0x1C00, 0x1C50, 0x1CD0, 0x1D00, 0x1D80, |
| 0x1DC0, 0x1E00, 0x1F00, 0x2000, 0x2070, 0x20A0, 0x20D0, 0x2100, 0x2150, 0x2190, 0x2200, 0x2300, |
| 0x2400, 0x2440, 0x2460, 0x2500, 0x2580, 0x25A0, 0x2600, 0x2700, 0x27C0, 0x27F0, 0x2800, 0x2900, |
| 0x2980, 0x2A00, 0x2B00, 0x2C00, 0x2C60, 0x2C80, 0x2D00, 0x2D30, 0x2D80, 0x2DE0, 0x2E00, 0x2E80, |
| 0x2F00, 0x2FF0, 0x3000, 0x3040, 0x30A0, 0x3100, 0x3130, 0x3190, 0x31A0, 0x31C0, 0x31F0, 0x3200, |
| 0x3300, 0x3400, 0x4DC0, 0x4E00, 0xA000, 0xA490, 0xA4D0, 0xA500, 0xA640, 0xA6A0, 0xA700, 0xA720, |
| 0xA800, 0xA830, 0xA840, 0xA880, 0xA8E0, 0xA900, 0xA930, 0xA960, 0xA980, 0xAA00, 0xAA60, 0xAA80, |
| 0xABC0, 0xAC00, 0xD7B0, 0xE000, 0xF900, 0xFB00, 0xFB50, 0xFE00, 0xFE10, 0xFE20, 0xFE30, 0xFE50, |
| 0xFE70, 0xFF00, 0xFFF0, 0x10000, 0x10080, 0x10100, 0x10140, 0x10190, 0x101D0, 0x10280, 0x102A0, |
| 0x10300, 0x10330, 0x10380, 0x103A0, 0x10400, 0x10450, 0x10480, 0x10800, 0x10840, 0x10900, |
| 0x10920, 0x10A00, 0x10A60, 0x10B00, 0x10B40, 0x10B60, 0x10C00, 0x10E60, 0x11080, 0x12000, |
| 0x12400, 0x13000, 0x1D000, 0x1D100, 0x1D200, 0x1D300, 0x1D360, 0x1D400, 0x1F000, 0x1F030, |
| 0x1F100, 0x1F200, 0x20000, 0x2A700, 0x2F800, 0xE0000, 0xE0100, 0xF0000, 0x100000 |
| }; |
| |
| private static final int[] blockEnds = { |
| 0x007F, 0x00FF, 0x017F, 0x024F, 0x02AF, 0x02FF, 0x036F, 0x03FF, 0x04FF, 0x052F, 0x058F, 0x05FF, |
| 0x06FF, 0x074F, 0x077F, 0x07BF, 0x07FF, 0x083F, 0x097F, 0x09FF, 0x0A7F, 0x0AFF, 0x0B7F, 0x0BFF, |
| 0x0C7F, 0x0CFF, 0x0D7F, 0x0DFF, 0x0E7F, 0x0EFF, 0x0FFF, 0x109F, 0x10FF, 0x11FF, 0x137F, 0x139F, |
| 0x13FF, 0x167F, 0x169F, 0x16FF, 0x171F, 0x173F, 0x175F, 0x177F, 0x17FF, 0x18AF, 0x18FF, 0x194F, |
| 0x197F, 0x19DF, 0x19FF, 0x1A1F, 0x1AAF, 0x1B7F, 0x1BBF, 0x1C4F, 0x1C7F, 0x1CFF, 0x1D7F, 0x1DBF, |
| 0x1DFF, 0x1EFF, 0x1FFF, 0x206F, 0x209F, 0x20CF, 0x20FF, 0x214F, 0x218F, 0x21FF, 0x22FF, 0x23FF, |
| 0x243F, 0x245F, 0x24FF, 0x257F, 0x259F, 0x25FF, 0x26FF, 0x27BF, 0x27EF, 0x27FF, 0x28FF, 0x297F, |
| 0x29FF, 0x2AFF, 0x2BFF, 0x2C5F, 0x2C7F, 0x2CFF, 0x2D2F, 0x2D7F, 0x2DDF, 0x2DFF, 0x2E7F, 0x2EFF, |
| 0x2FDF, 0x2FFF, 0x303F, 0x309F, 0x30FF, 0x312F, 0x318F, 0x319F, 0x31BF, 0x31EF, 0x31FF, 0x32FF, |
| 0x33FF, 0x4DBF, 0x4DFF, 0x9FFF, 0xA48F, 0xA4CF, 0xA4FF, 0xA63F, 0xA69F, 0xA6FF, 0xA71F, 0xA7FF, |
| 0xA82F, 0xA83F, 0xA87F, 0xA8DF, 0xA8FF, 0xA92F, 0xA95F, 0xA97F, 0xA9DF, 0xAA5F, 0xAA7F, 0xAADF, |
| 0xABFF, 0xD7AF, 0xD7FF, 0xF8FF, 0xFAFF, 0xFB4F, 0xFDFF, 0xFE0F, 0xFE1F, 0xFE2F, 0xFE4F, 0xFE6F, |
| 0xFEFF, 0xFFEF, 0xFFFF, 0x1007F, 0x100FF, 0x1013F, 0x1018F, 0x101CF, 0x101FF, 0x1029F, 0x102DF, |
| 0x1032F, 0x1034F, 0x1039F, 0x103DF, 0x1044F, 0x1047F, 0x104AF, 0x1083F, 0x1085F, 0x1091F, |
| 0x1093F, 0x10A5F, 0x10A7F, 0x10B3F, 0x10B5F, 0x10B7F, 0x10C4F, 0x10E7F, 0x110CF, 0x123FF, |
| 0x1247F, 0x1342F, 0x1D0FF, 0x1D1FF, 0x1D24F, 0x1D35F, 0x1D37F, 0x1D7FF, 0x1F02F, 0x1F09F, |
| 0x1F1FF, 0x1F2FF, 0x2A6DF, 0x2B73F, 0x2FA1F, 0xE007F, 0xE01EF, 0xFFFFF, 0x10FFFF |
| }; |
| } |