// blob: 191e397c14e0f1807df2c809c3b2937cfb06bda0 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.parquet.statistics;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.function.Supplier;
import org.apache.parquet.FixedBinaryTestUtils;
import org.apache.parquet.io.api.Binary;
public class RandomValues {
private static final String ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890";
/**
 * Base class for the seeded value generators in this file.
 *
 * <p>Every helper draws from a single {@link Random} created from the seed, so two generators
 * built with the same seed and driven by the same sequence of calls produce the same values.
 *
 * @param <T> the type of value produced by {@link #nextValue()}
 */
static abstract class RandomValueGenerator<T extends Comparable<T>> implements Supplier<T> {
  private final Random random;

  /**
   * @param seed seed for the underlying {@link Random}
   */
  protected RandomValueGenerator(long seed) {
    this.random = new Random(seed);
  }

  /**
   * @return {@code true} when a draw from {@code [0, 10)} is zero, i.e. about one call in ten
   */
  public boolean shouldGenerateNull() {
    return (random.nextInt(10) == 0);
  }

  /**
   * @return the next pseudo-random {@code int}; may be negative
   */
  public int randomInt() {
    return random.nextInt();
  }

  /**
   * Draws a non-negative int bounded by the magnitude of {@code maximum}.
   *
   * @param maximum inclusive upper bound; its absolute value is used, so it may be negative
   * @return a value in {@code [0, |maximum|]}, capped at {@link Integer#MAX_VALUE}
   */
  public int randomPositiveInt(int maximum) {
    // Maximum may be a random number (which may be negative).
    final int magnitude = Math.abs(maximum);
    if (magnitude < 0 || magnitude == Integer.MAX_VALUE) {
      // Math.abs(Integer.MIN_VALUE) is still negative, and Integer.MAX_VALUE + 1 overflows.
      // Either way "magnitude + 1" is not a valid bound for Random.nextInt(int), which would
      // throw. Clearing the sign bit of an unbounded draw covers [0, Integer.MAX_VALUE].
      return random.nextInt() & Integer.MAX_VALUE;
    }
    return random.nextInt(magnitude + 1);
  }

  /**
   * @return the next pseudo-random {@code long}; may be negative
   */
  public long randomLong() {
    return random.nextLong();
  }

  /**
   * Reduces a random long with the remainder operator.
   *
   * @param maximum the divisor; a value of zero yields zero instead of a division error
   * @return {@code randomLong() % maximum}. The result has the sign of the draw, so it lies
   *     strictly between {@code -|maximum|} and {@code |maximum|} and may be negative.
   */
  public long randomLong(long maximum) {
    // Always consume one draw so the stream position does not depend on the argument.
    final long draw = randomLong();
    return (maximum == 0L) ? 0L : draw % maximum;
  }

  /**
   * @return the next pseudo-random {@code float} in {@code [0, 1)}
   */
  public float randomFloat() {
    return random.nextFloat();
  }

  /**
   * Reduces a random float from {@code [0, 1)} with the remainder operator.
   *
   * @param maximum the divisor; a value of zero yields zero instead of NaN
   * @return {@code randomFloat() % maximum}
   */
  public float randomFloat(float maximum) {
    final float draw = random.nextFloat();
    // A floating-point remainder by zero is NaN; zero is the only sensible value in that case.
    return (maximum == 0f) ? 0f : draw % maximum;
  }

  /**
   * @return the next pseudo-random {@code double} in {@code [0, 1)}
   */
  public double randomDouble() {
    return random.nextDouble();
  }

  /**
   * Reduces a random double from {@code [0, 1)} with the remainder operator.
   *
   * @param maximum the divisor; a value of zero yields zero instead of NaN
   * @return {@code randomDouble() % maximum}
   */
  public double randomDouble(double maximum) {
    final double draw = random.nextDouble();
    // A floating-point remainder by zero is NaN; zero is the only sensible value in that case.
    return (maximum == 0d) ? 0d : draw % maximum;
  }

  /**
   * @return a non-negative pseudo-random value of at most 95 bits
   */
  public BigInteger randomInt96() {
    return new BigInteger(95, random);
  }

  /**
   * Draws 95-bit values until one does not exceed {@code maximum}.
   *
   * <p>Candidates are always drawn from the full 95-bit range, so the expected number of draws
   * grows as {@code maximum} shrinks.
   *
   * @param maximum inclusive upper bound; must not be negative
   * @return a value in {@code [0, maximum]}
   * @throws IllegalArgumentException if {@code maximum} is negative (no draw could ever match)
   */
  public BigInteger randomInt96(BigInteger maximum) {
    final int sign = maximum.signum();
    if (sign < 0) {
      throw new IllegalArgumentException("maximum must not be negative: " + maximum);
    }
    if (sign == 0) {
      // Zero is the only admissible value; waiting for a 95-bit draw to hit it is pointless.
      return BigInteger.ZERO;
    }
    BigInteger result;
    do {
      result = randomInt96();
    } while (result.compareTo(maximum) > 0);
    return result;
  }

  /**
   * @return one character picked from {@code ALPHABET}
   */
  public char randomLetter() {
    return ALPHABET.charAt(randomPositiveInt(ALPHABET.length() - 1));
  }

  /**
   * @param maxLength inclusive upper bound for the length of the result
   * @return a string of random letters whose length is drawn via {@link #randomPositiveInt(int)},
   *     so it may be empty
   */
  public String randomString(int maxLength) {
    return randomFixedLengthString(randomPositiveInt(maxLength));
  }

  /**
   * @param length number of letters to generate; zero or less yields an empty string
   * @return a string of exactly {@code length} random letters
   */
  public String randomFixedLengthString(int length) {
    StringBuilder builder = new StringBuilder();
    for (int index = 0; index < length; index++) {
      builder.append(randomLetter());
    }
    return builder.toString();
  }

  /**
   * @return the next generated value
   */
  public abstract T nextValue();

  /**
   * {@link Supplier} view of this generator; delegates to {@link #nextValue()}.
   */
  @Override
  public T get() {
    return nextValue();
  }
}
/**
 * Base class for generators that can also expose their values as {@link Binary} instances.
 * It owns a fixed-size scratch buffer that subclasses may fill.
 *
 * @param <T> the type of value produced by {@code nextValue()}
 */
static abstract class RandomBinaryBase<T extends Comparable<T>> extends RandomValueGenerator<T> {
  protected final int bufferLength;
  protected final byte[] buffer;

  /**
   * @param seed seed passed to the underlying random source
   * @param bufferLength size in bytes of the scratch buffer
   */
  public RandomBinaryBase(long seed, int bufferLength) {
    super(seed);
    this.buffer = new byte[bufferLength];
    this.bufferLength = bufferLength;
  }

  /**
   * @return the next generated value as a {@link Binary}
   */
  public abstract Binary nextBinaryValue();

  /**
   * Copies up to {@code bufferLength} bytes of {@code data} into the scratch buffer and wraps
   * the same number of leading bytes of {@code data} in a reused-array {@link Binary}.
   *
   * @param data the bytes to expose; anything beyond {@code bufferLength} is cut off
   * @return a Binary over the first {@code min(data.length, bufferLength)} bytes of {@code data}
   */
  public Binary asReusedBinary(byte[] data) {
    final int copied = (data.length < bufferLength) ? data.length : bufferLength;
    System.arraycopy(data, 0, buffer, 0, copied);
    // NOTE(review): the Binary wraps data, not buffer, so the copy above is not visible through
    // the returned value. Confirm whether wrapping buffer was intended before changing this;
    // callers that keep several returned values would then all share one array.
    return Binary.fromReusedByteArray(data, 0, copied);
  }
}
/**
 * Generates ints as a lower bound plus a random non-negative offset.
 *
 * <p>The offset limit is {@code maximum - minimum} computed in int arithmetic, so it wraps when
 * the span between the bounds exceeds {@link Integer#MAX_VALUE}.
 */
public static class IntGenerator extends RandomValueGenerator<Integer> {
  private final int minimum;
  private final int range;

  /**
   * Uses two random ints (drawn from the seeded source) as the bounds.
   *
   * @param seed seed for the random source
   */
  public IntGenerator(long seed) {
    super(seed);
    final RandomRange<Integer> bounds = new RandomRange<>(randomInt(), randomInt());
    final int low = bounds.minimum();
    this.minimum = low;
    this.range = bounds.maximum() - low;
  }

  /**
   * Uses the given bounds; they are swapped if passed in the wrong order.
   *
   * @param seed seed for the random source
   * @param minimum one bound of the generated values
   * @param maximum the other bound of the generated values
   */
  public IntGenerator(long seed, int minimum, int maximum) {
    super(seed);
    final RandomRange<Integer> bounds = new RandomRange<>(minimum, maximum);
    final int low = bounds.minimum();
    this.minimum = low;
    this.range = bounds.maximum() - low;
  }

  /**
   * @return the lower bound plus an offset drawn with {@code randomPositiveInt(range)}
   */
  @Override
  public Integer nextValue() {
    final int offset = randomPositiveInt(range);
    return minimum + offset;
  }
}
/**
 * Variant of {@link IntGenerator} that keeps only the low 8 or 16 bits of each value, depending
 * on whether it was built from {@code byte} or {@code short} bounds.
 */
public static class UIntGenerator extends IntGenerator {
  private static final int BYTE_MASK = 0xFF;
  private static final int SHORT_MASK = 0xFFFF;

  private final int mask;

  /**
   * @param seed seed for the random source
   * @param minimum one bound, as a signed byte
   * @param maximum the other bound, as a signed byte
   */
  public UIntGenerator(long seed, byte minimum, byte maximum) {
    super(seed, minimum, maximum);
    this.mask = BYTE_MASK;
  }

  /**
   * @param seed seed for the random source
   * @param minimum one bound, as a signed short
   * @param maximum the other bound, as a signed short
   */
  public UIntGenerator(long seed, short minimum, short maximum) {
    super(seed, minimum, maximum);
    this.mask = SHORT_MASK;
  }

  /**
   * @return the parent's value with every bit above the mask cleared, so the result is never
   *     negative
   */
  @Override
  public Integer nextValue() {
    final int signedValue = super.nextValue();
    return signedValue & mask;
  }
}
/**
 * Generates ints with no bounds applied: each value is a raw draw from the seeded source.
 */
public static class UnconstrainedIntGenerator extends RandomValueGenerator<Integer> {

  /**
   * @param seed seed for the random source
   */
  public UnconstrainedIntGenerator(long seed) {
    super(seed);
  }

  /**
   * @return the next random int, boxed
   */
  @Override
  public Integer nextValue() {
    return Integer.valueOf(randomInt());
  }
}
/**
 * Generates longs between two bounds that are themselves drawn from the seeded source when the
 * generator is created. Every value lies in {@code [minimum, maximum]}.
 */
public static class LongGenerator extends RandomValueGenerator<Long> {
  private final RandomRange<Long> randomRange = new RandomRange<Long>(randomLong(), randomLong());
  private final long minimum = randomRange.minimum();
  private final long maximum = randomRange.maximum();
  // Wraps to a negative number when the true span between the bounds exceeds Long.MAX_VALUE.
  private final long range = (maximum - minimum);

  /**
   * @param seed seed for the random source
   */
  public LongGenerator(long seed) {
    super(seed);
  }

  /**
   * @return a value in {@code [minimum, maximum]}
   */
  @Override
  public Long nextValue() {
    if (range < 0) {
      // The span does not fit in a long, which means more than half of all longs are in
      // bounds. Rejection sampling therefore needs fewer than two draws on average.
      long candidate = randomLong();
      while (candidate < minimum || candidate > maximum) {
        candidate = randomLong();
      }
      return candidate;
    }
    final long draw = randomLong();
    if (range == Long.MAX_VALUE) {
      // range + 1 would overflow; clearing the sign bit yields [0, Long.MAX_VALUE] directly.
      return minimum + (draw & Long.MAX_VALUE);
    }
    // floorMod never returns a negative offset (unlike %), and a divisor of at least 1 avoids
    // the division by zero that a zero-width range would otherwise cause.
    return minimum + Math.floorMod(draw, range + 1);
  }
}
/**
 * Generates longs with no bounds applied: each value is a raw draw from the seeded source.
 */
public static class UnconstrainedLongGenerator extends RandomValueGenerator<Long> {

  /**
   * @param seed seed for the random source
   */
  public UnconstrainedLongGenerator(long seed) {
    super(seed);
  }

  /**
   * @return the next random long, boxed
   */
  @Override
  public Long nextValue() {
    return Long.valueOf(randomLong());
  }
}
/**
 * Generates non-negative {@link BigInteger} values between two bounds that are themselves drawn
 * from the seeded source when the generator is created.
 */
public static class Int96Generator extends RandomBinaryBase<BigInteger> {
  private static final int INT_96_LENGTH = 12;

  private final RandomRange<BigInteger> randomRange;
  private final BigInteger minimum;
  private final BigInteger maximum;
  private final BigInteger range;

  /**
   * @param seed seed for the random source
   */
  public Int96Generator(long seed) {
    super(seed, INT_96_LENGTH);
    this.randomRange = new RandomRange<>(randomInt96(), randomInt96());
    this.minimum = randomRange.minimum();
    this.maximum = randomRange.maximum();
    this.range = maximum.subtract(minimum);
  }

  /**
   * @return the lower bound plus an offset obtained from {@code randomInt96(range)}
   */
  @Override
  public BigInteger nextValue() {
    final BigInteger offset = randomInt96(range);
    return minimum.add(offset);
  }

  /**
   * @return the next value converted by {@code FixedBinaryTestUtils.getFixedBinary} with a
   *     length of 12 (presumably a 12-byte fixed-length encoding; confirm against that helper)
   */
  @Override
  public Binary nextBinaryValue() {
    final BigInteger value = nextValue();
    return FixedBinaryTestUtils.getFixedBinary(INT_96_LENGTH, value);
  }
}
/**
 * Generates floats as a lower bound plus a random offset. Both bounds are drawn from the seeded
 * source when the generator is created.
 */
public static class FloatGenerator extends RandomValueGenerator<Float> {
  private final RandomRange<Float> randomRange;
  private final float minimum;
  private final float maximum;
  private final float range;

  /**
   * @param seed seed for the random source
   */
  public FloatGenerator(long seed) {
    super(seed);
    this.randomRange = new RandomRange<>(randomFloat(), randomFloat());
    this.minimum = randomRange.minimum();
    this.maximum = randomRange.maximum();
    this.range = maximum - minimum;
  }

  /**
   * @return the lower bound plus {@code randomFloat(range)}
   */
  @Override
  public Float nextValue() {
    final float offset = randomFloat(range);
    return minimum + offset;
  }
}
/**
 * Generates floats straight from {@code randomFloat()}, with no additional bounds applied.
 */
public static class UnconstrainedFloatGenerator extends RandomValueGenerator<Float> {

  /**
   * @param seed seed for the random source
   */
  public UnconstrainedFloatGenerator(long seed) {
    super(seed);
  }

  /**
   * @return the next random float, boxed
   */
  @Override
  public Float nextValue() {
    return Float.valueOf(randomFloat());
  }
}
/**
 * Generates doubles as a lower bound plus a random offset. Both bounds are drawn from the seeded
 * source when the generator is created.
 */
public static class DoubleGenerator extends RandomValueGenerator<Double> {
  private final RandomRange<Double> randomRange;
  private final double minimum;
  private final double maximum;
  private final double range;

  /**
   * @param seed seed for the random source
   */
  public DoubleGenerator(long seed) {
    super(seed);
    this.randomRange = new RandomRange<>(randomDouble(), randomDouble());
    this.minimum = randomRange.minimum();
    this.maximum = randomRange.maximum();
    this.range = maximum - minimum;
  }

  /**
   * @return the lower bound plus {@code randomDouble(range)}
   */
  @Override
  public Double nextValue() {
    final double offset = randomDouble(range);
    return minimum + offset;
  }
}
/**
 * Generates doubles straight from {@code randomDouble()}, with no additional bounds applied.
 */
public static class UnconstrainedDoubleGenerator extends RandomValueGenerator<Double> {

  /**
   * @param seed seed for the random source
   */
  public UnconstrainedDoubleGenerator(long seed) {
    super(seed);
  }

  /**
   * @return the next random double, boxed
   */
  @Override
  public Double nextValue() {
    return Double.valueOf(randomDouble());
  }
}
/**
 * Generates short random strings of letters and digits, available either as {@link String} or
 * as {@link Binary}.
 */
public static class StringGenerator extends RandomBinaryBase<String> {
  private static final int MAX_STRING_LENGTH = 16;

  /**
   * @param seed seed for the random source
   */
  public StringGenerator(long seed) {
    super(seed, MAX_STRING_LENGTH);
  }

  /**
   * Picks an upper bound in {@code [1, MAX_STRING_LENGTH]} and passes it to
   * {@code randomString}, which draws the actual length again.
   *
   * @return a string of at most {@code MAX_STRING_LENGTH} characters; it may be empty
   */
  @Override
  public String nextValue() {
    int stringLength = randomPositiveInt(MAX_STRING_LENGTH - 1) + 1;
    return randomString(stringLength);
  }

  /**
   * @return the next string encoded as UTF-8 and wrapped by {@code asReusedBinary}
   */
  @Override
  public Binary nextBinaryValue() {
    // Name the charset explicitly so the bytes do not depend on the platform default.
    return asReusedBinary(nextValue().getBytes(StandardCharsets.UTF_8));
  }
}
/**
 * Generates variable-length random byte sequences. Each value is written into the shared
 * scratch buffer and returned as a reused-array {@link Binary} over that buffer, so a later
 * call overwrites the bytes behind earlier results.
 */
public static class BinaryGenerator extends RandomBinaryBase<Binary> {
  private static final int MAX_STRING_LENGTH = 16;
  private static final int MIN_LENGTH = 5;

  /**
   * @param seed seed for the random source
   */
  public BinaryGenerator(long seed) {
    super(seed, MAX_STRING_LENGTH);
  }

  /**
   * @return a Binary over the first {@code length} bytes of the scratch buffer, where
   *     {@code length} is between 5 and the buffer size
   */
  @Override
  public Binary nextValue() {
    // use a random length, but ensure it is at least a few bytes
    final int length = MIN_LENGTH + randomPositiveInt(buffer.length - MIN_LENGTH);
    int position = 0;
    while (position < length) {
      buffer[position++] = (byte) randomInt();
    }
    return Binary.fromReusedByteArray(buffer, 0, length);
  }

  /**
   * @return the same as {@link #nextValue()}
   */
  @Override
  public Binary nextBinaryValue() {
    return nextValue();
  }
}
/**
 * Generates fixed-length random byte sequences. Each value fills the whole scratch buffer and
 * is returned as a reused-array {@link Binary} over that buffer, so a later call overwrites the
 * bytes behind earlier results.
 */
public static class FixedGenerator extends RandomBinaryBase<Binary> {

  /**
   * @param seed seed for the random source
   * @param length number of bytes in every generated value
   */
  public FixedGenerator(long seed, int length) {
    super(seed, length);
  }

  /**
   * @return a Binary over the entire, freshly refilled scratch buffer
   */
  @Override
  public Binary nextValue() {
    final int size = buffer.length;
    int position = 0;
    while (position < size) {
      buffer[position++] = (byte) randomInt();
    }
    return Binary.fromReusedByteArray(buffer);
  }

  /**
   * @return the same as {@link #nextValue()}
   */
  @Override
  public Binary nextBinaryValue() {
    return nextValue();
  }
}
/**
 * Holds two values in order: whichever argument compares lower becomes the minimum and the
 * other becomes the maximum.
 *
 * @param <T> a naturally ordered value type
 */
private static class RandomRange<T extends Comparable<T>> {
  private T minimum;
  private T maximum;

  /**
   * @param lhs one end of the range
   * @param rhs the other end of the range
   */
  public RandomRange(T lhs, T rhs) {
    // Keep the arguments as given unless lhs is strictly greater than rhs.
    final boolean alreadyOrdered = lhs.compareTo(rhs) <= 0;
    this.minimum = alreadyOrdered ? lhs : rhs;
    this.maximum = alreadyOrdered ? rhs : lhs;
  }

  /**
   * @return the lower of the two values
   */
  public T minimum() {
    return minimum;
  }

  /**
   * @return the higher of the two values
   */
  public T maximum() {
    return maximum;
  }
}
/**
 * Creates a supplier backed by a new {@link StringGenerator}.
 *
 * @param seed seed for the generator
 * @return a supplier whose every call returns the generator's {@code nextBinaryValue()}
 */
public static Supplier<Binary> binaryStringGenerator(long seed) {
  return new StringGenerator(seed)::nextBinaryValue;
}
/**
 * Creates a supplier backed by a new {@link Int96Generator}.
 *
 * @param seed seed for the generator
 * @return a supplier whose every call returns the generator's {@code nextBinaryValue()}
 */
public static Supplier<Binary> int96Generator(long seed) {
  final Int96Generator int96s = new Int96Generator(seed);
  return () -> int96s.nextBinaryValue();
}
/**
 * Pre-generates {@code recordCount} values and replays them in natural order.
 *
 * @param supplier source of the values; called exactly {@code recordCount} times, up front
 * @param recordCount number of values to draw and replay
 * @param ascending {@code true} for ascending order, {@code false} for descending
 * @return a supplier that replays the sorted values and throws
 *     {@link java.util.NoSuchElementException} once they are used up
 */
public static <T extends Comparable<T>> Supplier<T> wrapSorted(Supplier<T> supplier,
    int recordCount, boolean ascending) {
  return wrapSorted(supplier, recordCount, ascending, Comparator.<T>naturalOrder());
}

/**
 * Pre-generates {@code recordCount} values and replays them ordered by {@code cmp}.
 *
 * @param supplier source of the values; called exactly {@code recordCount} times, up front
 * @param recordCount number of values to draw and replay
 * @param ascending {@code true} to follow {@code cmp}, {@code false} to invert it
 * @param cmp ordering applied to the generated values
 * @return a supplier that replays the sorted values and throws
 *     {@link java.util.NoSuchElementException} once they are used up
 */
public static <T> Supplier<T> wrapSorted(Supplier<T> supplier, int recordCount, boolean ascending,
    Comparator<T> cmp) {
  final List<T> drawn = new ArrayList<>(recordCount);
  int remaining = recordCount;
  while (remaining > 0) {
    drawn.add(supplier.get());
    remaining--;
  }

  // Descending order is the same comparator with its arguments exchanged.
  final Comparator<T> order = ascending ? cmp : (left, right) -> cmp.compare(right, left);
  drawn.sort(order);

  final Iterator<T> cursor = drawn.iterator();
  return () -> cursor.next();
}
}