blob: 772bdbabf353628a0888e2963a00037b36da2780 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.hadoop.io.file.tfile;
import java.util.Random;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.io.file.tfile.RandomDistribution.DiscreteRNG;
/**
* Generate random <key, value> pairs.
*/
class KVGenerator {
private final Random random;
private final byte[][] dict;
private final boolean sorted;
private final DiscreteRNG keyLenRNG, valLenRNG;
private BytesWritable lastKey;
private static final int MIN_KEY_LEN = 4;
private final byte prefix[] = new byte[MIN_KEY_LEN];
public KVGenerator(Random random, boolean sorted, DiscreteRNG keyLenRNG,
DiscreteRNG valLenRNG, DiscreteRNG wordLenRNG, int dictSize) {
this.random = random;
dict = new byte[dictSize][];
this.sorted = sorted;
this.keyLenRNG = keyLenRNG;
this.valLenRNG = valLenRNG;
for (int i = 0; i < dictSize; ++i) {
int wordLen = wordLenRNG.nextInt();
dict[i] = new byte[wordLen];
random.nextBytes(dict[i]);
}
lastKey = new BytesWritable();
fillKey(lastKey);
}
private void fillKey(BytesWritable o) {
int len = keyLenRNG.nextInt();
if (len < MIN_KEY_LEN) len = MIN_KEY_LEN;
o.setSize(len);
int n = MIN_KEY_LEN;
while (n < len) {
byte[] word = dict[random.nextInt(dict.length)];
int l = Math.min(word.length, len - n);
System.arraycopy(word, 0, o.getBytes(), n, l);
n += l;
}
if (sorted && WritableComparator.compareBytes(
lastKey.getBytes(), MIN_KEY_LEN, lastKey.getLength() - MIN_KEY_LEN,
o.getBytes(), MIN_KEY_LEN, o.getLength() - MIN_KEY_LEN) > 0) {
incrementPrefix();
}
System.arraycopy(prefix, 0, o.getBytes(), 0, MIN_KEY_LEN);
lastKey.set(o);
}
private void fillValue(BytesWritable o) {
int len = valLenRNG.nextInt();
o.setSize(len);
int n = 0;
while (n < len) {
byte[] word = dict[random.nextInt(dict.length)];
int l = Math.min(word.length, len - n);
System.arraycopy(word, 0, o.getBytes(), n, l);
n += l;
}
}
private void incrementPrefix() {
for (int i = MIN_KEY_LEN - 1; i >= 0; --i) {
++prefix[i];
if (prefix[i] != 0) return;
}
throw new RuntimeException("Prefix overflown");
}
public void next(BytesWritable key, BytesWritable value, boolean dupKey) {
if (dupKey) {
key.set(lastKey);
}
else {
fillKey(key);
}
fillValue(value);
}
}