blob: 877d434e5a9c4e7de942bbb4ae17c3ecec2d92c0 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred.gridmix;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
/**
* A random text generator. Each word is simply a sequence of alphabetic characters.
*/
class RandomTextDataGenerator {
  static final Log LOG = LogFactory.getLog(RandomTextDataGenerator.class);

  /**
   * Configuration key for random text data generator's list size.
   */
  static final String GRIDMIX_DATAGEN_RANDOMTEXT_LISTSIZE =
      "gridmix.datagenerator.randomtext.listsize";

  /**
   * Configuration key for random text data generator's word size.
   */
  static final String GRIDMIX_DATAGEN_RANDOMTEXT_WORDSIZE =
      "gridmix.datagenerator.randomtext.wordsize";

  /**
   * Default random text data generator's list size.
   */
  static final int DEFAULT_LIST_SIZE = 200;

  /**
   * Default random text data generator's word size.
   */
  static final int DEFAULT_WORD_SIZE = 10;

  /**
   * Default random text data generator's seed.
   */
  static final long DEFAULT_SEED = 0L;

  /**
   * The dictionary of random words, filled once by the constructor.
   */
  private final String[] words;

  /**
   * Random source used both to build the dictionary and to pick words from it.
   */
  private final Random random;

  /**
   * Constructor for {@link RandomTextDataGenerator} with default seed
   * ({@link #DEFAULT_SEED}).
   *
   * @param size the total number of words to consider.
   * @param wordSize Size of each word
   */
  RandomTextDataGenerator(int size, int wordSize) {
    this(size, DEFAULT_SEED, wordSize);
  }

  /**
   * Constructor for {@link RandomTextDataGenerator}.
   *
   * @param size the total number of words to consider.
   * @param seed Random number generator seed for repeatability; must not be
   *             <code>null</code> because it is unboxed when the random
   *             number generator is created
   * @param wordSize Size of each word
   */
  RandomTextDataGenerator(int size, Long seed, int wordSize) {
    random = new Random(seed);
    words = new String[size];

    // TODO: replace the defaults with values derived from actual stats
    // TODO: decide whether words of varying sizes are needed
    for (int i = 0; i < size; ++i) {
      // letters only (letters=true, numbers=false), drawn from the seeded
      // random source so that the dictionary is repeatable for a given seed
      words[i] =
          RandomStringUtils.random(wordSize, 0, 0, true, false, null, random);
    }
  }

  /**
   * Get the configured random text data generator's list size.
   *
   * @param conf the configuration to read from
   * @return the configured list size, or {@link #DEFAULT_LIST_SIZE} if unset
   */
  static int getRandomTextDataGeneratorListSize(Configuration conf) {
    return conf.getInt(GRIDMIX_DATAGEN_RANDOMTEXT_LISTSIZE, DEFAULT_LIST_SIZE);
  }

  /**
   * Set the random text data generator's list size.
   *
   * @param conf the configuration to update
   * @param listSize the number of words in the dictionary
   */
  static void setRandomTextDataGeneratorListSize(Configuration conf,
                                                 int listSize) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Random text data generator is configured to use a dictionary "
                + "with " + listSize + " words");
    }
    conf.setInt(GRIDMIX_DATAGEN_RANDOMTEXT_LISTSIZE, listSize);
  }

  /**
   * Get the configured random text data generator word size.
   *
   * @param conf the configuration to read from
   * @return the configured word size, or {@link #DEFAULT_WORD_SIZE} if unset
   */
  static int getRandomTextDataGeneratorWordSize(Configuration conf) {
    return conf.getInt(GRIDMIX_DATAGEN_RANDOMTEXT_WORDSIZE, DEFAULT_WORD_SIZE);
  }

  /**
   * Set the random text data generator word size.
   *
   * @param conf the configuration to update
   * @param wordSize the length of each word in the dictionary
   */
  static void setRandomTextDataGeneratorWordSize(Configuration conf,
                                                 int wordSize) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Random text data generator is configured to use a dictionary "
                + "with words of length " + wordSize);
    }
    conf.setInt(GRIDMIX_DATAGEN_RANDOMTEXT_WORDSIZE, wordSize);
  }

  /**
   * Returns a randomly selected word from a list of random words.
   *
   * @return one of the words in the dictionary
   * @throws IllegalArgumentException if the dictionary is empty (raised by
   *         {@link Random#nextInt(int)} for a non-positive bound)
   */
  String getRandomWord() {
    int index = random.nextInt(words.length);
    return words[index];
  }

  /**
   * This is mainly for testing.
   *
   * @return a fixed-size list holding a copy of the dictionary; changes made
   *         to the returned list do not affect this generator
   */
  List<String> getRandomWords() {
    // Arrays.asList writes through to its backing array, so hand out a copy
    // to keep callers (e.g. ones that sort the list) from altering the
    // dictionary that getRandomWord() draws from.
    return Arrays.asList(words.clone());
  }
}