| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.hadoop.mapred.gridmix; |
| |
| import java.util.Arrays; |
| import java.util.List; |
| import java.util.Random; |
| |
| import org.apache.commons.lang.RandomStringUtils; |
| import org.apache.commons.logging.Log; |
| import org.apache.commons.logging.LogFactory; |
| import org.apache.hadoop.conf.Configuration; |
| |
| /** |
| * A random text generator. The words are simply sequences of alphabets. |
| */ |
| class RandomTextDataGenerator { |
| static final Log LOG = LogFactory.getLog(RandomTextDataGenerator.class); |
| |
| /** |
| * Configuration key for random text data generator's list size. |
| */ |
| static final String GRIDMIX_DATAGEN_RANDOMTEXT_LISTSIZE = |
| "gridmix.datagenerator.randomtext.listsize"; |
| |
| /** |
| * Configuration key for random text data generator's word size. |
| */ |
| static final String GRIDMIX_DATAGEN_RANDOMTEXT_WORDSIZE = |
| "gridmix.datagenerator.randomtext.wordsize"; |
| |
| /** |
| * Default random text data generator's list size. |
| */ |
| static final int DEFAULT_LIST_SIZE = 200; |
| |
| /** |
| * Default random text data generator's word size. |
| */ |
| static final int DEFAULT_WORD_SIZE = 10; |
| |
| /** |
| * Default random text data generator's seed. |
| */ |
| static final long DEFAULT_SEED = 0L; |
| |
| /** |
| * A list of random words |
| */ |
| private String[] words; |
| private Random random; |
| |
| /** |
| * Constructor for {@link RandomTextDataGenerator} with default seed. |
| * @param size the total number of words to consider. |
| * @param wordSize Size of each word |
| */ |
| RandomTextDataGenerator(int size, int wordSize) { |
| this(size, DEFAULT_SEED , wordSize); |
| } |
| |
| /** |
| * Constructor for {@link RandomTextDataGenerator}. |
| * @param size the total number of words to consider. |
| * @param seed Random number generator seed for repeatability |
| * @param wordSize Size of each word |
| */ |
| RandomTextDataGenerator(int size, Long seed, int wordSize) { |
| random = new Random(seed); |
| words = new String[size]; |
| |
| //TODO change the default with the actual stats |
| //TODO do u need varied sized words? |
| for (int i = 0; i < size; ++i) { |
| words[i] = |
| RandomStringUtils.random(wordSize, 0, 0, true, false, null, random); |
| } |
| } |
| |
| /** |
| * Get the configured random text data generator's list size. |
| */ |
| static int getRandomTextDataGeneratorListSize(Configuration conf) { |
| return conf.getInt(GRIDMIX_DATAGEN_RANDOMTEXT_LISTSIZE, DEFAULT_LIST_SIZE); |
| } |
| |
| /** |
| * Set the random text data generator's list size. |
| */ |
| static void setRandomTextDataGeneratorListSize(Configuration conf, |
| int listSize) { |
| if (LOG.isDebugEnabled()) { |
| LOG.debug("Random text data generator is configured to use a dictionary " |
| + " with " + listSize + " words"); |
| } |
| conf.setInt(GRIDMIX_DATAGEN_RANDOMTEXT_LISTSIZE, listSize); |
| } |
| |
| /** |
| * Get the configured random text data generator word size. |
| */ |
| static int getRandomTextDataGeneratorWordSize(Configuration conf) { |
| return conf.getInt(GRIDMIX_DATAGEN_RANDOMTEXT_WORDSIZE, DEFAULT_WORD_SIZE); |
| } |
| |
| /** |
| * Set the random text data generator word size. |
| */ |
| static void setRandomTextDataGeneratorWordSize(Configuration conf, |
| int wordSize) { |
| if (LOG.isDebugEnabled()) { |
| LOG.debug("Random text data generator is configured to use a dictionary " |
| + " with words of length " + wordSize); |
| } |
| conf.setInt(GRIDMIX_DATAGEN_RANDOMTEXT_WORDSIZE, wordSize); |
| } |
| |
| /** |
| * Returns a randomly selected word from a list of random words. |
| */ |
| String getRandomWord() { |
| int index = random.nextInt(words.length); |
| return words[index]; |
| } |
| |
| /** |
| * This is mainly for testing. |
| */ |
| List<String> getRandomWords() { |
| return Arrays.asList(words); |
| } |
| } |