| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.hadoop.fs.loadGenerator; |
| |
| import java.io.File; |
| import java.io.FileNotFoundException; |
| import java.io.PrintStream; |
| import java.util.ArrayList; |
| import java.util.List; |
| import java.util.Random; |
| |
| import org.apache.hadoop.util.ToolRunner; |
| |
| /** |
| * This program generates a random namespace structure with the following |
| * constraints: |
| * 1. The number of subdirectories is a random number in [minWidth, maxWidth]. |
| * 2. The maximum depth of each subdirectory is a random number |
| * [2*maxDepth/3, maxDepth]. |
| * 3. Files are randomly placed in the empty directories. The size of each |
| * file follows Gaussian distribution. |
| * The generated namespace structure is described by two files in the output |
| * directory. Each line of the first file |
| * contains the full name of a leaf directory. |
| * Each line of the second file contains |
| * the full name of a file and its size, separated by a blank. |
| * |
| * The synopsis of the command is |
| * java StructureGenerator |
| -maxDepth <maxDepth> : maximum depth of the directory tree; default is 5. |
| -minWidth <minWidth> : minimum number of subdirectories per directories; default is 1 |
| -maxWidth <maxWidth> : maximum number of subdirectories per directories; default is 5 |
| -numOfFiles <#OfFiles> : the total number of files; default is 10. |
| -avgFileSize <avgFileSizeInBlocks>: average size of blocks; default is 1. |
| -outDir <outDir>: output directory; default is the current directory. |
| -seed <seed>: random number generator seed; default is the current time. |
| */ |
| public class StructureGenerator { |
| private int maxDepth = 5; |
| private int minWidth = 1; |
| private int maxWidth = 5; |
| private int numOfFiles = 10; |
| private double avgFileSize = 1; |
| private File outDir = DEFAULT_STRUCTURE_DIRECTORY; |
| final static private String USAGE = "java StructureGenerator\n" + |
| "-maxDepth <maxDepth>\n" + |
| "-minWidth <minWidth>\n" + |
| "-maxWidth <maxWidth>\n" + |
| "-numOfFiles <#OfFiles>\n" + |
| "-avgFileSize <avgFileSizeInBlocks>\n" + |
| "-outDir <outDir>\n" + |
| "-seed <seed>"; |
| |
| private Random r = null; |
| |
| /** Default directory for storing file/directory structure */ |
| final static File DEFAULT_STRUCTURE_DIRECTORY = new File("."); |
| /** The name of the file for storing directory structure */ |
| final static String DIR_STRUCTURE_FILE_NAME = "dirStructure"; |
| /** The name of the file for storing file structure */ |
| final static String FILE_STRUCTURE_FILE_NAME = "fileStructure"; |
| /** The name prefix for the files created by this program */ |
| final static String FILE_NAME_PREFIX = "_file_"; |
| |
| /** |
| * The main function first parses the command line arguments, |
| * then generates in-memory directory structure and outputs to a file, |
| * last generates in-memory files and outputs them to a file. |
| */ |
| public int run(String[] args) throws Exception { |
| int exitCode = 0; |
| exitCode = init(args); |
| if (exitCode != 0) { |
| return exitCode; |
| } |
| genDirStructure(); |
| output(new File(outDir, DIR_STRUCTURE_FILE_NAME)); |
| genFileStructure(); |
| outputFiles(new File(outDir, FILE_STRUCTURE_FILE_NAME)); |
| return exitCode; |
| } |
| |
| /** Parse the command line arguments and initialize the data */ |
| private int init(String[] args) { |
| try { |
| for (int i = 0; i < args.length; i++) { // parse command line |
| if (args[i].equals("-maxDepth")) { |
| maxDepth = Integer.parseInt(args[++i]); |
| if (maxDepth<1) { |
| System.err.println("maxDepth must be positive: " + maxDepth); |
| return -1; |
| } |
| } else if (args[i].equals("-minWidth")) { |
| minWidth = Integer.parseInt(args[++i]); |
| if (minWidth<0) { |
| System.err.println("minWidth must be positive: " + minWidth); |
| return -1; |
| } |
| } else if (args[i].equals("-maxWidth")) { |
| maxWidth = Integer.parseInt(args[++i]); |
| } else if (args[i].equals("-numOfFiles")) { |
| numOfFiles = Integer.parseInt(args[++i]); |
| if (numOfFiles<1) { |
| System.err.println("NumOfFiles must be positive: " + numOfFiles); |
| return -1; |
| } |
| } else if (args[i].equals("-avgFileSize")) { |
| avgFileSize = Double.parseDouble(args[++i]); |
| if (avgFileSize<=0) { |
| System.err.println("AvgFileSize must be positive: " + avgFileSize); |
| return -1; |
| } |
| } else if (args[i].equals("-outDir")) { |
| outDir = new File(args[++i]); |
| } else if (args[i].equals("-seed")) { |
| r = new Random(Long.parseLong(args[++i])); |
| } else { |
| System.err.println(USAGE); |
| ToolRunner.printGenericCommandUsage(System.err); |
| return -1; |
| } |
| } |
| } catch (NumberFormatException e) { |
| System.err.println("Illegal parameter: " + e.getLocalizedMessage()); |
| System.err.println(USAGE); |
| return -1; |
| } |
| |
| if (maxWidth < minWidth) { |
| System.err.println( |
| "maxWidth must be bigger than minWidth: " + maxWidth); |
| return -1; |
| } |
| |
| if (r==null) { |
| r = new Random(); |
| } |
| return 0; |
| } |
| |
| /** In memory representation of a directory */ |
| private static class INode { |
| private String name; |
| private List<INode> children = new ArrayList<INode>(); |
| |
| /** Constructor */ |
| private INode(String name) { |
| this.name = name; |
| } |
| |
| /** Add a child (subdir/file) */ |
| private void addChild(INode child) { |
| children.add(child); |
| } |
| |
| /** Output the subtree rooted at the current node. |
| * Only the leaves are printed. |
| */ |
| private void output(PrintStream out, String prefix) { |
| prefix = prefix==null?name:prefix+"/"+name; |
| if (children.isEmpty()) { |
| out.println(prefix); |
| } else { |
| for (INode child : children) { |
| child.output(out, prefix); |
| } |
| } |
| } |
| |
| /** Output the files in the subtree rooted at this node */ |
| protected void outputFiles(PrintStream out, String prefix) { |
| prefix = prefix==null?name:prefix+"/"+name; |
| for (INode child : children) { |
| child.outputFiles(out, prefix); |
| } |
| } |
| |
| /** Add all the leaves in the subtree to the input list */ |
| private void getLeaves(List<INode> leaves) { |
| if (children.isEmpty()) { |
| leaves.add(this); |
| } else { |
| for (INode child : children) { |
| child.getLeaves(leaves); |
| } |
| } |
| } |
| } |
| |
| /** In memory representation of a file */ |
| private static class FileINode extends INode { |
| private double numOfBlocks; |
| |
| /** constructor */ |
| private FileINode(String name, double numOfBlocks) { |
| super(name); |
| this.numOfBlocks = numOfBlocks; |
| } |
| |
| /** Output a file attribute */ |
| protected void outputFiles(PrintStream out, String prefix) { |
| prefix = (prefix == null)?super.name: prefix + "/"+super.name; |
| out.println(prefix + " " + numOfBlocks); |
| } |
| } |
| |
| private INode root; |
| |
| /** Generates a directory tree with a max depth of <code>maxDepth</code> */ |
| private void genDirStructure() { |
| root = genDirStructure("", maxDepth); |
| } |
| |
| /** Generate a directory tree rooted at <code>rootName</code> |
| * The number of subtree is in the range of [minWidth, maxWidth]. |
| * The maximum depth of each subtree is in the range of |
| * [2*maxDepth/3, maxDepth]. |
| */ |
| private INode genDirStructure(String rootName, int maxDepth) { |
| INode root = new INode(rootName); |
| |
| if (maxDepth>0) { |
| maxDepth--; |
| int minDepth = maxDepth*2/3; |
| // Figure out the number of subdirectories to generate |
| int numOfSubDirs = minWidth + r.nextInt(maxWidth-minWidth+1); |
| // Expand the tree |
| for (int i=0; i<numOfSubDirs; i++) { |
| int childDepth = (maxDepth == 0)?0: |
| (r.nextInt(maxDepth-minDepth+1)+minDepth); |
| INode child = genDirStructure("dir"+i, childDepth); |
| root.addChild(child); |
| } |
| } |
| return root; |
| } |
| |
| /** Collects leaf nodes in the tree */ |
| private List<INode> getLeaves() { |
| List<INode> leaveDirs = new ArrayList<INode>(); |
| root.getLeaves(leaveDirs); |
| return leaveDirs; |
| } |
| |
| /** Decides where to place all the files and its length. |
| * It first collects all empty directories in the tree. |
| * For each file, it randomly chooses an empty directory to place the file. |
| * The file's length is generated using Gaussian distribution. |
| */ |
| private void genFileStructure() { |
| List<INode> leaves = getLeaves(); |
| int totalLeaves = leaves.size(); |
| for (int i=0; i<numOfFiles; i++) { |
| int leaveNum = r.nextInt(totalLeaves); |
| double fileSize; |
| do { |
| fileSize = r.nextGaussian()+avgFileSize; |
| } while (fileSize<0); |
| leaves.get(leaveNum).addChild( |
| new FileINode(FILE_NAME_PREFIX+i, fileSize)); |
| } |
| } |
| |
| /** Output directory structure to a file, each line of the file |
| * contains the directory name. Only empty directory names are printed. */ |
| private void output(File outFile) throws FileNotFoundException { |
| System.out.println("Printing to " + outFile.toString()); |
| PrintStream out = new PrintStream(outFile); |
| root.output(out, null); |
| out.close(); |
| } |
| |
| /** Output all files' attributes to a file, each line of the output file |
| * contains a file name and its length. */ |
| private void outputFiles(File outFile) throws FileNotFoundException { |
| System.out.println("Printing to " + outFile.toString()); |
| PrintStream out = new PrintStream(outFile); |
| root.outputFiles(out, null); |
| out.close(); |
| } |
| |
| /** |
| * Main program |
| * @param args Command line arguments |
| * @throws Exception |
| */ |
| public static void main(String[] args) throws Exception { |
| StructureGenerator sg = new StructureGenerator(); |
| System.exit(sg.run(args)); |
| } |
| } |