// blob: a2e8d9eb37ae920808238afc384d74cc52922df1 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.tools.generate;
import java.util.Properties;
import java.util.Random;
import java.io.FileWriter;
import java.io.PrintWriter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;
import org.apache.hadoop.hive.ql.io.RCFile;
import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
/**
 * Command-line tool that generates deterministic RCFile test data.
 *
 * <p>For a given format ({@code student}, {@code voter} or {@code alltypes})
 * and row count, the same pseudo-random rows are written twice: once through
 * an {@link RCFile.Writer} created with a {@link DefaultCodec}, and once as a
 * tab-separated plain text file. The random seed is derived from the format
 * and the row count, so identical arguments reproduce identical rows.
 */
public class RCFileGenerator {

    /** Charset used for every string/byte conversion in this tool. */
    private static final String UTF8 = "UTF-8";

    private static final String FORMAT_STUDENT = "student";
    private static final String FORMAT_VOTER = "voter";
    private static final String FORMAT_ALLTYPES = "alltypes";

    private static final String[] firstName = {"alice", "bob", "calvin", "david",
        "ethan", "fred", "gabriella", "holly", "irene", "jessica", "katie",
        "luke", "mike", "nick", "oscar", "priscilla", "quinn", "rachel",
        "sarah", "tom", "ulysses", "victor", "wendy", "xavier", "yuri",
        "zach"};

    private static final String[] lastName = {"allen", "brown", "carson",
        "davidson", "ellison", "falkner", "garcia", "hernandez", "ichabod",
        "johnson", "king", "laertes", "miller", "nixon", "ovid", "polk",
        "quirinius", "robinson", "steinbeck", "thompson", "underhill",
        "van buren", "white", "xylophone", "young", "zipper"};

    private static final String[] registration = {"democrat", "green",
        "independent", "libertarian", "republican", "socialist"};

    private static final Configuration conf = new Configuration();

    /** Directory against which the RCFile output name is resolved; set in main(). */
    private static Path basedir;

    /** File system the RCFile is written to; set in main(). */
    private static FileSystem fs;

    /** Seeded generator shared by all random* helpers; set in genData(). */
    private static Random rand;

    /** Resolves {@code filename} against the base directory. */
    private static Path getFile(String filename) {
        return new Path(basedir, filename);
    }

    /** Encodes {@code s} as UTF-8 bytes. */
    private static byte[] utf8(String s) throws Exception {
        return s.getBytes(UTF8);
    }

    /** Returns a random "first last" name built from the fixed name tables. */
    private static String randomName() {
        // The first name must be drawn before the last name to keep the
        // generated sequence stable for a given seed.
        StringBuilder buf =
            new StringBuilder(firstName[rand.nextInt(firstName.length)]);
        buf.append(' ');
        buf.append(lastName[rand.nextInt(lastName.length)]);
        return buf.toString();
    }

    /** Returns a random age in the range 18..77 inclusive. */
    private static int randomAge() {
        return rand.nextInt(60) + 18;
    }

    /** Returns a random GPA in [0, 4), computed in float precision. */
    private static double randomGpa() {
        return 4 * rand.nextFloat();
    }

    /** Returns a random entry of the registration table. */
    private static String randomRegistration() {
        return registration[rand.nextInt(registration.length)];
    }

    /** Returns a random contribution in [0, 1000), computed in float precision. */
    private static double randomContribution() {
        return rand.nextFloat() * 1000;
    }

    /**
     * Builds 1 to 5 "firstName \u0003 lastName" entries joined by \u0002 and
     * returns them as UTF-8 bytes.
     * NOTE(review): \u0002 / \u0003 are presumably the collection and map-key
     * delimiters expected by the consumer of this data -- confirm.
     */
    private static byte[] randomMap() throws Exception {
        int len = rand.nextInt(5) + 1;
        StringBuilder buf = new StringBuilder();
        for (int i = 0; i < len; i++) {
            if (i != 0) {
                buf.append('\u0002');
            }
            // Use the table lengths instead of a hard-coded 26 so the bounds
            // cannot drift from the arrays (both currently hold 26 names).
            buf.append(firstName[rand.nextInt(firstName.length)]);
            buf.append('\u0003');
            buf.append(lastName[rand.nextInt(lastName.length)]);
        }
        return utf8(buf.toString());
    }

    /**
     * Builds 1 to 5 "age \u0003 name" elements joined by \u0002 and returns
     * them as UTF-8 bytes.
     */
    private static byte[] randomArray() throws Exception {
        int len = rand.nextInt(5) + 1;
        StringBuilder buf = new StringBuilder();
        for (int i = 0; i < len; i++) {
            if (i != 0) {
                buf.append('\u0002');
            }
            buf.append(Integer.toString(randomAge()));
            buf.append('\u0003');
            buf.append(randomName());
        }
        return utf8(buf.toString());
    }

    /** Prints the command-line synopsis to stderr and exits with status 1. */
    private static void usage() {
        System.err.println("Usage: rcfilegen format number_of_rows " +
            "output_file plain_output_file");
        System.err.println(" format one of: student voter alltypes");
        System.exit(1);
    }

    /** Returns true when {@code format} is one of the supported data formats. */
    private static boolean isKnownFormat(String format) {
        return FORMAT_STUDENT.equals(format)
            || FORMAT_VOTER.equals(format)
            || FORMAT_ALLTYPES.equals(format);
    }

    /**
     * Entry point.
     *
     * @param args format, number_of_rows, output_file, plain_output_file
     * @throws Exception if either output file cannot be written
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 4) {
            usage();
        }
        String format = args[0];
        if (!isKnownFormat(format)) {
            usage();
        }
        int numRows = 0;
        try {
            numRows = Integer.parseInt(args[1]);
        } catch (NumberFormatException e) {
            // A malformed row count is a usage error, not a crash.
            usage();
        }
        if (numRows < 1) {
            usage();
        }
        String output = args[2];
        String plainOutput = args[3];
        fs = FileSystem.getLocal(conf);
        basedir = new Path(".");
        genData(format, numRows, output, plainOutput);
    }

    /**
     * Generates {@code numRows} rows of the given format and writes each row
     * both to the RCFile {@code output} (resolved against the base directory)
     * and, tab-separated, to the text file {@code plainOutput}.
     *
     * @param format one of student, voter, alltypes
     * @param numRows number of rows to generate; also feeds the random seed
     * @param output name of the RCFile to create
     * @param plainOutput path of the plain text file to create
     * @throws IllegalArgumentException if {@code format} is not supported
     * @throws Exception if writing either file fails
     */
    private static void genData(String format,
                                int numRows,
                                String output, String plainOutput) throws Exception {
        int numFields;
        // Reject unsupported formats before any output file is created.
        if (FORMAT_STUDENT.equals(format)) {
            rand = new Random(numRows);
            numFields = 3;
        } else if (FORMAT_VOTER.equals(format)) {
            // Deliberately left as int arithmetic so existing seeds (and thus
            // the generated data) do not change.
            rand = new Random(1000000000 + numRows);
            numFields = 4;
        } else if (FORMAT_ALLTYPES.equals(format)) {
            rand = new Random(2000000000L + numRows);
            // NOTE(review): 10 columns are declared but alltypesRow() yields
            // 9 values per row, so the last column is never set here --
            // confirm whether that is intentional.
            numFields = 10;
        } else {
            throw new IllegalArgumentException("Unknown format: " + format);
        }

        RCFileOutputFormat.setColumnNumber(conf, numFields);
        RCFile.Writer writer = new RCFile.Writer(fs, conf, getFile(output),
            null, new DefaultCodec());
        try {
            // Field bytes are UTF-8, so the text copy is written as UTF-8 too
            // rather than with the platform default charset.
            PrintWriter pw = new PrintWriter(plainOutput, UTF8);
            try {
                for (int j = 0; j < numRows; j++) {
                    byte[][] fields = generateRow(format);
                    BytesRefArrayWritable row = new BytesRefArrayWritable(numFields);
                    for (int i = 0; i < fields.length; i++) {
                        row.set(i, new BytesRefWritable(fields[i], 0,
                            fields[i].length));
                        pw.print(new String(fields[i], UTF8));
                        if (i != fields.length - 1) {
                            pw.print("\t");
                        } else {
                            pw.println();
                        }
                    }
                    writer.append(row);
                }
                // PrintWriter never throws on write; surface failures explicitly.
                if (pw.checkError()) {
                    throw new java.io.IOException(
                        "Failed to write plain output file " + plainOutput);
                }
            } finally {
                pw.close();
            }
        } finally {
            writer.close();
        }
    }

    /** Produces the UTF-8 encoded field values of one row of {@code format}. */
    private static byte[][] generateRow(String format) throws Exception {
        if (FORMAT_STUDENT.equals(format)) {
            return studentRow();
        } else if (FORMAT_VOTER.equals(format)) {
            return voterRow();
        } else if (FORMAT_ALLTYPES.equals(format)) {
            return alltypesRow();
        }
        throw new IllegalArgumentException("Unknown format: " + format);
    }

    /** One student row: name, age, gpa. */
    private static byte[][] studentRow() throws Exception {
        // Array initializers evaluate left to right, which fixes the order
        // in which values are drawn from the shared generator.
        return new byte[][] {
            utf8(randomName()),
            utf8(Integer.toString(randomAge())),
            utf8(Double.toString(randomGpa()))
        };
    }

    /** One voter row: name, age, registration, contribution. */
    private static byte[][] voterRow() throws Exception {
        return new byte[][] {
            utf8(randomName()),
            utf8(Integer.toString(randomAge())),
            utf8(randomRegistration()),
            utf8(Double.toString(randomContribution()))
        };
    }

    /**
     * One alltypes row: two small non-negative ints (bounded by
     * Byte.MAX_VALUE and Short.MAX_VALUE), an int, a long, a float, a double,
     * a name, a map and an array.
     */
    private static byte[][] alltypesRow() throws Exception {
        return new byte[][] {
            utf8(Integer.toString(rand.nextInt(Byte.MAX_VALUE))),
            utf8(Integer.toString(rand.nextInt(Short.MAX_VALUE))),
            utf8(Integer.toString(rand.nextInt())),
            utf8(Long.toString(rand.nextLong())),
            utf8(Float.toString(rand.nextFloat() * 1000)),
            utf8(Double.toString(rand.nextDouble() * 1000000)),
            utf8(randomName()),
            randomMap(),
            randomArray()
        };
    }
}