blob: 94f9baaf89b58d51a86ec03de165730a79442a43 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.examples.terasort;
import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
import java.math.BigInteger;
import java.util.zip.Checksum;
import org.apache.hadoop.util.PureJavaCrc32;
/**
* A single-process data generator for the terasort data. Based on gensort.c
* version 1.1 (3 Mar 2009) from Chris Nyberg &lt;chris.nyberg@ordinal.com&gt;.
*/
public class GenSort {

  /** Length in bytes of every generated record. */
  private static final int RECORD_LENGTH = 100;

  /**
   * Generate a "binary" record suitable for all sort benchmarks *except*
   * PennySort.
   *
   * <pre>
   * bytes  0..9   key: bytes 0..9 of rand
   * bytes 10..11  break: 0x00 0x11
   * bytes 12..43  the 32 hex digits of recordNumber
   * bytes 44..47  break: 0x88 0x99 0xAA 0xBB
   * bytes 48..95  filler: hex digits 20..31 of rand, each repeated 4 times
   * bytes 96..99  break: 0xCC 0xDD 0xEE 0xFF
   * </pre>
   *
   * @param recBuf the buffer to fill; indices 0..99 are overwritten
   * @param rand the random number the key and filler are taken from
   * @param recordNumber the number of the record being generated
   */
  static void generateRecord(byte[] recBuf, Unsigned16 rand,
                             Unsigned16 recordNumber) {
    /* generate the 10-byte key using the high 10 bytes of the 128-bit
     * random number
     */
    for (int i = 0; i < 10; ++i) {
      recBuf[i] = rand.getByte(i);
    }

    /* add 2 bytes of "break" */
    recBuf[10] = 0x00;
    recBuf[11] = 0x11;

    /* convert the 128-bit record number to 32 digits of ascii hexadecimal
     * as the next 32 bytes of the record.
     */
    for (int i = 0; i < 32; i++) {
      recBuf[12 + i] = (byte) recordNumber.getHexDigit(i);
    }

    /* add 4 bytes of "break" data */
    recBuf[44] = (byte) 0x88;
    recBuf[45] = (byte) 0x99;
    recBuf[46] = (byte) 0xAA;
    recBuf[47] = (byte) 0xBB;

    /* add 48 bytes of filler based on low 48 bits of random number:
     * 12 hex digits, each one repeated four times
     */
    for (int i = 0; i < 12; ++i) {
      recBuf[48 + i * 4] = recBuf[49 + i * 4] = recBuf[50 + i * 4] =
          recBuf[51 + i * 4] = (byte) rand.getHexDigit(20 + i);
    }

    /* add 4 bytes of "break" data */
    recBuf[96] = (byte) 0xCC;
    recBuf[97] = (byte) 0xDD;
    recBuf[98] = (byte) 0xEE;
    recBuf[99] = (byte) 0xFF;
  }

  /**
   * Build a non-negative BigInteger from the 64 bits of a long, i.e.
   * interpret the long as an unsigned value.
   *
   * @param x the bits to convert
   * @return the value of x read as an unsigned 64-bit integer
   */
  private static BigInteger makeBigInteger(long x) {
    byte[] data = new byte[8];
    // big-endian byte order, as expected by the BigInteger constructor
    for (int i = 0; i < 8; ++i) {
      data[i] = (byte) (x >>> (56 - 8 * i));
    }
    return new BigInteger(1, data);
  }

  /** Radix of the ascii key: one character out of 95, starting at ' '. */
  private static final BigInteger NINETY_FIVE = new BigInteger("95");

  /**
   * Store the least significant base-95 digit of value, offset from ' ',
   * into recBuf[index] and return value divided by 95. The value is treated
   * as an unsigned 64-bit number.
   *
   * @param recBuf the buffer that receives the character
   * @param index the position in recBuf to write
   * @param value the unsigned 64-bit number to take the digit from
   * @return the unsigned quotient value / 95, which always fits in a
   *         non-negative long
   */
  private static long storeBase95Digit(byte[] recBuf, int index, long value) {
    if (value < 0) {
      // use biginteger to avoid the negative sign problem
      BigInteger bigValue = makeBigInteger(value);
      recBuf[index] =
          (byte) (' ' + (bigValue.mod(NINETY_FIVE).longValue()));
      return bigValue.divide(NINETY_FIVE).longValue();
    }
    recBuf[index] = (byte) (' ' + (value % 95));
    return value / 95;
  }

  /**
   * Generate an ascii record suitable for all sort benchmarks including
   * PennySort.
   *
   * <pre>
   * bytes  0..9   key: base-95 digits (offset from ' ') of the high and
   *               low 64 bits of rand
   * bytes 10..11  break: two spaces
   * bytes 12..43  the 32 hex digits of recordNumber
   * bytes 44..45  break: two spaces
   * bytes 46..97  filler: hex digits 19..31 of rand, each repeated 4 times
   * bytes 98..99  break: "\r\n"
   * </pre>
   *
   * @param recBuf the buffer to fill; indices 0..99 are overwritten
   * @param rand the random number the key and filler are taken from
   * @param recordNumber the number of the record being generated
   */
  static void generateAsciiRecord(byte[] recBuf, Unsigned16 rand,
                                  Unsigned16 recordNumber) {
    /* generate the 10-byte ascii key using mostly the high 64 bits.
     * Only the first digit of each 64-bit half can see a "negative" long;
     * after one unsigned division by 95 the value is non-negative.
     */
    long temp = rand.getHigh8();
    for (int i = 0; i < 8; ++i) {
      temp = storeBase95Digit(recBuf, i, temp);
    }
    temp = storeBase95Digit(recBuf, 8, rand.getLow8());
    recBuf[9] = (byte) (' ' + (temp % 95));

    /* add 2 bytes of "break" */
    recBuf[10] = ' ';
    recBuf[11] = ' ';

    /* convert the 128-bit record number to 32 digits of ascii hexadecimal
     * as the next 32 bytes of the record.
     */
    for (int i = 0; i < 32; i++) {
      recBuf[12 + i] = (byte) recordNumber.getHexDigit(i);
    }

    /* add 2 bytes of "break" data */
    recBuf[44] = ' ';
    recBuf[45] = ' ';

    /* add 52 bytes of filler based on low 52 bits of random number:
     * 13 hex digits, each one repeated four times
     */
    for (int i = 0; i < 13; ++i) {
      recBuf[46 + i * 4] = recBuf[47 + i * 4] = recBuf[48 + i * 4] =
          recBuf[49 + i * 4] = (byte) rand.getHexDigit(19 + i);
    }

    /* add 2 bytes of "break" data */
    recBuf[98] = '\r'; /* nice for Windows */
    recBuf[99] = '\n';
  }

  /**
   * Print the command line help to standard output and terminate the JVM
   * with exit status 1. This method never returns.
   */
  private static void usage() {
    PrintStream out = System.out;
    out.println("usage: gensort [-a] [-c] [-bSTARTING_REC_NUM] NUM_RECS FILE_NAME");
    out.println("-a Generate ascii records required for PennySort or JouleSort.");
    out.println(" These records are also an alternative input for the other");
    out.println(" sort benchmarks. Without this flag, binary records will be");
    out.println(" generated that contain the highest density of randomness in");
    out.println(" the 10-byte key.");
    out.println( "-c Calculate the sum of the crc32 checksums of each of the");
    // main() prints the checksum with System.out, so say so here.
    out.println(" generated records and send it to standard output.");
    out.println("-bN Set the beginning record generated to N. By default the");
    out.println(" first record generated is record 0.");
    out.println("NUM_RECS The number of sequential records to generate.");
    out.println("FILE_NAME The name of the file to write the records to.\n");
    out.println("Example 1 - to generate 1000000 ascii records starting at record 0 to");
    out.println("the file named \"pennyinput\":");
    out.println(" gensort -a 1000000 pennyinput\n");
    out.println("Example 2 - to generate 1000 binary records beginning with record 2000");
    out.println("to the file named \"partition2\":");
    out.println(" gensort -b2000 1000 partition2");
    System.exit(1);
  }

  /**
   * Generate a run of consecutive records and write them to a stream.
   * Records are numbered from firstRecordNumber up to, but not including,
   * firstRecordNumber + recordsToGenerate. The stream is neither flushed
   * nor closed by this method.
   *
   * @param out the stream that receives the 100-byte records
   * @param useAscii true to generate ascii records, false for binary ones
   * @param firstRecordNumber the number of the first record to generate
   * @param recordsToGenerate how many records to generate
   * @param checksum if not null, the crc32 of every generated record is
   *        added to this value
   * @throws IOException if writing to the stream fails
   */
  public static void outputRecords(OutputStream out,
                                   boolean useAscii,
                                   Unsigned16 firstRecordNumber,
                                   Unsigned16 recordsToGenerate,
                                   Unsigned16 checksum
                                   ) throws IOException {
    byte[] row = new byte[RECORD_LENGTH];
    Unsigned16 recordNumber = new Unsigned16(firstRecordNumber);
    Unsigned16 lastRecordNumber = new Unsigned16(firstRecordNumber);
    Checksum crc = new PureJavaCrc32();
    Unsigned16 crcValue = new Unsigned16();
    lastRecordNumber.add(recordsToGenerate);
    Unsigned16 one = new Unsigned16(1);
    // NOTE(review): presumably positions the generator at the state that
    // belongs to firstRecordNumber - confirm against Random16.
    Unsigned16 rand = Random16.skipAhead(firstRecordNumber);
    while (!recordNumber.equals(lastRecordNumber)) {
      Random16.nextRand(rand);
      if (useAscii) {
        generateAsciiRecord(row, rand, recordNumber);
      } else {
        generateRecord(row, rand, recordNumber);
      }
      if (checksum != null) {
        // the crc covers one record only; the running sum lives in checksum
        crc.reset();
        crc.update(row, 0, row.length);
        crcValue.set(crc.getValue());
        checksum.add(crcValue);
      }
      recordNumber.add(one);
      out.write(row);
    }
  }

  /**
   * Command line entry point:
   * {@code gensort [-a] [-c] [-bSTARTING_REC_NUM] NUM_RECS FILE_NAME}.
   * Option parsing stops at the first argument that does not start with
   * '-'; exactly two arguments must remain. Any other command line prints
   * the usage message and exits. With {@code -c} the accumulated checksum
   * is printed to standard output once the file has been written.
   *
   * @param args the command line arguments
   * @throws Exception if an argument cannot be parsed or the file cannot
   *         be written
   */
  public static void main(String[] args) throws Exception {
    Unsigned16 startingRecord = new Unsigned16();
    boolean useAscii = false;
    Unsigned16 checksum = null;

    int i;
    for (i = 0; i < args.length; ++i) {
      String arg = args[i];
      int argLength = arg.length();
      if (argLength >= 1 && arg.charAt(0) == '-') {
        if (argLength < 2) {
          usage();
        }
        switch (arg.charAt(1)) {
        case 'a':
          useAscii = true;
          break;
        case 'b':
          startingRecord = Unsigned16.fromDecimal(arg.substring(2));
          break;
        case 'c':
          checksum = new Unsigned16();
          break;
        default:
          usage();
        }
      } else {
        break;
      }
    }
    if (args.length - i != 2) {
      usage();
    }

    Unsigned16 numberOfRecords = Unsigned16.fromDecimal(args[i]);
    // Buffer the output: outputRecords issues one write per 100-byte record.
    OutputStream out =
        new BufferedOutputStream(new FileOutputStream(args[i + 1]));
    try {
      outputRecords(out, useAscii, startingRecord, numberOfRecords, checksum);
    } finally {
      // close() flushes the buffer and releases the file even on failure
      out.close();
    }
    if (checksum != null) {
      System.out.println(checksum);
    }
  }
}