blob: 8ec8d10c431f7861db3214f9ae1841efb7d2d178 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.util;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.Random;
import java.util.zip.CRC32;
import java.util.zip.Checksum;
import org.junit.Assert;
import org.junit.Test;
/**
* Unit test to verify that the pure-Java CRC32 algorithm gives
* the same results as the built-in implementation.
*/
public class TestPureJavaCrc32 {
/** The JDK's java.util.zip.CRC32; its value is passed as the expected result in checkSame(). */
private final CRC32 theirs = new CRC32();
/** The pure-Java implementation under test; its value is passed as the actual result in checkSame(). */
private final PureJavaCrc32 ours = new PureJavaCrc32();
/**
 * Checks that the pure-Java checksum and {@link CRC32} report the same value
 * in their initial state, after a single byte, for two fixed inputs, and for
 * 10000 random byte arrays whose lengths are drawn from 0 to 2047.
 *
 * @throws Exception declared because {@code String.getBytes(String)} can
 *         throw a checked exception for an unknown charset name
 */
@Test
public void testCorrectness() throws Exception {
  // Freshly constructed checksums must already agree.
  checkSame();

  theirs.update(104);
  ours.update(104);
  checkSame();

  checkOnBytes(new byte[] {40, 60, 97, -70}, false);
  checkOnBytes("hello world!".getBytes("UTF-8"), false);

  // A single generator is shared by all iterations; building a new Random
  // for every length and every fill adds allocation without adding coverage.
  final Random random = new Random();
  for (int i = 0; i < 10000; i++) {
    final byte[] randomBytes = new byte[random.nextInt(2048)];
    random.nextBytes(randomBytes);
    checkOnBytes(randomBytes, false);
  }
}
/**
 * Runs {@code bytes} through both checksums and asserts that they agree.
 * The data is fed three ways: one byte at a time (comparing after every
 * byte), as one bulk update over the whole array, and, for arrays of at
 * least 10 bytes, as an additional bulk update of 5 bytes from offset 5
 * applied on top of the whole-array state.
 *
 * @param bytes the input data
 * @param print whether to write both checksum values to standard output
 *        after the per-byte pass and after the whole-array pass
 */
private void checkOnBytes(byte[] bytes, boolean print) {
  // Pass 1: single-byte updates, verified after each byte.
  theirs.reset();
  ours.reset();
  checkSame();
  for (final byte b : bytes) {
    ours.update(b);
    theirs.update(b);
    checkSame();
  }
  if (print) {
    printBoth();
  }

  // Pass 2: the whole array in one call.
  final int length = bytes.length;
  theirs.reset();
  ours.reset();
  ours.update(bytes, 0, length);
  theirs.update(bytes, 0, length);
  if (print) {
    printBoth();
  }
  checkSame();

  // Pass 3: a slice from the middle, continuing from the pass 2 state.
  if (length < 10) {
    return;
  }
  ours.update(bytes, 5, 5);
  theirs.update(bytes, 5, 5);
  checkSame();
}

/** Writes the current value of both checksums, in hex, to standard output. */
private void printBoth() {
  final String theirHex = Long.toHexString(theirs.getValue());
  final String ourHex = Long.toHexString(ours.getValue());
  System.out.println("theirs:\t" + theirHex + "\nours:\t" + ourHex);
}
/** Asserts that the two checksums currently report the same value. */
private void checkSame() {
  final long expected = theirs.getValue();
  final long actual = ours.getValue();
  Assert.assertEquals(expected, actual);
}
/**
 * Generate a table to perform checksums based on the same CRC-32 polynomial
 * that java.util.zip.CRC32 uses.
 */
public static class Table {
  /**
   * The generated lookup tables. {@code tables[0]} is computed directly from
   * the polynomial; every later table is derived from the one before it.
   */
  private final int[][] tables;

  /**
   * Builds {@code nTables} lookup tables of {@code 1 << nBits} entries each.
   *
   * @param nBits number of bits each table is indexed by
   * @param nTables number of tables to generate
   * @param polynomial value XORed into the register whenever a set bit is
   *        shifted out; only its low 32 bits take part because the register
   *        is an {@code int}
   */
  private Table(final int nBits, final int nTables,
      long polynomial) {
    tables = new int[nTables][];
    final int size = 1 << nBits;
    for (int i = 0; i < tables.length; i++) {
      tables[i] = new int[size];
    }

    // Compute the first table: process nBits of the index, lowest bit first.
    final int[] first = tables[0];
    for (int i = 0; i < first.length; i++) {
      int crc = i;
      for (int j = 0; j < nBits; j++) {
        if ((crc & 1) == 1) {
          crc >>>= 1;
          crc ^= polynomial;
        } else {
          crc >>>= 1;
        }
      }
      first[i] = crc;
    }

    // Compute the remaining tables: each entry advances the matching entry
    // of the previous table by nBits more bits using the first table.
    final int mask = first.length - 1;
    for (int j = 1; j < tables.length; j++) {
      final int[] previous = tables[j-1];
      final int[] current = tables[j];
      for (int i = 0; i < current.length; i++) {
        current[i] = (previous[i] >>> nBits) ^ first[previous[i] & mask];
      }
    }
  }

  /**
   * Renders every table as the source text of an {@code int[]} declaration,
   * with up to four hexadecimal entries per line.
   *
   * @param nameformat format string for the array name; it is formatted
   *        with the table index as its single argument
   * @return one declaration string per table, in table order
   */
  String[] toStrings(String nameformat) {
    final String[] s = new String[tables.length];
    for (int j = 0; j < tables.length; j++) {
      final int[] t = tables[j];
      final StringBuilder b = new StringBuilder();
      b.append(String.format(" static final int[] " + nameformat
          + " = new int[] {", j));
      for (int i = 0; i < t.length;) {
        b.append("\n ");
        // Bounded by t.length so that a table whose size is not a multiple
        // of four does not read past the end of the array.
        for (int k = 0; k < 4 && i < t.length; k++) {
          b.append(String.format("0x%08X, ", t[i++]));
        }
      }
      // Replace the comma that follows the final entry with a line break.
      b.setCharAt(b.length() - 2, '\n');
      s[j] = b.toString() + " };\n";
    }
    return s;
  }

  /**
   * Returns the declarations of all tables concatenated. Array names take
   * the form {@code T<n>_<index>}, where {@code n} is the number of trailing
   * zero bits in the table length.
   */
  @Override
  public String toString() {
    final StringBuilder b = new StringBuilder();
    for (String s : toStrings(String.format("T%d_",
        Integer.numberOfTrailingZeros(tables[0].length)) + "%d")) {
      b.append(s);
    }
    return b.toString();
  }

  /**
   * Generate CRC-32 lookup tables. Sixteen 8-bit tables are printed to
   * standard output and also written to {@code table8.txt} in the current
   * directory.
   *
   * @param args exactly one argument: the polynomial in hexadecimal
   * @throws FileNotFoundException if the output file cannot be opened
   */
  public static void main(String[] args) throws FileNotFoundException {
    if (args.length != 1) {
      System.err.println("Usage: " + Table.class.getName() +
          " <polynomial>");
      System.exit(1);
    }
    long polynomial = Long.parseLong(args[0], 16);
    int i = 8;
    final PrintStream out = new PrintStream(
        new FileOutputStream("table" + i + ".txt"), true);
    try {
      final Table t = new Table(i, 16, polynomial);
      final String s = t.toString();
      System.out.println(s);
      out.println(s);
    } finally {
      // Release the file handle even if table generation or printing fails.
      out.close();
    }
  }
}
/**
 * Performance tests to compare performance of the Pure Java implementation
 * to the built-in java.util.zip implementation. This can be run from the
 * command line with:
 *
 * java -cp path/to/test/classes:path/to/common/classes \
 * 'org.apache.hadoop.util.TestPureJavaCrc32$PerformanceTest'
 *
 * The output is in JIRA table format.
 */
public static class PerformanceTest {
  public static final int MAX_LEN = 32*1024*1024; // up to 32MB chunks
  /** Approximate number of bytes checksummed for each chunk size. */
  public static final int BYTES_PER_SIZE = MAX_LEN * 4;

  /** Built-in implementation; its results are the baseline for comparison. */
  static final Checksum zip = new CRC32();
  /** Implementations to be measured against {@link #zip}. */
  static final Checksum[] CRCS = {new PureJavaCrc32()};

  /**
   * Prints selected system properties followed by the benchmark table.
   *
   * @param args ignored
   */
  public static void main(String args[]) {
    printSystemProperties(System.out);
    doBench(CRCS, System.out);
  }

  /**
   * Prints one right-aligned table cell followed by a cell separator.
   *
   * @param s cell text
   * @param width minimum cell width; text longer than this is not truncated
   * @param out destination stream
   */
  private static void printCell(String s, int width, PrintStream out) {
    final int w = Math.max(s.length(), width);
    out.printf(" %" + w + "s |", s);
  }

  /**
   * Benchmarks {@link #zip} followed by every element of {@code crcs} whose
   * class differs from that of {@link #zip}.
   *
   * @param crcs implementations to compare with the baseline
   * @param out destination stream for the table
   */
  private static void doBench(final Checksum[] crcs, final PrintStream out) {
    final List<Checksum> all = new ArrayList<Checksum>();
    all.add(zip);
    for (Checksum c : crcs) {
      if (c.getClass() != zip.getClass()) {
        all.add(c);
      }
    }
    doBench(all, out);
  }

  /**
   * Prints the table header, warms up every implementation, then prints one
   * row per chunk size. Chunk sizes are the powers of two from 1 up to and
   * including {@link #MAX_LEN}.
   *
   * @param crcs implementations to measure; an instance of the baseline
   *        class must precede any other implementation
   * @param out destination stream for the table
   * @throws IllegalStateException if a non-baseline implementation is
   *         reached before any baseline result was recorded for the row
   * @throws RuntimeException if an implementation's checksum differs from
   *         the baseline's for the same input
   */
  private static void doBench(final List<Checksum> crcs, final PrintStream out
      ) {
    final byte[] bytes = new byte[MAX_LEN];
    new Random().nextBytes(bytes);

    // Print header
    out.printf("\nPerformance Table (The unit is MB/sec)\n||");
    final String title = "Num Bytes";
    printCell(title, 0, out);
    for (Checksum c : crcs) {
      out.printf("|");
      printCell(c.getClass().getSimpleName(), 8, out);
    }
    out.printf("|\n");

    // Warm up implementations to get jit going.
    for (Checksum c : crcs) {
      doBench(c, bytes, 2, null);
      doBench(c, bytes, 2101, null);
    }

    // Test on a variety of sizes. The bound is inclusive so that the largest
    // chunk, which is exactly as long as the buffer, is measured as well.
    for (int size = 1; size <= MAX_LEN; size *= 2) {
      out.printf("|");
      printCell(String.valueOf(size), title.length()+1, out);

      // Primitive value plus flag instead of a nullable Long, so a missing
      // baseline is reported explicitly rather than failing while unboxing.
      boolean haveExpected = false;
      long expected = 0L;
      for (Checksum c : crcs) {
        // Presumably requested to reduce garbage-collection noise in the
        // timed run that follows.
        System.gc();
        final long result = doBench(c, bytes, size, out);
        if (c.getClass() == zip.getClass()) {
          expected = result;
          haveExpected = true;
        } else if (!haveExpected) {
          throw new IllegalStateException("No " + zip.getClass()
              + " baseline was measured before " + c.getClass());
        } else if (result != expected) {
          throw new RuntimeException(c.getClass() + " has bugs!");
        }
      }
      out.printf("\n");
    }
  }

  /**
   * Times repeated updates of {@code crc} with the first {@code size} bytes
   * of {@code bytes}, enough times to cover about {@link #BYTES_PER_SIZE}
   * bytes, and optionally prints the throughput as a table cell.
   *
   * @param crc implementation to measure; it is reset before the run
   * @param bytes source data
   * @param size number of leading bytes passed to each update call
   * @param out destination for the throughput cell, or {@code null} to
   *        print nothing (used for warm-up runs)
   * @return the checksum value after the final update
   */
  private static long doBench(Checksum crc, byte[] bytes, int size,
      PrintStream out) {
    final String name = crc.getClass().getSimpleName();
    final int trials = BYTES_PER_SIZE / size;

    final long st = System.nanoTime();
    crc.reset();
    for (int i = 0; i < trials; i++) {
      crc.update(bytes, 0, size);
    }
    final long result = crc.getValue();
    final long et = System.nanoTime();

    // Widen before multiplying so the byte count cannot overflow an int.
    double mbProcessed = (long) trials * size / 1024.0 / 1024.0;
    double secsElapsed = (et - st) / 1000000000.0d;
    if (out != null) {
      final String s = String.format("%9.3f", mbProcessed/secsElapsed);
      printCell(s, name.length()+1, out);
    }
    return result;
  }

  /**
   * Prints a fixed list of JVM and operating-system properties, one
   * {@code name = value} pair per line.
   *
   * @param out destination stream
   */
  private static void printSystemProperties(PrintStream out) {
    final String[] names = {
        "java.version",
        "java.runtime.name",
        "java.runtime.version",
        "java.vm.version",
        "java.vm.vendor",
        "java.vm.name",
        "java.vm.specification.version",
        "java.specification.version",
        "os.arch",
        "os.name",
        "os.version"
    };
    final Properties p = System.getProperties();
    for (String n : names) {
      out.println(n + " = " + p.getProperty(n));
    }
  }
}
}