MAPREDUCE-1970. Reed-Solomon code implementation for HDFS RAID.
(Scott Chen via dhruba)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/mapreduce/trunk@1027822 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/CHANGES.txt b/CHANGES.txt
index 593cb18..248a9a8 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -18,6 +18,9 @@
MAPREDUCE-220. Collect cpu and memory statistics per task. (Scott Chen via
acmurthy)
+ MAPREDUCE-1970. Reed-Solomon code implementation for HDFS RAID.
+ (Scott Chen via dhruba)
+
IMPROVEMENTS
MAPREDUCE-2140. Regenerate fair scheduler design doc PDF. (matei)
diff --git a/src/contrib/raid/src/java/org/apache/hadoop/raid/ErasureCode.java b/src/contrib/raid/src/java/org/apache/hadoop/raid/ErasureCode.java
new file mode 100644
index 0000000..0747394
--- /dev/null
+++ b/src/contrib/raid/src/java/org/apache/hadoop/raid/ErasureCode.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.raid;
+
+public interface ErasureCode {
+ /**
+ * Encodes the given message.
+ * @param message The data of the message. The data is present in the least
+ * significant bits of each int. The number of data bits is
+ * symbolSize(). The number of elements of message is
+ * stripeSize().
+ * @param parity (out) The information is present in the least
+ * significant bits of each int. The number of parity bits is
+ * symbolSize(). The number of elements in the code is
+ * paritySize().
+ */
+ public void encode(int[] message, int[] parity);
+
+ /**
+ * Generates missing portions of data.
+ * @param data The message and parity. The parity should be placed in the
+ * first part of the array. In each integer, the relevant portion
+ * is present in the least significant bits of each int.
+ * The number of elements in data is stripeSize() + paritySize().
+ * @param erasedLocations The indexes in data which are not available.
+ * @param erasedValues (out)The decoded values corresponding to erasedLocations.
+ */
+ public void decode(int[] data, int[] erasedLocations, int[] erasedValues);
+
+ /**
+ * The number of elements in the message.
+ */
+ public int stripeSize();
+
+ /**
+ * The number of elements in the code.
+ */
+ public int paritySize();
+
+ /**
+ * Number of bits for each symbol.
+ */
+ public int symbolSize();
+}
diff --git a/src/contrib/raid/src/java/org/apache/hadoop/raid/GaloisField.java b/src/contrib/raid/src/java/org/apache/hadoop/raid/GaloisField.java
new file mode 100644
index 0000000..8c71545
--- /dev/null
+++ b/src/contrib/raid/src/java/org/apache/hadoop/raid/GaloisField.java
@@ -0,0 +1,282 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.raid;
+
+/**
+ * Implementation of Galois field arithmetics with 2^p elements.
+ * The input must be unsigned integers.
+ */
+public class GaloisField {
+
+ private final int[] logTable;
+ private final int[] powTable;
+ private final int[][] mulTable;
+ private final int[][] divTable;
+ private final int fieldSize;
+ private final int primitivePeriod;
+ private final int primitivePolynomial;
+
+ // Field size 256 is good for byte based system
+ private static final int DEFAULT_FIELD_SIZE = 256;
+ // primitive polynomial 1 + X + X^2 + X^4 + X^8
+ private static final int DEFAULT_PRIMITIVE_POLYNOMIAL = 285;
+
+ /**
+ * An object can perform Galois field arithmetics
+ * @param fieldSize size of the field
+ * @param primitivePolynomial a primitive polynomial corresponds to the size
+ */
+ public GaloisField(int fieldSize, int primitivePolynomial) {
+ this.fieldSize = fieldSize;
+ this.primitivePeriod = fieldSize - 1;
+ this.primitivePolynomial = primitivePolynomial;
+ logTable = new int[fieldSize];
+ powTable = new int[fieldSize];
+ mulTable = new int[fieldSize][fieldSize];
+ divTable = new int[fieldSize][fieldSize];
+ int value = 1;
+ for (int pow = 0; pow < fieldSize - 1; pow++) {
+ powTable[pow] = value;
+ logTable[value] = pow;
+ value = value * 2;
+ if (value >= fieldSize) {
+ value = value ^ primitivePolynomial;
+ }
+ }
+ // building multiplication table
+ for (int i = 0; i < fieldSize; i++) {
+ for (int j = 0; j < fieldSize; j++) {
+ if (i == 0 || j == 0) {
+ mulTable[i][j] = 0;
+ continue;
+ }
+ int z = logTable[i] + logTable[j];
+ z = z >= primitivePeriod ? z - primitivePeriod : z;
+ z = powTable[z];
+ mulTable[i][j] = z;
+ }
+ }
+ // building division table
+ for (int i = 0; i < fieldSize; i++) {
+ for (int j = 1; j < fieldSize; j++) {
+ if (i == 0) {
+ divTable[i][j] = 0;
+ continue;
+ }
+ int z = logTable[i] - logTable[j];
+ z = z < 0 ? z + primitivePeriod : z;
+ z = powTable[z];
+ divTable[i][j] = z;
+ }
+ }
+ }
+
+ /**
+ * An object can perform Galois field arithmetics
+ */
+ public GaloisField() {
+ this(DEFAULT_FIELD_SIZE, DEFAULT_PRIMITIVE_POLYNOMIAL);
+ }
+
+ /**
+ * Return number of elements in the field
+ * @return number of elements in the field
+ */
+ public int getFieldSize() {
+ return fieldSize;
+ }
+
+ /**
+ * Return the primitive polynomial in GF(2)
+ * @return primitive polynomial as a integer
+ */
+ public int getPrimitivePolynomial() {
+ return primitivePolynomial;
+ }
+
+ /**
+ * Compute the sum of two fields
+ * @param x input field
+ * @param y input field
+ * @return result of addition
+ */
+ public int add(int x, int y) {
+ assert(x >= 0 && x < getFieldSize() && y >= 0 && y < getFieldSize());
+ return x ^ y;
+ }
+
+ /**
+ * Compute the multiplication of two fields
+ * @param x input field
+ * @param y input field
+ * @return result of multiplication
+ */
+ public int multiply(int x, int y) {
+ assert(x >= 0 && x < getFieldSize() && y >= 0 && y < getFieldSize());
+ return mulTable[x][y];
+ }
+
+ /**
+ * Compute the division of two fields
+ * @param x input field
+ * @param y input field
+ * @return x/y
+ */
+ public int divide(int x, int y) {
+ assert(x >= 0 && x < getFieldSize() && y > 0 && y < getFieldSize());
+ return divTable[x][y];
+ }
+
+ /**
+ * Compute power n of a field
+ * @param x input field
+ * @param n power
+ * @return x^n
+ */
+ public int power(int x, int n) {
+ assert(x >= 0 && x < getFieldSize());
+ if (n == 0) {
+ return 1;
+ }
+ if (x == 0) {
+ return 0;
+ }
+ x = logTable[x] * n;
+ if (x < primitivePeriod) {
+ return powTable[x];
+ }
+ x = x % primitivePeriod;
+ return powTable[x];
+ }
+
+ /**
+ * Given a Vandermonde matrix V[i][j]=x[j]^i and vector y, solve for z such
+ * that Vz=y. The output z will be placed in y.
+ * @param x the vector which describe the Vandermonde matrix
+ * @param y right-hand side of the Vandermonde system equation.
+ * will be replaced the output in this vector
+ */
+ public void solveVandermondeSystem(int[] x, int[] y) {
+ solveVandermondeSystem(x, y, x.length);
+ }
+
+ /**
+ * Given a Vandermonde matrix V[i][j]=x[j]^i and vector y, solve for z such
+ * that Vz=y. The output z will be placed in y.
+ * @param x the vector which describe the Vandermonde matrix
+ * @param y right-hand side of the Vandermonde system equation.
+ * will be replaced the output in this vector
+ * @param len consider x and y only from 0...len-1
+ */
+ public void solveVandermondeSystem(int[] x, int[] y, int len) {
+ assert(x.length <= len && y.length <= len);
+ for (int i = 0; i < len - 1; i++) {
+ for (int j = len - 1; j > i; j--) {
+ y[j] = y[j] ^ mulTable[x[i]][y[j - 1]];
+ }
+ }
+ for (int i = len - 1; i >= 0; i--) {
+ for (int j = i + 1; j < len; j++) {
+ y[j] = divTable[y[j]][x[j] ^ x[j - i - 1]];
+ }
+ for (int j = i; j < len - 1; j++) {
+ y[j] = y[j] ^ y[j + 1];
+ }
+ }
+ }
+
+ /**
+ * Compute the multiplication of two polynomials. The index in the
+ * array corresponds to the power of the entry. For example p[0] is the
+ * constant term of the polynomial p.
+ * @param p input polynomial
+ * @param q input polynomial
+ * @return polynomial represents p*q
+ */
+ public int[] multiply(int[] p, int[] q) {
+ int len = p.length + q.length - 1;
+ int[] result = new int[len];
+ for (int i = 0; i < len; i++) {
+ result[i] = 0;
+ }
+ for (int i = 0; i < p.length; i++) {
+ for (int j = 0; j < q.length; j++) {
+ result[i + j] = add(result[i + j], multiply(p[i], q[j]));
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Compute the remainder of a dividend and divisor pair. The index in the
+ * array corresponds to the power of the entry. For example p[0] is the
+ * constant term of the polynomial p.
+ * @param dividend dividend polynomial, the remainder will be placed here when return
+ * @param divisor divisor polynomial
+ */
+ public void remainder(int[] dividend, int[] divisor) {
+ for (int i = dividend.length - divisor.length; i >= 0; i--) {
+ int ratio =
+ divTable[dividend[i + divisor.length - 1]][divisor[divisor.length - 1]];
+ for (int j = 0; j < divisor.length; j++) {
+ int k = j + i;
+ dividend[k] = dividend[k] ^ mulTable[ratio][divisor[j]];
+ }
+ }
+ }
+
+ /**
+ * Compute the sum of two polynomials. The index in the
+ * array corresponds to the power of the entry. For example p[0] is the
+ * constant term of the polynomial p.
+ * @param p input polynomial
+ * @param q input polynomial
+ * @return polynomial represents p+q
+ */
+ public int[] add(int[] p, int[] q) {
+ int len = Math.max(p.length, q.length);
+ int[] result = new int[len];
+ for (int i = 0; i < len; i++) {
+ if (i < p.length && i < q.length) {
+ result[i] = add(p[i], q[i]);
+ } else if (i < p.length) {
+ result[i] = p[i];
+ } else {
+ result[i] = q[i];
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Substitute x into polynomial p(x).
+ * @param p input polynomial
+ * @param x input field
+ * @return p(x)
+ */
+ public int substitute(int[] p, int x) {
+ int result = 0;
+ int y = 1;
+ for (int i = 0; i < p.length; i++) {
+ result = result ^ mulTable[p[i]][y];
+ y = mulTable[x][y];
+ }
+ return result;
+ }
+}
diff --git a/src/contrib/raid/src/java/org/apache/hadoop/raid/ReedSolomonCode.java b/src/contrib/raid/src/java/org/apache/hadoop/raid/ReedSolomonCode.java
new file mode 100644
index 0000000..88d0b51
--- /dev/null
+++ b/src/contrib/raid/src/java/org/apache/hadoop/raid/ReedSolomonCode.java
@@ -0,0 +1,103 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.raid;
+
+public class ReedSolomonCode implements ErasureCode {
+
+ private final int stripeSize;
+ private final int paritySize;
+ private final int[] generatingRoots;
+ private final int[] generatingPolynomial;
+ private final int PRIMITIVE_ROOT = 2;
+ private final GaloisField GF = new GaloisField();
+ private int[] errSignature;
+ private final int[] paritySymbolLocations;
+ private final int[] dataBuff;
+
+ public ReedSolomonCode(int stripeSize, int paritySize) {
+ assert(stripeSize + paritySize < GF.getFieldSize());
+ this.stripeSize = stripeSize;
+ this.paritySize = paritySize;
+ this.errSignature = new int[paritySize];
+ this.paritySymbolLocations = new int[paritySize];
+ this.dataBuff = new int[paritySize + stripeSize];
+ for (int i = 0; i < paritySize; i++) {
+ paritySymbolLocations[i] = i;
+ }
+ this.generatingRoots = new int[paritySize];
+
+ // compute generating polynomial
+ int[] gen = {1};
+ int[] poly = new int[2];
+ for (int i = 0; i < paritySize; i++) {
+ generatingRoots[i] = GF.power(PRIMITIVE_ROOT, i);
+ poly[0] = generatingRoots[i];
+ poly[1] = 1;
+ gen = GF.multiply(gen, poly);
+ }
+ // generating polynomial has all generating roots
+ generatingPolynomial = gen;
+ }
+
+ @Override
+ public void encode(int[] message, int[] parity) {
+ assert(message.length == stripeSize && parity.length == paritySize);
+ for (int i = 0; i < paritySize; i++) {
+ dataBuff[i] = 0;
+ }
+ for (int i = 0; i < stripeSize; i++) {
+ dataBuff[i + paritySize] = message[i];
+ }
+ GF.remainder(dataBuff, generatingPolynomial);
+ for (int i = 0; i < paritySize; i++) {
+ parity[i] = dataBuff[i];
+ }
+ }
+
+ @Override
+ public void decode(int[] data, int[] erasedLocation, int[] erasedValue) {
+ if (erasedLocation.length == 0) {
+ return;
+ }
+ assert(erasedLocation.length == erasedValue.length);
+ for (int i = 0; i < erasedLocation.length; i++) {
+ data[erasedLocation[i]] = 0;
+ }
+ for (int i = 0; i < erasedLocation.length; i++) {
+ errSignature[i] = GF.power(PRIMITIVE_ROOT, erasedLocation[i]);
+ erasedValue[i] = GF.substitute(data, generatingRoots[i]);
+ }
+ GF.solveVandermondeSystem(errSignature, erasedValue, erasedLocation.length);
+ }
+
+ @Override
+ public int stripeSize() {
+ return this.stripeSize;
+ }
+
+ @Override
+ public int paritySize() {
+ return this.paritySize;
+ }
+
+ @Override
+ public int symbolSize() {
+ return (int) Math.round(Math.log(GF.getFieldSize()) / Math.log(2));
+ }
+}
diff --git a/src/contrib/raid/src/test/org/apache/hadoop/raid/TestErasureCodes.java b/src/contrib/raid/src/test/org/apache/hadoop/raid/TestErasureCodes.java
new file mode 100644
index 0000000..d1d3f60
--- /dev/null
+++ b/src/contrib/raid/src/test/org/apache/hadoop/raid/TestErasureCodes.java
@@ -0,0 +1,186 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.raid;
+
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
+
+import junit.framework.TestCase;
+
+public class TestErasureCodes extends TestCase {
+ final int TEST_CODES = 100;
+ final int TEST_TIMES = 1000;
+ final Random RAND = new Random();
+
+ public void testEncodeDecode() {
+ for (int n = 0; n < TEST_CODES; n++) {
+ int stripeSize = RAND.nextInt(99) + 1; // 1, 2, 3, ... 100
+ int paritySize = RAND.nextInt(9) + 1; //1, 2, 3, 4, ... 10
+ ErasureCode ec = new ReedSolomonCode(stripeSize, paritySize);
+ for (int m = 0; m < TEST_TIMES; m++) {
+ int symbolMax = (int) Math.pow(2, ec.symbolSize());
+ int[] message = new int[stripeSize];
+ for (int i = 0; i < stripeSize; i++) {
+ message[i] = RAND.nextInt(symbolMax);
+ }
+ int[] parity = new int[paritySize];
+ ec.encode(message, parity);
+ int[] data = new int[stripeSize + paritySize];
+ int[] copy = new int[data.length];
+ for (int i = 0; i < paritySize; i++) {
+ data[i] = parity[i];
+ copy[i] = parity[i];
+ }
+ for (int i = 0; i < stripeSize; i++) {
+ data[i + paritySize] = message[i];
+ copy[i + paritySize] = message[i];
+ }
+ int erasedLen = paritySize == 1 ? 1 : RAND.nextInt(paritySize - 1) + 1;
+ int[] erasedLocations = randomErasedLocation(erasedLen, data.length);
+ for (int i = 0; i < erasedLocations.length; i++) {
+ data[erasedLocations[i]] = 0;
+ }
+ int[] erasedValues = new int[erasedLen];
+ ec.decode(data, erasedLocations, erasedValues);
+ for (int i = 0; i < erasedLen; i++) {
+ assertEquals("Decode failed", copy[erasedLocations[i]], erasedValues[i]);
+ }
+ }
+ }
+ }
+
+ public void testRSPerformance() {
+ int stripeSize = 10;
+ int paritySize = 4;
+ ErasureCode ec = new ReedSolomonCode(stripeSize, paritySize);
+ int symbolMax = (int) Math.pow(2, ec.symbolSize());
+ byte[][] message = new byte[stripeSize][];
+ int bufsize = 1024 * 1024 * 10;
+ for (int i = 0; i < stripeSize; i++) {
+ message[i] = new byte[bufsize];
+ for (int j = 0; j < bufsize; j++) {
+ message[i][j] = (byte) RAND.nextInt(symbolMax);
+ }
+ }
+ byte[][] parity = new byte[paritySize][];
+ for (int i = 0; i < paritySize; i++) {
+ parity[i] = new byte[bufsize];
+ }
+ long encodeStart = System.currentTimeMillis();
+ int[] tmpIn = new int[stripeSize];
+ int[] tmpOut = new int[paritySize];
+ for (int i = 0; i < bufsize; i++) {
+ // Copy message.
+ for (int j = 0; j < stripeSize; j++) tmpIn[j] = 0x000000FF & message[j][i];
+ ec.encode(tmpIn, tmpOut);
+ // Copy parity.
+ for (int j = 0; j < paritySize; j++) parity[j][i] = (byte)tmpOut[j];
+ }
+ long encodeEnd = System.currentTimeMillis();
+ float encodeMSecs = (encodeEnd - encodeStart);
+ System.out.println("Time to encode rs = " + encodeMSecs +
+ "msec (" + message[0].length / (1000 * encodeMSecs) + " MB/s)");
+
+ // Copy erased array.
+ int[] data = new int[paritySize + stripeSize];
+ // 4th location is the 0th symbol in the message
+ int[] erasedLocations = new int[]{4, 1, 5, 7};
+ int[] erasedValues = new int[erasedLocations.length];
+ byte[] copy = new byte[bufsize];
+ for (int j = 0; j < bufsize; j++) {
+ copy[j] = message[0][j];
+ message[0][j] = 0;
+ }
+
+ long decodeStart = System.currentTimeMillis();
+ for (int i = 0; i < bufsize; i++) {
+ // Copy parity first.
+ for (int j = 0; j < paritySize; j++) {
+ data[j] = 0x000000FF & parity[j][i];
+ }
+ // Copy message. Skip 0 as the erased symbol
+ for (int j = 1; j < stripeSize; j++) {
+ data[j + paritySize] = 0x000000FF & message[j][i];
+ }
+ // Use 0, 2, 3, 6, 8, 9, 10, 11, 12, 13th symbol to reconstruct the data
+ ec.decode(data, erasedLocations, erasedValues);
+ message[0][i] = (byte)erasedValues[0];
+ }
+ long decodeEnd = System.currentTimeMillis();
+ float decodeMSecs = (decodeEnd - decodeStart);
+ System.out.println("Time to decode = " + decodeMSecs +
+ "msec (" + message[0].length / (1000 * decodeMSecs) + " MB/s)");
+ assertTrue("Decode failed", java.util.Arrays.equals(copy, message[0]));
+ }
+
+ public void testXorPerformance() {
+ java.util.Random RAND = new java.util.Random();
+ int stripeSize = 10;
+ byte[][] message = new byte[stripeSize][];
+ int bufsize = 1024 * 1024 * 10;
+ for (int i = 0; i < stripeSize; i++) {
+ message[i] = new byte[bufsize];
+ for (int j = 0; j < bufsize; j++) {
+ message[i][j] = (byte)RAND.nextInt(256);
+ }
+ }
+ byte[] parity = new byte[bufsize];
+
+ long encodeStart = System.currentTimeMillis();
+ for (int i = 0; i < bufsize; i++) {
+ for (int j = 0; j < stripeSize; j++) parity[i] ^= message[j][i];
+ }
+ long encodeEnd = System.currentTimeMillis();
+ float encodeMSecs = encodeEnd - encodeStart;
+ System.out.println("Time to encode xor = " + encodeMSecs +
+ " msec (" + message[0].length / (1000 * encodeMSecs) + "MB/s)");
+
+ byte[] copy = new byte[bufsize];
+ for (int j = 0; j < bufsize; j++) {
+ copy[j] = message[0][j];
+ message[0][j] = 0;
+ }
+
+ long decodeStart = System.currentTimeMillis();
+ for (int i = 0; i < bufsize; i++) {
+ for (int j = 1; j < stripeSize; j++) message[0][i] ^= message[j][i];
+ message[0][i] ^= parity[i];
+ }
+ long decodeEnd = System.currentTimeMillis();
+ float decodeMSecs = decodeEnd - decodeStart;
+ System.out.println("Time to decode xor = " + decodeMSecs +
+ " msec (" + message[0].length / (1000 * decodeMSecs) + "MB/s)");
+ assertTrue("Decode failed", java.util.Arrays.equals(copy, message[0]));
+ }
+
+ private int[] randomErasedLocation(int erasedLen, int dataLen) {
+ int[] erasedLocations = new int[erasedLen];
+ for (int i = 0; i < erasedLen; i++) {
+ Set<Integer> s = new HashSet<Integer>();
+ while (s.size() != erasedLen) {
+ s.add(RAND.nextInt(dataLen));
+ }
+ int t = 0;
+ for (int erased : s) {
+ erasedLocations[t++] = erased;
+ }
+ }
+ return erasedLocations;
+ }
+}
diff --git a/src/contrib/raid/src/test/org/apache/hadoop/raid/TestGaloisField.java b/src/contrib/raid/src/test/org/apache/hadoop/raid/TestGaloisField.java
new file mode 100644
index 0000000..e2a8868
--- /dev/null
+++ b/src/contrib/raid/src/test/org/apache/hadoop/raid/TestGaloisField.java
@@ -0,0 +1,168 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.raid;
+
+import java.util.Random;
+import java.util.Set;
+import java.util.HashSet;
+
+import junit.framework.TestCase;
+
+public class TestGaloisField extends TestCase {
+
+ final int TEST_TIMES = 10000;
+ final Random RAND = new Random();
+ final static GaloisField GF = new GaloisField();
+
+ private int randGF() {
+ return 0x000000FF & RAND.nextInt(GF.getFieldSize());
+ }
+ private int[] randGFPoly(int len) {
+ int[] result = new int[len];
+ for (int i = 0; i < len; i++) {
+ result[i] = randGF();
+ }
+ return result;
+ }
+
+ public void testDistributivity() {
+ for (int i = 0; i < TEST_TIMES; i++) {
+ int a = RAND.nextInt(GF.getFieldSize());
+ int b = RAND.nextInt(GF.getFieldSize());
+ int c = RAND.nextInt(GF.getFieldSize());
+ int result1 = GF.multiply(a, GF.add(b, c));
+ int result2 = GF.add(GF.multiply(a, b), GF.multiply(a, c));
+ assertTrue("Distributivity test #" + i + " failed: " + a + ", " + b + ", "
+ + c, result1 == result2);
+ }
+ }
+
+ public void testDevision() {
+ for (int i = 0; i < TEST_TIMES; i++) {
+ int a = RAND.nextInt(GF.getFieldSize());
+ int b = RAND.nextInt(GF.getFieldSize());
+ if (b == 0) {
+ continue;
+ }
+ int c = GF.divide(a, b);
+ assertTrue("Division test #" + i + " failed: " + a + "/" + b + " = " + c,
+ a == GF.multiply(c, b));
+ }
+ }
+
+ public void testPower() {
+ for (int i = 0; i < TEST_TIMES; i++) {
+ int a = randGF();
+ int n = RAND.nextInt(10);
+ int result1 = GF.power(a, n);
+ int result2 = 1;
+ for (int j = 0; j < n; j++) {
+ result2 = GF.multiply(result2, a);
+ }
+ assert(result1 == result2);
+ }
+ }
+
+ public void testPolynomialDistributivity() {
+ final int TEST_LEN = 15;
+ for (int i = 0; i < TEST_TIMES; i++) {
+ int[] a = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
+ int[] b = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
+ int[] c = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
+ int[] result1 = GF.multiply(a, GF.add(b, c));
+ int[] result2 = GF.add(GF.multiply(a, b), GF.multiply(a, c));
+ assertTrue("Distributivity test on polynomials failed",
+ java.util.Arrays.equals(result1, result2));
+ }
+ }
+
+ public void testSubstitute() {
+ final int TEST_LEN = 15;
+ for (int i = 0; i < TEST_TIMES; i++) {
+ int[] a = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
+ int[] b = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
+ int[] c = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
+ int x = randGF();
+ // (a * b * c)(x)
+ int result1 = GF.substitute(GF.multiply(GF.multiply(a, b), c), x);
+ // a(x) * b(x) * c(x)
+ int result2 =
+ GF.multiply(GF.multiply(GF.substitute(a, x), GF.substitute(b, x)),
+ GF.substitute(c, x));
+ assertTrue("Substitute test on polynomial failed",
+ result1 == result2);
+ }
+ }
+
+ public void testSolveVandermondeSystem() {
+ final int TEST_LEN = 15;
+ for (int i = 0; i < TEST_TIMES; i++) {
+ int[] z = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
+ // generate distinct values for x
+ int[] x = new int[z.length];
+ Set<Integer> s = new HashSet<Integer>();
+ while (s.size() != z.length) {
+ s.add(randGF());
+ }
+ int t = 0;
+ for (int v : s) {
+ x[t++] = v;
+ }
+ // compute the output for the Vandermonde system
+ int[] y = new int[x.length];
+ for (int j = 0; j < x.length; j++) {
+ y[j] = 0;
+ for (int k = 0; k < x.length; k++) {
+ //y[j] = y[j] + z[k] * pow(x[k], j);
+ y[j] = GF.add(y[j], GF.multiply(GF.power(x[k], j), z[k]));
+ }
+ }
+
+ GF.solveVandermondeSystem(x, y);
+ assertTrue("Solving Vandermonde system failed",
+ java.util.Arrays.equals(y, z));
+ }
+ }
+
+ public void testRemainder() {
+ final int TEST_LEN = 15;
+ for (int i = 0; i < TEST_TIMES; i++) {
+ int[] quotient = null;
+ int[] divisor = null;
+ int[] remainder = null;
+ int[] dividend = null;
+ while (true) {
+ quotient = randGFPoly(RAND.nextInt(TEST_LEN - 3) + 3);
+ divisor = randGFPoly(RAND.nextInt(quotient.length - 2) + 2);
+ remainder = randGFPoly(RAND.nextInt(divisor.length - 1) + 1);
+ dividend = GF.add(remainder, GF.multiply(quotient, divisor));
+ if (quotient[quotient.length - 1] != 0 &&
+ divisor[divisor.length - 1] != 0 &&
+ remainder[remainder.length - 1] != 0) {
+ // make sure all the leading terms are not zero
+ break;
+ }
+ }
+ GF.remainder(dividend, divisor);
+ for (int j = 0; j < remainder.length; j++) {
+ assertTrue("Distributivity test on polynomials failed",
+ dividend[j] == remainder[j]);
+ }
+ }
+ }
+}