/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.RandomAccessFile;
import java.net.URI;
import java.util.Random;
import java.util.regex.Pattern;
import java.util.zip.CRC32;
import junit.framework.TestCase;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.raid.RaidNode;
import org.apache.hadoop.raid.RaidUtils;
import org.apache.hadoop.raid.protocol.PolicyInfo.ErasureCodeType;
import org.apache.hadoop.util.StringUtils;
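/**
* Tests that files raided with XOR or Reed-Solomon parity remain readable
* through DistributedRaidFileSystem after blocks are deleted or corrupted.
*/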
public class TestRaidDfs extends TestCase {
final static String TEST_DIR = new File(System.getProperty("test.build.data",
"build/contrib/raid/test/data")).getAbsolutePath();
final static String LOG_DIR = "/raidlog";
final static long RELOAD_INTERVAL = 1000;
final static Log LOG = LogFactory.getLog("org.apache.hadoop.raid.TestRaidDfs");
final static int NUM_DATANODES = 3;
Configuration conf;
String namenode = null;
String hftp = null;
MiniDFSCluster dfs = null;
FileSystem fileSys = null;
String jobTrackerName = null;
ErasureCodeType code;
int stripeLength;
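// Brings up a MiniDFSCluster with RAID configured to run locally (no
// map-reduce cluster), using the given erasure code and Reed-Solomon
// parity length.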
private void mySetup(
String erasureCode, int rsParityLength) throws Exception {
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
conf = new Configuration();
conf.set("fs.raid.recoverylogdir", LOG_DIR);
conf.setInt(RaidNode.RS_PARITY_LENGTH_KEY, rsParityLength);
// scan all policies once every 5 seconds
conf.setLong("raid.policy.rescan.interval", 5000);
// make all deletions not go through Trash
conf.set("fs.shell.delete.classname", "org.apache.hadoop.hdfs.DFSClient");
// do not use map-reduce cluster for Raiding
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
conf.set("raid.server.address", "localhost:0");
conf.setInt("hdfs.raid.stripeLength", stripeLength);
conf.set("xor".equals(erasureCode) ? RaidNode.RAID_LOCATION_KEY :
RaidNode.RAIDRS_LOCATION_KEY, "/destraid");
dfs = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES).build();
dfs.waitActive();
fileSys = dfs.getFileSystem();
namenode = fileSys.getUri().toString();
hftp = "hftp://localhost.localdomain:" + dfs.getNameNodePort();
FileSystem.setDefaultUri(conf, namenode);
}
private void myTearDown() throws Exception {
if (dfs != null) { dfs.shutdown(); }
}
private LocatedBlocks getBlockLocations(Path file, long length)
throws IOException {
DistributedFileSystem dfs = (DistributedFileSystem) fileSys;
return RaidDFSUtil.getBlockLocations(
dfs, file.toUri().getPath(), 0, length);
}
private LocatedBlocks getBlockLocations(Path file)
throws IOException {
FileStatus stat = fileSys.getFileStatus(file);
return getBlockLocations(file, stat.getLen());
}
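// Returns a client that reads through DistributedRaidFileSystem, which can
// transparently recover corrupt or missing blocks from parity data.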
private DistributedRaidFileSystem getRaidFS() throws IOException {
DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
Configuration clientConf = new Configuration(conf);
clientConf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedRaidFileSystem");
clientConf.set("fs.raid.underlyingfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
clientConf.setBoolean("fs.hdfs.impl.disable.cache", true);
URI dfsUri = dfs.getUri();
return (DistributedRaidFileSystem)FileSystem.get(dfsUri, clientConf);
}
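/**
* Waits until the parity file for the given source file appears under
* destPath, the file is fully written, and the source file's replication
* has been lowered by the raiding process.
*/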
public static void waitForFileRaided(
Log logger, FileSystem fileSys, Path file, Path destPath)
throws IOException, InterruptedException {
FileStatus parityStat = null;
String fileName = file.getName();
// Wait until the parity file for the source file shows up.
while (parityStat == null) {
logger.info("Waiting for files to be raided.");
try {
FileStatus[] listPaths = fileSys.listStatus(destPath);
if (listPaths != null) {
for (FileStatus f : listPaths) {
logger.info("File raided so far : " + f.getPath());
String found = f.getPath().getName();
if (fileName.equals(found)) {
parityStat = f;
break;
}
}
}
} catch (FileNotFoundException e) {
//ignore
}
Thread.sleep(1000); // keep waiting
}
// Wait until the file's blocks are no longer under construction.
while (true) {
DistributedFileSystem dfs = (DistributedFileSystem) fileSys;
LocatedBlocks locations = RaidDFSUtil.getBlockLocations(
dfs, file.toUri().getPath(), 0, parityStat.getLen());
if (!locations.isUnderConstruction()) {
break;
}
Thread.sleep(1000);
}
// Wait until the source file's replication has dropped to the raided value.
while (true) {
FileStatus stat = fileSys.getFileStatus(file);
if (stat.getReplication() == 1) break;
Thread.sleep(1000);
}
}
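/**
* Creates a test file, raids it, then corrupts (or deletes) the listed
* blocks and verifies that the file still reads back with the expected CRC.
*/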
private void corruptBlockAndValidate(Path srcFile, Path destPath,
int[] listBlockNumToCorrupt, long blockSize, int numBlocks)
throws IOException, InterruptedException {
int repl = 1;
long crc = createTestFilePartialLastBlock(fileSys, srcFile, repl,
numBlocks, blockSize);
long length = fileSys.getFileStatus(srcFile).getLen();
RaidNode.doRaid(conf, fileSys.getFileStatus(srcFile),
destPath, code, new RaidNode.Statistics(), new RaidUtils.DummyProgressable(),
false, repl, repl, stripeLength);
// Corrupt (or delete) the specified blocks of the file.
for (int blockNumToCorrupt : listBlockNumToCorrupt) {
LOG.info("Corrupt block " + blockNumToCorrupt + " of file " + srcFile);
LocatedBlocks locations = getBlockLocations(srcFile);
corruptBlock(dfs, srcFile, locations.get(blockNumToCorrupt).getBlock(),
NUM_DATANODES, true);
}
// Validate that the file can still be read through the raid-aware filesystem.
DistributedRaidFileSystem raidfs = getRaidFS();
assertTrue(validateFile(raidfs, srcFile, length, crc));
validateLogFile(getRaidFS(), new Path(LOG_DIR));
}
/**
* Create a file, corrupt several blocks in it and ensure that the file can
* still be read through DistributedRaidFileSystem using Reed-Solomon coding.
*/
public void testRaidDfsRs() throws Exception {
LOG.info("Test testRaidDfs started.");
code = ErasureCodeType.RS;
long blockSize = 8192L;
int numBlocks = 8;
stripeLength = 3;
mySetup("rs", 3);
int[][] corrupt = {{1, 2, 3}, {1, 4, 7}, {3, 6, 7}};
try {
for (int i = 0; i < corrupt.length; i++) {
Path file = new Path("/user/dhruba/raidtest/file" + i);
corruptBlockAndValidate(
file, new Path("/destraid"), corrupt[i], blockSize, numBlocks);
}
} catch (Exception e) {
LOG.info("testRaidDfs Exception " + e +
StringUtils.stringifyException(e));
throw e;
} finally {
myTearDown();
}
LOG.info("Test testRaidDfs completed.");
}
/**
* Test DistributedRaidFileSystem.readFully()
*/
public void testReadFully() throws Exception {
code = ErasureCodeType.XOR;
stripeLength = 3;
mySetup("xor", 1);
try {
Path file = new Path("/user/raid/raidtest/file1");
long crc = createTestFile(fileSys, file, 1, 8, 8192L);
FileStatus stat = fileSys.getFileStatus(file);
LOG.info("Created " + file + ", crc=" + crc + ", len=" + stat.getLen());
byte[] filebytes = new byte[(int)stat.getLen()];
// Test that readFully returns the correct CRC when there are no errors.
DistributedRaidFileSystem raidfs = getRaidFS();
FSDataInputStream stm = raidfs.open(file);
stm.readFully(0, filebytes);
assertEquals(crc, bufferCRC(filebytes));
stm.close();
// Generate parity.
RaidNode.doRaid(conf, fileSys.getFileStatus(file),
new Path("/destraid"), code, new RaidNode.Statistics(),
new RaidUtils.DummyProgressable(),
false, 1, 1, stripeLength);
int[] corrupt = {0, 4, 7}; // first, middle and last block
for (int blockIdx : corrupt) {
LOG.info("Corrupt block " + blockIdx + " of file " + file);
LocatedBlocks locations = getBlockLocations(file);
corruptBlock(dfs, file, locations.get(blockIdx).getBlock(),
NUM_DATANODES, true);
}
// Test that readFully returns the correct CRC when there are errors.
stm = raidfs.open(file);
stm.readFully(0, filebytes);
assertEquals(crc, bufferCRC(filebytes));
stm.close();
} finally {
myTearDown();
}
}
/**
* Test that access time and mtime of a source file do not change after
* raiding.
*/
public void testAccessTime() throws Exception {
LOG.info("Test testAccessTime started.");
code = ErasureCodeType.XOR;
long blockSize = 8192L;
int numBlocks = 8;
int repl = 1;
stripeLength = 3;
mySetup("xor", 1);
Path file = new Path("/user/dhruba/raidtest/file");
createTestFilePartialLastBlock(fileSys, file, repl, numBlocks, blockSize);
FileStatus stat = fileSys.getFileStatus(file);
try {
RaidNode.doRaid(conf, fileSys.getFileStatus(file),
new Path("/destraid"), code, new RaidNode.Statistics(),
new RaidUtils.DummyProgressable(), false, repl, repl, stripeLength);
FileStatus newStat = fileSys.getFileStatus(file);
assertEquals(stat.getModificationTime(), newStat.getModificationTime());
assertEquals(stat.getAccessTime(), newStat.getAccessTime());
} finally {
myTearDown();
}
}
/**
* Create a file, corrupt a block in it and ensure that the file can still be
* read through DistributedRaidFileSystem using the XOR code.
*/
public void testRaidDfsXor() throws Exception {
LOG.info("Test testRaidDfs started.");
code = ErasureCodeType.XOR;
long blockSize = 8192L;
int numBlocks = 8;
stripeLength = 3;
mySetup("xor", 1);
int[][] corrupt = {{0}, {4}, {7}}; // first, middle and last block
try {
for (int i = 0; i < corrupt.length; i++) {
Path file = new Path("/user/dhruba/raidtest/" + i);
corruptBlockAndValidate(
file, new Path("/destraid"), corrupt[i], blockSize, numBlocks);
}
} catch (Exception e) {
LOG.info("testRaidDfs Exception " + e +
StringUtils.stringifyException(e));
throw e;
} finally {
myTearDown();
}
LOG.info("Test testRaidDfs completed.");
}
//
// Creates a file and populates it with random data. Returns its CRC.
//
public static long createTestFile(FileSystem fileSys, Path name, int repl,
int numBlocks, long blocksize)
throws IOException {
CRC32 crc = new CRC32();
Random rand = new Random();
FSDataOutputStream stm = fileSys.create(name, true,
fileSys.getConf().getInt("io.file.buffer.size", 4096),
(short)repl, blocksize);
// fill random data into file
final byte[] b = new byte[(int)blocksize];
for (int i = 0; i < numBlocks; i++) {
rand.nextBytes(b);
stm.write(b);
crc.update(b);
}
stm.close();
return crc.getValue();
}
//
// Creates a file with a partially full last block, populates it with random
// data, and returns its CRC.
//
public static long createTestFilePartialLastBlock(
FileSystem fileSys, Path name, int repl, int numBlocks, long blocksize)
throws IOException {
CRC32 crc = new CRC32();
Random rand = new Random();
FSDataOutputStream stm = fileSys.create(name, true,
fileSys.getConf().getInt("io.file.buffer.size", 4096),
(short)repl, blocksize);
// Write whole blocks.
byte[] b = new byte[(int)blocksize];
for (int i = 1; i < numBlocks; i++) {
rand.nextBytes(b);
stm.write(b);
crc.update(b);
}
// Write partial block.
b = new byte[(int)blocksize/2 - 1];
rand.nextBytes(b);
stm.write(b);
crc.update(b);
stm.close();
return crc.getValue();
}
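//
// Computes the CRC32 checksum of a byte buffer.
//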
static long bufferCRC(byte[] buf) {
CRC32 crc = new CRC32();
crc.update(buf, 0, buf.length);
return crc.getValue();
}
//
// Validates that the file matches the expected length and CRC.
//
public static boolean validateFile(FileSystem fileSys, Path name, long length,
long crc)
throws IOException {
long numRead = 0;
CRC32 newcrc = new CRC32();
FSDataInputStream stm = fileSys.open(name);
final byte[] b = new byte[4192];
int num = 0;
while (num >= 0) {
num = stm.read(b);
if (num < 0) {
break;
}
numRead += num;
newcrc.update(b, 0, num);
}
stm.close();
if (numRead != length) {
LOG.info("Number of bytes read " + numRead +
" does not match file size " + length);
return false;
}
LOG.info("New crc " + newcrc.getValue() + ", old crc " + crc);
if (newcrc.getValue() != crc) {
LOG.info("CRC mismatch of file " + name + ": " + newcrc.getValue() +
" vs. " + crc);
return false;
}
return true;
}
//
// Validates the contents of the raid recovery log file.
//
public static void validateLogFile(FileSystem fileSys, Path logDir)
throws IOException {
FileStatus f = fileSys.listStatus(logDir)[0];
FSDataInputStream stm = fileSys.open(f.getPath());
try {
BufferedReader reader = new BufferedReader(new InputStreamReader(stm));
assertEquals("Recovery attempt log", reader.readLine());
assertTrue(Pattern.matches("Source path : /user/dhruba/raidtest/.*",
reader.readLine()));
assertTrue(Pattern.matches("Alternate path : .*/destraid",
reader.readLine()));
assertEquals("Stripe lentgh : 3", reader.readLine());
assertTrue(Pattern.matches("Corrupt offset : \\d*", reader.readLine()));
assertTrue(Pattern.matches("Output from unRaid : " +
"hdfs://.*/tmp/raid/user/dhruba/raidtest/.*recovered",
reader.readLine()));
} finally {
stm.close();
}
LOG.info("Raid HDFS Recovery log verified");
}
//
// Deletes or corrupts the specified block of the file on every datanode
// that holds a replica.
//
public static void corruptBlock(MiniDFSCluster dfs, Path file, ExtendedBlock blockNum,
int numDataNodes, boolean delete) throws IOException {
// Deliberately delete or corrupt replicas of the block.
int numDeleted = 0;
int numCorrupted = 0;
for (int i = 0; i < numDataNodes; i++) {
File block = MiniDFSCluster.getBlockFile(i, blockNum);
if (block == null || !block.exists()) {
continue;
}
if (delete) {
block.delete();
LOG.info("Deleted block " + block);
numDeleted++;
} else {
// Corrupt: flip one int in the middle of the block file.
long seekPos = block.length()/2;
RandomAccessFile raf = new RandomAccessFile(block, "rw");
try {
raf.seek(seekPos);
int data = raf.readInt();
raf.seek(seekPos);
raf.writeInt(data+1);
} finally {
raf.close();
}
LOG.info("Corrupted block " + block);
numCorrupted++;
}
}
assertTrue("Nothing corrupted or deleted",
(numCorrupted + numDeleted) > 0);
}
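//
// Corrupts the specified block of the file at the given offset on every
// datanode that holds a replica.
//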
public static void corruptBlock(Path file, ExtendedBlock blockNum,
int numDataNodes, long offset) throws IOException {
// Deliberately corrupt replicas of the block at the given offset.
for (int i = 0; i < numDataNodes; i++) {
File block = MiniDFSCluster.getBlockFile(i, blockNum);
if (block == null || !block.exists()) {
continue;
}
RandomAccessFile raf = new RandomAccessFile(block, "rw");
raf.seek(offset);
int data = raf.readInt();
raf.seek(offset);
raf.writeInt(data+1);
LOG.info("Corrupted block " + block);
}
}
}