| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.hadoop.hdfs; |
| |
| import java.io.BufferedReader; |
| import java.io.File; |
| import java.io.FileNotFoundException; |
| import java.io.IOException; |
| import java.io.InputStreamReader; |
| import java.io.RandomAccessFile; |
| import java.net.URI; |
| import java.util.Random; |
| import java.util.regex.Pattern; |
| import java.util.zip.CRC32; |
| |
| import junit.framework.TestCase; |
| |
| import org.apache.commons.logging.Log; |
| import org.apache.commons.logging.LogFactory; |
| import org.apache.hadoop.conf.Configuration; |
| import org.apache.hadoop.fs.FSDataInputStream; |
| import org.apache.hadoop.fs.FSDataOutputStream; |
| import org.apache.hadoop.fs.FileStatus; |
| import org.apache.hadoop.fs.FileSystem; |
| import org.apache.hadoop.fs.Path; |
| import org.apache.hadoop.hdfs.protocol.ExtendedBlock; |
| import org.apache.hadoop.hdfs.protocol.LocatedBlocks; |
| import org.apache.hadoop.raid.RaidNode; |
| import org.apache.hadoop.raid.RaidUtils; |
| import org.apache.hadoop.raid.protocol.PolicyInfo.ErasureCodeType; |
| import org.apache.hadoop.util.StringUtils; |
| |
| public class TestRaidDfs extends TestCase { |
| final static String TEST_DIR = new File(System.getProperty("test.build.data", |
| "target/test-data")).getAbsolutePath(); |
| final static String LOG_DIR = "target/raidlog"; |
| final static long RELOAD_INTERVAL = 1000; |
| final static Log LOG = LogFactory.getLog("org.apache.hadoop.raid.TestRaidDfs"); |
| final static int NUM_DATANODES = 3; |
| |
| Configuration conf; |
| String namenode = null; |
| String hftp = null; |
| MiniDFSCluster dfs = null; |
| FileSystem fileSys = null; |
| String jobTrackerName = null; |
| ErasureCodeType code; |
| int stripeLength; |
| |
| private void mySetup( |
| String erasureCode, int rsParityLength) throws Exception { |
| |
| new File(TEST_DIR).mkdirs(); // Make sure data directory exists |
| conf = new Configuration(); |
| |
| conf.set("fs.raid.recoverylogdir", LOG_DIR); |
| conf.setInt(RaidNode.RS_PARITY_LENGTH_KEY, rsParityLength); |
| |
| // scan all policies once every 5 second |
| conf.setLong("raid.policy.rescan.interval", 5000); |
| |
| // make all deletions not go through Trash |
| conf.set("fs.shell.delete.classname", "org.apache.hadoop.hdfs.DFSClient"); |
| |
| // do not use map-reduce cluster for Raiding |
| conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode"); |
| |
| conf.set("raid.server.address", "localhost:0"); |
| conf.setInt("hdfs.raid.stripeLength", stripeLength); |
| conf.set("xor".equals(erasureCode) ? RaidNode.RAID_LOCATION_KEY : |
| RaidNode.RAIDRS_LOCATION_KEY, "/destraid"); |
| |
| dfs = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES).build(); |
| dfs.waitActive(); |
| fileSys = dfs.getFileSystem(); |
| namenode = fileSys.getUri().toString(); |
| hftp = "hftp://localhost.localdomain:" + dfs.getNameNodePort(); |
| |
| FileSystem.setDefaultUri(conf, namenode); |
| } |
| |
| private void myTearDown() throws Exception { |
| if (dfs != null) { dfs.shutdown(); } |
| } |
| |
| private LocatedBlocks getBlockLocations(Path file, long length) |
| throws IOException { |
| DistributedFileSystem dfs = (DistributedFileSystem) fileSys; |
| return RaidDFSUtil.getBlockLocations( |
| dfs, file.toUri().getPath(), 0, length); |
| } |
| |
| private LocatedBlocks getBlockLocations(Path file) |
| throws IOException { |
| FileStatus stat = fileSys.getFileStatus(file); |
| return getBlockLocations(file, stat.getLen()); |
| } |
| |
| private DistributedRaidFileSystem getRaidFS() throws IOException { |
| DistributedFileSystem dfs = (DistributedFileSystem)fileSys; |
| Configuration clientConf = new Configuration(conf); |
| clientConf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedRaidFileSystem"); |
| clientConf.set("fs.raid.underlyingfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem"); |
| clientConf.setBoolean("fs.hdfs.impl.disable.cache", true); |
| URI dfsUri = dfs.getUri(); |
| return (DistributedRaidFileSystem)FileSystem.get(dfsUri, clientConf); |
| } |
| |
| public static void waitForFileRaided( |
| Log logger, FileSystem fileSys, Path file, Path destPath) |
| throws IOException, InterruptedException { |
| FileStatus parityStat = null; |
| String fileName = file.getName().toString(); |
| // wait till file is raided |
| while (parityStat == null) { |
| logger.info("Waiting for files to be raided."); |
| try { |
| FileStatus[] listPaths = fileSys.listStatus(destPath); |
| if (listPaths != null) { |
| for (FileStatus f : listPaths) { |
| logger.info("File raided so far : " + f.getPath()); |
| String found = f.getPath().getName().toString(); |
| if (fileName.equals(found)) { |
| parityStat = f; |
| break; |
| } |
| } |
| } |
| } catch (FileNotFoundException e) { |
| //ignore |
| } |
| Thread.sleep(1000); // keep waiting |
| } |
| |
| while (true) { |
| LocatedBlocks locations = null; |
| DistributedFileSystem dfs = (DistributedFileSystem) fileSys; |
| locations = RaidDFSUtil.getBlockLocations( |
| dfs, file.toUri().getPath(), 0, parityStat.getLen()); |
| if (!locations.isUnderConstruction()) { |
| break; |
| } |
| Thread.sleep(1000); |
| } |
| |
| while (true) { |
| FileStatus stat = fileSys.getFileStatus(file); |
| if (stat.getReplication() == 1) break; |
| Thread.sleep(1000); |
| } |
| } |
| |
| private void corruptBlockAndValidate(Path srcFile, Path destPath, |
| int[] listBlockNumToCorrupt, long blockSize, int numBlocks) |
| throws IOException, InterruptedException { |
| int repl = 1; |
| long crc = createTestFilePartialLastBlock(fileSys, srcFile, repl, |
| numBlocks, blockSize); |
| long length = fileSys.getFileStatus(srcFile).getLen(); |
| |
| RaidNode.doRaid(conf, fileSys.getFileStatus(srcFile), |
| destPath, code, new RaidNode.Statistics(), new RaidUtils.DummyProgressable(), |
| false, repl, repl, stripeLength); |
| |
| // Delete first block of file |
| for (int blockNumToCorrupt : listBlockNumToCorrupt) { |
| LOG.info("Corrupt block " + blockNumToCorrupt + " of file " + srcFile); |
| LocatedBlocks locations = getBlockLocations(srcFile); |
| corruptBlock(dfs, srcFile, locations.get(blockNumToCorrupt).getBlock(), |
| NUM_DATANODES, true); |
| } |
| |
| // Validate |
| DistributedRaidFileSystem raidfs = getRaidFS(); |
| assertTrue(validateFile(raidfs, srcFile, length, crc)); |
| validateLogFile(getRaidFS(), new Path(LOG_DIR)); |
| } |
| |
| /** |
| * Create a file, corrupt several blocks in it and ensure that the file can be |
| * read through DistributedRaidFileSystem by ReedSolomon coding. |
| */ |
| public void testRaidDfsRs() throws Exception { |
| LOG.info("Test testRaidDfs started."); |
| |
| code = ErasureCodeType.RS; |
| long blockSize = 8192L; |
| int numBlocks = 8; |
| stripeLength = 3; |
| mySetup("rs", 3); |
| |
| int[][] corrupt = {{1, 2, 3}, {1, 4, 7}, {3, 6, 7}}; |
| try { |
| for (int i = 0; i < corrupt.length; i++) { |
| Path file = new Path("/user/dhruba/raidtest/file" + i); |
| corruptBlockAndValidate( |
| file, new Path("/destraid"), corrupt[i], blockSize, numBlocks); |
| } |
| } catch (Exception e) { |
| LOG.info("testRaidDfs Exception " + e + |
| StringUtils.stringifyException(e)); |
| throw e; |
| } finally { |
| myTearDown(); |
| } |
| LOG.info("Test testRaidDfs completed."); |
| } |
| |
| /** |
| * Test DistributedRaidFileSystem.readFully() |
| */ |
| public void testReadFully() throws Exception { |
| code = ErasureCodeType.XOR; |
| stripeLength = 3; |
| mySetup("xor", 1); |
| |
| try { |
| Path file = new Path("/user/raid/raidtest/file1"); |
| long crc = createTestFile(fileSys, file, 1, 8, 8192L); |
| FileStatus stat = fileSys.getFileStatus(file); |
| LOG.info("Created " + file + ", crc=" + crc + ", len=" + stat.getLen()); |
| |
| byte[] filebytes = new byte[(int)stat.getLen()]; |
| // Test that readFully returns the correct CRC when there are no errors. |
| DistributedRaidFileSystem raidfs = getRaidFS(); |
| FSDataInputStream stm = raidfs.open(file); |
| stm.readFully(0, filebytes); |
| assertEquals(crc, bufferCRC(filebytes)); |
| stm.close(); |
| |
| // Generate parity. |
| RaidNode.doRaid(conf, fileSys.getFileStatus(file), |
| new Path("/destraid"), code, new RaidNode.Statistics(), |
| new RaidUtils.DummyProgressable(), |
| false, 1, 1, stripeLength); |
| int[] corrupt = {0, 4, 7}; // first, last and middle block |
| for (int blockIdx : corrupt) { |
| LOG.info("Corrupt block " + blockIdx + " of file " + file); |
| LocatedBlocks locations = getBlockLocations(file); |
| corruptBlock(dfs, file, locations.get(blockIdx).getBlock(), |
| NUM_DATANODES, true); |
| } |
| // Test that readFully returns the correct CRC when there are errors. |
| stm = raidfs.open(file); |
| stm.readFully(0, filebytes); |
| assertEquals(crc, bufferCRC(filebytes)); |
| } finally { |
| myTearDown(); |
| } |
| } |
| |
| /** |
| * Test that access time and mtime of a source file do not change after |
| * raiding. |
| */ |
| public void testAccessTime() throws Exception { |
| LOG.info("Test testAccessTime started."); |
| |
| code = ErasureCodeType.XOR; |
| long blockSize = 8192L; |
| int numBlocks = 8; |
| int repl = 1; |
| stripeLength = 3; |
| mySetup("xor", 1); |
| |
| Path file = new Path("/user/dhruba/raidtest/file"); |
| createTestFilePartialLastBlock(fileSys, file, repl, numBlocks, blockSize); |
| FileStatus stat = fileSys.getFileStatus(file); |
| |
| try { |
| RaidNode.doRaid(conf, fileSys.getFileStatus(file), |
| new Path("/destraid"), code, new RaidNode.Statistics(), |
| new RaidUtils.DummyProgressable(), false, repl, repl, stripeLength); |
| |
| FileStatus newStat = fileSys.getFileStatus(file); |
| |
| assertEquals(stat.getModificationTime(), newStat.getModificationTime()); |
| assertEquals(stat.getAccessTime(), newStat.getAccessTime()); |
| } finally { |
| myTearDown(); |
| } |
| } |
| |
| /** |
| * Create a file, corrupt a block in it and ensure that the file can be |
| * read through DistributedRaidFileSystem by XOR code. |
| */ |
| public void testRaidDfsXor() throws Exception { |
| LOG.info("Test testRaidDfs started."); |
| |
| code = ErasureCodeType.XOR; |
| long blockSize = 8192L; |
| int numBlocks = 8; |
| stripeLength = 3; |
| mySetup("xor", 1); |
| |
| int[][] corrupt = {{0}, {4}, {7}}; // first, last and middle block |
| try { |
| for (int i = 0; i < corrupt.length; i++) { |
| Path file = new Path("/user/dhruba/raidtest/" + i); |
| corruptBlockAndValidate( |
| file, new Path("/destraid"), corrupt[i], blockSize, numBlocks); |
| } |
| } catch (Exception e) { |
| LOG.info("testRaidDfs Exception " + e + |
| StringUtils.stringifyException(e)); |
| throw e; |
| } finally { |
| myTearDown(); |
| } |
| LOG.info("Test testRaidDfs completed."); |
| } |
| |
| // |
| // creates a file and populate it with random data. Returns its crc. |
| // |
| public static long createTestFile(FileSystem fileSys, Path name, int repl, |
| int numBlocks, long blocksize) |
| throws IOException { |
| CRC32 crc = new CRC32(); |
| Random rand = new Random(); |
| FSDataOutputStream stm = fileSys.create(name, true, |
| fileSys.getConf().getInt("io.file.buffer.size", 4096), |
| (short)repl, blocksize); |
| // fill random data into file |
| final byte[] b = new byte[(int)blocksize]; |
| for (int i = 0; i < numBlocks; i++) { |
| rand.nextBytes(b); |
| stm.write(b); |
| crc.update(b); |
| } |
| stm.close(); |
| return crc.getValue(); |
| } |
| |
| // |
| // Creates a file with partially full last block. Populate it with random |
| // data. Returns its crc. |
| // |
| public static long createTestFilePartialLastBlock( |
| FileSystem fileSys, Path name, int repl, int numBlocks, long blocksize) |
| throws IOException { |
| CRC32 crc = new CRC32(); |
| Random rand = new Random(); |
| FSDataOutputStream stm = fileSys.create(name, true, |
| fileSys.getConf().getInt("io.file.buffer.size", 4096), |
| (short)repl, blocksize); |
| // Write whole blocks. |
| byte[] b = new byte[(int)blocksize]; |
| for (int i = 1; i < numBlocks; i++) { |
| rand.nextBytes(b); |
| stm.write(b); |
| crc.update(b); |
| } |
| // Write partial block. |
| b = new byte[(int)blocksize/2 - 1]; |
| rand.nextBytes(b); |
| stm.write(b); |
| crc.update(b); |
| |
| stm.close(); |
| return crc.getValue(); |
| } |
| |
| static long bufferCRC(byte[] buf) { |
| CRC32 crc = new CRC32(); |
| crc.update(buf, 0, buf.length); |
| return crc.getValue(); |
| } |
| |
| // |
| // validates that file matches the crc. |
| // |
| public static boolean validateFile(FileSystem fileSys, Path name, long length, |
| long crc) |
| throws IOException { |
| |
| long numRead = 0; |
| CRC32 newcrc = new CRC32(); |
| FSDataInputStream stm = fileSys.open(name); |
| final byte[] b = new byte[4192]; |
| int num = 0; |
| while (num >= 0) { |
| num = stm.read(b); |
| if (num < 0) { |
| break; |
| } |
| numRead += num; |
| newcrc.update(b, 0, num); |
| } |
| stm.close(); |
| |
| if (numRead != length) { |
| LOG.info("Number of bytes read " + numRead + |
| " does not match file size " + length); |
| return false; |
| } |
| |
| LOG.info(" Newcrc " + newcrc.getValue() + " old crc " + crc); |
| if (newcrc.getValue() != crc) { |
| LOG.info("CRC mismatch of file " + name + ": " + newcrc + " vs. " + crc); |
| return false; |
| } |
| return true; |
| } |
| // |
| // validates the contents of raid recovery log file |
| // |
| public static void validateLogFile(FileSystem fileSys, Path logDir) |
| throws IOException { |
| FileStatus f = fileSys.listStatus(logDir)[0]; |
| FSDataInputStream stm = fileSys.open(f.getPath()); |
| try { |
| BufferedReader reader = new BufferedReader(new InputStreamReader(stm)); |
| assertEquals("Recovery attempt log", reader.readLine()); |
| assertTrue(Pattern.matches("Source path : /user/dhruba/raidtest/.*", |
| reader.readLine())); |
| assertTrue(Pattern.matches("Alternate path : .*/destraid", |
| reader.readLine())); |
| assertEquals("Stripe lentgh : 3", reader.readLine()); |
| assertTrue(Pattern.matches("Corrupt offset : \\d*", reader.readLine())); |
| assertTrue(Pattern.matches("Output from unRaid : " + |
| "hdfs://.*/tmp/raid/user/dhruba/raidtest/.*recovered", |
| reader.readLine())); |
| } finally { |
| stm.close(); |
| } |
| LOG.info("Raid HDFS Recovery log verified"); |
| } |
| |
| // |
| // Delete/Corrupt specified block of file |
| // |
| public static void corruptBlock(MiniDFSCluster dfs, Path file, ExtendedBlock blockNum, |
| int numDataNodes, boolean delete) throws IOException { |
| // Now deliberately remove/truncate replicas of blocks |
| int numDeleted = 0; |
| int numCorrupted = 0; |
| for (int i = 0; i < numDataNodes; i++) { |
| File block = MiniDFSCluster.getBlockFile(i, blockNum); |
| if (block == null || !block.exists()) { |
| continue; |
| } |
| if (delete) { |
| block.delete(); |
| LOG.info("Deleted block " + block); |
| numDeleted++; |
| } else { |
| // Corrupt |
| long seekPos = block.length()/2; |
| RandomAccessFile raf = new RandomAccessFile(block, "rw"); |
| raf.seek(seekPos); |
| int data = raf.readInt(); |
| raf.seek(seekPos); |
| raf.writeInt(data+1); |
| LOG.info("Corrupted block " + block); |
| numCorrupted++; |
| } |
| } |
| assertTrue("Nothing corrupted or deleted", |
| (numCorrupted + numDeleted) > 0); |
| } |
| |
| public static void corruptBlock(Path file, ExtendedBlock blockNum, |
| int numDataNodes, long offset) throws IOException { |
| // Now deliberately corrupt replicas of the the block. |
| for (int i = 0; i < numDataNodes; i++) { |
| File block = MiniDFSCluster.getBlockFile(i, blockNum); |
| if (block == null || !block.exists()) { |
| continue; |
| } |
| RandomAccessFile raf = new RandomAccessFile(block, "rw"); |
| raf.seek(offset); |
| int data = raf.readInt(); |
| raf.seek(offset); |
| raf.writeInt(data+1); |
| LOG.info("Corrupted block " + block); |
| } |
| } |
| } |