/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs;

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Random;
import junit.framework.TestCase;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.server.common.GenerationStamp;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.log4j.Level;

/**
 * A JUnit test for corrupted file handling.
 */
public class TestFileCorruption extends TestCase {
  // Raise log levels so that all state-change and datanode activity is visible in the test output.
  {
    ((Log4JLogger)NameNode.stateChangeLog).getLogger().setLevel(Level.ALL);
    ((Log4JLogger)FSNamesystem.LOG).getLogger().setLevel(Level.ALL);
    ((Log4JLogger)DFSClient.LOG).getLogger().setLevel(Level.ALL);
    ((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL);
  }
  static Log LOG = ((Log4JLogger)NameNode.stateChangeLog);

  /** check if DFS can handle corrupted blocks properly */
  public void testFileCorruption() throws Exception {
    MiniDFSCluster cluster = null;
    DFSTestUtil util = new DFSTestUtil("TestFileCorruption", 20, 3, 8*1024);
    try {
      Configuration conf = new HdfsConfiguration();
      cluster = new MiniDFSCluster(conf, 3, true, null);
      FileSystem fs = cluster.getFileSystem();
      util.createFiles(fs, "/srcdat");
      // Now deliberately remove the blocks
      File data_dir = new File(System.getProperty("test.build.data"),
                               "dfs/data/data5/current");
      assertTrue("data directory does not exist", data_dir.exists());
      File[] blocks = data_dir.listFiles();
      assertTrue("Blocks do not exist in data-dir", (blocks != null) && (blocks.length > 0));
      for (int idx = 0; idx < blocks.length; idx++) {
        if (!blocks[idx].getName().startsWith("blk_")) {
          continue;
        }
        System.out.println("Deliberately removing file " + blocks[idx].getName());
        assertTrue("Cannot remove file.", blocks[idx].delete());
      }
      assertTrue("Corrupted replicas not handled properly.",
                 util.checkFiles(fs, "/srcdat"));
      util.cleanup(fs, "/srcdat");
    } finally {
      if (cluster != null) { cluster.shutdown(); }
    }
  }

  /** check if local FS can handle corrupted blocks properly */
  public void testLocalFileCorruption() throws Exception {
    Configuration conf = new HdfsConfiguration();
    Path file = new Path(System.getProperty("test.build.data"), "corruptFile");
    FileSystem fs = FileSystem.getLocal(conf);
    DataOutputStream dos = fs.create(file);
    dos.writeBytes("original bytes");
    dos.close();
    // Now deliberately corrupt the file
    dos = new DataOutputStream(new FileOutputStream(file.toString()));
    dos.writeBytes("corruption");
    dos.close();
    // Now attempt to read the file
    DataInputStream dis = fs.open(file, 512);
    try {
      System.out.println("A ChecksumException is expected to be logged.");
      dis.readByte();
    } catch (ChecksumException ignore) {
      // expect this exception, but let any NPE get thrown
    }
    fs.delete(file, true);
  }

  /** Test the case that a replica is reported corrupt while it is not
   * in blocksMap. Make sure that an ArrayIndexOutOfBoundsException is not thrown.
   * See HADOOP-4351.
   */
  public void testArrayOutOfBoundsException() throws Exception {
    MiniDFSCluster cluster = null;
    try {
      Configuration conf = new HdfsConfiguration();
      cluster = new MiniDFSCluster(conf, 2, true, null);
      cluster.waitActive();

      FileSystem fs = cluster.getFileSystem();
      final Path FILE_PATH = new Path("/tmp.txt");
      final long FILE_LEN = 1L;
      DFSTestUtil.createFile(fs, FILE_PATH, FILE_LEN, (short)2, 1L);

      // get the block
      File dataDir = new File(cluster.getDataDirectory(),
                              "data1" + MiniDFSCluster.FINALIZED_DIR_NAME);
      Block blk = getBlock(dataDir);
      if (blk == null) {
        blk = getBlock(new File(cluster.getDataDirectory(),
                                "dfs/data/data2" + MiniDFSCluster.FINALIZED_DIR_NAME));
      }
      assertNotNull("Could not find a block file in either data directory", blk);

      // start a third datanode
      cluster.startDataNodes(conf, 1, true, null, null);
      ArrayList<DataNode> datanodes = cluster.getDataNodes();
      assertEquals(3, datanodes.size());
      DataNode dataNode = datanodes.get(2);

      // report corrupted block by the third datanode
      cluster.getNamesystem().markBlockAsCorrupt(blk,
          new DatanodeInfo(dataNode.dnRegistration));

      // open the file; this must not throw an ArrayIndexOutOfBoundsException
      fs.open(FILE_PATH);

      // clean up
      fs.delete(FILE_PATH, false);
    } finally {
      if (cluster != null) { cluster.shutdown(); }
    }
  }
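
  /**
   * Scan the given data directory for a finalized block file (named "blk_<id>")
   * and build a Block from its id, its on-disk length, and the generation stamp
   * parsed from the matching "blk_<id>_<genstamp>.meta" file, falling back to
   * GRANDFATHER_GENERATION_STAMP when no meta file is found.
   * Returns null if the directory contains no block file.
   */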
  private Block getBlock(File dataDir) {
    assertTrue("data directory does not exist", dataDir.exists());
    File[] blocks = dataDir.listFiles();
    assertTrue("Blocks do not exist in dataDir", (blocks != null) && (blocks.length > 0));

    // find the first block file, i.e. a "blk_" file without the ".meta" suffix
    File blockFile = null;
    for (int idx = 0; idx < blocks.length; idx++) {
      String name = blocks[idx].getName();
      if (name.startsWith("blk_") && !name.endsWith(".meta")) {
        blockFile = blocks[idx];
        break;
      }
    }
    if (blockFile == null) {
      return null;
    }
    String blockFileName = blockFile.getName();
    long blockId = Long.parseLong(blockFileName.substring("blk_".length()));

    // the generation stamp is encoded in the matching "blk_<id>_<genstamp>.meta" file
    long blockTimeStamp = GenerationStamp.GRANDFATHER_GENERATION_STAMP;
    for (int idx = 0; idx < blocks.length; idx++) {
      String fileName = blocks[idx].getName();
      if (fileName.startsWith(blockFileName) && fileName.endsWith(".meta")) {
        int startIndex = blockFileName.length() + 1;
        int endIndex = fileName.length() - ".meta".length();
        blockTimeStamp = Long.parseLong(fileName.substring(startIndex, endIndex));
        break;
      }
    }
    return new Block(blockId, blockFile.length(), blockTimeStamp);
  }

  /** check if ClientProtocol.getCorruptFiles() returns a file that has missing blocks */
  public void testCorruptFilesMissingBlock() throws Exception {
    MiniDFSCluster cluster = null;
    try {
      Configuration conf = new HdfsConfiguration();
      conf.setInt("dfs.datanode.directoryscan.interval", 1); // datanode scans directories
      conf.setInt("dfs.blockreport.intervalMsec", 3 * 1000); // datanode sends block reports
      cluster = new MiniDFSCluster(conf, 1, true, null);
      FileSystem fs = cluster.getFileSystem();

      // create two files with one block each
      DFSTestUtil util = new DFSTestUtil("testCorruptFilesMissingBlock", 2, 1, 512);
      util.createFiles(fs, "/srcdat");

      // verify that there are no bad blocks
      ClientProtocol namenode = DFSClient.createNamenode(conf);
      FileStatus[] badFiles = namenode.getCorruptFiles();
      assertTrue("Namenode has " + badFiles.length + " corrupt files. Expecting none.",
          badFiles.length == 0);

      // Now deliberately remove one block
      File data_dir = new File(System.getProperty("test.build.data"),
          "dfs/data/data1/current/finalized");
      assertTrue("data directory does not exist", data_dir.exists());
      File[] blocks = data_dir.listFiles();
      assertTrue("Blocks do not exist in data-dir", (blocks != null) && (blocks.length > 0));
      for (int idx = 0; idx < blocks.length; idx++) {
        if (!blocks[idx].getName().startsWith("blk_")) {
          continue;
        }
        LOG.info("Deliberately removing file " + blocks[idx].getName());
        assertTrue("Cannot remove file.", blocks[idx].delete());
        break;
      }

      // wait for the directory scan and block report to surface the missing block
      badFiles = namenode.getCorruptFiles();
      while (badFiles.length == 0) {
        Thread.sleep(1000);
        badFiles = namenode.getCorruptFiles();
      }
      LOG.info("Namenode reports " + badFiles.length + " corrupt files");
      assertTrue("Namenode has " + badFiles.length + " bad files. Expecting 1.",
          badFiles.length == 1);
      util.cleanup(fs, "/srcdat");
    } finally {
      if (cluster != null) { cluster.shutdown(); }
    }
  }

  /** check if ClientProtocol.getCorruptFiles() returns the right limit */
  public void testMaxCorruptFiles() throws Exception {
    MiniDFSCluster cluster = null;
    try {
      Configuration conf = new HdfsConfiguration();
      conf.setInt("dfs.corruptfilesreturned.max", 2);
      conf.setInt("dfs.datanode.directoryscan.interval", 1); // datanode scans directories
      conf.setInt("dfs.blockreport.intervalMsec", 3 * 1000); // datanode sends block reports
      cluster = new MiniDFSCluster(conf, 1, true, null);
      FileSystem fs = cluster.getFileSystem();

      // create four files with one block each
      DFSTestUtil util = new DFSTestUtil("testMaxCorruptFiles", 4, 1, 512);
      util.createFiles(fs, "/srcdat2");

      // verify that there are no bad blocks
      ClientProtocol namenode = DFSClient.createNamenode(conf);
      FileStatus[] badFiles = namenode.getCorruptFiles();
      assertTrue("Namenode has " + badFiles.length + " corrupt files. Expecting none.",
          badFiles.length == 0);

      // Now deliberately remove the blocks stored on this datanode
      File data_dir = new File(System.getProperty("test.build.data"),
          "dfs/data/data1/current/finalized");
      assertTrue("data directory does not exist", data_dir.exists());
      File[] blocks = data_dir.listFiles();
      assertTrue("Blocks do not exist in data-dir", (blocks != null) && (blocks.length > 0));
      for (int idx = 0; idx < blocks.length; idx++) {
        if (!blocks[idx].getName().startsWith("blk_")) {
          continue;
        }
        LOG.info("Deliberately removing file " + blocks[idx].getName());
        assertTrue("Cannot remove file.", blocks[idx].delete());
      }

      // wait until the configured maximum of corrupt files is reported
      badFiles = namenode.getCorruptFiles();
      while (badFiles.length < 2) {
        badFiles = namenode.getCorruptFiles();
        Thread.sleep(10000);
      }
      badFiles = namenode.getCorruptFiles(); // once more since time has passed
      LOG.info("Namenode reports " + badFiles.length + " corrupt files");
      assertTrue("Namenode has " + badFiles.length + " bad files. Expecting 2.",
          badFiles.length == 2);
      util.cleanup(fs, "/srcdat2");
    } finally {
      if (cluster != null) { cluster.shutdown(); }
    }
  }

  /** check if ClientProtocol.getCorruptFiles() returns a file that has corrupted blocks */
  public void testCorruptFilesCorruptedBlock() throws Exception {
    MiniDFSCluster cluster = null;
    Random random = new Random();
    try {
      Configuration conf = new HdfsConfiguration();
      conf.setInt("dfs.datanode.directoryscan.interval", 1); // datanode scans directories
      conf.setInt("dfs.blockreport.intervalMsec", 3 * 1000); // datanode sends block reports
      cluster = new MiniDFSCluster(conf, 1, true, null);
      FileSystem fs = cluster.getFileSystem();

      // create two files with one block each
      DFSTestUtil util = new DFSTestUtil("testCorruptFilesCorruptedBlock", 2, 1, 512);
      util.createFiles(fs, "/srcdat10");

      // fetch bad file list from namenode. There should be none.
      ClientProtocol namenode = DFSClient.createNamenode(conf);
      FileStatus[] badFiles = namenode.getCorruptFiles();
      assertTrue("Namenode has " + badFiles.length + " corrupt files. Expecting none.",
          badFiles.length == 0);

      // Now deliberately corrupt one block
      File data_dir = new File(System.getProperty("test.build.data"),
          "dfs/data/data1/current/finalized");
      assertTrue("data directory does not exist", data_dir.exists());
      File[] blocks = data_dir.listFiles();
      assertTrue("Blocks do not exist in data-dir", (blocks != null) && (blocks.length > 0));
      for (int idx = 0; idx < blocks.length; idx++) {
        if (blocks[idx].getName().startsWith("blk_") &&
            blocks[idx].getName().endsWith(".meta")) {
          // overwrite the last two bytes of the .meta file with random data
          RandomAccessFile file = new RandomAccessFile(blocks[idx], "rw");
          FileChannel channel = file.getChannel();
          long position = channel.size() - 2;
          int length = 2;
          byte[] buffer = new byte[length];
          random.nextBytes(buffer);
          channel.write(ByteBuffer.wrap(buffer), position);
          file.close();
          LOG.info("Deliberately corrupting file " + blocks[idx].getName() +
              " at offset " + position + " length " + length);

          // read all files to trigger detection of the corrupted replica
          try {
            util.checkFiles(fs, "/srcdat10");
          } catch (BlockMissingException e) {
            System.out.println("Received BlockMissingException as expected.");
          } catch (IOException e) {
            fail("Corrupted replicas not handled properly. Expecting BlockMissingException" +
                " but received " + e);
          }
          break;
        }
      }

      // fetch bad file list from namenode. There should be one file.
      badFiles = namenode.getCorruptFiles();
      LOG.info("Namenode reports " + badFiles.length + " corrupt files");
      assertTrue("Namenode has " + badFiles.length + " bad files. Expecting 1.",
          badFiles.length == 1);
      util.cleanup(fs, "/srcdat10");
    } finally {
      if (cluster != null) { cluster.shutdown(); }
    }
  }
}