/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs;
import com.google.common.base.Supplier;
import org.apache.hadoop.fs.StorageType;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
import org.apache.hadoop.hdfs.protocol.BlockListAsLongs.BlockReportReplica;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.datanode.InternalDataNodeTestUtils;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.test.PathUtils;
import org.apache.log4j.Level;
import org.junit.Test;
import org.slf4j.Logger;
/**
* A JUnit test for corrupted file handling.
*/
public class TestFileCorruption {
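// Raise NameNode, DataNode and DFSClient log levels so that corruption
// handling is visible in the test output.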
{
DFSTestUtil.setNameNodeLogLevel(Level.ALL);
GenericTestUtils.setLogLevel(DataNode.LOG, Level.ALL);
GenericTestUtils.setLogLevel(DFSClient.LOG, Level.ALL);
}
static Logger LOG = NameNode.stateChangeLog;
/** check if DFS can handle corrupted blocks properly */
@Test
public void testFileCorruption() throws Exception {
MiniDFSCluster cluster = null;
DFSTestUtil util = new DFSTestUtil.Builder().setName("TestFileCorruption").
setNumFiles(20).build();
try {
Configuration conf = new HdfsConfiguration();
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
FileSystem fs = cluster.getFileSystem();
util.createFiles(fs, "/srcdat");
// Now deliberately remove the blocks
String bpid = cluster.getNamesystem().getBlockPoolId();
DataNode dn = cluster.getDataNodes().get(2);
Map<DatanodeStorage, BlockListAsLongs> blockReports =
dn.getFSDataset().getBlockReports(bpid);
assertTrue("Blocks do not exist on data-dir", !blockReports.isEmpty());
for (BlockListAsLongs report : blockReports.values()) {
for (BlockReportReplica brr : report) {
LOG.info("Deliberately removing block {}", brr.getBlockName());
cluster.getFsDatasetTestUtils(2).getMaterializedReplica(
new ExtendedBlock(bpid, brr)).deleteData();
}
}
assertTrue("Corrupted replicas not handled properly.",
util.checkFiles(fs, "/srcdat"));
util.cleanup(fs, "/srcdat");
} finally {
if (cluster != null) { cluster.shutdown(); }
}
}
/** check if local FS can handle corrupted blocks properly */
@Test
public void testLocalFileCorruption() throws Exception {
Configuration conf = new HdfsConfiguration();
Path file = new Path(PathUtils.getTestDirName(getClass()), "corruptFile");
FileSystem fs = FileSystem.getLocal(conf);
DataOutputStream dos = fs.create(file);
dos.writeBytes("original bytes");
dos.close();
// Now deliberately corrupt the file
dos = new DataOutputStream(new FileOutputStream(file.toString()));
dos.writeBytes("corruption");
dos.close();
// Now attempt to read the file
DataInputStream dis = fs.open(file, 512);
try {
LOG.info("A ChecksumException is expected to be logged.");
dis.readByte();
} catch (ChecksumException ignore) {
//expect this exception but let any NPE get thrown
}
fs.delete(file, true);
}
/** Test the case where a replica is reported corrupt while it is not
 * in the blocksMap. Make sure that ArrayIndexOutOfBoundsException is not
 * thrown. See HADOOP-4351.
 */
@Test
public void testArrayOutOfBoundsException() throws Exception {
MiniDFSCluster cluster = null;
try {
Configuration conf = new HdfsConfiguration();
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
cluster.waitActive();
FileSystem fs = cluster.getFileSystem();
final Path FILE_PATH = new Path("/tmp.txt");
final long FILE_LEN = 1L;
DFSTestUtil.createFile(fs, FILE_PATH, FILE_LEN, (short)2, 1L);
// get the block
final String bpid = cluster.getNamesystem().getBlockPoolId();
ExtendedBlock blk = getFirstBlock(cluster.getDataNodes().get(0), bpid);
assertFalse("Data directory does not contain any blocks or there was an "
+ "IO error", blk==null);
// start a third datanode
cluster.startDataNodes(conf, 1, true, null, null);
ArrayList<DataNode> datanodes = cluster.getDataNodes();
assertEquals(3, datanodes.size());
DataNode dataNode = datanodes.get(2);
// report corrupted block by the third datanode
DatanodeRegistration dnR = InternalDataNodeTestUtils.
getDNRegistrationForBP(dataNode, blk.getBlockPoolId());
FSNamesystem ns = cluster.getNamesystem();
ns.writeLock();
try {
cluster.getNamesystem().getBlockManager().findAndMarkBlockAsCorrupt(
blk, new DatanodeInfo(dnR), "TEST", "STORAGE_ID");
} finally {
ns.writeUnlock();
}
// opening the file should succeed despite the corrupt-replica report
fs.open(FILE_PATH);
//clean up
fs.delete(FILE_PATH, false);
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
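/**
 * Verify that a file can still be opened after every datanode storage is
 * marked as failed and every replica of its block is reported corrupt.
 */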
@Test
public void testCorruptionWithDiskFailure() throws Exception {
MiniDFSCluster cluster = null;
try {
Configuration conf = new HdfsConfiguration();
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
cluster.waitActive();
BlockManager bm = cluster.getNamesystem().getBlockManager();
FileSystem fs = cluster.getFileSystem();
final Path FILE_PATH = new Path("/tmp.txt");
final long FILE_LEN = 1L;
DFSTestUtil.createFile(fs, FILE_PATH, FILE_LEN, (short) 3, 1L);
// get the block
final String bpid = cluster.getNamesystem().getBlockPoolId();
File storageDir = cluster.getInstanceStorageDir(0, 0);
File dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
assertTrue("Data directory does not exist", dataDir.exists());
ExtendedBlock blk = getFirstBlock(cluster.getDataNodes().get(0), bpid);
if (blk == null) {
blk = getFirstBlock(cluster.getDataNodes().get(0), bpid);
}
assertFalse("Data directory does not contain any blocks or there was an" +
" " +
"IO error", blk == null);
ArrayList<DataNode> datanodes = cluster.getDataNodes();
assertEquals(3, datanodes.size());
FSNamesystem ns = cluster.getNamesystem();
// fail every storage on every datanode, including those holding the block
ns.writeLock();
try {
updateAllStorages(bm);
} finally {
ns.writeUnlock();
}
ns.writeLock();
try {
markAllBlocksAsCorrupt(bm, blk);
} finally {
ns.writeUnlock();
}
// opening the file should still succeed
fs.open(FILE_PATH);
//clean up
fs.delete(FILE_PATH, false);
} finally {
if (cluster != null) { cluster.shutdown(); }
}
}
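/**
 * With incremental block reports batched (a large IBR interval), raise a
 * file's replication factor and verify that its block is counted as
 * under-replicated rather than missing.
 */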
@Test
public void testSetReplicationWhenBatchIBR() throws Exception {
Configuration conf = new HdfsConfiguration();
conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 100);
conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INCREMENTAL_INTERVAL_MSEC_KEY,
30000);
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024);
conf.setInt(DFSConfigKeys.DFS_NAMENODE_FILE_CLOSE_NUM_COMMITTED_ALLOWED_KEY,
1);
DistributedFileSystem dfs;
try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
.numDataNodes(3).build()) {
final int bufferSize = 1024; // 1024 Bytes each time
byte[] outBuffer = new byte[bufferSize];
dfs = cluster.getFileSystem();
String fileName = "/testSetRep1";
Path filePath = new Path(fileName);
FSDataOutputStream out = dfs.create(filePath);
out.write(outBuffer, 0, bufferSize);
out.close();
// send a full block report (FBR) to delay the next incremental block report (IBR)
cluster.triggerBlockReports();
GenericTestUtils.waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
try {
cluster.triggerBlockReports();
if (cluster.getNamesystem().getBlocksTotal() == 1) {
return true;
}
} catch (Exception e) {
// Ignore the exception
}
return false;
}
}, 10, 3000);
fileName = "/testSetRep2";
filePath = new Path(fileName);
out = dfs.create(filePath);
out.write(outBuffer, 0, bufferSize);
out.close();
dfs.setReplication(filePath, (short) 10);
cluster.triggerBlockReports();
// the under-replicated block count should be one after setrep
GenericTestUtils.waitFor(new Supplier<Boolean>() {
@Override public Boolean get() {
try {
return cluster.getNamesystem().getBlockManager()
.getUnderReplicatedBlocksCount() == 1;
} catch (Exception e) {
e.printStackTrace();
return false;
}
}
}, 10, 3000);
assertEquals(0,
cluster.getNamesystem().getBlockManager().getMissingBlocksCount());
}
}
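/**
 * Report the given block as corrupt on every storage that currently holds
 * a replica of it.
 */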
private void markAllBlocksAsCorrupt(BlockManager bm,
ExtendedBlock blk) throws IOException {
for (DatanodeStorageInfo info : bm.getStorages(blk.getLocalBlock())) {
bm.findAndMarkBlockAsCorrupt(
blk, info.getDatanodeDescriptor(), info.getStorageID(), "STORAGE_ID");
}
}
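/**
 * Replace every storage record on every datanode with a randomly named
 * storage in the FAILED state.
 */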
private void updateAllStorages(BlockManager bm) {
for (DatanodeDescriptor dd : bm.getDatanodeManager().getDatanodes()) {
Set<DatanodeStorageInfo> setInfos = new HashSet<DatanodeStorageInfo>();
DatanodeStorageInfo[] infos = dd.getStorageInfos();
Random random = new Random();
for (int i = 0; i < infos.length; i++) {
int blkId = random.nextInt(101);
DatanodeStorage storage = new DatanodeStorage(Integer.toString(blkId),
DatanodeStorage.State.FAILED, StorageType.DISK);
infos[i].updateFromStorage(storage);
setInfos.add(infos[i]);
}
}
}
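/**
 * Return the first block found in the datanode's block reports for the
 * given block pool, or null if none is found.
 */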
private static ExtendedBlock getFirstBlock(DataNode dn, String bpid) {
Map<DatanodeStorage, BlockListAsLongs> blockReports =
dn.getFSDataset().getBlockReports(bpid);
for (BlockListAsLongs blockLongs : blockReports.values()) {
for (BlockReportReplica block : blockLongs) {
return new ExtendedBlock(bpid, block);
}
}
return null;
}
}