/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode;
import static org.apache.hadoop.fs.permission.AclEntryScope.*;
import static org.apache.hadoop.fs.permission.AclEntryType.*;
import static org.apache.hadoop.fs.permission.FsAction.*;
import static org.apache.hadoop.hdfs.server.namenode.AclTestHelpers.*;
import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.AclEntry;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.permission.PermissionStatus;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSInotifyEventInputStream;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoContiguous;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.util.XMLUtils.InvalidXmlException;
import org.apache.hadoop.hdfs.util.XMLUtils.Stanza;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.test.PathUtils;
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.ExitUtil.ExitException;
import org.apache.hadoop.util.Lists;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Time;
import org.apache.log4j.AppenderSkeleton;
import org.apache.log4j.LogManager;
import org.apache.log4j.spi.LoggingEvent;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
import org.mockito.Mockito;
import org.slf4j.event.Level;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This class tests the creation and validation of a checkpoint.
*/
@RunWith(Parameterized.class)
public class TestEditLog {
static {
GenericTestUtils.setLogLevel(FSEditLog.LOG, Level.TRACE);
}
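// Run every test in this class twice: once with the synchronous edit log
// and once with the asynchronous one (DFS_NAMENODE_EDITS_ASYNC_LOGGING).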
@Parameters
public static Collection<Object[]> data() {
Collection<Object[]> params = new ArrayList<Object[]>();
params.add(new Object[]{ Boolean.FALSE });
params.add(new Object[]{ Boolean.TRUE });
return params;
}
private static boolean useAsyncEditLog;
public TestEditLog(Boolean async) {
useAsyncEditLog = async;
}
public static Configuration getConf() {
Configuration conf = new HdfsConfiguration();
conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_EDITS_ASYNC_LOGGING,
useAsyncEditLog);
return conf;
}
/**
* A garbage mkdir op which is used for testing
* {@link EditLogFileInputStream#scanEditLog(File, long, boolean)}
*/
public static class GarbageMkdirOp extends FSEditLogOp {
public GarbageMkdirOp() {
super(FSEditLogOpCodes.OP_MKDIR);
}
@Override
void resetSubFields() {
// nop
}
@Override
void readFields(DataInputStream in, int logVersion) throws IOException {
throw new IOException("cannot decode GarbageMkdirOp");
}
@Override
public void writeFields(DataOutputStream out) throws IOException {
// write in some garbage content
Random random = new Random();
byte[] content = new byte[random.nextInt(16) + 1];
random.nextBytes(content);
out.write(content);
}
@Override
protected void toXml(ContentHandler contentHandler) throws SAXException {
throw new UnsupportedOperationException(
"Not supported for GarbageMkdirOp");
}
@Override
void fromXml(Stanza st) throws InvalidXmlException {
throw new UnsupportedOperationException(
"Not supported for GarbageMkdirOp");
}
}
static final Logger LOG = LoggerFactory.getLogger(TestEditLog.class);
static final int NUM_DATA_NODES = 0;
// This test creates NUM_THREADS threads, each of which concurrently logs
// NUM_TRANSACTIONS transactions (an open and a close op per transaction).
static final int NUM_TRANSACTIONS = 100;
static final int NUM_THREADS = 100;
static final File TEST_DIR = PathUtils.getTestDir(TestEditLog.class);
/** An edits log with 3 edits from 0.20 - the result of
* a fresh namesystem followed by hadoop fs -touchz /myfile */
static final byte[] HADOOP20_SOME_EDITS =
StringUtils.hexStringToByte((
"ffff ffed 0a00 0000 0000 03fa e100 0000" +
"0005 0007 2f6d 7966 696c 6500 0133 000d" +
"3132 3932 3331 3634 3034 3138 3400 0d31" +
"3239 3233 3136 3430 3431 3834 0009 3133" +
"3432 3137 3732 3800 0000 0004 746f 6464" +
"0a73 7570 6572 6772 6f75 7001 a400 1544" +
"4653 436c 6965 6e74 5f2d 3136 3136 3535" +
"3738 3931 000b 3137 322e 3239 2e35 2e33" +
"3209 0000 0005 0007 2f6d 7966 696c 6500" +
"0133 000d 3132 3932 3331 3634 3034 3138" +
"3400 0d31 3239 3233 3136 3430 3431 3834" +
"0009 3133 3432 3137 3732 3800 0000 0004" +
"746f 6464 0a73 7570 6572 6772 6f75 7001" +
"a4ff 0000 0000 0000 0000 0000 0000 0000"
).replace(" ",""));
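// The first four bytes (ffff ffed) are the pre-transaction-ID layout
// version (-19); the remainder appears to encode the ops for /myfile
// followed by OP_INVALID (0xff) trailer padding.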
static {
// No need to fsync for the purposes of tests. This makes
// the tests run much faster.
EditLogFileOutputStream.setShouldSkipFsyncForTesting(true);
}
static final byte TRAILER_BYTE = FSEditLogOpCodes.OP_INVALID.getOpCode();
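// An edit log file is padded past its last op with OP_INVALID (-1) bytes;
// readers treat the trailer byte as end-of-log.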
private static final int CHECKPOINT_ON_STARTUP_MIN_TXNS = 100;
/**
 * An object that logs a bunch of transactions.
 */
static class Transactions implements Runnable {
final FSNamesystem namesystem;
final int numTransactions;
final short replication = 3;
final long blockSize = 64;
final int startIndex;
Transactions(FSNamesystem ns, int numTx, int startIdx) {
namesystem = ns;
numTransactions = numTx;
startIndex = startIdx;
}
// add a bunch of transactions.
@Override
public void run() {
PermissionStatus p = namesystem.createFsOwnerPermissions(
new FsPermission((short)0777));
FSEditLog editLog = namesystem.getEditLog();
for (int i = 0; i < numTransactions; i++) {
INodeFile inode = new INodeFile(namesystem.dir.allocateNewInodeId(), null,
p, 0L, 0L, BlockInfo.EMPTY_ARRAY, replication, blockSize);
inode.toUnderConstruction("", "");
editLog.logOpenFile("/filename" + (startIndex + i), inode, false, false);
editLog.logCloseFile("/filename" + (startIndex + i), inode);
editLog.logSync();
}
}
}
/**
* Construct FSEditLog with default configuration, taking editDirs from NNStorage
*
* @param storage Storage object used by namenode
*/
private static FSEditLog getFSEditLog(NNStorage storage) throws IOException {
Configuration conf = getConf();
// Make sure the edits dirs are set in the provided configuration object.
conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY,
StringUtils.join(",", storage.getEditsDirectories()));
FSEditLog log = FSEditLog.newInstance(
conf, storage, FSNamesystem.getNamespaceEditsDirs(conf));
return log;
}
/**
* Test case for an empty edit log from a prior version of Hadoop.
*/
@Test
public void testPreTxIdEditLogNoEdits() throws Exception {
FSNamesystem namesys = Mockito.mock(FSNamesystem.class);
namesys.dir = Mockito.mock(FSDirectory.class);
long numEdits = testLoad(
StringUtils.hexStringToByte("ffffffed"), // just version number
namesys);
assertEquals(0, numEdits);
}
/**
* Test case for loading a very simple edit log from a format
* prior to the inclusion of edit transaction IDs in the log.
*/
@Test
public void testPreTxidEditLogWithEdits() throws Exception {
Configuration conf = getConf();
MiniDFSCluster cluster = null;
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build();
cluster.waitActive();
final FSNamesystem namesystem = cluster.getNamesystem();
long numEdits = testLoad(HADOOP20_SOME_EDITS, namesystem);
assertEquals(3, numEdits);
// Sanity check the edit
HdfsFileStatus fileInfo =
namesystem.getFileInfo("/myfile", false, false, false);
assertEquals("supergroup", fileInfo.getGroup());
assertEquals(3, fileInfo.getReplication());
} finally {
if (cluster != null) { cluster.shutdown(); }
}
}
private long testLoad(byte[] data, FSNamesystem namesys) throws IOException {
FSEditLogLoader loader = new FSEditLogLoader(namesys, 0);
return loader.loadFSEdits(new EditLogByteInputStream(data), 1);
}
@Test
public void testMultiStreamsLoadEditWithConfMaxTxns()
throws IOException {
Configuration conf = getConf();
MiniDFSCluster cluster = null;
FileSystem fileSystem = null;
FSImage writeFsImage = null;
try {
cluster = new MiniDFSCluster
.Builder(conf)
.numDataNodes(NUM_DATA_NODES)
.build();
cluster.waitActive();
fileSystem = cluster.getFileSystem();
final FSNamesystem namesystem = cluster.getNamesystem();
writeFsImage = namesystem.getFSImage();
for (Iterator<URI> it = cluster.getNameDirs(0)
.iterator(); it.hasNext();) {
File dir = new File(it.next().getPath());
System.out.println(dir);
}
// Remember the current lastInodeId so it can be reset later; the test
// transactions below allocate new inode ids without creating inodes.
long originalLastInodeId = namesystem.dir.getLastInodeId();
// Log a batch of transactions into the first segment
Transactions trans = new Transactions(
namesystem, NUM_TRANSACTIONS, NUM_TRANSACTIONS / 2);
trans.run();
// Roll to finalize the first segment
writeFsImage.rollEditLog(NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION);
Transactions trans1 = new Transactions(
namesystem, NUM_TRANSACTIONS, NUM_TRANSACTIONS / 2);
trans1.run();
writeFsImage.rollEditLog(NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION);
namesystem.dir.resetLastInodeIdWithoutChecking(originalLastInodeId);
for(Iterator<StorageDirectory> it = writeFsImage.getStorage().
dirIterator(NameNodeDirType.EDITS); it.hasNext();){
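// Each Transactions run logs an open and a close per transaction, so a
// segment holds 2 * NUM_TRANSACTIONS ops plus the BEGIN/END_LOG_SEGMENT
// markers: 202 txns, giving finalized segments 1-202 and 203-404.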
StorageDirectory sd = it.next();
long expectedTxns = (2 * NUM_TRANSACTIONS) + 2;
File editFile = NNStorage.getFinalizedEditsFile(sd, 1, expectedTxns);
File editFile1 = NNStorage.getFinalizedEditsFile(sd, 203, 404);
assertTrue("Expect " + editFile + " exists", editFile.exists());
assertTrue("Expect " + editFile1 + " exists", editFile1.exists());
EditLogFileInputStream editLogFileInputStream1 =
new EditLogFileInputStream(editFile, 1, 202, false);
EditLogFileInputStream editLogFileInputStream2 =
new EditLogFileInputStream(editFile1, 203, 404, false);
List<EditLogInputStream> editStreams = Lists.newArrayList();
editStreams.add(editLogFileInputStream1);
editStreams.add(editLogFileInputStream2);
FSImage readFsImage = new FSImage(conf);
try {
readFsImage.loadEdits(editStreams, namesystem, 100, null, null);
} catch (Exception e){
LOG.error("There appears to be an out-of-order edit in the edit log",
e);
fail("no exception should be thrown");
} finally {
if (readFsImage != null) {
readFsImage.close();
}
}
}
} finally {
try {
if(fileSystem != null) {
fileSystem.close();
}
if(cluster != null) {
cluster.shutdown();
}
if(writeFsImage != null){
writeFsImage.close();
}
} catch (Throwable t) {
LOG.error("Couldn't shut down cleanly", t);
}
}
}
/**
* Simple test for writing to and rolling the edit log.
*/
@Test
public void testSimpleEditLog() throws IOException {
// start a cluster
Configuration conf = getConf();
MiniDFSCluster cluster = null;
FileSystem fileSys = null;
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).build();
cluster.waitActive();
fileSys = cluster.getFileSystem();
final FSNamesystem namesystem = cluster.getNamesystem();
FSImage fsimage = namesystem.getFSImage();
final FSEditLog editLog = fsimage.getEditLog();
assertExistsInStorageDirs(
cluster, NameNodeDirType.EDITS,
NNStorage.getInProgressEditsFileName(1));
editLog.logSetReplication("fakefile", (short) 1);
editLog.logSync();
editLog.rollEditLog(NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION);
assertExistsInStorageDirs(
cluster, NameNodeDirType.EDITS,
NNStorage.getFinalizedEditsFileName(1,3));
assertExistsInStorageDirs(
cluster, NameNodeDirType.EDITS,
NNStorage.getInProgressEditsFileName(4));
editLog.logSetReplication("fakefile", (short) 2);
editLog.logSync();
editLog.close();
} finally {
if(fileSys != null) fileSys.close();
if(cluster != null) cluster.shutdown();
}
}
/**
* Tests transaction logging in dfs.
*/
@Test
public void testMultiThreadedEditLog() throws IOException {
testEditLog(2048);
// force the edit buffer to automatically sync on each logged edit
testEditLog(1);
}
private void assertExistsInStorageDirs(MiniDFSCluster cluster,
NameNodeDirType dirType,
String filename) {
NNStorage storage = cluster.getNamesystem().getFSImage().getStorage();
for (StorageDirectory sd : storage.dirIterable(dirType)) {
File f = new File(sd.getCurrentDir(), filename);
assertTrue("Expect that " + f + " exists", f.exists());
}
}
/**
* Test edit log with different initial buffer size
*
* @param initialSize initial edit log buffer size
* @throws IOException
*/
private void testEditLog(int initialSize) throws IOException {
// start a cluster
Configuration conf = getConf();
MiniDFSCluster cluster = null;
FileSystem fileSys = null;
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).build();
cluster.waitActive();
fileSys = cluster.getFileSystem();
final FSNamesystem namesystem = cluster.getNamesystem();
for (Iterator<URI> it = cluster.getNameDirs(0).iterator(); it.hasNext(); ) {
File dir = new File(it.next().getPath());
System.out.println(dir);
}
FSImage fsimage = namesystem.getFSImage();
FSEditLog editLog = fsimage.getEditLog();
// set small size of flush buffer
editLog.setOutputBufferCapacity(initialSize);
// Roll log so new output buffer size takes effect
// we should now be writing to edits_inprogress_3
fsimage.rollEditLog(NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION);
// Remember the current lastInodeId so it can be reset later to test
// loading the editlog segments. The transactions below allocate new
// inode ids for the editlog but don't create inodes in the namespace.
long originalLastInodeId = namesystem.dir.getLastInodeId();
// Create threads and make them run transactions concurrently.
Thread threadId[] = new Thread[NUM_THREADS];
for (int i = 0; i < NUM_THREADS; i++) {
Transactions trans =
new Transactions(namesystem, NUM_TRANSACTIONS, i*NUM_TRANSACTIONS);
threadId[i] = new Thread(trans, "TransactionThread-" + i);
threadId[i].start();
}
// wait for all transactions to get over
for (int i = 0; i < NUM_THREADS; i++) {
try {
threadId[i].join();
} catch (InterruptedException e) {
i--; // retry
}
}
// Log open/close again for some existing files, as if reopened for append
Transactions trans =
new Transactions(namesystem, NUM_TRANSACTIONS, NUM_TRANSACTIONS / 2);
trans.run();
// Roll another time to finalize edits_inprogress_3
fsimage.rollEditLog(NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION);
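// 100 worker threads plus the single reopen pass each log an open and a
// close per transaction: (100 + 1) * 2 * 100 ops, plus the segment markers.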
long expectedTxns = ((NUM_THREADS+1) * 2 * NUM_TRANSACTIONS) + 2; // +2 for start/end txns
// Verify that we can read in all the transactions that we have written.
// If there were any corruptions, it is likely that the reading in
// of these transactions will throw an exception.
//
namesystem.dir.resetLastInodeIdWithoutChecking(originalLastInodeId);
for (Iterator<StorageDirectory> it =
fsimage.getStorage().dirIterator(NameNodeDirType.EDITS); it.hasNext();) {
FSEditLogLoader loader = new FSEditLogLoader(namesystem, 0);
File editFile = NNStorage.getFinalizedEditsFile(it.next(), 3,
3 + expectedTxns - 1);
assertTrue("Expect " + editFile + " exists", editFile.exists());
System.out.println("Verifying file: " + editFile);
long numEdits = loader.loadFSEdits(
new EditLogFileInputStream(editFile), 3);
int numLeases = namesystem.leaseManager.countLease();
System.out.println("Number of outstanding leases " + numLeases);
assertEquals(0, numLeases);
assertTrue("Verification for " + editFile + " failed. " +
"Expected " + expectedTxns + " transactions. "+
"Found " + numEdits + " transactions.",
numEdits == expectedTxns);
}
} finally {
try {
if(fileSys != null) fileSys.close();
if(cluster != null) cluster.shutdown();
} catch (Throwable t) {
LOG.error("Couldn't shut down cleanly", t);
}
}
}
private void doLogEdit(ExecutorService exec, final FSEditLog log,
final String filename) throws Exception
{
exec.submit(new Callable<Void>() {
@Override
public Void call() {
log.logSetReplication(filename, (short)1);
return null;
}
}).get();
}
private void doCallLogSync(ExecutorService exec, final FSEditLog log)
throws Exception
{
exec.submit(new Callable<Void>() {
@Override
public Void call() {
log.logSync();
return null;
}
}).get();
}
private void doCallLogSyncAll(ExecutorService exec, final FSEditLog log)
throws Exception
{
exec.submit(new Callable<Void>() {
@Override
public Void call() throws Exception {
log.logSyncAll();
return null;
}
}).get();
}
@Test
public void testSyncBatching() throws Exception {
if (useAsyncEditLog) {
// semantics are completely different since edits are auto-synced
return;
}
// start a cluster
Configuration conf = getConf();
MiniDFSCluster cluster = null;
FileSystem fileSys = null;
ExecutorService threadA = Executors.newSingleThreadExecutor();
ExecutorService threadB = Executors.newSingleThreadExecutor();
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).build();
cluster.waitActive();
fileSys = cluster.getFileSystem();
final FSNamesystem namesystem = cluster.getNamesystem();
FSImage fsimage = namesystem.getFSImage();
final FSEditLog editLog = fsimage.getEditLog();
assertEquals("should start with only the BEGIN_LOG_SEGMENT txn synced",
1, editLog.getSyncTxId());
// Log an edit from thread A
doLogEdit(threadA, editLog, "thread-a 1");
assertEquals("logging edit without syncing should do not affect txid",
1, editLog.getSyncTxId());
// Log an edit from thread B
doLogEdit(threadB, editLog, "thread-b 1");
assertEquals("logging edit without syncing should do not affect txid",
1, editLog.getSyncTxId());
// Now ask to sync edit from B, which should sync both edits.
doCallLogSync(threadB, editLog);
assertEquals("logSync from second thread should bump txid up to 3",
3, editLog.getSyncTxId());
// Now ask to sync edit from A, which was already batched in - thus
// it should increment the batch count metric
doCallLogSync(threadA, editLog);
assertEquals("logSync from first thread shouldn't change txid",
3, editLog.getSyncTxId());
// Should have incremented the batch count exactly once
assertCounter("TransactionsBatchedInSync", 1L,
getMetrics("NameNodeActivity"));
} finally {
threadA.shutdown();
threadB.shutdown();
if(fileSys != null) fileSys.close();
if(cluster != null) cluster.shutdown();
}
}
/**
* Test what happens with the following sequence:
*
* Thread A writes edit
* Thread B calls logSyncAll
* calls close() on stream
* Thread A calls logSync
*
* This sequence is legal and can occur if enterSafeMode() is closely
* followed by saveNamespace.
*/
@Test
public void testBatchedSyncWithClosedLogs() throws Exception {
// start a cluster
Configuration conf = getConf();
MiniDFSCluster cluster = null;
FileSystem fileSys = null;
ExecutorService threadA = Executors.newSingleThreadExecutor();
ExecutorService threadB = Executors.newSingleThreadExecutor();
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).build();
cluster.waitActive();
fileSys = cluster.getFileSystem();
final FSNamesystem namesystem = cluster.getNamesystem();
FSImage fsimage = namesystem.getFSImage();
final FSEditLog editLog = fsimage.getEditLog();
// Log an edit from thread A
doLogEdit(threadA, editLog, "thread-a 1");
// async log is doing batched syncs in background. logSync just ensures
// the edit is durable, so the txid may increase prior to sync
if (!useAsyncEditLog) {
assertEquals("logging edit without syncing should do not affect txid",
1, editLog.getSyncTxId());
}
// logSyncAll in Thread B
doCallLogSyncAll(threadB, editLog);
assertEquals("logSyncAll should sync thread A's transaction",
2, editLog.getSyncTxId());
// Close edit log
editLog.close();
// Ask thread A to finish sync (which should be a no-op)
doCallLogSync(threadA, editLog);
} finally {
threadA.shutdown();
threadB.shutdown();
if(fileSys != null) fileSys.close();
if(cluster != null) cluster.shutdown();
}
}
@Test
public void testEditChecksum() throws Exception {
// start a cluster
Configuration conf = getConf();
MiniDFSCluster cluster = null;
FileSystem fileSys = null;
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).build();
cluster.waitActive();
fileSys = cluster.getFileSystem();
final FSNamesystem namesystem = cluster.getNamesystem();
FSImage fsimage = namesystem.getFSImage();
final FSEditLog editLog = fsimage.getEditLog();
fileSys.mkdirs(new Path("/tmp"));
Iterator<StorageDirectory> iter = fsimage.getStorage().
dirIterator(NameNodeDirType.EDITS);
LinkedList<StorageDirectory> sds = new LinkedList<StorageDirectory>();
while (iter.hasNext()) {
sds.add(iter.next());
}
editLog.close();
cluster.shutdown();
for (StorageDirectory sd : sds) {
File editFile = NNStorage.getFinalizedEditsFile(sd, 1, 3);
assertTrue(editFile.exists());
long fileLen = editFile.length();
LOG.debug("Corrupting Log File: " + editFile + " len: " + fileLen);
RandomAccessFile rwf = new RandomAccessFile(editFile, "rw");
rwf.seek(fileLen-4); // seek to checksum bytes
int b = rwf.readInt();
rwf.seek(fileLen-4);
rwf.writeInt(b+1);
rwf.close();
}
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).format(false).build();
fail("should not be able to start");
} catch (IOException e) {
// expected
assertNotNull("Cause of exception should be ChecksumException", e.getCause());
assertEquals("Cause of exception should be ChecksumException",
ChecksumException.class, e.getCause().getClass());
}
}
/**
* Test what happens if the NN crashes when it has started but
* has had no transactions written.
*/
@Test
public void testCrashRecoveryNoTransactions() throws Exception {
testCrashRecovery(0);
}
/**
* Test what happens if the NN crashes when it has started and
* has had a few transactions written.
*/
@Test
public void testCrashRecoveryWithTransactions() throws Exception {
testCrashRecovery(150);
}
/**
* Do a test to make sure the edit log can recover edits even after
* a non-clean shutdown. This simulates a crash by copying the edits
* directory aside while the NN is still running, then shutting the NN
* down and restoring that copy of the edits directory.
*/
private void testCrashRecovery(int numTransactions) throws Exception {
MiniDFSCluster cluster = null;
Configuration conf = getConf();
conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY,
CHECKPOINT_ON_STARTUP_MIN_TXNS);
try {
LOG.info("\n===========================================\n" +
"Starting empty cluster");
cluster = new MiniDFSCluster.Builder(conf)
.numDataNodes(NUM_DATA_NODES)
.format(true)
.build();
cluster.waitActive();
FileSystem fs = cluster.getFileSystem();
for (int i = 0; i < numTransactions; i++) {
fs.mkdirs(new Path("/test" + i));
}
// Directory layout looks like:
// test/data/dfs/nameN/current/{fsimage_N,edits_...}
File nameDir = new File(cluster.getNameDirs(0).iterator().next().getPath());
File dfsDir = nameDir.getParentFile();
assertEquals(dfsDir.getName(), "dfs"); // make sure we got right dir
LOG.info("Copying data directory aside to a hot backup");
File backupDir = new File(dfsDir.getParentFile(), "dfs.backup-while-running");
FileUtils.copyDirectory(dfsDir, backupDir);
LOG.info("Shutting down cluster #1");
cluster.shutdown();
cluster = null;
// Now restore the backup
FileUtil.fullyDeleteContents(dfsDir);
dfsDir.delete();
backupDir.renameTo(dfsDir);
// Directory layout looks like:
// test/data/dfs/nameN/current/{fsimage_N,edits_...}
File currentDir = new File(nameDir, "current");
// We should see the file as in-progress
File editsFile = new File(currentDir,
NNStorage.getInProgressEditsFileName(1));
assertTrue("Edits file " + editsFile + " should exist", editsFile.exists());
File imageFile = FSImageTestUtil.findNewestImageFile(
currentDir.getAbsolutePath());
assertNotNull("No image found in " + nameDir, imageFile);
assertEquals(NNStorage.getImageFileName(0), imageFile.getName());
// Try to start a new cluster
LOG.info("\n===========================================\n" +
"Starting same cluster after simulated crash");
cluster = new MiniDFSCluster.Builder(conf)
.numDataNodes(NUM_DATA_NODES)
.format(false)
.build();
cluster.waitActive();
// We should still have the files we wrote prior to the simulated crash
fs = cluster.getFileSystem();
for (int i = 0; i < numTransactions; i++) {
assertTrue(fs.exists(new Path("/test" + i)));
}
long expectedTxId;
if (numTransactions > CHECKPOINT_ON_STARTUP_MIN_TXNS) {
// It should have saved a checkpoint on startup since there
// were more unfinalized edits than configured
expectedTxId = numTransactions + 1;
} else {
// otherwise, it shouldn't have made a checkpoint
expectedTxId = 0;
}
imageFile = FSImageTestUtil.findNewestImageFile(
currentDir.getAbsolutePath());
assertNotNull("No image found in " + nameDir, imageFile);
assertEquals(NNStorage.getImageFileName(expectedTxId),
imageFile.getName());
// Started successfully. Shut it down and make sure it can restart.
cluster.shutdown();
cluster = null;
cluster = new MiniDFSCluster.Builder(conf)
.numDataNodes(NUM_DATA_NODES)
.format(false)
.build();
cluster.waitActive();
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
// should succeed - only one corrupt log dir
@Test
public void testCrashRecoveryEmptyLogOneDir() throws Exception {
doTestCrashRecoveryEmptyLog(false, true, true);
}
// should fail - seen_txid updated to 3, but no log dir contains txid 3
@Test
public void testCrashRecoveryEmptyLogBothDirs() throws Exception {
doTestCrashRecoveryEmptyLog(true, true, false);
}
// should succeed - only one corrupt log dir
@Test
public void testCrashRecoveryEmptyLogOneDirNoUpdateSeenTxId()
throws Exception {
doTestCrashRecoveryEmptyLog(false, false, true);
}
// should succeed - both log dirs corrupt, but seen_txid never updated
@Test
public void testCrashRecoveryEmptyLogBothDirsNoUpdateSeenTxId()
throws Exception {
doTestCrashRecoveryEmptyLog(true, false, true);
}
/**
* Test that the NN handles the corruption properly
* after it crashes just after creating an edit log
* (ie before writing START_LOG_SEGMENT). In the case
* that all logs have this problem, it should mark them
* as corrupt instead of trying to finalize them.
*
* @param inBothDirs if true, there will be a truncated log in
* both of the edits directories. If false, the truncated log
* will only be in one of the directories. The NN fails to start
* only when seen_txid has been updated to 3 but no directory
* contains a non-corrupt log starting at txid 3.
* @param updateTransactionIdFile if true update the seen_txid file.
* If false, it will not be updated. This will simulate a case where
* the NN crashed between creating the new segment and updating the
* seen_txid file.
* @param shouldSucceed true if the test is expected to succeed.
*/
private void doTestCrashRecoveryEmptyLog(boolean inBothDirs,
boolean updateTransactionIdFile, boolean shouldSucceed)
throws Exception {
// start a cluster
Configuration conf = getConf();
MiniDFSCluster cluster = null;
cluster = new MiniDFSCluster.Builder(conf)
.numDataNodes(NUM_DATA_NODES).build();
cluster.shutdown();
Collection<URI> editsDirs = cluster.getNameEditsDirs(0);
for (URI uri : editsDirs) {
File dir = new File(uri.getPath());
File currentDir = new File(dir, "current");
// We should start with only the finalized edits_1-2
GenericTestUtils.assertGlobEquals(currentDir, "edits_.*",
NNStorage.getFinalizedEditsFileName(1, 2));
// Make a truncated edits_inprogress_3
File log = new File(currentDir,
NNStorage.getInProgressEditsFileName(3));
EditLogFileOutputStream stream = new EditLogFileOutputStream(conf, log, 1024);
try {
stream.create(NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION);
if (!inBothDirs) {
break;
}
NNStorage storage = new NNStorage(conf,
Collections.<URI>emptyList(),
Lists.newArrayList(uri));
if (updateTransactionIdFile) {
storage.writeTransactionIdFileToStorage(3);
}
storage.close();
} finally {
stream.close();
}
}
try {
cluster = new MiniDFSCluster.Builder(conf)
.numDataNodes(NUM_DATA_NODES).format(false).build();
if (!shouldSucceed) {
fail("Should not have succeeded in startin cluster");
}
} catch (IOException ioe) {
if (shouldSucceed) {
LOG.info("Should have succeeded in starting cluster, but failed", ioe);
throw ioe;
} else {
GenericTestUtils.assertExceptionContains(
"Gap in transactions. Expected to be able to read up until " +
"at least txid 3 but unable to find any edit logs containing " +
"txid 3", ioe);
}
} finally {
cluster.shutdown();
}
}
private static class EditLogByteInputStream extends EditLogInputStream {
private final InputStream input;
private final long len;
private int version;
private FSEditLogOp.Reader reader = null;
private FSEditLogLoader.PositionTrackingInputStream tracker = null;
public EditLogByteInputStream(byte[] data) throws IOException {
len = data.length;
input = new ByteArrayInputStream(data);
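// Read the version header through a buffered DataInputStream, then rewrap
// the stream with a position tracker so the op reader can report offsets.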
BufferedInputStream bin = new BufferedInputStream(input);
DataInputStream in = new DataInputStream(bin);
version = EditLogFileInputStream.readLogVersion(in, true);
tracker = new FSEditLogLoader.PositionTrackingInputStream(in);
in = new DataInputStream(tracker);
reader = FSEditLogOp.Reader.create(in, tracker, version);
}
@Override
public long getFirstTxId() {
return HdfsServerConstants.INVALID_TXID;
}
@Override
public long getLastTxId() {
return HdfsServerConstants.INVALID_TXID;
}
@Override
public long length() throws IOException {
return len;
}
@Override
public long getPosition() {
return tracker.getPos();
}
@Override
protected FSEditLogOp nextOp() throws IOException {
return reader.readOp(false);
}
@Override
public int getVersion(boolean verifyVersion) throws IOException {
return version;
}
@Override
public void close() throws IOException {
input.close();
}
@Override
public String getName() {
return "AnonEditLogByteInputStream";
}
@Override
public boolean isInProgress() {
return true;
}
@Override
public void setMaxOpSize(int maxOpSize) {
reader.setMaxOpSize(maxOpSize);
}
@Override public boolean isLocalLog() {
return true;
}
}
@Test
public void testFailedOpen() throws Exception {
File logDir = new File(TEST_DIR, "testFailedOpen");
logDir.mkdirs();
ExitUtil.disableSystemExit();
FSEditLog log = FSImageTestUtil.createStandaloneEditLog(logDir);
try {
FileUtil.setWritable(logDir, false);
log.openForWrite(NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION);
fail("Did no throw exception on only having a bad dir");
} catch (ExitException ee) {
GenericTestUtils.assertExceptionContains(
"too few journals successfully started", ee);
} finally {
FileUtil.setWritable(logDir, true);
log.close();
ExitUtil.resetFirstExitException();
}
}
/**
* Regression test for HDFS-1112/HDFS-3020. Ensures that, even if
* logSync isn't called periodically, the edit log will sync itself.
*/
@Test
public void testAutoSync() throws Exception {
File logDir = new File(TEST_DIR, "testAutoSync");
logDir.mkdirs();
FSEditLog log = FSImageTestUtil.createStandaloneEditLog(logDir);
String oneKB = StringUtils.byteToHexString(
new byte[500]);
try {
log.openForWrite(NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION);
NameNodeMetrics mockMetrics = Mockito.mock(NameNodeMetrics.class);
log.setMetricsForTests(mockMetrics);
for (int i = 0; i < 400; i++) {
log.logDelete(oneKB, 1L, false);
}
// After ~400KB, we're still within the 512KB buffer size
Mockito.verify(mockMetrics, Mockito.times(0)).addSync(Mockito.anyLong());
// After ~400KB more, we should have done an automatic sync
for (int i = 0; i < 400; i++) {
log.logDelete(oneKB, 1L, false);
}
Mockito.verify(mockMetrics, Mockito.times(1)).addSync(Mockito.anyLong());
} finally {
log.close();
}
}
/**
* Tests the getEditLogManifest function using mock storage for a number
* of different situations.
*/
@Test
public void testEditLogManifestMocks() throws IOException {
NNStorage storage;
FSEditLog log;
// Simple case - different directories have the same
// set of logs, with an in-progress one at the end
storage = mockStorageWithEdits(
"[1,100]|[101,200]|[201,]",
"[1,100]|[101,200]|[201,]");
log = getFSEditLog(storage);
log.initJournalsForWrite();
assertEquals("[[1,100], [101,200]] CommittedTxId: 200",
log.getEditLogManifest(1).toString());
assertEquals("[[101,200]] CommittedTxId: 200",
log.getEditLogManifest(101).toString());
// Another simple case, different directories have different
// sets of files
storage = mockStorageWithEdits(
"[1,100]|[101,200]",
"[1,100]|[201,300]|[301,400]"); // nothing starting at 101
log = getFSEditLog(storage);
log.initJournalsForWrite();
assertEquals("[[1,100], [101,200], [201,300], [301,400]]" +
" CommittedTxId: 400", log.getEditLogManifest(1).toString());
// Case where one directory has an earlier finalized log, followed
// by a gap. The returned manifest should start after the gap.
storage = mockStorageWithEdits(
"[1,100]|[301,400]", // gap from 101 to 300
"[301,400]|[401,500]");
log = getFSEditLog(storage);
log.initJournalsForWrite();
assertEquals("[[301,400], [401,500]] CommittedTxId: 500",
log.getEditLogManifest(1).toString());
// Case where different directories have different length logs
// starting at the same txid - should pick the longer one
storage = mockStorageWithEdits(
"[1,100]|[101,150]", // short log at 101
"[1,50]|[101,200]"); // short log at 1
log = getFSEditLog(storage);
log.initJournalsForWrite();
assertEquals("[[1,100], [101,200]] CommittedTxId: 200",
log.getEditLogManifest(1).toString());
assertEquals("[[101,200]] CommittedTxId: 200",
log.getEditLogManifest(101).toString());
// Case where the first storage has an in-progress segment while
// the second has finalized that file (i.e. the first failed
// recently)
storage = mockStorageWithEdits(
"[1,100]|[101,]",
"[1,100]|[101,200]");
log = getFSEditLog(storage);
log.initJournalsForWrite();
assertEquals("[[1,100], [101,200]] CommittedTxId: 200",
log.getEditLogManifest(1).toString());
assertEquals("[[101,200]] CommittedTxId: 200",
log.getEditLogManifest(101).toString());
}
/**
* Create a mock NNStorage object with several directories, each directory
* holding edit logs according to a specification. Each directory
* is specified by a pipe-separated string. For example:
* <code>[1,100]|[101,200]</code> specifies a directory which
* includes two finalized segments, one from 1-100, and one from 101-200.
* The syntax <code>[1,]</code> specifies an in-progress log starting at
* txid 1.
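* For example, <code>mockStorageWithEdits("[1,100]|[101,]", "[1,100]|[101,200]")</code>
* mocks two directories: the first with a finalized 1-100 segment and an
* in-progress segment starting at 101, the second with two finalized segments.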
*/
private NNStorage mockStorageWithEdits(String... editsDirSpecs) throws IOException {
List<StorageDirectory> sds = Lists.newArrayList();
List<URI> uris = Lists.newArrayList();
NNStorage storage = Mockito.mock(NNStorage.class);
for (String dirSpec : editsDirSpecs) {
List<String> files = Lists.newArrayList();
String[] logSpecs = dirSpec.split("\\|");
for (String logSpec : logSpecs) {
Matcher m = Pattern.compile("\\[(\\d+),(\\d+)?\\]").matcher(logSpec);
assertTrue("bad spec: " + logSpec, m.matches());
if (m.group(2) == null) {
files.add(NNStorage.getInProgressEditsFileName(
Long.parseLong(m.group(1))));
} else {
files.add(NNStorage.getFinalizedEditsFileName(
Long.parseLong(m.group(1)),
Long.parseLong(m.group(2))));
}
}
StorageDirectory sd = FSImageTestUtil.mockStorageDirectory(
NameNodeDirType.EDITS, false,
files.toArray(new String[0]));
sds.add(sd);
URI u = URI.create("file:///storage"+ Math.random());
Mockito.doReturn(sd).when(storage).getStorageDirectory(u);
uris.add(u);
}
Mockito.doReturn(sds).when(storage).dirIterable(NameNodeDirType.EDITS);
Mockito.doReturn(uris).when(storage).getEditsDirectories();
return storage;
}
/**
* Specification for a failure during #setupEdits
*/
static class AbortSpec {
final int roll;
final int logindex;
/**
* Construct the failure specification.
* @param roll number to fail after. e.g. 1 to fail after the first roll
* @param logindex index of journal to fail.
*/
AbortSpec(int roll, int logindex) {
this.roll = roll;
this.logindex = logindex;
}
}
final static int TXNS_PER_ROLL = 10;
final static int TXNS_PER_FAIL = 2;
/**
* Set up directories for tests.
*
* Each rolled file is 10 txns long.
* A failed file is 2 txns long.
*
* @param editUris directories to create edit logs in
* @param numrolls number of times to roll the edit log during setup
* @param closeOnFinish whether to close the edit log after setup
* @param abortAtRolls Specifications for when to fail, see AbortSpec
*/
public static NNStorage setupEdits(List<URI> editUris, int numrolls,
boolean closeOnFinish, AbortSpec... abortAtRolls) throws IOException {
List<AbortSpec> aborts = new ArrayList<AbortSpec>(Arrays.asList(abortAtRolls));
NNStorage storage = new NNStorage(getConf(),
Collections.<URI>emptyList(),
editUris);
storage.format(new NamespaceInfo());
// Verify permissions
LocalFileSystem fs = LocalFileSystem.getLocal(getConf());
for (URI uri : editUris) {
String currDir = uri.getPath() + Path.SEPARATOR +
Storage.STORAGE_DIR_CURRENT;
FileStatus fileStatus = fs.getFileLinkStatus(new Path(currDir));
FsPermission permission = fileStatus.getPermission();
assertEquals(getConf().getInt(
DFSConfigKeys.DFS_JOURNAL_EDITS_DIR_PERMISSION_KEY, 700),
permission.toOctal());
}
FSEditLog editlog = getFSEditLog(storage);
// open the edit log and fill out the first segment;
// logGenerationStamp is used, simply because it doesn't
// require complex arguments.
editlog.initJournalsForWrite();
editlog.openForWrite(NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION);
for (int i = 2; i < TXNS_PER_ROLL; i++) {
editlog.logGenerationStamp((long) 0);
}
editlog.logSync();
// Go into edit log rolling loop.
// On each roll, the abortAtRolls abort specs are
// checked to see if an abort is required. If so, the
// specified journal is aborted. It will be brought
// back into rotation automatically by rollEditLog
for (int i = 0; i < numrolls; i++) {
editlog.rollEditLog(NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION);
editlog.logGenerationStamp((long) i);
editlog.logSync();
while (aborts.size() > 0
&& aborts.get(0).roll == (i+1)) {
AbortSpec spec = aborts.remove(0);
editlog.getJournals().get(spec.logindex).abort();
}
for (int j = 3; j < TXNS_PER_ROLL; j++) {
editlog.logGenerationStamp((long) i);
}
editlog.logSync();
}
if (closeOnFinish) {
editlog.close();
}
FSImageTestUtil.logStorageContents(LOG, storage);
return storage;
}
/**
* Set up directories for tests.
*
* Each rolled file is 10 txns long.
* A failed file is 2 txns long.
*
* @param editUris directories to create edit logs in
* @param numrolls number of times to roll the edit log during setup
* @param abortAtRolls Specifications for when to fail, see AbortSpec
*/
public static NNStorage setupEdits(List<URI> editUris, int numrolls,
AbortSpec... abortAtRolls) throws IOException {
return setupEdits(editUris, numrolls, true, abortAtRolls);
}
/**
* Test loading an editlog which has had both of its storage
* directories fail on alternating rolls. Two edit log directories
* are created. The first one fails on odd rolls, the second on even.
* Test that we are able to load the entire editlog regardless.
*/
@Test
public void testAlternatingJournalFailure() throws IOException {
File f1 = new File(TEST_DIR + "/alternatingjournaltest0");
File f2 = new File(TEST_DIR + "/alternatingjournaltest1");
List<URI> editUris = ImmutableList.of(f1.toURI(), f2.toURI());
NNStorage storage = setupEdits(editUris, 10,
new AbortSpec(1, 0),
new AbortSpec(2, 1),
new AbortSpec(3, 0),
new AbortSpec(4, 1),
new AbortSpec(5, 0),
new AbortSpec(6, 1),
new AbortSpec(7, 0),
new AbortSpec(8, 1),
new AbortSpec(9, 0),
new AbortSpec(10, 1));
long totaltxnread = 0;
FSEditLog editlog = getFSEditLog(storage);
editlog.initJournalsForWrite();
long startTxId = 1;
Iterable<EditLogInputStream> editStreams = editlog.selectInputStreams(startTxId,
TXNS_PER_ROLL*11);
for (EditLogInputStream edits : editStreams) {
FSEditLogLoader.EditLogValidation val =
FSEditLogLoader.scanEditLog(edits, Long.MAX_VALUE);
long read = (val.getEndTxId() - edits.getFirstTxId()) + 1;
LOG.info("Loading edits " + edits + " read " + read);
assertEquals(startTxId, edits.getFirstTxId());
startTxId += read;
totaltxnread += read;
}
editlog.close();
storage.close();
assertEquals(TXNS_PER_ROLL*11, totaltxnread);
}
/**
* Test loading an editlog with gaps. A single editlog directory
* is set up. One of the edit log files is deleted. This should
* fail when selecting the input streams, as it will not be able
* to select enough streams to load up to 4*TXNS_PER_ROLL.
* There should be 4*TXNS_PER_ROLL transactions, as rolling 3 times
* produces 4 segments of TXNS_PER_ROLL transactions each.
*/
@Test
public void testLoadingWithGaps() throws IOException {
File f1 = new File(TEST_DIR + "/gaptest0");
List<URI> editUris = ImmutableList.of(f1.toURI());
NNStorage storage = setupEdits(editUris, 3);
final long startGapTxId = 1*TXNS_PER_ROLL + 1;
final long endGapTxId = 2*TXNS_PER_ROLL;
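// With TXNS_PER_ROLL = 10, the deleted segment covers txids 11-20, leaving
// a gap in the middle of the expected 4 * TXNS_PER_ROLL = 40 transactions.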
File[] files = new File(f1, "current").listFiles(new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
if (name.startsWith(NNStorage.getFinalizedEditsFileName(startGapTxId,
endGapTxId))) {
return true;
}
return false;
}
});
assertEquals(1, files.length);
assertTrue(files[0].delete());
FSEditLog editlog = getFSEditLog(storage);
editlog.initJournalsForWrite();
long startTxId = 1;
try {
editlog.selectInputStreams(startTxId, 4*TXNS_PER_ROLL);
fail("Should have thrown exception");
} catch (IOException ioe) {
GenericTestUtils.assertExceptionContains(
"Gap in transactions. Expected to be able to read up until " +
"at least txid 40 but unable to find any edit logs containing " +
"txid 11", ioe);
}
}
/**
* Test that we can read from a byte stream without crashing.
*
*/
static void validateNoCrash(byte garbage[]) throws IOException {
final File TEST_LOG_NAME = new File(TEST_DIR, "test_edit_log");
EditLogFileOutputStream elfos = null;
EditLogFileInputStream elfis = null;
try {
elfos = new EditLogFileOutputStream(getConf(), TEST_LOG_NAME, 0);
elfos.create(NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION);
elfos.writeRaw(garbage, 0, garbage.length);
elfos.setReadyToFlush();
elfos.flushAndSync(true);
elfos.close();
elfos = null;
elfis = new EditLogFileInputStream(TEST_LOG_NAME);
// verify that we can read everything without killing the JVM or
// throwing an exception other than IOException
try {
while (true) {
FSEditLogOp op = elfis.readOp();
if (op == null)
break;
}
} catch (IOException e) {
} catch (Throwable t) {
fail("Caught non-IOException throwable " +
StringUtils.stringifyException(t));
}
} finally {
if ((elfos != null) && (elfos.isOpen()))
elfos.close();
if (elfis != null)
elfis.close();
}
}
static byte[][] invalidSequences = null;
/**
* "Fuzz" test for the edit log.
*
* This tests that we can read random garbage from the edit log without
* crashing the JVM or throwing an unchecked exception.
*/
@Test
public void testFuzzSequences() throws IOException {
final int MAX_GARBAGE_LENGTH = 512;
final int MAX_INVALID_SEQ = 5000;
// The seed to use for our random number generator. When given the same
// seed, java.util.Random will always produce the same sequence of values.
// This is important because it means that the test is deterministic and
// repeatable on any machine.
final int RANDOM_SEED = 123;
Random r = new Random(RANDOM_SEED);
for (int i = 0; i < MAX_INVALID_SEQ; i++) {
byte[] garbage = new byte[r.nextInt(MAX_GARBAGE_LENGTH)];
r.nextBytes(garbage);
validateNoCrash(garbage);
}
}
private static long readAllEdits(Collection<EditLogInputStream> streams,
long startTxId) throws IOException {
FSEditLogOp op;
long nextTxId = startTxId;
long numTx = 0;
for (EditLogInputStream s : streams) {
while (true) {
op = s.readOp();
if (op == null)
break;
if (op.getTransactionId() != nextTxId) {
throw new IOException("out of order transaction ID! expected " +
nextTxId + " but got " + op.getTransactionId() + " when " +
"reading " + s.getName());
}
numTx++;
nextTxId = op.getTransactionId() + 1;
}
}
return numTx;
}
/**
* Test edit log failover. If a single edit log is missing, the other
* edit logs should be used instead.
*/
@Test
public void testEditLogFailOverFromMissing() throws IOException {
File f1 = new File(TEST_DIR + "/failover0");
File f2 = new File(TEST_DIR + "/failover1");
List<URI> editUris = ImmutableList.of(f1.toURI(), f2.toURI());
NNStorage storage = setupEdits(editUris, 3);
final long startErrorTxId = 1*TXNS_PER_ROLL + 1;
final long endErrorTxId = 2*TXNS_PER_ROLL;
File[] files = new File(f1, "current").listFiles(new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
if (name.startsWith(NNStorage.getFinalizedEditsFileName(startErrorTxId,
endErrorTxId))) {
return true;
}
return false;
}
});
assertEquals(1, files.length);
assertTrue(files[0].delete());
FSEditLog editlog = getFSEditLog(storage);
editlog.initJournalsForWrite();
long startTxId = 1;
Collection<EditLogInputStream> streams = null;
try {
streams = editlog.selectInputStreams(startTxId, 4*TXNS_PER_ROLL);
readAllEdits(streams, startTxId);
} catch (IOException e) {
LOG.error("edit log failover didn't work", e);
fail("Edit log failover didn't work");
} finally {
IOUtils.cleanupWithLogger(null,
streams.toArray(new EditLogInputStream[0]));
}
}
/**
* Test edit log failover from a corrupt edit log
*/
@Test
public void testEditLogFailOverFromCorrupt() throws IOException {
File f1 = new File(TEST_DIR + "/failover0");
File f2 = new File(TEST_DIR + "/failover1");
List<URI> editUris = ImmutableList.of(f1.toURI(), f2.toURI());
NNStorage storage = setupEdits(editUris, 3);
final long startErrorTxId = 1*TXNS_PER_ROLL + 1;
final long endErrorTxId = 2*TXNS_PER_ROLL;
File[] files = new File(f1, "current").listFiles(new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
if (name.startsWith(NNStorage.getFinalizedEditsFileName(startErrorTxId,
endErrorTxId))) {
return true;
}
return false;
}
});
assertEquals(1, files.length);
long fileLen = files[0].length();
LOG.debug("Corrupting Log File: " + files[0] + " len: " + fileLen);
RandomAccessFile rwf = new RandomAccessFile(files[0], "rw");
rwf.seek(fileLen-4); // seek to checksum bytes
int b = rwf.readInt();
rwf.seek(fileLen-4);
rwf.writeInt(b+1);
rwf.close();
FSEditLog editlog = getFSEditLog(storage);
editlog.initJournalsForWrite();
long startTxId = 1;
Collection<EditLogInputStream> streams = null;
try {
streams = editlog.selectInputStreams(startTxId, 4*TXNS_PER_ROLL);
readAllEdits(streams, startTxId);
} catch (IOException e) {
LOG.error("edit log failover didn't work", e);
fail("Edit log failover didn't work");
} finally {
IOUtils.cleanupWithLogger(null,
streams.toArray(new EditLogInputStream[0]));
}
}
/**
* Test creating a directory with lots and lots of edit log segments
*/
@Test
public void testManyEditLogSegments() throws IOException {
final int NUM_EDIT_LOG_ROLLS = 1000;
// start a cluster
Configuration conf = getConf();
MiniDFSCluster cluster = null;
FileSystem fileSys = null;
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).build();
cluster.waitActive();
fileSys = cluster.getFileSystem();
final FSNamesystem namesystem = cluster.getNamesystem();
FSImage fsimage = namesystem.getFSImage();
final FSEditLog editLog = fsimage.getEditLog();
for (int i = 0; i < NUM_EDIT_LOG_ROLLS; i++){
editLog.logSetReplication("fakefile" + i, (short)(i % 3));
assertExistsInStorageDirs(
cluster, NameNodeDirType.EDITS,
NNStorage.getInProgressEditsFileName((i * 3) + 1));
editLog.logSync();
editLog.rollEditLog(NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION);
assertExistsInStorageDirs(
cluster, NameNodeDirType.EDITS,
NNStorage.getFinalizedEditsFileName((i * 3) + 1, (i * 3) + 3));
}
editLog.close();
} finally {
if(fileSys != null) fileSys.close();
if(cluster != null) cluster.shutdown();
}
// How long does it take to read through all these edit logs?
long startTime = Time.now();
try {
cluster = new MiniDFSCluster.Builder(conf).
numDataNodes(NUM_DATA_NODES).build();
cluster.waitActive();
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
long endTime = Time.now();
double delta = ((float)(endTime - startTime)) / 1000.0;
LOG.info(String.format("loaded %d edit log segments in %.2f seconds",
NUM_EDIT_LOG_ROLLS, delta));
}
/**
* Edit log op instances are cached internally using thread-local storage.
* This test checks that the cached instances are reset in between different
* transactions processed on the same thread, so that we don't accidentally
* apply incorrect attributes to an inode.
*
* @throws IOException if there is an I/O error
*/
@Test
public void testResetThreadLocalCachedOps() throws IOException {
Configuration conf = new HdfsConfiguration();
conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_ACLS_ENABLED_KEY, true);
// Set single handler thread, so all transactions hit same thread-local ops.
conf.setInt(DFSConfigKeys.DFS_NAMENODE_HANDLER_COUNT_KEY, 1);
MiniDFSCluster cluster = null;
FileSystem fileSys = null;
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
cluster.waitActive();
fileSys = cluster.getFileSystem();
// Create /dir1 with a default ACL.
Path dir1 = new Path("/dir1");
fileSys.mkdirs(dir1);
List<AclEntry> aclSpec = Lists.newArrayList(
aclEntry(DEFAULT, USER, "foo", READ_EXECUTE));
fileSys.modifyAclEntries(dir1, aclSpec);
// /dir1/dir2 is expected to clone the default ACL.
Path dir2 = new Path("/dir1/dir2");
fileSys.mkdirs(dir2);
// /dir1/file1 is expected to clone the default ACL.
Path file1 = new Path("/dir1/file1");
fileSys.create(file1).close();
// /dir3 is not a child of /dir1, so must not clone the default ACL.
Path dir3 = new Path("/dir3");
fileSys.mkdirs(dir3);
// /file2 is not a child of /dir1, so must not clone the default ACL.
Path file2 = new Path("/file2");
fileSys.create(file2).close();
// Restart and assert the above stated expectations.
IOUtils.cleanupWithLogger(LOG, fileSys);
cluster.restartNameNode();
fileSys = cluster.getFileSystem();
assertFalse(fileSys.getAclStatus(dir1).getEntries().isEmpty());
assertFalse(fileSys.getAclStatus(dir2).getEntries().isEmpty());
assertFalse(fileSys.getAclStatus(file1).getEntries().isEmpty());
assertTrue(fileSys.getAclStatus(dir3).getEntries().isEmpty());
assertTrue(fileSys.getAclStatus(file2).getEntries().isEmpty());
} finally {
IOUtils.cleanupWithLogger(LOG, fileSys);
if (cluster != null) {
cluster.shutdown();
}
}
}
class TestAppender extends AppenderSkeleton {
private final List<LoggingEvent> log = new ArrayList<>();
@Override
public boolean requiresLayout() {
return false;
}
@Override
protected void append(final LoggingEvent loggingEvent) {
log.add(loggingEvent);
}
@Override
public void close() {
}
public List<LoggingEvent> getLog() {
return new ArrayList<>(log);
}
}
/**
 * Test that reading from an actively updated (in-progress) edit log
 * does not attempt to read beyond the last synced transaction.
 * @throws Exception
 */
@Test
public void testReadActivelyUpdatedLog() throws Exception {
final TestAppender appender = new TestAppender();
LogManager.getRootLogger().addAppender(appender);
Configuration conf = new HdfsConfiguration();
conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_ACLS_ENABLED_KEY, true);
// Set single handler thread, so all transactions hit same thread-local ops.
conf.setInt(DFSConfigKeys.DFS_NAMENODE_HANDLER_COUNT_KEY, 1);
MiniDFSCluster cluster = null;
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
cluster.waitActive();
FSImage fsimage = cluster.getNamesystem().getFSImage();
StorageDirectory sd = fsimage.getStorage().getStorageDir(0);
final DistributedFileSystem fileSys = cluster.getFileSystem();
DFSInotifyEventInputStream events = fileSys.getInotifyEventStream();
fileSys.mkdirs(new Path("/test"));
fileSys.mkdirs(new Path("/test/dir1"));
fileSys.delete(new Path("/test/dir1"), true);
fsimage.getEditLog().logSync();
fileSys.mkdirs(new Path("/test/dir2"));
final File inProgressEdit = NNStorage.getInProgressEditsFile(sd, 1);
assertTrue(inProgressEdit.exists());
EditLogFileInputStream elis = new EditLogFileInputStream(inProgressEdit);
FSEditLogOp op;
long pos = 0;
while (true) {
op = elis.readOp();
if (op != null && op.opCode != FSEditLogOpCodes.OP_INVALID) {
pos = elis.getPosition();
} else {
break;
}
}
elis.close();
assertTrue(pos > 0);
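// The byte at the final position should be the OP_INVALID (-1) trailer.
// Scribble garbage just past it so that any attempt to read beyond the
// last synced op would hit it and log the message scanned for below.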
RandomAccessFile rwf = new RandomAccessFile(inProgressEdit, "rw");
rwf.seek(pos);
assertEquals(rwf.readByte(), (byte) -1);
rwf.seek(pos + 1);
rwf.writeByte(2);
rwf.close();
events.poll();
String pattern = "Caught exception after reading (.*) ops";
Pattern r = Pattern.compile(pattern);
final List<LoggingEvent> log = appender.getLog();
for (LoggingEvent event : log) {
Matcher m = r.matcher(event.getRenderedMessage());
if (m.find()) {
fail("Should not try to read past latest syned edit log op");
}
}
} finally {
if (cluster != null) {
cluster.shutdown();
}
LogManager.getRootLogger().removeAppender(appender);
}
}
/**
* Test that edits can be written and read when erasure coding is not supported.
*/
@Test
public void testEditLogWithoutErasureCodingSupported()
throws IOException {
Configuration conf = getConf();
MiniDFSCluster cluster = null;
// ERASURECODING not supported
int logVersion = -61;
assertFalse(NameNodeLayoutVersion.supports(
NameNodeLayoutVersion.Feature.ERASURE_CODING, logVersion));
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
cluster.waitActive();
final FSNamesystem namesystem = cluster.getNamesystem();
FSImage fsimage = namesystem.getFSImage();
FileSystem fileSys = cluster.getFileSystem();
final FSEditLog editLog = fsimage.getEditLog();
editLog.editLogStream.setCurrentLogVersion(logVersion);
// Write new version edit log
long txid = editLog.rollEditLog(logVersion);
String testDir = "/test";
String testFile = "testfile_001";
String testFilePath = testDir + "/" + testFile;
fileSys.mkdirs(new Path(testDir), new FsPermission("755"));
// Create a file
Path p = new Path(testFilePath);
DFSTestUtil.createFile(fileSys, p, 0, (short) 1, 1);
long blkId = 1;
long blkNumBytes = 1024;
long timestamp = 1426222918;
// Add a block to the file
BlockInfoContiguous blockInfo =
new BlockInfoContiguous(
new Block(blkId, blkNumBytes, timestamp),
(short)1);
INodeFile file
= (INodeFile)namesystem.getFSDirectory().getINode(testFilePath);
file.addBlock(blockInfo);
file.toUnderConstruction("testClient", "testMachine");
// Write edit log
editLog.logAddBlock(testFilePath, file);
editLog.rollEditLog(logVersion);
// Read edit log
Collection<EditLogInputStream> editStreams
= editLog.selectInputStreams(txid, txid + 1);
EditLogInputStream inputStream = null;
for (EditLogInputStream s : editStreams) {
if (s.getFirstTxId() == txid) {
inputStream = s;
break;
}
}
assertNotNull(inputStream);
int readLogVersion = inputStream.getVersion(false);
assertEquals(logVersion, readLogVersion);
FSEditLogLoader loader = new FSEditLogLoader(namesystem, 0);
long records = loader.loadFSEdits(inputStream, txid);
assertTrue(records > 0);
editLog.close();
cluster.shutdown();
}
}