blob: dab82144f04a0ce2d96bb96fd33500587c92503d [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.DroppedSnapshotException;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.regionserver.wal.FSHLog;
import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WALProvider.Writer;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.mockito.Mockito;
import org.mockito.exceptions.verification.WantedButNotInvoked;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Testing sync/append failures.
* Copied from TestHRegion.
*/
@Category({SmallTests.class})
public class TestFailedAppendAndSync {
@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestFailedAppendAndSync.class);
private static final Logger LOG = LoggerFactory.getLogger(TestFailedAppendAndSync.class);
@Rule public TestName name = new TestName();
private static final String COLUMN_FAMILY = "MyCF";
private static final byte [] COLUMN_FAMILY_BYTES = Bytes.toBytes(COLUMN_FAMILY);
HRegion region = null;
// Do not run unit tests in parallel (? Why not? It don't work? Why not? St.Ack)
private static HBaseTestingUtility TEST_UTIL;
public static Configuration CONF ;
private String dir;
// Test names
protected TableName tableName;
@Before
public void setup() throws IOException {
TEST_UTIL = new HBaseTestingUtility();
CONF = TEST_UTIL.getConfiguration();
// Disable block cache.
CONF.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
dir = TEST_UTIL.getDataTestDir("TestHRegion").toString();
tableName = TableName.valueOf(name.getMethodName());
}
@After
public void tearDown() throws Exception {
EnvironmentEdgeManagerTestHelper.reset();
LOG.info("Cleaning test directory: " + TEST_UTIL.getDataTestDir());
TEST_UTIL.cleanupTestDir();
}
String getName() {
return name.getMethodName();
}
// Dodgy WAL. Will throw exceptions when flags set.
class DodgyFSLog extends FSHLog {
volatile boolean throwSyncException = false;
volatile boolean throwAppendException = false;
volatile boolean throwArchiveException = false;
final AtomicLong rolls = new AtomicLong(0);
public DodgyFSLog(FileSystem fs, Server server, Path root, String logDir, Configuration conf)
throws IOException {
super(fs, server, root, logDir, conf);
}
@Override
public Map<byte[], List<byte[]>> rollWriter(boolean force)
throws FailedLogCloseException, IOException {
Map<byte[], List<byte[]>> regions = super.rollWriter(force);
rolls.getAndIncrement();
return regions;
}
@Override
protected void archiveLogFile(Path p) throws IOException {
if (throwArchiveException) {
throw new IOException("throw archival exception");
}
}
@Override
protected void archive(Pair<Path, Long> localLogsToArchive) {
super.archive(localLogsToArchive);
}
@Override
protected Writer createWriterInstance(Path path) throws IOException {
final Writer w = super.createWriterInstance(path);
return new Writer() {
@Override
public void close() throws IOException {
w.close();
}
@Override
public void sync(boolean forceSync) throws IOException {
if (throwSyncException) {
throw new IOException("FAKE! Failed to replace a bad datanode...");
}
w.sync(forceSync);
}
@Override
public void append(Entry entry) throws IOException {
if (throwAppendException) {
throw new IOException("FAKE! Failed to replace a bad datanode...");
}
w.append(entry);
}
@Override
public long getLength() {
return w.getLength();
}
@Override
public long getSyncedLength() {
return w.getSyncedLength();
}
};
}
}
/**
* Reproduce locking up that happens when we get an exceptions appending and syncing.
* See HBASE-14317.
* First I need to set up some mocks for Server and RegionServerServices. I also need to
* set up a dodgy WAL that will throw an exception when we go to append to it.
*/
@Test
public void testLockupAroundBadAssignSync() throws IOException {
// Make up mocked server and services.
RegionServerServices services = mock(RegionServerServices.class);
when(services.getConfiguration()).thenReturn(CONF);
when(services.isStopped()).thenReturn(false);
when(services.isAborted()).thenReturn(false);
// OK. Now I have my mocked up Server and RegionServerServices and my dodgy WAL, go ahead with
// the test.
FileSystem fs = FileSystem.get(CONF);
Path rootDir = new Path(dir + getName());
DodgyFSLog dodgyWAL = new DodgyFSLog(fs, (Server)services, rootDir, getName(), CONF);
dodgyWAL.init();
LogRoller logRoller = new LogRoller(services);
logRoller.addWAL(dodgyWAL);
logRoller.start();
boolean threwOnSync = false;
boolean threwOnAppend = false;
boolean threwOnBoth = false;
HRegion region = initHRegion(tableName, null, null, CONF, dodgyWAL);
try {
// Get some random bytes.
byte[] value = Bytes.toBytes(getName());
try {
// First get something into memstore
Put put = new Put(value);
put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), value);
region.put(put);
} catch (IOException ioe) {
fail();
}
long rollsCount = dodgyWAL.rolls.get();
try {
dodgyWAL.throwAppendException = true;
dodgyWAL.throwSyncException = false;
Put put = new Put(value);
put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("3"), value);
region.put(put);
} catch (IOException ioe) {
threwOnAppend = true;
}
while (rollsCount == dodgyWAL.rolls.get()) {
Threads.sleep(100);
}
rollsCount = dodgyWAL.rolls.get();
// When we get to here.. we should be ok. A new WAL has been put in place. There were no
// appends to sync. We should be able to continue.
try {
dodgyWAL.throwAppendException = true;
dodgyWAL.throwSyncException = true;
Put put = new Put(value);
put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("4"), value);
region.put(put);
} catch (IOException ioe) {
threwOnBoth = true;
}
while (rollsCount == dodgyWAL.rolls.get()) {
Threads.sleep(100);
}
// Again, all should be good. New WAL and no outstanding unsync'd edits so we should be able
// to just continue.
// So, should be no abort at this stage. Verify.
Mockito.verify(services, Mockito.atLeast(0)).abort(Mockito.anyString(),
Mockito.any(Throwable.class));
try {
dodgyWAL.throwAppendException = false;
dodgyWAL.throwSyncException = true;
Put put = new Put(value);
put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("2"), value);
region.put(put);
} catch (IOException ioe) {
threwOnSync = true;
}
// An append in the WAL but the sync failed is a server abort condition. That is our
// current semantic. Verify. It takes a while for abort to be called. Just hang here till it
// happens. If it don't we'll timeout the whole test. That is fine.
while (true) {
try {
Mockito.verify(services, Mockito.atLeast(1)).abort(Mockito.anyString(),
Mockito.any(Throwable.class));
break;
} catch (WantedButNotInvoked t) {
Threads.sleep(1);
}
}
try {
dodgyWAL.throwAppendException = false;
dodgyWAL.throwSyncException = false;
dodgyWAL.throwArchiveException = true;
Pair<Path, Long> pair = new Pair<Path, Long>();
pair.setFirst(new Path("/a/b/"));
pair.setSecond(100L);
dodgyWAL.archive(pair);
} catch (Throwable ioe) {
}
while (true) {
try {
// one more abort needs to be called
Mockito.verify(services, Mockito.atLeast(2)).abort(Mockito.anyString(),
(Throwable) Mockito.anyObject());
break;
} catch (WantedButNotInvoked t) {
Threads.sleep(1);
}
}
} finally {
// To stop logRoller, its server has to say it is stopped.
Mockito.when(services.isStopped()).thenReturn(true);
if (logRoller != null) logRoller.close();
if (region != null) {
try {
region.close(true);
} catch (DroppedSnapshotException e) {
LOG.info("On way out; expected!", e);
}
}
if (dodgyWAL != null) dodgyWAL.close();
assertTrue("The regionserver should have thrown an exception", threwOnBoth);
assertTrue("The regionserver should have thrown an exception", threwOnAppend);
assertTrue("The regionserver should have thrown an exception", threwOnSync);
}
}
/**
* @return A region on which you must call
* {@link HBaseTestingUtility#closeRegionAndWAL(HRegion)} when done.
*/
public static HRegion initHRegion(TableName tableName, byte[] startKey, byte[] stopKey,
Configuration conf, WAL wal) throws IOException {
ChunkCreator.initialize(MemStoreLAB.CHUNK_SIZE_DEFAULT, false, 0, 0,
0, null, MemStoreLAB.INDEX_CHUNK_SIZE_PERCENTAGE_DEFAULT);
return TEST_UTIL.createLocalHRegion(tableName, startKey, stopKey, conf, false,
Durability.SYNC_WAL, wal, COLUMN_FAMILY_BYTES);
}
}