blob: 9b5c523ee4da2b3b76d196ff42580aee14fe7ec1 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
import org.apache.hadoop.hbase.YouAreDeadException;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
import org.apache.hadoop.hbase.wal.AsyncFSWALProvider;
import org.apache.hadoop.hbase.wal.FSHLogProvider;
import org.apache.hadoop.hbase.wal.WALFactory;
import org.apache.hadoop.hbase.wal.WALProvider;
import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameter;
import org.junit.runners.Parameterized.Parameters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * This testcase is used to ensure that the compaction marker will fail a compaction if the RS is
 * already dead. It cannot eliminate FNFE when scanning, but it does reduce the possibility a lot.
 * <p>
 * The test is run once for every WAL provider returned by {@link #params()}.
 */
@RunWith(Parameterized.class)
@Category({ RegionServerTests.class, LargeTests.class })
public class TestCompactionInDeadRegionServer {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestCompactionInDeadRegionServer.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestCompactionInDeadRegionServer.class);

  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();

  private static final TableName TABLE_NAME = TableName.valueOf("test");

  private static final byte[] CF = Bytes.toBytes("cf");

  private static final byte[] CQ = Bytes.toBytes("cq");

  /**
   * A region server implementation that swallows {@link YouAreDeadException} while reporting, so
   * the server keeps running instead of aborting.
   */
  public static final class IgnoreYouAreDeadRS extends HRegionServer {

    public IgnoreYouAreDeadRS(Configuration conf) throws IOException, InterruptedException {
      super(conf);
    }

    /**
     * Delegates to the parent implementation, ignoring a {@link YouAreDeadException} if one is
     * thrown. The parent's return value is intentionally discarded.
     * @return always {@code true}
     * @throws IOException any other I/O failure raised by the parent implementation
     */
    @Override
    protected boolean tryRegionServerReport(long reportStartTime, long reportEndTime)
      throws IOException {
      try {
        super.tryRegionServerReport(reportStartTime, reportEndTime);
      } catch (YouAreDeadException e) {
        // ignore, do not abort
      }
      return true;
    }
  }

  /** The WAL provider implementation under test, injected by the parameterized runner. */
  @Parameter
  public Class<? extends WALProvider> walProvider;

  /**
   * @return one parameter set per WAL provider to test against
   */
  @Parameters(name = "{index}: wal={0}")
  public static List<Object[]> params() {
    return Arrays.asList(new Object[] { FSHLogProvider.class },
      new Object[] { AsyncFSWALProvider.class });
  }

  /**
   * Starts a two-node mini cluster using {@link IgnoreYouAreDeadRS} and the WAL provider under
   * test, then creates the test table and writes rows 0-19, flushing after each batch of ten.
   */
  @Before
  public void setUp() throws Exception {
    Configuration conf = UTIL.getConfiguration();
    conf.setClass(WALFactory.WAL_PROVIDER, walProvider, WALProvider.class);
    conf.setInt(HConstants.ZK_SESSION_TIMEOUT, 2000);
    conf.setClass(HConstants.REGION_SERVER_IMPL, IgnoreYouAreDeadRS.class, HRegionServer.class);
    UTIL.startMiniCluster(2);
    // Close the table handle once the data is loaded; the original never released it.
    try (Table table = UTIL.createTable(TABLE_NAME, CF)) {
      putRows(table, 0, 10);
      UTIL.getAdmin().flush(TABLE_NAME);
      putRows(table, 10, 20);
      UTIL.getAdmin().flush(TABLE_NAME);
    }
  }

  /**
   * Writes one row per integer in {@code [startInclusive, endExclusive)}; both the row key and
   * the value stored under {@code CF:CQ} are the integer's byte encoding.
   */
  private static void putRows(Table table, int startInclusive, int endExclusive)
    throws IOException {
    for (int i = startInclusive; i < endExclusive; i++) {
      table.put(new Put(Bytes.toBytes(i)).addColumn(CF, CQ, Bytes.toBytes(i)));
    }
  }

  @After
  public void tearDown() throws Exception {
    UTIL.shutdownMiniCluster();
  }

  /**
   * Deletes the znode of the region server hosting the test table, waits until another region
   * server hosts the table, and then verifies that a major compaction requested on the stale
   * region instance fails while all rows remain readable.
   */
  @Test
  public void test() throws Exception {
    HRegionServer regionSvr = UTIL.getRSForFirstRegionInTable(TABLE_NAME);
    HRegion region = regionSvr.getRegions(TABLE_NAME).get(0);
    String regName = region.getRegionInfo().getEncodedName();
    List<HRegion> metaRegs = regionSvr.getRegions(TableName.META_TABLE_NAME);
    if (metaRegs != null && !metaRegs.isEmpty()) {
      LOG.info("meta is on the same server: {}", regionSvr);
      // when region is on same server as hbase:meta, reassigning meta would abort the server
      // since WAL is broken.
      // so the region is moved to a different server
      HRegionServer otherRs = UTIL.getOtherRegionServer(regionSvr);
      UTIL.moveRegionAndWait(region.getRegionInfo(), otherRs.getServerName());
      LOG.info("Moved region: {} to {}", regName, otherRs.getServerName());
    }

    // Re-resolve the hosting server and region, since the region may just have been moved.
    final HRegionServer rsToSuspend = UTIL.getRSForFirstRegionInTable(TABLE_NAME);
    region = rsToSuspend.getRegions(TABLE_NAME).get(0);

    // Remove the server's znode under the rs parent node.
    ZKWatcher watcher = UTIL.getZooKeeperWatcher();
    watcher.getRecoverableZooKeeper().delete(
      ZNodePaths.joinZNode(watcher.getZNodePaths().rsZNode, rsToSuspend.getServerName().toString()),
      -1);
    LOG.info("suspending {}", rsToSuspend);

    UTIL.waitFor(60000, 1000, new ExplainingPredicate<Exception>() {

      @Override
      public boolean evaluate() throws Exception {
        // Satisfied as soon as any server other than the suspended one hosts the table.
        for (RegionServerThread thread : UTIL.getHBaseCluster().getRegionServerThreads()) {
          HRegionServer rs = thread.getRegionServer();
          if (rs != rsToSuspend && !rs.getRegions(TABLE_NAME).isEmpty()) {
            return true;
          }
        }
        return false;
      }

      @Override
      public String explainFailure() throws Exception {
        return "The region for " + TABLE_NAME + " is still on " + rsToSuspend.getServerName();
      }
    });

    try {
      // 'region' is still the instance obtained from the suspended server.
      region.compact(true);
      // fail() throws an AssertionError, which the catch below does not intercept.
      fail("Should fail as our wal file has already been closed, " +
        "and walDir has also been renamed");
    } catch (Exception e) {
      LOG.debug("expected exception: ", e);
    }

    // should not hit FNFE
    try (Table table = UTIL.getConnection().getTable(TABLE_NAME)) {
      for (int i = 0; i < 20; i++) {
        assertEquals(i, Bytes.toInt(table.get(new Get(Bytes.toBytes(i))).getValue(CF, CQ)));
      }
    }
  }
}