| /** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.hadoop.hbase.util; |
| |
| import org.apache.commons.io.IOUtils; |
| import org.apache.hadoop.conf.Configuration; |
| import org.apache.hadoop.fs.FileStatus; |
| import org.apache.hadoop.fs.FileSystem; |
| import org.apache.hadoop.fs.Path; |
| import org.apache.hadoop.hbase.HColumnDescriptor; |
| import org.apache.hadoop.hbase.HConstants; |
| import org.apache.hadoop.hbase.HRegionInfo; |
| import org.apache.hadoop.hbase.HRegionLocation; |
| import org.apache.hadoop.hbase.HTableDescriptor; |
| import org.apache.hadoop.hbase.MetaTableAccessor; |
| import org.apache.hadoop.hbase.MiniHBaseCluster; |
| import org.apache.hadoop.hbase.ServerName; |
| import org.apache.hadoop.hbase.TableName; |
| import org.apache.hadoop.hbase.client.ClusterConnection; |
| import org.apache.hadoop.hbase.client.Connection; |
| import org.apache.hadoop.hbase.client.ConnectionFactory; |
| import org.apache.hadoop.hbase.client.Delete; |
| import org.apache.hadoop.hbase.client.Get; |
| import org.apache.hadoop.hbase.client.Put; |
| import org.apache.hadoop.hbase.client.RegionLocator; |
| import org.apache.hadoop.hbase.client.Result; |
| import org.apache.hadoop.hbase.client.Table; |
| import org.apache.hadoop.hbase.client.replication.ReplicationAdmin; |
| import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; |
| import org.apache.hadoop.hbase.io.hfile.TestHFile; |
| import org.apache.hadoop.hbase.master.AssignmentManager; |
| import org.apache.hadoop.hbase.master.RegionState; |
| import org.apache.hadoop.hbase.master.RegionStates; |
| import org.apache.hadoop.hbase.master.TableLockManager; |
| import org.apache.hadoop.hbase.regionserver.HRegion; |
| import org.apache.hadoop.hbase.regionserver.HRegionServer; |
| import org.apache.hadoop.hbase.regionserver.SplitTransactionImpl; |
| import org.apache.hadoop.hbase.regionserver.SplitTransactionFactory; |
| import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction; |
| import org.apache.hadoop.hbase.replication.ReplicationFactory; |
| import org.apache.hadoop.hbase.replication.ReplicationPeerConfig; |
| import org.apache.hadoop.hbase.replication.ReplicationQueues; |
| import org.apache.hadoop.hbase.replication.ReplicationQueuesArguments; |
| import org.apache.hadoop.hbase.testclassification.LargeTests; |
| import org.apache.hadoop.hbase.testclassification.MiscTests; |
| import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker; |
| import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil; |
| import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; |
| import org.junit.AfterClass; |
| import org.junit.Assert; |
| import org.junit.Before; |
| import org.junit.BeforeClass; |
| import org.junit.Ignore; |
| import org.junit.Test; |
| import org.junit.experimental.categories.Category; |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.LinkedList; |
| import java.util.List; |
| import java.util.HashMap; |
| import java.util.Map; |
| import java.util.concurrent.Callable; |
| import java.util.concurrent.CountDownLatch; |
| import java.util.concurrent.ExecutorService; |
| import java.util.concurrent.Executors; |
| import java.util.concurrent.Future; |
| import java.util.concurrent.ScheduledThreadPoolExecutor; |
| import java.util.concurrent.SynchronousQueue; |
| import java.util.concurrent.ThreadPoolExecutor; |
| import java.util.concurrent.TimeUnit; |
| import java.util.concurrent.atomic.AtomicBoolean; |
| |
| import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.*; |
| import static org.junit.Assert.*; |
| |
| @Category({MiscTests.class, LargeTests.class}) |
| public class TestHBaseFsckOneRS extends BaseTestHBaseFsck { |
| |
| /** |
| * One-time fixture for the whole class: tunes the shared configuration, starts a mini cluster |
| * with a single region server, creates the two executor pools, and caches the region states, |
| * connection and admin handle used by the tests. |
| * |
| * @throws Exception if any step of the setup fails |
| */ |
| @BeforeClass |
| public static void setUpBeforeClass() throws Exception { |
| // Register the master coprocessor before the cluster is started. |
| final String masterObserver = MasterSyncObserver.class.getName(); |
| TEST_UTIL.getConfiguration().set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY, masterObserver); |
| |
| // Region server handler counts. |
| conf.setInt("hbase.regionserver.handler.count", 2); |
| conf.setInt("hbase.regionserver.metahandler.count", 30); |
| |
| // Client thread limits and timeouts, derived from the shared test constants. |
| conf.setInt("hbase.htable.threads.max", POOL_SIZE); |
| conf.setInt("hbase.hconnection.threads.max", 2 * POOL_SIZE); |
| conf.setInt("hbase.hbck.close.timeout", 2 * REGION_ONLINE_TIMEOUT); |
| conf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, 8 * REGION_ONLINE_TIMEOUT); |
| |
| // A single region server. |
| TEST_UTIL.startMiniCluster(1); |
| |
| // Pool handed to table handles; uses a direct hand-off queue and daemon threads. |
| tableExecutorService = new ThreadPoolExecutor( |
| 1, POOL_SIZE, 60, TimeUnit.SECONDS, |
| new SynchronousQueue<Runnable>(), |
| Threads.newDaemonThreadFactory("testhbck")); |
| |
| // Pool handed to HBaseFsck instances. |
| hbfsckExecutorService = new ScheduledThreadPoolExecutor(POOL_SIZE); |
| |
| AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager(); |
| regionStates = am.getRegionStates(); |
| |
| connection = (ClusterConnection) TEST_UTIL.getConnection(); |
| admin = connection.getAdmin(); |
| |
| // Turn the balancer off for the duration of the tests. |
| admin.setBalancerRunning(false, true); |
| |
| // Do not start any test until both system tables are fully assigned. |
| TEST_UTIL.waitUntilAllRegionsAssigned(TableName.META_TABLE_NAME); |
| TEST_UTIL.waitUntilAllRegionsAssigned(TableName.NAMESPACE_TABLE_NAME); |
| } |
| |
| /** |
| * One-time teardown: shuts down both executor pools, closes the admin handle and finally |
| * stops the mini cluster. |
| * |
| * @throws Exception if closing the admin or stopping the cluster fails |
| */ |
| @AfterClass |
| public static void tearDownAfterClass() throws Exception { |
| // The two pools are independent of each other; the cluster goes down last. |
| hbfsckExecutorService.shutdown(); |
| tableExecutorService.shutdown(); |
| |
| admin.close(); |
| |
| TEST_UTIL.shutdownMiniCluster(); |
| } |
| |
| /** |
| * Per-test setup: calls {@code EnvironmentEdgeManager.reset()} before every test. |
| * NOTE(review): presumably this undoes any custom clock edge injected by an earlier test -- |
| * confirm against EnvironmentEdgeManager. |
| */ |
| @Before |
| public void setUp() { |
| EnvironmentEdgeManager.reset(); |
| } |
| |
| |
| /** |
| * Checks that hbck reports no errors on the bare cluster and still reports none, with no |
| * overlap groups and no missing rows, once a healthy table has been set up. |
| */ |
| @Test(timeout=180000) |
| public void testHBaseFsckClean() throws Exception { |
| assertNoErrors(doFsck(conf, false)); |
| final TableName cleanTable = TableName.valueOf("tableClean"); |
| try { |
| // Second read-only pass, still before the table exists. |
| HBaseFsck report = doFsck(conf, false); |
| assertNoErrors(report); |
| |
| setupTable(cleanTable); |
| assertEquals(ROWKEYS.length, countRows()); |
| |
| // Exactly one healthy table now exists: nothing to report and nothing lost. |
| report = doFsck(conf, false); |
| assertNoErrors(report); |
| assertEquals(0, report.getOverlapGroups(cleanTable).size()); |
| assertEquals(ROWKEYS.length, countRows()); |
| } finally { |
| cleanupTable(cleanTable); |
| } |
| } |
| |
| /** |
| * Runs hbck with its worker thread count limited to one against a freshly set up table, |
| * i.e. with more regions than threads. The run must finish without errors and, in |
| * particular, without a RejectedExecutionException. |
| */ |
| @Test (timeout=180000) |
| public void testHbckThreadpooling() throws Exception { |
| final TableName tableName = TableName.valueOf("tableDupeStartKey"); |
| try { |
| // The table created here has several regions. |
| setupTable(tableName); |
| |
| // Work on a private copy of the configuration so the single-thread limit stays local. |
| final Configuration singleThreaded = new Configuration(conf); |
| singleThreaded.setInt("hbasefsck.numthreads", 1); |
| |
| HBaseFsck result = doFsck(singleThreaded, false); |
| assertNoErrors(result); |
| } finally { |
| cleanupTable(tableName); |
| } |
| } |
| |
| /** |
| * Creates a single-region table, removes that region's data, and checks that hbck first |
| * reports NOT_IN_HDFS and is clean again after two fixing runs. |
| * See also testNoHdfsTable(). |
| */ |
| @Test (timeout=180000) |
| public void testTableWithNoRegions() throws Exception { |
| final TableName tableName = TableName.valueOf(name.getMethodName()); |
| try { |
| // One column family is required: a table without families is not checked at all. |
| final HTableDescriptor descriptor = new HTableDescriptor(tableName); |
| descriptor.addFamily(new HColumnDescriptor(Bytes.toString(FAM))); |
| |
| // No split keys, hence a table with one region. |
| createTable(TEST_UTIL, descriptor, null); |
| tbl = connection.getTable(tableName, tableExecutorService); |
| |
| // Damage the only region, which spans the whole key space. |
| // NOTE(review): the flags (false, false, true) appear to mean "keep assignment, keep |
| // meta row, delete hdfs data", judging by sibling call sites -- confirm against deleteRegion. |
| deleteRegion(conf, tbl.getTableDescriptor(), |
| HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, |
| false, false, true); |
| |
| final HBaseFsck.ErrorReporter.ERROR_CODE[] expected = { |
| HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS |
| }; |
| HBaseFsck report = doFsck(conf, false); |
| assertErrors(report, expected); |
| |
| // Two consecutive fixing runs. |
| doFsck(conf, true); |
| doFsck(conf, true); |
| |
| // A final read-only run must come back clean. |
| assertNoErrors(doFsck(conf, false)); |
| } finally { |
| cleanupTable(tableName); |
| } |
| } |
| |
| /** |
| * Verifies that hbck detects a missing .tableinfo file and regenerates it, first when the |
| * table descriptor is not yet cached on the master, and then again after the descriptor has |
| * been modified and is cached, in which case the custom value must survive the repair. |
| */ |
| @Test (timeout=180000) |
| public void testHbckFixOrphanTable() throws Exception { |
| TableName table = TableName.valueOf("tableInfo"); |
| FileSystem fs = null; |
| Path tableinfo = null; |
| try { |
| setupTable(table); |
| |
| Path hbaseTableDir = FSUtils.getTableDir( |
| FSUtils.getRootDir(conf), table); |
| fs = hbaseTableDir.getFileSystem(conf); |
| FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir); |
| // Fail with a clear assertion instead of a NullPointerException on the next line. |
| assertNotNull(status); |
| tableinfo = status.getPath(); |
| // Move the descriptor file out of the table directory. |
| fs.rename(tableinfo, new Path("/.tableinfo")); |
| |
| //to report error if .tableinfo is missing. |
| HBaseFsck hbck = doFsck(conf, false); |
| assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NO_TABLEINFO_FILE }); |
| |
| // fix OrphanTable with default .tableinfo (htd not yet cached on master) |
| hbck = doFsck(conf, true); |
| assertNoErrors(hbck); |
| status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir); |
| assertNotNull(status); |
| |
| // Store a non-default value in the descriptor, then delete the descriptor file again. |
| HTableDescriptor htd = admin.getTableDescriptor(table); |
| htd.setValue("NOT_DEFAULT", "true"); |
| admin.disableTable(table); |
| admin.modifyTable(table, htd); |
| admin.enableTable(table); |
| fs.delete(status.getPath(), true); |
| |
| // fix OrphanTable with cache |
| htd = admin.getTableDescriptor(table); // warms up cached htd on master |
| hbck = doFsck(conf, true); |
| assertNoErrors(hbck); |
| status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir); |
| assertNotNull(status); |
| htd = admin.getTableDescriptor(table); |
| // Expected value first, so that a failure message reads correctly. |
| assertEquals("true", htd.getValue("NOT_DEFAULT")); |
| } finally { |
| try { |
| // tableinfo is still null when the test failed before the file was moved aside; |
| // guarding on it keeps a secondary NullPointerException from hiding the real failure. |
| if (fs != null && tableinfo != null) { |
| fs.rename(new Path("/.tableinfo"), tableinfo); |
| } |
| } finally { |
| // Always drop the table, even if restoring the file failed. |
| cleanupTable(table); |
| } |
| } |
| } |
| |
| /** |
| * Checks the value of shouldIgnorePreCheckPermission(): true after a read-only run, false |
| * after a fixing run, and true again once explicitly set on the instance. |
| */ |
| @Test (timeout=180000) |
| public void testReadOnlyProperty() throws Exception { |
| final String label = "shouldIgnorePreCheckPermission"; |
| |
| // Read-only run. |
| HBaseFsck readOnly = doFsck(conf, false); |
| Assert.assertTrue(label, readOnly.shouldIgnorePreCheckPermission()); |
| |
| // Fixing run. |
| HBaseFsck fixing = doFsck(conf, true); |
| Assert.assertFalse(label, fixing.shouldIgnorePreCheckPermission()); |
| |
| // Fixing run with the flag overridden afterwards. |
| HBaseFsck overridden = doFsck(conf, true); |
| overridden.setIgnorePreCheckPermission(true); |
| Assert.assertTrue(label, overridden.shouldIgnorePreCheckPermission()); |
| } |
| |
| /** |
| * Builds a damaged table in which the region [A, B) is removed via deleteRegion and a new |
| * region [A2, B) is created and assigned in its place, then verifies that hbck reports an |
| * orphan hdfs region, a region missing from meta/deployment and a hole in the region chain, |
| * and that a fixing run repairs everything without losing rows. |
| */ |
| @Test (timeout=180000) |
| public void testOverlapAndOrphan() throws Exception { |
| final TableName tableName = TableName.valueOf("tableOverlapAndOrphan"); |
| try { |
| setupTable(tableName); |
| assertEquals(ROWKEYS.length, countRows()); |
| |
| // Damage the [A, B) region while the table is disabled. |
| final byte[] keyA = Bytes.toBytes("A"); |
| final byte[] keyB = Bytes.toBytes("B"); |
| admin.disableTable(tableName); |
| deleteRegion(conf, tbl.getTableDescriptor(), keyA, keyB, |
| true, true, false, true, HRegionInfo.DEFAULT_REPLICA_ID); |
| admin.enableTable(tableName); |
| |
| // Add a narrower region [A2, B) and wait until it is online. |
| final HRegionInfo narrowRegion = |
| createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B")); |
| TEST_UTIL.assignRegion(narrowRegion); |
| final ServerName hostingServer = regionStates.getRegionServerOfRegion(narrowRegion); |
| TEST_UTIL.assertRegionOnServer(narrowRegion, hostingServer, REGION_ONLINE_TIMEOUT); |
| |
| final HBaseFsck.ErrorReporter.ERROR_CODE[] expected = { |
| HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_HDFS_REGION, |
| HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, |
| HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN |
| }; |
| HBaseFsck damaged = doFsck(conf, false); |
| assertErrors(damaged, expected); |
| |
| // Repair. |
| doFsck(conf, true); |
| |
| // Everything must be consistent again and no row may be missing. |
| HBaseFsck repaired = doFsck(conf, false); |
| assertNoErrors(repaired); |
| assertEquals(0, repaired.getOverlapGroups(tableName).size()); |
| assertEquals(ROWKEYS.length, countRows()); |
| } finally { |
| cleanupTable(tableName); |
| } |
| } |
| |
| /** |
| * Adds an extra region [A2, B2) on top of a healthy table, so that its start key falls |
| * inside one existing region and its end key inside another, and verifies that hbck reports |
| * the overlaps and that a fixing run removes them without losing rows. |
| */ |
| @Test (timeout=180000) |
| public void testCoveredStartKey() throws Exception { |
| final TableName tableName = TableName.valueOf("tableCoveredStartKey"); |
| try { |
| setupTable(tableName); |
| assertEquals(ROWKEYS.length, countRows()); |
| |
| // Create the straddling region and wait until it is online. |
| final HRegionInfo straddling = |
| createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B2")); |
| TEST_UTIL.assignRegion(straddling); |
| final ServerName hostingServer = regionStates.getRegionServerOfRegion(straddling); |
| TEST_UTIL.assertRegionOnServer(straddling, hostingServer, REGION_ONLINE_TIMEOUT); |
| |
| // Two overlap errors are expected, with three overlap groups; all rows still readable. |
| final HBaseFsck.ErrorReporter.ERROR_CODE[] expected = { |
| HBaseFsck.ErrorReporter.ERROR_CODE.OVERLAP_IN_REGION_CHAIN, |
| HBaseFsck.ErrorReporter.ERROR_CODE.OVERLAP_IN_REGION_CHAIN |
| }; |
| HBaseFsck damaged = doFsck(conf, false); |
| assertErrors(damaged, expected); |
| assertEquals(3, damaged.getOverlapGroups(tableName).size()); |
| assertEquals(ROWKEYS.length, countRows()); |
| |
| // Repair. |
| doFsck(conf, true); |
| |
| // No errors, no overlap groups, no lost rows. |
| HBaseFsck repaired = doFsck(conf, false); |
| assertErrors(repaired, new HBaseFsck.ErrorReporter.ERROR_CODE[0]); |
| assertEquals(0, repaired.getOverlapGroups(tableName).size()); |
| assertEquals(ROWKEYS.length, countRows()); |
| } finally { |
| cleanupTable(tableName); |
| } |
| } |
| |
| /** |
| * Removes the region [B, C) from a healthy table and verifies that hbck reports a hole in |
| * the region chain, that a fixing run closes the hole, and that the table afterwards holds |
| * two rows fewer than before. |
| */ |
| @Test (timeout=180000) |
| public void testRegionHole() throws Exception { |
| final TableName tableName = TableName.valueOf("tableRegionHole"); |
| try { |
| setupTable(tableName); |
| assertEquals(ROWKEYS.length, countRows()); |
| |
| // Remove [B, C) while the table is disabled, leaving a hole in the key space. |
| final byte[] holeStart = Bytes.toBytes("B"); |
| final byte[] holeEnd = Bytes.toBytes("C"); |
| admin.disableTable(tableName); |
| deleteRegion(conf, tbl.getTableDescriptor(), holeStart, holeEnd, true, true, true); |
| admin.enableTable(tableName); |
| |
| final HBaseFsck.ErrorReporter.ERROR_CODE[] expected = { |
| HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN |
| }; |
| HBaseFsck damaged = doFsck(conf, false); |
| assertErrors(damaged, expected); |
| // A hole is not an overlap, so no overlap group may be reported. |
| assertEquals(0, damaged.getOverlapGroups(tableName).size()); |
| |
| // Repair. |
| doFsck(conf, true); |
| |
| // The chain is whole again; the rows of the removed region are gone for good. |
| assertNoErrors(doFsck(conf,false)); |
| assertEquals(ROWKEYS.length - 2, countRows()); |
| } finally { |
| cleanupTable(tableName); |
| } |
| } |
| |
| /** |
| * Opens a region of a disabled table directly on a region server and verifies that hbck |
| * reports SHOULD_NOT_BE_DEPLOYED and that a fixing run resolves it. |
| */ |
| @Test (timeout=180000) |
| public void testRegionShouldNotBeDeployed() throws Exception { |
| final TableName tableName = TableName.valueOf("tableRegionShouldNotBeDeployed"); |
| try { |
| LOG.info("Starting testRegionShouldNotBeDeployed."); |
| final MiniHBaseCluster miniCluster = TEST_UTIL.getHBaseCluster(); |
| assertTrue(miniCluster.waitForActiveAndReadyMaster()); |
| |
| final byte[][] splitKeys = { |
| new byte[0], |
| Bytes.toBytes("aaa"), |
| Bytes.toBytes("bbb"), |
| Bytes.toBytes("ccc"), |
| Bytes.toBytes("ddd") |
| }; |
| final HTableDescriptor descriptor = new HTableDescriptor(tableName); |
| descriptor.addFamily(new HColumnDescriptor(FAM)); |
| |
| // Write the .tableinfo, then register the regions in meta. |
| new FSTableDescriptors(conf).createTableDescriptor(descriptor); |
| final List<HRegionInfo> regionsInMeta = |
| TEST_UTIL.createMultiRegionsInMeta(conf, descriptor, splitKeys); |
| |
| // The cluster has one region server; everything goes there. |
| final HRegionServer regionServer = miniCluster.getRegionServer(0); |
| |
| // A disable/enable cycle creates the region files. |
| admin.disableTable(tableName); |
| admin.enableTable(tableName); |
| |
| // Disable again: from here on none of the table's regions should be online. |
| admin.disableTable(tableName); |
| final HRegionInfo target = regionsInMeta.remove(0); |
| final byte[] targetName = target.getRegionName(); |
| assertEquals(-1, miniCluster.getServerWith(targetName)); |
| |
| // Open the region directly on the region server. Going through AM/ZK would not open |
| // it, and even if it did, AM would close it again, which would make the test flaky. |
| final HRegion opened = |
| HRegion.openHRegion(target, descriptor, regionServer.getWAL(target), conf); |
| regionServer.addToOnlineRegions(opened); |
| |
| final HBaseFsck.ErrorReporter.ERROR_CODE[] expected = { |
| HBaseFsck.ErrorReporter.ERROR_CODE.SHOULD_NOT_BE_DEPLOYED |
| }; |
| HBaseFsck damaged = doFsck(conf, false); |
| assertErrors(damaged, expected); |
| |
| // Repair, then confirm with a read-only run. |
| doFsck(conf, true); |
| assertNoErrors(doFsck(conf, false)); |
| } finally { |
| // The table is re-enabled before it is handed to the shared cleanup. |
| admin.enableTable(tableName); |
| cleanupTable(tableName); |
| } |
| } |
| |
| /** |
| * This test makes sure that parallel instances of Hbck are disabled: two fixing runs are |
| * started concurrently and at most one of them may succeed. The other one must be turned |
| * away with a "Duplicate hbck" exception. |
| * |
| * @throws Exception if a run fails for any reason other than being the duplicate |
| */ |
| @Test(timeout=180000) |
| public void testParallelHbck() throws Exception { |
| final ExecutorService service; |
| final Future<HBaseFsck> hbck1,hbck2; |
| |
| // Returns the finished HBaseFsck, or null when this run was rejected as a duplicate. |
| class RunHbck implements Callable<HBaseFsck> { |
| @Override |
| public HBaseFsck call(){ |
| Configuration c = new Configuration(conf); |
| c.setInt("hbase.hbck.lockfile.attempts", 1); |
| // HBASE-13574 found that in HADOOP-2.6 and later, the create file would internally retry. |
| // To avoid flakiness of the test, set low max wait time. |
| c.setInt("hbase.hbck.lockfile.maxwaittime", 3); |
| try{ |
| return doFsck(c, true); // Exclusive hbck only when fixing |
| } catch(Exception e){ |
| // Only the "Duplicate hbck" rejection is expected. The message may be null, so it is |
| // checked before use; anything else fails the test with the cause in the message. |
| String message = e.getMessage(); |
| if (message == null || !message.contains("Duplicate hbck")) { |
| fail("Unexpected exception from hbck: " + e); |
| } |
| } |
| return null; |
| } |
| } |
| service = Executors.newFixedThreadPool(2); |
| hbck1 = service.submit(new RunHbck()); |
| hbck2 = service.submit(new RunHbck()); |
| service.shutdown(); |
| //wait for 15 seconds, for both hbck calls finish |
| service.awaitTermination(15, TimeUnit.SECONDS); |
| // get() blocks until the run is done and rethrows any failure raised inside call(). |
| HBaseFsck h1 = hbck1.get(); |
| HBaseFsck h2 = hbck2.get(); |
| // Make sure only one of the calls was successful. JUnit assertions are used instead of |
| // the assert keyword, which is silently skipped when the JVM runs without -ea. |
| assertTrue("Both parallel hbck runs succeeded", h1 == null || h2 == null); |
| if (h1 != null) { |
| assertTrue(h1.getRetCode() >= 0); |
| } |
| if (h2 != null) { |
| assertTrue(h2.getRetCode() >= 0); |
| } |
| } |
| |
| /** |
| * This test makes sure that with enough retries both parallel instances |
| * of hbck will be completed successfully. |
| * |
| * @throws Exception |
| */ |
| @Test (timeout=180000) |
| public void testParallelWithRetriesHbck() throws Exception { |
| final ExecutorService service; |
| final Future<HBaseFsck> hbck1,hbck2; |
| |
| // With the ExponentialBackoffPolicyWithLimit (starting with 200 milliseconds sleep time, and |
| // max sleep time of 5 seconds), we can retry around 15 times within 80 seconds before bail out. |
| // |
| // Note: the reason to use 80 seconds is that in HADOOP-2.6 and later, the create file would |
| // retry up to HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds). See HBASE-13574 for more |
| // details. |
| final int timeoutInSeconds = 80; |
| final int sleepIntervalInMilliseconds = 200; |
| final int maxSleepTimeInMilliseconds = 6000; |
| final int maxRetryAttempts = 15; |
| |
| class RunHbck implements Callable<HBaseFsck>{ |
| |
| @Override |
| public HBaseFsck call() throws Exception { |
| // Increase retry attempts to make sure the non-active hbck doesn't get starved |
| Configuration c = new Configuration(conf); |
| c.setInt("hbase.hbck.lockfile.maxwaittime", timeoutInSeconds); |
| c.setInt("hbase.hbck.lockfile.attempt.sleep.interval", sleepIntervalInMilliseconds); |
| c.setInt("hbase.hbck.lockfile.attempt.maxsleeptime", maxSleepTimeInMilliseconds); |
| c.setInt("hbase.hbck.lockfile.attempts", maxRetryAttempts); |
| return doFsck(c, false); |
| } |
| } |
| |
| service = Executors.newFixedThreadPool(2); |
| hbck1 = service.submit(new RunHbck()); |
| hbck2 = service.submit(new RunHbck()); |
| service.shutdown(); |
| //wait for some time, for both hbck calls finish |
| service.awaitTermination(timeoutInSeconds * 2, TimeUnit.SECONDS); |
| HBaseFsck h1 = hbck1.get(); |
| HBaseFsck h2 = hbck2.get(); |
| // Both should be successful |
| assertNotNull(h1); |
| assertNotNull(h2); |
| assert(h1.getRetCode() >= 0); |
| assert(h2.getRetCode() >= 0); |
| |
| } |
| |
| /** |
| * Runs the region boundaries check on the bare cluster. The test fails only when the check |
| * throws an IllegalArgumentException complaining about an invalid DFS filename. |
| */ |
| @Test (timeout = 180000) |
| public void testRegionBoundariesCheck() throws Exception { |
| final HBaseFsck checker = doFsck(conf, false); |
| assertNoErrors(checker); |
| try { |
| // A connection is required for access to META. |
| checker.connect(); |
| checker.checkRegionBoundaries(); |
| } catch (IllegalArgumentException iae) { |
| // Any other IllegalArgumentException is deliberately ignored. |
| final boolean invalidDfsName = iae.getMessage().endsWith("not a valid DFS filename."); |
| if (invalidDfsName) { |
| fail("Table directory path is not valid." + iae.getMessage()); |
| } |
| } finally { |
| checker.close(); |
| } |
| } |
| |
| /** |
| * Tests the region boundaries check against a table that has been flushed, so that store |
| * files exist. The test fails when the check reports errors or throws an |
| * IllegalArgumentException complaining about an invalid DFS filename. |
| * |
| * @throws Exception if setting up, checking or cleaning up the table fails |
| */ |
| @Test(timeout = 180000) |
| public void testRegionBoundariesCheckWithFlushTable() throws Exception { |
| HBaseFsck hbck = doFsck(conf, false); |
| assertNoErrors(hbck); // no errors |
| TableName table = TableName.valueOf("testRegionBoundariesCheckWithFlushTable"); |
| try { |
| setupTable(table); |
| admin.flush(table); |
| hbck.connect(); // need connection to have access to META |
| hbck.checkRegionBoundaries(); |
| assertNoErrors(hbck); // no errors |
| } catch (IllegalArgumentException e) { |
| // Any other IllegalArgumentException is deliberately ignored. |
| if (e.getMessage().endsWith("not a valid DFS filename.")) { |
| fail("Table directory path is not valid." + e.getMessage()); |
| } |
| } finally { |
| try { |
| hbck.close(); |
| } finally { |
| // Drop the table like the sibling tests do, so it does not leak into later tests |
| // that run hbck over the whole cluster. |
| cleanupTable(table); |
| } |
| } |
| } |
| |
| /** |
| * Merges the regions holding keys "A" and "B" and verifies that, with the catalog janitor |
| * switched off, hbck reports no errors afterwards and no rows are lost. |
| */ |
| @Test (timeout=180000) |
| public void testHbckAfterRegionMerge() throws Exception { |
| TableName table = TableName.valueOf("testMergeRegionFilesInHdfs"); |
| try { |
| // disable CatalogJanitor |
| TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false); |
| setupTable(table); |
| assertEquals(ROWKEYS.length, countRows()); |
| |
| try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) { |
| // make sure data in regions, if in wal only there is no data loss |
| admin.flush(table); |
| HRegionInfo region1 = rl.getRegionLocation(Bytes.toBytes("A")).getRegionInfo(); |
| HRegionInfo region2 = rl.getRegionLocation(Bytes.toBytes("B")).getRegionInfo(); |
| |
| int regionCountBeforeMerge = rl.getAllRegionLocations().size(); |
| |
| assertNotEquals(region1, region2); |
| |
| // do a region merge |
| admin.mergeRegionsAsync( |
| region1.getEncodedNameAsBytes(), region2.getEncodedNameAsBytes(), false); |
| |
| // wait until region merged: poll until the region count drops, for at most 30 seconds |
| long timeout = System.currentTimeMillis() + 30 * 1000; |
| while (rl.getAllRegionLocations().size() >= regionCountBeforeMerge) { |
| if (System.currentTimeMillis() > timeout) { |
| fail("Time out waiting on region " + region1.getEncodedName() + " and " + region2 |
| .getEncodedName() + " be merged"); |
| } |
| Thread.sleep(10); |
| } |
| |
| assertEquals(ROWKEYS.length, countRows()); |
| |
| HBaseFsck hbck = doFsck(conf, false); |
| assertNoErrors(hbck); // no errors |
| } |
| |
| } finally { |
| // Re-enable the janitor for the tests that follow, then drop the table. |
| TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true); |
| cleanupTable(table); |
| } |
| } |
| /** |
| * Leaves a table's entries in hbase:meta while deleting all of its data and its directory |
| * from hdfs. A fixing run is expected to remove the table cleanly. |
| */ |
| @Test (timeout=180000) |
| public void testNoHdfsTable() throws Exception { |
| final TableName tableName = TableName.valueOf("NoHdfsTable"); |
| setupTable(tableName); |
| assertEquals(ROWKEYS.length, countRows()); |
| |
| // make sure data in regions, if in wal only there is no data loss |
| admin.flush(tableName); |
| |
| // Delete the hdfs dirs of all four regions, walking consecutive key boundaries; the meta |
| // rows are kept. |
| final String[] boundaries = { "", "A", "B", "C", "" }; |
| for (int i = 0; i + 1 < boundaries.length; i++) { |
| deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(boundaries[i]), |
| Bytes.toBytes(boundaries[i + 1]), false, false, true); // don't rm meta |
| } |
| |
| // also remove the table directory in hdfs |
| deleteTableDir(tableName); |
| |
| // One NOT_IN_HDFS per region plus an orphaned table state. |
| final HBaseFsck.ErrorReporter.ERROR_CODE[] expected = { |
| HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS, |
| HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS, |
| HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS, |
| HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS, |
| HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_TABLE_STATE |
| }; |
| HBaseFsck damaged = doFsck(conf, false); |
| assertErrors(damaged, expected); |
| // holes are separate from overlap groups |
| assertEquals(0, damaged.getOverlapGroups(tableName).size()); |
| |
| // Repair: detect the dangling regions and remove them. |
| doFsck(conf, true); |
| |
| // Clean report, and the table itself must be gone. |
| assertNoErrors(doFsck(conf,false)); |
| assertFalse("Table " + tableName + " should have been deleted", admin.tableExists(tableName)); |
| } |
| |
| /** |
| * Deletes the hbase.version file and verifies that hbck reports NO_VERSION_FILE and that a |
| * fixing run resolves it. |
| */ |
| @Test (timeout=180000) |
| public void testNoVersionFile() throws Exception { |
| // Locate the version file under the HBase root directory and delete it. |
| final Path hbaseRoot = FSUtils.getRootDir(conf); |
| final FileSystem fileSystem = hbaseRoot.getFileSystem(conf); |
| fileSystem.delete(new Path(hbaseRoot, HConstants.VERSION_FILE_NAME), true); |
| |
| // The read-only run must notice the missing file. |
| final HBaseFsck.ErrorReporter.ERROR_CODE[] expected = { |
| HBaseFsck.ErrorReporter.ERROR_CODE.NO_VERSION_FILE |
| }; |
| HBaseFsck damaged = doFsck(conf, false); |
| assertErrors(damaged, expected); |
| |
| // Repair. |
| doFsck(conf, true); |
| |
| // Nothing left to report. |
| assertNoErrors(doFsck(conf, false)); |
| } |
| |
| /** |
| * Deletes a table's state via MetaTableAccessor and verifies that hbck reports |
| * NO_TABLE_STATE, that a fixing run resolves it, and that the table is enabled afterwards. |
| */ |
| @Test (timeout=180000) |
| public void testNoTableState() throws Exception { |
| final TableName tableName = TableName.valueOf("testNoTableState"); |
| try { |
| setupTable(tableName); |
| // make sure data in regions, if in wal only there is no data loss |
| admin.flush(tableName); |
| |
| // Remove the table state. |
| MetaTableAccessor.deleteTableState(TEST_UTIL.getConnection(), tableName); |
| |
| // The read-only run must notice the missing state. |
| final HBaseFsck.ErrorReporter.ERROR_CODE[] expected = { |
| HBaseFsck.ErrorReporter.ERROR_CODE.NO_TABLE_STATE |
| }; |
| HBaseFsck damaged = doFsck(conf, false); |
| assertErrors(damaged, expected); |
| |
| // Repair. |
| doFsck(conf, true); |
| |
| assertNoErrors(doFsck(conf, false)); |
| final boolean enabled = TEST_UTIL.getHBaseAdmin().isTableEnabled(tableName); |
| assertTrue(enabled); |
| } finally { |
| cleanupTable(tableName); |
| } |
| } |
| |
| /** |
| * This creates two tables and mess both of them and fix them one by one |
| */ |
| @Test (timeout=180000) |
| public void testFixByTable() throws Exception { |
| TableName table1 = |
| TableName.valueOf("testFixByTable1"); |
| TableName table2 = |
| TableName.valueOf("testFixByTable2"); |
| try { |
| setupTable(table1); |
| // make sure data in regions, if in wal only there is no data loss |
| admin.flush(table1); |
| // Mess them up by leaving a hole in the hdfs data |
| deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), |
| Bytes.toBytes("C"), false, false, true); // don't rm meta |
| |
| setupTable(table2); |
| // make sure data in regions, if in wal only there is no data loss |
| admin.flush(table2); |
| // Mess them up by leaving a hole in the hdfs data |
| deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), false, |
| false, true); // don't rm meta |
| |
| HBaseFsck hbck = doFsck(conf, false); |
| assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS, HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS }); |
| |
| // fix hole in table 1 |
| doFsck(conf, true, table1); |
| // check that hole in table 1 fixed |
| assertNoErrors(doFsck(conf, false, table1)); |
| // check that hole in table 2 still there |
| assertErrors(doFsck(conf, false, table2), new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS }); |
| |
| // fix hole in table 2 |
| doFsck(conf, true, table2); |
| // check that hole in both tables fixed |
| assertNoErrors(doFsck(conf, false)); |
| assertEquals(ROWKEYS.length - 2, countRows()); |
| } finally { |
| cleanupTable(table1); |
| cleanupTable(table2); |
| } |
| } |
| /** |
| * A split parent in meta, in hdfs, and not deployed |
| */ |
| @Test (timeout=180000) |
| public void testLingeringSplitParent() throws Exception { |
| TableName table = |
| TableName.valueOf("testLingeringSplitParent"); |
| Table meta = null; |
| try { |
| setupTable(table); |
| assertEquals(ROWKEYS.length, countRows()); |
| |
| // make sure data in regions, if in wal only there is no data loss |
| admin.flush(table); |
| |
| HRegionLocation location; |
| try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) { |
| location = rl.getRegionLocation(Bytes.toBytes("B")); |
| } |
| |
| // Delete one region from meta, but not hdfs, unassign it. |
| deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), |
| Bytes.toBytes("C"), true, true, false); |
| |
| // Create a new meta entry to fake it as a split parent. |
| meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService); |
| HRegionInfo hri = location.getRegionInfo(); |
| |
| HRegionInfo a = new HRegionInfo(tbl.getName(), |
| Bytes.toBytes("B"), Bytes.toBytes("BM")); |
| HRegionInfo b = new HRegionInfo(tbl.getName(), |
| Bytes.toBytes("BM"), Bytes.toBytes("C")); |
| |
| hri.setOffline(true); |
| hri.setSplit(true); |
| |
| MetaTableAccessor.addRegionToMeta(meta, hri, a, b); |
| meta.close(); |
| admin.flush(TableName.META_TABLE_NAME); |
| |
| HBaseFsck hbck = doFsck(conf, false); |
| assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { |
| HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_SPLIT_PARENT, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN}); |
| |
| // regular repair cannot fix lingering split parent |
| hbck = doFsck(conf, true); |
| assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { |
| HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_SPLIT_PARENT, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); |
| assertFalse(hbck.shouldRerun()); |
| hbck = doFsck(conf, false); |
| assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { |
| HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_SPLIT_PARENT, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN}); |
| |
| // fix lingering split parent |
| hbck = new HBaseFsck(conf, hbfsckExecutorService); |
| hbck.connect(); |
| HBaseFsck.setDisplayFullReport(); // i.e. -details |
| hbck.setTimeLag(0); |
| hbck.setFixSplitParents(true); |
| hbck.onlineHbck(); |
| assertTrue(hbck.shouldRerun()); |
| hbck.close(); |
| |
| Get get = new Get(hri.getRegionName()); |
| Result result = meta.get(get); |
| assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY, |
| HConstants.SPLITA_QUALIFIER).isEmpty()); |
| assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY, |
| HConstants.SPLITB_QUALIFIER).isEmpty()); |
| admin.flush(TableName.META_TABLE_NAME); |
| |
| // fix other issues |
| doFsck(conf, true); |
| |
| // check that all are fixed |
| assertNoErrors(doFsck(conf, false)); |
| assertEquals(ROWKEYS.length, countRows()); |
| } finally { |
| cleanupTable(table); |
| IOUtils.closeQuietly(meta); |
| } |
| } |
| |
| /** |
| * Tests that LINGERING_SPLIT_PARENT is not erroneously reported for |
| * valid cases where the daughters are there. |
| */ |
| @Test (timeout=180000) |
| public void testValidLingeringSplitParent() throws Exception { |
| TableName table = |
| TableName.valueOf("testLingeringSplitParent"); |
| Table meta = null; |
| try { |
| setupTable(table); |
| assertEquals(ROWKEYS.length, countRows()); |
| |
| // make sure data in regions, if in wal only there is no data loss |
| admin.flush(table); |
| |
| try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) { |
| HRegionLocation location = rl.getRegionLocation(Bytes.toBytes("B")); |
| |
| meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService); |
| HRegionInfo hri = location.getRegionInfo(); |
| |
| // do a regular split |
| byte[] regionName = location.getRegionInfo().getRegionName(); |
| admin.splitRegion(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM")); |
| TestEndToEndSplitTransaction.blockUntilRegionSplit(conf, 60000, regionName, true); |
| |
| // TODO: fixHdfsHoles does not work against splits, since the parent dir lingers on |
| // for some time until children references are deleted. HBCK erroneously sees this as |
| // overlapping regions |
| HBaseFsck hbck = doFsck(conf, true, true, false, false, false, true, true, true, false, |
| false, false, null); |
| assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {}); //no LINGERING_SPLIT_PARENT reported |
| |
| // assert that the split hbase:meta entry is still there. |
| Get get = new Get(hri.getRegionName()); |
| Result result = meta.get(get); |
| assertNotNull(result); |
| assertNotNull(MetaTableAccessor.getHRegionInfo(result)); |
| |
| assertEquals(ROWKEYS.length, countRows()); |
| |
| // assert that we still have the split regions |
| assertEquals(rl.getStartKeys().length, SPLITS.length + 1 + 1); //SPLITS + 1 is # regions |
| // pre-split. |
| assertNoErrors(doFsck(conf, false)); |
| } |
| } finally { |
| cleanupTable(table); |
| IOUtils.closeQuietly(meta); |
| } |
| } |
| |
| /** |
| * Split crashed after write to hbase:meta finished for the parent region, but |
| * failed to write daughters (pre HBASE-7721 codebase) |
| */ |
| @Test(timeout=75000) |
| public void testSplitDaughtersNotInMeta() throws Exception { |
| TableName table = TableName.valueOf("testSplitdaughtersNotInMeta"); |
| Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService); |
| try { |
| setupTable(table); |
| assertEquals(ROWKEYS.length, countRows()); |
| |
| // make sure data in regions, if in wal only there is no data loss |
| admin.flush(table); |
| |
| try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) { |
| HRegionLocation location = rl.getRegionLocation(Bytes.toBytes("B")); |
| |
| HRegionInfo hri = location.getRegionInfo(); |
| |
| // Disable CatalogJanitor to prevent it from cleaning up the parent region |
| // after split. |
| admin.enableCatalogJanitor(false); |
| |
| // do a regular split |
| byte[] regionName = location.getRegionInfo().getRegionName(); |
| admin.splitRegion(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM")); |
| TestEndToEndSplitTransaction.blockUntilRegionSplit(conf, 60000, regionName, true); |
| |
| PairOfSameType<HRegionInfo> daughters = MetaTableAccessor.getDaughterRegions( |
| meta.get(new Get(regionName))); |
| |
| // Delete daughter regions from meta, but not hdfs, unassign it. |
| |
| ServerName firstSN = |
| rl.getRegionLocation(daughters.getFirst().getStartKey()).getServerName(); |
| ServerName secondSN = |
| rl.getRegionLocation(daughters.getSecond().getStartKey()).getServerName(); |
| |
| undeployRegion(connection, firstSN, daughters.getFirst()); |
| undeployRegion(connection, secondSN, daughters.getSecond()); |
| |
| List<Delete> deletes = new ArrayList<>(); |
| deletes.add(new Delete(daughters.getFirst().getRegionName())); |
| deletes.add(new Delete(daughters.getSecond().getRegionName())); |
| meta.delete(deletes); |
| |
| // Remove daughters from regionStates |
| RegionStates regionStates = TEST_UTIL.getMiniHBaseCluster().getMaster(). |
| getAssignmentManager().getRegionStates(); |
| regionStates.deleteRegion(daughters.getFirst()); |
| regionStates.deleteRegion(daughters.getSecond()); |
| |
| HBaseFsck hbck = doFsck(conf, false); |
| assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { |
| HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, |
| HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, |
| HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); //no LINGERING_SPLIT_PARENT |
| |
| // now fix it. The fix should not revert the region split, but add daughters to META |
| hbck = doFsck(conf, true, true, false, false, false, false, false, false, false, |
| false, false, null); |
| assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { |
| HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, |
| HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, |
| HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); |
| |
| // assert that the split hbase:meta entry is still there. |
| Get get = new Get(hri.getRegionName()); |
| Result result = meta.get(get); |
| assertNotNull(result); |
| assertNotNull(MetaTableAccessor.getHRegionInfo(result)); |
| |
| assertEquals(ROWKEYS.length, countRows()); |
| |
| // assert that we still have the split regions |
| assertEquals(rl.getStartKeys().length, SPLITS.length + 1 + 1); //SPLITS + 1 is # regions |
| // pre-split. |
| assertNoErrors(doFsck(conf, false)); //should be fixed by now |
| } |
| } finally { |
| admin.enableCatalogJanitor(true); |
| meta.close(); |
| cleanupTable(table); |
| } |
| } |
| |
| /** |
| * This creates and fixes a bad table with a missing region which is the 1st region -- hole in |
| * meta and data missing in the fs. |
| */ |
| @Test(timeout=120000) |
| public void testMissingFirstRegion() throws Exception { |
| TableName table = TableName.valueOf("testMissingFirstRegion"); |
| try { |
| setupTable(table); |
| assertEquals(ROWKEYS.length, countRows()); |
| |
| // Mess it up by leaving a hole in the assignment, meta, and hdfs data |
| admin.disableTable(table); |
| deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""), Bytes.toBytes("A"), true, |
| true, true); |
| admin.enableTable(table); |
| |
| HBaseFsck hbck = doFsck(conf, false); |
| assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY }); |
| // fix hole |
| doFsck(conf, true); |
| // check that hole fixed |
| assertNoErrors(doFsck(conf, false)); |
| } finally { |
| cleanupTable(table); |
| } |
| } |
| |
| /** |
| * This creates and fixes a bad table with a missing region which is the 1st region -- hole in |
| * meta and data missing in the fs. |
| */ |
| @Test(timeout=120000) |
| public void testRegionDeployedNotInHdfs() throws Exception { |
| TableName table = |
| TableName.valueOf("testSingleRegionDeployedNotInHdfs"); |
| try { |
| setupTable(table); |
| admin.flush(table); |
| |
| // Mess it up by deleting region dir |
| deleteRegion(conf, tbl.getTableDescriptor(), |
| HConstants.EMPTY_START_ROW, Bytes.toBytes("A"), false, |
| false, true); |
| |
| HBaseFsck hbck = doFsck(conf, false); |
| assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS }); |
| // fix hole |
| doFsck(conf, true); |
| // check that hole fixed |
| assertNoErrors(doFsck(conf, false)); |
| } finally { |
| cleanupTable(table); |
| } |
| } |
| |
| /** |
| * This creates and fixes a bad table with missing last region -- hole in meta and data missing in |
| * the fs. |
| */ |
| @Test(timeout=120000) |
| public void testMissingLastRegion() throws Exception { |
| TableName table = |
| TableName.valueOf("testMissingLastRegion"); |
| try { |
| setupTable(table); |
| assertEquals(ROWKEYS.length, countRows()); |
| |
| // Mess it up by leaving a hole in the assignment, meta, and hdfs data |
| admin.disableTable(table); |
| deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), Bytes.toBytes(""), true, |
| true, true); |
| admin.enableTable(table); |
| |
| HBaseFsck hbck = doFsck(conf, false); |
| assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY }); |
| // fix hole |
| doFsck(conf, true); |
| // check that hole fixed |
| assertNoErrors(doFsck(conf, false)); |
| } finally { |
| cleanupTable(table); |
| } |
| } |
| |
| /** |
| * Test -noHdfsChecking option can detect and fix assignments issue. |
| */ |
| @Test (timeout=180000) |
| public void testFixAssignmentsAndNoHdfsChecking() throws Exception { |
| TableName table = |
| TableName.valueOf("testFixAssignmentsAndNoHdfsChecking"); |
| try { |
| setupTable(table); |
| assertEquals(ROWKEYS.length, countRows()); |
| |
| // Mess it up by closing a region |
| deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"), true, |
| false, false, false, HRegionInfo.DEFAULT_REPLICA_ID); |
| |
| // verify there is no other errors |
| HBaseFsck hbck = doFsck(conf, false); |
| assertErrors(hbck, |
| new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_DEPLOYED, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); |
| |
| // verify that noHdfsChecking report the same errors |
| HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService); |
| fsck.connect(); |
| HBaseFsck.setDisplayFullReport(); // i.e. -details |
| fsck.setTimeLag(0); |
| fsck.setCheckHdfs(false); |
| fsck.onlineHbck(); |
| assertErrors(fsck, |
| new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_DEPLOYED, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); |
| fsck.close(); |
| |
| // verify that fixAssignments works fine with noHdfsChecking |
| fsck = new HBaseFsck(conf, hbfsckExecutorService); |
| fsck.connect(); |
| HBaseFsck.setDisplayFullReport(); // i.e. -details |
| fsck.setTimeLag(0); |
| fsck.setCheckHdfs(false); |
| fsck.setFixAssignments(true); |
| fsck.onlineHbck(); |
| assertTrue(fsck.shouldRerun()); |
| fsck.onlineHbck(); |
| assertNoErrors(fsck); |
| |
| assertEquals(ROWKEYS.length, countRows()); |
| |
| fsck.close(); |
| } finally { |
| cleanupTable(table); |
| } |
| } |
| |
| /** |
| * Test -noHdfsChecking option can detect region is not in meta but deployed. |
| * However, it can not fix it without checking Hdfs because we need to get |
| * the region info from Hdfs in this case, then to patch the meta. |
| */ |
| @Test (timeout=180000) |
| public void testFixMetaNotWorkingWithNoHdfsChecking() throws Exception { |
| TableName table = |
| TableName.valueOf("testFixMetaNotWorkingWithNoHdfsChecking"); |
| try { |
| setupTable(table); |
| assertEquals(ROWKEYS.length, countRows()); |
| |
| // Mess it up by deleting a region from the metadata |
| deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), |
| Bytes.toBytes("B"), false, true, false, false, HRegionInfo.DEFAULT_REPLICA_ID); |
| |
| // verify there is no other errors |
| HBaseFsck hbck = doFsck(conf, false); |
| assertErrors(hbck, |
| new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); |
| |
| // verify that noHdfsChecking report the same errors |
| HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService); |
| fsck.connect(); |
| HBaseFsck.setDisplayFullReport(); // i.e. -details |
| fsck.setTimeLag(0); |
| fsck.setCheckHdfs(false); |
| fsck.onlineHbck(); |
| assertErrors(fsck, |
| new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); |
| fsck.close(); |
| |
| // verify that fixMeta doesn't work with noHdfsChecking |
| fsck = new HBaseFsck(conf, hbfsckExecutorService); |
| fsck.connect(); |
| HBaseFsck.setDisplayFullReport(); // i.e. -details |
| fsck.setTimeLag(0); |
| fsck.setCheckHdfs(false); |
| fsck.setFixAssignments(true); |
| fsck.setFixMeta(true); |
| fsck.onlineHbck(); |
| assertFalse(fsck.shouldRerun()); |
| assertErrors(fsck, |
| new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); |
| fsck.close(); |
| |
| // fix the cluster so other tests won't be impacted |
| fsck = doFsck(conf, true); |
| assertTrue(fsck.shouldRerun()); |
| fsck = doFsck(conf, true); |
| assertNoErrors(fsck); |
| } finally { |
| cleanupTable(table); |
| } |
| } |
| |
| /** |
| * Test -fixHdfsHoles doesn't work with -noHdfsChecking option, |
| * and -noHdfsChecking can't detect orphan Hdfs region. |
| */ |
| @Test (timeout=180000) |
| public void testFixHdfsHolesNotWorkingWithNoHdfsChecking() throws Exception { |
| TableName table = |
| TableName.valueOf("testFixHdfsHolesNotWorkingWithNoHdfsChecking"); |
| try { |
| setupTable(table); |
| assertEquals(ROWKEYS.length, countRows()); |
| |
| // Mess it up by creating an overlap in the metadata |
| admin.disableTable(table); |
| deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"), true, |
| true, false, true, HRegionInfo.DEFAULT_REPLICA_ID); |
| admin.enableTable(table); |
| |
| HRegionInfo hriOverlap = |
| createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B")); |
| TEST_UTIL.assignRegion(hriOverlap); |
| |
| ServerName server = regionStates.getRegionServerOfRegion(hriOverlap); |
| TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT); |
| |
| HBaseFsck hbck = doFsck(conf, false); |
| assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { |
| HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_HDFS_REGION, HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, |
| HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN}); |
| |
| // verify that noHdfsChecking can't detect ORPHAN_HDFS_REGION |
| HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService); |
| fsck.connect(); |
| HBaseFsck.setDisplayFullReport(); // i.e. -details |
| fsck.setTimeLag(0); |
| fsck.setCheckHdfs(false); |
| fsck.onlineHbck(); |
| assertErrors(fsck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); |
| fsck.close(); |
| |
| // verify that fixHdfsHoles doesn't work with noHdfsChecking |
| fsck = new HBaseFsck(conf, hbfsckExecutorService); |
| fsck.connect(); |
| HBaseFsck.setDisplayFullReport(); // i.e. -details |
| fsck.setTimeLag(0); |
| fsck.setCheckHdfs(false); |
| fsck.setFixHdfsHoles(true); |
| fsck.setFixHdfsOverlaps(true); |
| fsck.setFixHdfsOrphans(true); |
| fsck.onlineHbck(); |
| assertFalse(fsck.shouldRerun()); |
| assertErrors(fsck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); |
| fsck.close(); |
| } finally { |
| if (admin.isTableDisabled(table)) { |
| admin.enableTable(table); |
| } |
| cleanupTable(table); |
| } |
| } |
| |
| /** |
| * This creates a table and then corrupts an hfile. Hbck should quarantine the file. |
| */ |
| @Test(timeout=180000) |
| public void testQuarantineCorruptHFile() throws Exception { |
| TableName table = TableName.valueOf(name.getMethodName()); |
| try { |
| setupTable(table); |
| assertEquals(ROWKEYS.length, countRows()); |
| admin.flush(table); // flush is async. |
| |
| FileSystem fs = FileSystem.get(conf); |
| Path hfile = getFlushedHFile(fs, table); |
| |
| // Mess it up by leaving a hole in the assignment, meta, and hdfs data |
| admin.disableTable(table); |
| |
| // create new corrupt file called deadbeef (valid hfile name) |
| Path corrupt = new Path(hfile.getParent(), "deadbeef"); |
| TestHFile.truncateFile(fs, hfile, corrupt); |
| LOG.info("Created corrupted file " + corrupt); |
| HBaseFsck.debugLsr(conf, FSUtils.getRootDir(conf)); |
| |
| // we cannot enable here because enable never finished due to the corrupt region. |
| HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, table); |
| assertEquals(res.getRetCode(), 0); |
| HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker(); |
| assertEquals(hfcc.getHFilesChecked(), 5); |
| assertEquals(hfcc.getCorrupted().size(), 1); |
| assertEquals(hfcc.getFailures().size(), 0); |
| assertEquals(hfcc.getQuarantined().size(), 1); |
| assertEquals(hfcc.getMissing().size(), 0); |
| |
| // Its been fixed, verify that we can enable. |
| admin.enableTable(table); |
| } finally { |
| cleanupTable(table); |
| } |
| } |
| |
| /** |
| * This creates a table and simulates the race situation where a concurrent compaction or split |
| * has removed an hfile after the corruption checker learned about it. |
| */ |
| @Test(timeout=180000) |
| public void testQuarantineMissingHFile() throws Exception { |
| TableName table = TableName.valueOf(name.getMethodName()); |
| |
| // inject a fault in the hfcc created. |
| final FileSystem fs = FileSystem.get(conf); |
| HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) { |
| @Override |
| public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) |
| throws IOException { |
| return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) { |
| AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false); |
| @Override |
| protected void checkHFile(Path p) throws IOException { |
| if (attemptedFirstHFile.compareAndSet(false, true)) { |
| assertTrue(fs.delete(p, true)); // make sure delete happened. |
| } |
| super.checkHFile(p); |
| } |
| }; |
| } |
| }; |
| doQuarantineTest(table, hbck, 4, 0, 0, 0, 1); // 4 attempted, but 1 missing. |
| hbck.close(); |
| } |
| |
| /** |
| * This creates and fixes a bad table with regions that has startkey == endkey |
| */ |
| @Test (timeout=180000) |
| public void testDegenerateRegions() throws Exception { |
| TableName table = TableName.valueOf("tableDegenerateRegions"); |
| try { |
| setupTable(table); |
| assertNoErrors(doFsck(conf, false)); |
| assertEquals(ROWKEYS.length, countRows()); |
| |
| // Now let's mess it up, by adding a region with a duplicate startkey |
| HRegionInfo hriDupe = |
| createRegion(tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("B")); |
| TEST_UTIL.assignRegion(hriDupe); |
| |
| ServerName server = regionStates.getRegionServerOfRegion(hriDupe); |
| TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT); |
| |
| HBaseFsck hbck = doFsck(conf,false); |
| assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.DEGENERATE_REGION, HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS, |
| HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS }); |
| assertEquals(2, hbck.getOverlapGroups(table).size()); |
| assertEquals(ROWKEYS.length, countRows()); |
| |
| // fix the degenerate region. |
| doFsck(conf, true); |
| |
| // check that the degenerate region is gone and no data loss |
| HBaseFsck hbck2 = doFsck(conf,false); |
| assertNoErrors(hbck2); |
| assertEquals(0, hbck2.getOverlapGroups(table).size()); |
| assertEquals(ROWKEYS.length, countRows()); |
| } finally { |
| cleanupTable(table); |
| } |
| } |
| |
| /** |
| * Test mission REGIONINFO_QUALIFIER in hbase:meta |
| */ |
| @Test (timeout=180000) |
| public void testMissingRegionInfoQualifier() throws Exception { |
| Connection connection = ConnectionFactory.createConnection(conf); |
| TableName table = TableName.valueOf("testMissingRegionInfoQualifier"); |
| try { |
| setupTable(table); |
| |
| // Mess it up by removing the RegionInfo for one region. |
| final List<Delete> deletes = new LinkedList<Delete>(); |
| Table meta = connection.getTable(TableName.META_TABLE_NAME, hbfsckExecutorService); |
| MetaTableAccessor.fullScanRegions(connection, new MetaTableAccessor.Visitor() { |
| |
| @Override |
| public boolean visit(Result rowResult) throws IOException { |
| HRegionInfo hri = MetaTableAccessor.getHRegionInfo(rowResult); |
| if (hri != null && !hri.getTable().isSystemTable()) { |
| Delete delete = new Delete(rowResult.getRow()); |
| delete.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER); |
| deletes.add(delete); |
| } |
| return true; |
| } |
| }); |
| meta.delete(deletes); |
| |
| // Mess it up by creating a fake hbase:meta entry with no associated RegionInfo |
| meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")) |
| .addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER, |
| Bytes.toBytes("node1:60020"))); |
| meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")) |
| .addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER, |
| Bytes.toBytes(1362150791183L))); |
| meta.close(); |
| |
| HBaseFsck hbck = doFsck(conf, false); |
| assertTrue(hbck.getErrors().getErrorList().contains(HBaseFsck.ErrorReporter.ERROR_CODE.EMPTY_META_CELL)); |
| |
| // fix reference file |
| hbck = doFsck(conf, true); |
| |
| // check that reference file fixed |
| assertFalse(hbck.getErrors().getErrorList().contains(HBaseFsck.ErrorReporter.ERROR_CODE.EMPTY_META_CELL)); |
| } finally { |
| cleanupTable(table); |
| } |
| connection.close(); |
| } |
| |
| /** |
| * Test pluggable error reporter. It can be plugged in |
| * from system property or configuration. |
| */ |
| @Test (timeout=180000) |
| public void testErrorReporter() throws Exception { |
| try { |
| MockErrorReporter.calledCount = 0; |
| doFsck(conf, false); |
| assertEquals(MockErrorReporter.calledCount, 0); |
| |
| conf.set("hbasefsck.errorreporter", MockErrorReporter.class.getName()); |
| doFsck(conf, false); |
| assertTrue(MockErrorReporter.calledCount > 20); |
| } finally { |
| conf.set("hbasefsck.errorreporter", |
| HBaseFsck.PrintingErrorReporter.class.getName()); |
| MockErrorReporter.calledCount = 0; |
| } |
| } |
| |
| @Test(timeout=180000) |
| public void testCheckTableLocks() throws Exception { |
| IncrementingEnvironmentEdge edge = new IncrementingEnvironmentEdge(0); |
| EnvironmentEdgeManager.injectEdge(edge); |
| // check no errors |
| HBaseFsck hbck = doFsck(conf, false); |
| assertNoErrors(hbck); |
| |
| ServerName mockName = ServerName.valueOf("localhost", 60000, 1); |
| final TableName tableName = TableName.valueOf("foo"); |
| |
| // obtain one lock |
| final TableLockManager tableLockManager = |
| TableLockManager.createTableLockManager(conf, TEST_UTIL.getZooKeeperWatcher(), mockName); |
| TableLockManager.TableLock |
| writeLock = tableLockManager.writeLock(tableName, "testCheckTableLocks"); |
| writeLock.acquire(); |
| hbck = doFsck(conf, false); |
| assertNoErrors(hbck); // should not have expired, no problems |
| |
| edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT, |
| TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS)); // let table lock expire |
| |
| hbck = doFsck(conf, false); |
| assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { |
| HBaseFsck.ErrorReporter.ERROR_CODE.EXPIRED_TABLE_LOCK}); |
| |
| final CountDownLatch latch = new CountDownLatch(1); |
| new Thread() { |
| @Override |
| public void run() { |
| TableLockManager.TableLock |
| readLock = tableLockManager.writeLock(tableName, "testCheckTableLocks"); |
| try { |
| latch.countDown(); |
| readLock.acquire(); |
| } catch (IOException ex) { |
| fail(); |
| } catch (IllegalStateException ex) { |
| return; // expected, since this will be reaped under us. |
| } |
| fail("should not have come here"); |
| }; |
| }.start(); |
| |
| latch.await(); // wait until thread starts |
| Threads.sleep(300); // wait some more to ensure writeLock.acquire() is called |
| |
| hbck = doFsck(conf, false); |
| // still one expired, one not-expired |
| assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { |
| HBaseFsck.ErrorReporter.ERROR_CODE.EXPIRED_TABLE_LOCK}); |
| |
| edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT, |
| TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS)); // let table lock expire |
| |
| hbck = doFsck(conf, false); |
| assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { |
| HBaseFsck.ErrorReporter.ERROR_CODE.EXPIRED_TABLE_LOCK, |
| HBaseFsck.ErrorReporter.ERROR_CODE.EXPIRED_TABLE_LOCK}); // both are expired |
| |
| Configuration localConf = new Configuration(conf); |
| // reaping from ZKInterProcessWriteLock uses znode cTime, |
| // which is not injectable through EnvironmentEdge |
| localConf.setLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT, 1); |
| |
| Threads.sleep(10); |
| hbck = doFsck(localConf, true); // now fix both cases |
| |
| hbck = doFsck(localConf, false); |
| assertNoErrors(hbck); |
| |
| // ensure that locks are deleted |
| writeLock = tableLockManager.writeLock(tableName, "should acquire without blocking"); |
| writeLock.acquire(); // this should not block. |
| writeLock.release(); // release for clean state |
| tableLockManager.tableDeleted(tableName); |
| } |
| |
| @Test(timeout=180000) |
| public void testCheckReplication() throws Exception { |
| // check no errors |
| HBaseFsck hbck = doFsck(conf, false); |
| assertNoErrors(hbck); |
| |
| // create peer |
| ReplicationAdmin replicationAdmin = new ReplicationAdmin(conf); |
| Assert.assertEquals(0, replicationAdmin.getPeersCount()); |
| int zkPort = conf.getInt(HConstants.ZOOKEEPER_CLIENT_PORT, |
| HConstants.DEFAULT_ZOOKEPER_CLIENT_PORT); |
| ReplicationPeerConfig rpc = new ReplicationPeerConfig(); |
| rpc.setClusterKey("127.0.0.1:" + zkPort + ":/hbase"); |
| replicationAdmin.addPeer("1", rpc, null); |
| replicationAdmin.getPeersCount(); |
| Assert.assertEquals(1, replicationAdmin.getPeersCount()); |
| |
| // create replicator |
| ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "Test Hbase Fsck", connection); |
| ReplicationQueues repQueues = |
| ReplicationFactory.getReplicationQueues(new ReplicationQueuesArguments(conf, connection, |
| zkw)); |
| repQueues.init("server1"); |
| // queues for current peer, no errors |
| repQueues.addLog("1", "file1"); |
| repQueues.addLog("1-server2", "file1"); |
| Assert.assertEquals(2, repQueues.getAllQueues().size()); |
| hbck = doFsck(conf, false); |
| assertNoErrors(hbck); |
| |
| // queues for removed peer |
| repQueues.addLog("2", "file1"); |
| repQueues.addLog("2-server2", "file1"); |
| Assert.assertEquals(4, repQueues.getAllQueues().size()); |
| hbck = doFsck(conf, false); |
| assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { |
| HBaseFsck.ErrorReporter.ERROR_CODE.UNDELETED_REPLICATION_QUEUE, |
| HBaseFsck.ErrorReporter.ERROR_CODE.UNDELETED_REPLICATION_QUEUE }); |
| |
| // fix the case |
| hbck = doFsck(conf, true); |
| hbck = doFsck(conf, false); |
| assertNoErrors(hbck); |
| // ensure only "2" is deleted |
| Assert.assertEquals(2, repQueues.getAllQueues().size()); |
| Assert.assertNull(repQueues.getLogsInQueue("2")); |
| Assert.assertNull(repQueues.getLogsInQueue("2-sever2")); |
| |
| replicationAdmin.removePeer("1"); |
| repQueues.removeAllQueues(); |
| zkw.close(); |
| replicationAdmin.close(); |
| } |
| |
| /** |
| * This creates and fixes a bad table with a missing region -- hole in meta |
| * and data present but .regioninfo missing (an orphan hdfs region)in the fs. |
| */ |
| @Test(timeout=180000) |
| public void testHDFSRegioninfoMissing() throws Exception { |
| TableName table = TableName.valueOf("tableHDFSRegioninfoMissing"); |
| try { |
| setupTable(table); |
| assertEquals(ROWKEYS.length, countRows()); |
| |
| // Mess it up by leaving a hole in the meta data |
| admin.disableTable(table); |
| deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), true, |
| true, false, true, HRegionInfo.DEFAULT_REPLICA_ID); |
| admin.enableTable(table); |
| |
| HBaseFsck hbck = doFsck(conf, false); |
| assertErrors(hbck, |
| new HBaseFsck.ErrorReporter.ERROR_CODE[] { |
| HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_HDFS_REGION, |
| HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, |
| HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); |
| // holes are separate from overlap groups |
| assertEquals(0, hbck.getOverlapGroups(table).size()); |
| |
| // fix hole |
| doFsck(conf, true); |
| |
| // check that hole fixed |
| assertNoErrors(doFsck(conf, false)); |
| assertEquals(ROWKEYS.length, countRows()); |
| } finally { |
| cleanupTable(table); |
| } |
| } |
| |
| /** |
| * This creates and fixes a bad table with a region that is missing meta and |
| * not assigned to a region server. |
| */ |
| @Test (timeout=180000) |
| public void testNotInMetaOrDeployedHole() throws Exception { |
| TableName table = |
| TableName.valueOf("tableNotInMetaOrDeployedHole"); |
| try { |
| setupTable(table); |
| assertEquals(ROWKEYS.length, countRows()); |
| |
| // Mess it up by leaving a hole in the meta data |
| admin.disableTable(table); |
| deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), true, |
| true, false); // don't rm from fs |
| admin.enableTable(table); |
| |
| HBaseFsck hbck = doFsck(conf, false); |
| assertErrors(hbck, |
| new HBaseFsck.ErrorReporter.ERROR_CODE[] { |
| HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, |
| HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); |
| // holes are separate from overlap groups |
| assertEquals(0, hbck.getOverlapGroups(table).size()); |
| |
| // fix hole |
| assertErrors(doFsck(conf, true), |
| new HBaseFsck.ErrorReporter.ERROR_CODE[] { |
| HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, |
| HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); |
| |
| // check that hole fixed |
| assertNoErrors(doFsck(conf, false)); |
| assertEquals(ROWKEYS.length, countRows()); |
| } finally { |
| cleanupTable(table); |
| } |
| } |
| |
| @Test (timeout=180000) |
| public void testCleanUpDaughtersNotInMetaAfterFailedSplit() throws Exception { |
| TableName table = TableName.valueOf("testCleanUpDaughtersNotInMetaAfterFailedSplit"); |
| MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); |
| try { |
| HTableDescriptor desc = new HTableDescriptor(table); |
| desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f"))); |
| createTable(TEST_UTIL, desc, null); |
| |
| tbl = connection.getTable(desc.getTableName()); |
| for (int i = 0; i < 5; i++) { |
| Put p1 = new Put(("r" + i).getBytes()); |
| p1.addColumn(Bytes.toBytes("f"), "q1".getBytes(), "v".getBytes()); |
| tbl.put(p1); |
| } |
| admin.flush(desc.getTableName()); |
| List<HRegion> regions = cluster.getRegions(desc.getTableName()); |
| int serverWith = cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName()); |
| HRegionServer regionServer = cluster.getRegionServer(serverWith); |
| cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName()); |
| SplitTransactionImpl st = (SplitTransactionImpl) |
| new SplitTransactionFactory(TEST_UTIL.getConfiguration()) |
| .create(regions.get(0), Bytes.toBytes("r3")); |
| st.prepare(); |
| st.stepsBeforePONR(regionServer, regionServer, false); |
| AssignmentManager am = cluster.getMaster().getAssignmentManager(); |
| for (RegionState state : am.getRegionStates().getRegionsInTransition()) { |
| am.regionOffline(state.getRegion()); |
| } |
| Map<HRegionInfo, ServerName> regionsMap = new HashMap<HRegionInfo, ServerName>(); |
| regionsMap.put(regions.get(0).getRegionInfo(), regionServer.getServerName()); |
| am.assign(regionsMap); |
| am.waitForAssignment(regions.get(0).getRegionInfo()); |
| HBaseFsck hbck = doFsck(conf, false); |
| assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { |
| HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, |
| HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED }); |
| // holes are separate from overlap groups |
| assertEquals(0, hbck.getOverlapGroups(table).size()); |
| |
| // fix hole |
| assertErrors( |
| doFsck(conf, false, true, false, false, false, false, false, false, false, false, false, |
| null), |
| new HBaseFsck.ErrorReporter.ERROR_CODE[] { |
| HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, |
| HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED }); |
| |
| // check that hole fixed |
| assertNoErrors(doFsck(conf, false)); |
| assertEquals(5, countRows()); |
| } finally { |
| if (tbl != null) { |
| tbl.close(); |
| tbl = null; |
| } |
| cleanupTable(table); |
| } |
| } |
| |
| /** |
| * This creates fixes a bad table with a hole in meta. |
| */ |
| @Test (timeout=180000) |
| public void testNotInMetaHole() throws Exception { |
| TableName table = |
| TableName.valueOf("tableNotInMetaHole"); |
| try { |
| setupTable(table); |
| assertEquals(ROWKEYS.length, countRows()); |
| |
| // Mess it up by leaving a hole in the meta data |
| admin.disableTable(table); |
| deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), false, |
| true, false); // don't rm from fs |
| admin.enableTable(table); |
| |
| HBaseFsck hbck = doFsck(conf, false); |
| assertErrors(hbck, |
| new HBaseFsck.ErrorReporter.ERROR_CODE[] { |
| HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, |
| HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); |
| // holes are separate from overlap groups |
| assertEquals(0, hbck.getOverlapGroups(table).size()); |
| |
| // fix hole |
| assertErrors(doFsck(conf, true), |
| new HBaseFsck.ErrorReporter.ERROR_CODE[] { |
| HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, |
| HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); |
| |
| // check that hole fixed |
| assertNoErrors(doFsck(conf, false)); |
| assertEquals(ROWKEYS.length, countRows()); |
| } finally { |
| cleanupTable(table); |
| } |
| } |
| |
| /** |
| * This creates and fixes a bad table with a region that is in meta but has |
| * no deployment or data hdfs |
| */ |
| @Test (timeout=180000) |
| public void testNotInHdfs() throws Exception { |
| TableName table = |
| TableName.valueOf("tableNotInHdfs"); |
| try { |
| setupTable(table); |
| assertEquals(ROWKEYS.length, countRows()); |
| |
| // make sure data in regions, if in wal only there is no data loss |
| admin.flush(table); |
| |
| // Mess it up by leaving a hole in the hdfs data |
| deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), false, |
| false, true); // don't rm meta |
| |
| HBaseFsck hbck = doFsck(conf, false); |
| assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { |
| HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS}); |
| // holes are separate from overlap groups |
| assertEquals(0, hbck.getOverlapGroups(table).size()); |
| |
| // fix hole |
| doFsck(conf, true); |
| |
| // check that hole fixed |
| assertNoErrors(doFsck(conf,false)); |
| assertEquals(ROWKEYS.length - 2, countRows()); |
| } finally { |
| cleanupTable(table); |
| } |
| } |
| |
| /** |
| * This creates a table and simulates the race situation where a concurrent compaction or split |
| * has removed a colfam dir before the corruption checker got to it. |
| * |
| * <p>The fault is injected through an HBaseFsck subclass whose corruption checker deletes the |
| * first directory passed to checkColFamDir before delegating to the real check. |
| */ |
| // Disabled because fails sporadically. Is this test right? Timing-wise, there could be no |
| // files in a column family on initial creation -- as suggested by Matteo. |
| @Ignore |
| @Test(timeout=180000) |
| public void testQuarantineMissingFamdir() throws Exception { |
| TableName table = TableName.valueOf(name.getMethodName()); |
| // inject a fault in the hfcc created. |
| final FileSystem fs = FileSystem.get(conf); |
| HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) { |
| @Override |
| public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) |
| throws IOException { |
| return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) { |
| // flips to true on the first call so that only one directory is deleted |
| private final AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false); |
| @Override |
| protected void checkColFamDir(Path p) throws IOException { |
| if (attemptedFirstHFile.compareAndSet(false, true)) { |
| assertTrue(fs.delete(p, true)); // make sure delete happened. |
| } |
| super.checkColFamDir(p); |
| } |
| }; |
| } |
| }; |
| try { |
| doQuarantineTest(table, hbck, 3, 0, 0, 0, 1); |
| } finally { |
| // close hbck even when the quarantine test fails, so it is not leaked |
| hbck.close(); |
| } |
| } |
| |
| /** |
| * This creates a table and simulates the race situation where a concurrent compaction or split |
| * has removed a region dir before the corruption checker got to it. |
| * |
| * <p>The fault is injected through an HBaseFsck subclass whose corruption checker deletes the |
| * first directory passed to checkRegionDir before delegating to the real check. |
| */ |
| @Test(timeout=180000) |
| public void testQuarantineMissingRegionDir() throws Exception { |
| TableName table = TableName.valueOf(name.getMethodName()); |
| // inject a fault in the hfcc created. |
| final FileSystem fs = FileSystem.get(conf); |
| HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) { |
| @Override |
| public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) |
| throws IOException { |
| return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) { |
| // flips to true on the first call so that only one directory is deleted |
| private final AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false); |
| @Override |
| protected void checkRegionDir(Path p) throws IOException { |
| if (attemptedFirstHFile.compareAndSet(false, true)) { |
| assertTrue(fs.delete(p, true)); // make sure delete happened. |
| } |
| super.checkRegionDir(p); |
| } |
| }; |
| } |
| }; |
| try { |
| doQuarantineTest(table, hbck, 3, 0, 0, 0, 1); |
| } finally { |
| // close hbck even when the quarantine test fails, so it is not leaked |
| hbck.close(); |
| } |
| } |
| } |