HBASE-28215 CreateTableProcedure and DeleteTableProcedure should sleep a while before retrying (#5502)
Signed-off-by: Duo Zhang <zhangduo@apache.org>
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java
index 17998fe..23ad3b4 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java
@@ -35,12 +35,15 @@
import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
+import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
+import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerValidationUtils;
import org.apache.hadoop.hbase.rsgroup.RSGroupInfo;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.FSTableDescriptors;
import org.apache.hadoop.hbase.util.ModifyRegionUtils;
+import org.apache.hadoop.hbase.util.RetryCounter;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -51,6 +54,7 @@
import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.CreateTableState;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
@InterfaceAudience.Private
public class CreateTableProcedure extends AbstractStateMachineTableProcedure<CreateTableState> {
@@ -60,6 +64,7 @@
private TableDescriptor tableDescriptor;
private List<RegionInfo> newRegions;
+ private RetryCounter retryCounter;
public CreateTableProcedure() {
// Required by the Procedure framework to create the procedure on replay
@@ -80,7 +85,7 @@
@Override
protected Flow executeFromState(final MasterProcedureEnv env, final CreateTableState state)
- throws InterruptedException {
+ throws InterruptedException, ProcedureSuspendedException {
LOG.info("{} execute state={}", this, state);
try {
switch (state) {
@@ -131,6 +136,7 @@
break;
case CREATE_TABLE_POST_OPERATION:
postCreate(env);
+ retryCounter = null;
return Flow.NO_MORE_STATE;
default:
throw new UnsupportedOperationException("unhandled state=" + state);
@@ -139,13 +145,27 @@
if (isRollbackSupported(state)) {
setFailure("master-create-table", e);
} else {
- LOG.warn("Retriable error trying to create table=" + getTableName() + " state=" + state, e);
+ if (retryCounter == null) {
+ retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
+ }
+ long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
+ LOG.warn("Retriable error trying to create table={},state={},suspend {}secs.",
+ getTableName(), state, backoff / 1000, e);
+ throw suspend(Math.toIntExact(backoff), true);
}
}
+ retryCounter = null;
return Flow.HAS_MORE_STATE;
}
@Override
+ protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
+ setState(ProcedureProtos.ProcedureState.RUNNABLE);
+ env.getProcedureScheduler().addFront(this);
+ return false;
+ }
+
+ @Override
protected void rollbackState(final MasterProcedureEnv env, final CreateTableState state)
throws IOException {
if (state == CreateTableState.CREATE_TABLE_PRE_OPERATION) {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
index 80fb5d0..8c2f106 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
@@ -41,9 +41,12 @@
import org.apache.hadoop.hbase.mob.MobConstants;
import org.apache.hadoop.hbase.mob.MobUtils;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
+import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
+import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.RetryCounter;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -54,6 +57,7 @@
import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.DeleteTableState;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
@InterfaceAudience.Private
public class DeleteTableProcedure extends AbstractStateMachineTableProcedure<DeleteTableState> {
@@ -61,6 +65,7 @@
private List<RegionInfo> regions;
private TableName tableName;
+ private RetryCounter retryCounter;
public DeleteTableProcedure() {
// Required by the Procedure framework to create the procedure on replay
@@ -79,7 +84,7 @@
@Override
protected Flow executeFromState(final MasterProcedureEnv env, DeleteTableState state)
- throws InterruptedException {
+ throws InterruptedException, ProcedureSuspendedException {
if (LOG.isTraceEnabled()) {
LOG.trace(this + " execute state=" + state);
}
@@ -124,6 +129,7 @@
break;
case DELETE_TABLE_POST_OPERATION:
postDelete(env);
+ retryCounter = null;
LOG.debug("Finished {}", this);
return Flow.NO_MORE_STATE;
default:
@@ -133,13 +139,27 @@
if (isRollbackSupported(state)) {
setFailure("master-delete-table", e);
} else {
- LOG.warn("Retriable error trying to delete table=" + getTableName() + " state=" + state, e);
+ if (retryCounter == null) {
+ retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
+ }
+ long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
+ LOG.warn("Retriable error trying to delete table={},state={},suspend {}secs.",
+ getTableName(), state, backoff / 1000, e);
+ throw suspend(Math.toIntExact(backoff), true);
}
}
+ retryCounter = null;
return Flow.HAS_MORE_STATE;
}
@Override
+ protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
+ setState(ProcedureProtos.ProcedureState.RUNNABLE);
+ env.getProcedureScheduler().addFront(this);
+ return false;
+ }
+
+ @Override
protected boolean abort(MasterProcedureEnv env) {
// TODO: Current behavior is: with no rollback and no abort support, procedure may get stuck
// looping in retrying failing a step forever. Default behavior of abort is changed to support
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/BadMasterObserverForCreateDeleteTable.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/BadMasterObserverForCreateDeleteTable.java
new file mode 100644
index 0000000..454a24e
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/BadMasterObserverForCreateDeleteTable.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.coprocessor;
+
+import java.io.IOException;
+import java.util.Optional;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+
+/**
+ * A bad Master Observer to prevent user to create/delete table once.
+ */
+public class BadMasterObserverForCreateDeleteTable implements MasterObserver, MasterCoprocessor {
+ private boolean createFailedOnce = false;
+ private boolean deleteFailedOnce = false;
+
+ @Override
+ public void postCompletedCreateTableAction(ObserverContext<MasterCoprocessorEnvironment> ctx,
+ TableDescriptor desc, RegionInfo[] regions) throws IOException {
+ if (!createFailedOnce && !desc.getTableName().isSystemTable()) {
+ createFailedOnce = true;
+ throw new IOException("execute postCompletedCreateTableAction failed once.");
+ }
+ }
+
+ @Override
+ public void postCompletedDeleteTableAction(ObserverContext<MasterCoprocessorEnvironment> ctx,
+ TableName tableName) throws IOException {
+ if (!deleteFailedOnce && !tableName.isSystemTable()) {
+ deleteFailedOnce = true;
+ throw new IOException("execute postCompletedDeleteTableAction failed once.");
+ }
+ }
+
+ @Override
+ public Optional<MasterObserver> getMasterObserver() {
+ return Optional.of(this);
+ }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateDeleteTableProcedureWithRetry.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateDeleteTableProcedureWithRetry.java
new file mode 100644
index 0000000..3491aa6
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateDeleteTableProcedureWithRetry.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.procedure;
+
+import java.io.IOException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtil;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.coprocessor.BadMasterObserverForCreateDeleteTable;
+import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
+import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
+import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.ModifyRegionUtils;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category({ MasterTests.class, MediumTests.class })
+public class TestCreateDeleteTableProcedureWithRetry {
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestCreateDeleteTableProcedureWithRetry.class);
+
+ private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();
+
+ private static final TableName TABLE_NAME =
+ TableName.valueOf(TestCreateDeleteTableProcedureWithRetry.class.getSimpleName());
+
+ private static final String CF = "cf";
+
+ @BeforeClass
+ public static void setUp() throws Exception {
+ Configuration conf = UTIL.getConfiguration();
+ conf.set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY,
+ BadMasterObserverForCreateDeleteTable.class.getName());
+ UTIL.startMiniCluster(1);
+ }
+
+ @AfterClass
+ public static void tearDown() throws Exception {
+ UTIL.shutdownMiniCluster();
+ }
+
+ @Test
+ public void testCreateDeleteTableRetry() throws IOException {
+ ProcedureExecutor<MasterProcedureEnv> procExec =
+ UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
+ TableDescriptor htd = MasterProcedureTestingUtility.createHTD(TABLE_NAME, CF);
+ RegionInfo[] regions = ModifyRegionUtils.createRegionInfos(htd, null);
+ CreateTableProcedure createProc =
+ new CreateTableProcedure(procExec.getEnvironment(), htd, regions);
+ ProcedureTestingUtility.submitAndWait(procExec, createProc);
+ Assert.assertTrue(UTIL.getAdmin().tableExists(TABLE_NAME));
+ MasterProcedureTestingUtility.validateTableCreation(UTIL.getMiniHBaseCluster().getMaster(),
+ TABLE_NAME, regions, CF);
+
+ UTIL.getAdmin().disableTable(TABLE_NAME);
+ DeleteTableProcedure deleteProc =
+ new DeleteTableProcedure(procExec.getEnvironment(), TABLE_NAME);
+ ProcedureTestingUtility.submitAndWait(procExec, deleteProc);
+ Assert.assertFalse(UTIL.getAdmin().tableExists(TABLE_NAME));
+ MasterProcedureTestingUtility.validateTableDeletion(UTIL.getMiniHBaseCluster().getMaster(),
+ TABLE_NAME);
+ }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableProcedure.java
index bed41f4..0bb54cc 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableProcedure.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableProcedure.java
@@ -39,6 +39,7 @@
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
+import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerForTest;
@@ -244,7 +245,8 @@
@Override
protected Flow executeFromState(MasterProcedureEnv env,
- MasterProcedureProtos.CreateTableState state) throws InterruptedException {
+ MasterProcedureProtos.CreateTableState state)
+ throws InterruptedException, ProcedureSuspendedException {
if (
!failOnce && state == MasterProcedureProtos.CreateTableState.CREATE_TABLE_WRITE_FS_LAYOUT