HBASE-28419 Allow Action and Policies of ServerKillingMonkey to be configurable. (#5743)
Signed-off-by: Nick Dimiduk <ndimiduk@apache.org>
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java
index fa001e0..0263a56 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java
@@ -20,6 +20,7 @@
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
+import java.util.concurrent.TimeUnit;
public interface MonkeyConstants {
@@ -45,6 +46,11 @@
String UNBALANCE_WAIT_AFTER_BALANCE_MS = "unbalance.action.wait.after.period";
String UNBALANCE_KILL_META_RS = "unbalance.action.kill.meta.rs";
String DECREASE_HFILE_SIZE_SLEEP_TIME = "decrease.hfile.size.sleep.time";
+ String RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME = "restart.random.rs.exception.sleep.time";
+ String RESTART_ACTIVE_NAMENODE_SLEEP_TIME = "restart.active.namenode.sleep.time";
+ String RESTART_RANDOM_DATANODE_SLEEP_TIME = "restart.random.datanode.sleep.time";
+ String RESTART_RANDOM_JOURNALNODE_SLEEP_TIME = "restart.random.journalnode.sleep.time";
+ String RESTART_RANDOM_ZKNODE_SLEEP_TIME = "restart.random.zknode.sleep.time";
String GRACEFUL_RESTART_RS_SLEEP_TIME = "graceful.restart.rs.sleep.time";
String ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME = "rolling.batch.suspend.rs.sleep.time";
String ROLLING_BATCH_SUSPEND_RS_RATIO = "rolling.batch.suspend.rs.ratio";
@@ -92,6 +98,13 @@
long DEFAULT_UNBALANCE_WAIT_AFTER_BALANCE_MS = 5 * 1000;
boolean DEFAULT_UNBALANCE_KILL_META_RS = true;
long DEFAULT_DECREASE_HFILE_SIZE_SLEEP_TIME = 30 * 1000;
+
+ long DEFAULT_RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME = TimeUnit.MINUTES.toMillis(1);
+ long DEFAULT_RESTART_ACTIVE_NAMENODE_SLEEP_TIME = TimeUnit.MINUTES.toMillis(1);
+ long DEFAULT_RESTART_RANDOM_DATANODE_SLEEP_TIME = TimeUnit.MINUTES.toMillis(1);
+ long DEFAULT_RESTART_RANDOM_JOURNALNODE_SLEEP_TIME = TimeUnit.MINUTES.toMillis(1);
+ long DEFAULT_RESTART_RANDOM_ZKNODE_SLEEP_TIME = TimeUnit.MINUTES.toMillis(1);
+
long DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME = 5000;
long DEFAULT_ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME = 30 * 1000;
float DEFAULT_ROLLING_BATCH_SUSPEND_RS_RATIO = 1.0f;
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerAndDependenciesKillingMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerAndDependenciesKillingMonkeyFactory.java
index 8b3d10c..28dce48 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerAndDependenciesKillingMonkeyFactory.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerAndDependenciesKillingMonkeyFactory.java
@@ -42,9 +42,17 @@
*/
public class ServerAndDependenciesKillingMonkeyFactory extends MonkeyFactory {
+ private long restartRandomRsExceptMetaSleepTime;
+ private long restartActiveMasterSleepTime;
+ private long rollingBatchRestartRSSleepTime;
+ private long restartActiveNameNodeSleepTime;
+ private long restartRandomDataNodeSleepTime;
+ private long restartRandomJournalNodeSleepTime;
+ private long restartRandomZKNodeSleepTime;
private long gracefulRollingRestartTSSLeepTime;
private long rollingBatchSuspendRSSleepTime;
private float rollingBatchSuspendtRSRatio;
+ private long action1Period;
@Override
public ChaosMonkey build() {
@@ -53,15 +61,15 @@
// Destructive actions to mess things around. Cannot run batch restart.
// @formatter:off
Action[] actions1 = new Action[] {
- new RestartRandomRsExceptMetaAction(60000),
- new RestartActiveMasterAction(5000),
+ new RestartRandomRsExceptMetaAction(restartRandomRsExceptMetaSleepTime),
+ new RestartActiveMasterAction(restartActiveMasterSleepTime),
// only allow 2 servers to be dead.
- new RollingBatchRestartRsAction(5000, 1.0f, 2, true),
+ new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime, 1.0f, 2, true),
new ForceBalancerAction(),
- new RestartActiveNameNodeAction(60000),
- new RestartRandomDataNodeAction(60000),
- new RestartRandomJournalNodeAction(60000),
- new RestartRandomZKNodeAction(60000),
+ new RestartActiveNameNodeAction(restartActiveNameNodeSleepTime),
+ new RestartRandomDataNodeAction(restartRandomDataNodeSleepTime),
+ new RestartRandomJournalNodeAction(restartRandomJournalNodeSleepTime),
+ new RestartRandomZKNodeAction(restartRandomZKNodeSleepTime),
new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime),
new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime,
rollingBatchSuspendtRSRatio)
@@ -73,12 +81,33 @@
new Action[] { new DumpClusterStatusAction(), new DumpHdfsClusterStatusAction() };
return new PolicyBasedChaosMonkey(properties, util,
- new CompositeSequentialPolicy(new DoActionsOncePolicy(60 * 1000, actions1),
- new PeriodicRandomActionPolicy(60 * 1000, actions1)),
- new PeriodicRandomActionPolicy(60 * 1000, actions2));
+ new CompositeSequentialPolicy(new DoActionsOncePolicy(action1Period, actions1),
+ new PeriodicRandomActionPolicy(action1Period, actions1)),
+ new PeriodicRandomActionPolicy(action1Period, actions2));
}
private void loadProperties() {
+ restartRandomRsExceptMetaSleepTime = Long
+ .parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME,
+ MonkeyConstants.DEFAULT_RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME + ""));
+ restartActiveMasterSleepTime =
+ Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_ACTIVE_MASTER_SLEEP_TIME,
+ MonkeyConstants.DEFAULT_RESTART_ACTIVE_MASTER_SLEEP_TIME + ""));
+ rollingBatchRestartRSSleepTime = Long
+ .parseLong(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_RESTART_RS_SLEEP_TIME,
+ MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME + ""));
+ restartActiveNameNodeSleepTime =
+ Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_ACTIVE_NAMENODE_SLEEP_TIME,
+ MonkeyConstants.DEFAULT_RESTART_ACTIVE_NAMENODE_SLEEP_TIME + ""));
+ restartRandomDataNodeSleepTime =
+ Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_DATANODE_SLEEP_TIME,
+ MonkeyConstants.DEFAULT_RESTART_RANDOM_DATANODE_SLEEP_TIME + ""));
+ restartRandomJournalNodeSleepTime = Long
+ .parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_JOURNALNODE_SLEEP_TIME,
+ MonkeyConstants.DEFAULT_RESTART_RANDOM_JOURNALNODE_SLEEP_TIME + ""));
+ restartRandomZKNodeSleepTime =
+ Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_ZKNODE_SLEEP_TIME,
+ MonkeyConstants.DEFAULT_RESTART_RANDOM_ZKNODE_SLEEP_TIME + ""));
gracefulRollingRestartTSSLeepTime =
Long.parseLong(this.properties.getProperty(MonkeyConstants.GRACEFUL_RESTART_RS_SLEEP_TIME,
MonkeyConstants.DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME + ""));
@@ -88,5 +117,8 @@
rollingBatchSuspendtRSRatio =
Float.parseFloat(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_SUSPEND_RS_RATIO,
MonkeyConstants.DEFAULT_ROLLING_BATCH_SUSPEND_RS_RATIO + ""));
+ action1Period =
+ Long.parseLong(this.properties.getProperty(MonkeyConstants.PERIODIC_ACTION1_PERIOD,
+ MonkeyConstants.DEFAULT_PERIODIC_ACTION1_PERIOD + ""));
}
}
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java
index 9d49a1f..7b58d21 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java
@@ -37,9 +37,13 @@
*/
public class ServerKillingMonkeyFactory extends MonkeyFactory {
+ private long restartRandomRsExceptMetaSleepTime;
+ private long restartActiveMasterSleepTime;
+ private long rollingBatchRestartRSSleepTime;
private long gracefulRollingRestartTSSLeepTime;
private long rollingBatchSuspendRSSleepTime;
private float rollingBatchSuspendtRSRatio;
+ private long action1Period;
@Override
public ChaosMonkey build() {
@@ -48,10 +52,10 @@
// Destructive actions to mess things around. Cannot run batch restart
// @formatter:off
Action[] actions1 = new Action[] {
- new RestartRandomRsExceptMetaAction(60000),
- new RestartActiveMasterAction(5000),
+ new RestartRandomRsExceptMetaAction(restartRandomRsExceptMetaSleepTime),
+ new RestartActiveMasterAction(restartActiveMasterSleepTime),
// only allow 2 servers to be dead
- new RollingBatchRestartRsAction(5000, 1.0f, 2, true),
+ new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime, 1.0f, 2, true),
new ForceBalancerAction(),
new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime),
new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime,
@@ -63,12 +67,21 @@
Action[] actions2 = new Action[] { new DumpClusterStatusAction() };
return new PolicyBasedChaosMonkey(properties, util,
- new CompositeSequentialPolicy(new DoActionsOncePolicy(60 * 1000, actions1),
- new PeriodicRandomActionPolicy(60 * 1000, actions1)),
- new PeriodicRandomActionPolicy(60 * 1000, actions2));
+ new CompositeSequentialPolicy(new DoActionsOncePolicy(action1Period, actions1),
+ new PeriodicRandomActionPolicy(action1Period, actions1)),
+ new PeriodicRandomActionPolicy(action1Period, actions2));
}
private void loadProperties() {
+ restartRandomRsExceptMetaSleepTime = Long
+ .parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME,
+ MonkeyConstants.DEFAULT_RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME + ""));
+ restartActiveMasterSleepTime =
+ Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_ACTIVE_MASTER_SLEEP_TIME,
+ MonkeyConstants.DEFAULT_RESTART_ACTIVE_MASTER_SLEEP_TIME + ""));
+ rollingBatchRestartRSSleepTime = Long
+ .parseLong(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_RESTART_RS_SLEEP_TIME,
+ MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME + ""));
gracefulRollingRestartTSSLeepTime =
Long.parseLong(this.properties.getProperty(MonkeyConstants.GRACEFUL_RESTART_RS_SLEEP_TIME,
MonkeyConstants.DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME + ""));
@@ -78,5 +91,8 @@
rollingBatchSuspendtRSRatio =
Float.parseFloat(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_SUSPEND_RS_RATIO,
MonkeyConstants.DEFAULT_ROLLING_BATCH_SUSPEND_RS_RATIO + ""));
+ action1Period =
+ Long.parseLong(this.properties.getProperty(MonkeyConstants.PERIODIC_ACTION1_PERIOD,
+ MonkeyConstants.DEFAULT_PERIODIC_ACTION1_PERIOD + ""));
}
}