HBASE-28419 Allow Action and Policies of ServerKillingMonkey to be configurable. (#5743)

Signed-off-by: Nick Dimiduk <ndimiduk@apache.org>
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java
index fa001e0..0263a56 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java
@@ -20,6 +20,7 @@
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Set;
+import java.util.concurrent.TimeUnit;
 
 public interface MonkeyConstants {
 
@@ -45,6 +46,11 @@
   String UNBALANCE_WAIT_AFTER_BALANCE_MS = "unbalance.action.wait.after.period";
   String UNBALANCE_KILL_META_RS = "unbalance.action.kill.meta.rs";
   String DECREASE_HFILE_SIZE_SLEEP_TIME = "decrease.hfile.size.sleep.time";
+  String RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME = "restart.random.rs.exception.sleep.time";
+  String RESTART_ACTIVE_NAMENODE_SLEEP_TIME = "restart.active.namenode.sleep.time";
+  String RESTART_RANDOM_DATANODE_SLEEP_TIME = "restart.random.datanode.sleep.time";
+  String RESTART_RANDOM_JOURNALNODE_SLEEP_TIME = "restart.random.journalnode.sleep.time";
+  String RESTART_RANDOM_ZKNODE_SLEEP_TIME = "restart.random.zknode.sleep.time";
   String GRACEFUL_RESTART_RS_SLEEP_TIME = "graceful.restart.rs.sleep.time";
   String ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME = "rolling.batch.suspend.rs.sleep.time";
   String ROLLING_BATCH_SUSPEND_RS_RATIO = "rolling.batch.suspend.rs.ratio";
@@ -92,6 +98,13 @@
   long DEFAULT_UNBALANCE_WAIT_AFTER_BALANCE_MS = 5 * 1000;
   boolean DEFAULT_UNBALANCE_KILL_META_RS = true;
   long DEFAULT_DECREASE_HFILE_SIZE_SLEEP_TIME = 30 * 1000;
+
+  long DEFAULT_RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME = TimeUnit.MILLISECONDS.toMillis(60000);
+  long DEFAULT_RESTART_ACTIVE_NAMENODE_SLEEP_TIME = TimeUnit.MILLISECONDS.toMillis(60000);
+  long DEFAULT_RESTART_RANDOM_DATANODE_SLEEP_TIME = TimeUnit.MILLISECONDS.toMillis(60000);
+  long DEFAULT_RESTART_RANDOM_JOURNALNODE_SLEEP_TIME = TimeUnit.MILLISECONDS.toMillis(60000);
+  long DEFAULT_RESTART_RANDOM_ZKNODE_SLEEP_TIME = TimeUnit.MILLISECONDS.toMillis(60000);
+
   long DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME = 5000;
   long DEFAULT_ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME = 30 * 1000;
   float DEFAULT_ROLLING_BATCH_SUSPEND_RS_RATIO = 1.0f;
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerAndDependenciesKillingMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerAndDependenciesKillingMonkeyFactory.java
index 8b3d10c..28dce48 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerAndDependenciesKillingMonkeyFactory.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerAndDependenciesKillingMonkeyFactory.java
@@ -42,9 +42,17 @@
  */
 public class ServerAndDependenciesKillingMonkeyFactory extends MonkeyFactory {
 
+  private long restartRandomRsExceptMetaSleepTime;
+  private long restartActiveMasterSleepTime;
+  private long rollingBatchRestartRSSleepTime;
+  private long restartActiveNameNodeSleepTime;
+  private long restartRandomDataNodeSleepTime;
+  private long restartRandomJournalNodeSleepTime;
+  private long restartRandomZKNodeSleepTime;
   private long gracefulRollingRestartTSSLeepTime;
   private long rollingBatchSuspendRSSleepTime;
   private float rollingBatchSuspendtRSRatio;
+  private long action1Period;
 
   @Override
   public ChaosMonkey build() {
@@ -53,15 +61,15 @@
     // Destructive actions to mess things around. Cannot run batch restart.
     // @formatter:off
     Action[] actions1 = new Action[] {
-      new RestartRandomRsExceptMetaAction(60000),
-      new RestartActiveMasterAction(5000),
+      new RestartRandomRsExceptMetaAction(restartRandomRsExceptMetaSleepTime),
+      new RestartActiveMasterAction(restartActiveMasterSleepTime),
       // only allow 2 servers to be dead.
-      new RollingBatchRestartRsAction(5000, 1.0f, 2, true),
+      new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime, 1.0f, 2, true),
       new ForceBalancerAction(),
-      new RestartActiveNameNodeAction(60000),
-      new RestartRandomDataNodeAction(60000),
-      new RestartRandomJournalNodeAction(60000),
-      new RestartRandomZKNodeAction(60000),
+      new RestartActiveNameNodeAction(restartActiveNameNodeSleepTime),
+      new RestartRandomDataNodeAction(restartRandomDataNodeSleepTime),
+      new RestartRandomJournalNodeAction(restartRandomJournalNodeSleepTime),
+      new RestartRandomZKNodeAction(restartRandomZKNodeSleepTime),
       new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime),
       new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime,
           rollingBatchSuspendtRSRatio)
@@ -73,12 +81,33 @@
       new Action[] { new DumpClusterStatusAction(), new DumpHdfsClusterStatusAction() };
 
     return new PolicyBasedChaosMonkey(properties, util,
-      new CompositeSequentialPolicy(new DoActionsOncePolicy(60 * 1000, actions1),
-        new PeriodicRandomActionPolicy(60 * 1000, actions1)),
-      new PeriodicRandomActionPolicy(60 * 1000, actions2));
+      new CompositeSequentialPolicy(new DoActionsOncePolicy(action1Period, actions1),
+        new PeriodicRandomActionPolicy(action1Period, actions1)),
+      new PeriodicRandomActionPolicy(action1Period, actions2));
   }
 
   private void loadProperties() {
+    restartRandomRsExceptMetaSleepTime = Long
+      .parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME,
+        MonkeyConstants.DEFAULT_RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME + ""));
+    restartActiveMasterSleepTime =
+      Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_ACTIVE_MASTER_SLEEP_TIME,
+        MonkeyConstants.DEFAULT_RESTART_ACTIVE_MASTER_SLEEP_TIME + ""));
+    rollingBatchRestartRSSleepTime = Long
+      .parseLong(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_RESTART_RS_SLEEP_TIME,
+        MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME + ""));
+    restartActiveNameNodeSleepTime =
+      Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_ACTIVE_NAMENODE_SLEEP_TIME,
+        MonkeyConstants.DEFAULT_RESTART_ACTIVE_NAMENODE_SLEEP_TIME + ""));
+    restartRandomDataNodeSleepTime =
+      Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_DATANODE_SLEEP_TIME,
+        MonkeyConstants.DEFAULT_RESTART_RANDOM_DATANODE_SLEEP_TIME + ""));
+    restartRandomJournalNodeSleepTime = Long
+      .parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_JOURNALNODE_SLEEP_TIME,
+        MonkeyConstants.DEFAULT_RESTART_RANDOM_JOURNALNODE_SLEEP_TIME + ""));
+    restartRandomZKNodeSleepTime =
+      Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_ZKNODE_SLEEP_TIME,
+        MonkeyConstants.DEFAULT_RESTART_RANDOM_ZKNODE_SLEEP_TIME + ""));
     gracefulRollingRestartTSSLeepTime =
       Long.parseLong(this.properties.getProperty(MonkeyConstants.GRACEFUL_RESTART_RS_SLEEP_TIME,
         MonkeyConstants.DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME + ""));
@@ -88,5 +117,8 @@
     rollingBatchSuspendtRSRatio =
       Float.parseFloat(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_SUSPEND_RS_RATIO,
         MonkeyConstants.DEFAULT_ROLLING_BATCH_SUSPEND_RS_RATIO + ""));
+    action1Period =
+      Long.parseLong(this.properties.getProperty(MonkeyConstants.PERIODIC_ACTION1_PERIOD,
+        MonkeyConstants.DEFAULT_PERIODIC_ACTION1_PERIOD + ""));
   }
 }
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java
index 9d49a1f..7b58d21 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java
@@ -37,9 +37,13 @@
  */
 public class ServerKillingMonkeyFactory extends MonkeyFactory {
 
+  private long restartRandomRsExceptMetaSleepTime;
+  private long restartActiveMasterSleepTime;
+  private long rollingBatchRestartRSSleepTime;
   private long gracefulRollingRestartTSSLeepTime;
   private long rollingBatchSuspendRSSleepTime;
   private float rollingBatchSuspendtRSRatio;
+  private long action1Period;
 
   @Override
   public ChaosMonkey build() {
@@ -48,10 +52,10 @@
     // Destructive actions to mess things around. Cannot run batch restart
     // @formatter:off
     Action[] actions1 = new Action[] {
-      new RestartRandomRsExceptMetaAction(60000),
-      new RestartActiveMasterAction(5000),
+      new RestartRandomRsExceptMetaAction(restartRandomRsExceptMetaSleepTime),
+      new RestartActiveMasterAction(restartActiveMasterSleepTime),
       // only allow 2 servers to be dead
-      new RollingBatchRestartRsAction(5000, 1.0f, 2, true),
+      new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime, 1.0f, 2, true),
       new ForceBalancerAction(),
       new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime),
       new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime,
@@ -63,12 +67,21 @@
     Action[] actions2 = new Action[] { new DumpClusterStatusAction() };
 
     return new PolicyBasedChaosMonkey(properties, util,
-      new CompositeSequentialPolicy(new DoActionsOncePolicy(60 * 1000, actions1),
-        new PeriodicRandomActionPolicy(60 * 1000, actions1)),
-      new PeriodicRandomActionPolicy(60 * 1000, actions2));
+      new CompositeSequentialPolicy(new DoActionsOncePolicy(action1Period, actions1),
+        new PeriodicRandomActionPolicy(action1Period, actions1)),
+      new PeriodicRandomActionPolicy(action1Period, actions2));
   }
 
   private void loadProperties() {
+    restartRandomRsExceptMetaSleepTime = Long
+      .parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME,
+        MonkeyConstants.DEFAULT_RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME + ""));
+    restartActiveMasterSleepTime =
+      Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_ACTIVE_MASTER_SLEEP_TIME,
+        MonkeyConstants.DEFAULT_RESTART_ACTIVE_MASTER_SLEEP_TIME + ""));
+    rollingBatchRestartRSSleepTime = Long
+      .parseLong(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_RESTART_RS_SLEEP_TIME,
+        MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME + ""));
     gracefulRollingRestartTSSLeepTime =
       Long.parseLong(this.properties.getProperty(MonkeyConstants.GRACEFUL_RESTART_RS_SLEEP_TIME,
         MonkeyConstants.DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME + ""));
@@ -78,5 +91,8 @@
     rollingBatchSuspendtRSRatio =
       Float.parseFloat(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_SUSPEND_RS_RATIO,
         MonkeyConstants.DEFAULT_ROLLING_BATCH_SUSPEND_RS_RATIO + ""));
+    action1Period =
+      Long.parseLong(this.properties.getProperty(MonkeyConstants.PERIODIC_ACTION1_PERIOD,
+        MonkeyConstants.DEFAULT_PERIODIC_ACTION1_PERIOD + ""));
   }
 }