Improve TestControllerLeadershipChange test logic to tolerate longer delay when test runs. (#1853)
Change the assert condition to consider real test script execute delay to avoid test failure due to slow test runs.
diff --git a/helix-core/src/test/java/org/apache/helix/integration/controller/TestControllerLeadershipChange.java b/helix-core/src/test/java/org/apache/helix/integration/controller/TestControllerLeadershipChange.java
index 1de1296..379feb9 100644
--- a/helix-core/src/test/java/org/apache/helix/integration/controller/TestControllerLeadershipChange.java
+++ b/helix-core/src/test/java/org/apache/helix/integration/controller/TestControllerLeadershipChange.java
@@ -189,6 +189,7 @@
String resourceName = "testResource";
int numPartition = 1;
int numReplica = 1;
+ int simulatedTransitionDelayMs = 100;
String stateModel = "LeaderStandby";
ObjectName resourceMBeanObjectName = getResourceMonitorObjectName(clusterName, resourceName);
MBeanServer beanServer = ManagementFactory.getPlatformMBeanServer();
@@ -205,7 +206,7 @@
// Create participant
_gSetupTool.addInstanceToCluster(clusterName, instanceName);
MockParticipantManager participant =
- new MockParticipantManager(ZK_ADDR, clusterName, instanceName);
+ new MockParticipantManager(ZK_ADDR, clusterName, instanceName, simulatedTransitionDelayMs);
participant.syncStart();
// Create controller, since this is the only controller, it will be the leader
@@ -246,6 +247,9 @@
Assert.assertTrue(clusterVerifier.verify());
Thread.sleep(1000);
+
+ // The moment before manager1 regain leadership. The topstateless duration will start counting.
+ long start = System.currentTimeMillis();
setLeader(manager1);
Assert.assertTrue(manager1.isLeader());
@@ -258,12 +262,19 @@
_gSetupTool.rebalanceResource(clusterName, resourceName, numReplica);
Assert.assertTrue(clusterVerifier.verifyByPolling());
+ // The moment that partition top state has been recovered. The topstateless duration stopped counting.
+ long end = System.currentTimeMillis();
// Resource lost top state, and manager1 lost leadership for 2000ms, because manager1 will
// clean monitoring cache after re-gaining leadership, so max value of hand off duration should
// not have such a large value
- Assert.assertTrue((long) beanServer
- .getAttribute(resourceMBeanObjectName, "PartitionTopStateHandoffDurationGauge.Max") < 500);
+ long duration = (long) beanServer
+ .getAttribute(resourceMBeanObjectName, "PartitionTopStateHandoffDurationGauge.Max");
+ long controllerOpDuration = end - start;
+ Assert.assertTrue(duration >= simulatedTransitionDelayMs && duration <= controllerOpDuration,
+ String.format(
+ "The recorded TopState-less duration is %d. But the controller operation duration is %d.",
+ duration, controllerOpDuration));
participant.syncStop();
manager1.disconnect();