blob: 379575659bdcd8e786a8bba617bb8ea048dfce28 [file] [log] [blame]
package org.apache.helix.manager.zk;
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import java.util.Date;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.helix.HelixDataAccessor;
import org.apache.helix.PropertyKey;
import org.apache.helix.SystemPropertyKeys;
import org.apache.helix.TestHelper;
import org.apache.helix.ZkTestHelper;
import org.apache.helix.ZkUnitTestBase;
import org.apache.helix.integration.manager.ClusterControllerManager;
import org.apache.helix.integration.manager.MockParticipantManager;
import org.apache.helix.manager.zk.zookeeper.IZkStateListener;
import org.apache.helix.model.LiveInstance;
import org.apache.zookeeper.Watcher.Event.KeeperState;
import org.apache.helix.zookeeper.impl.client.ZkClient;
import org.testng.Assert;
import org.testng.annotations.Test;
public class TestZkFlapping extends ZkUnitTestBase {
private final int _disconnectThreshold = 5;
class ZkStateCountListener implements IZkStateListener {
int count = 0;
@Override
public void handleStateChanged(KeeperState state) {
if (state == KeeperState.Disconnected) {
count++;
}
}
@Override
public void handleNewSession(final String sessionId) {
}
@Override
public void handleSessionEstablishmentError(Throwable var1) {
}
}
@Test
public void testParticipantFlapping() throws Exception {
String className = TestHelper.getTestClassName();
String methodName = TestHelper.getTestMethodName();
String clusterName = className + "_" + methodName;
final HelixDataAccessor accessor =
new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<>(_gZkClient));
final PropertyKey.Builder keyBuilder = accessor.keyBuilder();
System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis()));
System.setProperty(SystemPropertyKeys.MAX_DISCONNECT_THRESHOLD,
Integer.toString(_disconnectThreshold));
try {
TestHelper.setupCluster(clusterName, ZK_ADDR, 12918, // participant port
"localhost", // participant name prefix
"TestDB", // resource name prefix
1, // resources
32, // partitions per resource
1, // number of nodes
1, // replicas
"MasterSlave", false);
final String instanceName = "localhost_12918";
MockParticipantManager participant =
new MockParticipantManager(ZK_ADDR, clusterName, instanceName);
participant.syncStart();
final ZkClient client = (ZkClient) participant.getZkClient();
final ZkStateCountListener listener = new ZkStateCountListener();
client.subscribeStateChanges(listener);
final AtomicInteger expectDisconnectCnt = new AtomicInteger(0);
final int n = _disconnectThreshold;
for (int i = 0; i < _disconnectThreshold; i++) {
String oldSessionId = ZkTestHelper.getSessionId(client);
ZkTestHelper.simulateZkStateReconnected(client);
expectDisconnectCnt.incrementAndGet();
// wait until we get invoked by zk state change to disconnected
TestHelper.verify(() -> listener.count == expectDisconnectCnt.get(), 30 * 1000);
String newSessionId = ZkTestHelper.getSessionId(client);
Assert.assertEquals(newSessionId, oldSessionId);
}
client.unsubscribeStateChanges(listener);
// make sure participant is NOT disconnected
LiveInstance liveInstance = accessor.getProperty(keyBuilder.liveInstance(instanceName));
Assert.assertNotNull(liveInstance, "Live-instance should exist after " + n + " disconnects");
// trigger flapping
ZkTestHelper.simulateZkStateReconnected(client);
// wait until we get invoked by zk state change to disconnected
boolean success = TestHelper.verify(client::getShutdownTrigger, 30 * 1000);
Assert.assertTrue(success,
"The " + (n + 1) + "th disconnect event should trigger ZkHelixManager#disonnect");
// make sure participant is disconnected
success = TestHelper.verify(() -> {
LiveInstance liveInstance1 = accessor.getProperty(keyBuilder.liveInstance(instanceName));
return liveInstance1 == null;
}, 3 * 1000);
Assert.assertTrue(success, "Live-instance should be gone after " + (n + 1) + " disconnects");
participant.syncStop();
} finally {
System.clearProperty(SystemPropertyKeys.MAX_DISCONNECT_THRESHOLD);
}
deleteCluster(clusterName);
System.out.println("END " + clusterName + " at " + new Date(System.currentTimeMillis()));
}
@Test
public void testControllerFlapping() throws Exception {
String className = TestHelper.getTestClassName();
String methodName = TestHelper.getTestMethodName();
String clusterName = className + "_" + methodName;
final HelixDataAccessor accessor =
new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<>(_gZkClient));
final PropertyKey.Builder keyBuilder = accessor.keyBuilder();
System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis()));
System.setProperty(SystemPropertyKeys.MAX_DISCONNECT_THRESHOLD,
Integer.toString(_disconnectThreshold));
try {
TestHelper.setupCluster(clusterName, ZK_ADDR, 12918, // participant port
"localhost", // participant name prefix
"TestDB", // resource name prefix
1, // resources
32, // partitions per resource
1, // number of nodes
1, // replicas
"MasterSlave", false);
ClusterControllerManager controller =
new ClusterControllerManager(ZK_ADDR, clusterName, "controller");
controller.syncStart();
final ZkClient client = (ZkClient) controller.getZkClient();
final ZkStateCountListener listener = new ZkStateCountListener();
client.subscribeStateChanges(listener);
final AtomicInteger expectDisconnectCnt = new AtomicInteger(0);
final int n = _disconnectThreshold;
for (int i = 0; i < n; i++) {
String oldSessionId = ZkTestHelper.getSessionId(client);
ZkTestHelper.simulateZkStateReconnected(client);
expectDisconnectCnt.incrementAndGet();
// wait until we get invoked by zk state change to disconnected
TestHelper.verify(() -> listener.count == expectDisconnectCnt.get(), 30 * 1000);
String newSessionId = ZkTestHelper.getSessionId(client);
Assert.assertEquals(newSessionId, oldSessionId);
}
// make sure controller is NOT disconnected
LiveInstance leader = accessor.getProperty(keyBuilder.controllerLeader());
Assert.assertNotNull(leader, "Leader should exist after " + n + " disconnects");
// trigger flapping
ZkTestHelper.simulateZkStateReconnected(client);
// wait until we get invoked by zk state change to disconnected
boolean success = TestHelper.verify(client::getShutdownTrigger, 30 * 1000);
Assert.assertTrue(success,
"The " + (n + 1) + "th disconnect event should trigger ZkHelixManager#disonnect");
// make sure controller is disconnected
success = TestHelper.verify(() -> {
LiveInstance leader1 = accessor.getProperty(keyBuilder.controllerLeader());
return leader1 == null;
}, 5 * 1000);
Assert.assertTrue(success, "Leader should be gone after " + (n + 1) + " disconnects");
controller.syncStop();
} finally {
System.clearProperty(SystemPropertyKeys.MAX_DISCONNECT_THRESHOLD);
}
deleteCluster(clusterName);
System.out.println("END " + clusterName + " at " + new Date(System.currentTimeMillis()));
}
}