// blob: e6d26f99f209e6491f46c41b8724c28dd7671786 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud;
import java.lang.invoke.MethodHandles;
import java.net.URL;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.TimeUnit;
import java.util.function.Predicate;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.cloud.overseer.OverseerAction;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.util.TimeSource;
import org.apache.solr.common.util.Utils;
import org.apache.solr.util.TimeOut;
import org.apache.zookeeper.KeeperException;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.solr.cloud.OverseerCollectionConfigSetProcessor.getLeaderNode;
import static org.apache.solr.cloud.OverseerTaskProcessor.getSortedElectionNodes;
public class OverseerRolesTest extends SolrCloudTestCase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
/**
 * Runs before every test: configures a cluster of four nodes and adds the
 * {@code configset("cloud-minimal")} configuration under the name {@code "conf"}.
 *
 * @throws Exception if the cluster cannot be configured
 */
@Before
public void setupCluster() throws Exception {
  final int nodeCount = 4;
  final String configName = "conf";
  configureCluster(nodeCount)
      .addConfig(configName, configset("cloud-minimal"))
      .configure();
}
/**
 * Runs after every test and delegates to {@code shutdownCluster()}, so that each test
 * starts from the cluster freshly built by {@link #setupCluster()}.
 *
 * @throws Exception if shutting the cluster down fails
 */
@After
public void tearDownCluster() throws Exception {
shutdownCluster();
}
/**
 * Polls the overseer leader node every 100 ms until {@code state} accepts it, failing the test if
 * that does not happen within {@code seconds} seconds.
 *
 * @param seconds maximum time to wait, in seconds
 * @param state predicate applied to the current leader node name (which may be {@code null})
 * @param failOnIntermediateTransition when {@code true}, the test fails as soon as two consecutive
 *     polls return different non-null leaders and the newer one is not accepted by {@code state}
 * @throws Exception if reading the leader fails or the thread is interrupted while sleeping
 */
private void waitForNewOverseer(int seconds, Predicate<String> state, boolean failOnIntermediateTransition) throws Exception {
  final TimeOut deadline = new TimeOut(seconds, TimeUnit.SECONDS, TimeSource.NANO_TIME);
  String observed = null;
  while (!deadline.hasTimedOut()) {
    final String previous = observed;
    observed = getLeaderNode(zkClient());
    if (state.test(observed)) {
      return;
    }
    // A null on either side means "no leader seen", which is not counted as a transition.
    final boolean leaderChanged = previous != null && observed != null && !observed.equals(previous);
    if (failOnIntermediateTransition && leaderChanged) {
      fail("There was an intermediate transition, previous: " + previous + ", intermediate transition: " + observed);
    }
    Thread.sleep(100);
  }
  fail("Timed out waiting for overseer state change. The current overseer is: " + observed);
}
/**
 * Convenience overload that waits until the overseer leader node equals {@code expected}.
 *
 * @param seconds maximum time to wait, in seconds
 * @param expected node name the leader must match (compared with {@link Objects#equals})
 * @param failOnIntermediateTransition passed through unchanged to the predicate-based overload
 * @throws Exception propagated from the predicate-based overload
 */
private void waitForNewOverseer(int seconds, String expected, boolean failOnIntermediateTransition) throws Exception {
  log.info("Expecting node: {}", expected);
  final Predicate<String> isExpectedLeader = leader -> Objects.equals(leader, expected);
  waitForNewOverseer(seconds, isExpectedLeader, failOnIntermediateTransition);
}
/**
 * Finds the {@link JettySolrRunner} of this cluster whose base URL port matches the port of the
 * current overseer leader node.
 *
 * <p>The text before the first {@code '_'} of the leader node name is treated as
 * {@code host:port} and parsed as a URL to obtain the port.
 *
 * @return the matching Jetty runner; the test fails if there is no leader, if the leader name has
 *     no {@code '_'} separator, or if no runner matches
 * @throws Exception if the leader cannot be read or the derived URL is malformed
 */
private JettySolrRunner getOverseerJetty() throws Exception {
  String overseer = getLeaderNode(zkClient());
  // The leader can be reported as null; fail with a readable message rather than a NullPointerException.
  assertNotNull("No overseer leader is currently reported", overseer);
  int separator = overseer.indexOf('_');
  assertTrue("Unexpected overseer node name (no '_' separator): " + overseer, separator >= 0);

  URL overseerUrl = new URL("http://" + overseer.substring(0, separator));
  int hostPort = overseerUrl.getPort();
  for (JettySolrRunner jetty : cluster.getJettySolrRunners()) {
    try {
      if (jetty.getBaseUrl().getPort() == hostPort) {
        return jetty;
      }
    } catch (IllegalStateException ignored) {
      // Deliberately best-effort: this runner could not supply a base URL (presumably because it
      // is not running - TODO confirm), so skip it and keep looking at the remaining runners.
    }
  }
  fail("Couldn't find overseer node " + overseer);
  return null; // to keep the compiler happy
}
/**
 * Logs, at INFO level, the current overseer leader node followed by the sorted election nodes
 * under {@code /overseer_elect/election}. Nothing is read or logged when INFO is disabled.
 *
 * @throws KeeperException declared for the lookups performed against {@code zkClient()}
 * @throws InterruptedException declared for the lookups performed against {@code zkClient()}
 */
private void logOverseerState() throws KeeperException, InterruptedException {
  if (log.isInfoEnabled()) {
    final String currentOverseer = getLeaderNode(zkClient());
    log.info("Overseer: {}", currentOverseer);
    log.info("Election queue: {}", getSortedElectionNodes(zkClient(), "/overseer_elect/election")); // nowarn
  }
}
/**
 * Drives the overseer through a series of hand-overs and waits for each expected leader:
 * <ol>
 *   <li>a randomly chosen non-leader node is given the "overseer" role and must become leader;</li>
 *   <li>a second designate is added, the leader's Jetty is stopped, and the second designate must
 *       become leader;</li>
 *   <li>a third designate is added, the role is removed from the current leader, and the third
 *       designate must become leader;</li>
 *   <li>the role is re-added to the previous leader, a QUIT message is offered to the current
 *       leader's state update queue, the leader must change, and the node that quit must still
 *       be listed among the sorted overseer node names.</li>
 * </ol>
 */
@Test
public void testOverseerRole() throws Exception {
  logOverseerState();
  final List<String> candidates = OverseerCollectionConfigSetProcessor.getSortedOverseerNodeNames(zkClient());
  // Start from a clean slate: drop the OVERSEER role in case an earlier test in this suite assigned it
  for (String candidate : candidates) {
    CollectionAdminRequest.removeRole(candidate, "overseer").process(cluster.getSolrClient());
  }

  // Step 1: designate a random node other than the current leader
  final String initialOverseer = getLeaderNode(zkClient());
  candidates.remove(initialOverseer);
  Collections.shuffle(candidates, random());
  final String firstDesignate = candidates.get(0);
  log.info("### Setting overseer designate {}", firstDesignate);
  CollectionAdminRequest.addRole(firstDesignate, "overseer").process(cluster.getSolrClient());
  waitForNewOverseer(15, firstDesignate, false);

  // Step 2: add a second designate, then stop the current overseer so the new designate takes over
  candidates.remove(firstDesignate);
  Collections.shuffle(candidates, random());
  final String secondDesignate = candidates.get(0);
  log.info("### Adding another overseer designate {}", secondDesignate);
  CollectionAdminRequest.addRole(secondDesignate, "overseer").process(cluster.getSolrClient());
  final JettySolrRunner stoppedJetty = getOverseerJetty();
  logOverseerState();
  stoppedJetty.stop();
  waitForNewOverseer(10, secondDesignate, false);

  // Step 3: add a third designate and take the role away from the current overseer
  candidates.remove(secondDesignate);
  Collections.shuffle(candidates, random());
  final String thirdDesignate = candidates.get(0);
  log.info("### Adding last overseer designate {}", thirdDesignate);
  CollectionAdminRequest.addRole(thirdDesignate, "overseer").process(cluster.getSolrClient());
  logOverseerState();
  CollectionAdminRequest.removeRole(secondDesignate, "overseer").process(cluster.getSolrClient());
  waitForNewOverseer(15, thirdDesignate, false);

  // Step 4: restore the role (two designates again), then explicitly tell the overseer to quit
  CollectionAdminRequest.addRole(secondDesignate, "overseer").process(cluster.getSolrClient());
  final String leaderId = OverseerCollectionConfigSetProcessor.getLeaderId(zkClient());
  final String leader = getLeaderNode(zkClient());
  log.info("### Sending QUIT to overseer {}", leader);
  final ZkNodeProps quitMessage =
      new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.QUIT.toLower(), "id", leaderId);
  getOverseerJetty().getCoreContainer().getZkController().getOverseer().getStateUpdateQueue()
      .offer(Utils.toJSON(quitMessage));
  waitForNewOverseer(15, current -> !Objects.equals(leader, current), false);

  Thread.sleep(1000);
  logOverseerState();
  final List<String> electionParticipants = OverseerCollectionConfigSetProcessor.getSortedOverseerNodeNames(zkClient());
  assertTrue("The old leader should have rejoined election", electionParticipants.contains(leader));

  stoppedJetty.start(); // starting this back, just for good measure
}
/**
 * Gives the "overseer" role to the current leader, stops that node's Jetty, restarts it, and
 * waits for the same node to be leader again.
 *
 * <p>The final wait runs with {@code failOnIntermediateTransition = true}, so the test fails if
 * leadership is seen moving from one non-null node to a different one that is not the designate.
 */
@Test
public void testDesignatedOverseerRestarts() throws Exception {
  logOverseerState();
  // Remove the OVERSEER role, in case it was already assigned by another test in this suite
  for (String node : OverseerCollectionConfigSetProcessor.getSortedOverseerNodeNames(zkClient())) {
    CollectionAdminRequest.removeRole(node, "overseer").process(cluster.getSolrClient());
  }

  // The leader may be reported as null; retry up to 10 times, one second apart. The attempt
  // counter is incremented on every pass so this loop is guaranteed to terminate.
  String overseer1 = OverseerCollectionConfigSetProcessor.getLeaderNode(zkClient());
  for (int attempt = 0; overseer1 == null && attempt < 10; attempt++) {
    Thread.sleep(1000);
    overseer1 = OverseerCollectionConfigSetProcessor.getLeaderNode(zkClient());
  }
  assertNotNull("No overseer leader was reported after 10 retries", overseer1);

  // Setting overseer role to the current overseer
  CollectionAdminRequest.addRole(overseer1, "overseer").process(cluster.getSolrClient());
  waitForNewOverseer(15, overseer1, false);
  JettySolrRunner leaderJetty = getOverseerJetty();
  logOverseerState();

  // kill the current (designated) overseer
  leaderJetty.stop();
  log.info("Killing designated overseer: {}", overseer1);

  // After a 5 ms pause, bring back the dead designated overseer and assert that it assumes
  // leadership "right away", i.e. without any other leadership change being observed first.
  // NOTE(review): this comment previously said "after 5 seconds" although the sleep is 5 ms;
  // the timing is left unchanged here - confirm which pause was intended.
  Thread.sleep(5);
  logOverseerState();
  log.info("Starting back the prioritized overseer..");
  leaderJetty.start();
  waitForNewOverseer(15, overseer1, true); // assert that there is just a single leadership transition
}
}