blob: 29156cc566b1019ea6d6bfd6e8e323d67329baa2 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
/**
 * Integration test that exercises shard-leader election on a six-node test cluster.
 *
 * <p>The single test method runs two scenarios back to back against the same cluster:
 * repeatedly stopping the current leader of {@code shard1} and checking that another node
 * takes over, and then expiring the ZooKeeper session of the leader and checking that it
 * ends up as leader again once every other node has been stopped.
 */
@Slow
public class LeaderElectionIntegrationTest extends SolrCloudTestCase {

  /** Number of replicas {@code shard1} has once {@link #createCollection(String)} returns. */
  private final static int NUM_REPLICAS_OF_SHARD1 = 5;

  /** Maximum number of polls while waiting for a stopped leader to be replaced. */
  private static final int LEADER_CHANGE_MAX_POLLS = 90;

  /** Pause between two leader polls, in milliseconds. */
  private static final long LEADER_CHANGE_POLL_MS = 500;

  /** Sets the {@code solrcloud.skip.autorecovery} system property for the whole class. */
  @BeforeClass
  public static void beforeClass() {
    System.setProperty("solrcloud.skip.autorecovery", "true");
  }

  /** Configures a six-node cluster with the {@code cloud-minimal} configset before each test. */
  @Override
  public void setUp() throws Exception {
    super.setUp();
    configureCluster(6)
        .addConfig("conf", configset("cloud-minimal"))
        .configure();
  }

  /**
   * Creates {@code collection} from the {@code conf} configset and then adds replicas to
   * {@code shard1} until it has {@link #NUM_REPLICAS_OF_SHARD1} of them.
   *
   * @param collection name of the collection to create
   * @throws IOException if a request to the cluster fails
   * @throws SolrServerException if a request to the cluster fails
   */
  private void createCollection(String collection) throws IOException, SolrServerException {
    // NOTE(review): the arguments (2, 1) are presumably numShards and replicas per shard - confirm.
    assertEquals(0, CollectionAdminRequest.createCollection(collection,
        "conf", 2, 1)
        .setMaxShardsPerNode(1).process(cluster.getSolrClient()).getStatus());
    // The loop starts at 1 because shard1 already has the replica created above.
    for (int i = 1; i < NUM_REPLICAS_OF_SHARD1; i++) {
      assertTrue(
          CollectionAdminRequest.addReplicaToShard(collection, "shard1").process(cluster.getSolrClient()).isSuccess()
      );
    }
  }

  /**
   * Runs the leader-failover scenario on {@code collection1}, deletes that collection, and then
   * runs the session-expiry scenario on {@code collection2}.
   */
  @Test
  // 12-Jun-2018 @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 04-May-2018
  // commented 4-Sep-2018 @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2-Aug-2018
  public void testSimpleSliceLeaderElection() throws Exception {
    String collection = "collection1";
    createCollection(collection);
    stopLeadersAndExpectFailover(collection);
    CollectionAdminRequest.deleteCollection(collection).process(cluster.getSolrClient());

    // testLeaderElectionAfterClientTimeout
    collection = "collection2";
    createCollection(collection);
    expireLeaderSessionAndExpectReelection(collection);
  }

  /**
   * Stops the current {@code shard1} leader four times in a row, each time requiring that a
   * different, running node hosting {@code shard1} becomes leader, then restarts the stopped nodes.
   */
  private void stopLeadersAndExpectFailover(String collection) throws Exception {
    List<JettySolrRunner> stoppedRunners = new ArrayList<>();
    for (int i = 0; i < 4; i++) {
      // who is the leader?
      String leader = getLeader(collection);
      JettySolrRunner jetty = getRunner(leader);
      assertNotNull(jetty);
      assertEquals("shard1", shardIdOf(jetty));

      jetty.stop();
      stoppedRunners.add(jetty);

      // Leaders are compared by node name: getRunner() never returns a stopped runner, so an
      // identity comparison against the stopped jetty could not detect a missing failover.
      String newLeader = waitForNewLeader(collection, leader, jetty);
      JettySolrRunner newLeaderJetty = getRunner(newLeader);
      assertNotNull("New leader " + newLeader + " is not a running node", newLeaderJetty);
      assertEquals("shard1", shardIdOf(newLeaderJetty));
    }

    for (JettySolrRunner runner : stoppedRunners) {
      runner.start();
    }
    waitForState("Expected to see nodes come back " + collection, collection,
        (n, c) -> n.size() == 6);
  }

  /**
   * Polls the {@code shard1} leader until its node name differs from {@code previousLeader}.
   *
   * @param collection collection whose leader is polled
   * @param previousLeader node name of the leader that was stopped
   * @param stopped the stopped runner, used only in the failure message
   * @return node name of the new leader
   * @throws AssertionError if the leader is unchanged after all polls; the ZooKeeper layout is
   *     printed to stdout first
   */
  private String waitForNewLeader(String collection, String previousLeader, JettySolrRunner stopped)
      throws Exception {
    for (int attempt = 0; attempt <= LEADER_CHANGE_MAX_POLLS; attempt++) {
      String currentLeader = getLeader(collection);
      if (currentLeader != null && !previousLeader.equals(currentLeader)) {
        return currentLeader;
      }
      if (attempt < LEADER_CHANGE_MAX_POLLS) {
        Thread.sleep(LEADER_CHANGE_POLL_MS);
      }
    }
    cluster.getZkClient().printLayoutToStdOut();
    throw new AssertionError(
        "We didn't find a new leader! " + stopped + " was close, but it's still showing as the leader");
  }

  /**
   * Expires the ZooKeeper session of the current {@code shard1} leader, waits for leadership to
   * move, stops every other node and finally requires the original node to be leader again.
   */
  private void expireLeaderSessionAndExpectReelection(String collection) throws Exception {
    // TODO: work out the best timing here...
    System.setProperty("zkClientTimeout", Integer.toString(ZkTestServer.TICK_TIME * 2 + 100));

    // timeout the leader
    String leader = getLeader(collection);
    JettySolrRunner jetty = getRunner(leader);
    ZkController zkController = jetty.getCoreContainer().getZkController();
    zkController.getZkClient().getSolrZooKeeper().closeCnxn();
    cluster.getZkServer().expire(zkController.getZkClient().getSolrZooKeeper().getSessionId());

    for (int i = 0; i < 60; i++) { // wait till leader is changed
      if (jetty != getRunner(getLeader(collection))) {
        break;
      }
      Thread.sleep(100);
    }

    // make sure we have waited long enough for the first leader to have come back
    Thread.sleep(ZkTestServer.TICK_TIME * 2 + 100);

    // kill everyone but the first leader that should have reconnected by now
    for (JettySolrRunner other : cluster.getJettySolrRunners()) {
      if (jetty != other) {
        other.stop();
      }
    }

    for (int i = 0; i < 320; i++) { // wait till leader is changed
      try {
        if (jetty == getRunner(getLeader(collection))) {
          break;
        }
        Thread.sleep(100);
      } catch (InterruptedException e) {
        // Never swallow an interrupt: abort the wait and let the test fail fast.
        throw e;
      } catch (RuntimeException ignored) {
        // Any other failure of the leader lookup is treated as transient; poll again.
      }
    }
    assertEquals(jetty, getRunner(getLeader(collection)));
  }

  /** Returns the shard id of the first core reported by the given runner's core container. */
  private static String shardIdOf(JettySolrRunner runner) {
    return runner.getCoreContainer().getCores().iterator().next()
        .getCoreDescriptor().getCloudDescriptor().getShardId();
  }

  /**
   * Finds the running node with the given node name.
   *
   * @param nodeName node name to look for; must not be {@code null}
   * @return the matching runner that is not stopped, or {@code null} if there is none
   */
  private JettySolrRunner getRunner(String nodeName) {
    for (JettySolrRunner jettySolrRunner : cluster.getJettySolrRunners()){
      // isStopped() is checked first so getNodeName() is only called on running nodes.
      if (!jettySolrRunner.isStopped() && nodeName.equals(jettySolrRunner.getNodeName())) return jettySolrRunner;
    }
    return null;
  }

  /**
   * Looks up the node name of the current {@code shard1} leader of {@code collection}, passing
   * 30000 as the timeout argument of {@code getLeaderRetry}.
   *
   * @return value of the leader's {@link ZkStateReader#NODE_NAME_PROP} property
   */
  private String getLeader(String collection) throws InterruptedException {
    ZkNodeProps props = cluster.getSolrClient().getZkStateReader().getLeaderRetry(collection, "shard1", 30000);
    return props.getStr(ZkStateReader.NODE_NAME_PROP);
  }

  /** Clears every system property this class sets so later tests do not inherit them. */
  @AfterClass
  public static void afterClass() throws InterruptedException {
    System.clearProperty("solrcloud.skip.autorecovery");
    System.clearProperty("zkClientTimeout");
  }
}