blob: 87efa30c735480755613e89d70ce6dc399f2b822 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud;
import java.io.File;
import java.lang.invoke.MethodHandles;
import java.nio.file.Path;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.io.FileUtils;
import org.apache.solr.BaseDistributedSearchTestCase;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.core.Diagnostics;
import org.apache.solr.core.MockDirectoryFactory;
import org.apache.solr.util.SolrCLI;
import org.apache.solr.util.TimeOut;
import org.apache.zookeeper.KeeperException;
import org.junit.BeforeClass;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static java.util.concurrent.TimeUnit.SECONDS;
public abstract class AbstractDistribZkTestBase extends BaseDistributedSearchTestCase {
private static final String REMOVE_VERSION_FIELD = "remove.version.field";
private static final String ENABLE_UPDATE_LOG = "enable.update.log";
private static final String ZK_HOST = "zkHost";
private static final String ZOOKEEPER_FORCE_SYNC = "zookeeper.forceSync";
protected static final String DEFAULT_COLLECTION = "collection1";
protected volatile ZkTestServer zkServer;
private AtomicInteger homeCount = new AtomicInteger();
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@BeforeClass
public static void beforeThisClass() throws Exception {
// Only For Manual Testing: this will force an fs based dir factory
//useFactory(null);
}
@Override
public void distribSetUp() throws Exception {
super.distribSetUp();
Path zkDir = testDir.toPath().resolve("zookeeper/server1/data");
zkServer = new ZkTestServer(zkDir);
zkServer.run();
System.setProperty(ZK_HOST, zkServer.getZkAddress());
System.setProperty(ENABLE_UPDATE_LOG, "true");
System.setProperty(REMOVE_VERSION_FIELD, "true");
System.setProperty(ZOOKEEPER_FORCE_SYNC, "false");
System.setProperty(MockDirectoryFactory.SOLR_TESTS_ALLOW_READING_FILES_STILL_OPEN_FOR_WRITE, "true");
String schema = getCloudSchemaFile();
if (schema == null) schema = "schema.xml";
zkServer.buildZooKeeper(getCloudSolrConfig(), schema);
// set some system properties for use by tests
System.setProperty("solr.test.sys.prop1", "propone");
System.setProperty("solr.test.sys.prop2", "proptwo");
}
protected String getCloudSolrConfig() {
return "solrconfig-tlog.xml";
}
protected String getCloudSchemaFile() {
return getSchemaFile();
}
@Override
protected void createServers(int numShards) throws Exception {
// give everyone there own solrhome
File controlHome = new File(new File(getSolrHome()).getParentFile(), "control" + homeCount.incrementAndGet());
FileUtils.copyDirectory(new File(getSolrHome()), controlHome);
setupJettySolrHome(controlHome);
controlJetty = createJetty(controlHome, null); // let the shardId default to shard1
controlJetty.start();
controlClient = createNewSolrClient(controlJetty.getLocalPort());
assertTrue(CollectionAdminRequest
.createCollection("control_collection", 1, 1)
.setCreateNodeSet(controlJetty.getNodeName())
.process(controlClient).isSuccess());
ZkStateReader zkStateReader = jettys.get(0).getCoreContainer().getZkController()
.getZkStateReader();
waitForRecoveriesToFinish("control_collection", zkStateReader, false, true, 15);
StringBuilder sb = new StringBuilder();
for (int i = 1; i <= numShards; i++) {
if (sb.length() > 0) sb.append(',');
// give everyone there own solrhome
File jettyHome = new File(new File(getSolrHome()).getParentFile(), "jetty" + homeCount.incrementAndGet());
setupJettySolrHome(jettyHome);
JettySolrRunner j = createJetty(jettyHome, null, "shard" + (i + 2));
j.start();
jettys.add(j);
clients.add(createNewSolrClient(j.getLocalPort()));
sb.append(buildUrl(j.getLocalPort()));
}
shards = sb.toString();
}
protected void waitForRecoveriesToFinish(String collection, ZkStateReader zkStateReader, boolean verbose)
throws Exception {
waitForRecoveriesToFinish(collection, zkStateReader, verbose, true);
}
protected void waitForRecoveriesToFinish(String collection, ZkStateReader zkStateReader, boolean verbose, boolean failOnTimeout)
throws Exception {
waitForRecoveriesToFinish(collection, zkStateReader, verbose, failOnTimeout, 330);
}
public static void waitForRecoveriesToFinish(String collection,
ZkStateReader zkStateReader, boolean verbose, boolean failOnTimeout, long timeoutSeconds)
throws Exception {
log.info("Wait for recoveries to finish - collection: {} failOnTimeout: {} timeout (sec):"
, collection, failOnTimeout, timeoutSeconds);
try {
zkStateReader.waitForState(collection, timeoutSeconds, TimeUnit.SECONDS, (liveNodes, docCollection) -> {
if (docCollection == null)
return false;
boolean sawLiveRecovering = false;
assertNotNull("Could not find collection:" + collection, docCollection);
Map<String,Slice> slices = docCollection.getSlicesMap();
assertNotNull("Could not find collection:" + collection, slices);
for (Map.Entry<String,Slice> entry : slices.entrySet()) {
Slice slice = entry.getValue();
if (slice.getState() == Slice.State.CONSTRUCTION) { // similar to replica recovering; pretend its the same
// thing
if (verbose) System.out.println("Found a slice in construction state; will wait.");
sawLiveRecovering = true;
}
Map<String,Replica> shards = slice.getReplicasMap();
for (Map.Entry<String,Replica> shard : shards.entrySet()) {
if (verbose) System.out.println("replica:" + shard.getValue().getName() + " rstate:"
+ shard.getValue().getStr(ZkStateReader.STATE_PROP)
+ " live:"
+ liveNodes.contains(shard.getValue().getNodeName()));
final Replica.State state = shard.getValue().getState();
if ((state == Replica.State.RECOVERING || state == Replica.State.DOWN
|| state == Replica.State.RECOVERY_FAILED)
&& liveNodes.contains(shard.getValue().getStr(ZkStateReader.NODE_NAME_PROP))) {
return false;
}
}
}
if (!sawLiveRecovering) {
if (!sawLiveRecovering) {
if (verbose) System.out.println("no one is recoverying");
} else {
if (verbose) System.out.println("Gave up waiting for recovery to finish..");
return false;
}
return true;
} else {
return false;
}
});
} catch (TimeoutException | InterruptedException e) {
Diagnostics.logThreadDumps("Gave up waiting for recovery to finish. THREAD DUMP:");
zkStateReader.getZkClient().printLayoutToStdOut();
fail("There are still nodes recoverying - waited for " + timeoutSeconds + " seconds");
}
log.info("Recoveries finished - collection: {}", collection);
}
public static void waitForCollectionToDisappear(String collection,
ZkStateReader zkStateReader, boolean verbose, boolean failOnTimeout, int timeoutSeconds)
throws Exception {
log.info("Wait for collection to disappear - collection: {} failOnTimeout:{} timeout (sec):{}"
, collection, failOnTimeout, timeoutSeconds);
zkStateReader.waitForState(collection, timeoutSeconds, TimeUnit.SECONDS, (docCollection) -> {
if (docCollection == null)
return true;
return false;
});
log.info("Collection has disappeared - collection: {}", collection);
}
static void waitForNewLeader(CloudSolrClient cloudClient, String shardName, Replica oldLeader, TimeOut timeOut)
throws Exception {
log.info("Will wait for a node to become leader for {} secs", timeOut.timeLeft(SECONDS));
ZkStateReader zkStateReader = cloudClient.getZkStateReader();
zkStateReader.forceUpdateCollection(DEFAULT_COLLECTION);
for (; ; ) {
ClusterState clusterState = zkStateReader.getClusterState();
DocCollection coll = clusterState.getCollection("collection1");
Slice slice = coll.getSlice(shardName);
if (slice.getLeader() != null && !slice.getLeader().equals(oldLeader) && slice.getLeader().getState() == Replica.State.ACTIVE) {
if (log.isInfoEnabled()) {
log.info("Old leader {}, new leader {}. New leader got elected in {} ms"
, oldLeader, slice.getLeader(), timeOut.timeElapsed(MILLISECONDS));
}
break;
}
if (timeOut.hasTimedOut()) {
Diagnostics.logThreadDumps("Could not find new leader in specified timeout");
zkStateReader.getZkClient().printLayoutToStdOut();
fail("Could not find new leader even after waiting for " + timeOut.timeElapsed(MILLISECONDS) + "ms");
}
Thread.sleep(100);
}
zkStateReader.waitForState("collection1", timeOut.timeLeft(SECONDS), TimeUnit.SECONDS, (docCollection) -> {
if (docCollection == null)
return false;
Slice slice = docCollection.getSlice(shardName);
if (slice != null && slice.getLeader() != null && !slice.getLeader().equals(oldLeader) && slice.getLeader().getState() == Replica.State.ACTIVE) {
if (log.isInfoEnabled()) {
log.info("Old leader {}, new leader {}. New leader got elected in {} ms"
, oldLeader, slice.getLeader(), timeOut.timeElapsed(MILLISECONDS));
}
return true;
}
return false;
});
}
public static void verifyReplicaStatus(ZkStateReader reader, String collection, String shard, String coreNodeName,
Replica.State expectedState) throws InterruptedException, TimeoutException {
log.info("verifyReplicaStatus ({}) shard={} coreNodeName={}", collection, shard, coreNodeName);
reader.waitForState(collection, 15000, TimeUnit.MILLISECONDS,
(collectionState) -> collectionState != null && collectionState.getSlice(shard) != null
&& collectionState.getSlice(shard).getReplicasMap().get(coreNodeName) != null
&& collectionState.getSlice(shard).getReplicasMap().get(coreNodeName).getState() == expectedState);
}
protected static void assertAllActive(String collection, ZkStateReader zkStateReader)
throws KeeperException, InterruptedException {
zkStateReader.forceUpdateCollection(collection);
ClusterState clusterState = zkStateReader.getClusterState();
final DocCollection docCollection = clusterState.getCollectionOrNull(collection);
if (docCollection == null || docCollection.getSlices() == null) {
throw new IllegalArgumentException("Cannot find collection:" + collection);
}
Map<String,Slice> slices = docCollection.getSlicesMap();
for (Map.Entry<String,Slice> entry : slices.entrySet()) {
Slice slice = entry.getValue();
if (slice.getState() != Slice.State.ACTIVE) {
fail("Not all shards are ACTIVE - found a shard " + slice.getName() + " that is: " + slice.getState());
}
Map<String,Replica> shards = slice.getReplicasMap();
for (Map.Entry<String,Replica> shard : shards.entrySet()) {
Replica replica = shard.getValue();
if (replica.getState() != Replica.State.ACTIVE) {
fail("Not all replicas are ACTIVE - found a replica " + replica.getName() + " that is: " + replica.getState());
}
}
}
}
@Override
public void distribTearDown() throws Exception {
resetExceptionIgnores();
try {
zkServer.shutdown();
} catch (Exception e) {
throw new RuntimeException("Exception shutting down Zk Test Server.", e);
} finally {
try {
super.distribTearDown();
} finally {
System.clearProperty(ZK_HOST);
System.clearProperty("collection");
System.clearProperty(ENABLE_UPDATE_LOG);
System.clearProperty(REMOVE_VERSION_FIELD);
System.clearProperty("solr.directoryFactory");
System.clearProperty("solr.test.sys.prop1");
System.clearProperty("solr.test.sys.prop2");
System.clearProperty(ZOOKEEPER_FORCE_SYNC);
System.clearProperty(MockDirectoryFactory.SOLR_TESTS_ALLOW_READING_FILES_STILL_OPEN_FOR_WRITE);
}
}
}
protected void printLayout() throws Exception {
SolrZkClient zkClient = new SolrZkClient(zkServer.getZkHost(), AbstractZkTestCase.TIMEOUT);
zkClient.printLayoutToStdOut();
zkClient.close();
}
protected void restartZk(int pauseMillis) throws Exception {
log.info("Restarting ZK with a pause of {}ms in between", pauseMillis);
zkServer.shutdown();
// disconnect enough to test stalling, if things stall, then clientSoTimeout w""ill be hit
Thread.sleep(pauseMillis);
zkServer = new ZkTestServer(zkServer.getZkDir(), zkServer.getPort());
zkServer.run(false);
}
// Copy a configset up from some path on the local machine to ZK.
// Example usage:
//
// copyConfigUp(TEST_PATH().resolve("configsets"), "cloud-minimal", "configset-name", zk_address);
static protected void copyConfigUp(Path configSetDir, String srcConfigSet, String dstConfigName, String zkAddr) throws Exception {
String[] args = new String[]{
"-confname", dstConfigName,
"-confdir", srcConfigSet,
"-zkHost", zkAddr,
"-configsetsDir", configSetDir.toAbsolutePath().toString(),
};
SolrCLI.ConfigSetUploadTool tool = new SolrCLI.ConfigSetUploadTool();
int res = tool.runTool(SolrCLI.processCommandLineArgs(SolrCLI.joinCommonAndToolOptions(tool.getOptions()), args));
assertEquals("Tool should have returned 0 for success, returned: " + res, res, 0);
}
}