blob: ded55877ee3683fb63a97c0e4e787a2e8bcfb465 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.TimeUnit;
@Slow
@LuceneTestCase.Nightly // MRM TODO: look at setErrorHook
public class ChaosMonkeySafeLeaderTest extends SolrCloudBridgeTestCase {
/** Logger for this test class. */
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

/**
 * Duration of the chaos phase in milliseconds, read from the
 * {@code solr.tests.cloud.cm.runlength} system property. The default of {@code -1}
 * means that {@link #test()} chooses a duration itself.
 */
private static final int RUN_LENGTH = Integer.parseInt(System.getProperty("solr.tests.cloud.cm.runlength", "-1"));

/** Chaos monkey for the test collection; a new instance is created in {@link #setUp()}. */
private ClusterChaosMonkey chaosMonkey;
/**
 * One-time suite configuration: sets the system properties used by this test and
 * decides the cluster layout ({@code sliceCount}, {@code replicationFactor},
 * {@code numJettys}).
 *
 * <p>The number of slices can be forced with the {@code solr.tests.cloud.cm.slicecount}
 * system property; when it is absent (or {@code -1}) a random count is chosen:
 * 1-5 for nightly runs, 1-3 otherwise.
 *
 * @throws Exception if {@code useFactory} fails
 */
@BeforeClass
public static void beforeSuperClass() throws Exception {
  // NOTE(review): null presumably lets the base class pick the directory factory - confirm.
  useFactory(null);
  // schemaString = "schema15.xml"; // we need a string id

  System.setProperty("solr.autoCommit.maxTime", "15000");
  System.setProperty("solr.httpclient.retries", "1");
  System.setProperty("solr.retries.on.forward", "1");
  System.setProperty("solr.retries.to.followers", "1");
  System.setProperty("solr.suppressDefaultConfigBootstrap", "false");

  createControl = false;

  sliceCount = Integer.parseInt(System.getProperty("solr.tests.cloud.cm.slicecount", "-1"));
  if (sliceCount == -1) {
    sliceCount = random().nextInt(TEST_NIGHTLY ? 5 : 3) + 1;
  }

  // Three replicas per slice, so every slice has more than one replica for the
  // chaos monkey to act on; one Jetty is started per replica.
  replicationFactor = 3;
  numJettys = sliceCount * replicationFactor;
}
/**
 * Clears every system property that {@link #beforeSuperClass()} sets, so that the
 * values chosen for this suite do not leak into other tests running in the same JVM.
 */
@AfterClass
public static void afterSuperClass() {
  System.clearProperty("solr.autoCommit.maxTime");
  System.clearProperty("solr.httpclient.retries");
  System.clearProperty("solr.retries.on.forward");
  System.clearProperty("solr.retries.to.followers");
  System.clearProperty("solr.suppressDefaultConfigBootstrap");
  //clearErrorHook();
}
/**
 * Runs the superclass set-up and then creates a new {@link ClusterChaosMonkey}
 * for {@code COLLECTION} on the test cluster.
 *
 * @throws Exception if the superclass set-up or the chaos monkey construction fails
 */
@Before
@Override
public void setUp() throws Exception {
  super.setUp();
  chaosMonkey = new ClusterChaosMonkey(cluster, COLLECTION);
  //setErrorHook();
}
/**
 * Field names returned by {@link #getFieldNames()}.
 * NOTE(review): presumably parallel to {@link #randVals} (entry i generates values
 * for field i) - confirm against the code that consumes both arrays.
 */
protected static final String[] fieldNames = new String[]{"f_i", "f_f", "f_d", "f_l", "f_dt"};
/** Random value generators returned by {@link #getRandValues()}; same length as {@link #fieldNames}. */
protected static final RandVal[] randVals = new RandVal[]{rint, rfloat, rdouble, rlong, rdate};
/**
 * @return the shared {@link #fieldNames} array itself (not a copy)
 */
public String[] getFieldNames() {
return fieldNames;
}
/**
 * @return the shared {@link #randVals} array itself (not a copy)
 */
public RandVal[] getRandValues() {
return randVals;
}
public ChaosMonkeySafeLeaderTest() throws Exception {
super();
}
/**
 * Indexes documents from two background threads while the chaos monkey is running,
 * then checks that each indexing thread recorded fewer than 10 failures, waits for
 * the collection to settle, commits and runs the shard consistency check.
 *
 * <p>The indexing threads are always stopped and joined, even when the chaos phase
 * fails, so that a failing run does not leave them indexing in the background.
 *
 * @throws Exception on any failure while indexing, waiting for recovery or verifying
 */
@Test
public void test() throws Exception {
  handle.clear();
  handle.put("timestamp", SKIPVAL);

  // randomly turn on 1 seconds 'soft' commit
  //randomlyEnableAutoSoftCommit();

  cluster.waitForActiveCollection(COLLECTION, sliceCount, sliceCount * replicationFactor);
  tryDelete();

  final int threadCount = 2;

  // The random() calls below stay in a fixed order so a given seed reproduces the same run.
  int batchSize = 1;
  if (random().nextBoolean()) {
    batchSize = random().nextInt(98) + 2;
  }
  final boolean pauseBetweenUpdates = !TEST_NIGHTLY || random().nextBoolean();
  final int maxUpdates = pauseBetweenUpdates ? 1500 : 1000 + random().nextInt(1000);

  List<StoppableIndexingThread> threads = new ArrayList<>(threadCount);
  try {
    for (int i = 0; i < threadCount; i++) {
      StoppableIndexingThread indexThread = new StoppableIndexingThread(controlClient, cloudClient,
          Integer.toString(i), true, maxUpdates, batchSize, pauseBetweenUpdates);
      indexThread.setUseLongId(true);
      threads.add(indexThread);
      indexThread.start();
    }

    chaosMonkey.startTheMonkey(false, 5000);
    try {
      Thread.sleep(chooseChaosRunLengthMs());
    } finally {
      chaosMonkey.stopTheMonkey();
    }
  } finally {
    // Ask every thread to stop first, then wait for all of them.
    for (StoppableIndexingThread indexThread : threads) {
      indexThread.safeStop();
    }
    for (StoppableIndexingThread indexThread : threads) {
      indexThread.join();
    }
  }

  for (StoppableIndexingThread indexThread : threads) {
    assertTrue(String.valueOf(indexThread.getFailCount()), indexThread.getFailCount() < 10);
  }

  awaitLiveReplicasActiveWithLeaders();

  commit();

  // MRM TODO: make test fail on compare fail
  cluster.getSolrClient().getZkStateReader().checkShardConsistency(COLLECTION);

  // createControl is switched off in beforeSuperClass(), so the control client may be
  // absent; only query it when it exists.
  if (VERBOSE && controlClient != null) {
    System.out.println("control docs:" + controlClient.query(new SolrQuery("*:*")).getResults().getNumFound() + "\n\n");
  }

  // TODO: re-enable the follow-up check that creates a collection ("testcollection")
  // to make sure the overseer has survived the expiration and session loss, optionally
  // after restarting ZooKeeper on nightly runs.
}

/**
 * Returns how long the chaos phase should last, in milliseconds: the value of
 * {@code RUN_LENGTH} when it is not {@code -1}, otherwise a random pick from a fixed
 * list (several durations for nightly runs, 15 seconds otherwise).
 */
private static long chooseChaosRunLengthMs() {
  if (RUN_LENGTH != -1) {
    return RUN_LENGTH;
  }
  int[] runTimes = TEST_NIGHTLY
      ? new int[] {5000, 6000, 10000, 15000, 25000, 30000, 30000, 45000, 90000}
      : new int[] {15000};
  return runTimes[random().nextInt(runTimes.length)];
}

/**
 * Blocks until every replica hosted on a live node is {@code ACTIVE} and a leader can
 * be resolved for every slice of {@code COLLECTION}. A failed leader lookup is logged
 * and the whole wait is retried after a short pause; an interrupt ends the wait.
 *
 * @throws Exception if waiting for the collection state fails or the thread is interrupted
 */
private void awaitLiveReplicasActiveWithLeaders() throws Exception {
  while (true) {
    cluster.getSolrClient().getZkStateReader().waitForState(COLLECTION, 15, TimeUnit.SECONDS, (liveNodes, collectionState) -> {
      if (collectionState == null) {
        return false;
      }
      for (Slice slice : collectionState.getSlices()) {
        for (Replica replica : slice.getReplicas()) {
          // Replicas on nodes that are down are ignored; live ones must be ACTIVE.
          if (cluster.getSolrClient().getZkStateReader().isNodeLive(replica.getNodeName())
              && replica.getState() != Replica.State.ACTIVE) {
            return false;
          }
        }
      }
      return true;
    });

    Collection<Slice> slices = cluster.getSolrClient().getZkStateReader().getCollectionOrNull(COLLECTION).getSlices();
    try {
      for (Slice slice : slices) {
        cluster.getSolrClient().getZkStateReader().getLeaderRetry(cluster.getSolrClient().getHttpClient(), COLLECTION, slice.getName(), 5000, true);
      }
      return;
    } catch (Exception e) {
      if (e instanceof InterruptedException) {
        // Do not keep retrying after an interrupt: restore the flag and abort.
        Thread.currentThread().interrupt();
        throw e;
      }
      log.error("exception waiting for leaders", e);
      Thread.sleep(150);
    }
  }
}
/**
 * Best-effort removal of all documents: retries {@code del("*:*")} every 100 ms for up
 * to 10 seconds while it fails with a {@link SolrServerException}. If it never
 * succeeds, a warning is logged and the method returns normally.
 *
 * @throws Exception any other failure from {@code del}, or an interrupt while sleeping
 */
private void tryDelete() throws Exception {
  final long timeoutNanos = TimeUnit.SECONDS.toNanos(10);
  final long start = System.nanoTime();
  // Compare elapsed time rather than absolute nanoTime values; only differences
  // between nanoTime readings are meaningful and overflow-safe.
  while (System.nanoTime() - start < timeoutNanos) {
    try {
      del("*:*");
      return;
    } catch (SolrServerException e) {
      // cluster may not be up yet
      log.error("del(*:*) failed, will retry", e);
    }
    Thread.sleep(100);
  }
  log.warn("Giving up on del(*:*) after 10 seconds; continuing without it");
}
/**
 * Indexes one document built from the given field arguments plus a fixed
 * {@code rnd_b=true} field. The random fields are skipped - they can deadlock...
 *
 * @param fields field arguments handed to {@code addFields} unchanged
 * @throws Exception if indexing the document fails
 */
@Override
protected void indexr(Object... fields) throws Exception {
  final SolrInputDocument document = new SolrInputDocument();
  addFields(document, fields);
  addFields(document, "rnd_b", true);
  indexDoc(document);
}
}