/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud.autoscaling;
import static org.apache.solr.common.cloud.ZkStateReader.SOLR_AUTOSCALING_CONF_PATH;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
import org.apache.solr.client.solrj.cloud.autoscaling.Suggester;
import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventProcessorStage;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.cloud.CloudTestUtils.AutoScalingRequest;
import org.apache.solr.cloud.CloudUtil;
import org.apache.solr.cloud.SolrCloudTestCase;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.UpdateParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.Pair;
import org.apache.solr.common.util.TimeSource;
import org.apache.solr.common.util.Utils;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.util.LogLevel;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
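/**
 * Integration test that runs an {@code indexSize} autoscaling trigger configured with both
 * document-count bounds ({@code aboveDocs} / {@code belowDocs}) and byte-size bounds
 * ({@code aboveBytes} / {@code belowBytes}) against a two-node cluster, and checks the
 * shard split and shard merge operations that the trigger requests.
 */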
@LogLevel("org.apache.solr.cloud.autoscaling=DEBUG")
@LuceneTestCase.Slow
public class IndexSizeTriggerMixedBoundsTest extends SolrCloudTestCase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

// Cluster handles shared by all tests; assigned in setupCluster().
private static SolrCloudManager cloudManager;
private static SolrClient solrClient;
private static TimeSource timeSource;

// Divisor applied to the latch wait timeouts used by the test.
private static final int SPEED = 1;

// Events recorded by CapturingTriggerListener, keyed by listener name.
static Map<String, List<CapturedEvent>> listenerEvents = new ConcurrentHashMap<>();

// The latches are replaced by the test thread between phases and counted down from
// listener callbacks, so the references are volatile to make each replacement visible
// to whichever thread invokes the listeners.
static volatile CountDownLatch listenerCreated = new CountDownLatch(1);
static volatile CountDownLatch finished = new CountDownLatch(1);
/**
 * Starts a two-node cluster with the {@code cloud-minimal} configset uploaded as
 * {@code conf} and caches the client, cloud manager and time source used by the test.
 */
@BeforeClass
public static void setupCluster() throws Exception {
  // presumably needed so that the /admin/metrics data read by the test is available - confirm
  System.setProperty("metricsEnabled", "true");

  configureCluster(2).addConfig("conf", configset("cloud-minimal")).configure();

  solrClient = cluster.getSolrClient();
  cloudManager = cluster.getJettySolrRunner(0)
      .getCoreContainer()
      .getZkController()
      .getSolrCloudManager();
  timeSource = cloudManager.getTimeSource();
}
/**
 * Resets shared state after each test: removes all collections, overwrites the autoscaling
 * configuration with the JSON of an empty {@link ZkNodeProps}, pauses for five seconds of
 * time-source time, then clears the captured events and installs fresh latches.
 */
@After
public void restoreDefaults() throws Exception {
  cluster.deleteAllCollections();

  // NOTE(review): version -1 presumably means "overwrite regardless of current version" - confirm
  byte[] emptyConfig = Utils.toJSON(new ZkNodeProps());
  cloudManager.getDistribStateManager().setData(SOLR_AUTOSCALING_CONF_PATH, emptyConfig, -1);

  final long settleMillis = 5000;
  cloudManager.getTimeSource().sleep(settleMillis);

  listenerEvents.clear();
  listenerCreated = new CountDownLatch(1);
  finished = new CountDownLatch(1);
}
/**
 * Drops every static reference that {@code setupCluster()} assigned so that nothing obtained
 * from the cluster stays reachable through this class once the test class has finished.
 */
@AfterClass
public static void teardown() throws Exception {
  solrClient = null;
  cloudManager = null;
  // timeSource is obtained from cloudManager in setupCluster(), so release it as well
  timeSource = null;
}
/**
 * Trigger listener that records every notification it receives into
 * {@code listenerEvents}, under the name of its own listener configuration.
 */
public static class CapturingTriggerListener extends TriggerListenerBase {

  /** Delegates to the base class and then signals {@code listenerCreated}. */
  @Override
  public void configure(SolrResourceLoader loader,
                        SolrCloudManager cloudManager,
                        AutoScalingConfig.TriggerListenerConfig config) throws TriggerValidationException {
    super.configure(loader, cloudManager, config);
    listenerCreated.countDown();
  }

  /**
   * Wraps the notification in a {@link CapturedEvent} stamped with the current time-source
   * nanosecond value, logs it, and appends it to this listener's list of events.
   */
  @Override
  public synchronized void onEvent(TriggerEvent event,
                                   TriggerEventProcessorStage stage,
                                   String actionName,
                                   ActionContext context,
                                   Throwable error,
                                   String message) {
    // look up (or create) the bucket first, exactly as before, then record into it
    List<CapturedEvent> bucket = listenerEvents.computeIfAbsent(config.name, name -> new ArrayList<>());
    CapturedEvent captured =
        new CapturedEvent(timeSource.getTimeNs(), context, config, stage, actionName, event, message);
    log.info("=======> {}", captured);
    bucket.add(captured);
  }
}
/**
 * Trigger listener whose only job is to count down the {@code finished} latch whenever it is
 * notified, letting the test thread wait for event processing to complete.
 */
public static class FinishedProcessingListener extends TriggerListenerBase {

  /** Counts down {@code finished}; none of the arguments are inspected. */
  @Override
  public void onEvent(TriggerEvent event,
                      TriggerEventProcessorStage stage,
                      String actionName,
                      ActionContext context,
                      Throwable error,
                      String message) throws Exception {
    finished.countDown();
  }
}
/**
 * Runs an {@code indexSize} trigger with mixed bounds through two phases.
 *
 * <p>Phase one: documents are indexed so that the {@code aboveBytes} bound (two thirds of the
 * largest reported index size) is exceeded; the test expects a SPLITSHARD operation for both
 * {@code shard1} and {@code shard2} and a resulting cluster shape of six shards.
 *
 * <p>Phase two: most documents are deleted and the index is optimized, a few documents are
 * re-added, and the test expects only MERGESHARDS operations, all of which must also be
 * reported under {@code properties.unsupportedOps}.
 */
@Test
@SuppressWarnings({"unchecked"})
public void testMixedBounds() throws Exception {
  String collectionName = "testMixedBounds_collection";
  CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
      "conf", 2, 2).setMaxShardsPerNode(2);
  create.process(solrClient);
  CloudUtil.waitForState(cloudManager, "failed to create " + collectionName, collectionName,
      CloudUtil.clusterShape(2, 2, false, true));

  // index 10 batches of 100 documents, each with a 130-character random field value
  for (int j = 0; j < 10; j++) {
    UpdateRequest ureq = new UpdateRequest();
    ureq.setParam("collection", collectionName);
    for (int i = 0; i < 100; i++) {
      SolrInputDocument doc = new SolrInputDocument("id", "id-" + (i * 100) + "-" + j);
      doc.addField("foo", TestUtil.randomSimpleString(random(), 130, 130));
      ureq.add(doc);
    }
    solrClient.request(ureq);
  }
  solrClient.commit(collectionName);

  // check the actual size of shard to set the threshold
  long maxSize = maxReportedIndexSizeInBytes();
  assertTrue("maxSize should be non-zero", maxSize > 0);
  long aboveBytes = maxSize * 2 / 3;

  // need to wait for recovery after splitting
  long waitForSeconds = 10 + random().nextInt(5);

  // the trigger is initially disabled so that we have time to add listeners
  // and have them capture all events once the trigger is enabled
  String setTriggerCommand = "{" +
      "'set-trigger' : {" +
      "'name' : 'index_size_trigger4'," +
      "'event' : 'indexSize'," +
      "'waitFor' : '" + waitForSeconds + "s'," +
      // don't hit this limit when indexing
      "'aboveDocs' : 10000," +
      // hit this limit when deleting
      "'belowDocs' : 100," +
      // hit this limit when indexing
      "'aboveBytes' : " + aboveBytes + "," +
      // don't hit this limit when deleting
      "'belowBytes' : 10," +
      "'enabled' : false," +
      "'actions' : [{'name' : 'compute_plan', 'class' : 'solr.ComputePlanAction'}," +
      "{'name' : 'execute_plan', 'class' : '" + ExecutePlanAction.class.getName() + "'}]" +
      "}}";
  postAutoScalingCommand(setTriggerCommand);

  String setListenerCommand = "{" +
      "'set-listener' : " +
      "{" +
      "'name' : 'capturing4'," +
      "'trigger' : 'index_size_trigger4'," +
      "'stage' : ['STARTED','ABORTED','SUCCEEDED','FAILED']," +
      "'beforeAction' : ['compute_plan','execute_plan']," +
      "'afterAction' : ['compute_plan','execute_plan']," +
      "'class' : '" + CapturingTriggerListener.class.getName() + "'" +
      "}" +
      "}";
  postAutoScalingCommand(setListenerCommand);

  setListenerCommand = "{" +
      "'set-listener' : " +
      "{" +
      "'name' : 'finished'," +
      "'trigger' : 'index_size_trigger4'," +
      "'stage' : ['SUCCEEDED']," +
      "'class' : '" + FinishedProcessingListener.class.getName() + "'" +
      "}" +
      "}";
  postAutoScalingCommand(setListenerCommand);

  String resumeTriggerCommand = "{" +
      "'resume-trigger' : {" +
      "'name' : 'index_size_trigger4'" +
      "}" +
      "}";
  String suspendTriggerCommand = "{" +
      "'suspend-trigger' : {" +
      "'name' : 'index_size_trigger4'" +
      "}" +
      "}";

  // ---- phase one: exceed aboveBytes and expect shard splits ----

  // now enable the trigger
  log.info("-- resuming trigger");
  postAutoScalingCommand(resumeTriggerCommand);

  timeSource.sleep(TimeUnit.MILLISECONDS.convert(waitForSeconds + 1, TimeUnit.SECONDS));

  boolean finishedInTime = finished.await(90000 / SPEED, TimeUnit.MILLISECONDS);
  assertTrue("did not finish processing in time", finishedInTime);

  // suspend the trigger to avoid generating more events
  log.info("-- suspending trigger");
  postAutoScalingCommand(suspendTriggerCommand);

  // only the 'capturing4' listener records into listenerEvents
  assertEquals(1, listenerEvents.size());
  List<CapturedEvent> events = listenerEvents.get("capturing4");
  assertCompleteEventSequence(events);

  // collection should have 2 inactive and 4 active shards
  CloudUtil.waitForState(cloudManager, "failed to split shards of " + collectionName, collectionName,
      CloudUtil.clusterShape(6, 2, true, true));

  // check ops
  List<TriggerEvent.Op> ops = (List<TriggerEvent.Op>) events.get(4).event.getProperty(TriggerEvent.REQUESTED_OPS);
  assertNotNull("should contain requestedOps", ops);
  assertEquals("number of ops", 2, ops.size());
  boolean shard1 = false;
  boolean shard2 = false;
  for (TriggerEvent.Op op : ops) {
    assertEquals(CollectionParams.CollectionAction.SPLITSHARD, op.getAction());
    Set<Pair<String, String>> hints = (Set<Pair<String, String>>) op.getHints().get(Suggester.Hint.COLL_SHARD);
    assertNotNull("hints", hints);
    assertEquals("hints", 1, hints.size());
    Pair<String, String> p = hints.iterator().next();
    assertEquals(collectionName, p.first());
    if (p.second().equals("shard1")) {
      shard1 = true;
    } else if (p.second().equals("shard2")) {
      shard2 = true;
    } else {
      fail("unexpected shard name " + p.second());
    }
  }
  assertTrue("shard1 should be split", shard1);
  assertTrue("shard2 should be split", shard2);

  // ---- phase two: drop below belowDocs and expect merge requests ----

  // now delete most of docs to trigger belowDocs condition
  listenerEvents.clear();
  finished = new CountDownLatch(1);

  // suspend the trigger first so that we can safely delete all docs
  // (it was already suspended at the end of phase one; the command is simply sent again)
  postAutoScalingCommand(suspendTriggerCommand);

  log.info("-- deleting documents");
  for (int j = 0; j < 10; j++) {
    UpdateRequest ureq = new UpdateRequest();
    ureq.setParam("collection", collectionName);
    for (int i = 0; i < 98; i++) {
      ureq.deleteById("id-" + (i * 100) + "-" + j);
    }
    solrClient.request(ureq);
  }
  timeSource.sleep(5000);

  // make sure the actual index size is reduced by deletions, otherwise we may still violate aboveBytes
  UpdateRequest ur = new UpdateRequest();
  ur.setParam(UpdateParams.COMMIT, "true");
  ur.setParam(UpdateParams.EXPUNGE_DELETES, "true");
  ur.setParam(UpdateParams.OPTIMIZE, "true");
  ur.setParam(UpdateParams.MAX_OPTIMIZE_SEGMENTS, "1");
  ur.setParam(UpdateParams.WAIT_SEARCHER, "true");
  ur.setParam(UpdateParams.OPEN_SEARCHER, "true");
  log.info("-- requesting optimize / expungeDeletes / commit");
  solrClient.request(ur, collectionName);

  // wait for the segments to merge to reduce the index size
  timeSource.sleep(50000);

  // add some docs so that every shard gets an update
  // we can reduce the number of docs here but this also works
  UpdateRequest refill = new UpdateRequest();
  refill.setParam("collection", collectionName);
  for (int i = 0; i < 98; i++) {
    refill.add("id", "id-" + (i * 100) + "-0");
  }
  solrClient.request(refill);

  log.info("-- requesting commit");
  solrClient.commit(collectionName, true, true);

  // resume the trigger
  log.info("-- resuming trigger");
  postAutoScalingCommand(resumeTriggerCommand);

  timeSource.sleep(TimeUnit.MILLISECONDS.convert(waitForSeconds + 1, TimeUnit.SECONDS));

  finishedInTime = finished.await(90000 / SPEED, TimeUnit.MILLISECONDS);
  assertTrue("did not finish processing in time", finishedInTime);

  log.info("-- suspending trigger");
  postAutoScalingCommand(suspendTriggerCommand);

  assertEquals(1, listenerEvents.size());
  events = listenerEvents.get("capturing4");
  assertCompleteEventSequence(events);

  // check ops
  ops = (List<TriggerEvent.Op>) events.get(4).event.getProperty(TriggerEvent.REQUESTED_OPS);
  assertNotNull("should contain requestedOps", ops);
  assertTrue("number of ops: " + ops, ops.size() > 0);
  for (TriggerEvent.Op op : ops) {
    assertEquals(CollectionParams.CollectionAction.MERGESHARDS, op.getAction());
    Set<Pair<String, String>> hints = (Set<Pair<String, String>>) op.getHints().get(Suggester.Hint.COLL_SHARD);
    assertNotNull("hints", hints);
    assertEquals("hints", 2, hints.size());
    Pair<String, String> p = hints.iterator().next();
    assertEquals(collectionName, p.first());
  }

  // TODO: fix this once MERGESHARDS is supported
  List<TriggerEvent.Op> unsupportedOps = (List<TriggerEvent.Op>) events.get(2).context.get("properties.unsupportedOps");
  assertNotNull("should have unsupportedOps", unsupportedOps);
  assertEquals(unsupportedOps.toString() + "\n" + ops, ops.size(), unsupportedOps.size());
  unsupportedOps.forEach(op -> assertEquals(CollectionParams.CollectionAction.MERGESHARDS, op.getAction()));
}

/** Posts an autoscaling command and asserts that the response's {@code result} is {@code success}. */
private static void postAutoScalingCommand(String command) throws Exception {
  @SuppressWarnings({"rawtypes"})
  SolrRequest req = AutoScalingRequest.create(SolrRequest.METHOD.POST, command);
  NamedList<Object> response = solrClient.request(req);
  // expected value goes first so that a failure reports "expected:<success> but was:<...>"
  assertEquals("success", response.get("result").toString());
}

/** Returns the largest {@code INDEX.sizeInBytes} found in the {@code /admin/metrics} response for group {@code core}. */
@SuppressWarnings({"unchecked"})
private static long maxReportedIndexSizeInBytes() throws Exception {
  QueryResponse rsp = solrClient.query(params(CommonParams.QT, "/admin/metrics", "group", "core"));
  NamedList<Object> allMetrics = (NamedList<Object>) rsp.getResponse().get("metrics");
  assertNotNull("'metrics' section missing: " + rsp.getResponse(), allMetrics);
  long maxSize = 0;
  for (Map.Entry<String, Object> entry : allMetrics) {
    NamedList<Object> metrics = (NamedList<Object>) entry.getValue();
    Object size = metrics.get("INDEX.sizeInBytes");
    assertNotNull("INDEX.sizeInBytes missing: " + metrics, size);
    assertTrue("not a number", size instanceof Number);
    // kept as long: a byte count is not guaranteed to fit into an int
    maxSize = Math.max(maxSize, ((Number) size).longValue());
  }
  return maxSize;
}

/** Asserts that exactly six events were captured: STARTED, two BEFORE/AFTER_ACTION pairs, then SUCCEEDED. */
private static void assertCompleteEventSequence(List<CapturedEvent> events) {
  assertNotNull("'capturing4' events not found", events);
  assertEquals("events: " + events, 6, events.size());
  assertEquals(TriggerEventProcessorStage.STARTED, events.get(0).stage);
  assertEquals(TriggerEventProcessorStage.BEFORE_ACTION, events.get(1).stage);
  assertEquals(TriggerEventProcessorStage.AFTER_ACTION, events.get(2).stage);
  assertEquals(TriggerEventProcessorStage.BEFORE_ACTION, events.get(3).stage);
  assertEquals(TriggerEventProcessorStage.AFTER_ACTION, events.get(4).stage);
  assertEquals(TriggerEventProcessorStage.SUCCEEDED, events.get(5).stage);
}
}