| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.solr.cloud; |
| |
| import org.apache.http.NoHttpResponseException; |
| import org.apache.solr.client.solrj.cloud.SocketProxy; |
| import org.apache.solr.client.solrj.embedded.JettySolrRunner; |
| import org.apache.solr.client.solrj.impl.HttpSolrClient; |
| import org.apache.solr.common.SolrException; |
| import org.apache.solr.common.cloud.Replica; |
| import org.apache.solr.util.RTimer; |
| import org.junit.BeforeClass; |
| import org.junit.Test; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| import java.io.File; |
| import java.lang.invoke.MethodHandles; |
| import java.util.List; |
| |
| public class HttpPartitionOnCommitTest extends BasicDistributedZkTest { |
| |
| private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); |
| |
| private static final long sleepMsBeforeHealPartition = 2000L; |
| |
| private final boolean onlyLeaderIndexes = random().nextBoolean(); |
| |
| @BeforeClass |
| public static void setupSysProps() { |
| System.setProperty("socketTimeout", "5000"); |
| System.setProperty("distribUpdateSoTimeout", "5000"); |
| System.setProperty("solr.httpclient.retries", "0"); |
| System.setProperty("solr.retries.on.forward", "0"); |
| System.setProperty("solr.retries.to.followers", "0"); |
| } |
| |
| public HttpPartitionOnCommitTest() { |
| super(); |
| sliceCount = 1; |
| fixShardCount(4); |
| } |
| |
| @Override |
| protected boolean useTlogReplicas() { |
| return false; // TODO: tlog replicas makes commits take way to long due to what is likely a bug and it's TestInjection use |
| } |
| |
| @Override |
| @Test |
| public void test() throws Exception { |
| oneShardTest(); |
| multiShardTest(); |
| } |
| |
| private void multiShardTest() throws Exception { |
| |
| log.info("Running multiShardTest"); |
| |
| // create a collection that has 2 shard and 2 replicas |
| String testCollectionName = "c8n_2x2_commits"; |
| createCollection(testCollectionName, "conf1", 2, 2, 1); |
| cloudClient.setDefaultCollection(testCollectionName); |
| |
| List<Replica> notLeaders = |
| ensureAllReplicasAreActive(testCollectionName, "shard1", 2, 2, 30); |
| assertTrue("Expected 1 replicas for collection " + testCollectionName |
| + " but found " + notLeaders.size() + "; clusterState: " |
| + printClusterStateInfo(), |
| notLeaders.size() == 1); |
| |
| if (log.isInfoEnabled()) { |
| log.info("All replicas active for {}", testCollectionName); |
| } |
| |
| // let's put the leader in its own partition, no replicas can contact it now |
| Replica leader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard1"); |
| if (log.isInfoEnabled()) { |
| log.info("Creating partition to leader at {}", leader.getCoreUrl()); |
| } |
| SocketProxy leaderProxy = getProxyForReplica(leader); |
| leaderProxy.close(); |
| |
| // let's find the leader of shard2 and ask him to commit |
| Replica shard2Leader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard2"); |
| sendCommitWithRetry(shard2Leader); |
| |
| Thread.sleep(sleepMsBeforeHealPartition); |
| |
| cloudClient.getZkStateReader().forceUpdateCollection(testCollectionName); // get the latest state |
| leader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard1"); |
| assertSame("Leader was not active", Replica.State.ACTIVE, leader.getState()); |
| |
| if (log.isInfoEnabled()) { |
| log.info("Healing partitioned replica at {}", leader.getCoreUrl()); |
| } |
| leaderProxy.reopen(); |
| Thread.sleep(sleepMsBeforeHealPartition); |
| |
| // try to clean up |
| attemptCollectionDelete(cloudClient, testCollectionName); |
| |
| log.info("multiShardTest completed OK"); |
| } |
| |
| private void oneShardTest() throws Exception { |
| log.info("Running oneShardTest"); |
| |
| // create a collection that has 1 shard and 3 replicas |
| String testCollectionName = "c8n_1x3_commits"; |
| createCollection(testCollectionName, "conf1", 1, 3, 1); |
| cloudClient.setDefaultCollection(testCollectionName); |
| |
| List<Replica> notLeaders = |
| ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, 30); |
| assertTrue("Expected 2 replicas for collection " + testCollectionName |
| + " but found " + notLeaders.size() + "; clusterState: " |
| + printClusterStateInfo(), |
| notLeaders.size() == 2); |
| |
| log.info("All replicas active for {}", testCollectionName); |
| |
| // let's put the leader in its own partition, no replicas can contact it now |
| Replica leader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard1"); |
| if (log.isInfoEnabled()) { |
| log.info("Creating partition to leader at {}", leader.getCoreUrl()); |
| } |
| |
| SocketProxy leaderProxy = getProxyForReplica(leader); |
| leaderProxy.close(); |
| |
| Replica replica = notLeaders.get(0); |
| sendCommitWithRetry(replica); |
| Thread.sleep(sleepMsBeforeHealPartition); |
| |
| cloudClient.getZkStateReader().forceUpdateCollection(testCollectionName); // get the latest state |
| leader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard1"); |
| assertSame("Leader was not active", Replica.State.ACTIVE, leader.getState()); |
| |
| if (log.isInfoEnabled()) { |
| log.info("Healing partitioned replica at {}", leader.getCoreUrl()); |
| } |
| leaderProxy.reopen(); |
| Thread.sleep(sleepMsBeforeHealPartition); |
| |
| // try to clean up |
| attemptCollectionDelete(cloudClient, testCollectionName); |
| |
| log.info("oneShardTest completed OK"); |
| } |
| |
| /** |
| * Overrides the parent implementation to install a SocketProxy in-front of the Jetty server. |
| */ |
| @Override |
| public JettySolrRunner createJetty(File solrHome, String dataDir, |
| String shardList, String solrConfigOverride, String schemaOverride, Replica.Type replicaType) |
| throws Exception { |
| return createProxiedJetty(solrHome, dataDir, shardList, solrConfigOverride, schemaOverride, replicaType); |
| } |
| |
| protected void sendCommitWithRetry(Replica replica) throws Exception { |
| String replicaCoreUrl = replica.getCoreUrl(); |
| log.info("Sending commit request to: {}", replicaCoreUrl); |
| final RTimer timer = new RTimer(); |
| try (HttpSolrClient client = getHttpSolrClient(replicaCoreUrl)) { |
| try { |
| client.commit(); |
| |
| if (log.isInfoEnabled()) { |
| log.info("Sent commit request to {} OK, took {}ms", replicaCoreUrl, timer.getTime()); |
| } |
| } catch (Exception exc) { |
| Throwable rootCause = SolrException.getRootCause(exc); |
| if (rootCause instanceof NoHttpResponseException) { |
| log.warn("No HTTP response from sending commit request to {}; will re-try after waiting 3 seconds", replicaCoreUrl); |
| Thread.sleep(3000); |
| client.commit(); |
| log.info("Second attempt at sending commit to {} succeeded", replicaCoreUrl); |
| } else { |
| throw exc; |
| } |
| } |
| } |
| } |
| |
| } |